{
  "schema_version": "sensebench-run-v2",
  "run_id": "vllm-llama-4-maverick-int4-b300-p001-lexen-v1-20260614",
  "created_at": "2026-06-14T15:13:31.931784+00:00",
  "git_commit": null,
  "runner": {
    "github_handle": "vassiliphilippov",
    "name": "Vassili Philippov",
    "contact": null
  },
  "dataset": {
    "dataset_id": "lexen",
    "dataset_version": "lexen-v1",
    "dataset_revision": null,
    "content_hash": "sha256:5fd4382b93f19087a1e31f6dd7d1db17c1eb17ff80fcbe1d3fdd55c0c3ecefe8",
    "item_count": 4861
  },
  "prompt": {
    "id": "p001",
    "sensebench_version": "0.1.0"
  },
  "model": {
    "kind": "self_hosted_llm",
    "display_name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "requested_model": "hosted_vllm/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "resolved_model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "resolved_model_counts": {
      "meta-llama/Llama-4-Maverick-17B-128E-Instruct": 4921
    },
    "llm_vendor": "Meta",
    "source_kind": "open_source",
    "license": "llama4",
    "model_url": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "hf_revision": "b26390e93d51850e5874bb093244f3e11dcbe158",
    "quantization": "awq-int4",
    "inference_engine": "vllm",
    "inference_engine_version": "0.22.1",
    "container_image": "vllm/vllm-openai:v0.22.1",
    "serve_command": "exec vllm serve RedHatAI/Llama-4-Maverick-17B-128E-Instruct-quantized.w4a16 --port 8000 --served-model-name meta-llama/Llama-4-Maverick-17B-128E-Instruct --max-model-len 8192 --gpu-memory-utilization 0.90 --max-num-seqs 32 --language-model-only --revision b26390e93d51850e5874bb093244f3e11dcbe158",
    "endpoint_base_url": "http://localhost:8000/v1"
  },
  "sampling": {
    "temperature": 0.0,
    "top_p": null,
    "max_tokens": 2048,
    "seed": null,
    "extra": {}
  },
  "policy": {
    "votes_per_item": 1,
    "semantic_reasks_per_invalid_vote": 1,
    "tie_break": "earliest_vote",
    "monosemous_policy": "short_circuit"
  },
  "machine": {
    "platform": "Linux-6.8.0-117-generic-x86_64-with-glibc2.35",
    "cpu_model": "Intel(R) Xeon(R) 6776P",
    "cpu_cores": 256,
    "ram_gib": 3023.5,
    "gpu": {
      "name": "NVIDIA B300 SXM6 AC",
      "count": 1,
      "vram_mib_per_gpu": 275040,
      "driver_version": "590.48.01",
      "cuda_version": "13.1"
    },
    "provider": "vast.ai",
    "instance_id": "40919269",
    "hourly_rate_usd": 6.71875
  },
  "execution": {
    "concurrency": 64,
    "warmup_call_count": 8,
    "timing": {
      "benchmark_started_at": "2026-06-14T14:27:25.283202+00:00",
      "benchmark_ended_at": "2026-06-14T15:13:31.930156+00:00",
      "benchmark_seconds": 2766.646948114969,
      "setup_seconds": 2.791380910202861
    }
  },
  "totals": {
    "item_count": 4861,
    "correct_count": 4256,
    "accuracy": 0.8755400123431393,
    "call_count": 4921,
    "usage": {
      "input_tokens": 3057625,
      "cached_input_tokens": null,
      "output_tokens": 1448877,
      "reasoning_output_tokens": null
    },
    "cost": {
      "currency": "USD",
      "total_usd": 5.1634469951798465,
      "input_uncached_usd": null,
      "input_cached_usd": null,
      "output_usd": null,
      "input_uncached_unit_price_usd": null,
      "input_cached_unit_price_usd": null,
      "output_unit_price_usd": null,
      "source": "machine_time_estimate"
    },
    "elapsed_seconds": 2766.646948114969
  }
}
