[
  {
    "benchmark": "mmlu",
    "strategy": "majority",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 16702.5512,
    "latency_std_ms": 6708.055199844984,
    "agreement_rate": 1.0,
    "cpu_mean": 92.992,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "mmlu",
    "strategy": "weighted",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 15521.871160000002,
    "latency_std_ms": 5363.6923109096115,
    "agreement_rate": 1.0,
    "cpu_mean": 96.10000000000002,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "mmlu",
    "strategy": "isp",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 15147.101280000003,
    "latency_std_ms": 4935.414184724795,
    "agreement_rate": 1.0,
    "cpu_mean": 95.84799999999997,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "mmlu",
    "strategy": "topic",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 17547.183759999996,
    "latency_std_ms": 5457.580227551845,
    "agreement_rate": 1.0,
    "cpu_mean": 93.36400000000002,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "mmlu",
    "strategy": "debate",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 65947.43432,
    "latency_std_ms": 18010.785620703307,
    "agreement_rate": 0.8,
    "cpu_mean": 96.22800000000001,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "gsm8k",
    "strategy": "majority",
    "n": 25,
    "accuracy": 0.6,
    "f1": 0.6,
    "latency_mean_ms": 11637.811959999999,
    "latency_std_ms": 1980.2985506813627,
    "agreement_rate": 0.6,
    "cpu_mean": 94.264,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.39361014383719795,
    "accuracy_ci_high": 0.806389856162802
  },
  {
    "benchmark": "gsm8k",
    "strategy": "weighted",
    "n": 25,
    "accuracy": 0.8,
    "f1": 0.8,
    "latency_mean_ms": 12952.785080000001,
    "latency_std_ms": 3326.2937030317357,
    "agreement_rate": 0.6,
    "cpu_mean": 95.36800000000001,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.631483388104907,
    "accuracy_ci_high": 0.9685166118950931
  },
  {
    "benchmark": "gsm8k",
    "strategy": "isp",
    "n": 25,
    "accuracy": 0.8,
    "f1": 0.8,
    "latency_mean_ms": 11935.73512,
    "latency_std_ms": 1931.8807780781342,
    "agreement_rate": 0.6,
    "cpu_mean": 94.736,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.631483388104907,
    "accuracy_ci_high": 0.9685166118950931
  },
  {
    "benchmark": "gsm8k",
    "strategy": "topic",
    "n": 25,
    "accuracy": 0.6,
    "f1": 0.6,
    "latency_mean_ms": 13374.217839999998,
    "latency_std_ms": 3295.0317955864843,
    "agreement_rate": 0.8,
    "cpu_mean": 89.56800000000001,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.39361014383719795,
    "accuracy_ci_high": 0.806389856162802
  },
  {
    "benchmark": "gsm8k",
    "strategy": "debate",
    "n": 25,
    "accuracy": 0.4,
    "f1": 0.4,
    "latency_mean_ms": 54589.582400000014,
    "latency_std_ms": 6530.343325757433,
    "agreement_rate": 0.6,
    "cpu_mean": 92.44400000000002,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.19361014383719796,
    "accuracy_ci_high": 0.606389856162802
  },
  {
    "benchmark": "truthfulqa",
    "strategy": "majority",
    "n": 25,
    "accuracy": 0.8,
    "f1": 0.8,
    "latency_mean_ms": 13219.0146,
    "latency_std_ms": 3880.548136297822,
    "agreement_rate": 0.8,
    "cpu_mean": 97.31600000000002,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.631483388104907,
    "accuracy_ci_high": 0.9685166118950931
  },
  {
    "benchmark": "truthfulqa",
    "strategy": "weighted",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 12597.287599999998,
    "latency_std_ms": 2418.2251339878903,
    "agreement_rate": 0.8,
    "cpu_mean": 97.46,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "truthfulqa",
    "strategy": "isp",
    "n": 25,
    "accuracy": 1.0,
    "f1": 1.0,
    "latency_mean_ms": 13456.34012,
    "latency_std_ms": 4024.952133368682,
    "agreement_rate": 0.8,
    "cpu_mean": 97.98,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 1.0,
    "accuracy_ci_high": 1.0
  },
  {
    "benchmark": "truthfulqa",
    "strategy": "topic",
    "n": 25,
    "accuracy": 0.96,
    "f1": 0.96,
    "latency_mean_ms": 15219.863440000001,
    "latency_std_ms": 5112.7528164652485,
    "agreement_rate": 0.8,
    "cpu_mean": 91.44800000000001,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.8774440575348792,
    "accuracy_ci_high": 1.0425559424651207
  },
  {
    "benchmark": "truthfulqa",
    "strategy": "debate",
    "n": 25,
    "accuracy": 0.6,
    "f1": 0.6,
    "latency_mean_ms": 57227.058840000005,
    "latency_std_ms": 10362.60496680142,
    "agreement_rate": 0.8,
    "cpu_mean": 98.40800000000002,
    "gpu_util_mean": 0.0,
    "accuracy_ci_low": 0.39361014383719795,
    "accuracy_ci_high": 0.806389856162802
  }
]