Control Panel
Runs

Runs

Trajectory-first run records (backend: supabase). Click a run for replay + artifacts.

7 results
| Run ID | Status | Composite | Mode | Benchmark | Tasks | Created |
| --- | --- | --- | --- | --- | --- | --- |
| run-supabase-probe-1771761920 | success | 1.000 | local | probe-benchmark-v1 | n/a | 2/22/2026, 12:05:20 PM |
| run-persist-suite-check-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:38:19 AM |
| run-persist-check-001 | success | 0.988 | auto | benchmark-v1 | n/a | 2/22/2026, 5:38:18 AM |
| run-final-evaluator-suite-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:32:37 AM |
| run-evaluator-suite-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:30:58 AM |
| run-store-test-002 | success | 0.988 | auto | benchmark-v1 | n/a | 2/22/2026, 5:26:45 AM |
| run-store-test-001 | error | 0.015 | auto | benchmark-v1 | n/a | 2/22/2026, 5:25:41 AM |
Showing 1-7 of 7
Raw JSON
[
  {
    "run_id": "run-supabase-probe-1771761920",
    "status": "success",
    "composite_score": 1,
    "execution_mode": "local",
    "benchmark_version_id": "probe-benchmark-v1",
    "task_count": null,
    "created_at": "2026-02-22T12:05:20Z"
  },
  {
    "run_id": "run-persist-suite-check-001",
    "status": "success",
    "composite_score": 0.9875,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": 2,
    "created_at": "2026-02-22T05:38:19Z"
  },
  {
    "run_id": "run-persist-check-001",
    "status": "success",
    "composite_score": 0.9875,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": null,
    "created_at": "2026-02-22T05:38:18Z"
  },
  {
    "run_id": "run-final-evaluator-suite-001",
    "status": "success",
    "composite_score": 0.9875,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": 2,
    "created_at": "2026-02-22T05:32:37Z"
  },
  {
    "run_id": "run-evaluator-suite-001",
    "status": "success",
    "composite_score": 0.9875,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": 2,
    "created_at": "2026-02-22T05:30:58Z"
  },
  {
    "run_id": "run-store-test-002",
    "status": "success",
    "composite_score": 0.9875,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": null,
    "created_at": "2026-02-22T05:26:45Z"
  },
  {
    "run_id": "run-store-test-001",
    "status": "error",
    "composite_score": 0.015,
    "execution_mode": "auto",
    "benchmark_version_id": "benchmark-v1",
    "task_count": null,
    "created_at": "2026-02-22T05:25:41Z"
  }
]