Runs
Trajectory-first run records (backend: Supabase). Click a run to view its replay and artifacts.
7 results
| Run ID | Status | Composite | Mode | Benchmark | Tasks | Created (UTC) |
|---|---|---|---|---|---|---|
| run-supabase-probe-1771761920 | success | 1.000 | local | probe-benchmark-v1 | n/a | 2/22/2026, 12:05:20 PM |
| run-persist-suite-check-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:38:19 AM |
| run-persist-check-001 | success | 0.988 | auto | benchmark-v1 | n/a | 2/22/2026, 5:38:18 AM |
| run-final-evaluator-suite-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:32:37 AM |
| run-evaluator-suite-001 | success | 0.988 | auto | benchmark-v1 | 2 | 2/22/2026, 5:30:58 AM |
| run-store-test-002 | success | 0.988 | auto | benchmark-v1 | n/a | 2/22/2026, 5:26:45 AM |
| run-store-test-001 | error | 0.015 | auto | benchmark-v1 | n/a | 2/22/2026, 5:25:41 AM |
Showing 1-7 of 7
Raw JSON
[
{
"run_id": "run-supabase-probe-1771761920",
"status": "success",
"composite_score": 1,
"execution_mode": "local",
"benchmark_version_id": "probe-benchmark-v1",
"task_count": null,
"created_at": "2026-02-22T12:05:20Z"
},
{
"run_id": "run-persist-suite-check-001",
"status": "success",
"composite_score": 0.9875,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": 2,
"created_at": "2026-02-22T05:38:19Z"
},
{
"run_id": "run-persist-check-001",
"status": "success",
"composite_score": 0.9875,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": null,
"created_at": "2026-02-22T05:38:18Z"
},
{
"run_id": "run-final-evaluator-suite-001",
"status": "success",
"composite_score": 0.9875,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": 2,
"created_at": "2026-02-22T05:32:37Z"
},
{
"run_id": "run-evaluator-suite-001",
"status": "success",
"composite_score": 0.9875,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": 2,
"created_at": "2026-02-22T05:30:58Z"
},
{
"run_id": "run-store-test-002",
"status": "success",
"composite_score": 0.9875,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": null,
"created_at": "2026-02-22T05:26:45Z"
},
{
"run_id": "run-store-test-001",
"status": "error",
"composite_score": 0.015,
"execution_mode": "auto",
"benchmark_version_id": "benchmark-v1",
"task_count": null,
"created_at": "2026-02-22T05:25:41Z"
}
]