Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions packages/console/app/src/routes/bench/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,25 @@ async function getBenchmarks() {
const rows = await Database.use((tx) =>
tx.select().from(BenchmarkTable).orderBy(desc(BenchmarkTable.timeCreated)).limit(100),
)
return rows.map((row) => {
const parsed = JSON.parse(row.result) as BenchmarkResult
const taskScores: Record<string, number> = {}
for (const t of parsed.tasks) {
taskScores[t.task.id] = t.averageScore
}
return {
id: row.id,
agent: row.agent,
model: row.model,
averageScore: parsed.averageScore,
taskScores,
return rows.flatMap((row) => {
try {
const parsed = JSON.parse(row.result) as BenchmarkResult
const taskScores: Record<string, number> = {}
for (const t of parsed.tasks) {
taskScores[t.task.id] = t.averageScore
}
return [
{
id: row.id,
agent: row.agent,
model: row.model,
averageScore: parsed.averageScore,
taskScores,
},
]
} catch (error) {
console.error(`Failed to parse benchmark result for row ${row.id}:`, error)
return []
}
})
}
Expand Down
Loading