diff --git a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
index cb44bc41..d5dda66d 100644
--- a/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
+++ b/skills/analysis/home-security-benchmark/scripts/generate-report.cjs
@@ -61,8 +61,9 @@ function generateReport(resultsDir = RESULTS_DIR, opts = {}) {
}).filter(r => r.data);
// Load fixture images for Vision tab (base64)
+ // Skip in live mode — saves ~43MB of base64 per regeneration, making per-test updates instant
const fixtureImages = {};
- if (fs.existsSync(FIXTURES_DIR)) {
+ if (!liveMode && fs.existsSync(FIXTURES_DIR)) {
try {
const frames = fs.readdirSync(FIXTURES_DIR).filter(f => /\.(png|jpg|jpeg)$/i.test(f));
for (const f of frames) {
@@ -131,8 +132,8 @@ function buildHTML(allResults, fixtureImages, { liveMode = false, liveStatus = n
const fixtureJSON = JSON.stringify(fixtureImages);
- // Live mode: auto-refresh meta tag
- const refreshMeta = liveMode ? '' : '';
+ // Live mode: JS-based reload (stateful, preserves active tab + scroll)
+ const refreshMeta = '';
const liveBannerHTML = liveMode ? buildLiveBanner(liveStatus) : '';
return `
@@ -434,7 +435,7 @@ function buildSidebar() {
let html = '';
for (const [family, runs] of Object.entries(groups)) {
html += '
';
- html += '
▾ ' + esc(family) + ' (' + runs.length + ')
';
+ html += '
▾ ' + esc(family) + ' (' + runs.length + ')
';
html += '
';
for (const r of runs.reverse()) {
const sel = selectedIndices.has(r._idx);
@@ -508,6 +509,14 @@ function renderPerformance() {
html += statCard('Server Decode', fmt(srvDecode), 'tok/s', 'From llama-server /metrics');
html += statCard('Total Time', fmt(totalTime / 1000), 's', run.total + ' tests');
html += statCard('Throughput', fmt(tokPerSec), 'tok/s', fmtK(run.tokens || 0) + ' total tokens');
+
+ // GPU & Memory cards (from resource samples)
+ const res = perf?.resource;
+ if (res) {
+ html += statCard('GPU Utilization', res.gpu ? res.gpu.util + '' : '—', '%', res.gpu ? 'Renderer: ' + res.gpu.renderer + '% · Tiler: ' + res.gpu.tiler + '%' : 'MPS not available');
+ html += statCard('GPU Memory', res.gpu?.memUsedGB != null ? fmt(res.gpu.memUsedGB) : '—', 'GB', res.gpu?.memAllocGB != null ? 'Alloc: ' + fmt(res.gpu.memAllocGB) + ' GB' : 'MPS not available');
+ html += statCard('System Memory', fmt(res.sys?.usedGB), 'GB', 'of ' + fmt(res.sys?.totalGB) + ' GB total · Free: ' + fmt(res.sys?.freeGB) + ' GB');
+ }
html += '
';
// Comparison table if multiple selected
@@ -611,7 +620,36 @@ function renderQuality() {
// Multi-run comparison
if (sel.length > 1) {
- html += '
Quality Comparison
';
+ // High-level summary comparison
+ html += '
Overall Comparison
';
+ html += '
| Metric | ';
+ for (const r of sel) html += '' + esc(modelShort(r.model)) + ' ' + shortDate(r.timestamp) + ' | ';
+ html += '
';
+ const hasVlm = sel.some(r => r.vlmTotal > 0);
+ const hiRows = [
+ ['Pass Rate', r => r.total > 0 ? pct(r.passed, r.total) + '%' : '—'],
+ ['Score', r => r.passed + '/' + r.total],
+ ['LLM Score', r => r.llmTotal > 0 ? (r.llmPassed || 0) + '/' + (r.llmTotal || 0) : '—'],
+ ...(hasVlm ? [['VLM Score', r => r.vlmTotal > 0 ? (r.vlmPassed || 0) + '/' + (r.vlmTotal || 0) : '—']] : []),
+ ['Failed', r => String(r.failed)],
+ ['Time', r => fmt(r.timeMs / 1000) + 's'],
+ ['Throughput', r => r.timeMs > 0 && r.tokens ? fmt(r.tokens / (r.timeMs / 1000)) + ' tok/s' : '—'],
+ ];
+ for (const [label, fn] of hiRows) {
+ html += '| ' + label + ' | ';
+ // Find best value for highlighting
+ const vals = sel.map(fn);
+ for (let i = 0; i < sel.length; i++) {
+ const isBest = label === 'Failed' ? vals[i] === String(Math.min(...sel.map(r => r.failed))) :
+ label === 'Pass Rate' ? vals[i] === pct(Math.max(...sel.map(r => r.passed)), sel[0].total) + '%' : false;
+ html += ' 1 ? ' style="color:var(--green);font-weight:600"' : '') + '>' + vals[i] + ' | ';
+ }
+ html += '
';
+ }
+ html += '
';
+
+ // Per-suite breakdown
+ html += '
Suite Comparison
';
html += '
| Suite | ';
for (const r of sel) html += '' + esc(modelShort(r.model)) + ' | ';
html += '
';
@@ -823,9 +861,15 @@ function getActiveTab() {
function renderActiveTab() {
const tab = getActiveTab();
- if (tab === 'performance') renderPerformance();
- else if (tab === 'quality') renderQuality();
- else if (tab === 'vision') renderVision();
+ try { // an exception thrown by any tab renderer is caught below instead of propagating to the caller
+ if (tab === 'performance') renderPerformance();
+ else if (tab === 'quality') renderQuality();
+ else if (tab === 'vision') renderVision();
+ } catch (e) { // show the failure inside the active tab's panel and log it to the console
+ const panel = document.getElementById('tab-' + tab); // NOTE(review): e.message / e.stack are written to innerHTML below without esc() — confirm they can never carry markup
+ if (panel) panel.innerHTML = 'Render error: ' + e.message + '
' + e.stack + '
';
+ console.error('Tab render error:', e);
+ }
}
// ═══════════════════════════════════════════════════════════════════════════════
@@ -837,6 +881,52 @@ function refresh() {
renderActiveTab();
}
+// ═══════════════════════════════════════════════════════════════════════════════
+// LIVE RELOAD (stateful — preserves tab + scroll)
+// ═══════════════════════════════════════════════════════════════════════════════
+const IS_LIVE = ${liveMode ? 'true' : 'false'}; // literal true/false substituted by the generator from its liveMode flag when the report is built
+
+function saveState() {
+ try {
+ sessionStorage.setItem('_bench_tab', getActiveTab());
+ sessionStorage.setItem('_bench_scroll', String(window.scrollY));
+ sessionStorage.setItem('_bench_selected', JSON.stringify([...selectedIndices]));
+ sessionStorage.setItem('_bench_primary', String(primaryIndex));
+ } catch {}
+}
+
+function restoreState() {
+ try {
+ // Restore selection
+ const savedSel = sessionStorage.getItem('_bench_selected');
+ if (savedSel) {
+ const arr = JSON.parse(savedSel).filter(i => i >= 0 && i < ALL_RUNS.length);
+ if (arr.length > 0) { selectedIndices = new Set(arr); }
+ }
+ const savedPrimary = sessionStorage.getItem('_bench_primary');
+ if (savedPrimary != null) {
+ const pi = parseInt(savedPrimary);
+ if (pi >= 0 && pi < ALL_RUNS.length) primaryIndex = pi;
+ }
+ // Restore tab
+ const tab = sessionStorage.getItem('_bench_tab');
+ if (tab && tab !== 'performance') {
+ document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
+ document.querySelectorAll('.tab-panel').forEach(p => p.classList.remove('active'));
+ const tabEl = document.querySelector('.tab[data-tab="' + tab + '"]');
+ if (tabEl) tabEl.classList.add('active');
+ const panel = document.getElementById('tab-' + tab);
+ if (panel) panel.classList.add('active');
+ }
+ const scroll = parseInt(sessionStorage.getItem('_bench_scroll') || '0');
+ if (scroll > 0) setTimeout(() => window.scrollTo(0, scroll), 50);
+ } catch {}
+}
+
+if (IS_LIVE) {
+ setTimeout(() => { saveState(); location.reload(); }, 5000);
+}
+
// ═══════════════════════════════════════════════════════════════════════════════
// INIT
// ═══════════════════════════════════════════════════════════════════════════════
@@ -846,6 +936,7 @@ document.getElementById('btn-compare').addEventListener('click', () => {
if (selectedIndices.size > 1) renderActiveTab();
});
+restoreState(); // runs before refresh() so the first render already uses the restored selection and active tab
refresh();