diff --git a/examples/benchmark-react/README.md b/examples/benchmark-react/README.md index 6dcd2d2a2995..234b462e90c5 100644 --- a/examples/benchmark-react/README.md +++ b/examples/benchmark-react/README.md @@ -13,8 +13,8 @@ The repo has two benchmark suites: - **What we measure:** Wall-clock time from triggering an action (e.g. `init(100)` or `updateUser('user0')`) until a MutationObserver detects the expected DOM change in the benchmark container. Optionally we also record React Profiler commit duration and, with `BENCH_TRACE=true`, Chrome trace duration. - **Why:** Normalized caching should show wins on shared-entity updates (one store write, many components update), ref stability (fewer new object references), and derived-view memoization (`Query` schema avoids re-sorting when entities haven't changed). See [js-framework-benchmark "How the duration is measured"](https://github.com/krausest/js-framework-benchmark/wiki/How-the-duration-is-measured) for a similar timeline-based approach. -- **Statistical:** Warmup runs are discarded; we report median and 95% CI (as percentage of median). Libraries are interleaved per round to reduce environmental variance. Each round runs multiple sub-iterations per page visit and reports the median, further reducing per-sample noise. The default is 5 sub-iterations; individual scenarios can override this via `opsPerRound` in `bench/scenarios.ts` (e.g. `update-entity-sorted` uses 9, `list-detail-switch-10` uses 5). -- **No CPU throttling:** Runs at native speed with more samples for statistical significance rather than artificial slowdown. Small (cheap) scenarios use 2 warmup + up to 12 measurement rounds locally; large (expensive) scenarios use 1 warmup + up to 6 measurement rounds. Early stopping triggers when 95% CI margin drops below the target percentage. +- **Statistical:** Warmup runs are discarded; we report median and 95% CI (as percentage of median). 
Timing scenarios (navigation and mutation) use **convergent mode**: a single page load per scenario, with warmup iterations followed by adaptive measurement iterations where each iteration produces one sample and convergence is checked inline. This eliminates page-reload overhead between samples for faster, lower-variance results. Deterministic scenarios (ref-stability) run once. Memory scenarios use a separate outer loop with a fresh page per round. +- **No CPU throttling:** Runs at native speed with more samples for statistical significance rather than artificial slowdown. Convergent timing scenarios use 5 warmup + up to 50 measurement iterations (small) or 3 warmup + up to 40 (large). Early stopping triggers when 95% CI margin drops below the target percentage. ## Scenario categories @@ -55,10 +55,10 @@ Illustrative **relative** results with **baseline = 100%** (plain React useState | Category | Scenarios (representative) | data-client | tanstack-query | swr | baseline | |---|---|---:|---:|---:|---:| -| Navigation | `getlist-100`, `getlist-500`, `getlist-500-sorted` | ~97% | ~100% | ~100% | **100%** | -| Navigation | `list-detail-switch-10` | **~1652%** | ~231% | ~230% | 100% | -| Mutations | `update-entity`, `update-user`, `update-entity-sorted`, `update-entity-multi-view`, `unshift-item`, `delete-item`, `move-item` | **~6994%** | ~97% | ~99% | 100% | -| Scaling (10k items) | `update-user-10000` | **~9713%** | ~94% | ~100% | 100% | +| Navigation | `getlist-100`, `getlist-500`, `getlist-500-sorted` | ~98% | ~99% | ~99% | **100%** | +| Navigation | `list-detail-switch-10` | **~2381%** | ~225% | ~218% | 100% | +| Mutations | `update-entity`, `update-user`, `update-entity-sorted`, `update-entity-multi-view`, `unshift-item`, `delete-item`, `move-item` | **~8672%** | ~97% | ~99% | 100% | +| Scaling (10k items) | `update-user-10000` | **~9290%** | ~96% | ~100% | 100% | ## Latest measured results (network simulation on) @@ -70,19 +70,19 @@ Run: **2026-03-22**, Linux 
(WSL2), `yarn build:benchmark-react`, static preview | Scenario | data-client | tanstack-query | swr | baseline | |---|---:|---:|---:|---:| | **Navigation** | | | | | -| `getlist-100` | 20.16 ± 0.7% | 20.58 ± 0.8% | 20.58 ± 0.8% | 20.58 ± 0.0% | -| `getlist-500` | 12.05 ± 0.9% | 12.55 ± 0.0% | 12.61 ± 0.9% | 12.69 ± 1.4% | -| `getlist-500-sorted` | 12.56 ± 1.4% | 12.72 ± 0.5% | 12.79 ± 0.9% | 12.80 ± 1.4% | -| `list-detail-switch-10` | 12.06 ± 12.5% | 1.69 ± 1.0% | 1.68 ± 1.1% | 0.73 ± 0.1% | +| `getlist-100` | 20.45 ± 2.3% | 20.62 ± 0.8% | 20.73 ± 0.2% | 20.73 ± 0.5% | +| `getlist-500` | 12.53 ± 2.8% | 12.80 ± 0.2% | 12.71 ± 0.3% | 12.84 ± 0.2% | +| `getlist-500-sorted` | 12.92 ± 5.1% | 12.93 ± 1.1% | 12.90 ± 0.7% | 13.16 ± 3.6% | +| `list-detail-switch-10` | 17.38 ± 8.7% | 1.64 ± 1.7% | 1.59 ± 1.4% | 0.73 ± 0.1% | | **Mutations** | | | | | -| `update-entity` | 555.56 ± 8.4% | 6.99 ± 0.3% | 6.99 ± 0.3% | 7.17 ± 0.3% | -| `update-user` | 571.90 ± 12.8% | 6.94 ± 0.5% | 7.18 ± 0.0% | 7.16 ± 0.0% | -| `update-entity-sorted` | 588.24 ± 8.0% | 7.10 ± 0.3% | 7.09 ± 0.4% | 7.28 ± 0.0% | -| `update-entity-multi-view` | 555.56 ± 0.0% | 7.06 ± 0.3% | 7.08 ± 0.3% | 7.26 ± 0.2% | -| `update-user-10000` | 151.52 ± 10.8% | 1.46 ± 0.5% | 1.56 ± 0.2% | 1.56 ± 1.3% | -| `unshift-item` | 425.72 ± 5.0% | 6.90 ± 0.1% | 7.13 ± 0.3% | 7.14 ± 0.3% | -| `delete-item` | 526.32 ± 7.2% | 6.89 ± 0.3% | 7.13 ± 0.5% | 7.12 ± 1.0% | -| `move-item` | 285.71 ± 4.0% | 6.55 ± 0.5% | 6.99 ± 0.5% | 6.92 ± 0.8% | +| `update-entity` | 666.67 ± 9.0% | 6.98 ± 0.4% | 7.09 ± 0.4% | 7.23 ± 0.8% | +| `update-user` | 801.28 ± 9.4% | 7.04 ± 0.5% | 7.18 ± 0.1% | 7.24 ± 1.3% | +| `update-entity-sorted` | 625.00 ± 10.8% | 7.10 ± 0.0% | 7.10 ± 1.2% | 7.29 ± 0.9% | +| `update-entity-multi-view` | 645.83 ± 7.6% | 7.14 ± 0.2% | 7.16 ± 0.1% | 7.29 ± 0.3% | +| `update-user-10000` | 144.93 ± 1.7% | 1.49 ± 0.6% | 1.56 ± 1.7% | 1.56 ± 1.5% | +| `unshift-item` | 465.37 ± 3.6% | 6.90 ± 0.4% | 7.18 ± 0.2% | 7.21 ± 0.3% | +| 
`delete-item` | 833.33 ± 6.0% | 6.93 ± 0.1% | 7.17 ± 0.7% | 7.19 ± 0.7% | +| `move-item` | 333.33 ± 8.9% | 6.76 ± 0.6% | 6.99 ± 0.3% | 6.97 ± 0.2% | [Measured on a Ryzen 9 7950X; 64 GB RAM; Ubuntu (WSL2); Node 24.12.0; Chromium (Playwright)] @@ -189,11 +189,12 @@ Regressions >5% on stable scenarios or >15% on volatile scenarios are worth inve Scenarios are classified as `small` or `large` based on their cost: - - **Small** (2 warmup + 3–12 measurement): `getlist-100`, `update-entity`, `ref-stability-*`, `invalidate-and-resolve`, `unshift-item`, `delete-item` - - **Large** (1 warmup + 3–6 measurement): `getlist-500`, `getlist-500-sorted`, `update-user`, `update-user-10000`, `update-entity-sorted`, `update-entity-multi-view`, `list-detail-switch-10` - - **Memory** (opt-in, 1 warmup + 3 measurement): `memory-mount-unmount-cycle` — run with `--action memory` + - **Small** (convergent: 5 warmup + 5–50 measurement iterations): `getlist-100`, `update-entity`, `invalidate-and-resolve`, `unshift-item`, `delete-item` + - **Small** (deterministic, single run): `ref-stability-*` + - **Large** (convergent: 3 warmup + 5–40 measurement iterations): `getlist-500`, `getlist-500-sorted`, `update-user`, `update-user-10000`, `update-entity-sorted`, `update-entity-multi-view`, `list-detail-switch-10` + - **Memory** (opt-in, 1 warmup + 3 measurement rounds): `memory-mount-unmount-cycle` — run with `--action memory` - When running all scenarios (`yarn bench`), each group runs with its own warmup/measurement count. Use `--size` to run only one group. + Timing scenarios use convergent mode (single page load, inline convergence per scenario). Each group uses its own warmup/measurement config. Use `--size` to run only one group. 
## Output diff --git a/examples/benchmark-react/bench/runner.ts b/examples/benchmark-react/bench/runner.ts index 88cd0ee96f19..e32912e0e1c1 100644 --- a/examples/benchmark-react/bench/runner.ts +++ b/examples/benchmark-react/bench/runner.ts @@ -1,6 +1,6 @@ /// import { chromium } from 'playwright'; -import type { Browser, CDPSession, Page } from 'playwright'; +import type { Browser, CDPSession, Locator, Page } from 'playwright'; import { collectMeasures, getMeasureDuration } from './measure.js'; import { collectHeapUsed } from './memory.js'; @@ -9,9 +9,11 @@ import { SCENARIOS, LIBRARIES, RUN_CONFIG, + CONVERGENT_CONFIG, ACTION_GROUPS, NETWORK_SIM_CONFIG, } from './scenarios.js'; +import type { ConvergentProfile } from './scenarios.js'; import { computeStats, isConverged } from './stats.js'; import { parseTraceDuration } from './tracing.js'; import type { Scenario, ScenarioSize } from '../src/shared/types.js'; @@ -159,7 +161,7 @@ interface ScenarioSamples { } // --------------------------------------------------------------------------- -// Scenario runner +// Scenario classification and helpers // --------------------------------------------------------------------------- const REF_STABILITY_METRICS = ['issueRefChanged', 'userRefChanged'] as const; @@ -173,14 +175,34 @@ function isRefStabilityScenario(scenario: Scenario): scenario is Scenario & { ); } -async function runScenario( +function isConvergentScenario(scenario: Scenario): boolean { + return ( + !scenario.deterministic && + scenario.category !== 'memory' && + scenario.resultMetric !== 'heapDelta' && + !isRefStabilityScenario(scenario) + ); +} + +function classifyAction(scenario: Scenario): { + isMountLike: boolean; + isUpdate: boolean; +} { + const isMountLike = + scenario.action === 'init' || + scenario.action === 'mountSortedView' || + scenario.action === 'initDoubleList' || + scenario.action === 'listDetailSwitch'; + return { isMountLike, isUpdate: !isMountLike }; +} + +async function setupBenchPage( 
page: Page, lib: string, scenario: Scenario, networkSim: boolean, -): Promise<ScenarioResult> { - const appPath = `/${lib}/`; - await page.goto(`${BASE_URL}${appPath}`, { +): Promise<{ harness: Locator; bench: any }> { + await page.goto(`${BASE_URL}/${lib}/`, { waitUntil: 'networkidle', timeout: 120000, }); @@ -198,8 +220,8 @@ async function runScenario( if (networkSim) { await (bench as any).evaluate( - (api: any, config: { baseLatencyMs: number; recordsPerMs: number }) => - api.setNetworkSim(config), + (api: any, cfg: { baseLatencyMs: number; recordsPerMs: number }) => + api.setNetworkSim(cfg), NETWORK_SIM_CONFIG, ); } @@ -211,6 +233,183 @@ async function runScenario( ); } + return { harness, bench }; +} + +async function runPreMount( + page: Page, + harness: Locator, + bench: any, + scenario: Scenario, + networkSim: boolean, +): Promise<void> { + const preMountAction = scenario.preMountAction ?? 'init'; + const mountCount = scenario.mountCount ?? 100; + await harness.evaluate(el => { + el.removeAttribute('data-bench-complete'); + el.removeAttribute('data-bench-timeout'); + }); + await (bench as any).evaluate( + (api: any, [action, n]: [string, number]) => api[action](n), + [preMountAction, mountCount], + ); + const preMountTimeout = networkSim ? 60000 : 10000; + await page.waitForSelector('[data-bench-complete]', { + timeout: preMountTimeout, + state: 'attached', + }); + const preMountTimedOut = await harness.evaluate(el => + el.hasAttribute('data-bench-timeout'), + ); + if (preMountTimedOut) { + throw new Error( + `Harness timeout during pre-mount (${preMountAction}): did not complete within ${preMountTimeout / 1000} s`, + ); + } + await page.evaluate(() => { + performance.clearMarks(); + performance.clearMeasures(); + }); +} + +/** Run one timed iteration: cleanup, invoke action, wait, collect measures.
*/ +async function runIteration( + page: Page, + harness: Locator, + bench: any, + scenario: Scenario, + opts: { + isMountLike: boolean; + mountCount: number; + networkSim: boolean; + subIdx: number; + shouldTrace: boolean; + }, +): Promise<{ duration: number; reactCommit: number; traceDuration?: number }> { + const { isMountLike, mountCount, networkSim, subIdx, shouldTrace } = opts; + + if (subIdx > 0) { + if (isMountLike) { + await (bench as any).evaluate((api: any) => api.unmountAll()); + await page + .waitForSelector('[data-bench-item], [data-sorted-list]', { + state: 'detached', + timeout: 10000, + }) + .catch(() => {}); + await (bench as any).evaluate((api: any) => { + if (api.resetStore) api.resetStore(); + }); + await page.evaluate( + () => + new Promise(r => + requestAnimationFrame(() => requestAnimationFrame(() => r())), + ), + ); + } else { + await (bench as any).evaluate((api: any) => api.flushPendingMutations()); + await page.evaluate( + () => + new Promise(r => + requestAnimationFrame(() => requestAnimationFrame(() => r())), + ), + ); + } + } + + await page.evaluate(() => { + performance.clearMarks(); + performance.clearMeasures(); + }); + await harness.evaluate(el => { + el.removeAttribute('data-bench-complete'); + el.removeAttribute('data-bench-timeout'); + }); + + let cdpTracing: CDPSession | undefined; + const traceChunks: object[] = []; + if (shouldTrace) { + cdpTracing = await page.context().newCDPSession(page); + cdpTracing.on('Tracing.dataCollected', (params: { value: object[] }) => { + traceChunks.push(...params.value); + }); + await cdpTracing.send('Tracing.start', { + categories: 'devtools.timeline,blink', + }); + } + + const actionArgs = + scenario.action === 'deleteEntity' ? 
+ [Math.min(subIdx + 1, mountCount)] + : scenario.args; + await (bench as any).evaluate( + (api: any, { action, args }: { action: string; args: unknown[] }) => { + api[action](...args); + }, + { action: scenario.action, args: actionArgs }, + ); + + const completeTimeout = networkSim ? 60000 : 10000; + await page.waitForSelector('[data-bench-complete]', { + timeout: completeTimeout, + state: 'attached', + }); + const timedOut = await harness.evaluate(el => + el.hasAttribute('data-bench-timeout'), + ); + if (timedOut) { + throw new Error( + `Harness timeout: MutationObserver did not detect expected DOM update within ${completeTimeout / 1000} s`, + ); + } + + await (bench as any).evaluate((api: any) => api.flushPendingMutations()); + + let traceDuration: number | undefined; + if (shouldTrace && cdpTracing) { + try { + const done = new Promise<void>(resolve => { + cdpTracing!.on('Tracing.tracingComplete', () => resolve()); + }); + await cdpTracing.send('Tracing.end'); + await done; + const traceJson = + '[\n' + traceChunks.map(e => JSON.stringify(e)).join(',\n') + '\n]'; + traceDuration = parseTraceDuration(Buffer.from(traceJson)); + } catch { + traceDuration = undefined; + } finally { + await cdpTracing.detach().catch(() => {}); + } + } + + const measures = await collectMeasures(page); + const duration = + isMountLike ?
+ getMeasureDuration(measures, 'mount-duration') + : getMeasureDuration(measures, 'update-duration'); + const reactCommit = getMeasureDuration(measures, 'react-commit-update'); + + return { duration, reactCommit, traceDuration }; +} + +// --------------------------------------------------------------------------- +// Scenario runner (memory, ref-stability, and legacy inner loop) +// --------------------------------------------------------------------------- + +async function runScenario( + page: Page, + lib: string, + scenario: Scenario, + networkSim: boolean, +): Promise { + const { harness, bench } = await setupBenchPage( + page, + lib, + scenario, + networkSim, + ); + // --- Memory path (unchanged, always ops=1) --- const isMemory = scenario.action === 'mountUnmountCycle' && @@ -248,45 +447,12 @@ async function runScenario( return { value: heapAfter - heapBefore }; } - // --- Classify scenario --- - const isInit = scenario.action === 'init'; - const isMountLike = - isInit || - scenario.action === 'mountSortedView' || - scenario.action === 'initDoubleList' || - scenario.action === 'listDetailSwitch'; - const isUpdate = !isMountLike; + const { isMountLike, isUpdate } = classifyAction(scenario); const isRefStability = isRefStabilityScenario(scenario); - - // --- Pre-mount for update/ref-stability scenarios (once) --- const mountCount = scenario.mountCount ?? 100; + if (isUpdate || isRefStability) { - const preMountAction = scenario.preMountAction ?? 'init'; - await harness.evaluate(el => { - el.removeAttribute('data-bench-complete'); - el.removeAttribute('data-bench-timeout'); - }); - await (bench as any).evaluate( - (api: any, [action, n]: [string, number]) => api[action](n), - [preMountAction, mountCount], - ); - const preMountTimeout = networkSim ? 
60000 : 10000; - await page.waitForSelector('[data-bench-complete]', { - timeout: preMountTimeout, - state: 'attached', - }); - const preMountTimedOut = await harness.evaluate(el => - el.hasAttribute('data-bench-timeout'), - ); - if (preMountTimedOut) { - throw new Error( - `Harness timeout during pre-mount (${preMountAction}): did not complete within 30 s`, - ); - } - await page.evaluate(() => { - performance.clearMarks(); - performance.clearMeasures(); - }); + await runPreMount(page, harness, bench, scenario, networkSim); } // --- Ref stability (deterministic, single run, early return) --- @@ -338,117 +504,14 @@ async function runScenario( const traceSubIdx = Math.floor(ops / 2); for (let subIdx = 0; subIdx < ops; subIdx++) { - // Mount scenarios: unmount + detach + resetStore + waitForPaint (skip first iteration — nothing mounted yet) - if (isMountLike && subIdx > 0) { - await (bench as any).evaluate((api: any) => api.unmountAll()); - await page - .waitForSelector('[data-bench-item], [data-sorted-list]', { - state: 'detached', - timeout: 10000, - }) - .catch(() => {}); - await (bench as any).evaluate((api: any) => { - if (api.resetStore) api.resetStore(); - }); - await page.evaluate( - () => - new Promise(r => - requestAnimationFrame(() => requestAnimationFrame(() => r())), - ), - ); - } - - // Mutation scenarios: flush pending from prior sub-iteration + let React commit the resolution - if (isUpdate && subIdx > 0) { - await (bench as any).evaluate((api: any) => api.flushPendingMutations()); - await page.evaluate( - () => - new Promise(r => - requestAnimationFrame(() => requestAnimationFrame(() => r())), - ), - ); - } - - // Clear perf marks/measures + reset harness flags - await page.evaluate(() => { - performance.clearMarks(); - performance.clearMeasures(); - }); - await harness.evaluate(el => { - el.removeAttribute('data-bench-complete'); - el.removeAttribute('data-bench-timeout'); - }); - - // Chrome tracing: only for the middle sub-iteration const 
shouldTrace = USE_TRACE && subIdx === traceSubIdx; - let cdpTracing: CDPSession | undefined; - const traceChunks: object[] = []; - if (shouldTrace) { - cdpTracing = await page.context().newCDPSession(page); - cdpTracing.on('Tracing.dataCollected', (params: { value: object[] }) => { - traceChunks.push(...params.value); - }); - await cdpTracing.send('Tracing.start', { - categories: 'devtools.timeline,blink', - }); - } - - // Vary args for deleteEntity so each sub-iteration deletes a different item - const actionArgs = - scenario.action === 'deleteEntity' ? - [Math.min(subIdx + 1, mountCount)] - : scenario.args; - await (bench as any).evaluate( - (api: any, { action, args }: { action: string; args: unknown[] }) => { - api[action](...args); - }, - { action: scenario.action, args: actionArgs }, - ); - - // Wait for completion - const completeTimeout = networkSim ? 60000 : 10000; - await page.waitForSelector('[data-bench-complete]', { - timeout: completeTimeout, - state: 'attached', - }); - const timedOut = await harness.evaluate(el => - el.hasAttribute('data-bench-timeout'), + const { duration, reactCommit, traceDuration } = await runIteration( + page, + harness, + bench, + scenario, + { isMountLike, mountCount, networkSim, subIdx, shouldTrace }, ); - if (timedOut) { - throw new Error( - `Harness timeout: MutationObserver did not detect expected DOM update within 30 s`, - ); - } - - await (bench as any).evaluate((api: any) => api.flushPendingMutations()); - - // Collect trace - let traceDuration: number | undefined; - if (shouldTrace && cdpTracing) { - try { - const done = new Promise(resolve => { - cdpTracing!.on('Tracing.tracingComplete', () => resolve()); - }); - await cdpTracing.send('Tracing.end'); - await done; - const traceJson = - '[\n' + traceChunks.map(e => JSON.stringify(e)).join(',\n') + '\n]'; - traceDuration = parseTraceDuration(Buffer.from(traceJson)); - } catch { - traceDuration = undefined; - } finally { - await cdpTracing.detach().catch(() => {}); - } 
- } - - // Collect performance measures - const measures = await collectMeasures(page); - const duration = - isMountLike ? - getMeasureDuration(measures, 'mount-duration') - : getMeasureDuration(measures, 'update-duration'); - const reactCommit = getMeasureDuration(measures, 'react-commit-update'); - durations.push(duration); if (reactCommit > 0) commitTimes.push(reactCommit); if (traceDuration != null) traceDurations.push(traceDuration); @@ -463,6 +526,99 @@ async function runScenario( }; } +// --------------------------------------------------------------------------- +// Convergent scenario runner (single page load, inline stat-sig convergence) +// --------------------------------------------------------------------------- + +const CONVERGENT_GC_INTERVAL = 15; + +async function runScenarioConvergent( + page: Page, + lib: string, + scenario: Scenario, + networkSim: boolean, + config: ConvergentProfile, + cdp?: CDPSession, +): Promise { + const { harness, bench } = await setupBenchPage( + page, + lib, + scenario, + networkSim, + ); + const { isMountLike, isUpdate } = classifyAction(scenario); + const mountCount = scenario.mountCount ?? 
100; + + if (isUpdate) { + await runPreMount(page, harness, bench, scenario, networkSim); + } + + const totalMax = config.warmup + config.maxMeasurement; + const results: ScenarioResult[] = []; + const measurementValues: number[] = []; + // Trace early in measurement window so early convergence doesn't skip it + const traceSubIdx = config.warmup + Math.min(1, config.minMeasurement - 1); + let convergedAt: number | undefined; + + for (let subIdx = 0; subIdx < totalMax; subIdx++) { + const isWarmup = subIdx < config.warmup; + const measureIdx = subIdx - config.warmup; + + // Periodic GC to prevent heap pressure accumulation on long runs + if (cdp && subIdx > 0 && subIdx % CONVERGENT_GC_INTERVAL === 0) { + try { + await cdp.send('HeapProfiler.collectGarbage'); + } catch {} + await page.waitForTimeout(50); + } + + const shouldTrace = USE_TRACE && subIdx === traceSubIdx; + const { duration, reactCommit, traceDuration } = await runIteration( + page, + harness, + bench, + scenario, + { isMountLike, mountCount, networkSim, subIdx, shouldTrace }, + ); + + if (isWarmup) continue; + + measurementValues.push(duration); + results.push({ + value: duration, + reactCommit: reactCommit > 0 ? 
reactCommit : undefined, + traceDuration, + }); + + if ( + measureIdx + 1 >= config.minMeasurement && + isConverged( + measurementValues, + 0, + config.targetMarginPct, + config.minMeasurement, + ) + ) { + convergedAt = measureIdx + 1; + break; + } + } + + await bench.dispose(); + + if (convergedAt != null) { + process.stderr.write( + ` [converged] ${scenario.name} after ${convergedAt} measurements\n`, + ); + } else { + process.stderr.write( + ` [max reached] ${scenario.name} after ${config.maxMeasurement} measurements\n`, + ); + } + + return results; +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- @@ -517,6 +673,7 @@ function recordResult( function warmupCount(scenario: Scenario): number { if (scenario.deterministic) return 0; if (scenario.category === 'memory') return MEMORY_WARMUP; + if (isConvergentScenario(scenario)) return 0; return RUN_CONFIG[scenario.size ?? 
'small'].warmup; } @@ -640,14 +797,68 @@ async function main() { for (const s of deterministicScenarios) deterministicNames.add(s.name); } - // Adaptive-convergence rounds per size group + // Convergent timing scenarios: single page load with inline convergence + for (const [size, scenarios] of sizeGroups) { + const convergentScenarios = scenarios.filter( + s => !deterministicNames.has(s.name) && isConvergentScenario(s), + ); + if (convergentScenarios.length === 0) continue; + + const config = CONVERGENT_CONFIG[size]; + process.stderr.write( + `\n── ${size} convergent (${convergentScenarios.length} scenarios, ${config.warmup} warmup + up to ${config.maxMeasurement} measurements each) ──\n`, + ); + + for (const lib of libraries) { + const libScenarios = convergentScenarios.filter(s => + s.name.startsWith(`${lib}:`), + ); + if (libScenarios.length === 0) continue; + + const context = await browser.newContext(); + const page = await context.newPage(); + const cdp = await context.newCDPSession(page); + + for (const scenario of libScenarios) { + try { + await cdp.send('HeapProfiler.collectGarbage'); + } catch {} + await page.waitForTimeout(200); + + process.stderr.write(` ${scenario.name}...\n`); + try { + const results = await runScenarioConvergent( + page, + lib, + scenario, + networkSim, + config, + cdp, + ); + for (const result of results) { + recordResult(samples, scenario, result); + } + } catch (err) { + console.error( + ` ${scenario.name} FAILED:`, + err instanceof Error ? 
err.message : err, + ); + } + } + + await cdp.detach().catch(() => {}); + await context.close(); + } + } + + // Outer-loop adaptive rounds for any remaining non-convergent, non-deterministic scenarios for (const [size, scenarios] of sizeGroups) { const { warmup, minMeasurement, maxMeasurement, targetMarginPct } = RUN_CONFIG[size]; - const nonDeterministic = scenarios.filter( - s => !deterministicNames.has(s.name), + const outerLoopScenarios = scenarios.filter( + s => !deterministicNames.has(s.name) && !isConvergentScenario(s), ); - if (nonDeterministic.length === 0) continue; + if (outerLoopScenarios.length === 0) continue; const maxRounds = warmup + maxMeasurement; const converged = new Set(); @@ -657,11 +868,11 @@ async function main() { const phase = isMeasure ? 'measure' : 'warmup'; const phaseRound = isMeasure ? round - warmup + 1 : round + 1; const phaseTotal = isMeasure ? maxMeasurement : warmup; - const active = nonDeterministic.filter(s => !converged.has(s.name)); + const active = outerLoopScenarios.filter(s => !converged.has(s.name)); if (active.length === 0) break; process.stderr.write( - `\n── ${size} round ${round + 1}/${maxRounds} (${phase} ${phaseRound}/${phaseTotal}, ${active.length}/${nonDeterministic.length} active) ──\n`, + `\n── ${size} round ${round + 1}/${maxRounds} (${phase} ${phaseRound}/${phaseTotal}, ${active.length}/${outerLoopScenarios.length} active) ──\n`, ); await runRound(browser, active, libraries, networkSim, samples, { @@ -669,7 +880,6 @@ async function main() { showProgress: true, }); - // After each measurement round, check per-scenario convergence if (isMeasure) { for (const scenario of active) { if ( @@ -687,9 +897,9 @@ async function main() { ); } } - if (converged.size === nonDeterministic.length) { + if (converged.size === outerLoopScenarios.length) { process.stderr.write( - `\n── All ${size} scenarios converged, stopping early ──\n`, + `\n── All ${size} outer-loop scenarios converged, stopping early ──\n`, ); break; } diff 
--git a/examples/benchmark-react/bench/scenarios.ts b/examples/benchmark-react/bench/scenarios.ts index 2ce7df4f56ec..1968b53e790a 100644 --- a/examples/benchmark-react/bench/scenarios.ts +++ b/examples/benchmark-react/bench/scenarios.ts @@ -36,6 +36,30 @@ export const RUN_CONFIG: Record = { }, }; +/** Config for convergent (single-page) timing scenarios where each + * sub-iteration is an individual sample and convergence is checked inline. */ +export interface ConvergentProfile { + warmup: number; + minMeasurement: number; + maxMeasurement: number; + targetMarginPct: number; +} + +export const CONVERGENT_CONFIG: Record<ScenarioSize, ConvergentProfile> = { + small: { + warmup: 5, + minMeasurement: 5, + maxMeasurement: 50, + targetMarginPct: process.env.CI ? 5 : 8, + }, + large: { + warmup: 3, + minMeasurement: 5, + maxMeasurement: 40, + targetMarginPct: process.env.CI ? 8 : 12, + }, +}; + export const ACTION_GROUPS: Record = { mount: ['init', 'initDoubleList', 'mountSortedView', 'listDetailSwitch'], update: ['updateEntity', 'updateUser'],