erickirt · pull · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/.cursor/rules/benchmarking.mdc b/.cursor/rules/benchmarking.mdc
@@ -17,7 +17,7 @@ When working on **`packages/react`** or comparing data-client to other React dat
 - **Where it lives**: `examples/benchmark-react/`
 - **How to run**: From repo root: `yarn build:benchmark-react`, then `yarn workspace example-benchmark-react preview &` and in another terminal `cd examples/benchmark-react && yarn bench`
 - **What it measures**: Browser-based init/update duration, ref-stability counts, sorted-view (Query memoization), optional memory (heap delta), startup metrics (FCP/TBT), and React Profiler commit times. Compares data-client, TanStack Query, and SWR.
-- **CI**: `.github/workflows/benchmark-react.yml` runs on changes to `packages/react/src/**`, `packages/core/src/**`, `packages/endpoint/src/schemas/**`, `packages/normalizr/src/**`, or `examples/benchmark-react/**` and reports via `rhysd/github-action-benchmark` (customSmallerIsBetter). CI runs **data-client only** (hot-path scenarios) to track regressions; competitor libraries (TanStack Query, SWR) are for local comparison only.
+- **CI**: `.github/workflows/benchmark-react.yml` runs on changes to `packages/react/src/**`, `packages/core/src/**`, `packages/endpoint/src/schemas/**`, `packages/normalizr/src/**`, or `examples/benchmark-react/**` and reports via `rhysd/github-action-benchmark` (customBiggerIsBetter). CI runs **data-client only** (hot-path scenarios) to track regressions; competitor libraries (TanStack Query, SWR) are for local comparison only.
 - **Report viewer**: Open `examples/benchmark-react/bench/report-viewer.html` in a browser and paste `react-bench-output.json` to view a comparison table and charts. Toggle "React commit" and "Trace" filters. Use "Load history" for time-series.
 
 See `@examples/benchmark-react/README.md` for methodology, adding a new library, and interpreting results.

diff --git a/.github/workflows/benchmark-react.yml b/.github/workflows/benchmark-react.yml
@@ -60,7 +60,7 @@ jobs:
         uses: rhysd/github-action-benchmark@v1
         with:
           name: 'Benchmark React'
-          tool: 'customSmallerIsBetter'
+          tool: 'customBiggerIsBetter'
           output-file-path: examples/benchmark-react/react-bench-output.json
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           gh-pages-branch: 'gh-pages-bench'
@@ -78,7 +78,7 @@ jobs:
         uses: rhysd/github-action-benchmark@v1
         with:
           name: 'Benchmark React'
-          tool: 'customSmallerIsBetter'
+          tool: 'customBiggerIsBetter'
           output-file-path: examples/benchmark-react/react-bench-output.json
           github-token: "${{ secrets.GITHUB_TOKEN }}"
           gh-pages-branch: 'gh-pages-bench'

diff --git a/examples/benchmark-react/README.md b/examples/benchmark-react/README.md
@@ -27,9 +27,9 @@ The repo has two benchmark suites:
 
 **Hot path (CI)**
 
-- **Get list** (`getlist-100`, `getlist-500`) — Time to show a ListView component that auto-fetches 100 or 500 issues from the list endpoint, then renders (unit: ms). Exercises the full fetch + normalization + render pipeline.
+- **Get list** (`getlist-100`, `getlist-500`) — Throughput of showing a ListView component that auto-fetches 100 or 500 issues from the list endpoint, then renders (unit: ops/s, i.e. 1000 ÷ duration in ms). Exercises the full fetch + normalization + render pipeline.
 - **Get list sorted** (`getlist-500-sorted`) — Mount 500 issues through a sorted/derived view. data-client uses `useQuery(sortedIssuesQuery)` with `Query` schema memoization; competitors use `useMemo` + sort.
-- **Update entity** (`update-entity`) — Time to update one issue and propagate to the UI (unit: ms).
+- **Update entity** (`update-entity`) — Throughput of updating one issue and propagating the change to the UI (unit: ops/s).
 - **Update entity sorted** (`update-entity-sorted`) — After mounting a sorted view, update one entity. data-client's `Query` memoization avoids re-sorting when sort keys are unchanged.
 - **Update entity multi-view** (`update-entity-multi-view`) — Update one issue that appears simultaneously in a list, a detail panel, and a pinned-cards strip. Exercises cross-query entity propagation: normalized cache updates once and all three views reflect the change; non-normalized libraries must invalidate and refetch each query independently.
 - **Update user (scaling)** (`update-user`, `update-user-10000`) — Update one shared user with 1,000 or 10,000 mounted issues to test subscriber scaling. Normalized cache: one store update, all views of that user update.
@@ -51,36 +51,40 @@ The repo has two benchmark suites:
 
 ## Expected results
 
-Illustrative **relative** results with **SWR = 100%** (baseline). For **duration** rows, each value is (library median ms ÷ SWR median ms) × 100 — **lower is faster**. For **ref-stability** rows, the same idea uses the “refs changed” count — **lower is fewer components that saw a new object reference**. Figures are rounded from the **Latest measured results** table below (network simulation on); absolute milliseconds will vary by machine, but **library-to-library ratios** are usually similar.
+Illustrative **relative** results with **baseline = 100%** (plain React useState/useEffect, no data library). For **throughput** rows, each value is (library ops/s ÷ baseline ops/s) × 100 — **higher is faster**; rows that cover several scenarios show the mean of the per-scenario ratios. For **ref-stability** rows, the ratio uses the “refs changed” count — **lower is fewer components that saw a new object reference**. Figures are rounded from the **Latest measured results** table below (network simulation on); absolute ops/s will vary by machine, but **library-to-library ratios** are usually similar.
 
-| Category | Scenarios (representative) | data-client | tanstack-query | swr |
-|---|---|---:|---:|---:|
-| Navigation | `getlist-100`, `getlist-500`, `getlist-500-sorted` | ~103% | ~102% | **100%** |
-| Navigation | `list-detail-switch` | ~21% | ~102% | **100%** |
-| Mutations | `update-entity`, `update-user`, `update-entity-sorted`, `update-entity-multi-view`, `unshift-item`, `delete-item`, `move-item` | ~2% | ~102% | **100%** |
-| Scaling (10k items) | `update-user-10000` | ~5% | ~122% | **100%** |
+| Category | Scenarios (representative) | data-client | tanstack-query | swr | baseline |
+|---|---|---:|---:|---:|---:|
+| Navigation | `getlist-100`, `getlist-500`, `getlist-500-sorted` | ~96% | ~98% | ~99% | **100%** |
+| Navigation | `list-detail-switch-10` | **~949%** | ~199% | ~203% | 100% |
+| Mutations | `update-entity`, `update-user`, `update-entity-sorted`, `update-entity-multi-view`, `unshift-item`, `delete-item`, `move-item` | **~4486%** | ~96% | ~99% | 100% |
+| Scaling (10k items) | `update-user-10000` | **~2006%** | ~84% | ~103% | 100% |
 
 
 ## Latest measured results (network simulation on)
 
-Median per metric; range is approximate 95% CI margin from the runner (`stats.ts`). **Network simulation** applies the per-RPC delays in `bench/scenarios.ts` (`NETWORK_SIM_DELAYS`, e.g. `fetchIssueList` 80 ms, `updateUser` 50 ms) so list refetches after an author update pay extra latency compared to normalized propagation.
+Median ops/s per scenario; range is approximate 95% CI margin from the runner (`stats.ts`). **Network simulation** applies the per-RPC delays in `bench/scenarios.ts` (`NETWORK_SIM_DELAYS`, e.g. `fetchIssueList` 80 ms, `updateUser` 50 ms) so list refetches after an author update pay extra latency compared to normalized propagation.
 
 Run: **2026-03-21**, Linux (WSL2), `yarn build:benchmark-react`, static preview + `env -u CI npx tsx bench/runner.ts --network-sim true` (all libraries; memory scenarios not included). Numbers are **machine-specific**; use them for relative comparison between libraries, not as absolutes.
 
-| Scenario | Unit | data-client | tanstack-query | swr |
-|---|---|---:|---:|---:|
-| `getlist-100` | ms | 89.3 ± 0.22 | 88.7 ± 0.15 | 87.5 ± 0.50 |
-| `getlist-500` | ms | 102.3 ± 1.25 | 99.9 ± 1.25 | 98.4 ± 1.25 |
-| `getlist-500-sorted` | ms | 101.8 ± 1.61 | 99.2 ± 1.29 | 97.9 ± 0.63 |
-| `list-detail-switch` | ms | 144.4 ± 21.22 | 689.4 ± 20.83 | 674.5 ± 35.67 |
-| `update-entity` | ms | 2.8 ± 0.09 | 142.6 ± 0.31 | 142.4 ± 0.34 |
-| `update-user` | ms | 3.0 ± 0.13 | 142.7 ± 0.43 | 139.4 ± 0.51 |
-| `update-entity-sorted` | ms | 3.2 ± 0.24 | 141.3 ± 0.07 | 141.4 ± 0.56 |
-| `update-entity-multi-view` | ms | 2.8 ± 0.41 | 146.6 ± 7.25 | 145.3 ± 8.21 |
-| `update-user-10000` | ms | 10.3 ± 0.82 | 246.0 ± 1.35 | 201.2 ± 0.75 |
-| `unshift-item` | ms | 3.5 ± 0.06 | 144.5 ± 0.38 | 139.7 ± 0.07 |
-| `delete-item` | ms | 3.2 ± 0.10 | 144.4 ± 0.11 | 139.9 ± 0.11 |
-| `move-item` | ms | 3.5 ± 0.13 | 156.4 ± 0.50 | 146.4 ± 0.05 |
+| Scenario | data-client | tanstack-query | swr | baseline |
+|---|---:|---:|---:|---:|
+| **Navigation** | | | | |
+| `getlist-100` | 11.20 ± 0.03 | 11.27 ± 0.02 | 11.43 ± 0.07 | 11.55 ± 0.02 |
+| `getlist-500` | 9.78 ± 0.12 | 10.01 ± 0.13 | 10.16 ± 0.13 | 10.22 ± 0.07 |
+| `getlist-500-sorted` | 9.82 ± 0.16 | 10.08 ± 0.13 | 10.21 ± 0.07 | 10.29 ± 0.06 |
+| `list-detail-switch-10` | 6.93 ± 1.02 | 1.45 ± 0.04 | 1.48 ± 0.08 | 0.73 ± 0.00 |
+| **Mutations** | | | | |
+| `update-entity` | 357.14 ± 11.48 | 7.01 ± 0.02 | 7.02 ± 0.02 | 7.22 ± 0.00 |
+| `update-user` | 333.33 ± 14.44 | 7.01 ± 0.02 | 7.17 ± 0.03 | 7.22 ± 0.01 |
+| `update-entity-sorted` | 312.50 ± 23.44 | 7.08 ± 0.00 | 7.07 ± 0.03 | 7.28 ± 0.01 |
+| `update-entity-multi-view` | 357.14 ± 52.30 | 6.82 ± 0.34 | 6.88 ± 0.39 | 7.14 ± 0.36 |
+| `update-user-10000` | 97.09 ± 7.73 | 4.07 ± 0.02 | 4.97 ± 0.02 | 4.84 ± 0.03 |
+| `unshift-item` | 285.71 ± 4.90 | 6.92 ± 0.02 | 7.16 ± 0.00 | 7.16 ± 0.02 |
+| `delete-item` | 312.50 ± 9.77 | 6.93 ± 0.01 | 7.15 ± 0.01 | 7.16 ± 0.01 |
+| `move-item` | 285.71 ± 10.61 | 6.39 ± 0.02 | 6.83 ± 0.00 | 6.82 ± 0.00 |
+
+[Measured on a Ryzen 9 7950X; 64 GB RAM; Ubuntu (WSL2); Node 24.12.0; Chromium (Playwright)]
 
 ## Expected variance
 
@@ -94,7 +98,7 @@ Regressions >5% on stable scenarios or >15% on volatile scenarios are worth inve
 
 ## Interpreting results
 
-- **Lower is better** for duration (ms), ref-stability counts, and heap delta (bytes).
+- **Higher is better** for throughput (ops/s). **Lower is better** for ref-stability counts and heap delta (bytes).
 - **Ref-stability:** data-client's normalized cache keeps referential equality for unchanged entities, so `issueRefChanged` and `userRefChanged` should stay low. Non-normalized libs typically show higher counts because they create new object references for every cache write.
 - **React commit:** Reported as `(react commit)` suffix entries. These measure React Profiler `actualDuration` and isolate React reconciliation cost from layout/paint.
 - **Report viewer:** Toggle the "Base metrics", "React commit", and "Trace" checkboxes to filter the comparison table. Use "Load history" to compare multiple runs over time.
@@ -186,14 +190,14 @@ Regressions >5% on stable scenarios or >15% on volatile scenarios are worth inve
    Scenarios are classified as `small` or `large` based on their cost:
 
    - **Small** (3 warmup + 15 measurement): `getlist-100`, `update-entity`, `ref-stability-*`, `invalidate-and-resolve`, `unshift-item`, `delete-item`
-   - **Large** (1 warmup + 4 measurement): `getlist-500`, `getlist-500-sorted`, `update-user`, `update-user-10000`, `update-entity-sorted`, `update-entity-multi-view`, `list-detail-switch`
+   - **Large** (1 warmup + 4 measurement): `getlist-500`, `getlist-500-sorted`, `update-user`, `update-user-10000`, `update-entity-sorted`, `update-entity-multi-view`, `list-detail-switch-10`
    - **Memory** (opt-in, 1 warmup + 3 measurement): `memory-mount-unmount-cycle` — run with `--action memory`
 
    When running all scenarios (`yarn bench`), each group runs with its own warmup/measurement count. Use `--size` to run only one group.
 
 ## Output
 
-The runner prints a JSON array in `customSmallerIsBetter` format (name, unit, value, range) to stdout. In CI this is written to `react-bench-output.json` and sent to the benchmark action.
+The runner prints a JSON array in `customBiggerIsBetter` format (name, unit, value, range) to stdout. In CI this is written to `react-bench-output.json` and sent to the benchmark action.
 
 To view results locally, open `bench/report-viewer.html` in a browser and paste the JSON (or upload `react-bench-output.json`) to see a comparison table and bar chart.
 

diff --git a/examples/benchmark-react/bench/report-viewer.html b/examples/benchmark-react/bench/report-viewer.html
@@ -34,7 +34,7 @@ <h1>React benchmark report</h1>
     <label>Paste <code>react-bench-output.json</code> or upload:</label>
     <input type="file" id="file" accept=".json" />
     <br><br>
-    <textarea id="paste" placeholder='Paste JSON array here, e.g. [{"name":"data-client: mount-100-items","unit":"ms","value":12.5,"range":"± 1.2"}, ...]'></textarea>
+    <textarea id="paste" placeholder='Paste JSON array here, e.g. [{"name":"data-client: mount-100-items","unit":"ops/s","value":80.0,"range":"± 1.2"}, ...]'></textarea>
     <br>
     <div class="filter-row">
       <button id="render">Render table</button>
@@ -150,7 +150,7 @@ <h2>Time-series (load multiple runs)</h2>
           var minV = Infinity, maxV = -Infinity;
           libList.forEach(function (lib) {
             var r = byLib[lib];
-            if (r && typeof r.value === 'number' && (r.unit === 'bytes' || r.unit === 'ms' || r.unit === 'count')) {
+            if (r && typeof r.value === 'number' && (r.unit === 'bytes' || r.unit === 'ops/s' || r.unit === 'ms' || r.unit === 'count')) {
               if (r.value < minV) minV = r.value;
               if (r.value > maxV) maxV = r.value;
             }
@@ -159,11 +159,15 @@ <h2>Time-series (load multiple runs)</h2>
             var r = byLib[lib];
             if (!r) { cells.push('<td>\u2014</td>'); return; }
             var cls = '';
-            if (typeof r.value === 'number' && minV !== maxV && (r.unit === 'ms' || r.unit === 'bytes')) {
-              cls = r.value <= minV ? 'fast' : (r.value >= maxV ? 'slow' : '');
+            if (typeof r.value === 'number' && minV !== maxV) {
+              if (r.unit === 'ops/s') {
+                cls = r.value >= maxV ? 'fast' : (r.value <= minV ? 'slow' : '');
+              } else if (r.unit === 'ms' || r.unit === 'bytes') {
+                cls = r.value <= minV ? 'fast' : (r.value >= maxV ? 'slow' : '');
+              }
             }
             var range = r.range ? ' ' + r.range : '';
-            var unitLabel = r.unit === 'ms' ? ' ms' : r.unit === 'bytes' ? ' B' : '';
+            var unitLabel = r.unit === 'ops/s' ? ' ops/s' : r.unit === 'ms' ? ' ms' : r.unit === 'bytes' ? ' B' : '';
             cells.push('<td class="' + cls + '">' + (r.value != null ? Number(r.value) + unitLabel + range : '\u2014') + '</td>');
           });
           tbody += '<tr>' + cells.join('') + '</tr>';

diff --git a/examples/benchmark-react/bench/report.ts b/examples/benchmark-react/bench/report.ts
@@ -1,5 +1,5 @@
 /**
- * Format results as customSmallerIsBetter JSON for rhysd/github-action-benchmark.
+ * Format results as customBiggerIsBetter JSON for rhysd/github-action-benchmark.
  */
 export interface BenchmarkResult {
   name: string;

diff --git a/examples/benchmark-react/bench/runner.ts b/examples/benchmark-react/bench/runner.ts
@@ -73,7 +73,8 @@ function filterScenarios(scenarios: Scenario[]): {
       s =>
         s.name.startsWith('data-client:') &&
         s.category !== 'memory' &&
-        s.category !== 'startup',
+        s.category !== 'startup' &&
+        !s.deterministic,
     );
   } else if (
     !actions ||
@@ -389,7 +390,11 @@ function shuffle<T>(arr: T[]): T[] {
 function scenarioUnit(scenario: Scenario): string {
   if (isRefStabilityScenario(scenario)) return 'count';
   if (scenario.resultMetric === 'heapDelta') return 'bytes';
-  return 'ms';
+  return 'ops/s';
+}
+
+function msToOps(ms: number): number {
+  return ms > 0 ? 1000 / ms : 0;
 }
 
 function recordResult(
@@ -443,12 +448,17 @@ async function runRound(
       try {
         const result = await runScenario(page, lib, scenario, networkSim, cdp);
         recordResult(samples, scenario, result);
+        const unit = scenarioUnit(scenario);
+        const displayValue =
+          unit === 'ops/s' ?
+            `${msToOps(result.value).toFixed(2)} ops/s`
+          : `${result.value.toFixed(2)} ${unit}`;
         const commitSuffix =
           result.reactCommit != null ?
-            ` (commit ${result.reactCommit.toFixed(2)} ms)`
+            ` (commit ${msToOps(result.reactCommit).toFixed(2)} ops/s)`
           : '';
         process.stderr.write(
-          `  ${prefix}${scenario.name}: ${result.value.toFixed(2)} ${scenarioUnit(scenario)}${commitSuffix}\n`,
+          `  ${prefix}${scenario.name}: ${displayValue}${commitSuffix}\n`,
         );
       } catch (err) {
         console.error(
@@ -603,8 +613,11 @@ async function main() {
     const warmup = warmupCount(scenario);
     if (s.value.length <= warmup) continue;
 
-    const { median, range } = computeStats(s.value, warmup);
     const unit = scenarioUnit(scenario);
+    const isOps = unit === 'ops/s';
+    const statSamples =
+      isOps ? s.value.slice(warmup).map(msToOps) : s.value.slice(warmup);
+    const { median, range } = computeStats(statSamples, 0);
     report.push({
       name: scenario.name,
       unit,
@@ -617,13 +630,11 @@ async function main() {
       .slice(warmup)
       .filter(x => !Number.isNaN(x));
     if (reactSamples.length > 0 && !scenario.resultMetric) {
-      const { median: rcMedian, range: rcRange } = computeStats(
-        reactSamples,
-        0,
-      );
+      const rcOps = reactSamples.map(msToOps);
+      const { median: rcMedian, range: rcRange } = computeStats(rcOps, 0);
       report.push({
         name: `${scenario.name} (react commit)`,
-        unit: 'ms',
+        unit: 'ops/s',
         value: Math.round(rcMedian * 100) / 100,
         range: rcRange,
       });
@@ -632,13 +643,11 @@ async function main() {
     // Chrome trace durations (opt-in via BENCH_TRACE=true)
     const traceSamples = s.trace.slice(warmup).filter(x => !Number.isNaN(x));
     if (traceSamples.length > 0) {
-      const { median: trMedian, range: trRange } = computeStats(
-        traceSamples,
-        0,
-      );
+      const trOps = traceSamples.map(msToOps);
+      const { median: trMedian, range: trRange } = computeStats(trOps, 0);
       report.push({
         name: `${scenario.name} (trace)`,
-        unit: 'ms',
+        unit: 'ops/s',
         value: Math.round(trMedian * 100) / 100,
         range: trRange,
       });
@@ -657,6 +666,7 @@ async function main() {
     );
   }
   process.stderr.write('\n');
+
   process.stdout.write(formatReport(report));
 }
 

diff --git a/examples/benchmark-react/bench/scenarios.ts b/examples/benchmark-react/bench/scenarios.ts
@@ -134,9 +134,9 @@ const BASE_SCENARIOS: BaseScenario[] = [
     size: 'large',
   },
   {
-    nameSuffix: 'list-detail-switch',
+    nameSuffix: 'list-detail-switch-10',
     action: 'listDetailSwitch',
-    args: [1000],
+    args: [10, 1000],
     category: 'hotPath',
     size: 'large',
     renderLimit: 100,
@@ -186,7 +186,12 @@ const BASE_SCENARIOS: BaseScenario[] = [
   },
 ];
 
-export const LIBRARIES = ['data-client', 'tanstack-query', 'swr'] as const;
+export const LIBRARIES = [
+  'data-client',
+  'tanstack-query',
+  'swr',
+  'baseline',
+] as const;
 
 export const SCENARIOS: Scenario[] = LIBRARIES.flatMap(lib =>
   BASE_SCENARIOS.filter(

diff --git a/examples/benchmark-react/bench/validate.ts b/examples/benchmark-react/bench/validate.ts
@@ -498,7 +498,7 @@ test('listDetailSwitch completes with correct DOM transitions', async (page, lib
     return;
 
   await clearComplete(page);
-  await page.evaluate(() => window.__BENCH__!.listDetailSwitch!(20));
+  await page.evaluate(() => window.__BENCH__!.listDetailSwitch!(5, 20));
   await waitForComplete(page, 30000);
 
   const hasSortedList = await page.evaluate(