From 30f85ae953e6746bcfc5edb138d13fb1fce0dd52 Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Sun, 5 Apr 2026 14:55:38 -0600
Subject: [PATCH 1/3] fix(perf): wire engine selection through openRepo to fix
 query benchmarks

openRepo() always tried the native NAPI path first regardless of engine
selection, causing both "wasm" and "native" benchmark workers to measure
the same native rusqlite open/close overhead (~27ms vs ~10ms with direct
better-sqlite3). This produced a false 177% regression in fnDeps query
benchmarks between v3.7.0 and v3.9.0, blocking CI on all PRs.

- Add engine param to openRepo() and check CODEGRAPH_ENGINE env var
- Set CODEGRAPH_ENGINE in benchmark fork workers so query functions
  respect the engine selection per worker
- Add v3.9.0 fnDeps depth 1/3/5 to KNOWN_REGRESSIONS since the
  published data was measured with the broken engine routing
---
 scripts/lib/fork-engine.ts                |  2 +-
 src/db/connection.ts                      |  8 ++++++--
 tests/benchmarks/regression-guard.test.ts | 13 ++++++++++++-
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/scripts/lib/fork-engine.ts b/scripts/lib/fork-engine.ts
index 8e548e94..12a704a5 100644
--- a/scripts/lib/fork-engine.ts
+++ b/scripts/lib/fork-engine.ts
@@ -76,7 +76,7 @@ export function forkWorker(scriptPath, envKey, workerName, argv = [], timeoutMs
 		console.error(`\n[fork] Spawning ${workerName} worker (pid isolation)...`);
 
 		const child = fork(scriptPath, argv, {
-			env: { ...process.env, [envKey]: workerName },
+			env: { ...process.env, [envKey]: workerName, CODEGRAPH_ENGINE: workerName },
 			stdio: ['ignore', 'pipe', 'inherit', 'ipc'],
 		});
 
diff --git a/src/db/connection.ts b/src/db/connection.ts
index 333bc6ce..d91341d6 100644
--- a/src/db/connection.ts
+++ b/src/db/connection.ts
@@ -360,7 +360,7 @@ function openRepoNative(customDbPath?: string): { repo: Repository; close(): voi
  */
 export function openRepo(
   customDbPath?: string,
-  opts: { repo?: Repository } = {},
+  opts: { repo?: Repository; engine?: 'native' | 'wasm' | 'auto' } = {},
 ): { repo: Repository; close(): void } {
   if (opts.repo != null) {
     if (!(opts.repo instanceof Repository)) {
@@ -371,8 +371,12 @@ export function openRepo(
     return { repo: opts.repo, close() {} };
   }
 
+  // Respect explicit engine selection: opts.engine > CODEGRAPH_ENGINE env > auto.
+  // This ensures --engine wasm and benchmark workers bypass the native path.
+  const engine = opts.engine || process.env.CODEGRAPH_ENGINE || 'auto';
+
   // Try native rusqlite path first (Phase 6.14)
-  if (isNativeAvailable()) {
+  if (engine !== 'wasm' && isNativeAvailable()) {
     try {
       return openRepoNative(customDbPath);
     } catch (e) {
diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts
index 918de7fa..89e532dd 100644
--- a/tests/benchmarks/regression-guard.test.ts
+++ b/tests/benchmarks/regression-guard.test.ts
@@ -65,8 +65,19 @@ const SKIP_VERSIONS = new Set(['3.8.0']);
  * - 3.9.0:1-file rebuild — native incremental path re-runs graph-wide phases
  *   (structureMs, AST, CFG, dataflow) on single-file rebuilds. Documented in
  *   BUILD-BENCHMARKS.md Notes section with phase-level breakdown.
+ *
+ * - 3.9.0:fnDeps depth {1,3,5} — openRepo() always routed queries through the
+ *   native NAPI path regardless of engine selection, so both "wasm" and "native"
+ *   benchmark workers measured native rusqlite open/close overhead (~27ms vs
+ *   ~10ms with direct better-sqlite3). Fixed by wiring CODEGRAPH_ENGINE through
+ *   openRepo(); v3.10.0 benchmarks will reflect the corrected measurements.
  */
-const KNOWN_REGRESSIONS = new Set(['3.9.0:1-file rebuild']);
+const KNOWN_REGRESSIONS = new Set([
+  '3.9.0:1-file rebuild',
+  '3.9.0:fnDeps depth 1',
+  '3.9.0:fnDeps depth 3',
+  '3.9.0:fnDeps depth 5',
+]);
 
 /**
  * Maximum minor-version gap allowed for comparison. When the nearest

From 99b6e0f7048cf01823a22334970d7f588bf4fb98 Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:37:29 -0600
Subject: [PATCH 2/3] fix: add engine guard to openReadonlyWithNative (#869)

---
 src/db/connection.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/db/connection.ts b/src/db/connection.ts
index d91341d6..bc8ef827 100644
--- a/src/db/connection.ts
+++ b/src/db/connection.ts
@@ -416,8 +416,11 @@ export function openReadonlyWithNative(customPath?: string): {
 } {
   const db = openReadonlyOrFail(customPath);
 
+  // Respect explicit engine selection, consistent with openRepo().
+  const engine = process.env.CODEGRAPH_ENGINE || 'auto';
+
   let nativeDb: NativeDatabase | undefined;
-  if (isNativeAvailable()) {
+  if (engine !== 'wasm' && isNativeAvailable()) {
     try {
       const dbPath = findDbPath(customPath);
       const native = getNative();

From b6e919a52c5f8bf412ae7e3593e04e66e8b932c2 Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:37:53 -0600
Subject: [PATCH 3/3] fix: add staleness check for KNOWN_REGRESSIONS entries
 (#869)

---
 tests/benchmarks/regression-guard.test.ts | 32 +++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts
index 89e532dd..92b0780c 100644
--- a/tests/benchmarks/regression-guard.test.ts
+++ b/tests/benchmarks/regression-guard.test.ts
@@ -343,6 +343,38 @@ describe('Benchmark regression guard', () => {
     'INCREMENTAL_BENCHMARK_DATA',
   );
 
+  // Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version
+  // behind the current package version).  This makes the stale-exemption
+  // problem self-detecting rather than requiring manual bookkeeping.
+  test('KNOWN_REGRESSIONS entries are not stale', () => {
+    // eslint-disable-next-line @typescript-eslint/no-require-imports
+    const pkgVersion: string = JSON.parse(
+      fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8'),
+    ).version;
+    const stale: string[] = [];
+    for (const entry of KNOWN_REGRESSIONS) {
+      const colonIdx = entry.indexOf(':');
+      if (colonIdx === -1) continue;
+      const entryVersion = entry.slice(0, colonIdx);
+      const gap = minorGap(entryVersion, pkgVersion);
+      if (gap > 1) {
+        stale.push(
+          `${entry} (version ${entryVersion} is ${gap} minor versions behind ${pkgVersion})`,
+        );
+      }
+    }
+    if (stale.length > 0) {
+      console.warn(
+        `[regression-guard] Stale KNOWN_REGRESSIONS entries — remove after verifying corrected data:\n  ${stale.join('\n  ')}`,
+      );
+    }
+    expect(
+      stale.length,
+      `KNOWN_REGRESSIONS has ${stale.length} stale entries (>1 minor version behind ${pkgVersion}). ` +
+        `Remove them after verifying the corrected benchmark data has landed:\n  ${stale.join('\n  ')}`,
+    ).toBe(0);
+  });
+
   // Validate newest-first ordering assumption for all history arrays
   test('build history is sorted newest-first', () => {
     assertNewestFirst(buildHistory, 'Build benchmark');