From 7a9112c3c2c7a24f5bb670aef1f564bd3cd82976 Mon Sep 17 00:00:00 2001
From: Adrien Friggeri <adrien@friggeri.net>
Date: Fri, 30 Jan 2026 22:41:37 +0000
Subject: [PATCH] Fail CI if snapshots aren't present

---
 nodejs/test/e2e/session.test.ts               |  4 +-
 test/harness/replayingCapiProxy.ts            | 80 ++++++++++++++-----
 ...ive_toolcallid_in_permission_requests.yaml |  2 +-
 ...ely_while_events_stream_in_background.yaml | 12 +--
 .../sendandwait_throws_on_timeout.yaml        |  8 ++
 5 files changed, 78 insertions(+), 28 deletions(-)
 create mode 100644 test/snapshots/session/sendandwait_throws_on_timeout.yaml

diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts
index 9d5c0ef1..02dbe10f 100644
--- a/nodejs/test/e2e/session.test.ts
+++ b/nodejs/test/e2e/session.test.ts
@@ -387,7 +387,9 @@ describe("Send Blocking Behavior", async () => {
         expect(events).toContain("assistant.message");
     });
 
-    it("sendAndWait throws on timeout", async () => {
+    // Skip in CI - this test validates client-side timeout behavior, not LLM responses.
+    // The test intentionally times out before a response arrives, so its snapshot has no assistant response to replay.
+    it.skipIf(process.env.CI === "true")("sendAndWait throws on timeout", async () => {
         const session = await client.createSession();
 
         // Use a slow command to ensure timeout triggers before completion
diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts
index b48a5b50..a700596b 100644
--- a/test/harness/replayingCapiProxy.ts
+++ b/test/harness/replayingCapiProxy.ts
@@ -32,6 +32,12 @@ const normalizedToolNames = {
   [shellConfig.writeShellToolName]: "${write_shell}",
 };
 
+/**
+ * Default model to use when a test has no stored data or its stored model list is empty.
+ * This enables responding to /models without needing to have a capture file.
+ */
+const defaultModel = "claude-sonnet-4.5";
+
 /**
  * An HTTP proxy that not only captures HTTP exchanges, but also stores them in a file on disk and
  * replays the stored responses on subsequent runs.
@@ -149,7 +155,9 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
           options.requestOptions.path?.startsWith("/stop") &&
           options.requestOptions.method === "POST"
         ) {
-          const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true");
+          const skipWritingCache = options.requestOptions.path.includes(
+            "skipWritingCache=true",
+          );
           options.onResponseStart(200, {});
           options.onResponseEnd();
           await this.stop(skipWritingCache);
@@ -184,13 +192,13 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
         }
 
         // Handle /models endpoint
-        if (
-          options.requestOptions.path === "/models" &&
-          state.storedData?.models.length
-        ) {
-          const modelsResponse = createGetModelsResponse(
-            state.storedData.models,
-          );
+        // Use stored models if available, otherwise use default model
+        if (options.requestOptions.path === "/models") {
+          const models =
+            state.storedData?.models && state.storedData.models.length > 0
+              ? state.storedData.models
+              : [defaultModel];
+          const modelsResponse = createGetModelsResponse(models);
           const body = JSON.stringify(modelsResponse);
           const headers = {
             "content-type": "application/json",
@@ -202,6 +210,27 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
           return;
         }
 
+        // Handle memory endpoints - return stub responses in tests
+        // Matches: /agents/*/memory/*/enabled, /agents/*/memory/*/recent, etc.
+        if (options.requestOptions.path?.match(/\/agents\/.*\/memory\//)) {
+          let body: string;
+          if (options.requestOptions.path.includes("/enabled")) {
+            body = JSON.stringify({ enabled: false });
+          } else if (options.requestOptions.path.includes("/recent")) {
+            body = JSON.stringify({ memories: [] });
+          } else {
+            body = JSON.stringify({});
+          }
+          const headers = {
+            "content-type": "application/json",
+            ...commonResponseHeaders,
+          };
+          options.onResponseStart(200, headers);
+          options.onData(Buffer.from(body));
+          options.onResponseEnd();
+          return;
+        }
+
         // Handle /chat/completions endpoint
         if (
           state.storedData &&
@@ -257,7 +286,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
         // Fallback to normal proxying if no cached response found
         // This implicitly captures the new exchange too
         if (process.env.CI === "true") {
-          await emitNoMatchingRequestWarning(
+          await exitWithNoMatchingRequestError(
             options,
             state.testInfo,
             state.workDir,
@@ -295,7 +324,7 @@ async function writeCapturesToDisk(
   }
 }
 
-async function emitNoMatchingRequestWarning(
+async function exitWithNoMatchingRequestError(
   options: PerformRequestOptions,
   testInfo: { file: string; line?: number } | undefined,
   workDir: string,
@@ -305,18 +334,27 @@ async function emitNoMatchingRequestWarning(
   if (testInfo?.file) parts.push(`file=${testInfo.file}`);
   if (typeof testInfo?.line === "number") parts.push(`line=${testInfo.line}`);
   const header = parts.length ? ` ${parts.join(",")}` : "";
-  const normalized = await parseAndNormalizeRequest(
-    options.body,
-    workDir,
-    toolResultNormalizers,
-  );
-  const normalizedMessages = normalized.conversations[0]?.messages ?? [];
-  const warningMessage =
-    `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` +
-    `Final message: ${JSON.stringify(
+
+  let finalMessageInfo: string;
+  try {
+    const normalized = await parseAndNormalizeRequest(
+      options.body,
+      workDir,
+      toolResultNormalizers,
+    );
+    const normalizedMessages = normalized.conversations[0]?.messages ?? [];
+    finalMessageInfo = JSON.stringify(
       normalizedMessages[normalizedMessages.length - 1],
-    )}`;
-  process.stderr.write(`::warning${header}::${warningMessage}\n`);
+    );
+  } catch {
+    finalMessageInfo = `(unable to parse request body: ${options.body?.slice(0, 200) ?? "empty"})`;
+  }
+
+  const errorMessage =
+    `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` +
+    `Final message: ${finalMessageInfo}`;
+  process.stderr.write(`::error${header}::${errorMessage}\n`);
+  options.onError(new Error(errorMessage));
 }
 
 async function findSavedChatCompletionResponse(
diff --git a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
index 63fde597..b529a01e 100644
--- a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
+++ b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml
@@ -46,4 +46,4 @@ conversations:
           test
           <exited with exit code 0>
       - role: assistant
-        content: The command executed successfully and output "test".
+        content: Command executed successfully, output is "test".
diff --git a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
index 8deef905..6da08758 100644
--- a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
+++ b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml
@@ -12,14 +12,15 @@ conversations:
             type: function
             function:
               name: report_intent
-              arguments: '{"intent":"Running sleep command"}'
+              arguments: '{"intent":"Running command"}'
       - role: assistant
         tool_calls:
           - id: toolcall_1
             type: function
             function:
               name: ${shell}
-              arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}'
+              arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo
+                command","initial_wait":5,"mode":"sync"}'
   - messages:
       - role: system
         content: ${system}
@@ -31,12 +32,13 @@ conversations:
             type: function
             function:
               name: report_intent
-              arguments: '{"intent":"Running sleep command"}'
+              arguments: '{"intent":"Running command"}'
           - id: toolcall_1
             type: function
             function:
               name: ${shell}
-              arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}'
+              arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo
+                command","initial_wait":5,"mode":"sync"}'
       - role: tool
         tool_call_id: toolcall_0
         content: Intent logged
@@ -46,4 +48,4 @@ conversations:
           done
           <exited with exit code 0>
       - role: assistant
-        content: The command completed successfully after a 2-second sleep and output "done".
+        content: Command completed successfully. The output is "done".
diff --git a/test/snapshots/session/sendandwait_throws_on_timeout.yaml b/test/snapshots/session/sendandwait_throws_on_timeout.yaml
new file mode 100644
index 00000000..0e019bda
--- /dev/null
+++ b/test/snapshots/session/sendandwait_throws_on_timeout.yaml
@@ -0,0 +1,8 @@
+models:
+  - claude-sonnet-4.5
+conversations:
+  - messages:
+      - role: system
+        content: ${system}
+      - role: user
+        content: Run 'sleep 2 && echo done'