From 7a9112c3c2c7a24f5bb670aef1f564bd3cd82976 Mon Sep 17 00:00:00 2001 From: Adrien Friggeri Date: Fri, 30 Jan 2026 22:41:37 +0000 Subject: [PATCH] Fail CI if snapshots aren't present --- nodejs/test/e2e/session.test.ts | 4 +- test/harness/replayingCapiProxy.ts | 80 ++++++++++++++----- ...ive_toolcallid_in_permission_requests.yaml | 2 +- ...ely_while_events_stream_in_background.yaml | 12 +-- .../sendandwait_throws_on_timeout.yaml | 8 ++ 5 files changed, 78 insertions(+), 28 deletions(-) create mode 100644 test/snapshots/session/sendandwait_throws_on_timeout.yaml diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts index 9d5c0ef1..02dbe10f 100644 --- a/nodejs/test/e2e/session.test.ts +++ b/nodejs/test/e2e/session.test.ts @@ -387,7 +387,9 @@ describe("Send Blocking Behavior", async () => { expect(events).toContain("assistant.message"); }); - it("sendAndWait throws on timeout", async () => { + // Skip in CI - this test validates client-side timeout behavior, not LLM responses. + // The test intentionally times out before receiving a response, so there's no snapshot to replay. + it.skipIf(process.env.CI === "true")("sendAndWait throws on timeout", async () => { const session = await client.createSession(); // Use a slow command to ensure timeout triggers before completion diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts index b48a5b50..a700596b 100644 --- a/test/harness/replayingCapiProxy.ts +++ b/test/harness/replayingCapiProxy.ts @@ -32,6 +32,12 @@ const normalizedToolNames = { [shellConfig.writeShellToolName]: "${write_shell}", }; +/** + * Default model to use when no stored data is available for a given test. + * This enables responding to /models without needing to have a capture file. + */ +const defaultModel = "claude-sonnet-4.5"; + /** * An HTTP proxy that not only captures HTTP exchanges, but also stores them in a file on disk and * replays the stored responses on subsequent runs. @@ -149,7 +155,9 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { options.requestOptions.path?.startsWith("/stop") && options.requestOptions.method === "POST" ) { - const skipWritingCache = options.requestOptions.path.includes("skipWritingCache=true"); + const skipWritingCache = options.requestOptions.path.includes( + "skipWritingCache=true", + ); options.onResponseStart(200, {}); options.onResponseEnd(); await this.stop(skipWritingCache); @@ -184,13 +192,13 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { } // Handle /models endpoint - if ( - options.requestOptions.path === "/models" && - state.storedData?.models.length - ) { - const modelsResponse = createGetModelsResponse( - state.storedData.models, - ); + // Use stored models if available, otherwise use default model + if (options.requestOptions.path === "/models") { + const models = + state.storedData?.models && state.storedData.models.length > 0 + ? state.storedData.models + : [defaultModel]; + const modelsResponse = createGetModelsResponse(models); const body = JSON.stringify(modelsResponse); const headers = { "content-type": "application/json", @@ -202,6 +210,27 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { return; } + // Handle memory endpoints - return stub responses in tests + // Matches: /agents/*/memory/*/enabled, /agents/*/memory/*/recent, etc. + if (options.requestOptions.path?.match(/\/agents\/.*\/memory\//)) { + let body: string; + if (options.requestOptions.path.includes("/enabled")) { + body = JSON.stringify({ enabled: false }); + } else if (options.requestOptions.path.includes("/recent")) { + body = JSON.stringify({ memories: [] }); + } else { + body = JSON.stringify({}); + } + const headers = { + "content-type": "application/json", + ...commonResponseHeaders, + }; + options.onResponseStart(200, headers); + options.onData(Buffer.from(body)); + options.onResponseEnd(); + return; + } + // Handle /chat/completions endpoint if ( state.storedData && @@ -257,7 +286,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { // Fallback to normal proxying if no cached response found // This implicitly captures the new exchange too if (process.env.CI === "true") { - await emitNoMatchingRequestWarning( + await exitWithNoMatchingRequestError( options, state.testInfo, state.workDir, @@ -295,7 +324,7 @@ async function writeCapturesToDisk( } } -async function emitNoMatchingRequestWarning( +async function exitWithNoMatchingRequestError( options: PerformRequestOptions, testInfo: { file: string; line?: number } | undefined, workDir: string, @@ -305,18 +334,27 @@ async function emitNoMatchingRequestWarning( if (testInfo?.file) parts.push(`file=${testInfo.file}`); if (typeof testInfo?.line === "number") parts.push(`line=${testInfo.line}`); const header = parts.length ? ` ${parts.join(",")}` : ""; - const normalized = await parseAndNormalizeRequest( - options.body, - workDir, - toolResultNormalizers, - ); - const normalizedMessages = normalized.conversations[0]?.messages ?? []; - const warningMessage = - `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` + - `Final message: ${JSON.stringify( + + let finalMessageInfo: string; + try { + const normalized = await parseAndNormalizeRequest( + options.body, + workDir, + toolResultNormalizers, + ); + const normalizedMessages = normalized.conversations[0]?.messages ?? []; + finalMessageInfo = JSON.stringify( normalizedMessages[normalizedMessages.length - 1], - )}`; - process.stderr.write(`::warning${header}::${warningMessage}\n`); + ); + } catch { + finalMessageInfo = `(unable to parse request body: ${options.body?.slice(0, 200) ?? "empty"})`; + } + + const errorMessage = + `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` + + `Final message: ${finalMessageInfo}`; + process.stderr.write(`::error${header}::${errorMessage}\n`); + options.onError(new Error(errorMessage)); } async function findSavedChatCompletionResponse( diff --git a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml index 63fde597..b529a01e 100644 --- a/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml +++ b/test/snapshots/permissions/should_receive_toolcallid_in_permission_requests.yaml @@ -46,4 +46,4 @@ conversations: test - role: assistant - content: The command executed successfully and output "test". + content: Command executed successfully, output is "test". diff --git a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml index 8deef905..6da08758 100644 --- a/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml +++ b/test/snapshots/session/send_returns_immediately_while_events_stream_in_background.yaml @@ -12,14 +12,15 @@ conversations: type: function function: name: report_intent - arguments: '{"intent":"Running sleep command"}' + arguments: '{"intent":"Running command"}' - role: assistant tool_calls: - id: toolcall_1 type: function function: name: ${shell} - arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}' + arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo + command","initial_wait":5,"mode":"sync"}' - messages: - role: system content: ${system} @@ -31,12 +32,13 @@ conversations: type: function function: name: report_intent - arguments: '{"intent":"Running sleep command"}' + arguments: '{"intent":"Running command"}' - id: toolcall_1 type: function function: name: ${shell} - arguments: '{"command":"sleep 2 && echo done","description":"Run sleep 2 and echo done","initial_wait":5}' + arguments: '{"command":"sleep 2 && echo done","description":"Run sleep and echo + command","initial_wait":5,"mode":"sync"}' - role: tool tool_call_id: toolcall_0 content: Intent logged @@ -46,4 +48,4 @@ conversations: done - role: assistant - content: The command completed successfully after a 2-second sleep and output "done". + content: Command completed successfully. The output is "done". diff --git a/test/snapshots/session/sendandwait_throws_on_timeout.yaml b/test/snapshots/session/sendandwait_throws_on_timeout.yaml new file mode 100644 index 00000000..0e019bda --- /dev/null +++ b/test/snapshots/session/sendandwait_throws_on_timeout.yaml @@ -0,0 +1,8 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Run 'sleep 2 && echo done'