From 9184aa6de8966cc6dc022e3aff8f0a597973bdbf Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:01 -0700 Subject: [PATCH 01/13] =?UTF-8?q?chore:=20housekeeping=20=E2=80=94=20remov?= =?UTF-8?q?e=20spec=20files,=20add=20engines=20field,=20fix=20Dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../specs/2026-03-15-trust-section-design.md | 93 ------------------- package.json | 3 + 2 files changed, 3 insertions(+), 93 deletions(-) delete mode 100644 docs/superpowers/specs/2026-03-15-trust-section-design.md diff --git a/docs/superpowers/specs/2026-03-15-trust-section-design.md b/docs/superpowers/specs/2026-03-15-trust-section-design.md deleted file mode 100644 index 5282d63..0000000 --- a/docs/superpowers/specs/2026-03-15-trust-section-design.md +++ /dev/null @@ -1,93 +0,0 @@ -# Design: "Reliability" Trust Section for llmock Docs Site - -## Summary - -Add a new section to the llmock docs site (`docs/index.html`) between "Fixture-driven. Zero boilerplate." (code examples) and "llmock vs MSW" (comparison table). The section explains why users can trust that llmock's response shapes match real provider APIs, and how three-way drift detection keeps it that way. - -## Placement - -``` -Features ("Stop paying for flaky tests") -Code Examples ("Fixture-driven. Zero boilerplate.") -→ NEW: Reliability ("Verified against real APIs. Every day.") -Comparison ("llmock vs MSW") -Claude Code Integration -Real-World Usage -Footer -``` - -## Section Structure - -### Header - -- **Section label**: `RELIABILITY` -- **Headline**: "Verified against real APIs. Every day." -- **Description paragraph**: "A mock that doesn't match reality is worse than no mock — your tests pass, but production breaks. llmock runs three-way drift detection that compares SDK types, real API responses, and mock output to catch shape mismatches before you do." 
- -### Triangle Diagram - -SVG-based diagram showing three nodes arranged in a triangle: - -- **Top center**: "SDK Types" (blue border, `{ }` icon) — "What TypeScript types say the shape should be" -- **Bottom left**: "Real API" (green border, `↔` icon) — "What OpenAI, Claude, Gemini actually return" -- **Bottom right**: "llmock" (purple border, `⚙` icon) — "What the mock produces for the same request" - -Dashed connector lines between all three nodes with horizontal labels at each midpoint: - -- Left edge: "SDK = Real?" -- Right edge: "SDK = Mock?" -- Bottom edge: "Real = Mock?" - -### Diagnosis Cards (3-column grid) - -Three cards explaining the possible outcomes: - -1. **Red dot — "Mock doesn't match real"**: llmock needs updating — test fails immediately. The SDK comparison tells us why it drifted. -2. **Amber dot — "Provider changed, SDK is behind"**: Early warning — the real API has new fields that neither the SDK nor llmock know about yet. -3. **Green dot — "All three agree"**: No drift — the mock matches reality and the SDK types are current. - -Key principle: any mismatch between real API and mock is a failure, regardless of SDK state. The SDK layer diagnoses _why_ drift happened, it doesn't gate severity. - -### Drift Report Snippet - -Monospace terminal-style block showing `$ pnpm test:drift` output with three distinct examples: - -1. `[critical] LLMOCK DRIFT` — missing field (`choices[].message.refusal`: SDK has it, real has it, mock doesn't) -2. `[critical] TYPE MISMATCH` — wrong type (`content[].input`: SDK says object, real says object, mock says string) -3. `[warning] PROVIDER ADDED FIELD` — new field (`choices[].message.annotations`: only real API has it) - -Footer line: "2 critical (test fails) · 1 warning (logged) · detected before any user reported it" - -### CI Footer - -Badge showing "Daily CI" with green dot, text: "Drift tests across 4 providers run automatically every day." 
- -## Styling - -All styles must use the site's CSS custom properties (not hardcoded hex): - -- Background: `var(--bg-deep)` (page) / `var(--bg-card)` (cards) -- Borders: `var(--border)` -- Text: `var(--text-primary)` (headings) / `var(--text-secondary)` (body) / `var(--text-dim)` (labels) -- Accent: `var(--accent)` (green) -- Uses existing `.section-label`, `.section-title`, `.section-desc` CSS classes -- Section uses `class="reveal"` for scroll-triggered animation -- Triangle diagram uses inline SVG for connector lines - -## CI Cadence Change - -The drift CI workflow (`.github/workflows/test-drift.yml`) will be updated from weekly (Monday 6am UTC) to daily (6am UTC every day). The cron changes from `0 6 * * 1` to `0 6 * * *`. - -DRIFT.md and the site footer text will be updated to say "every day" instead of "every week." - -## Files to Modify - -| File | Change | -| ---------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| `docs/index.html` | Insert new section between code examples and comparison. New CSS for triangle diagram, diagnosis cards, drift report. | -| `.github/workflows/test-drift.yml` | Change cron from `0 6 * * 1` to `0 6 * * *` | -| `DRIFT.md` | Update schedule references from weekly to daily; update cost estimate in Cost section for daily cadence | - -## Validated Mockup - -The approved design is in `.superpowers/brainstorm/84286-1773621431/trust-section-v4.html`. 
diff --git a/package.json b/package.json index 9464fd6..8533538 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,9 @@ "description": "Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini)", "license": "MIT", "packageManager": "pnpm@10.28.2", + "engines": { + "node": ">=20.15.0" + }, "type": "module", "exports": { ".": { From cf3978d3996fc60193495d2c745dac124ea4cfc9 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:18 -0700 Subject: [PATCH 02/13] =?UTF-8?q?feat:=20v1.6.0=20=E2=80=94=20endpoints,?= =?UTF-8?q?=20chaos,=20metrics,=20record-and-replay?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New provider endpoints: Ollama, Cohere, Vertex AI, Bedrock Converse, Bedrock streaming (invoke-with-response-stream). New features: Prometheus metrics (/metrics), record-and-replay proxy, strict mode (503 on no-match), stream collapse, AWS EventStream binary framing, NDJSON writer, auth header redaction. Wire up missing imports (proxyAndRecord, createMetricsRegistry, normalizePathLabel), add RecordConfig type, expand MockServerOptions and ServerInstance.defaults with metrics/strict/record fields, add optional registry param to applyChaos for chaos counter tracking, add enableRecording/disableRecording to LLMock class, remove unused RecordConfig imports, deduplicate ChaosConfig import in CLI. 
--- src/aws-event-stream.ts | 156 +++++++++ src/bedrock-converse.ts | 648 ++++++++++++++++++++++++++++++++++ src/bedrock.ts | 379 +++++++++++++++++++- src/chaos.ts | 6 + src/cli.ts | 2 +- src/cohere.ts | 654 ++++++++++++++++++++++++++++++++++ src/embeddings.ts | 50 +++ src/gemini.ts | 41 ++- src/helpers.ts | 8 +- src/index.ts | 30 +- src/llmock.ts | 15 + src/messages.ts | 38 +- src/metrics.ts | 256 ++++++++++++++ src/ndjson-writer.ts | 53 +++ src/ollama.ts | 754 ++++++++++++++++++++++++++++++++++++++++ src/recorder.ts | 380 ++++++++++++++++++++ src/responses.ts | 38 +- src/server.ts | 447 +++++++++++++++++++++--- src/stream-collapse.ts | 586 +++++++++++++++++++++++++++++++ src/types.ts | 11 + src/ws-gemini-live.ts | 9 +- src/ws-realtime.ts | 11 +- src/ws-responses.ts | 9 +- 23 files changed, 4511 insertions(+), 70 deletions(-) create mode 100644 src/aws-event-stream.ts create mode 100644 src/bedrock-converse.ts create mode 100644 src/cohere.ts create mode 100644 src/metrics.ts create mode 100644 src/ndjson-writer.ts create mode 100644 src/ollama.ts create mode 100644 src/recorder.ts create mode 100644 src/stream-collapse.ts diff --git a/src/aws-event-stream.ts b/src/aws-event-stream.ts new file mode 100644 index 0000000..1021d80 --- /dev/null +++ b/src/aws-event-stream.ts @@ -0,0 +1,156 @@ +/** + * AWS Event Stream binary frame encoder. + * + * Implements the AWS binary event stream framing protocol used by Bedrock's + * streaming (invoke-with-response-stream) endpoint. Each frame carries a set of + * string headers and a raw-bytes payload, wrapped in a prelude with CRC32 + * checksums for integrity. 
+ * + * Binary frame layout: + * [total_length: 4B uint32-BE] + * [headers_length: 4B uint32-BE] + * [prelude_crc32: 4B CRC32 of first 8 bytes] + * [headers: variable] + * [payload: variable, raw JSON bytes] + * [message_crc32: 4B CRC32 of entire frame minus last 4 bytes] + */ + +import { crc32 } from "node:zlib"; +import type * as http from "node:http"; +import type { StreamingProfile } from "./types.js"; +import { delay, calculateDelay } from "./sse-writer.js"; + +// ─── Header encoding ──────────────────────────────────────────────────────── + +function encodeHeaders(headers: Record): Buffer { + const parts: Buffer[] = []; + for (const [name, value] of Object.entries(headers)) { + const nameBytes = Buffer.from(name, "utf8"); + const valueBytes = Buffer.from(value, "utf8"); + + // name_length (1 byte) + name + type (1 byte, 7 = STRING) + + // value_length (2 bytes BE) + value + const header = Buffer.alloc(1 + nameBytes.length + 1 + 2 + valueBytes.length); + let offset = 0; + header.writeUInt8(nameBytes.length, offset); + offset += 1; + nameBytes.copy(header, offset); + offset += nameBytes.length; + header.writeUInt8(7, offset); // STRING type + offset += 1; + header.writeUInt16BE(valueBytes.length, offset); + offset += 2; + valueBytes.copy(header, offset); + + parts.push(header); + } + return Buffer.concat(parts); +} + +// ─── Frame encoding ───────────────────────────────────────────────────────── + +/** + * Encode a single AWS Event Stream binary frame with the given headers and + * payload buffer. 
+ */ +export function encodeEventStreamFrame(headers: Record, payload: Buffer): Buffer { + const headersBuffer = encodeHeaders(headers); + const headersLength = headersBuffer.length; + + // prelude (8) + prelude_crc (4) + headers + payload + message_crc (4) + const totalLength = 4 + 4 + 4 + headersLength + payload.length + 4; + + const frame = Buffer.alloc(totalLength); + let offset = 0; + + // Prelude + frame.writeUInt32BE(totalLength, offset); + offset += 4; + frame.writeUInt32BE(headersLength, offset); + offset += 4; + + // Prelude CRC32 (covers first 8 bytes) + const preludeCrc = crc32(frame.subarray(0, 8)); + frame.writeUInt32BE(preludeCrc >>> 0, offset); + offset += 4; + + // Headers + headersBuffer.copy(frame, offset); + offset += headersLength; + + // Payload + payload.copy(frame, offset); + offset += payload.length; + + // Message CRC32 (covers entire frame minus last 4 bytes) + const messageCrc = crc32(frame.subarray(0, totalLength - 4)); + frame.writeUInt32BE(messageCrc >>> 0, offset); + + return frame; +} + +// ─── Convenience wrappers ─────────────────────────────────────────────────── + +/** + * Encode an event-stream message with standard AWS headers for a JSON event. + * + * Sets `:content-type` = `application/json`, `:event-type` = eventType, + * `:message-type` = `event`. + */ +export function encodeEventStreamMessage(eventType: string, jsonPayload: object): Buffer { + const headers: Record = { + ":content-type": "application/json", + ":event-type": eventType, + ":message-type": "event", + }; + const payload = Buffer.from(JSON.stringify(jsonPayload), "utf8"); + return encodeEventStreamFrame(headers, payload); +} + +/** + * Write a sequence of event-stream frames to an HTTP response with optional + * timing control. Mirrors the writeSSEStream pattern from sse-writer.ts. + * + * Returns `true` when all events are written, or `false` if interrupted. 
+ */ +export async function writeEventStream( + res: http.ServerResponse, + events: Array<{ eventType: string; payload: object }>, + options?: { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; + }, +): Promise { + const opts = options ?? {}; + const latency = opts.latency ?? 0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "application/vnd.amazon.eventstream"); + res.setHeader("Transfer-Encoding", "chunked"); + + let chunkIndex = 0; + for (const event of events) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) { + await delay(chunkDelay, signal); + } + if (signal?.aborted) return false; + if (res.writableEnded) return true; + + const frame = encodeEventStreamMessage(event.eventType, event.payload); + res.write(frame); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts new file mode 100644 index 0000000..0880549 --- /dev/null +++ b/src/bedrock-converse.ts @@ -0,0 +1,648 @@ +/** + * AWS Bedrock Converse API support. + * + * Translates incoming Converse and Converse-stream requests (Bedrock Converse + * format) into the ChatCompletionRequest format used by the fixture router, + * and converts fixture responses back into Converse API format — either a + * single JSON response or an Event Stream binary stream. 
+ */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { + generateToolUseId, + isTextResponse, + isToolCallResponse, + isErrorResponse, + flattenHeaders, +} from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import { writeEventStream } from "./aws-event-stream.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; +import { buildBedrockStreamTextEvents, buildBedrockStreamToolCallEvents } from "./bedrock.js"; + +// ─── Converse request types ───────────────────────────────────────────────── + +interface ConverseContentBlock { + text?: string; + toolUse?: { toolUseId: string; name: string; input: object }; + toolResult?: { toolUseId: string; content: { text?: string }[] }; +} + +interface ConverseMessage { + role: "user" | "assistant"; + content: ConverseContentBlock[]; +} + +interface ConverseToolSpec { + name: string; + description?: string; + inputSchema?: object; +} + +interface ConverseRequest { + messages: ConverseMessage[]; + system?: { text: string }[]; + inferenceConfig?: { maxTokens?: number; temperature?: number }; + toolConfig?: { tools: { toolSpec: ConverseToolSpec }[] }; +} + +// ─── Input conversion: Converse → ChatCompletionRequest ───────────────────── + +export function converseToCompletionRequest( + req: ConverseRequest, + modelId: string, +): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + // system field → system message + if (req.system && req.system.length > 0) { + const systemText = req.system.map((s) => s.text).join(""); + if (systemText) { + messages.push({ role: 
"system", content: systemText }); + } + } + + for (const msg of req.messages) { + if (msg.role === "user") { + // Check for toolResult blocks + const toolResults = msg.content.filter((b) => b.toolResult); + const textBlocks = msg.content.filter((b) => b.text !== undefined && !b.toolResult); + + if (toolResults.length > 0) { + for (const block of toolResults) { + const tr = block.toolResult!; + const resultContent = tr.content.map((c) => c.text ?? "").join(""); + messages.push({ + role: "tool", + content: resultContent, + tool_call_id: tr.toolUseId, + }); + } + if (textBlocks.length > 0) { + messages.push({ + role: "user", + content: textBlocks.map((b) => b.text ?? "").join(""), + }); + } + continue; + } + + // Plain user message + const text = msg.content + .filter((b) => b.text !== undefined) + .map((b) => b.text ?? "") + .join(""); + messages.push({ role: "user", content: text }); + } else if (msg.role === "assistant") { + const toolUseBlocks = msg.content.filter((b) => b.toolUse); + const textContent = msg.content + .filter((b) => b.text !== undefined) + .map((b) => b.text ?? 
"") + .join(""); + + if (toolUseBlocks.length > 0) { + messages.push({ + role: "assistant", + content: textContent || null, + tool_calls: toolUseBlocks.map((b) => ({ + id: b.toolUse!.toolUseId, + type: "function" as const, + function: { + name: b.toolUse!.name, + arguments: JSON.stringify(b.toolUse!.input), + }, + })), + }); + } else { + messages.push({ role: "assistant", content: textContent || null }); + } + } + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.toolConfig?.tools && req.toolConfig.tools.length > 0) { + tools = req.toolConfig.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.toolSpec.name, + description: t.toolSpec.description, + parameters: t.toolSpec.inputSchema, + }, + })); + } + + return { + model: modelId, + messages, + stream: false, + temperature: req.inferenceConfig?.temperature, + tools, + }; +} + +// ─── Response builders ────────────────────────────────────────────────────── + +function buildConverseTextResponse(content: string): object { + return { + output: { + message: { + role: "assistant", + content: [{ text: content }], + }, + }, + stopReason: "end_turn", + usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, + }; +} + +function buildConverseToolCallResponse(toolCalls: ToolCall[], logger: Logger): object { + return { + output: { + message: { + role: "assistant", + content: toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + toolUse: { + toolUseId: tc.id || generateToolUseId(), + name: tc.name, + input: argsObj, + }, + }; + }), + }, + }, + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, + }; +} + +// ─── Request handlers ─────────────────────────────────────────────────────── + +export async function handleConverse( + req: http.IncomingMessage, 
+ res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? `/model/${modelId}/converse`; + + let converseReq: ConverseRequest; + try { + converseReq = JSON.parse(raw) as ConverseRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!converseReq.messages || !Array.isArray(converseReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = converseToCompletionRequest(converseReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const body = buildConverseTextResponse(response.content); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const body = buildConverseToolCallResponse(response.toolCalls, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} + +export async function handleConverseStream( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? `/model/${modelId}/converse-stream`; + + let converseReq: ConverseRequest; + try { + converseReq = JSON.parse(raw) as ConverseRequest; + } catch { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!converseReq.messages || !Array.isArray(converseReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = converseToCompletionRequest(converseReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response — stream as Event Stream + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Tool call response — stream as Event Stream + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamToolCallEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/bedrock.ts b/src/bedrock.ts index cee4bb7..3b8ffbf 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -13,6 +13,7 @@ import type { ChatCompletionRequest, ChatMessage, Fixture, + RecordConfig, ToolCall, ToolDefinition, } from "./types.js"; @@ -26,9 +27,12 @@ import { } from "./helpers.js"; import { matchFixture } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; +import { writeEventStream } from "./aws-event-stream.js"; +import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Bedrock Claude request types ──────────────────────────────────────────── @@ -313,19 +317,48 @@ export async function handleBedrock( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + 
res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } journal.add({ method: req.method ?? "POST", path: urlPath, headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", }, }), @@ -406,3 +439,343 @@ export async function handleBedrock( }), ); } + +// ─── Streaming event builders ─────────────────────────────────────────────── + +export function buildBedrockStreamTextEvents( + content: string, + chunkSize: number, +): Array<{ eventType: string; payload: object }> { + const events: Array<{ eventType: string; payload: object }> = []; + + events.push({ + eventType: "messageStart", + payload: { role: "assistant" }, + }); + + events.push({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 0, start: {} }, + }); + + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 0, + delta: { type: "text_delta", text: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 0 }, + }); + + events.push({ + eventType: "messageStop", + payload: { stopReason: "end_turn" }, + }); + + return events; +} + +export 
function buildBedrockStreamToolCallEvents( + toolCalls: ToolCall[], + chunkSize: number, + logger: Logger, +): Array<{ eventType: string; payload: object }> { + const events: Array<{ eventType: string; payload: object }> = []; + + events.push({ + eventType: "messageStart", + payload: { role: "assistant" }, + }); + + for (let tcIdx = 0; tcIdx < toolCalls.length; tcIdx++) { + const tc = toolCalls[tcIdx]; + const toolUseId = tc.id || generateToolUseId(); + + events.push({ + eventType: "contentBlockStart", + payload: { + contentBlockIndex: tcIdx, + start: { + toolUse: { toolUseId, name: tc.name }, + }, + }, + }); + + let argsStr: string; + try { + const parsed = JSON.parse(tc.arguments || "{}"); + argsStr = JSON.stringify(parsed); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsStr = "{}"; + } + + for (let i = 0; i < argsStr.length; i += chunkSize) { + const slice = argsStr.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: tcIdx, + delta: { type: "input_json_delta", inputJSON: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: tcIdx }, + }); + } + + events.push({ + eventType: "messageStop", + payload: { stopReason: "tool_use" }, + }); + + return events; +} + +// ─── Streaming request handler ────────────────────────────────────────────── + +export async function handleBedrockStream( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? 
`/model/${modelId}/invoke-with-response-stream`; + + let bedrockReq: BedrockRequest; + try { + bedrockReq = JSON.parse(raw) as BedrockRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!bedrockReq.messages || !Array.isArray(bedrockReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = bedrockToCompletionRequest(bedrockReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? 
"Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response — stream as Event Stream + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Tool call response — stream as Event Stream + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamToolCallEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/chaos.ts b/src/chaos.ts index 7cdcdd3..05e130f 100644 --- a/src/chaos.ts +++ b/src/chaos.ts @@ -11,6 +11,7 @@ import type * as http from "node:http"; import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; +import type { MetricsRegistry } from "./metrics.js"; export type ChaosAction = "drop" | "malformed" | "disconnect"; @@ -106,10 +107,15 @@ export function applyChaos( rawHeaders: http.IncomingHttpHeaders, journal: Journal, context: ChaosJournalContext, + registry?: MetricsRegistry, ): boolean { const action = evaluateChaos(fixture, serverDefaults, rawHeaders); if (!action) return false; + if (registry) { + registry.incrementCounter("llmock_chaos_triggered_total", { action }); + } + switch (action) { case "drop": { journal.add({ diff --git a/src/cli.ts b/src/cli.ts index d452b48..20b6e29 100644 
--- a/src/cli.ts +++ b/src/cli.ts @@ -6,6 +6,7 @@ import { createServer } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir, validateFixtures } from "./fixture-loader.js"; import { Logger, type LogLevel } from "./logger.js"; import { watchFixtures } from "./watcher.js"; +import type { ChaosConfig } from "./types.js"; const HELP = ` Usage: llmock [options] @@ -81,7 +82,6 @@ if (Number.isNaN(chunkSize) || chunkSize < 1) { const logger = new Logger(logLevel); // Parse chaos config from CLI flags -import type { ChaosConfig } from "./types.js"; let chaos: ChaosConfig | undefined; { const dropStr = values["chaos-drop"]; diff --git a/src/cohere.ts b/src/cohere.ts new file mode 100644 index 0000000..ba5099f --- /dev/null +++ b/src/cohere.ts @@ -0,0 +1,654 @@ +/** + * Cohere v2 Chat API endpoint support. + * + * Translates incoming /v2/chat requests into the ChatCompletionRequest + * format used by the fixture router, and converts fixture responses back into + * Cohere's typed SSE streaming (or non-streaming) format. + * + * Cohere uses typed SSE events (event: + data: lines), similar to the + * Claude Messages handler in messages.ts. 
+ */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + StreamingProfile, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { + generateMessageId, + generateToolCallId, + isTextResponse, + isToolCallResponse, + isErrorResponse, + flattenHeaders, +} from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; + +// ─── Cohere v2 Chat request types ─────────────────────────────────────────── + +interface CohereMessage { + role: "user" | "assistant" | "system" | "tool"; + content: string; + tool_call_id?: string; +} + +interface CohereToolDef { + type: string; + function: { + name: string; + description?: string; + parameters?: object; + }; +} + +interface CohereRequest { + model: string; + messages: CohereMessage[]; + stream?: boolean; + tools?: CohereToolDef[]; + response_format?: { type: string; json_schema?: object }; +} + +// ─── Cohere SSE event types ───────────────────────────────────────────────── + +interface CohereSSEEvent { + type: string; + [key: string]: unknown; +} + +// ─── Zero-value usage block ───────────────────────────────────────────────── + +const ZERO_USAGE = { + billed_units: { input_tokens: 0, output_tokens: 0, search_units: 0, classifications: 0 }, + tokens: { input_tokens: 0, output_tokens: 0 }, +}; + +// ─── Input conversion: Cohere → ChatCompletionRequest ─────────────────────── + +export function cohereToCompletionRequest(req: CohereRequest): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + for (const msg of req.messages) { + if (msg.role === 
"system") { + messages.push({ role: "system", content: msg.content }); + } else if (msg.role === "user") { + messages.push({ role: "user", content: msg.content }); + } else if (msg.role === "assistant") { + messages.push({ role: "assistant", content: msg.content }); + } else if (msg.role === "tool") { + messages.push({ + role: "tool", + content: msg.content, + tool_call_id: msg.tool_call_id, + }); + } + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.tools && req.tools.length > 0) { + tools = req.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + }, + })); + } + + return { + model: req.model, + messages, + stream: req.stream, + tools, + }; +} + +// ─── Response building: fixture → Cohere v2 Chat format ───────────────────── + +// Non-streaming text response +function buildCohereTextResponse(content: string): object { + return { + id: generateMessageId(), + finish_reason: "COMPLETE", + message: { + role: "assistant", + content: [{ type: "text", text: content }], + tool_calls: [], + tool_plan: "", + citations: [], + }, + usage: ZERO_USAGE, + }; +} + +// Non-streaming tool call response +function buildCohereToolCallResponse(toolCalls: ToolCall[], logger: Logger): object { + const cohereCalls = toolCalls.map((tc) => { + // Validate arguments JSON + try { + JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + } + return { + id: tc.id || generateToolCallId(), + type: "function", + function: { + name: tc.name, + arguments: tc.arguments || "{}", + }, + }; + }); + + return { + id: generateMessageId(), + finish_reason: "TOOL_CALL", + message: { + role: "assistant", + content: [], + tool_calls: cohereCalls, + tool_plan: "", + citations: [], + }, + usage: ZERO_USAGE, + }; +} + +// ─── Streaming event builders 
─────────────────────────────────────────────── + +function buildCohereTextStreamEvents(content: string, chunkSize: number): CohereSSEEvent[] { + const msgId = generateMessageId(); + const events: CohereSSEEvent[] = []; + + // message-start + events.push({ + id: msgId, + type: "message-start", + delta: { + message: { + role: "assistant", + content: [], + tool_plan: "", + tool_calls: [], + citations: [], + }, + }, + }); + + // content-start (type: "text" only, no text field) + events.push({ + type: "content-start", + index: 0, + delta: { + message: { + content: { type: "text" }, + }, + }, + }); + + // content-delta — text chunks + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + events.push({ + type: "content-delta", + index: 0, + delta: { + message: { + content: { type: "text", text: slice }, + }, + }, + }); + } + + // content-end + events.push({ + type: "content-end", + index: 0, + }); + + // message-end + events.push({ + type: "message-end", + delta: { + finish_reason: "COMPLETE", + usage: ZERO_USAGE, + }, + }); + + return events; +} + +function buildCohereToolCallStreamEvents( + toolCalls: ToolCall[], + chunkSize: number, + logger: Logger, +): CohereSSEEvent[] { + const msgId = generateMessageId(); + const events: CohereSSEEvent[] = []; + + // message-start + events.push({ + id: msgId, + type: "message-start", + delta: { + message: { + role: "assistant", + content: [], + tool_plan: "", + tool_calls: [], + citations: [], + }, + }, + }); + + // tool-plan-delta + events.push({ + type: "tool-plan-delta", + delta: { + message: { + tool_plan: "I will use the requested tool.", + }, + }, + }); + + for (let idx = 0; idx < toolCalls.length; idx++) { + const tc = toolCalls[idx]; + const callId = tc.id || generateToolCallId(); + + // Validate arguments JSON + let argsJson: string; + try { + JSON.parse(tc.arguments || "{}"); + argsJson = tc.arguments || "{}"; + } catch { + logger.warn( + `Malformed JSON in fixture tool 
call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsJson = "{}"; + } + + // tool-call-start + events.push({ + type: "tool-call-start", + index: idx, + delta: { + message: { + tool_calls: { + id: callId, + type: "function", + function: { + name: tc.name, + arguments: "", + }, + }, + }, + }, + }); + + // tool-call-delta — chunked arguments + for (let i = 0; i < argsJson.length; i += chunkSize) { + const slice = argsJson.slice(i, i + chunkSize); + events.push({ + type: "tool-call-delta", + index: idx, + delta: { + message: { + tool_calls: { + function: { + arguments: slice, + }, + }, + }, + }, + }); + } + + // tool-call-end + events.push({ + type: "tool-call-end", + index: idx, + }); + } + + // message-end + events.push({ + type: "message-end", + delta: { + finish_reason: "TOOL_CALL", + usage: ZERO_USAGE, + }, + }); + + return events; +} + +// ─── SSE writer for Cohere typed events ───────────────────────────────────── + +interface CohereStreamOptions { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; +} + +async function writeCohereSSEStream( + res: http.ServerResponse, + events: CohereSSEEvent[], + optionsOrLatency?: number | CohereStreamOptions, +): Promise { + const opts: CohereStreamOptions = + typeof optionsOrLatency === "number" ? { latency: optionsOrLatency } : (optionsOrLatency ?? {}); + const latency = opts.latency ?? 
0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + let chunkIndex = 0; + for (const event of events) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) await delay(chunkDelay, signal); + if (signal?.aborted) return false; + if (res.writableEnded) return true; + res.write(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} + +// ─── Request handler ──────────────────────────────────────────────────────── + +export async function handleCohere( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + let cohereReq: CohereRequest; + try { + cohereReq = JSON.parse(raw) as CohereRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Validate required model field + if (!cohereReq.model) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "model is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!cohereReq.messages || !Array.isArray(cohereReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = cohereToCompletionRequest(cohereReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "cohere", + req.url ?? "/v2/chat", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? 
"POST"} ${req.url ?? "/v2/chat"}`, + ); + } + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (cohereReq.stream !== true) { + const body = buildCohereTextResponse(response.content); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const events = buildCohereTextStreamEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeCohereSSEStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (cohereReq.stream !== true) { + const body = buildCohereToolCallResponse(response.toolCalls, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const events = buildCohereToolCallStreamEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeCohereSSEStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? 
"POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/embeddings.ts b/src/embeddings.ts index d28d1e7..b86577a 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -20,6 +20,7 @@ import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Embeddings API request types ────────────────────────────────────────── @@ -157,6 +158,55 @@ export async function handleEmbeddings( return; } + // No fixture match — try record-and-replay proxy if configured + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + syntheticReq, + "openai", + req.url ?? "/v1/embeddings", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/embeddings"}`, + ); + journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 503, fixture: null }, + }); + writeErrorResponse( + res, + 503, + JSON.stringify({ + error: { + message: "Strict mode: no fixture matched", + type: "invalid_request_error", + code: "no_fixture_match", + }, + }), + ); + return; + } + // No fixture match — generate deterministic embeddings from input text logger.warn( `No embedding fixture matched for "${combinedInput.slice(0, 80)}" — returning deterministic fallback`, diff --git a/src/gemini.ts b/src/gemini.ts index e61e34c..9e5f096 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -29,6 +29,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Gemini request types ─────────────────────────────────────────────────── @@ -380,6 +381,7 @@ export async function handleGemini( journal: Journal, defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, setCorsHeaders: (res: http.ServerResponse) => void, + providerKey: string = "gemini", ): Promise { const { logger } = defaults; setCorsHeaders(res); @@ -430,21 +432,50 @@ export async function handleGemini( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + providerKey, + path, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? 
"POST"} ${path}`); + } journal.add({ method: req.method ?? "POST", path, headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", - code: 404, - status: "NOT_FOUND", + message: strictMessage, + code: strictStatus, + status: defaults.strict ? "UNAVAILABLE" : "NOT_FOUND", }, }), ); diff --git a/src/helpers.ts b/src/helpers.ts index d141198..ae48a19 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -11,11 +11,17 @@ import type { ChatCompletion, } from "./types.js"; +const REDACTED_HEADERS = new Set(["authorization", "x-api-key", "api-key"]); + export function flattenHeaders(headers: http.IncomingHttpHeaders): Record { const flat: Record = {}; for (const [key, value] of Object.entries(headers)) { if (value === undefined) continue; - flat[key] = Array.isArray(value) ? value.join(", ") : value; + if (REDACTED_HEADERS.has(key.toLowerCase())) { + flat[key] = "[REDACTED]"; + } else { + flat[key] = Array.isArray(value) ? 
value.join(", ") : value; + } } return flat; } diff --git a/src/index.ts b/src/index.ts index 773fb16..a770b96 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,7 +24,35 @@ export type { ResponsesSSEEvent } from "./responses.js"; export { handleMessages } from "./messages.js"; export { handleGemini } from "./gemini.js"; export { handleEmbeddings } from "./embeddings.js"; -export { handleBedrock, bedrockToCompletionRequest } from "./bedrock.js"; +export { handleBedrock, bedrockToCompletionRequest, handleBedrockStream } from "./bedrock.js"; + +// Bedrock Converse +export { + handleConverse, + handleConverseStream, + converseToCompletionRequest, +} from "./bedrock-converse.js"; + +// AWS Event Stream +export { + encodeEventStreamFrame, + encodeEventStreamMessage, + writeEventStream, +} from "./aws-event-stream.js"; + +// Metrics +export { createMetricsRegistry, normalizePathLabel } from "./metrics.js"; +export type { MetricsRegistry } from "./metrics.js"; + +// NDJSON +export { writeNDJSONStream } from "./ndjson-writer.js"; +export type { NDJSONStreamOptions } from "./ndjson-writer.js"; + +// Ollama +export { handleOllama, handleOllamaGenerate, ollamaToCompletionRequest } from "./ollama.js"; + +// Cohere +export { handleCohere, cohereToCompletionRequest } from "./cohere.js"; // WebSocket export { WebSocketConnection, upgradeToWebSocket, computeAcceptKey } from "./ws-framing.js"; diff --git a/src/llmock.ts b/src/llmock.ts index eefc88f..d338dcd 100644 --- a/src/llmock.ts +++ b/src/llmock.ts @@ -6,6 +6,7 @@ import type { FixtureOpts, FixtureResponse, MockServerOptions, + RecordConfig, } from "./types.js"; import { createServer, type ServerInstance } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir } from "./fixture-loader.js"; @@ -168,6 +169,20 @@ export class LLMock { return this; } + // ---- Recording ---- + + enableRecording(config: RecordConfig): this { + this.options.record = config; + if (this.serverInstance) this.serverInstance.defaults.record 
= config; + return this; + } + + disableRecording(): this { + delete this.options.record; + if (this.serverInstance) delete this.serverInstance.defaults.record; + return this; + } + // ---- Reset ---- reset(): this { diff --git a/src/messages.ts b/src/messages.ts index bcc8f5c..5fb38d2 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -30,6 +30,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Claude Messages API request types ────────────────────────────────────── @@ -480,19 +481,50 @@ export async function handleMessages( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "anthropic", + req.url ?? "/v1/messages", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/messages", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/messages"}`, + ); + } journal.add({ method: req.method ?? "POST", path: req.url ?? 
"/v1/messages", headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", }, }), diff --git a/src/metrics.ts b/src/metrics.ts new file mode 100644 index 0000000..48b71a3 --- /dev/null +++ b/src/metrics.ts @@ -0,0 +1,256 @@ +/** + * Lightweight Prometheus metrics registry for LLMock. + * + * Zero external dependencies — implements counters, histograms, and gauges + * with Prometheus text exposition format serialization. + */ + +// --------------------------------------------------------------------------- +// Public interface +// --------------------------------------------------------------------------- + +export interface MetricsRegistry { + incrementCounter(name: string, labels: Record): void; + observeHistogram(name: string, labels: Record, value: number): void; + setGauge(name: string, labels: Record, value: number): void; + serialize(): string; + reset(): void; +} + +// --------------------------------------------------------------------------- +// Histogram bucket boundaries (Prometheus default-ish) +// --------------------------------------------------------------------------- + +const HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]; + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +/** Build a stable label key string for map lookups: `label1="v1",label2="v2"` */ +function labelKey(labels: Record): string { + const entries = Object.entries(labels).sort(([a], [b]) => a.localeCompare(b)); + if (entries.length === 0) return ""; + return entries.map(([k, v]) => `${k}="${escapeLabelValue(v)}"`).join(","); +} + +/** Escape a label value per 
Prometheus text exposition format. */ +function escapeLabelValue(v: string): string { + return v.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n"); +} + +/** Format labels for Prometheus output: `{label1="v1",label2="v2"}` */ +function formatLabels(labels: Record): string { + return `{${labelKey(labels)}}`; +} + +// --------------------------------------------------------------------------- +// Internal metric storage types +// --------------------------------------------------------------------------- + +interface CounterData { + type: "counter"; + /** Map from labelKey → value */ + series: Map; value: number }>; +} + +interface HistogramData { + type: "histogram"; + /** Map from labelKey → bucket counts, sum, count */ + series: Map< + string, + { + labels: Record; + bucketCounts: number[]; // one per HISTOGRAM_BUCKETS entry + sum: number; + count: number; + } + >; +} + +interface GaugeData { + type: "gauge"; + /** Map from labelKey → value */ + series: Map; value: number }>; +} + +type MetricData = CounterData | HistogramData | GaugeData; + +// --------------------------------------------------------------------------- +// Registry implementation +// --------------------------------------------------------------------------- + +export function createMetricsRegistry(): MetricsRegistry { + /** Ordered map: metric name → data. Insertion order preserved for stable output. 
*/ + const metrics = new Map(); + + function getOrCreateCounter(name: string): CounterData { + let data = metrics.get(name); + if (!data) { + data = { type: "counter", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "counter") throw new Error(`Metric ${name} is not a counter`); + return data as CounterData; + } + + function getOrCreateHistogram(name: string): HistogramData { + let data = metrics.get(name); + if (!data) { + data = { type: "histogram", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "histogram") throw new Error(`Metric ${name} is not a histogram`); + return data as HistogramData; + } + + function getOrCreateGauge(name: string): GaugeData { + let data = metrics.get(name); + if (!data) { + data = { type: "gauge", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "gauge") throw new Error(`Metric ${name} is not a gauge`); + return data as GaugeData; + } + + return { + incrementCounter(name: string, labels: Record): void { + const counter = getOrCreateCounter(name); + const key = labelKey(labels); + const existing = counter.series.get(key); + if (existing) { + existing.value += 1; + } else { + counter.series.set(key, { labels, value: 1 }); + } + }, + + observeHistogram(name: string, labels: Record, value: number): void { + const histogram = getOrCreateHistogram(name); + const key = labelKey(labels); + let existing = histogram.series.get(key); + if (!existing) { + existing = { + labels, + bucketCounts: new Array(HISTOGRAM_BUCKETS.length).fill(0) as number[], + sum: 0, + count: 0, + }; + histogram.series.set(key, existing); + } + // Update cumulative bucket counts + for (let i = 0; i < HISTOGRAM_BUCKETS.length; i++) { + if (value <= HISTOGRAM_BUCKETS[i]) { + existing.bucketCounts[i] += 1; + } + } + existing.sum += value; + existing.count += 1; + }, + + setGauge(name: string, labels: Record, value: number): void { + const gauge = getOrCreateGauge(name); + const key = labelKey(labels); + 
const existing = gauge.series.get(key); + if (existing) { + existing.value = value; + } else { + gauge.series.set(key, { labels, value }); + } + }, + + serialize(): string { + const lines: string[] = []; + + for (const [name, data] of metrics) { + switch (data.type) { + case "counter": { + lines.push(`# TYPE ${name} counter`); + for (const series of data.series.values()) { + lines.push(`${name}${formatLabels(series.labels)} ${series.value}`); + } + break; + } + case "histogram": { + lines.push(`# TYPE ${name} histogram`); + for (const series of data.series.values()) { + const lblStr = labelKey(series.labels); + const lblPrefix = lblStr ? `${lblStr},` : ""; + // Bucket lines + for (let i = 0; i < HISTOGRAM_BUCKETS.length; i++) { + lines.push( + `${name}_bucket{${lblPrefix}le="${HISTOGRAM_BUCKETS[i]}"} ${series.bucketCounts[i]}`, + ); + } + // +Inf bucket + lines.push(`${name}_bucket{${lblPrefix}le="+Inf"} ${series.count}`); + // Sum and count + lines.push(`${name}_sum${formatLabels(series.labels)} ${series.sum}`); + lines.push(`${name}_count${formatLabels(series.labels)} ${series.count}`); + } + break; + } + case "gauge": { + lines.push(`# TYPE ${name} gauge`); + for (const series of data.series.values()) { + lines.push(`${name}${formatLabels(series.labels)} ${series.value}`); + } + break; + } + } + } + + return lines.length > 0 ? 
lines.join("\n") + "\n" : ""; + }, + + reset(): void { + metrics.clear(); + }, + }; +} + +// --------------------------------------------------------------------------- +// Path normalization for metric labels +// --------------------------------------------------------------------------- + +// Regex patterns for parametric API routes +const BEDROCK_RE = + /^\/model\/([^/]+)\/(invoke|invoke-with-response-stream|converse|converse-stream)$/; +const GEMINI_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/; +const AZURE_RE = /^\/openai\/deployments\/([^/]+)\/(chat\/completions|embeddings)$/; +const VERTEX_RE = + /^\/v1\/projects\/([^/]+)\/locations\/([^/]+)\/publishers\/google\/models\/([^:]+):(.+)$/; + +/** + * Normalize parametric API paths to route patterns for use as metric labels. + * Replaces dynamic segments (model IDs, deployment names, etc.) with placeholders. + */ +export function normalizePathLabel(pathname: string): string { + // Bedrock: /model/{modelId}/{operation} + const bedrockMatch = pathname.match(BEDROCK_RE); + if (bedrockMatch) { + return `/model/{modelId}/${bedrockMatch[2]}`; + } + + // Gemini: /v1beta/models/{model}:{action} + const geminiMatch = pathname.match(GEMINI_RE); + if (geminiMatch) { + return `/v1beta/models/{model}:${geminiMatch[2]}`; + } + + // Azure: /openai/deployments/{id}/{operation} + const azureMatch = pathname.match(AZURE_RE); + if (azureMatch) { + return `/openai/deployments/{id}/${azureMatch[2]}`; + } + + // Vertex AI: /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:{action} + const vertexMatch = pathname.match(VERTEX_RE); + if (vertexMatch) { + return `/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:${vertexMatch[4]}`; + } + + // Static path — return as-is + return pathname; +} diff --git a/src/ndjson-writer.ts b/src/ndjson-writer.ts new file mode 100644 index 0000000..1e2ab7d --- /dev/null +++ b/src/ndjson-writer.ts @@ -0,0 +1,53 @@ +/** + * NDJSON streaming writer for Ollama 
endpoints. + * + * Mirrors writeSSEStream from sse-writer.ts but writes newline-delimited JSON + * (one JSON object per line) instead of SSE events. + */ + +import type * as http from "node:http"; +import type { StreamingProfile } from "./types.js"; +import { delay, calculateDelay } from "./sse-writer.js"; + +export interface NDJSONStreamOptions { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; +} + +export async function writeNDJSONStream( + res: http.ServerResponse, + chunks: object[], + options?: NDJSONStreamOptions, +): Promise { + const opts = options ?? {}; + const latency = opts.latency ?? 0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "application/x-ndjson"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + let chunkIndex = 0; + for (const chunk of chunks) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) { + await delay(chunkDelay, signal); + } + if (signal?.aborted) return false; + if (res.writableEnded) return true; + res.write(JSON.stringify(chunk) + "\n"); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} diff --git a/src/ollama.ts b/src/ollama.ts new file mode 100644 index 0000000..0ddcc62 --- /dev/null +++ b/src/ollama.ts @@ -0,0 +1,754 @@ +/** + * Ollama API endpoint support. + * + * Translates incoming /api/chat and /api/generate requests into the + * ChatCompletionRequest format used by the fixture router, and converts + * fixture responses back into Ollama's NDJSON streaming or non-streaming format. 
+ * + * Key differences from OpenAI: + * - Ollama defaults to stream: true (opposite of OpenAI) + * - Streaming uses NDJSON, not SSE + * - Tool call arguments are objects, not JSON strings + * - Tool calls have no id field + */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { isTextResponse, isToolCallResponse, isErrorResponse, flattenHeaders } from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import { writeNDJSONStream } from "./ndjson-writer.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; + +// ─── Ollama request types ──────────────────────────────────────────────────── + +interface OllamaMessage { + role: string; + content: string; +} + +interface OllamaToolDef { + type: string; + function: { + name: string; + description?: string; + parameters?: object; + }; +} + +interface OllamaRequest { + model: string; + messages: OllamaMessage[]; + stream?: boolean; // default true! + options?: { temperature?: number; num_predict?: number }; + tools?: OllamaToolDef[]; +} + +interface OllamaGenerateRequest { + model: string; + prompt: string; + stream?: boolean; // default true! 
+ options?: { temperature?: number; num_predict?: number }; +} + +// ─── Duration fields (zeroed, required on final/non-streaming responses) ──── + +const DURATION_FIELDS = { + done_reason: "stop" as const, + total_duration: 0, + load_duration: 0, + prompt_eval_count: 0, + prompt_eval_duration: 0, + eval_count: 0, + eval_duration: 0, +}; + +// ─── Input conversion: Ollama → ChatCompletionRequest ──────────────────────── + +export function ollamaToCompletionRequest(req: OllamaRequest): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + for (const msg of req.messages) { + messages.push({ + role: msg.role as ChatMessage["role"], + content: msg.content, + }); + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.tools && req.tools.length > 0) { + tools = req.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + }, + })); + } + + return { + model: req.model, + messages, + stream: req.stream, + temperature: req.options?.temperature, + max_tokens: req.options?.num_predict, + tools, + }; +} + +function ollamaGenerateToCompletionRequest(req: OllamaGenerateRequest): ChatCompletionRequest { + return { + model: req.model, + messages: [{ role: "user", content: req.prompt }], + stream: req.stream, + temperature: req.options?.temperature, + max_tokens: req.options?.num_predict, + }; +} + +// ─── Response builders: /api/chat ──────────────────────────────────────────── + +function buildOllamaChatTextChunks(content: string, model: string, chunkSize: number): object[] { + const chunks: object[] = []; + + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + chunks.push({ + model, + message: { role: "assistant", content: slice }, + done: false, + }); + } + + // Final chunk with done: true and all duration fields + chunks.push({ + model, + message: { role: "assistant", content: "" }, + 
done: true, + ...DURATION_FIELDS, + }); + + return chunks; +} + +function buildOllamaChatTextResponse(content: string, model: string): object { + return { + model, + message: { role: "assistant", content }, + done: true, + ...DURATION_FIELDS, + }; +} + +function buildOllamaChatToolCallChunks( + toolCalls: ToolCall[], + model: string, + logger: Logger, +): object[] { + const ollamaToolCalls = toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + function: { + name: tc.name, + arguments: argsObj, + }, + }; + }); + + // Tool calls are sent in a single chunk (no streaming of individual args) + const chunks: object[] = []; + chunks.push({ + model, + message: { + role: "assistant", + content: "", + tool_calls: ollamaToolCalls, + }, + done: false, + }); + + // Final chunk + chunks.push({ + model, + message: { role: "assistant", content: "" }, + done: true, + ...DURATION_FIELDS, + }); + + return chunks; +} + +function buildOllamaChatToolCallResponse( + toolCalls: ToolCall[], + model: string, + logger: Logger, +): object { + const ollamaToolCalls = toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + function: { + name: tc.name, + arguments: argsObj, + }, + }; + }); + + return { + model, + message: { + role: "assistant", + content: "", + tool_calls: ollamaToolCalls, + }, + done: true, + ...DURATION_FIELDS, + }; +} + +// ─── Response builders: /api/generate ──────────────────────────────────────── + +function buildOllamaGenerateTextChunks( + content: string, + model: string, + chunkSize: number, +): object[] { + const chunks: object[] = []; + const createdAt = new Date().toISOString(); 
+ + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + response: slice, + done: false, + }); + } + + // Final chunk + chunks.push({ + model, + created_at: createdAt, + response: "", + done: true, + ...DURATION_FIELDS, + context: [], + }); + + return chunks; +} + +function buildOllamaGenerateTextResponse(content: string, model: string): object { + return { + model, + created_at: new Date().toISOString(), + response: content, + done: true, + ...DURATION_FIELDS, + context: [], + }; +} + +// ─── Request handler: /api/chat ────────────────────────────────────────────── + +export async function handleOllama( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? "/api/chat"; + + let ollamaReq: OllamaRequest; + try { + ollamaReq = JSON.parse(raw) as OllamaRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!ollamaReq.messages || !Array.isArray(ollamaReq.messages)) { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = ollamaToCompletionRequest(ollamaReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "ollama", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? 
defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Ollama defaults to streaming when stream is absent or true + const streaming = ollamaReq.stream !== false; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaChatTextResponse(response.content, completionReq.model); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaChatTextChunks(response.content, completionReq.model, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaChatToolCallResponse(response.toolCalls, completionReq.model, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaChatToolCallChunks(response.toolCalls, completionReq.model, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} + +// ─── Request handler: /api/generate ────────────────────────────────────────── + +export async function handleOllamaGenerate( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + setCorsHeaders(res); + + const urlPath = req.url ?? 
"/api/generate"; + + let generateReq: OllamaGenerateRequest; + try { + generateReq = JSON.parse(raw) as OllamaGenerateRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!generateReq.prompt || typeof generateReq.prompt !== "string") { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: prompt field is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = ollamaGenerateToCompletionRequest(generateReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "ollama", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? 
"Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Ollama defaults to streaming when stream is absent or true + const streaming = generateReq.stream !== false; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response (only type supported for /api/generate) + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaGenerateTextResponse(response.content, completionReq.model); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaGenerateTextChunks( + response.content, + completionReq.model, + chunkSize, + ); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call responses not supported for /api/generate — fall through to error + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/recorder.ts b/src/recorder.ts new file mode 100644 index 0000000..650b331 --- /dev/null +++ b/src/recorder.ts @@ -0,0 +1,380 @@ +import * as http from "node:http"; +import * as https from "node:https"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import type { + ChatCompletionRequest, + Fixture, + FixtureResponse, + RecordConfig, + ToolCall, +} from "./types.js"; +import { getLastMessageByRole, getTextContent } from "./router.js"; +import type { Logger } from "./logger.js"; +import { collapseStreamingResponse } from "./stream-collapse.js"; + +let recordCounter = 0; + +/** + * Proxy an unmatched request to the real upstream provider, record the + * response as a fixture on disk and in memory, then relay the response + * back to the original client. + * + * Returns `true` if the request was proxied (provider configured), + * `false` if no upstream URL is configured for the given provider key. + */ +export async function proxyAndRecord( + req: http.IncomingMessage, + res: http.ServerResponse, + request: ChatCompletionRequest, + providerKey: string, + pathname: string, + fixtures: Fixture[], + defaults: { record?: RecordConfig; logger: Logger }, + rawBody?: string, +): Promise { + const record = defaults.record; + if (!record) return false; + + const providers = record.providers as Record; + const upstreamUrl = providers[providerKey]; + + if (!upstreamUrl) { + defaults.logger.warn(`No upstream URL configured for provider "${providerKey}" — cannot proxy`); + return false; + } + + const fixturePath = record.fixturePath ?? 
"./fixtures/recorded"; + const target = new URL(pathname, upstreamUrl); + + defaults.logger.warn(`NO FIXTURE MATCH — proxying to ${upstreamUrl}${pathname}`); + + // Forward relevant headers, strip x-llmock-* headers + const forwardHeaders: Record = {}; + const headersToForward = ["authorization", "x-api-key", "content-type", "accept"]; + for (const name of headersToForward) { + const val = req.headers[name]; + if (val !== undefined) { + forwardHeaders[name] = Array.isArray(val) ? val.join(", ") : val; + } + } + + const requestBody = rawBody ?? JSON.stringify(request); + + // Make upstream request + let upstreamStatus: number; + let upstreamHeaders: http.IncomingHttpHeaders; + let upstreamBody: string; + let rawBuffer: Buffer; + + try { + const result = await makeUpstreamRequest(target, forwardHeaders, requestBody); + upstreamStatus = result.status; + upstreamHeaders = result.headers; + upstreamBody = result.body; + rawBuffer = result.rawBuffer; + } catch (err) { + const msg = err instanceof Error ? err.message : "Unknown proxy error"; + defaults.logger.error(`Proxy request failed: ${msg}`); + res.writeHead(502, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + error: { message: `Proxy to upstream failed: ${msg}`, type: "proxy_error" }, + }), + ); + return true; + } + + // Detect streaming response and collapse if necessary + const contentType = upstreamHeaders["content-type"]; + const ctString = Array.isArray(contentType) ? contentType.join(", ") : (contentType ?? ""); + const isBinaryStream = ctString.toLowerCase().includes("application/vnd.amazon.eventstream"); + const collapsed = collapseStreamingResponse( + ctString, + providerKey, + isBinaryStream ? 
rawBuffer : upstreamBody, + ); + + let fixtureResponse: FixtureResponse; + + if (collapsed) { + // Streaming response — use collapsed result + defaults.logger.warn(`Streaming response detected (${ctString}) — collapsing to fixture`); + if (collapsed.droppedChunks && collapsed.droppedChunks > 0) { + defaults.logger.warn(`${collapsed.droppedChunks} chunk(s) dropped during stream collapse`); + } + if (collapsed.toolCalls && collapsed.toolCalls.length > 0) { + if (collapsed.content) { + defaults.logger.warn( + "Collapsed response has both content and toolCalls — preferring toolCalls", + ); + } + fixtureResponse = { toolCalls: collapsed.toolCalls }; + } else { + fixtureResponse = { content: collapsed.content ?? "" }; + } + } else { + // Non-streaming — try to parse as JSON + let parsedResponse: unknown = null; + try { + parsedResponse = JSON.parse(upstreamBody); + } catch { + // Not JSON — could be an unknown format + defaults.logger.warn("Upstream response is not valid JSON — saving raw response"); + } + fixtureResponse = buildFixtureResponse(parsedResponse, upstreamStatus); + } + + // Build the match criteria from the original request + const fixtureMatch = buildFixtureMatch(request); + + // Build and save the fixture + const fixture: Fixture = { match: fixtureMatch, response: fixtureResponse }; + + // Check if the match is empty (all undefined values) — warn but still save to disk + const matchValues = Object.values(fixtureMatch); + const isEmptyMatch = matchValues.length === 0 || matchValues.every((v) => v === undefined); + if (isEmptyMatch) { + defaults.logger.warn( + "Recorded fixture has empty match criteria — skipping in-memory registration", + ); + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); + const filename = `${providerKey}-${timestamp}-${recordCounter++}.json`; + const filepath = path.join(fixturePath, filename); + + try { + // Ensure fixture directory exists + fs.mkdirSync(fixturePath, { recursive: true }); + + // Exclude auth 
headers from saved fixture (they're in the match/response, not headers) + const fileContent = isEmptyMatch + ? { + fixtures: [fixture], + _warning: "Empty match criteria — this fixture will not match any request", + } + : { fixtures: [fixture] }; + fs.writeFileSync(filepath, JSON.stringify(fileContent, null, 2), "utf-8"); + } catch (err) { + const msg = err instanceof Error ? err.message : "Unknown filesystem error"; + defaults.logger.error(`Failed to save fixture to disk: ${msg}`); + } + + // Register in memory so subsequent identical requests match (skip if empty match) + if (!isEmptyMatch) { + fixtures.push(fixture); + } + + defaults.logger.warn(`Response recorded → ${filepath}`); + + // Relay upstream response to client + const relayHeaders: Record = {}; + if (ctString) { + relayHeaders["Content-Type"] = ctString; + } + res.writeHead(upstreamStatus, relayHeaders); + res.end(upstreamBody); + + return true; +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +function makeUpstreamRequest( + target: URL, + headers: Record, + body: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string; rawBuffer: Buffer }> { + return new Promise((resolve, reject) => { + const transport = target.protocol === "https:" ? https : http; + const req = transport.request( + target, + { + method: "POST", + headers: { + ...headers, + "Content-Length": Buffer.byteLength(body).toString(), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const rawBuffer = Buffer.concat(chunks); + resolve({ + status: res.statusCode ?? 
500, + headers: res.headers, + body: rawBuffer.toString(), + rawBuffer, + }); + }); + }, + ); + req.on("error", reject); + req.write(body); + req.end(); + }); +} + +/** + * Detect the response format from the parsed upstream JSON and convert + * it into an llmock FixtureResponse. + */ +function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse { + if (parsed === null || parsed === undefined) { + // Raw / unparseable response — save as error + return { + error: { message: "Upstream returned non-JSON response", type: "proxy_error" }, + status, + }; + } + + const obj = parsed as Record; + + // Error response + if (obj.error) { + const err = obj.error as Record; + return { + error: { + message: String(err.message ?? "Unknown error"), + type: String(err.type ?? "api_error"), + code: err.code ? String(err.code) : undefined, + }, + status, + }; + } + + // OpenAI embeddings: { data: [{ embedding: [...] }] } + if (Array.isArray(obj.data) && obj.data.length > 0) { + const first = obj.data[0] as Record; + if (Array.isArray(first.embedding)) { + return { embedding: first.embedding as number[] }; + } + } + + // Direct embedding: { embedding: [...] 
} + if (Array.isArray(obj.embedding)) { + return { embedding: obj.embedding as number[] }; + } + + // OpenAI chat completion: { choices: [{ message: { content, tool_calls } }] } + if (Array.isArray(obj.choices) && obj.choices.length > 0) { + const choice = obj.choices[0] as Record; + const message = choice.message as Record | undefined; + if (message) { + // Tool calls + if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + const toolCalls: ToolCall[] = (message.tool_calls as Array>).map( + (tc) => { + const fn = tc.function as Record; + return { + name: String(fn.name), + arguments: String(fn.arguments), + }; + }, + ); + return { toolCalls }; + } + // Text content + if (typeof message.content === "string") { + return { content: message.content }; + } + } + } + + // Anthropic: { content: [{ type: "text", text: "..." }] } or tool_use + if (Array.isArray(obj.content) && obj.content.length > 0) { + const blocks = obj.content as Array>; + // Check for tool_use blocks first + const toolUseBlocks = blocks.filter((b) => b.type === "tool_use"); + if (toolUseBlocks.length > 0) { + const toolCalls: ToolCall[] = toolUseBlocks.map((b) => ({ + name: String(b.name), + arguments: typeof b.input === "string" ? b.input : JSON.stringify(b.input), + })); + return { toolCalls }; + } + // Text blocks + const textBlock = blocks.find((b) => b.type === "text"); + if (textBlock && typeof textBlock.text === "string") { + return { content: textBlock.text }; + } + } + + // Gemini: { candidates: [{ content: { parts: [{ text: "..." 
}] } }] } + if (Array.isArray(obj.candidates) && obj.candidates.length > 0) { + const candidate = obj.candidates[0] as Record; + const content = candidate.content as Record | undefined; + if (content && Array.isArray(content.parts)) { + const parts = content.parts as Array>; + // Tool calls (functionCall) + const fnCallParts = parts.filter((p) => p.functionCall); + if (fnCallParts.length > 0) { + const toolCalls: ToolCall[] = fnCallParts.map((p) => { + const fc = p.functionCall as Record; + return { + name: String(fc.name), + arguments: typeof fc.args === "string" ? fc.args : JSON.stringify(fc.args), + }; + }); + return { toolCalls }; + } + // Text + const textPart = parts.find((p) => typeof p.text === "string"); + if (textPart && typeof textPart.text === "string") { + return { content: textPart.text }; + } + } + } + + // Ollama: { message: { content: "..." } } + if (obj.message && typeof obj.message === "object") { + const msg = obj.message as Record; + if (typeof msg.content === "string") { + return { content: msg.content }; + } + // Ollama message with content array (like Cohere) + if (Array.isArray(msg.content) && msg.content.length > 0) { + const first = msg.content[0] as Record; + if (typeof first.text === "string") { + return { content: first.text }; + } + } + } + + // Fallback: unknown format — save as error + return { + error: { + message: "Could not detect response format from upstream", + type: "proxy_error", + }, + status, + }; +} + +/** + * Derive fixture match criteria from the original request. + */ +function buildFixtureMatch(request: ChatCompletionRequest): { + userMessage?: string; + inputText?: string; +} { + // Embedding request + if (request.embeddingInput) { + return { inputText: request.embeddingInput }; + } + + // Chat request — match on the last user message + const lastUser = getLastMessageByRole(request.messages ?? 
[], "user"); + if (lastUser) { + const text = getTextContent(lastUser.content); + if (text) { + return { userMessage: text }; + } + } + + return {}; +} diff --git a/src/responses.ts b/src/responses.ts index beba4ec..28e2af0 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -30,6 +30,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Responses API request types ──────────────────────────────────────────── @@ -544,19 +545,50 @@ export async function handleResponses( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "openai", + req.url ?? "/v1/responses", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/responses", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/responses"}`, + ); + } journal.add({ method: req.method ?? "POST", path: req.url ?? 
"/v1/responses", headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", code: "no_fixture_match", }, diff --git a/src/server.ts b/src/server.ts index cef414d..8f8b4b3 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,5 +1,11 @@ import * as http from "node:http"; -import type { Fixture, ChatCompletionRequest, ChaosConfig, MockServerOptions } from "./types.js"; +import type { + Fixture, + ChatCompletionRequest, + ChaosConfig, + MockServerOptions, + RecordConfig, +} from "./types.js"; import { Journal } from "./journal.js"; import { matchFixture } from "./router.js"; import { writeSSEStream, writeErrorResponse } from "./sse-writer.js"; @@ -17,20 +23,33 @@ import { import { handleResponses } from "./responses.js"; import { handleMessages } from "./messages.js"; import { handleGemini } from "./gemini.js"; -import { handleBedrock } from "./bedrock.js"; +import { handleBedrock, handleBedrockStream } from "./bedrock.js"; +import { handleConverse, handleConverseStream } from "./bedrock-converse.js"; import { handleEmbeddings } from "./embeddings.js"; +import { handleOllama, handleOllamaGenerate } from "./ollama.js"; +import { handleCohere } from "./cohere.js"; import { upgradeToWebSocket, type WebSocketConnection } from "./ws-framing.js"; import { handleWebSocketResponses } from "./ws-responses.js"; import { handleWebSocketRealtime } from "./ws-realtime.js"; import { handleWebSocketGeminiLive } from "./ws-gemini-live.js"; import { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; export interface ServerInstance { server: http.Server; journal: 
Journal; url: string; - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }; + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + strict?: boolean; + record?: RecordConfig; + }; } const COMPLETIONS_PATH = "/v1/chat/completions"; @@ -40,11 +59,21 @@ const GEMINI_LIVE_PATH = "/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"; const MESSAGES_PATH = "/v1/messages"; const EMBEDDINGS_PATH = "/v1/embeddings"; +const COHERE_CHAT_PATH = "/v2/chat"; const DEFAULT_CHUNK_SIZE = 20; const GEMINI_PATH_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/; const AZURE_DEPLOYMENT_RE = /^\/openai\/deployments\/([^/]+)\/(chat\/completions|embeddings)$/; const BEDROCK_INVOKE_RE = /^\/model\/([^/]+)\/invoke$/; +const BEDROCK_STREAM_RE = /^\/model\/([^/]+)\/invoke-with-response-stream$/; +const BEDROCK_CONVERSE_RE = /^\/model\/([^/]+)\/converse$/; +const BEDROCK_CONVERSE_STREAM_RE = /^\/model\/([^/]+)\/converse-stream$/; +const VERTEX_AI_RE = + /^\/v1\/projects\/[^/]+\/locations\/[^/]+\/publishers\/google\/models\/([^/:]+):(generateContent|streamGenerateContent)$/; + +const OLLAMA_CHAT_PATH = "/api/chat"; +const OLLAMA_GENERATE_PATH = "/api/generate"; +const OLLAMA_TAGS_PATH = "/api/tags"; const HEALTH_PATH = "/health"; const READY_PATH = "/ready"; @@ -93,8 +122,17 @@ async function handleCompletions( res: http.ServerResponse, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + strict?: boolean; + record?: RecordConfig; + }, modelFallback?: string, + providerKey?: string, ): Promise { setCorsHeaders(res); @@ -167,29 +205,70 @@ async function handleCompletions( // Apply chaos before normal response handling if ( - applyChaos(res, 
fixture, defaults.chaos, req.headers, journal, { - method, - path, - headers: flatHeaders, - body, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method, + path, + headers: flatHeaders, + body, + }, + defaults.registry, + ) ) return; if (!fixture) { + // Try record-and-replay proxy if configured + if (defaults.record && providerKey) { + const proxied = await proxyAndRecord( + req, + res, + body, + providerKey, + req.url ?? COMPLETIONS_PATH, + fixtures, + defaults, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? COMPLETIONS_PATH, + headers: flattenHeaders(req.headers), + body, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? COMPLETIONS_PATH}`, + ); + } + journal.add({ method: req.method ?? "POST", path: req.url ?? COMPLETIONS_PATH, headers: flattenHeaders(req.headers), body, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", code: "no_fixture_match", }, @@ -310,15 +389,25 @@ export async function createServer( const host = options?.host ?? "127.0.0.1"; const port = options?.port ?? 0; const logger = new Logger(options?.logLevel ?? "silent"); + const registry = options?.metrics ? createMetricsRegistry() : undefined; + const serverOptions = options ?? {}; const defaults = { - latency: options?.latency ?? 0, - chunkSize: Math.max(1, options?.chunkSize ?? DEFAULT_CHUNK_SIZE), + latency: serverOptions.latency ?? 0, + chunkSize: Math.max(1, serverOptions.chunkSize ?? 
DEFAULT_CHUNK_SIZE), logger, chaos: options?.chaos, + registry, + strict: options?.strict, + record: options?.record, }; const journal = new Journal(); + // Set initial fixtures-loaded gauge + if (registry) { + registry.setGauge("llmock_fixtures_loaded", {}, fixtures.length); + } + const server = http.createServer((req: http.IncomingMessage, res: http.ServerResponse) => { // OPTIONS preflight if (req.method === "OPTIONS") { @@ -326,10 +415,34 @@ export async function createServer( return; } + // Record start time for metrics + const startTime = registry ? process.hrtime.bigint() : 0n; + // Parse the URL pathname (strip query string) const parsedUrl = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`); let pathname = parsedUrl.pathname; + // Instrument response completion for metrics + if (registry) { + const rawPathname = pathname; + res.on("finish", () => { + const normalizedPath = normalizePathLabel(rawPathname); + const method = req.method ?? "UNKNOWN"; + const status = String(res.statusCode); + registry.incrementCounter("llmock_requests_total", { + method, + path: normalizedPath, + status, + }); + const elapsed = Number(process.hrtime.bigint() - startTime) / 1e9; + registry.observeHistogram( + "llmock_request_duration_seconds", + { method, path: normalizedPath }, + elapsed, + ); + }); + } + // Azure OpenAI: /openai/deployments/{id}/{operation} → /v1/{operation} (chat/completions, embeddings) // Must be checked BEFORE the generic /openai/ prefix strip let azureDeploymentId: string | undefined; @@ -361,6 +474,18 @@ export async function createServer( return; } + // Prometheus metrics + if (pathname === "/metrics" && req.method === "GET") { + if (!registry) { + handleNotFound(res, "Not found"); + return; + } + setCorsHeaders(res); + res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4; charset=utf-8" }); + res.end(registry.serialize()); + return; + } + // Models listing if (pathname === MODELS_PATH && req.method === "GET") { 
setCorsHeaders(res); @@ -435,8 +560,8 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -459,8 +584,32 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); + } + res.end(); + } + }); + return; + } + + // POST /v2/chat — Cohere v2 Chat API + if (pathname === COHERE_CHAT_PATH && req.method === "POST") { + readBody(req) + .then((raw) => handleCohere(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + try { + res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -540,8 +689,48 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`data: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); + } + res.end(); + } + }); + return; + } + + // POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:(generateContent|streamGenerateContent) — Vertex AI + const vertexMatch = pathname.match(VERTEX_AI_RE); + if (vertexMatch && req.method === "POST") { + const vertexModel = vertexMatch[1]; + const streaming = vertexMatch[2] === 
"streamGenerateContent"; + readBody(req) + .then((raw) => + handleGemini( + req, + res, + raw, + vertexModel, + streaming, + fixtures, + journal, + defaults, + setCorsHeaders, + "vertexai", + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + try { + res.write(`data: ${JSON.stringify({ error: { message: msg } })}\n\n`); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -572,6 +761,165 @@ export async function createServer( return; } + // POST /model/{modelId}/invoke-with-response-stream — AWS Bedrock Claude streaming + const bedrockStreamMatch = pathname.match(BEDROCK_STREAM_RE); + if (bedrockStreamMatch && req.method === "POST") { + const bedrockModelId = bedrockStreamMatch[1]; + readBody(req) + .then((raw) => + handleBedrockStream( + req, + res, + raw, + bedrockModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /model/{modelId}/converse — AWS Bedrock Converse API + const converseMatch = pathname.match(BEDROCK_CONVERSE_RE); + if (converseMatch && req.method === "POST") { + const converseModelId = converseMatch[1]; + readBody(req) + .then((raw) => + handleConverse( + req, + res, + raw, + converseModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /model/{modelId}/converse-stream — AWS Bedrock Converse streaming API + const converseStreamMatch = pathname.match(BEDROCK_CONVERSE_STREAM_RE); + if (converseStreamMatch && req.method === "POST") { + const converseStreamModelId = converseStreamMatch[1]; + readBody(req) + .then((raw) => + handleConverseStream( + req, + res, + raw, + converseStreamModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /api/chat — Ollama Chat API + if (pathname === OLLAMA_CHAT_PATH && req.method === "POST") { + readBody(req) + .then((raw) => handleOllama(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /api/generate — Ollama Generate API + if (pathname === OLLAMA_GENERATE_PATH && req.method === "POST") { + readBody(req) + .then((raw) => + handleOllamaGenerate(req, res, raw, fixtures, journal, defaults, setCorsHeaders), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // GET /api/tags — Ollama Models listing + if (pathname === OLLAMA_TAGS_PATH && req.method === "GET") { + setCorsHeaders(res); + const modelIds = new Set(); + for (const f of fixtures) { + if (f.match.model && typeof f.match.model === "string") { + modelIds.add(f.match.model); + } + } + const ids = modelIds.size > 0 ? [...modelIds] : DEFAULT_MODELS; + const models = ids.map((name) => ({ + name, + model: name, + modified_at: new Date().toISOString(), + size: 0, + digest: "", + details: {}, + })); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ models })); + return; + } + // POST /v1/chat/completions — Chat Completions API if (pathname !== COMPLETIONS_PATH) { handleNotFound(res, "Not found"); @@ -582,33 +930,40 @@ export async function createServer( return; } - handleCompletions(req, res, fixtures, journal, defaults, azureDeploymentId).catch( - (err: unknown) => { - const msg = err instanceof Error ? err.message : "Internal error"; - if (!res.headersSent) { - writeErrorResponse( - res, - 500, - JSON.stringify({ - error: { - message: msg, - type: "server_error", - }, - }), + const completionsProvider = azureDeploymentId ? "azure" : "openai"; + handleCompletions( + req, + res, + fixtures, + journal, + defaults, + azureDeploymentId, + completionsProvider, + ).catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: msg, + type: "server_error", + }, + }), + ); + } else if (!res.writableEnded) { + // Headers already sent (SSE stream in progress) — write error event then close + try { + res.write( + `data: ${JSON.stringify({ error: { message: msg, type: "server_error" } })}\n\n`, ); - } else if (!res.writableEnded) { - // Headers already sent (SSE stream in progress) — write error event then close - try { - res.write( - `data: ${JSON.stringify({ error: { message: msg, type: "server_error" } })}\n\n`, - ); - } catch { - // write itself failed, nothing more we can do - } - res.end(); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } - }, - ); + res.end(); + } + }); }); // ─── WebSocket upgrade handling ────────────────────────────────────────── diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts new file mode 100644 index 0000000..6d4558e --- /dev/null +++ b/src/stream-collapse.ts @@ -0,0 +1,586 @@ +/** + * Stream collapsing functions for record-and-replay. + * + * Each function takes a raw streaming response body (SSE, NDJSON, or binary + * EventStream) and collapses it into a non-streaming fixture response + * containing either `{ content }` or `{ toolCalls }`. + */ + +import { crc32 } from "node:zlib"; +import type { ToolCall } from "./types.js"; + +// --------------------------------------------------------------------------- +// Result type shared by all collapse functions +// --------------------------------------------------------------------------- + +export interface CollapseResult { + content?: string; + toolCalls?: ToolCall[]; + droppedChunks?: number; +} + +// --------------------------------------------------------------------------- +// 1. 
OpenAI SSE +// --------------------------------------------------------------------------- + +/** + * Collapse OpenAI Chat Completions SSE stream into a single response. + * + * Format: + * data: {"id":"chatcmpl-123","choices":[{"delta":{"content":"Hello"}}]}\n\n + * data: [DONE]\n\n + */ +export function collapseOpenAISSE(body: string): CollapseResult { + const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const line of lines) { + const dataLine = line.split("\n").find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const payload = dataLine.slice(5).trim(); + if (payload === "[DONE]") continue; + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + const choices = parsed.choices as Array> | undefined; + if (!choices || choices.length === 0) continue; + + const delta = choices[0].delta as Record | undefined; + if (!delta) continue; + + // Text content + if (typeof delta.content === "string") { + content += delta.content; + } + + // Tool calls + const toolCalls = delta.tool_calls as Array> | undefined; + if (toolCalls) { + for (const tc of toolCalls) { + const index = tc.index as number; + const fn = tc.function as Record | undefined; + + if (!toolCallMap.has(index)) { + toolCallMap.set(index, { + id: (tc.id as string) ?? "", + name: (fn?.name as string) ?? 
"", + arguments: "", + }); + } + + const entry = toolCallMap.get(index)!; + if (fn?.name && typeof fn.name === "string" && !entry.name) { + entry.name = fn.name; + } + if (tc.id && typeof tc.id === "string" && !entry.id) { + entry.id = tc.id; + } + if (fn?.arguments && typeof fn.arguments === "string") { + entry.arguments += fn.arguments; + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 2. Anthropic SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Anthropic Claude Messages SSE stream into a single response. + * + * Format: + * event: message_start\ndata: {...}\n\n + * event: content_block_delta\ndata: {"delta":{"type":"text_delta","text":"Hello"}}\n\n + */ +export function collapseAnthropicSSE(body: string): CollapseResult { + const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const block of blocks) { + const lines = block.split("\n"); + const eventLine = lines.find((l) => l.startsWith("event:")); + const dataLine = lines.find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const eventType = eventLine ? 
eventLine.slice(6).trim() : ""; + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + if (eventType === "content_block_start") { + const index = parsed.index as number; + const contentBlock = parsed.content_block as Record | undefined; + if (contentBlock?.type === "tool_use") { + toolCallMap.set(index, { + id: (contentBlock.id as string) ?? "", + name: (contentBlock.name as string) ?? "", + arguments: "", + }); + } + } + + if (eventType === "content_block_delta") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + if (!delta) continue; + + if (delta.type === "text_delta" && typeof delta.text === "string") { + content += delta.text; + } + + if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += delta.partial_json; + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 3. Gemini SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Gemini SSE stream into a single response. 
+ * + * Format (data-only, no event prefix, no [DONE]): + * data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}\n\n + */ +export function collapseGeminiSSE(body: string): CollapseResult { + const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + + for (const line of lines) { + const dataLine = line.split("\n").find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + const candidates = parsed.candidates as Array> | undefined; + if (!candidates || candidates.length === 0) continue; + + const candidateContent = candidates[0].content as Record | undefined; + if (!candidateContent) continue; + + const parts = candidateContent.parts as Array> | undefined; + if (!parts || parts.length === 0) continue; + + // Handle functionCall parts + const fnCallParts = parts.filter((p) => p.functionCall); + if (fnCallParts.length > 0) { + const toolCallMap = new Map(); + for (let i = 0; i < fnCallParts.length; i++) { + const fc = fnCallParts[i].functionCall as Record; + toolCallMap.set(i, { + name: String(fc.name ?? ""), + arguments: typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args), + }); + } + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + } + + if (typeof parts[0].text === "string") { + content += parts[0].text; + } + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 4. 
Ollama NDJSON +// --------------------------------------------------------------------------- + +/** + * Collapse Ollama NDJSON stream into a single response. + * + * /api/chat format: + * {"model":"llama3","message":{"role":"assistant","content":"Hello"},"done":false}\n + * + * /api/generate format: + * {"model":"llama3","response":"Hello","done":false}\n + */ +export function collapseOllamaNDJSON(body: string): CollapseResult { + const lines = body.split("\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + + for (const line of lines) { + let parsed: Record; + try { + parsed = JSON.parse(line.trim()) as Record; + } catch { + droppedChunks++; + continue; + } + + // /api/chat format + const message = parsed.message as Record | undefined; + if (message && typeof message.content === "string") { + content += message.content; + } + + // /api/generate format + else if (typeof parsed.response === "string") { + content += parsed.response; + } + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 5. Cohere SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Cohere SSE stream into a single response. + * + * Format: + * event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n + */ +export function collapseCohereSS(body: string): CollapseResult { + const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const block of blocks) { + const lines = block.split("\n"); + const eventLine = lines.find((l) => l.startsWith("event:")); + const dataLine = lines.find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const eventType = eventLine ? 
eventLine.slice(6).trim() : ""; + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + if (eventType === "content-delta") { + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const contentObj = message?.content as Record | undefined; + if (contentObj && typeof contentObj.text === "string") { + content += contentObj.text; + } + } + + if (eventType === "tool-call-start") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const toolCalls = message?.tool_calls as Record | undefined; + if (toolCalls) { + const fn = toolCalls.function as Record | undefined; + toolCallMap.set(index, { + id: (toolCalls.id as string) ?? "", + name: (fn?.name as string) ?? "", + arguments: "", + }); + } + } + + if (eventType === "tool-call-delta") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const toolCalls = message?.tool_calls as Record | undefined; + if (toolCalls) { + const fn = toolCalls.function as Record | undefined; + if (fn && typeof fn.arguments === "string") { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += fn.arguments; + } + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 6. 
Bedrock EventStream (binary) +// --------------------------------------------------------------------------- + +/** + * Decode AWS Event Stream binary frames and extract JSON payloads. + * + * Binary frame layout: + * [total_length: 4B uint32-BE] + * [headers_length: 4B uint32-BE] + * [prelude_crc32: 4B] + * [headers: variable] + * [payload: variable] + * [message_crc32: 4B] + */ +function decodeEventStreamFrames( + buf: Buffer, +): Array<{ headers: Record; payload: Buffer }> { + const frames: Array<{ headers: Record; payload: Buffer }> = []; + let offset = 0; + + while (offset < buf.length) { + if (offset + 12 > buf.length) break; + + const totalLength = buf.readUInt32BE(offset); + const headersLength = buf.readUInt32BE(offset + 4); + + // Validate prelude CRC + const preludeCrc = buf.readUInt32BE(offset + 8); + const computedPreludeCrc = crc32(buf.subarray(offset, offset + 8)); + if (preludeCrc >>> 0 !== computedPreludeCrc >>> 0) { + break; // CRC mismatch — stop parsing + } + + // Parse headers + const headersStart = offset + 12; + const headersEnd = headersStart + headersLength; + const headers: Record = {}; + let hOffset = headersStart; + + while (hOffset < headersEnd) { + const nameLen = buf.readUInt8(hOffset); + hOffset += 1; + const name = buf.subarray(hOffset, hOffset + nameLen).toString("utf8"); + hOffset += nameLen; + // Skip header type byte (type 7 = STRING) + hOffset += 1; + const valueLen = buf.readUInt16BE(hOffset); + hOffset += 2; + const value = buf.subarray(hOffset, hOffset + valueLen).toString("utf8"); + hOffset += valueLen; + headers[name] = value; + } + + // Extract payload + const payloadStart = headersEnd; + const payloadEnd = offset + totalLength - 4; // minus message CRC + const payload = buf.subarray(payloadStart, payloadEnd); + + // Validate message CRC (covers entire frame minus last 4 bytes) + const messageCrc = buf.readUInt32BE(offset + totalLength - 4); + const computedMessageCrc = crc32(buf.subarray(offset, offset + totalLength - 
4)); + if (messageCrc >>> 0 !== computedMessageCrc >>> 0) { + break; // Message CRC mismatch — stop parsing + } + + frames.push({ headers, payload }); + offset += totalLength; + } + + return frames; +} + +/** + * Collapse Bedrock binary Event Stream into a single response. + * + * Each frame contains a JSON payload with event types like: + * contentBlockDelta, contentBlockStart, etc. + */ +export function collapseBedrockEventStream(body: Buffer): CollapseResult { + const frames = decodeEventStreamFrames(body); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const frame of frames) { + let parsed: Record; + try { + parsed = JSON.parse(frame.payload.toString("utf8")) as Record; + } catch { + droppedChunks++; + continue; + } + + // contentBlockStart — may initiate a tool_use block + if (parsed.contentBlockStart) { + const blockStart = parsed.contentBlockStart as Record; + const index = (parsed.contentBlockIndex ?? blockStart.contentBlockIndex) as + | number + | undefined; + const start = blockStart.start as Record | undefined; + if (start?.toolUse && index !== undefined) { + const toolUse = start.toolUse as Record; + toolCallMap.set(index, { + id: (toolUse.toolUseId as string) ?? "", + name: (toolUse.name as string) ?? "", + arguments: "", + }); + } + } + + // contentBlockDelta + if (parsed.contentBlockDelta) { + const blockDelta = parsed.contentBlockDelta as Record; + const index = (parsed.contentBlockIndex ?? 
blockDelta.contentBlockIndex) as + | number + | undefined; + const delta = blockDelta.delta as Record | undefined; + if (!delta) continue; + + // Text delta + if (typeof delta.text === "string") { + content += delta.text; + } + + // Tool use input JSON delta + if (typeof delta.toolUse === "object" && delta.toolUse !== null) { + const toolUseDelta = delta.toolUse as Record; + if (typeof toolUseDelta.input === "string" && index !== undefined) { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += toolUseDelta.input; + } + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// Dispatch helper — pick the right collapse function by provider +// --------------------------------------------------------------------------- + +/** + * Collapse a streaming response body into a non-streaming fixture response. + * Returns null if the content type is not a known streaming format. + */ +export function collapseStreamingResponse( + contentType: string, + providerKey: string, + body: string | Buffer, +): CollapseResult | null { + const ct = contentType.toLowerCase(); + + if (ct.includes("application/vnd.amazon.eventstream")) { + const buf = typeof body === "string" ? Buffer.from(body, "binary") : body; + return collapseBedrockEventStream(buf); + } + + if (ct.includes("application/x-ndjson")) { + const str = typeof body === "string" ? body : body.toString("utf8"); + return collapseOllamaNDJSON(str); + } + + if (ct.includes("text/event-stream")) { + const str = typeof body === "string" ? 
body : body.toString("utf8"); + switch (providerKey) { + case "openai": + case "azure": + return collapseOpenAISSE(str); + case "anthropic": + return collapseAnthropicSSE(str); + case "gemini": + case "vertexai": + return collapseGeminiSSE(str); + case "cohere": + return collapseCohereSS(str); + default: + // Try OpenAI format as default for unknown SSE providers + return collapseOpenAISSE(str); + } + } + + return null; +} diff --git a/src/types.ts b/src/types.ts index 8433548..df0ee6b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -215,6 +215,11 @@ export interface ChatCompletionMessage { // Server options +export interface RecordConfig { + providers: Record; + fixturePath?: string; +} + export interface MockServerOptions { port?: number; host?: string; @@ -223,4 +228,10 @@ export interface MockServerOptions { /** Log verbosity. CLI default is "info"; programmatic default (when omitted) is "silent". */ logLevel?: "silent" | "info" | "debug"; chaos?: ChaosConfig; + /** Enable Prometheus-compatible /metrics endpoint. */ + metrics?: boolean; + /** Strict mode: return 503 instead of 404 when no fixture matches. */ + strict?: boolean; + /** Record-and-replay: proxy unmatched requests to upstream and save fixtures. 
*/ + record?: RecordConfig; } diff --git a/src/ws-gemini-live.ts b/src/ws-gemini-live.ts index 88d1abb..15f70bf 100644 --- a/src/ws-gemini-live.ts +++ b/src/ws-gemini-live.ts @@ -171,7 +171,7 @@ export function handleWebSocketGeminiLive( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; const session: SessionState = { @@ -206,7 +206,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, session: SessionState, ): Promise { let parsed: GeminiLiveMessage; @@ -303,6 +303,11 @@ async function processMessage( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path, diff --git a/src/ws-realtime.ts b/src/ws-realtime.ts index 15e0608..6c9955d 100644 --- a/src/ws-realtime.ts +++ b/src/ws-realtime.ts @@ -130,7 +130,7 @@ export function handleWebSocketRealtime( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; const sessionId = generateId("sess"); @@ -176,7 +176,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; 
strict?: boolean }, session: SessionConfig, conversationItems: RealtimeItem[], ): Promise { @@ -246,7 +246,7 @@ async function handleResponseCreate( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, session: SessionConfig, conversationItems: RealtimeItem[], ): Promise { @@ -266,6 +266,11 @@ async function handleResponseCreate( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path: "/v1/realtime", diff --git a/src/ws-responses.ts b/src/ws-responses.ts index 5d73def..60ab4b7 100644 --- a/src/ws-responses.ts +++ b/src/ws-responses.ts @@ -57,7 +57,7 @@ export function handleWebSocketResponses( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; // Serialize message processing to prevent event interleaving @@ -82,7 +82,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): Promise { let parsed: unknown; try { @@ -143,6 +143,11 @@ async function processMessage( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path: "/v1/responses", From aa527b676113830bbd0d28818728aa80f6153b9b Mon Sep 17 
00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:29 -0700 Subject: [PATCH 03/13] =?UTF-8?q?test:=201176=20tests=20=E2=80=94=20compre?= =?UTF-8?q?hensive=20coverage=20across=20all=20new=20features?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests for: Bedrock streaming, Bedrock Converse, AWS EventStream binary framing, Ollama chat/generate, Cohere v2 chat, Vertex AI, metrics endpoint, record-and-replay proxy, stream collapse, strict mode, and multi-provider recording. --- src/__tests__/aws-event-stream.test.ts | 391 +++ src/__tests__/bedrock-stream.test.ts | 1155 +++++++++ src/__tests__/cohere.test.ts | 932 +++++++ src/__tests__/drift/bedrock-stream.drift.ts | 145 ++ src/__tests__/drift/cohere.drift.ts | 213 ++ src/__tests__/drift/ollama.drift.ts | 219 ++ src/__tests__/drift/vertex-ai.drift.ts | 165 ++ src/__tests__/metrics.test.ts | 579 +++++ src/__tests__/ollama.test.ts | 1045 ++++++++ src/__tests__/recorder.test.ts | 2531 +++++++++++++++++++ src/__tests__/server.test.ts | 10 +- src/__tests__/stream-collapse.test.ts | 1429 +++++++++++ src/__tests__/vertex-ai.test.ts | 524 ++++ 13 files changed, 9333 insertions(+), 5 deletions(-) create mode 100644 src/__tests__/aws-event-stream.test.ts create mode 100644 src/__tests__/bedrock-stream.test.ts create mode 100644 src/__tests__/cohere.test.ts create mode 100644 src/__tests__/drift/bedrock-stream.drift.ts create mode 100644 src/__tests__/drift/cohere.drift.ts create mode 100644 src/__tests__/drift/ollama.drift.ts create mode 100644 src/__tests__/drift/vertex-ai.drift.ts create mode 100644 src/__tests__/metrics.test.ts create mode 100644 src/__tests__/ollama.test.ts create mode 100644 src/__tests__/recorder.test.ts create mode 100644 src/__tests__/stream-collapse.test.ts create mode 100644 src/__tests__/vertex-ai.test.ts diff --git a/src/__tests__/aws-event-stream.test.ts b/src/__tests__/aws-event-stream.test.ts new file mode 100644 index 0000000..6245fbd --- 
/dev/null +++ b/src/__tests__/aws-event-stream.test.ts @@ -0,0 +1,391 @@ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { crc32 } from "node:zlib"; +import { PassThrough } from "node:stream"; +import type * as http from "node:http"; +import { + encodeEventStreamFrame, + encodeEventStreamMessage, + writeEventStream, +} from "../aws-event-stream.js"; + +// ─── Test helpers ──────────────────────────────────────────────────────────── + +function makeMockResponse(): { + res: http.ServerResponse; + chunks: Buffer[]; + headers: () => Record; + ended: () => boolean; +} { + const stream = new PassThrough(); + const chunks: Buffer[] = []; + stream.on("data", (chunk: Buffer) => chunks.push(Buffer.from(chunk))); + + const writtenHeaders: Record = {}; + let isEnded = false; + + const res = { + setHeader(name: string, value: string) { + writtenHeaders[name] = value; + }, + writeHead(statusCode: number, headers?: Record) { + if (headers) { + for (const [k, v] of Object.entries(headers)) { + writtenHeaders[k] = v; + } + } + }, + write(data: Buffer | string) { + stream.write(data); + }, + end(data?: Buffer | string) { + if (data !== undefined) { + stream.write(data); + } + isEnded = true; + stream.end(); + }, + writableEnded: false, + } as unknown as http.ServerResponse; + + // Make writableEnded track our isEnded state + Object.defineProperty(res, "writableEnded", { + get: () => isEnded, + }); + + return { + res, + chunks, + headers: () => writtenHeaders, + ended: () => isEnded, + }; +} + +/** + * Parse the binary frame manually and return its components. 
+ */ +function parseFrame(frame: Buffer) { + const totalLength = frame.readUInt32BE(0); + const headersLength = frame.readUInt32BE(4); + const preludeCrc = frame.readUInt32BE(8); + const headersStart = 12; + const headersEnd = headersStart + headersLength; + const payloadStart = headersEnd; + const payloadEnd = totalLength - 4; + const messageCrc = frame.readUInt32BE(totalLength - 4); + + // Parse headers + const headers: Array<{ name: string; type: number; value: string }> = []; + let offset = headersStart; + while (offset < headersEnd) { + const nameLen = frame.readUInt8(offset); + offset += 1; + const name = frame.subarray(offset, offset + nameLen).toString("utf8"); + offset += nameLen; + const type = frame.readUInt8(offset); + offset += 1; + const valueLen = frame.readUInt16BE(offset); + offset += 2; + const value = frame.subarray(offset, offset + valueLen).toString("utf8"); + offset += valueLen; + headers.push({ name, type, value }); + } + + const payload = frame.subarray(payloadStart, payloadEnd); + + return { totalLength, headersLength, preludeCrc, headers, payload, messageCrc }; +} + +// ─── encodeEventStreamFrame ───────────────────────────────────────────────── + +describe("encodeEventStreamFrame", () => { + it("produces a frame whose total_length field matches actual buffer size", () => { + const headers = { ":event-type": "contentBlockDelta" }; + const payload = Buffer.from(JSON.stringify({ hello: "world" }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const totalLength = frame.readUInt32BE(0); + expect(totalLength).toBe(frame.length); + }); + + it("headers_length field matches actual serialised headers size", () => { + const headers = { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + }; + const payload = Buffer.from("{}", "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + + // Manually compute expected headers size + let expectedLen = 0; + 
for (const [name, value] of Object.entries(headers)) { + const nameBytes = Buffer.byteLength(name, "utf8"); + const valueBytes = Buffer.byteLength(value, "utf8"); + expectedLen += 1 + nameBytes + 1 + 2 + valueBytes; + } + expect(parsed.headersLength).toBe(expectedLen); + }); + + it("prelude CRC32 covers first 8 bytes correctly", () => { + const headers = { ":message-type": "event" }; + const payload = Buffer.from("test", "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const expected = crc32(frame.subarray(0, 8)); + expect(frame.readUInt32BE(8)).toBe(expected >>> 0); + }); + + it("message CRC32 covers entire frame minus last 4 bytes", () => { + const headers = { key: "val" }; + const payload = Buffer.from(JSON.stringify({ n: 42 }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const expected = crc32(frame.subarray(0, frame.length - 4)); + expect(frame.readUInt32BE(frame.length - 4)).toBe(expected >>> 0); + }); + + it("encodes each header with name_length + name + type(7) + value_length + value", () => { + const headers = { ":event-type": "chunk", ":message-type": "event" }; + const payload = Buffer.alloc(0); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + expect(parsed.headers).toHaveLength(2); + + expect(parsed.headers[0].name).toBe(":event-type"); + expect(parsed.headers[0].type).toBe(7); + expect(parsed.headers[0].value).toBe("chunk"); + + expect(parsed.headers[1].name).toBe(":message-type"); + expect(parsed.headers[1].type).toBe(7); + expect(parsed.headers[1].value).toBe("event"); + }); + + it("payload is raw bytes (not base64)", () => { + const obj = { text: "hello world" }; + const payload = Buffer.from(JSON.stringify(obj), "utf8"); + const frame = encodeEventStreamFrame({}, payload); + + const parsed = parseFrame(frame); + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded).toEqual(obj); + }); + + it("handles empty headers and empty 
payload", () => { + const frame = encodeEventStreamFrame({}, Buffer.alloc(0)); + const parsed = parseFrame(frame); + + // 4 (total) + 4 (headers_length) + 4 (prelude_crc) + 0 (headers) + 0 (payload) + 4 (msg_crc) = 16 + expect(parsed.totalLength).toBe(16); + expect(parsed.headersLength).toBe(0); + expect(parsed.headers).toHaveLength(0); + expect(parsed.payload.length).toBe(0); + }); + + it("large payload (100KB) encoding correctness", () => { + const largeString = "A".repeat(100 * 1024); + const payload = Buffer.from(JSON.stringify({ data: largeString }), "utf8"); + const frame = encodeEventStreamFrame({ ":event-type": "big" }, payload); + + const parsed = parseFrame(frame); + expect(parsed.totalLength).toBe(frame.length); + + // Verify CRCs + const expectedPrelude = crc32(frame.subarray(0, 8)); + expect(parsed.preludeCrc).toBe(expectedPrelude >>> 0); + const expectedMsg = crc32(frame.subarray(0, frame.length - 4)); + expect(parsed.messageCrc).toBe(expectedMsg >>> 0); + + // Verify payload + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded.data.length).toBe(100 * 1024); + }); + + it("handles UTF-8 multi-byte characters in headers and payload", () => { + const headers = { "x-emoji": "\u{1F600}" }; + const payload = Buffer.from(JSON.stringify({ msg: "\u{1F4A9}" }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + expect(parsed.headers[0].value).toBe("\u{1F600}"); + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded.msg).toBe("\u{1F4A9}"); + }); +}); + +// ─── encodeEventStreamMessage ─────────────────────────────────────────────── + +describe("encodeEventStreamMessage", () => { + it("wraps JSON payload with standard AWS headers", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { delta: { text: "hi" } }); + const parsed = parseFrame(frame); + + const headerMap = Object.fromEntries(parsed.headers.map((h) => [h.name, h.value])); + 
expect(headerMap[":content-type"]).toBe("application/json"); + expect(headerMap[":event-type"]).toBe("contentBlockDelta"); + expect(headerMap[":message-type"]).toBe("event"); + }); + + it("payload is raw JSON bytes (not base64)", () => { + const obj = { delta: { text: "test" } }; + const frame = encodeEventStreamMessage("contentBlockDelta", obj); + const parsed = parseFrame(frame); + + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded).toEqual(obj); + }); + + it("round-trip: encode then parse produces identical data", () => { + const eventType = "messageStop"; + const payload = { stop_reason: "end_turn", usage: { input_tokens: 10, output_tokens: 5 } }; + const frame = encodeEventStreamMessage(eventType, payload); + const parsed = parseFrame(frame); + + // Verify structural integrity + expect(parsed.totalLength).toBe(frame.length); + const preludeCrc = crc32(frame.subarray(0, 8)); + expect(parsed.preludeCrc).toBe(preludeCrc >>> 0); + const messageCrc = crc32(frame.subarray(0, frame.length - 4)); + expect(parsed.messageCrc).toBe(messageCrc >>> 0); + + // Verify content + const headerMap = Object.fromEntries(parsed.headers.map((h) => [h.name, h.value])); + expect(headerMap[":event-type"]).toBe(eventType); + expect(JSON.parse(parsed.payload.toString("utf8"))).toEqual(payload); + }); +}); + +// ─── writeEventStream ─────────────────────────────────────────────────────── + +describe("writeEventStream", () => { + afterEach(() => { + vi.useRealTimers(); + }); + + it("sets Content-Type to application/vnd.amazon.eventstream", async () => { + const { res, headers } = makeMockResponse(); + await writeEventStream(res, []); + expect(headers()["Content-Type"]).toBe("application/vnd.amazon.eventstream"); + }); + + it("writes binary frames for each event", async () => { + const { res, chunks } = makeMockResponse(); + const events = [ + { eventType: "contentBlockDelta", payload: { delta: { text: "A" } } }, + { eventType: "contentBlockDelta", payload: { 
delta: { text: "B" } } }, + ]; + await writeEventStream(res, events); + + // Wait a tick for PassThrough to flush + await new Promise((r) => setTimeout(r, 10)); + + const output = Buffer.concat(chunks); + expect(output.length).toBeGreaterThan(0); + + // Parse the first frame from the output + const firstTotalLen = output.readUInt32BE(0); + const firstParsed = parseFrame(output.subarray(0, firstTotalLen)); + const firstPayload = JSON.parse(firstParsed.payload.toString("utf8")); + expect(firstPayload).toEqual({ delta: { text: "A" } }); + + // Parse the second frame + const secondParsed = parseFrame(output.subarray(firstTotalLen)); + const secondPayload = JSON.parse(secondParsed.payload.toString("utf8")); + expect(secondPayload).toEqual({ delta: { text: "B" } }); + }); + + it("returns true when stream completes normally", async () => { + const { res } = makeMockResponse(); + const result = await writeEventStream(res, [{ eventType: "test", payload: { data: 1 } }]); + expect(result).toBe(true); + }); + + it("calls res.end() when done", async () => { + const { res, ended } = makeMockResponse(); + await writeEventStream(res, []); + expect(ended()).toBe(true); + }); + + it("returns true immediately when res.writableEnded is already true", async () => { + const { res, headers } = makeMockResponse(); + // Force writableEnded to true + Object.defineProperty(res, "writableEnded", { get: () => true }); + const result = await writeEventStream(res, [{ eventType: "test", payload: { data: 1 } }]); + expect(result).toBe(true); + expect(headers()["Content-Type"]).toBeUndefined(); + }); + + it("supports streaming profile delays", async () => { + vi.useFakeTimers(); + const { res } = makeMockResponse(); + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + ]; + + const promise = writeEventStream(res, events, { + streamingProfile: { ttft: 100, tps: 10 }, + }); + await vi.runAllTimersAsync(); + const result = await promise; + 
expect(result).toBe(true); + }); + + it("supports latency option", async () => { + vi.useFakeTimers(); + const { res } = makeMockResponse(); + const events = [{ eventType: "test", payload: { n: 1 } }]; + + const promise = writeEventStream(res, events, { latency: 50 }); + await vi.runAllTimersAsync(); + const result = await promise; + expect(result).toBe(true); + }); + + it("stops mid-stream on abort signal and returns false", async () => { + const { res } = makeMockResponse(); + const controller = new AbortController(); + + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + { eventType: "test", payload: { n: 3 } }, + ]; + + let chunksSent = 0; + const result = await writeEventStream(res, events, { + signal: controller.signal, + onChunkSent: () => { + chunksSent++; + if (chunksSent === 1) controller.abort(); + }, + }); + + expect(result).toBe(false); + // Should have written exactly one frame before abort + expect(chunksSent).toBe(1); + }); + + it("sets Transfer-Encoding: chunked header", async () => { + const { res, headers } = makeMockResponse(); + await writeEventStream(res, [{ eventType: "test", payload: { n: 1 } }]); + expect(headers()["Transfer-Encoding"]).toBe("chunked"); + }); + + it("onChunkSent fires per event", async () => { + const { res } = makeMockResponse(); + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + { eventType: "test", payload: { n: 3 } }, + ]; + let count = 0; + await writeEventStream(res, events, { + onChunkSent: () => { + count++; + }, + }); + expect(count).toBe(3); + }); +}); diff --git a/src/__tests__/bedrock-stream.test.ts b/src/__tests__/bedrock-stream.test.ts new file mode 100644 index 0000000..0fa3f03 --- /dev/null +++ b/src/__tests__/bedrock-stream.test.ts @@ -0,0 +1,1155 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import { crc32 } from "node:zlib"; +import type { 
Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { converseToCompletionRequest } from "../bedrock-converse.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function postBinary( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +/** + * Parse sequential binary Event Stream frames from a buffer. 
+ */ +interface ParsedFrame { + eventType: string; + messageType: string; + payload: unknown; + preludeCrc: { expected: number; actual: number }; + messageCrc: { expected: number; actual: number }; +} + +function parseFrames(buf: Buffer): ParsedFrame[] { + const frames: ParsedFrame[] = []; + let offset = 0; + + while (offset < buf.length) { + const totalLength = buf.readUInt32BE(offset); + const frame = buf.subarray(offset, offset + totalLength); + + // Compute CRCs for later assertion + const computedPreludeCrc = crc32(frame.subarray(0, 8)) >>> 0; + const storedPreludeCrc = frame.readUInt32BE(8); + const computedMessageCrc = crc32(frame.subarray(0, totalLength - 4)) >>> 0; + const storedMessageCrc = frame.readUInt32BE(totalLength - 4); + + // Parse headers + const headersLength = frame.readUInt32BE(4); + const headersStart = 12; + const headersEnd = headersStart + headersLength; + const headers: Record = {}; + let hOffset = headersStart; + while (hOffset < headersEnd) { + const nameLen = frame.readUInt8(hOffset); + hOffset += 1; + const name = frame.subarray(hOffset, hOffset + nameLen).toString("utf8"); + hOffset += nameLen; + hOffset += 1; // type byte (7 = STRING) + const valueLen = frame.readUInt16BE(hOffset); + hOffset += 2; + const value = frame.subarray(hOffset, hOffset + valueLen).toString("utf8"); + hOffset += valueLen; + headers[name] = value; + } + + // Parse payload + const payloadStart = headersEnd; + const payloadEnd = totalLength - 4; + const payloadBuf = frame.subarray(payloadStart, payloadEnd); + let payload: unknown = null; + if (payloadBuf.length > 0) { + payload = JSON.parse(payloadBuf.toString("utf8")); + } + + frames.push({ + eventType: headers[":event-type"] ?? "", + messageType: headers[":message-type"] ?? 
"", + payload, + preludeCrc: { expected: storedPreludeCrc, actual: computedPreludeCrc }, + messageCrc: { expected: storedMessageCrc, actual: computedMessageCrc }, + }); + + offset += totalLength; + } + + return frames; +} + +function postPartialBinary( + url: string, + body: unknown, +): Promise<{ body: Buffer; aborted: boolean }> { + return new Promise((resolve) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const chunks: Buffer[] = []; + let aborted = false; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ body: Buffer.concat(chunks), aborted }); + }); + res.on("error", () => { + aborted = true; + }); + res.on("aborted", () => { + aborted = true; + }); + res.on("close", () => { + resolve({ body: Buffer.concat(chunks), aborted }); + }); + }, + ); + req.on("error", () => { + aborted = true; + resolve({ body: Buffer.concat(chunks), aborted }); + }); + req.write(data); + req.end(); + }); +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" 
}, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + ], + }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture]; + +// --- test lifecycle --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── invoke-with-response-stream ──────────────────────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response as binary Event Stream frames", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThanOrEqual(5); + + // messageStart + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + // contentBlockStart + expect(frames[1].eventType).toBe("contentBlockStart"); + expect(frames[1].payload).toEqual({ contentBlockIndex: 0, start: {} }); + + // Content delta(s) — collect text + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas.length).toBeGreaterThanOrEqual(1); + const fullText = deltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); 
+ expect(fullText).toBe("Hi there!"); + + // contentBlockStop + const stopBlock = frames.find((f) => f.eventType === "contentBlockStop"); + expect(stopBlock).toBeDefined(); + expect(stopBlock!.payload).toEqual({ contentBlockIndex: 0 }); + + // messageStop + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop).toBeDefined(); + expect(msgStop!.payload).toEqual({ stopReason: "end_turn" }); + }); + + it("returns tool call response as binary Event Stream frames", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "weather" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // messageStart + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + // contentBlockStart with toolUse + expect(frames[1].eventType).toBe("contentBlockStart"); + const startPayload = frames[1].payload as { + contentBlockIndex: number; + start: { toolUse: { toolUseId: string; name: string } }; + }; + expect(startPayload.contentBlockIndex).toBe(0); + expect(startPayload.start.toolUse.name).toBe("get_weather"); + expect(startPayload.start.toolUse.toolUseId).toBeDefined(); + + // contentBlockDelta(s) with input_json_delta + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas.length).toBeGreaterThanOrEqual(1); + const fullJson = deltas + .map((f) => (f.payload as { delta: { inputJSON: string } }).delta.inputJSON) + .join(""); + expect(JSON.parse(fullJson)).toEqual({ city: "SF" }); + + // messageStop + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); + + it("Content-Type is application/vnd.amazon.eventstream", async () => { + instance = await 
createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + }); + + it("binary frames have valid CRC32 checksums", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThan(0); + for (const frame of frames) { + expect(frame.preludeCrc.actual).toBe(frame.preludeCrc.expected); + expect(frame.messageCrc.actual).toBe(frame.messageCrc.expected); + } + }); + + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "fail" }], + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "nomatch" }], + }); + + expect(res.status).toBe(404); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const parsed = new URL(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`); + const res = await new Promise<{ status: number; body: string }>((resolve, 
reject) => { + const raw = "{not valid"; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (r) => { + const chunks: Buffer[] = []; + r.on("data", (c: Buffer) => chunks.push(c)); + r.on("end", () => { + resolve({ + status: r.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); + + expect(res.status).toBe(400); + }); +}); + +// ─── invoke-with-response-stream: missing messages ────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (missing messages)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("23. returns 400 for empty body (no messages)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("messages"); + }); +}); + +// ─── invoke-with-response-stream: multiple tool calls ─────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (multiple tool calls)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("24. 
emits correct contentBlockIndex for 2 tool calls", async () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + instance = await createServer([multiToolFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "multi-tool" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Find contentBlockStart frames + const blockStarts = frames.filter((f) => f.eventType === "contentBlockStart"); + expect(blockStarts.length).toBeGreaterThanOrEqual(2); + + // First tool at contentBlockIndex 0 + const start0 = blockStarts[0].payload as { + contentBlockIndex: number; + start: { toolUse: { name: string } }; + }; + expect(start0.contentBlockIndex).toBe(0); + expect(start0.start.toolUse.name).toBe("get_weather"); + + // Second tool at contentBlockIndex 1 + const start1 = blockStarts[1].payload as { + contentBlockIndex: number; + start: { toolUse: { name: string } }; + }; + expect(start1.contentBlockIndex).toBe(1); + expect(start1.start.toolUse.name).toBe("get_time"); + + // contentBlockStop should also have correct indices + const blockStops = frames.filter((f) => f.eventType === "contentBlockStop"); + expect(blockStops.length).toBeGreaterThanOrEqual(2); + expect((blockStops[0].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0); + expect((blockStops[1].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(1); + + // messageStop should indicate tool_use + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); +}); + +// ─── invoke-with-response-stream: interruption ───────────────────────────── + 
+describe("POST /model/{modelId}/invoke-with-response-stream (interruption)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("truncateAfterChunks truncates the stream", async () => { + const truncatedFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hello, World! This is a longer message for chunking." }, + chunkSize: 5, + truncateAfterChunks: 3, + }; + instance = await createServer([truncatedFixture]); + + const res = await postPartialBinary( + `${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }, + ); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── invoke-with-response-stream: chaos ───────────────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (chaos)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("chaos drops requests when dropRate is 1", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + // Chaos drop returns 500 with server_error + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.type).toBe("server_error"); + }); +}); + +// ─── Converse non-streaming ───────────────────────────────────────────────── + +describe("POST /model/{modelId}/converse (non-streaming)", () => { + const MODEL_ID = 
"anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response in Converse format", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.output.message.role).toBe("assistant"); + expect(body.output.message.content).toHaveLength(1); + expect(body.output.message.content[0].text).toBe("Hi there!"); + expect(body.stopReason).toBe("end_turn"); + expect(body.usage).toEqual({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }); + }); + + it("returns tool call response in Converse format", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "weather" }] }], + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.output.message.role).toBe("assistant"); + expect(body.output.message.content).toHaveLength(1); + expect(body.output.message.content[0].toolUse.name).toBe("get_weather"); + expect(body.output.message.content[0].toolUse.input).toEqual({ city: "SF" }); + expect(body.output.message.content[0].toolUse.toolUseId).toBeDefined(); + expect(body.stopReason).toBe("tool_use"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(404); + }); + + it("returns 400 for missing messages", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + 
expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("chaos applies to converse endpoint", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(500); + }); +}); + +// ─── Converse streaming ───────────────────────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response as Event Stream", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + + const frames = parseFrames(res.body); + + // Verify event sequence + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + expect(frames[1].eventType).toBe("contentBlockStart"); + + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullText = deltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); + expect(fullText).toBe("Hi there!"); + + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "end_turn" }); + }); + + it("returns tool call response as Event Stream", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "weather" }] }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + expect(frames[0].eventType).toBe("messageStart"); + 
+ const startFrame = frames.find((f) => f.eventType === "contentBlockStart"); + const startPayload = startFrame!.payload as { + contentBlockIndex: number; + start: { toolUse: { toolUseId: string; name: string } }; + }; + expect(startPayload.start.toolUse.name).toBe("get_weather"); + + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullJson = deltas + .map((f) => (f.payload as { delta: { inputJSON: string } }).delta.inputJSON) + .join(""); + expect(JSON.parse(fullJson)).toEqual({ city: "SF" }); + + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); + + it("supports streaming profile (ttft/tps)", async () => { + const profileFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi" }, + streamingProfile: { ttft: 0, tps: 10000 }, + }; + instance = await createServer([profileFixture]); + + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThan(0); + }); + + it("truncateAfterChunks interrupts the stream", async () => { + const truncatedFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hello, World! This is a longer message." 
}, + chunkSize: 5, + truncateAfterChunks: 2, + }; + instance = await createServer([truncatedFixture]); + + const res = await postPartialBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); + + it("chaos applies to converse-stream endpoint", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(500); + }); +}); + +// ─── converseToCompletionRequest unit tests ───────────────────────────────── + +describe("converseToCompletionRequest", () => { + it("converts system messages", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + system: [{ text: "You are a helpful assistant." }], + }, + "anthropic.claude-3-5-sonnet", + ); + + expect(result.messages[0]).toEqual({ + role: "system", + content: "You are a helpful assistant.", + }); + expect(result.messages[1]).toEqual({ role: "user", content: "hi" }); + }); + + it("concatenates multiple system blocks", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + system: [{ text: "You are " }, { text: "a helpful assistant." 
}], + }, + "anthropic.claude-3-5-sonnet", + ); + + expect(result.messages[0]).toEqual({ + role: "system", + content: "You are a helpful assistant.", + }); + }); + + it("converts user messages with text content", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "Hello" }, { text: " World" }] }], + }, + "model-id", + ); + + expect(result.messages[0]).toEqual({ role: "user", content: "Hello World" }); + }); + + it("converts tool results in user messages", () => { + const result = converseToCompletionRequest( + { + messages: [ + { + role: "user", + content: [ + { + toolResult: { + toolUseId: "toolu_123", + content: [{ text: "72F and sunny" }], + }, + }, + { text: "Tell me more" }, + ], + }, + ], + }, + "model-id", + ); + + expect(result.messages[0]).toEqual({ + role: "tool", + content: "72F and sunny", + tool_call_id: "toolu_123", + }); + expect(result.messages[1]).toEqual({ + role: "user", + content: "Tell me more", + }); + }); + + it("converts assistant messages with toolUse blocks", () => { + const result = converseToCompletionRequest( + { + messages: [ + { role: "user", content: [{ text: "search" }] }, + { + role: "assistant", + content: [ + { text: "Let me search." 
}, + { + toolUse: { + toolUseId: "toolu_456", + name: "search", + input: { query: "cats" }, + }, + }, + ], + }, + ], + }, + "model-id", + ); + + expect(result.messages[1]).toMatchObject({ + role: "assistant", + content: "Let me search.", + tool_calls: [ + { + id: "toolu_456", + type: "function", + function: { name: "search", arguments: '{"query":"cats"}' }, + }, + ], + }); + }); + + it("converts tool definitions from toolConfig", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + toolConfig: { + tools: [ + { + toolSpec: { + name: "get_weather", + description: "Get weather for a city", + inputSchema: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }, + ], + }, + }, + "model-id", + ); + + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather for a city", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }); + }); + + it("passes through inferenceConfig temperature", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + inferenceConfig: { temperature: 0.7 }, + }, + "model-id", + ); + + expect(result.temperature).toBe(0.7); + }); + + it("sets model from modelId parameter", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + }, + "anthropic.claude-3-5-sonnet-20241022-v2:0", + ); + + expect(result.model).toBe("anthropic.claude-3-5-sonnet-20241022-v2:0"); + }); +}); + +// ─── Converse edge cases ───────────────────────────────────────────────────── + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: 
parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +describe("POST /model/{modelId}/converse (malformed JSON)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 for malformed JSON body", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/model/${MODEL_ID}/converse`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (missing messages)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 when messages array is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); +}); + +// ─── invoke-with-response-stream: unknown response type → 500 ────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on streaming endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-stream" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await 
post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "embed-stream" }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── invoke-with-response-stream: malformed tool call arguments ───────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (malformed tool args)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("malformed tool call arguments fall back to empty JSON string", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-tool-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "bad-tool-args" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Find contentBlockDelta frames with inputJSON + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullJson = deltas + .map((f) => { + const payload = f.payload as { delta: { inputJSON?: string } }; + return payload.delta.inputJSON ?? 
""; + }) + .join(""); + // Malformed arguments should fall back to "{}" + expect(fullJson).toBe("{}"); + }); +}); + +// ─── invoke-with-response-stream: empty content string ────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (empty content)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("empty content produces event sequence with zero content deltas", async () => { + const emptyContentFixture: Fixture = { + match: { userMessage: "empty-content" }, + response: { content: "" }, + }; + instance = await createServer([emptyContentFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "empty-content" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Should still have messageStart, contentBlockStart, contentBlockStop, messageStop + expect(frames[0].eventType).toBe("messageStart"); + expect(frames.find((f) => f.eventType === "contentBlockStart")).toBeDefined(); + expect(frames.find((f) => f.eventType === "contentBlockStop")).toBeDefined(); + expect(frames.find((f) => f.eventType === "messageStop")).toBeDefined(); + + // Content deltas should be zero (empty string → no chunks) + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas).toHaveLength(0); + }); +}); + +// ─── converse-stream: malformed JSON → 400 ────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream (malformed JSON)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 for malformed JSON body", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/model/${MODEL_ID}/converse-stream`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + 
expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +// ─── Strict mode: converse and converse-stream ────────────────────────────── + +describe("POST /model/{modelId}/converse (strict mode)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 503 in strict mode when no fixture matches", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (strict mode)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 503 in strict mode when no fixture matches", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); +}); + +// ─── Unknown response type through converse and converse-stream ───────────── + +describe("POST /model/{modelId}/converse (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on converse endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-converse" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "embed-converse" }] }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + 
expect(body.error.message).toContain("did not match any known type"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on converse-stream endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-stream" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "embed-stream" }] }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Error fixture through converse-stream ────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream (error fixture)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns error fixture with correct status through /converse-stream", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "fail" }] }], + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +// ─── Error fixture through /converse endpoint ─────────────────────────────── + +describe("POST /model/{modelId}/converse (error fixture)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns error fixture with correct status through /converse", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "fail" }] }], + }); + + expect(res.status).toBe(429); + const body = 
JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); diff --git a/src/__tests__/cohere.test.ts b/src/__tests__/cohere.test.ts new file mode 100644 index 0000000..4b6228f --- /dev/null +++ b/src/__tests__/cohere.test.ts @@ -0,0 +1,932 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { cohereToCompletionRequest } from "../cohere.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +function postWithHeaders( + url: string, + body: unknown, + extraHeaders: Record<string, string>, +): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + ...extraHeaders, + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +interface SSEEvent { + event: string; + data: Record<string, unknown>; +} + +function parseSSEEvents(body: string): SSEEvent[] { + const events: SSEEvent[] = []; + const blocks = body.split("\n\n").filter((b) => b.trim() !== ""); + for (const block of blocks) { + const lines = block.split("\n"); + let eventType = ""; + let dataStr = ""; + for (const line of lines) { + if (line.startsWith("event: ")) { + eventType = line.slice(7); + } else if (line.startsWith("data: ")) { + dataStr = line.slice(6); + } + } + if (eventType && dataStr) { + events.push({ event: eventType, data: JSON.parse(dataStr) as Record<string, unknown> }); + } + } + return events; +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "The capital of France is Paris." 
}, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + ], + }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── Unit tests: cohereToCompletionRequest ────────────────────────────────── + +describe("cohereToCompletionRequest", () => { + it("converts basic user message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + }); + expect(result.model).toBe("command-r-plus"); + expect(result.messages).toEqual([{ role: "user", content: "hello" }]); + }); + + it("converts system message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { role: "system", content: "Be helpful" }, + { role: "user", content: "hello" }, + ], + }); + expect(result.messages[0]).toEqual({ role: "system", content: "Be helpful" }); + expect(result.messages[1]).toEqual({ role: "user", content: "hello" }); + }); + + it("converts tool message with tool_call_id", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_abc", + }, + ], + }); + expect(result.messages[0]).toEqual({ + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_abc", + }); + }); + + it("converts tools", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + tools: [ + { + 
type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }, + ], + }); + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }); + }); + + it("passes through stream field", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + stream: true, + }); + expect(result.stream).toBe(true); + }); + + it("returns undefined tools when none provided", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + }); + expect(result.tools).toBeUndefined(); + }); +}); + +// ─── Unit tests: cohereToCompletionRequest (assistant message) ─────────────── + +describe("cohereToCompletionRequest (assistant message)", () => { + it("converts assistant message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { role: "user", content: "hello" }, + { role: "assistant", content: "Hi there" }, + ], + }); + expect(result.messages[1]).toEqual({ role: "assistant", content: "Hi there" }); + }); +}); + +// ─── Integration tests: POST /v2/chat (non-streaming text) ───────────────── + +describe("POST /v2/chat (non-streaming text)", () => { + it("returns text response with all required fields", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.id).toMatch(/^msg_/); + 
expect(body.finish_reason).toBe("COMPLETE"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toEqual([ + { type: "text", text: "The capital of France is Paris." }, + ]); + expect(body.message.tool_calls).toEqual([]); + expect(body.message.tool_plan).toBe(""); + expect(body.message.citations).toEqual([]); + expect(body.usage.billed_units).toEqual({ + input_tokens: 0, + output_tokens: 0, + search_units: 0, + classifications: 0, + }); + expect(body.usage.tokens).toEqual({ input_tokens: 0, output_tokens: 0 }); + }); +}); + +// ─── Integration tests: POST /v2/chat (non-streaming tool call) ───────────── + +describe("POST /v2/chat (non-streaming tool call)", () => { + it("returns tool call with TOOL_CALL finish_reason", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weather" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.finish_reason).toBe("TOOL_CALL"); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].id).toMatch(/^call_/); + expect(body.message.tool_calls[0].type).toBe("function"); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"SF"}'); + expect(body.message.content).toEqual([]); + expect(body.usage).toBeDefined(); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming text) ───────────────────── + +describe("POST /v2/chat (streaming text)", () => { + it("produces correct event sequence", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const 
events = parseSSEEvents(res.body); + expect(events.length).toBeGreaterThanOrEqual(5); + + // message-start + expect(events[0].event).toBe("message-start"); + expect(events[0].data.type).toBe("message-start"); + const msgStart = events[0].data.delta as Record; + const startMsg = msgStart.message as Record; + expect(startMsg.role).toBe("assistant"); + expect(startMsg.content).toEqual([]); + expect(startMsg.tool_plan).toBe(""); + expect(startMsg.tool_calls).toEqual([]); + expect(startMsg.citations).toEqual([]); + + // content-start (type: "text" only, no text field) + expect(events[1].event).toBe("content-start"); + expect(events[1].data.type).toBe("content-start"); + expect(events[1].data.index).toBe(0); + const csDelta = events[1].data.delta as Record; + const csMsg = csDelta.message as Record; + const csContent = csMsg.content as Record; + expect(csContent.type).toBe("text"); + expect(csContent).not.toHaveProperty("text"); + + // content-delta(s) + const contentDeltas = events.filter((e) => e.event === "content-delta"); + expect(contentDeltas.length).toBeGreaterThanOrEqual(1); + for (const cd of contentDeltas) { + expect(cd.data.type).toBe("content-delta"); + expect(cd.data.index).toBe(0); + const delta = cd.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + expect(content.type).toBe("text"); + expect(typeof content.text).toBe("string"); + } + + // Reconstruct full text from deltas + const fullText = contentDeltas + .map((cd) => { + const delta = cd.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + return content.text as string; + }) + .join(""); + expect(fullText).toBe("The capital of France is Paris."); + + // content-end + const contentEnd = events.find((e) => e.event === "content-end"); + expect(contentEnd).toBeDefined(); + expect(contentEnd!.data.type).toBe("content-end"); + expect(contentEnd!.data.index).toBe(0); + + // message-end + const msgEnd = 
events[events.length - 1]; + expect(msgEnd.event).toBe("message-end"); + expect(msgEnd.data.type).toBe("message-end"); + const endDelta = msgEnd.data.delta as Record; + expect(endDelta.finish_reason).toBe("COMPLETE"); + const usage = endDelta.usage as Record; + expect(usage.billed_units).toEqual({ + input_tokens: 0, + output_tokens: 0, + search_units: 0, + classifications: 0, + }); + expect(usage.tokens).toEqual({ input_tokens: 0, output_tokens: 0 }); + }); + + it("content-start has type:text only and no text field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const events = parseSSEEvents(res.body); + const contentStart = events.find((e) => e.event === "content-start"); + expect(contentStart).toBeDefined(); + const delta = contentStart!.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + expect(content.type).toBe("text"); + expect(Object.keys(content)).toEqual(["type"]); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming tool calls) ──────────────── + +describe("POST /v2/chat (streaming tool calls)", () => { + it("produces correct tool call event sequence", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weather" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseSSEEvents(res.body); + + // message-start + expect(events[0].event).toBe("message-start"); + + // tool-plan-delta + const planDelta = events.find((e) => e.event === "tool-plan-delta"); + expect(planDelta).toBeDefined(); + expect(planDelta!.data.type).toBe("tool-plan-delta"); + const planMsg = (planDelta!.data.delta as Record).message as Record< + string, + unknown + >; + expect(typeof 
planMsg.tool_plan).toBe("string"); + + // tool-call-start + const tcStart = events.find((e) => e.event === "tool-call-start"); + expect(tcStart).toBeDefined(); + expect(tcStart!.data.type).toBe("tool-call-start"); + expect(tcStart!.data.index).toBe(0); + const tcStartDelta = tcStart!.data.delta as Record; + const tcStartMsg = tcStartDelta.message as Record; + const tcStartCalls = tcStartMsg.tool_calls as Record; + expect(tcStartCalls.id).toMatch(/^call_/); + expect(tcStartCalls.type).toBe("function"); + const tcStartFn = tcStartCalls.function as Record; + expect(tcStartFn.name).toBe("get_weather"); + expect(tcStartFn.arguments).toBe(""); + + // tool-call-delta(s) + const tcDeltas = events.filter((e) => e.event === "tool-call-delta"); + expect(tcDeltas.length).toBeGreaterThanOrEqual(1); + const argsAccum = tcDeltas + .map((e) => { + const delta = e.data.delta as Record; + const msg = delta.message as Record; + const calls = msg.tool_calls as Record; + const fn = calls.function as Record; + return fn.arguments as string; + }) + .join(""); + expect(argsAccum).toBe('{"city":"SF"}'); + + // tool-call-end + const tcEnd = events.find((e) => e.event === "tool-call-end"); + expect(tcEnd).toBeDefined(); + expect(tcEnd!.data.type).toBe("tool-call-end"); + expect(tcEnd!.data.index).toBe(0); + + // message-end with TOOL_CALL + const msgEnd = events[events.length - 1]; + expect(msgEnd.event).toBe("message-end"); + const endDelta = msgEnd.data.delta as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + expect(endDelta.usage).toBeDefined(); + }); +}); + +// ─── Integration tests: POST /v2/chat (message-end usage) ─────────────────── + +describe("POST /v2/chat (message-end usage)", () => { + it("includes usage with both billed_units and tokens", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const events 
= parseSSEEvents(res.body); + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const delta = msgEnd!.data.delta as Record; + const usage = delta.usage as Record; + expect(usage.billed_units).toBeDefined(); + expect(usage.tokens).toBeDefined(); + const billedUnits = usage.billed_units as Record; + expect(billedUnits.input_tokens).toBe(0); + expect(billedUnits.output_tokens).toBe(0); + expect(billedUnits.search_units).toBe(0); + expect(billedUnits.classifications).toBe(0); + const tokens = usage.tokens as Record; + expect(tokens.input_tokens).toBe(0); + expect(tokens.output_tokens).toBe(0); + }); +}); + +// ─── Integration tests: POST /v2/chat (validation) ────────────────────────── + +describe("POST /v2/chat (validation)", () => { + it("returns 400 when model is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("model is required"); + }); + + it("returns 400 when messages array is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r", + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/v2/chat`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: 
"user", content: "nomatch" }], + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming profile) ─────────────────── + +describe("POST /v2/chat (streaming profile)", () => { + it("applies streaming profile latency", async () => { + const slowFixture: Fixture = { + match: { userMessage: "slow" }, + response: { content: "AB" }, + chunkSize: 1, + streamingProfile: { ttft: 50, tps: 20, jitter: 0 }, + }; + instance = await createServer([slowFixture]); + + const start = Date.now(); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "slow" }], + stream: true, + }); + const elapsed = Date.now() - start; + + expect(res.status).toBe(200); + // Should have noticeable delay from streaming profile + expect(elapsed).toBeGreaterThanOrEqual(80); + }); +}); + +// ─── Integration tests: POST /v2/chat (interruption) ──────────────────────── + +describe("POST /v2/chat (interruption)", () => { + it("truncates after specified number of chunks", async () => { + const truncFixture: Fixture = { + match: { userMessage: "truncate" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 1, + truncateAfterChunks: 3, + }; + instance = await createServer([truncFixture]); + + const res = await new Promise<{ aborted: boolean; body: string }>((resolve) => { + const data = JSON.stringify({ + model: "command-r-plus", + messages: [{ role: "user", content: "truncate" }], + stream: true, + }); + const parsed = new URL(`${instance!.url}/v2/chat`); + const chunks: Buffer[] = []; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", 
() => { + resolve({ aborted: false, body: Buffer.concat(chunks).toString() }); + }); + res.on("aborted", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + }, + ); + req.on("error", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + req.write(data); + req.end(); + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── Integration tests: POST /v2/chat (chaos) ────────────────────────────── + +describe("POST /v2/chat (chaos)", () => { + it("drops request when chaos drop header is set to 1.0", async () => { + instance = await createServer(allFixtures); + const res = await postWithHeaders( + `${instance.url}/v2/chat`, + { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }, + { "x-llmock-chaos-drop": "1.0" }, + ); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +// ─── Integration tests: POST /v2/chat (error fixture) ─────────────────────── + +describe("POST /v2/chat (error fixture)", () => { + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "fail" }], + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming default) ─────────────────── + +describe("POST /v2/chat (streaming default)", () => { + it("20. 
returns non-streaming JSON when stream field is omitted", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + // stream field intentionally omitted — Cohere defaults to non-streaming + }); + + expect(res.status).toBe(200); + // Should be non-streaming JSON, NOT SSE + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.id).toMatch(/^msg_/); + expect(body.finish_reason).toBe("COMPLETE"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toEqual([ + { type: "text", text: "The capital of France is Paris." }, + ]); + }); +}); + +// ─── Integration tests: POST /v2/chat (multiple tool calls) ───────────────── + +describe("POST /v2/chat (multiple tool calls)", () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + + it("21a. non-streaming returns 2 items in tool_calls array", async () => { + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "multi-tool" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.finish_reason).toBe("TOOL_CALL"); + expect(body.message.tool_calls).toHaveLength(2); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[1].function.name).toBe("get_time"); + }); + + it("21b. 
streaming produces 2 tool-call-start events", async () => { + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "multi-tool" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const events = parseSSEEvents(res.body); + const toolCallStarts = events.filter((e) => e.event === "tool-call-start"); + expect(toolCallStarts).toHaveLength(2); + + // First tool at index 0 + expect(toolCallStarts[0].data.index).toBe(0); + const tc0Delta = toolCallStarts[0].data.delta as Record; + const tc0Msg = tc0Delta.message as Record; + const tc0Calls = tc0Msg.tool_calls as Record; + const tc0Fn = tc0Calls.function as Record; + expect(tc0Fn.name).toBe("get_weather"); + + // Second tool at index 1 + expect(toolCallStarts[1].data.index).toBe(1); + const tc1Delta = toolCallStarts[1].data.delta as Record; + const tc1Msg = tc1Delta.message as Record; + const tc1Calls = tc1Msg.tool_calls as Record; + const tc1Fn = tc1Calls.function as Record; + expect(tc1Fn.name).toBe("get_time"); + + // message-end should have TOOL_CALL finish_reason + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const endDelta = msgEnd!.data.delta as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + }); +}); + +// ─── Integration tests: POST /v2/chat (malformed tool call arguments) ─────── + +describe("POST /v2/chat (malformed tool call arguments)", () => { + it("falls back to empty string when arguments is not valid JSON", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: 
"bad-args" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("fn"); + // Cohere passes through the arguments string as-is (logs warning) + expect(body.message.tool_calls[0].function.arguments).toBe("NOT VALID JSON"); + }); +}); + +// ─── Integration tests: POST /v2/chat (strict mode) ──────────────────────── + +describe("POST /v2/chat (strict mode)", () => { + it("returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Integration tests: POST /v2/chat (unknown response type → 500) ───────── + +describe("POST /v2/chat (unknown response type)", () => { + it("returns 500 for a fixture with unrecognizable response shape", async () => { + const weirdFixture: Fixture = { + match: { userMessage: "weird" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([weirdFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weird" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Integration tests: POST /v2/chat (error fixture no explicit status) ──── + +describe("POST /v2/chat (error fixture no explicit status)", () => { + it("defaults to 500 when error fixture has no status", async () => { + const noStatusError: Fixture = { + match: { userMessage: "err-no-status" }, + response: { + error: { + message: "Something went 
wrong", + type: "server_error", + }, + }, + }; + instance = await createServer([noStatusError]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "err-no-status" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Something went wrong"); + }); +}); + +// ─── Integration tests: POST /v2/chat (CORS headers) ──────────────────────── + +describe("POST /v2/chat (CORS headers)", () => { + it("includes CORS headers in response", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.headers["access-control-allow-origin"]).toBe("*"); + }); +}); + +// ─── Integration tests: POST /v2/chat (journal) ──────────────────────────── + +describe("POST /v2/chat (journal)", () => { + it("records request in the journal", async () => { + instance = await createServer(allFixtures); + await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/v2/chat"); + expect(entry!.response.status).toBe(200); + expect(entry!.response.fixture).toBe(textFixture); + expect(entry!.body.model).toBe("command-r-plus"); + }); +}); diff --git a/src/__tests__/drift/bedrock-stream.drift.ts b/src/__tests__/drift/bedrock-stream.drift.ts new file mode 100644 index 0000000..01e0750 --- /dev/null +++ b/src/__tests__/drift/bedrock-stream.drift.ts @@ -0,0 +1,145 @@ +/** + * AWS Bedrock drift tests. + * + * Three-way comparison: SDK types x real API x llmock output. + * Covers invoke-with-response-stream and converse endpoints. 
+ */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const HAS_CREDENTIALS = + !!process.env.AWS_ACCESS_KEY_ID && + !!process.env.AWS_SECRET_ACCESS_KEY && + !!process.env.AWS_REGION; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Bedrock InvokeModel response shape. + * Bedrock wraps the model output in its own envelope. + */ +function bedrockInvokeResponseShape() { + return extractShape({ + body: "base64-encoded-string", + contentType: "application/json", + $metadata: { + httpStatusCode: 200, + requestId: "req-abc", + }, + }); +} + +/** + * Minimal Bedrock Converse response shape. + */ +function bedrockConverseResponseShape() { + return extractShape({ + output: { + message: { + role: "assistant", + content: [{ text: "Hello!" 
}], + }, + }, + stopReason: "end_turn", + usage: { + inputTokens: 10, + outputTokens: 5, + totalTokens: 15, + }, + metrics: { + latencyMs: 100, + }, + $metadata: { + httpStatusCode: 200, + requestId: "req-abc", + }, + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => { + it("invoke-with-response-stream mock shape is plausible", async () => { + const sdkShape = bedrockInvokeResponseShape(); + + // Bedrock streaming uses binary event-stream framing, so we test the + // mock's JSON response shape for the non-streaming invoke endpoint. + const mockRes = await httpPost( + `${instance.url}/model/anthropic.claude-3-haiku-20240307-v1:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 10, + messages: [{ role: "user", content: "Say hello" }], + }, + ); + + expect(mockRes.status).toBe(200); + + // When real AWS credentials are available, send the same request to + // the real Bedrock API and compare shapes. For now, validate mock + // against the SDK shape as both real and expected. 
+ if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Bedrock Invoke", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("converse mock shape matches SDK expectations", async () => { + const sdkShape = bedrockConverseResponseShape(); + + const mockRes = await httpPost( + `${instance.url}/model/anthropic.claude-3-haiku-20240307-v1:0/converse`, + { + messages: [ + { + role: "user", + content: [{ text: "Say hello" }], + }, + ], + inferenceConfig: { maxTokens: 10 }, + }, + ); + + expect(mockRes.status).toBe(200); + + if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Bedrock Converse", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/drift/cohere.drift.ts b/src/__tests__/drift/cohere.drift.ts new file mode 100644 index 0000000..a4a2beb --- /dev/null +++ b/src/__tests__/drift/cohere.drift.ts @@ -0,0 +1,213 @@ +/** + * Cohere drift tests. + * + * Three-way comparison: expected shape x real API x llmock output. + * Covers /v2/chat non-streaming and streaming endpoints. 
+ * + * Requires: COHERE_API_KEY + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, parseDataOnlySSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const COHERE_API_KEY = process.env.COHERE_API_KEY; +const HAS_CREDENTIALS = !!COHERE_API_KEY; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Cohere /v2/chat response shape (non-streaming). + */ +function cohereChatResponseShape() { + return extractShape({ + id: "chat-abc123", + finish_reason: "COMPLETE", + message: { + role: "assistant", + content: [{ type: "text", text: "Hello!" }], + }, + usage: { + billed_units: { + input_tokens: 10, + output_tokens: 5, + }, + tokens: { + input_tokens: 10, + output_tokens: 5, + }, + }, + }); +} + +/** + * Minimal Cohere /v2/chat streaming chunk shape. 
+ */ +function cohereChatStreamChunkShape() { + return extractShape({ + id: "chat-abc123", + type: "content-delta", + delta: { + message: { + content: { text: "Hel" }, + }, + }, + }); +} + +// --------------------------------------------------------------------------- +// Real API helpers +// --------------------------------------------------------------------------- + +async function cohereChatNonStreaming( + messages: { role: string; content: string }[], +): Promise<{ status: number; body: string }> { + const res = await fetch("https://api.cohere.com/v2/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${COHERE_API_KEY}`, + }, + body: JSON.stringify({ + model: "command-r-plus", + messages, + stream: false, + max_tokens: 10, + }), + }); + return { status: res.status, body: await res.text() }; +} + +async function cohereChatStreaming( + messages: { role: string; content: string }[], +): Promise<{ status: number; body: string }> { + const res = await fetch("https://api.cohere.com/v2/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${COHERE_API_KEY}`, + }, + body: JSON.stringify({ + model: "command-r-plus", + messages, + stream: true, + max_tokens: 10, + }), + }); + return { status: res.status, body: await res.text() }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Cohere drift", () => { + it("non-streaming /v2/chat shape matches", async () => { + const sdkShape = cohereChatResponseShape(); + const messages = [{ role: "user", content: "Say hello" }]; + + const [realRes, mockRes] = await Promise.all([ + cohereChatNonStreaming(messages), + httpPost(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages, + stream: false, + }), + ]); + + expect(realRes.status).toBe(200); + 
expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Cohere /v2/chat (non-streaming)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("streaming /v2/chat shape matches", async () => { + const sdkChunkShape = cohereChatStreamChunkShape(); + const messages = [{ role: "user", content: "Say hello" }]; + + const [realRes, mockRes] = await Promise.all([ + cohereChatStreaming(messages), + httpPost(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages, + stream: true, + }), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + // Parse SSE chunks from both responses + const realChunks = parseDataOnlySSE(realRes.body); + const mockChunks = parseDataOnlySSE(mockRes.body); + + if (realChunks.length > 0 && mockChunks.length > 0) { + // Compare first chunk shape (content-delta) + const realChunkShape = extractShape(realChunks[0]); + const mockChunkShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape); + const report = formatDriftReport("Cohere /v2/chat (streaming first chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + + // Also compare the LAST chunk shape (has finish_reason, usage) + const sdkLastChunkShape = extractShape({ + id: "chat-abc123", + type: "message-end", + delta: { + finish_reason: "COMPLETE", + usage: { + billed_units: { input_tokens: 10, output_tokens: 5 }, + tokens: { input_tokens: 10, output_tokens: 5 }, + }, + }, + }); + + const realLastShape = extractShape(realChunks[realChunks.length - 1]); + const 
mockLastShape = extractShape(mockChunks[mockChunks.length - 1]);
+
+        const lastDiffs = triangulate(sdkLastChunkShape, realLastShape, mockLastShape);
+        const lastReport = formatDriftReport("Cohere /v2/chat (streaming last chunk)", lastDiffs);
+
+        if (shouldFail(lastDiffs)) {
+          expect.soft([], lastReport).toEqual(lastDiffs.filter((d) => d.severity === "critical"));
+        }
+      }
+    }
+  });
+});
diff --git a/src/__tests__/drift/ollama.drift.ts b/src/__tests__/drift/ollama.drift.ts
new file mode 100644
index 0000000..4e0114e
--- /dev/null
+++ b/src/__tests__/drift/ollama.drift.ts
@@ -0,0 +1,219 @@
+/**
+ * Ollama drift tests.
+ *
+ * Compares llmock's Ollama endpoint output shapes against a real local
+ * Ollama instance. Skips automatically if Ollama is not reachable.
+ *
+ * Requires: local Ollama running at http://localhost:11434
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import type { ServerInstance } from "../../server.js";
+import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js";
+import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js";
+
+// ---------------------------------------------------------------------------
+// Connectivity check
+// ---------------------------------------------------------------------------
+
+let OLLAMA_REACHABLE = false;
+
+async function checkOllamaConnectivity(): Promise<boolean> {
+  try {
+    const res = await fetch("http://localhost:11434/api/tags", {
+      signal: AbortSignal.timeout(3000),
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Server lifecycle
+// ---------------------------------------------------------------------------
+
+let instance: ServerInstance;
+
+beforeAll(async () => {
+  OLLAMA_REACHABLE = await checkOllamaConnectivity();
+  instance = await startDriftServer();
+});
+
+afterAll(async () => {
+  await stopDriftServer(instance);
+});
+
+// 
--------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Ollama /api/chat response shape (non-streaming final message). + */ +function ollamaChatResponseShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + message: { + role: "assistant", + content: "Hello!", + }, + done: true, + done_reason: "stop", + total_duration: 1000000, + load_duration: 100000, + prompt_eval_count: 10, + prompt_eval_duration: 500000, + eval_count: 5, + eval_duration: 400000, + }); +} + +/** + * Minimal Ollama /api/generate response shape (non-streaming). + */ +function ollamaGenerateResponseShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + response: "Hello!", + done: true, + done_reason: "stop", + total_duration: 1000000, + load_duration: 100000, + prompt_eval_count: 10, + prompt_eval_duration: 500000, + eval_count: 5, + eval_duration: 400000, + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Streaming shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Ollama /api/chat streaming chunk shape (non-final). 
+ */ +function ollamaChatStreamChunkShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + message: { + role: "assistant", + content: "H", + }, + done: false, + }); +} + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +describe.skipIf(!OLLAMA_REACHABLE)("Ollama drift", () => { + it("/api/chat response shape matches", async () => { + const sdkShape = ollamaChatResponseShape(); + + const body = { + model: "llama3.2", + messages: [{ role: "user", content: "Say hello" }], + stream: false, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/chat", body), + httpPost(`${instance.url}/api/chat`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Ollama /api/chat", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("/api/chat streaming NDJSON chunk shapes match", async () => { + const sdkChunkShape = ollamaChatStreamChunkShape(); + + const body = { + model: "llama3.2", + messages: [{ role: "user", content: "Say hello" }], + stream: true, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/chat", body), + httpPost(`${instance.url}/api/chat`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realChunks = parseNDJSON(realRes.body); + const mockChunks = parseNDJSON(mockRes.body); + + expect(realChunks.length).toBeGreaterThan(0); + 
expect(mockChunks.length).toBeGreaterThan(0); + + // Compare first (non-final) chunk shapes + const realFirstShape = extractShape(realChunks[0]); + const mockFirstShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realFirstShape, mockFirstShape); + const report = formatDriftReport("Ollama /api/chat (streaming chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("/api/generate response shape matches", async () => { + const sdkShape = ollamaGenerateResponseShape(); + + const body = { + model: "llama3.2", + prompt: "Say hello", + stream: false, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/generate", body), + httpPost(`${instance.url}/api/generate`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Ollama /api/generate", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/drift/vertex-ai.drift.ts b/src/__tests__/drift/vertex-ai.drift.ts new file mode 100644 index 0000000..358bc21 --- /dev/null +++ b/src/__tests__/drift/vertex-ai.drift.ts @@ -0,0 +1,165 @@ +/** + * Vertex AI / Gemini drift tests. + * + * Verifies that llmock's Vertex AI routing produces response shapes + * consistent with the Gemini generateContent endpoint. 
+ * + * Requires: GOOGLE_APPLICATION_CREDENTIALS or (VERTEX_AI_PROJECT + VERTEX_AI_LOCATION) + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const HAS_CREDENTIALS = + !!process.env.GOOGLE_APPLICATION_CREDENTIALS || + (!!process.env.VERTEX_AI_PROJECT && !!process.env.VERTEX_AI_LOCATION); + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Gemini generateContent response shape. + * Vertex AI uses the same response format as consumer Gemini. + */ +function geminiGenerateContentShape() { + return extractShape({ + candidates: [ + { + content: { + parts: [{ text: "Hello!" 
}], + role: "model", + }, + finishReason: "STOP", + index: 0, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + modelVersion: "gemini-2.5-flash", + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Vertex AI drift", () => { + it("generateContent mock shape matches Gemini format", async () => { + const sdkShape = geminiGenerateContentShape(); + + // Vertex AI routing in llmock follows the path pattern: + // /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent + const mockRes = await httpPost( + `${instance.url}/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent`, + { + contents: [ + { + role: "user", + parts: [{ text: "Say hello" }], + }, + ], + generationConfig: { maxOutputTokens: 10 }, + }, + ); + + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Vertex AI generateContent", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("streamGenerateContent mock shape matches Gemini SSE format", async () => { + const sdkChunkShape = extractShape({ + candidates: [ + { + content: { + parts: [{ text: "Hello" }], + role: "model", + }, + finishReason: "STOP", + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }); + + // Vertex AI streaming uses SSE with the same chunk shape as consumer Gemini + const mockRes = await httpPost( + 
`${instance.url}/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:streamGenerateContent?alt=sse`, + { + contents: [ + { + role: "user", + parts: [{ text: "Say hello" }], + }, + ], + generationConfig: { maxOutputTokens: 10 }, + }, + ); + + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + // Parse SSE chunks and extract shapes + const chunks = mockRes.body + .split("\n") + .filter((line: string) => line.startsWith("data: ")) + .map((line: string) => JSON.parse(line.slice(6))); + + expect(chunks.length).toBeGreaterThan(0); + + // Each chunk should have the candidates structure + for (const chunk of chunks) { + const chunkShape = extractShape(chunk); + expect(chunkShape.kind).toBe("object"); + if (chunkShape.kind === "object") { + expect(chunkShape.fields).toHaveProperty("candidates"); + } + } + + // Last chunk should match the SDK shape (has finishReason and usageMetadata) + const lastChunk = chunks[chunks.length - 1]; + const lastShape = extractShape(lastChunk); + const diffs = triangulate(sdkChunkShape, sdkChunkShape, lastShape); + const report = formatDriftReport("Vertex AI streamGenerateContent (last chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/metrics.test.ts b/src/__tests__/metrics.test.ts new file mode 100644 index 0000000..0d1948c --- /dev/null +++ b/src/__tests__/metrics.test.ts @@ -0,0 +1,579 @@ +import { describe, it, expect, afterEach, beforeEach } from "vitest"; +import http from "node:http"; +import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "../metrics.js"; +import { createServer, type ServerInstance } from "../server.js"; +import type { Fixture, ChatCompletionRequest } from "../types.js"; + +// --------------------------------------------------------------------------- +// Helpers +// 
---------------------------------------------------------------------------
+
+async function httpPost(
+  url: string,
+  body: object,
+  headers?: Record<string, string>,
+): Promise<{ status: number; body: string; headers: Record<string, string> }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      url,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          ...headers,
+        },
+      },
+      (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (c) => chunks.push(c));
+        res.on("end", () =>
+          resolve({
+            status: res.statusCode!,
+            body: Buffer.concat(chunks).toString(),
+            headers: Object.fromEntries(
+              Object.entries(res.headers).map(([k, v]) => [
+                k,
+                Array.isArray(v) ? v.join(", ") : (v ?? ""),
+              ]),
+            ),
+          }),
+        );
+      },
+    );
+    req.on("error", reject);
+    req.write(JSON.stringify(body));
+    req.end();
+  });
+}
+
+async function httpGet(
+  url: string,
+): Promise<{ status: number; body: string; headers: Record<string, string> }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(url, { method: "GET" }, (res) => {
+      const chunks: Buffer[] = [];
+      res.on("data", (c) => chunks.push(c));
+      res.on("end", () =>
+        resolve({
+          status: res.statusCode!,
+          body: Buffer.concat(chunks).toString(),
+          headers: Object.fromEntries(
+            Object.entries(res.headers).map(([k, v]) => [
+              k,
+              Array.isArray(v) ? v.join(", ") : (v ?? 
""), + ]), + ), + }), + ); + }); + req.on("error", reject); + req.end(); + }); +} + +function chatRequest(userContent: string): ChatCompletionRequest { + return { + model: "gpt-4", + messages: [{ role: "user", content: userContent }], + }; +} + +// --------------------------------------------------------------------------- +// Unit tests: MetricsRegistry +// --------------------------------------------------------------------------- + +describe("MetricsRegistry", () => { + let registry: MetricsRegistry; + + beforeEach(() => { + registry = createMetricsRegistry(); + }); + + describe("Counter", () => { + it("increments and serializes correct value", () => { + registry.incrementCounter("http_requests_total", { method: "POST" }); + registry.incrementCounter("http_requests_total", { method: "POST" }); + registry.incrementCounter("http_requests_total", { method: "POST" }); + const output = registry.serialize(); + expect(output).toContain('http_requests_total{method="POST"} 3'); + }); + + it("tracks different label combos separately", () => { + registry.incrementCounter("http_requests_total", { method: "POST", path: "/a" }); + registry.incrementCounter("http_requests_total", { method: "POST", path: "/a" }); + registry.incrementCounter("http_requests_total", { method: "GET", path: "/b" }); + const output = registry.serialize(); + expect(output).toContain('http_requests_total{method="POST",path="/a"} 2'); + expect(output).toContain('http_requests_total{method="GET",path="/b"} 1'); + }); + }); + + describe("Histogram", () => { + it("observes values with cumulative buckets, +Inf = count", () => { + // Observe values: 0.003, 0.05, 1.5 + registry.observeHistogram("request_duration_seconds", {}, 0.003); + registry.observeHistogram("request_duration_seconds", {}, 0.05); + registry.observeHistogram("request_duration_seconds", {}, 1.5); + const output = registry.serialize(); + + // Bucket 0.005: 1 observation (0.003) + 
expect(output).toContain('request_duration_seconds_bucket{le="0.005"} 1'); + // Bucket 0.01: 1 observation (cumulative, still just 0.003) + expect(output).toContain('request_duration_seconds_bucket{le="0.01"} 1'); + // Bucket 0.05: 2 observations (0.003, 0.05) + expect(output).toContain('request_duration_seconds_bucket{le="0.05"} 2'); + // Bucket 0.1: 2 observations + expect(output).toContain('request_duration_seconds_bucket{le="0.1"} 2'); + // Bucket 2.5: 3 observations (all) + expect(output).toContain('request_duration_seconds_bucket{le="2.5"} 3'); + // +Inf = count = 3 + expect(output).toContain('request_duration_seconds_bucket{le="+Inf"} 3'); + }); + + it("has correct _sum and _count suffixes", () => { + registry.observeHistogram("request_duration_seconds", {}, 0.5); + registry.observeHistogram("request_duration_seconds", {}, 1.5); + const output = registry.serialize(); + expect(output).toContain("request_duration_seconds_sum{} 2"); + expect(output).toContain("request_duration_seconds_count{} 2"); + }); + + it("tracks labels separately in histograms", () => { + registry.observeHistogram("req_dur", { method: "POST" }, 0.01); + registry.observeHistogram("req_dur", { method: "GET" }, 5.0); + const output = registry.serialize(); + // POST: bucket le=0.01 should have 1 + expect(output).toContain('req_dur_bucket{method="POST",le="0.01"} 1'); + // POST: +Inf should have 1 + expect(output).toContain('req_dur_bucket{method="POST",le="+Inf"} 1'); + // GET: bucket le=0.01 should have 0 + expect(output).toContain('req_dur_bucket{method="GET",le="0.01"} 0'); + // GET: bucket le=5 should have 1 + expect(output).toContain('req_dur_bucket{method="GET",le="5"} 1'); + // GET: +Inf should have 1 + expect(output).toContain('req_dur_bucket{method="GET",le="+Inf"} 1'); + }); + }); + + describe("Histogram edge: value > all buckets", () => { + it("28. 
only +Inf increments when value exceeds all bucket bounds", () => { + registry.observeHistogram("big_value_hist", {}, 100); + const output = registry.serialize(); + + // All finite buckets should have 0 + for (const b of [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) { + expect(output).toContain(`big_value_hist_bucket{le="${b}"} 0`); + } + // Only +Inf should have 1 + expect(output).toContain('big_value_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("big_value_hist_count{} 1"); + expect(output).toContain("big_value_hist_sum{} 100"); + }); + }); + + describe("Empty registry serialization", () => { + it("29. returns empty string from fresh registry", () => { + const freshRegistry = createMetricsRegistry(); + expect(freshRegistry.serialize()).toBe(""); + }); + }); + + describe("Type mismatch errors", () => { + it("throws when observing histogram on a counter name", () => { + registry.incrementCounter("foo", {}); + expect(() => registry.observeHistogram("foo", {}, 0.5)).toThrow( + "Metric foo is not a histogram", + ); + }); + + it("throws when incrementing counter on a histogram name", () => { + registry.observeHistogram("bar", {}, 0.5); + expect(() => registry.incrementCounter("bar", {})).toThrow("Metric bar is not a counter"); + }); + }); + + describe("Gauge type mismatch errors", () => { + it("throws when incrementing counter on a gauge name", () => { + registry.setGauge("x", {}, 1); + expect(() => registry.incrementCounter("x", {})).toThrow("Metric x is not a counter"); + }); + + it("throws when observing histogram on a gauge name", () => { + registry.setGauge("y", {}, 1); + expect(() => registry.observeHistogram("y", {}, 0.5)).toThrow("Metric y is not a histogram"); + }); + + it("throws when setting gauge on a counter name", () => { + registry.incrementCounter("z", {}); + expect(() => registry.setGauge("z", {}, 1)).toThrow("Metric z is not a gauge"); + }); + }); + + describe("Histogram value exactly 0", () => { + it("observe 0, verify it lands 
in 0.005 bucket", () => { + registry.observeHistogram("zero_hist", {}, 0); + const output = registry.serialize(); + // 0 <= 0.005, so the 0.005 bucket should have 1 + expect(output).toContain('zero_hist_bucket{le="0.005"} 1'); + expect(output).toContain('zero_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("zero_hist_sum{} 0"); + expect(output).toContain("zero_hist_count{} 1"); + }); + }); + + describe("Histogram negative value", () => { + it("observe -1, verify it lands in ALL finite buckets (cumulative), +Inf/count/sum correct", () => { + registry.observeHistogram("neg_hist", {}, -1); + const output = registry.serialize(); + // -1 <= every positive bucket boundary, so all finite buckets should have 1 + for (const b of [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) { + expect(output).toContain(`neg_hist_bucket{le="${b}"} 1`); + } + expect(output).toContain('neg_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("neg_hist_count{} 1"); + expect(output).toContain("neg_hist_sum{} -1"); + }); + }); + + describe("Counter with empty labels serialization format", () => { + it("serializes counter with empty labels as name{} value", () => { + registry.incrementCounter("empty_label_counter", {}); + const output = registry.serialize(); + expect(output).toContain("empty_label_counter{} 1"); + }); + }); + + describe("Label value escaping", () => { + it("escapes backslash, double-quote, and newline in label values", () => { + registry.incrementCounter("escaped_metric", { val: 'back\\slash "quoted" new\nline' }); + const output = registry.serialize(); + expect(output).toContain('val="back\\\\slash \\"quoted\\" new\\nline"'); + }); + }); + + describe("Label sort order stability", () => { + it("maps {b:2,a:1} and {a:1,b:2} to the same series", () => { + registry.incrementCounter("sorted_counter", { b: "2", a: "1" }); + registry.incrementCounter("sorted_counter", { a: "1", b: "2" }); + const output = registry.serialize(); + // Should be one series with value 
2, not two series with value 1 + expect(output).toContain('sorted_counter{a="1",b="2"} 2'); + // Should not contain a separate series with value 1 + expect(output).not.toMatch(/sorted_counter\{[^}]*\} 1/); + }); + }); + + describe("Gauge", () => { + it("sets and updates value", () => { + registry.setGauge("fixtures_loaded", {}, 5); + let output = registry.serialize(); + expect(output).toContain("fixtures_loaded{} 5"); + + registry.setGauge("fixtures_loaded", {}, 10); + output = registry.serialize(); + expect(output).toContain("fixtures_loaded{} 10"); + // Old value should not be present + expect(output).not.toMatch(/fixtures_loaded\{\} 5/); + }); + }); + + describe("serialize()", () => { + it("produces valid Prometheus text exposition format", () => { + registry.incrementCounter("my_counter", { env: "test" }); + registry.setGauge("my_gauge", {}, 42); + const output = registry.serialize(); + + // Should contain TYPE lines + expect(output).toMatch(/^# TYPE my_counter counter$/m); + expect(output).toMatch(/^# TYPE my_gauge gauge$/m); + // Metric lines + expect(output).toContain('my_counter{env="test"} 1'); + expect(output).toContain("my_gauge{} 42"); + }); + }); + + describe("reset()", () => { + it("clears all metrics", () => { + registry.incrementCounter("c", {}); + registry.observeHistogram("h", {}, 0.5); + registry.setGauge("g", {}, 1); + registry.reset(); + const output = registry.serialize(); + expect(output).toBe(""); + }); + }); + + describe("histogram→gauge type mismatch", () => { + it("throws when setting gauge on a histogram name", () => { + registry.observeHistogram("x", {}, 0.5); + expect(() => registry.setGauge("x", {}, 1)).toThrow("Metric x is not a gauge"); + }); + }); + + describe("Gauge with non-empty labels", () => { + it("serializes gauge with labels correctly", () => { + registry.setGauge("g", { region: "us" }, 42); + const output = registry.serialize(); + expect(output).toContain('g{region="us"} 42'); + }); + }); + + describe("Gauge multi-series", 
() => { + it("tracks multiple label combos independently", () => { + registry.setGauge("g", { region: "us" }, 10); + registry.setGauge("g", { region: "eu" }, 20); + const output = registry.serialize(); + expect(output).toContain('g{region="us"} 10'); + expect(output).toContain('g{region="eu"} 20'); + }); + }); + + describe("reset then re-accumulate", () => { + it("counter restarts from zero after reset", () => { + registry.incrementCounter("c", {}); + registry.reset(); + registry.incrementCounter("c", {}); + const output = registry.serialize(); + expect(output).toContain("c{} 1"); + expect(output).not.toMatch(/c\{\} 2/); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Unit tests: normalizePathLabel +// --------------------------------------------------------------------------- + +describe("normalizePathLabel", () => { + it("normalizes Bedrock invoke path", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/invoke")).toBe( + "/model/{modelId}/invoke", + ); + }); + + it("normalizes Bedrock invoke-with-response-stream", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/invoke-with-response-stream")).toBe( + "/model/{modelId}/invoke-with-response-stream", + ); + }); + + it("normalizes Bedrock converse", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/converse")).toBe( + "/model/{modelId}/converse", + ); + }); + + it("normalizes Bedrock converse-stream", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/converse-stream")).toBe( + "/model/{modelId}/converse-stream", + ); + }); + + it("normalizes Gemini generateContent path", () => { + expect(normalizePathLabel("/v1beta/models/gemini-2.0-flash:generateContent")).toBe( + "/v1beta/models/{model}:generateContent", + ); + }); + + it("normalizes Gemini streamGenerateContent path", () => { + expect(normalizePathLabel("/v1beta/models/gemini-2.0-flash:streamGenerateContent")).toBe( + 
"/v1beta/models/{model}:streamGenerateContent", + ); + }); + + it("normalizes Azure deployment path", () => { + expect(normalizePathLabel("/openai/deployments/my-gpt4/chat/completions")).toBe( + "/openai/deployments/{id}/chat/completions", + ); + }); + + it("normalizes Azure deployment embeddings path", () => { + expect(normalizePathLabel("/openai/deployments/my-gpt4/embeddings")).toBe( + "/openai/deployments/{id}/embeddings", + ); + }); + + it("normalizes Vertex AI path", () => { + expect( + normalizePathLabel( + "/v1/projects/my-proj/locations/us-central1/publishers/google/models/gemini:generateContent", + ), + ).toBe("/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:generateContent"); + }); + + it("leaves static /api/chat unchanged", () => { + expect(normalizePathLabel("/api/chat")).toBe("/api/chat"); + }); + + it("leaves static /v1/chat/completions unchanged", () => { + expect(normalizePathLabel("/v1/chat/completions")).toBe("/v1/chat/completions"); + }); + + it("leaves static /v1/messages unchanged", () => { + expect(normalizePathLabel("/v1/messages")).toBe("/v1/messages"); + }); + + it("leaves static /v1/embeddings unchanged", () => { + expect(normalizePathLabel("/v1/embeddings")).toBe("/v1/embeddings"); + }); + + it("partial match: /model/foo/unknown-op returns as-is", () => { + expect(normalizePathLabel("/model/foo/unknown-op")).toBe("/model/foo/unknown-op"); + }); + + it("empty string returns empty string", () => { + expect(normalizePathLabel("")).toBe(""); + }); + + it("normalizes Vertex AI streamGenerateContent path", () => { + expect( + normalizePathLabel( + "/v1/projects/my-proj/locations/us-central1/publishers/google/models/gemini:streamGenerateContent", + ), + ).toBe("/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:streamGenerateContent"); + }); +}); + +describe("MetricsRegistry: all three types serialized together", () => { + it("counter + histogram + gauge all appear in serialize output", () => { + const reg = 
createMetricsRegistry();
+    reg.incrementCounter("c_total", { env: "test" });
+    reg.observeHistogram("h_seconds", { op: "read" }, 0.05);
+    reg.setGauge("g_loaded", {}, 7);
+
+    const output = reg.serialize();
+    expect(output).toContain("# TYPE c_total counter");
+    expect(output).toContain('c_total{env="test"} 1');
+    expect(output).toContain("# TYPE h_seconds histogram");
+    expect(output).toContain('h_seconds_bucket{op="read",le="0.05"} 1');
+    expect(output).toContain("# TYPE g_loaded gauge");
+    expect(output).toContain("g_loaded{} 7");
+  });
+});
+
+describe("MetricsRegistry: status label in counter output", () => {
+  it("status label appears correctly in serialized counter", () => {
+    const reg = createMetricsRegistry();
+    reg.incrementCounter("llmock_requests_total", { status: "200", path: "/v1/chat/completions" });
+    reg.incrementCounter("llmock_requests_total", { status: "200", path: "/v1/chat/completions" });
+    reg.incrementCounter("llmock_requests_total", { status: "404", path: "/v1/chat/completions" });
+
+    const output = reg.serialize();
+    expect(output).toContain('llmock_requests_total{path="/v1/chat/completions",status="200"} 2');
+    expect(output).toContain('llmock_requests_total{path="/v1/chat/completions",status="404"} 1');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Integration tests: /metrics endpoint through the server
+// ---------------------------------------------------------------------------
+
+let instance: ServerInstance | null = null;
+
+afterEach(async () => {
+  if (instance) {
+    await new Promise<void>((resolve) => instance!.server.close(() => resolve()));
+    instance = null;
+  }
+});
+
+describe("integration: /metrics endpoint", () => {
+  it("returns 404 when metrics disabled (default)", async () => {
+    const fixtures: Fixture[] = [
+      {
+        match: { userMessage: "hello" },
+        response: { content: "hi" },
+      },
+    ];
+    instance = await createServer(fixtures);
+    const res = await 
httpGet(`${instance.url}/metrics`); + expect(res.status).toBe(404); + }); + + it("returns 200 with correct content-type when metrics enabled", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + const res = await httpGet(`${instance.url}/metrics`); + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/plain; version=0.0.4; charset=utf-8"); + }); + + it("increments counters after sending requests", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + + // Send two requests + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_requests_total"); + // Should have count of 2 for the completions path + expect(res.body).toMatch(/llmock_requests_total\{[^}]*path="\/v1\/chat\/completions"[^}]*\} 2/); + }); + + it("records histogram bucket distribution after a request", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + // Should have histogram buckets + expect(res.body).toContain("llmock_request_duration_seconds_bucket"); + expect(res.body).toContain("llmock_request_duration_seconds_count"); + expect(res.body).toContain("llmock_request_duration_seconds_sum"); + // +Inf bucket should equal count + const infMatch = res.body.match( + /llmock_request_duration_seconds_bucket\{[^}]*le="\+Inf"\} (\d+)/, + ); + const 
countMatch = res.body.match(/llmock_request_duration_seconds_count\{[^}]*\} (\d+)/); + expect(infMatch).not.toBeNull(); + expect(countMatch).not.toBeNull(); + expect(infMatch![1]).toBe(countMatch![1]); + }); + + it("increments chaos counter when chaos triggers", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { + metrics: true, + chaos: { dropRate: 1.0 }, // 100% drop + }); + + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_chaos_triggered_total"); + expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); + }); + + it("tracks fixtures loaded gauge", async () => { + const fixtures: Fixture[] = [ + { match: { userMessage: "a" }, response: { content: "1" } }, + { match: { userMessage: "b" }, response: { content: "2" } }, + ]; + instance = await createServer(fixtures, { metrics: true }); + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_fixtures_loaded{} 2"); + }); +}); diff --git a/src/__tests__/ollama.test.ts b/src/__tests__/ollama.test.ts new file mode 100644 index 0000000..c9870b4 --- /dev/null +++ b/src/__tests__/ollama.test.ts @@ -0,0 +1,1045 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { ollamaToCompletionRequest } from "../ollama.js"; +import { writeNDJSONStream } from "../ndjson-writer.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = 
http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function get( + url: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); +} + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" }, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"NYC"}', + }, + ], + }, +}; + +const modelFixture: Fixture = { + match: { model: "llama3", userMessage: "greet" }, + response: { content: "Hello from Ollama!" }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, modelFixture, errorFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── Unit tests: ollamaToCompletionRequest ────────────────────────────────── + +describe("ollamaToCompletionRequest", () => { + it("converts basic chat request", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hello" }], + }); + expect(result.model).toBe("llama3"); + expect(result.messages).toEqual([{ role: "user", content: "hello" }]); + }); + + it("passes through stream field", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + stream: false, + }); + expect(result.stream).toBe(false); + }); + + it("converts options to temperature and max_tokens", () => { + const result = ollamaToCompletionRequest({ + model: 
"llama3", + messages: [{ role: "user", content: "hi" }], + options: { temperature: 0.7, num_predict: 100 }, + }); + expect(result.temperature).toBe(0.7); + expect(result.max_tokens).toBe(100); + }); + + it("converts tools", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + tools: [ + { + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }, + ], + }); + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }); + }); + + it("returns undefined tools when none provided", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + }); + expect(result.tools).toBeUndefined(); + }); +}); + +// ─── Unit tests: NDJSON writer ────────────────────────────────────────────── + +describe("writeNDJSONStream", () => { + it("writes correct NDJSON format", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false }, + { model: "llama3", done: true }, + ]; + const completed = await writeNDJSONStream(res, data); + + expect(completed).toBe(true); + expect(chunks).toHaveLength(2); + expect(chunks[0]).toBe('{"model":"llama3","done":false}\n'); + expect(chunks[1]).toBe('{"model":"llama3","done":true}\n'); + }); + + it("respects abort signal for interruption", async () => { + const chunks: string[] = []; + const controller = new AbortController(); + const res = { + 
writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + // Abort after first chunk + controller.abort(); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false }, + { model: "llama3", done: false }, + { model: "llama3", done: true }, + ]; + const completed = await writeNDJSONStream(res, data, { signal: controller.signal }); + + expect(completed).toBe(false); + expect(chunks).toHaveLength(1); + }); + + it("applies streaming profile latency", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [{ done: false }, { done: true }]; + const start = Date.now(); + await writeNDJSONStream(res, data, { + streamingProfile: { ttft: 50, tps: 100, jitter: 0 }, + }); + const elapsed = Date.now() - start; + + // Should have at least some delay from the streaming profile + expect(elapsed).toBeGreaterThanOrEqual(40); // ttft ~50ms + 1/100 tps ~10ms + expect(chunks).toHaveLength(2); + }); +}); + +// ─── Integration tests: POST /api/chat (non-streaming) ───────────────────── + +describe("POST /api/chat (non-streaming)", () => { + it("returns text response with all final fields", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.model).toBe("llama3"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toBe("Hi there!"); + 
expect(body.done).toBe(true); + expect(body.done_reason).toBe("stop"); + expect(body.total_duration).toBe(0); + expect(body.load_duration).toBe(0); + expect(body.prompt_eval_count).toBe(0); + expect(body.prompt_eval_duration).toBe(0); + expect(body.eval_count).toBe(0); + expect(body.eval_duration).toBe(0); + }); + + it("returns tool call with arguments as object and no id", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "weather" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.done).toBe(true); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + // Arguments must be an OBJECT, not a JSON string + expect(body.message.tool_calls[0].function.arguments).toEqual({ city: "NYC" }); + // No id field on tool calls + expect(body.message.tool_calls[0].id).toBeUndefined(); + }); +}); + +// ─── Integration tests: POST /api/chat (streaming) ────────────────────────── + +describe("POST /api/chat (streaming)", () => { + it("streams NDJSON when stream is absent (default streaming)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + // stream field intentionally omitted — Ollama defaults to true + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body); + expect(chunks.length).toBeGreaterThan(1); + + // All non-final chunks should have done: false + const nonFinal = chunks.slice(0, -1); + for (const chunk of nonFinal) { + expect((chunk as { done: boolean }).done).toBe(false); + } + + // Final chunk should have done: true and all duration fields + const final = chunks[chunks.length - 1] as Record; + 
expect(final.done).toBe(true); + expect(final.done_reason).toBe("stop"); + expect(final.total_duration).toBe(0); + expect(final.load_duration).toBe(0); + expect(final.prompt_eval_count).toBe(0); + expect(final.prompt_eval_duration).toBe(0); + expect(final.eval_count).toBe(0); + expect(final.eval_duration).toBe(0); + }); + + it("streams NDJSON when stream is explicitly true", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body); + expect(chunks.length).toBeGreaterThan(1); + }); + + it("reconstructs full text from streaming chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { content: string }; + done: boolean; + }>; + const fullText = chunks + .filter((c) => !c.done) + .map((c) => c.message.content) + .join(""); + expect(fullText).toBe("Hi there!"); + }); + + it("streams tool call with arguments as object", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "weather" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { tool_calls?: Array<{ function: { name: string; arguments: unknown } }> }; + done: boolean; + }>; + const toolChunk = chunks.find((c) => c.message.tool_calls && c.message.tool_calls.length > 0); + expect(toolChunk).toBeDefined(); + expect(toolChunk!.message.tool_calls![0].function.name).toBe("get_weather"); + expect(toolChunk!.message.tool_calls![0].function.arguments).toEqual({ 
city: "NYC" }); + }); + + it("uses fixture chunkSize for text streaming", async () => { + const bigChunkFixture: Fixture = { + match: { userMessage: "bigchunk" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 5, + }; + instance = await createServer([bigChunkFixture], { chunkSize: 2 }); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "bigchunk" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { content: string }; + done: boolean; + }>; + // 10 chars / chunkSize 5 = 2 content chunks + 1 final = 3 total + expect(chunks).toHaveLength(3); + expect(chunks[0].message.content).toBe("ABCDE"); + expect(chunks[1].message.content).toBe("FGHIJ"); + expect(chunks[2].done).toBe(true); + }); +}); + +// ─── Integration tests: POST /api/chat (streaming profile) ───────────────── + +describe("POST /api/chat (streaming profile)", () => { + it("applies streaming profile latency", async () => { + const slowFixture: Fixture = { + match: { userMessage: "slow" }, + response: { content: "AB" }, + chunkSize: 1, + streamingProfile: { ttft: 50, tps: 20, jitter: 0 }, + }; + instance = await createServer([slowFixture]); + + const start = Date.now(); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "slow" }], + stream: true, + }); + const elapsed = Date.now() - start; + + expect(res.status).toBe(200); + // Should have noticeable delay: ttft 50ms + at least 2 chunks at 20tps (50ms each) + final + expect(elapsed).toBeGreaterThanOrEqual(80); + }); +}); + +// ─── Integration tests: POST /api/chat (interruption) ─────────────────────── + +describe("POST /api/chat (interruption)", () => { + it("truncates after specified number of chunks", async () => { + const truncFixture: Fixture = { + match: { userMessage: "truncate" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 1, + truncateAfterChunks: 3, + }; + instance = await 
createServer([truncFixture]); + + // Use a custom request that tolerates abrupt socket close + const res = await new Promise<{ aborted: boolean; body: string }>((resolve) => { + const data = JSON.stringify({ + model: "llama3", + messages: [{ role: "user", content: "truncate" }], + stream: true, + }); + const parsed = new URL(`${instance!.url}/api/chat`); + const chunks: Buffer[] = []; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ aborted: false, body: Buffer.concat(chunks).toString() }); + }); + res.on("aborted", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + }, + ); + req.on("error", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + req.write(data); + req.end(); + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── Integration tests: POST /api/chat (chaos) ───────────────────────────── + +describe("POST /api/chat (chaos)", () => { + it("drops request when chaos drop header is set to 1.0", async () => { + instance = await createServer(allFixtures); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const data = JSON.stringify({ + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + const parsed = new URL(`${instance!.url}/api/chat`); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + 
path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + "x-llmock-chaos-drop": "1.0", + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +// ─── Integration tests: POST /api/chat (error handling) ───────────────────── + +describe("POST /api/chat (error handling)", () => { + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "fail" }], + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "nomatch" }], + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); + + it("returns 400 when messages array is missing from /api/chat", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + stream: false, + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("returns 400 when prompt is missing from /api/generate", async () => { + instance = await 
createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + stream: false, + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: prompt field is required"); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/api/chat`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +// ─── Integration tests: POST /api/generate (non-streaming) ───────────────── + +describe("POST /api/generate (non-streaming)", () => { + it("returns text in response field (not message)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.model).toBe("llama3"); + expect(body.response).toBe("Hi there!"); + expect(body.done).toBe(true); + expect(body.done_reason).toBe("stop"); + expect(body.context).toEqual([]); + expect(body.created_at).toBeDefined(); + // Should NOT have message field + expect(body.message).toBeUndefined(); + }); +}); + +// ─── Integration tests: POST /api/generate (error/chaos/strict/no-match) ──── + +describe("POST /api/generate (error fixture)", () => { + it("19a. returns error fixture through /api/generate", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "fail", + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +describe("POST /api/generate (chaos)", () => { + it("19b. 
drops request with chaos-drop header", async () => { + instance = await createServer(allFixtures); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const data = JSON.stringify({ + model: "llama3", + prompt: "hello", + stream: false, + }); + const parsed = new URL(`${instance!.url}/api/generate`); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + "x-llmock-chaos-drop": "1.0", + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +describe("POST /api/generate (strict mode)", () => { + it("19c. returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +describe("POST /api/generate (no fixture match)", () => { + it("19d. 
returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "nomatch_xyz", + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); +}); + +// ─── Integration tests: POST /api/generate (streaming) ────────────────────── + +describe("POST /api/generate (streaming)", () => { + it("streams NDJSON with response field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + // stream omitted — defaults to true + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body) as Array<{ + model: string; + created_at: string; + response: string; + done: boolean; + }>; + expect(chunks.length).toBeGreaterThan(1); + + // Non-final chunks use response field + const nonFinal = chunks.slice(0, -1); + for (const chunk of nonFinal) { + expect(chunk.response).toBeDefined(); + expect(chunk.done).toBe(false); + expect(chunk.created_at).toBeDefined(); + // Should NOT have message field + expect((chunk as Record).message).toBeUndefined(); + } + + // Reconstruct text + const fullText = nonFinal.map((c) => c.response).join(""); + expect(fullText).toBe("Hi there!"); + + // Final chunk + const final = chunks[chunks.length - 1] as Record; + expect(final.done).toBe(true); + expect(final.response).toBe(""); + expect(final.done_reason).toBe("stop"); + expect(final.context).toEqual([]); + }); + + it("defaults to streaming when stream field is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + }); + + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + }); 
+}); + +// ─── Integration tests: GET /api/tags ─────────────────────────────────────── + +describe("GET /api/tags", () => { + it("returns model list from fixtures", async () => { + instance = await createServer(allFixtures); + const res = await get(`${instance.url}/api/tags`); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.models).toBeDefined(); + expect(Array.isArray(body.models)).toBe(true); + // modelFixture has model: "llama3", so it should appear + const names = body.models.map((m: { name: string }) => m.name); + expect(names).toContain("llama3"); + }); + + it("returns default models when no fixture has model match", async () => { + const noModelFixtures: Fixture[] = [ + { match: { userMessage: "hi" }, response: { content: "hello" } }, + ]; + instance = await createServer(noModelFixtures); + const res = await get(`${instance.url}/api/tags`); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.models.length).toBeGreaterThan(0); + // Default models should include standard ones + const names = body.models.map((m: { name: string }) => m.name); + expect(names).toContain("gpt-4"); + }); +}); + +// ─── Integration tests: journal ───────────────────────────────────────────── + +describe("POST /api/chat (journal)", () => { + it("records request in the journal", async () => { + instance = await createServer(allFixtures); + await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/api/chat"); + expect(entry!.response.status).toBe(200); + expect(entry!.response.fixture).toBe(textFixture); + expect(entry!.body.model).toBe("llama3"); + }); +}); + +describe("POST /api/generate (journal)", () => { + it("records request in the journal", async () => { + 
instance = await createServer(allFixtures); + await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/api/generate"); + expect(entry!.response.status).toBe(200); + }); +}); + +// ─── Integration tests: malformed tool call arguments ─────────────────────── + +describe("POST /api/chat (malformed tool call arguments)", () => { + it("falls back to empty object when arguments is not valid JSON", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "bad-args" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("fn"); + // Malformed JSON falls back to empty object + expect(body.message.tool_calls[0].function.arguments).toEqual({}); + }); +}); + +// ─── Integration tests: tool call on /api/generate → 500 ─────────────────── + +describe("POST /api/generate (tool call fixture)", () => { + it("returns 500 'unknown type' for tool call fixtures on /api/generate", async () => { + const tcFixture: Fixture = { + match: { userMessage: "tool-gen" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }; + instance = await createServer([tcFixture]); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "tool-gen", + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Integration tests: CORS 
──────────────────────────────────────────────── + +describe("POST /api/chat (CORS)", () => { + it("includes CORS headers", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.headers["access-control-allow-origin"]).toBe("*"); + }); +}); + +// ─── Integration tests: strict mode → 503 ────────────────────────────────── + +describe("POST /api/chat (strict mode)", () => { + it("returns 503 in strict mode with no matching fixture", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Integration tests: multiple tool calls ───────────────────────────────── + +describe("POST /api/chat (multiple tool calls)", () => { + it("returns 2 tool calls in a single non-streaming response", async () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "multi-tool" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(2); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[0].function.arguments).toEqual({ city: "NYC" }); + expect(body.message.tool_calls[1].function.name).toBe("get_time"); + 
expect(body.message.tool_calls[1].function.arguments).toEqual({ tz: "EST" }); + }); +}); + +// ─── Integration tests: error fixture with no explicit status ─────────────── + +describe("POST /api/chat (error fixture no explicit status)", () => { + it("defaults to 500 when error fixture has no status", async () => { + const noStatusError: Fixture = { + match: { userMessage: "err-no-status" }, + response: { + error: { + message: "Something went wrong", + type: "server_error", + }, + }, + }; + instance = await createServer([noStatusError]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "err-no-status" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Something went wrong"); + }); +}); diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts new file mode 100644 index 0000000..499f93d --- /dev/null +++ b/src/__tests__/recorder.test.ts @@ -0,0 +1,2531 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import type { Fixture, FixtureFile } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { proxyAndRecord } from "../recorder.js"; +import type { RecordConfig } from "../types.js"; +import { Logger } from "../logger.js"; +import { LLMock } from "../llmock.js"; +import { encodeEventStreamMessage } from "../aws-event-stream.js"; + +// --------------------------------------------------------------------------- +// HTTP helpers +// --------------------------------------------------------------------------- + +function post( + url: string, + body: unknown, + headers?: Record, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = 
JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + ...headers, + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function get( + url: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname + parsed.search, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); +} + +// --------------------------------------------------------------------------- +// Test state +// --------------------------------------------------------------------------- + +let upstream: ServerInstance | undefined; +let recorder: ServerInstance | undefined; +let tmpDir: string | undefined; + +afterEach(async () => { + if (recorder) { + await new Promise((resolve) => recorder!.server.close(() => resolve())); + recorder = undefined; + } + if (upstream) { + await new Promise((resolve) => upstream!.server.close(() => resolve())); + upstream = undefined; + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } +}); + +// --------------------------------------------------------------------------- +// Unit tests — proxyAndRecord function directly +// --------------------------------------------------------------------------- + +describe("proxyAndRecord", () => { + it("returns false when provider is not configured", async () => { + const fixtures: Fixture[] = []; + const logger = new Logger("silent"); + const record: RecordConfig = { providers: {} }; + + // Create a mock req/res pair — we just need them to exist, + // proxyAndRecord should return false before using them + const { req, res } = createMockReqRes(); + + const result = await proxyAndRecord( + req, + res, + { model: "gpt-4", messages: [{ role: "user", content: "hello" }] }, + "openai", + "/v1/chat/completions", + fixtures, + { record, logger }, + ); + + expect(result).toBe(false); + }); + + it("returns false when record config is undefined", async () => { + const fixtures: Fixture[] = []; + const logger = new Logger("silent"); + + const { req, res } = createMockReqRes(); + + const result = await proxyAndRecord( + req, + res, + { model: "gpt-4", messages: [{ role: "user", content: "hello" }] }, + "openai", + 
"/v1/chat/completions", + fixtures, + { record: undefined, logger }, + ); + + expect(result).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — upstream mock + recording proxy +// --------------------------------------------------------------------------- + +describe("recorder integration", () => { + it("proxies unmatched request to upstream and returns correct response", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("Paris is the capital of France."); + }); + + it("saves fixture file to disk with correct format", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + }); + + // Check that a fixture file was created + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Validate fixture content + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + expect(fixtureContent.fixtures[0].match.userMessage).toBe("What is the capital of France?"); + expect((fixtureContent.fixtures[0].response as { content: string }).content).toBe( + "Paris is the capital of France.", + ); + }); + + it("recorded fixture is reused for subsequent identical requests", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // First request — proxied + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + // Second request — should match the recorded fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("Paris is the capital of France."); + + // Only one fixture file should exist (no second proxy) + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records journal entry for proxied request", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." 
}, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + // Check journal + const journalResp = await get(`${recorderUrl}/v1/_requests`); + const entries = JSON.parse(journalResp.body); + expect(entries.length).toBeGreaterThanOrEqual(1); + }); + + it("does not save auth headers in fixture file", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + await post( + `${recorderUrl}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }, + { Authorization: "Bearer sk-secret-key-12345" }, + ); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + const content = fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"); + + // The fixture file should not contain any auth headers/secrets + expect(content).not.toContain("sk-secret-key-12345"); + expect(content).not.toContain("Authorization"); + expect(content).not.toContain("authorization"); + }); + + it("records tool call response from upstream", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "weather" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }], + }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the weather?" 
}], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.tool_calls).toBeDefined(); + expect(body.choices[0].message.tool_calls[0].function.name).toBe("get_weather"); + + // Check saved fixture has toolCalls + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { toolCalls: unknown[] }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + }); + + it("records embedding response from upstream", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder( + [ + { + match: { inputText: "hello world" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }, + ], + "openai", + ); + + const resp = await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "hello world", + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.data[0].embedding).toEqual([0.1, 0.2, 0.3]); + + // Check saved fixture + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { embedding: number[] }; + expect(savedResponse.embedding).toEqual([0.1, 0.2, 0.3]); + }); + + it("records upstream error status as error fixture", async () => { + // Upstream with no matching fixture for our request → 404 + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "something else entirely" }, + response: { content: 
"not what we asked" }, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unmatched request" }], + }); + + // The upstream returns 404 (no fixture match), which gets proxied + // The recorder should save an error fixture + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + error: { message: string }; + status?: number; + }; + expect(savedResponse.error).toBeDefined(); + expect(savedResponse.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming upstream → collapsed fixture +// --------------------------------------------------------------------------- + +describe("recorder streaming collapse", () => { + it("collapses OpenAI SSE streaming response to non-streaming fixture", async () => { + // Upstream has a fixture; when recorder proxies with stream:true, + // upstream returns SSE, recorder should collapse it + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // Send request with stream: true — upstream llmock will return SSE + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + stream: true, + }); + + expect(resp.status).toBe(200); + // The recorder relays the raw SSE to the client + // But the saved fixture should be collapsed + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Paris is the capital of France."); + }); + + it("collapsed streaming fixture works on replay (second request matches)", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // First request — stream:true, proxied to upstream, collapsed on save + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + stream: true, + }); + + // Second request — non-streaming, should match the collapsed fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + }); + + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("Paris is the capital of France."); + }); + + it("collapses streaming tool call response to fixture with toolCalls", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "weather" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }], + }, + }, + ]); + + // Send streaming request + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the weather?" }], + stream: true, + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + + // Check saved fixture has toolCalls (not SSE) + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { toolCalls: unknown[] }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — multi-provider proxy routing +// --------------------------------------------------------------------------- + +describe("recorder multi-provider routing", () => { + it("proxies Anthropic messages request to anthropic upstream", async () => { + // Upstream for Anthropic + const anthropicUpstream = await createServer( + [ + { + match: { userMessage: "bonjour" }, + response: { content: "Salut!" 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { + providers: { anthropic: anthropicUpstream.url }, + fixturePath: tmpDir, + }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "bonjour" }], + }); + + expect(resp.status).toBe(200); + // Anthropic handler translates to/from Claude format; the upstream + // is another llmock so it returns OpenAI format which gets proxied raw + const body = JSON.parse(resp.body); + // The proxied response should contain content + expect(body).toBeDefined(); + + // Fixture file created on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + // Clean up the extra upstream + await new Promise((resolve) => anthropicUpstream.server.close(() => resolve())); + }); + + it("unconfigured provider returns 404 (no proxy)", async () => { + // Only openai provider configured, not gemini + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "test" }, + response: { content: "ok" }, + }, + ]); + + // Send a Gemini-format request — no upstream configured for gemini + const resp = await post(`${recorderUrl}/v1beta/models/gemini-pro:generateContent`, { + contents: [{ parts: [{ text: "hello gemini" }], role: "user" }], + }); + + // Should get 404 — no fixture and no gemini upstream + expect(resp.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — strict mode +// --------------------------------------------------------------------------- + +describe("recorder strict mode", () => { + it("strict mode without recording: unmatched request returns 503 with error logged", async () => { + recorder = await 
createServer([], { + port: 0, + strict: true, + logLevel: "debug", + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "no fixture here" }], + }); + + expect(resp.status).toBe(503); + const body = JSON.parse(resp.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); + + it("record + strict: proxy succeeds when upstream is available", async () => { + await setupUpstreamAndRecorder([ + { + match: { userMessage: "hello" }, + response: { content: "world" }, + }, + ]); + + // Override to also set strict on the recorder + // Need to create a new recorder with both record + strict + await new Promise((resolve) => recorder!.server.close(() => resolve())); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + strict: true, + record: { providers: { openai: upstream!.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("world"); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — enableRecording / disableRecording on LLMock +// --------------------------------------------------------------------------- + +describe("LLMock enableRecording / disableRecording", () => { + let mock: LLMock; + let upstreamServer: ServerInstance; + + afterEach(async () => { + if (mock) { + try { + await mock.stop(); + } catch { + // ignore if not started + } + } + if (upstreamServer) { + await new Promise((resolve) => upstreamServer.server.close(() => resolve())); + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + it("enableRecording allows proxying; 
disableRecording returns to 404", async () => { + // Set up upstream + upstreamServer = await createServer( + [ + { + match: { userMessage: "hello" }, + response: { content: "from upstream" }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + mock = new LLMock(); + const url = await mock.start(); + + // Without recording: request gets 404 + const resp1 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp1.status).toBe(404); + + // Enable recording + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + + // Now request should proxy to upstream + const resp2 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("from upstream"); + + // Disable recording + mock.disableRecording(); + + // Recorded fixture should still work (it was added to memory) + const resp3 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp3.status).toBe(200); + const body3 = JSON.parse(resp3.body); + expect(body3.choices[0].message.content).toBe("from upstream"); + + // A different message should 404 (no recording, no fixture) + const resp4 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "something else" }], + }); + expect(resp4.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — multi-provider recording (Gemini, Ollama, Cohere, Bedrock, Vertex AI) +// --------------------------------------------------------------------------- + +describe("recorder multi-provider recording", () => { + let servers: http.Server[] = []; + + 
afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function trackServer(si: ServerInstance): ServerInstance { + servers.push(si.server); + return si; + } + + it("records Gemini generateContent request through full proxy", async () => { + const geminiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test gemini" }, response: { content: "Gemini says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: geminiUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "test gemini" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + // Fixture file saved with gemini prefix + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("gemini-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test gemini"); + }); + + it("records Ollama /api/chat request through full proxy", async () => { + const ollamaUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test ollama" }, response: { content: "Ollama says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: ollamaUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "test ollama" }], + stream: false, + }); + + 
expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("ollama-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test ollama"); + }); + + it("records Cohere /v2/chat request through full proxy", async () => { + const cohereUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test cohere" }, response: { content: "Cohere says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { cohere: cohereUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "test cohere" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("cohere-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test cohere"); + }); + + it("records Bedrock /model/{id}/invoke request through full proxy", async () => { + const bedrockUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test bedrock" }, response: { content: "Bedrock says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: bedrockUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/model/claude-v3/invoke`, { + 
anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "test bedrock" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("bedrock-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test bedrock"); + }); + + it("records Vertex AI request through vertexai provider key", async () => { + // Vertex AI now uses "vertexai" as the provider key + const vertexUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test vertex" }, response: { content: "Vertex says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { vertexai: vertexUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post( + `${recorder.url}/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash:generateContent`, + { contents: [{ parts: [{ text: "test vertex" }], role: "user" }] }, + ); + + expect(resp.status).toBe(200); + + // Uses vertexai prefix (separate provider key from gemini) + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("vertexai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records Anthropic streaming request through handleMessages", async () => { + const anthropicUpstream = trackServer( + await createServer( + [ + { + match: { userMessage: "stream anthropic" }, + response: { content: "Anthropic streamed" }, + }, + ], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { 
anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "stream anthropic" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("anthropic-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records multiple providers simultaneously", async () => { + const openaiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "multi openai" }, response: { content: "OpenAI multi" } }], + { port: 0 }, + ), + ); + const geminiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "multi gemini" }, response: { content: "Gemini multi" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: openaiUpstream.url, gemini: geminiUpstream.url }, + fixturePath: tmpDir, + }, + }); + + // OpenAI request + const resp1 = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "multi openai" }], + }); + expect(resp1.status).toBe(200); + + // Gemini request + const resp2 = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "multi gemini" }], role: "user" }], + }); + expect(resp2.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const openaiFixtures = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + const geminiFixtures = files.filter((f) => f.startsWith("gemini-") && f.endsWith(".json")); + expect(openaiFixtures).toHaveLength(1); + expect(geminiFixtures).toHaveLength(1); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming recording through full 
server +// --------------------------------------------------------------------------- + +describe("recorder streaming through full server", () => { + it("OpenAI streaming request collapses and saves fixture with correct content", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "stream test" }, + response: { content: "Streamed content from upstream" }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "stream test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + // SSE data relayed to client + expect(resp.body).toContain("data:"); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Streamed content from upstream"); + }); + + it("streaming tool call recording preserves toolCalls in fixture", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "stream tools" }, + response: { + toolCalls: [{ name: "search", arguments: '{"query":"test"}' }], + }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "stream tools" }], + stream: true, + tools: [{ type: "function", function: { name: "search", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = 
fixtureContent.fixtures[0].response as { + toolCalls: Array<{ name: string; arguments: string }>; + }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + expect(savedResponse.toolCalls[0].name).toBe("search"); + expect(savedResponse.toolCalls[0].arguments).toBe('{"query":"test"}'); + }); +}); + +// --------------------------------------------------------------------------- +// End-to-end replay verification +// --------------------------------------------------------------------------- + +describe("recorder end-to-end replay", () => { + it("record → verify fixture on disk → replay from fixture (not proxy)", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "replay test" }, + response: { content: "Replay this content" }, + }, + ]); + + // First request — proxied to upstream + const resp1 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "replay test" }], + }); + expect(resp1.status).toBe(200); + + // Verify fixture file on disk + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("replay test"); + expect((fixtureContent.fixtures[0].response as { content: string }).content).toBe( + "Replay this content", + ); + + // Clear journal to distinguish proxy vs fixture-match + await fetch(`${recorderUrl}/v1/_requests`, { method: "DELETE" }); + + // Second request — should match recorded fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "replay test" }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + 
expect(body2.choices[0].message.content).toBe("Replay this content"); + + // Journal should show the request was served with a fixture match (not null) + const journalResp = await get(`${recorderUrl}/v1/_requests`); + const entries = JSON.parse(journalResp.body); + expect(entries).toHaveLength(1); + expect(entries[0].response.fixture).not.toBeNull(); + + // Still only one fixture file (no second proxy) + const files2 = fs.readdirSync(fixturePath); + const fixtureFiles2 = files2.filter((f) => f.endsWith(".json")); + expect(fixtureFiles2).toHaveLength(1); + }); + + it("record tool call → replay → toolCalls match", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "tool replay" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }, + ]); + + // Record + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "tool replay" }], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + // Replay + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "tool replay" }], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.tool_calls).toBeDefined(); + expect(body2.choices[0].message.tool_calls[0].function.name).toBe("get_weather"); + expect(body2.choices[0].message.tool_calls[0].function.arguments).toBe('{"city":"NYC"}'); + }); + + it("record embedding → replay → embedding vector matches", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder( + [{ match: { inputText: "embed replay" }, response: { embedding: [0.5, 0.6, 0.7] } }], + "openai", + ); + + // Record + await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "embed replay", + }); + 
+ // Replay + const resp2 = await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "embed replay", + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.data[0].embedding).toEqual([0.5, 0.6, 0.7]); + }); +}); + +// --------------------------------------------------------------------------- +// Edge cases +// --------------------------------------------------------------------------- + +describe("recorder edge cases", () => { + it("upstream 500 error recorded as error fixture and replayed", async () => { + // Upstream returns error for any request + upstream = await createServer( + [ + { + match: { userMessage: "trigger error" }, + response: { + error: { message: "Internal server error", type: "server_error" }, + status: 500, + }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "trigger error" }], + }); + + expect(resp.status).toBe(500); + + // Fixture file created with error response + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + error: { message: string }; + status?: number; + }; + expect(savedResponse.error).toBeDefined(); + expect(savedResponse.status).toBe(500); + + // Replay: second identical request matches the recorded error fixture + const resp2 = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "trigger error" }], + }); + 
expect(resp2.status).toBe(500); + }); + + it("empty match _warning field assertion: present in saved file, NOT in memory", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + // Upstream matches everything via predicate + match: { predicate: () => true }, + response: { content: "empty match response" }, + }, + ]); + + // Send a request with only a system message (no user message → empty match) + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Saved file should have _warning field + const fileContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ); + expect(fileContent._warning).toBeDefined(); + expect(fileContent._warning).toContain("Empty match"); + + // In-memory fixtures should NOT have been augmented (empty match skipped) + // Send same request again — it should proxy again (not match from memory) + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + // Should still return 200 (proxied again since empty match wasn't added to memory) + expect(resp2.status).toBe(200); + + // Now TWO fixture files on disk (proxied twice) + const files2 = fs.readdirSync(fixturePath); + const fixtureFiles2 = files2.filter((f) => f.endsWith(".json")); + expect(fixtureFiles2).toHaveLength(2); + }); + + it("default fixturePath: omit fixturePath from config, verify default path used", async () => { + upstream = await createServer( + [{ match: { userMessage: "default path" }, response: { content: "default path response" } }], + { port: 0 }, + ); + + // Create recorder with no fixturePath — should 
default to "./fixtures/recorded" + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url } }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "default path" }], + }); + + expect(resp.status).toBe(200); + + // Check the default path + const defaultPath = path.resolve("./fixtures/recorded"); + expect(fs.existsSync(defaultPath)).toBe(true); + const files = fs.readdirSync(defaultPath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + // Clean up the default path files we just created + for (const f of fixtureFiles) { + fs.unlinkSync(path.join(defaultPath, f)); + } + // Remove dir if empty + try { + fs.rmdirSync(defaultPath); + } catch { + // ignore — might not be empty if other tests ran + } + }); + + it("request with system-only messages (no user message) derives empty match", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + // Upstream matches everything via predicate + match: { predicate: () => true }, + response: { content: "system only response" }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // The match should have no userMessage (no user message in request) + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBeUndefined(); + }); + + it("recording path created automatically (mkdirSync recursive)", async () => { + upstream = await createServer( + [{ 
match: { userMessage: "auto dir" }, response: { content: "dir created" } }], + { port: 0 }, + ); + + // Use a nested path that doesn't exist + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + const nestedPath = path.join(tmpDir, "nested", "deep", "fixtures"); + + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url }, fixturePath: nestedPath }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "auto dir" }], + }); + + expect(resp.status).toBe(200); + + // Nested directory was created + expect(fs.existsSync(nestedPath)).toBe(true); + const files = fs.readdirSync(nestedPath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("fixture file naming follows {provider}-{ISO-timestamp}.json format", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { match: { userMessage: "naming test" }, response: { content: "named" } }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "naming test" }], + }); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{counter}.json (colons and dots replaced with dashes) + const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-\d+\.json$/; + expect(fixtureFiles[0]).toMatch(pattern); + }); + + it("upstream returns empty response body — handled gracefully", async () => { + // Create a raw HTTP server that returns 200 with empty body + const emptyServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(""); + }); + await new Promise((resolve) => emptyServer.listen(0, "127.0.0.1", resolve)); + const emptyAddr = 
emptyServer.address() as { port: number }; + const emptyUrl = `http://127.0.0.1:${emptyAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: emptyUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "empty body test" }], + }); + + // Should not crash — returns the upstream status + expect(resp.status).toBe(200); + + // Fixture file should still be created (with error/fallback response) + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + await new Promise((resolve) => emptyServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Strict mode thorough tests +// --------------------------------------------------------------------------- + +describe("recorder strict mode thorough", () => { + it("strict mode + recording but provider not configured: 503 returned", async () => { + // Only anthropic configured, but request goes to openai endpoint + const anthropicUpstream = await createServer( + [{ match: { userMessage: "strict test" }, response: { content: "ok" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + strict: true, + record: { providers: { anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + // OpenAI endpoint — no openai provider configured + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "strict test" }], + }); + + expect(resp.status).toBe(503); + const body = JSON.parse(resp.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + + await new Promise((resolve) => 
anthropicUpstream.server.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// enableRecording / disableRecording lifecycle (extended) +// --------------------------------------------------------------------------- + +describe("LLMock enableRecording / disableRecording lifecycle", () => { + let mock: LLMock; + let upstreamServer: ServerInstance; + + afterEach(async () => { + if (mock) { + try { + await mock.stop(); + } catch { + // ignore + } + } + if (upstreamServer) { + await new Promise((resolve) => upstreamServer.server.close(() => resolve())); + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + it("recorded fixtures persist on disk after disableRecording", async () => { + upstreamServer = await createServer( + [{ match: { userMessage: "persist test" }, response: { content: "persisted" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + mock = new LLMock(); + const url = await mock.start(); + + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + + await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "persist test" }], + }); + + mock.disableRecording(); + + // Fixture files still on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // And the fixture is usable — request still matches from in-memory fixture + const resp = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "persist test" }], + }); + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("persisted"); + }); + + it("re-enable recording after disable works for new requests", async () => { + upstreamServer = await createServer( + 
[ + { match: { userMessage: "first" }, response: { content: "first response" } }, + { match: { userMessage: "second" }, response: { content: "second response" } }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + mock = new LLMock(); + const url = await mock.start(); + + // First recording session + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "first" }], + }); + mock.disableRecording(); + + // Second recording session + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + const resp = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "second" }], + }); + expect(resp.status).toBe(200); + mock.disableRecording(); + + // Both fixtures on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(2); + }); +}); + +// --------------------------------------------------------------------------- +// Auth header tests (extended) +// --------------------------------------------------------------------------- + +describe("recorder auth header handling", () => { + it("x-api-key (Anthropic) forwarded to upstream but not saved in fixture", async () => { + const anthropicUpstream = await createServer( + [{ match: { userMessage: "api key test" }, response: { content: "ok" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + await post( + `${recorder.url}/v1/messages`, + { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "api key test" }], + }, + { "x-api-key": "sk-ant-secret-123" }, + 
); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + const content = fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"); + expect(content).not.toContain("sk-ant-secret-123"); + expect(content).not.toContain("x-api-key"); + + await new Promise((resolve) => anthropicUpstream.server.close(() => resolve())); + }); + + it("multiple auth header types all absent from fixture", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { match: { userMessage: "multi auth" }, response: { content: "multi auth ok" } }, + ]); + + await post( + `${recorderUrl}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "multi auth" }], + }, + { + Authorization: "Bearer sk-openai-secret", + "x-api-key": "sk-anthropic-secret", + "api-key": "azure-secret-key", + }, + ); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const content = fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"); + + expect(content).not.toContain("sk-openai-secret"); + expect(content).not.toContain("sk-anthropic-secret"); + expect(content).not.toContain("azure-secret-key"); + expect(content).not.toContain("Authorization"); + expect(content).not.toContain("authorization"); + expect(content).not.toContain("x-api-key"); + expect(content).not.toContain("api-key"); + }); + + it("custom non-auth headers from client are NOT forwarded to upstream", async () => { + // We'll verify by checking that the upstream doesn't receive custom headers. + // Create a raw upstream that echoes back received headers. 
+ let receivedHeaders: http.IncomingHttpHeaders = {}; + const echoServer = http.createServer((req, res) => { + receivedHeaders = req.headers; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + choices: [{ message: { role: "assistant", content: "echo" }, index: 0 }], + model: "gpt-4", + }), + ); + }); + await new Promise((resolve) => echoServer.listen(0, "127.0.0.1", resolve)); + const echoAddr = echoServer.address() as { port: number }; + const echoUrl = `http://127.0.0.1:${echoAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: echoUrl }, fixturePath: tmpDir }, + }); + + await post( + `${recorder.url}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "header test" }], + }, + { + Authorization: "Bearer sk-test", + "X-Custom-Header": "should-not-forward", + "X-Request-Id": "req-123", + }, + ); + + // Authorization is forwarded, custom headers are not + expect(receivedHeaders["authorization"]).toBe("Bearer sk-test"); + expect(receivedHeaders["x-custom-header"]).toBeUndefined(); + expect(receivedHeaders["x-request-id"]).toBeUndefined(); + + await new Promise((resolve) => echoServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Upstream connection failure → 502 +// --------------------------------------------------------------------------- + +describe("recorder upstream connection failure", () => { + it("returns 502 when upstream is unreachable", async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: "http://127.0.0.1:1" }, + fixturePath: tmpDir, + }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unreachable 
upstream" }], + }); + + expect(resp.status).toBe(502); + const body = JSON.parse(resp.body); + expect(body.error.type).toBe("proxy_error"); + }); +}); + +// --------------------------------------------------------------------------- +// Filesystem write failure — response still relayed +// --------------------------------------------------------------------------- + +describe("recorder filesystem write failure", () => { + it("relays response to client even when fixture write fails", async () => { + upstream = await createServer( + [{ match: { userMessage: "fs fail" }, response: { content: "still works" } }], + { port: 0 }, + ); + + // Use a path that cannot be a directory (a regular file) + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + const blockedPath = path.join(tmpDir, "blocked"); + fs.writeFileSync(blockedPath, "i am a file not a directory"); + + recorder = await createServer([], { + port: 0, + logLevel: "silent", + record: { + providers: { openai: upstream.url }, + fixturePath: blockedPath, + }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "fs fail" }], + }); + + // Response still relayed to client + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("still works"); + }); +}); + +// --------------------------------------------------------------------------- +// buildFixtureResponse for non-OpenAI formats +// --------------------------------------------------------------------------- + +describe("recorder buildFixtureResponse non-OpenAI formats", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function createRawUpstream(responseBody: object): Promise<{ url: string; server: http.Server }> { + return new Promise((resolve) => { + const srv = 
http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(responseBody)); + }); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address() as { port: number }; + servers.push(srv); + resolve({ url: `http://127.0.0.1:${addr.port}`, server: srv }); + }); + }); + } + + it("records Anthropic format (content array with type/text)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + id: "msg_123", + type: "message", + role: "assistant", + content: [{ type: "text", text: "Bonjour from Anthropic" }], + stop_reason: "end_turn", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "hello anthropic" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Bonjour from Anthropic"); + }); + + it("records Gemini format (candidates array)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + candidates: [ + { + content: { role: "model", parts: [{ text: "Hello from Gemini" }] }, + finishReason: "STOP", + }, + ], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ 
parts: [{ text: "hello gemini" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Gemini"); + }); + + it("records Ollama format (message object)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + model: "llama3", + message: { role: "assistant", content: "Hello from Ollama" }, + done: true, + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello ollama" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Ollama"); + }); +}); + +// --------------------------------------------------------------------------- +// Content + toolCalls coexistence +// --------------------------------------------------------------------------- + +describe("recorder content + toolCalls coexistence", () => { + it("saves toolCalls when both content and tool_calls are in OpenAI response", async () => { + // Create raw upstream returning both content and tool_calls + const rawServer = http.createServer((_req, res) => { + 
res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "chatcmpl-coexist", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "I'll look that up for you.", + tool_calls: [ + { + id: "call_coex", + type: "function", + function: { name: "search", arguments: '{"q":"test"}' }, + }, + ], + }, + }, + ], + model: "gpt-4", + }), + ); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "coexist test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { content?: string; toolCalls?: Array<{ name: string; arguments: string }> }; + }>; + }; + // toolCalls should win + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("search"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Non-OpenAI streaming through recorder +// --------------------------------------------------------------------------- + +describe("recorder non-OpenAI streaming", () => { + it("collapses Anthropic SSE streaming to fixture content", async 
() => { + // Create a raw upstream that returns Anthropic SSE format + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + res.write( + `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { id: "msg_s", role: "assistant" } })}\n\n`, + ); + res.write( + `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Streamed " } })}\n\n`, + ); + res.write( + `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Anthropic" } })}\n\n`, + ); + res.write(`event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`); + res.end(); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "stream anthropic test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Streamed Anthropic"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming 
through recorder: Gemini SSE + Ollama NDJSON +// --------------------------------------------------------------------------- + +describe("recorder streaming collapse: Gemini SSE", () => { + it("collapses Gemini SSE streaming response to non-streaming fixture", async () => { + // Create upstream with gemini provider + upstream = await createServer( + [ + { + match: { userMessage: "hello gemini" }, + response: { content: "Gemini says hello back." }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Gemini request + const resp = await post( + `${recorder.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + { + contents: [{ parts: [{ text: "hello gemini" }], role: "user" }], + }, + ); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Gemini says hello back."); + }); +}); + +describe("recorder streaming collapse: Cohere SSE", () => { + it("collapses Cohere SSE streaming response to non-streaming fixture", async () => { + upstream = await createServer( + [ + { + match: { userMessage: "hello cohere" }, + response: { content: "Cohere says hello." 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { cohere: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Cohere request + const resp = await post(`${recorder.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello cohere" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Cohere says hello."); + }); +}); + +describe("recorder streaming collapse: Ollama NDJSON", () => { + it("collapses Ollama NDJSON streaming response to non-streaming fixture", async () => { + upstream = await createServer( + [ + { + match: { userMessage: "hello ollama" }, + response: { content: "Ollama says hi." 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Ollama request (stream defaults to true) + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello ollama" }], + }); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Ollama says hi."); + }); +}); + +// --------------------------------------------------------------------------- +// buildFixtureResponse format detection +// --------------------------------------------------------------------------- + +describe("buildFixtureResponse format detection", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function createRawUpstreamWithStatus( + responseBody: object | string, + status: number = 200, + contentType: string = "application/json", + ): Promise<{ url: string; server: http.Server }> { + return new Promise((resolve) => { + const srv = http.createServer((_req, res) => { + res.writeHead(status, { "Content-Type": contentType }); + res.end(typeof responseBody === "string" ? 
responseBody : JSON.stringify(responseBody)); + }); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address() as { port: number }; + servers.push(srv); + resolve({ url: `http://127.0.0.1:${addr.port}`, server: srv }); + }); + }); + } + + it("detects Anthropic tool_use format and saves toolCalls", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + content: [ + { + type: "tool_use", + id: "toolu_123", + name: "get_weather", + input: { city: "SF" }, + }, + ], + role: "assistant", + stop_reason: "tool_use", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "tool use format test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + // Should be toolCalls, NOT content + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "SF", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + }); + + it("detects Gemini functionCall format and saves toolCalls", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + candidates: [ + { + content: { + 
parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "SF" }, + }, + }, + ], + }, + }, + ], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "gemini tool call test" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "SF", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + }); + + it("unknown format falls back to error response", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + custom: "data", + status: "ok", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unknown format test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => 
f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + error?: { message: string; type: string }; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.error).toBeDefined(); + expect(fixtureContent.fixtures[0].response.error!.message).toContain( + "Could not detect response format", + ); + expect(fixtureContent.fixtures[0].response.error!.type).toBe("proxy_error"); + }); + + it("detects direct embedding format (top-level embedding array)", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + embedding: [0.1, 0.2, 0.3], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "direct embedding test", + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { embedding?: number[] }; + }>; + }; + expect(fixtureContent.fixtures[0].response.embedding).toEqual([0.1, 0.2, 0.3]); + }); + + it("preserves error code field from upstream error response", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus( + { + error: { + message: "Rate limited", + type: "rate_limit_error", + code: "rate_limit", + }, + }, + 429, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await 
post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "rate limit test" }], + }); + + expect(resp.status).toBe(429); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + error?: { message: string; type: string; code?: string }; + status?: number; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.error).toBeDefined(); + expect(fixtureContent.fixtures[0].response.error!.message).toBe("Rate limited"); + expect(fixtureContent.fixtures[0].response.error!.type).toBe("rate_limit_error"); + expect(fixtureContent.fixtures[0].response.error!.code).toBe("rate_limit"); + expect(fixtureContent.fixtures[0].response.status).toBe(429); + }); +}); + +// --------------------------------------------------------------------------- +// Bedrock EventStream binary through recorder +// --------------------------------------------------------------------------- + +describe("recorder Bedrock EventStream binary", () => { + it("collapses Bedrock binary EventStream to text fixture", async () => { + // Create a raw upstream returning application/vnd.amazon.eventstream binary + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/vnd.amazon.eventstream" }); + + // Write binary EventStream frames using encodeEventStreamMessage + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Hello " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "from Bedrock" }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame3 = encodeEventStreamMessage("messageStop", { + messageStop: 
{ stopReason: "end_turn" }, + }); + + res.write(frame1); + res.write(frame2); + res.write(frame3); + res.end(); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/model/claude-v3/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "bedrock binary test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Bedrock"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Streaming edge cases — droppedChunks and content+toolCalls coexistence +// --------------------------------------------------------------------------- + +describe("recorder streaming edge cases", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + it("streaming with malformed chunks: fixture still saved with surviving content", async () => { + // Create a raw upstream that returns SSE with malformed chunks mixed in + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + 
res.write( + `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "Hello" } }] })}\n\n`, + ); + res.write(`data: {MALFORMED JSON!!!\n\n`); + res.write( + `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: " World" } }] })}\n\n`, + ); + res.write(`data: [DONE]\n\n`); + res.end(); + }); + servers.push(rawServer); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "droppedchunks test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + // Surviving content from non-malformed chunks + expect(savedResponse.content).toBe("Hello World"); + }); + + it("streaming with content + toolCalls: fixture saves toolCalls (not content)", async () => { + // Create a raw upstream that returns SSE with both text and tool call deltas + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + res.write( + `data: ${JSON.stringify({ + id: "c1", + choices: [{ delta: { content: "Calling tool..." 
} }], + })}\n\n`, + ); + res.write( + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_abc", + type: "function", + function: { name: "get_weather", arguments: '{"city":"SF"}' }, + }, + ], + }, + }, + ], + })}\n\n`, + ); + res.write(`data: [DONE]\n\n`); + res.end(); + }); + servers.push(rawServer); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "content+tools test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + toolCalls?: Array<{ name: string; arguments: string }>; + content?: string; + }; + // When toolCalls exist, they win over content + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + expect(savedResponse.toolCalls![0].name).toBe("get_weather"); + }); +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function createMockReqRes(): { req: http.IncomingMessage; res: http.ServerResponse } { + // Create minimal mock objects — only needed for type compatibility, + // proxyAndRecord returns false before touching them in these test cases 
+  const req = Object.create(http.IncomingMessage.prototype) as http.IncomingMessage;
+  req.headers = {};
+  const res = Object.create(http.ServerResponse.prototype) as http.ServerResponse;
+  return { req, res };
+}
+
+async function setupUpstreamAndRecorder(
+  upstreamFixtures: Fixture[],
+  providerKey: string = "openai",
+): Promise<{ upstreamUrl: string; recorderUrl: string; fixturePath: string }> {
+  // Create upstream "real API" server
+  upstream = await createServer(upstreamFixtures, { port: 0 });
+
+  // Create temp directory for recorded fixtures
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-"));
+
+  // Create recording llmock (no fixtures — everything proxies)
+  const providers: Record<string, string> = {};
+  providers[providerKey] = upstream.url;
+
+  recorder = await createServer([], {
+    port: 0,
+    record: { providers, fixturePath: tmpDir },
+  });
+
+  return {
+    upstreamUrl: upstream.url,
+    recorderUrl: recorder.url,
+    fixturePath: tmpDir,
+  };
+}
diff --git a/src/__tests__/server.test.ts b/src/__tests__/server.test.ts
index 4993444..3a61f4d 100644
--- a/src/__tests__/server.test.ts
+++ b/src/__tests__/server.test.ts
@@ -789,7 +789,7 @@ describe("journal", () => {
     );
 
     const entry = instance.journal.getLast();
-    expect(entry!.headers["authorization"]).toBe("Bearer sk-test");
+    expect(entry!.headers["authorization"]).toBe("[REDACTED]");
   });
 });
 
@@ -1016,7 +1016,7 @@ describe("header forwarding in journal", () => {
     const entry = instance.journal.getLast();
     expect(entry).not.toBeNull();
-    expect(entry!.headers["authorization"]).toBe("Bearer test-key");
+    expect(entry!.headers["authorization"]).toBe("[REDACTED]");
     expect(entry!.headers["x-custom-header"]).toBe("custom-value");
     expect(entry!.headers["content-type"]).toBe("application/json");
   });
@@ -1055,7 +1055,7 @@ describe("header forwarding in journal", () => {
     const entries = JSON.parse(res.body);
     expect(entries).toHaveLength(1);
-    expect(entries[0].headers["authorization"]).toBe("Bearer api-key-123");
+    
expect(entries[0].headers["authorization"]).toBe("[REDACTED]"); expect(entries[0].headers["x-request-id"]).toBe("req-abc-def"); expect(entries[0].headers["content-type"]).toBe("application/json"); expect(entries[0].headers["host"]).toBeDefined(); @@ -1075,8 +1075,8 @@ describe("header forwarding in journal", () => { const entries = instance.journal.getAll(); expect(entries).toHaveLength(2); - expect(entries[0].headers["authorization"]).toBe("Bearer key-one"); - expect(entries[1].headers["authorization"]).toBe("Bearer key-two"); + expect(entries[0].headers["authorization"]).toBe("[REDACTED]"); + expect(entries[1].headers["authorization"]).toBe("[REDACTED]"); }); }); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts new file mode 100644 index 0000000..eb35fb3 --- /dev/null +++ b/src/__tests__/stream-collapse.test.ts @@ -0,0 +1,1429 @@ +import { describe, it, expect } from "vitest"; +import { + collapseOpenAISSE, + collapseAnthropicSSE, + collapseGeminiSSE, + collapseOllamaNDJSON, + collapseCohereSS, + collapseBedrockEventStream, + collapseStreamingResponse, +} from "../stream-collapse.js"; +import { encodeEventStreamMessage, encodeEventStreamFrame } from "../aws-event-stream.js"; + +// --------------------------------------------------------------------------- +// 1. OpenAI SSE +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE", () => { + it("collapses text content from SSE chunks", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { role: "assistant" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: "Hello" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: " world" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: "!" 
} }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Hello world!"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool calls with merged arguments", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_abc", + type: "function", + function: { name: "get_weather", arguments: '{"ci' }, + }, + ], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'ty":"Pa' }, + }, + ], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'ris"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("call_abc"); + expect(result.content).toBeUndefined(); + }); + + it("handles multiple tool calls", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-789", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { name: "func_a", arguments: '{"x":1}' }, + }, + { + index: 1, + id: "call_2", + type: "function", + function: { name: "func_b", arguments: '{"y":2}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("func_a"); + expect(result.toolCalls![1].name).toBe("func_b"); + }); + + it("returns empty content for empty stream", () => { + 
const body = "data: [DONE]\n\n"; + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "A" } }] })}`, + "", + `data: {INVALID JSON!!!`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "B" } }] })}`, + "", + `data: also broken`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "C" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("ABC"); + expect(result.droppedChunks).toBe(2); + }); + + it("choices with no delta property are skipped (continue)", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-nd", choices: [{ finish_reason: "stop" }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-nd", choices: [{ delta: { content: "OK" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("OK"); + }); + + it("captures both text deltas and tool call deltas in same stream", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-mix", + choices: [{ delta: { content: "Calling tool..." 
} }], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-mix", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_mix", + type: "function", + function: { name: "lookup", arguments: '{"q":"test"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + // When tool calls exist, they win over content + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("lookup"); + expect(result.toolCalls![0].arguments).toBe('{"q":"test"}'); + }); +}); + +// --------------------------------------------------------------------------- +// 2. Anthropic SSE +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE", () => { + it("collapses text content from SSE chunks", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ type: "message_start", message: { id: "msg_123", role: "assistant" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: " world" } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Hello world"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool use with input_json_delta", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ 
type: "message_start", message: { id: "msg_456" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "toolu_abc", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"ci' } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: 'ty":"Paris"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("toolu_abc"); + expect(result.content).toBeUndefined(); + }); + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hi" } })}`, + "", + `event: content_block_delta`, + `data: {BROKEN JSON`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: " there" } })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Hi there"); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// 3. 
Gemini SSE +// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE", () => { + it("collapses text content from data-only SSE", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Hello" }] } }] })}`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: " world" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("Hello world"); + }); + + it("handles empty candidates gracefully", () => { + const body = `data: ${JSON.stringify({ candidates: [] })}\n\n`; + const result = collapseGeminiSSE(body); + expect(result.content).toBe(""); + }); + + it("collapses functionCall parts into toolCalls", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "Paris" }, + }, + }, + ], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(result.toolCalls![0].arguments)).toEqual({ city: "Paris" }); + expect(result.content).toBeUndefined(); + }); + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "X" }] } }] })}`, + "", + `data: NOT VALID JSON AT ALL`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Y" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("XY"); + expect(result.droppedChunks).toBe(1); + }); + + it("includes droppedChunks in functionCall return path (bug fix)", () => { + const body = [ + `data: NOT VALID JSON`, + "", + `data: ${JSON.stringify({ + 
candidates: [ + { + content: { + role: "model", + parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "Paris" }, + }, + }, + ], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.droppedChunks).toBe(1); + }); + + it("candidate with no content property is skipped (continue)", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ finishReason: "SAFETY" }] })}`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "OK" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("OK"); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Ollama NDJSON +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON", () => { + it("collapses /api/chat format (message.content)", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "Hello" }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: " world" }, + done: false, + }), + JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Hello world"); + }); + + it("collapses /api/generate format (response field)", () => { + const body = [ + JSON.stringify({ model: "llama3", response: "Hello", done: false }), + JSON.stringify({ model: "llama3", response: " world", done: false }), + JSON.stringify({ model: "llama3", response: "", done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Hello world"); + }); +}); + +// 
--------------------------------------------------------------------------- +// 5. Cohere SSE +// --------------------------------------------------------------------------- + +describe("collapseCohereSS", () => { + it("collapses text content from content-delta events", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "Hello" } } } })}`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: " world" } } } })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "COMPLETE" } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe("Hello world"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool calls from tool-call events", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_xyz", + type: "function", + function: { name: "get_weather", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"city"' } } } }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: ':"Paris"}' } } } }, + })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "TOOL_CALL" } })}`, + "", + 
].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("call_xyz"); + expect(result.content).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 6. Bedrock EventStream (binary) +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream", () => { + it("collapses text content from binary event frames", () => { + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Hello" }, + }, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: " world" }, + }, + }); + + const buf = Buffer.concat([frame1, frame2]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Hello world"); + }); + + it("handles empty buffer", () => { + const result = collapseBedrockEventStream(Buffer.alloc(0)); + expect(result.content).toBe(""); + }); + + it("collapses tool call from contentBlockStart + contentBlockDelta with toolUse", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { + toolUse: { + toolUseId: "tool_123", + name: "get_weather", + }, + }, + }, + }); + const deltaFrame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { + toolUse: { input: '{"ci' }, + }, + }, + }); + const deltaFrame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { + toolUse: { input: 'ty":"Paris"}' }, + }, + }, + }); + + const buf = 
Buffer.concat([startFrame, deltaFrame1, deltaFrame2]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("tool_123"); + }); + + it("stops parsing gracefully on corrupted prelude CRC", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Good" }, + }, + }); + const badFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Bad" }, + }, + }); + // Corrupt the prelude CRC (bytes 8-11) of the bad frame + const badFrameBuf = Buffer.from(badFrame); + badFrameBuf.writeUInt32BE(0xdeadbeef, 8); + + const buf = Buffer.concat([goodFrame, badFrameBuf]); + const result = collapseBedrockEventStream(buf); + // Should parse the good frame but stop at the corrupted one + expect(result.content).toBe("Good"); + }); +}); + +// --------------------------------------------------------------------------- +// collapseStreamingResponse dispatch +// --------------------------------------------------------------------------- + +describe("collapseStreamingResponse", () => { + it("returns null for application/json (not streaming)", () => { + const result = collapseStreamingResponse("application/json", "openai", '{"choices":[]}'); + expect(result).toBeNull(); + }); + + it("dispatches text/event-stream to OpenAI for openai provider", () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "hi" } }] })}\n\ndata: [DONE]\n\n`; + const result = collapseStreamingResponse("text/event-stream", "openai", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Anthropic for anthropic provider", () => { + const body = [ + `event: content_block_delta`, + 
`data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } })}`, + "", + ].join("\n"); + const result = collapseStreamingResponse("text/event-stream", "anthropic", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Gemini for gemini provider", () => { + const body = `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "hi" }] } }] })}\n\n`; + const result = collapseStreamingResponse("text/event-stream", "gemini", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches application/x-ndjson to Ollama", () => { + const body = JSON.stringify({ + model: "m", + message: { role: "assistant", content: "hi" }, + done: true, + }); + const result = collapseStreamingResponse("application/x-ndjson", "ollama", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Cohere for cohere provider", () => { + const body = [ + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "hi" } } } })}`, + "", + ].join("\n"); + const result = collapseStreamingResponse("text/event-stream", "cohere", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches application/vnd.amazon.eventstream to Bedrock", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "hi" } }, + }); + const result = collapseStreamingResponse( + "application/vnd.amazon.eventstream", + "bedrock", + frame, + ); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it('dispatches text/event-stream with "azure" to OpenAI collapse', () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "azure-hi" } }] })}\n\ndata: [DONE]\n\n`; + const result 
= collapseStreamingResponse("text/event-stream", "azure", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("azure-hi"); + }); + + it('dispatches text/event-stream with "vertexai" to Gemini collapse', () => { + const body = `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "vertex-hi" }] } }] })}\n\n`; + const result = collapseStreamingResponse("text/event-stream", "vertexai", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("vertex-hi"); + }); + + it('dispatches text/event-stream with "unknown-provider" to OpenAI collapse (fallback)', () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "fallback-hi" } }] })}\n\ndata: [DONE]\n\n`; + const result = collapseStreamingResponse("text/event-stream", "unknown-provider", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("fallback-hi"); + }); + + it("Bedrock: string body through collapseStreamingResponse (not Buffer)", () => { + // Build a valid frame and convert to binary string + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "str-body" } }, + }); + const binaryStr = frame.toString("binary"); + const result = collapseStreamingResponse( + "application/vnd.amazon.eventstream", + "bedrock", + binaryStr, + ); + expect(result).not.toBeNull(); + expect(result!.content).toBe("str-body"); + }); + + it("collapseStreamingResponse with Buffer input for non-Bedrock SSE provider", () => { + const sseStr = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "buf-hi" } }] })}\n\ndata: [DONE]\n\n`; + const buf = Buffer.from(sseStr, "utf8"); + const result = collapseStreamingResponse("text/event-stream", "openai", buf); + expect(result).not.toBeNull(); + expect(result!.content).toBe("buf-hi"); + }); +}); + +// --------------------------------------------------------------------------- +// droppedChunks: Ollama, Cohere, Bedrock +// 
--------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON droppedChunks", () => { + it("counts droppedChunks for malformed JSON lines mixed with valid ones", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "A" }, + done: false, + }), + "NOT VALID JSON", + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "B" }, + done: false, + }), + "{also broken", + JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("AB"); + expect(result.droppedChunks).toBe(2); + }); +}); + +describe("collapseCohereSS droppedChunks", () => { + it("counts droppedChunks for malformed JSON events mixed with valid ones", () => { + const body = [ + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "X" } } } })}`, + "", + `event: content-delta`, + `data: {BROKEN`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "Y" } } } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe("XY"); + expect(result.droppedChunks).toBe(1); + }); +}); + +describe("collapseBedrockEventStream droppedChunks", () => { + it("counts droppedChunks for valid frame with malformed JSON payload", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Good" } }, + }); + + // Build a frame with non-JSON payload + const badPayload = Buffer.from("NOT JSON AT ALL", "utf8"); + const badFrame = encodeEventStreamFrame( + { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + ":message-type": "event", + }, + badPayload, + ); + + const goodFrame2 = 
encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: " data" } }, + }); + + const buf = Buffer.concat([goodFrame, badFrame, goodFrame2]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Good data"); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Message CRC validation +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream message CRC validation", () => { + it("stops parsing on corrupted message CRC", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Good" } }, + }); + const badFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Bad" } }, + }); + // Corrupt the message CRC (last 4 bytes) of the bad frame + const badFrameBuf = Buffer.from(badFrame); + badFrameBuf.writeUInt32BE(0xdeadbeef, badFrameBuf.length - 4); + + const buf = Buffer.concat([goodFrame, badFrameBuf]); + const result = collapseBedrockEventStream(buf); + // Should parse the good frame but stop at the corrupted one + expect(result.content).toBe("Good"); + }); +}); + +// --------------------------------------------------------------------------- +// Multiple tool calls: Anthropic, Cohere, Bedrock +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE multiple tool calls", () => { + it("collapses 2 tool_use blocks at different content_block indices", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ type: "message_start", message: { id: "msg_multi" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta`, + `data: 
${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"city":"NYC"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 1, content_block: { type: "tool_use", id: "toolu_2", name: "get_time", input: {} } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 1, delta: { type: "input_json_delta", partial_json: '{"tz":"EST"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 1 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("toolu_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("toolu_2"); + }); +}); + +describe("collapseCohereSS multiple tool calls", () => { + it("collapses 2 tool-call-start events at different indices", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_1", + type: "function", + function: { name: "get_weather", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: 
'{"city":"NYC"}' } } } }, + })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 1, + delta: { + message: { + tool_calls: { + id: "call_2", + type: "function", + function: { name: "get_time", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 1, + delta: { message: { tool_calls: { function: { arguments: '{"tz":"EST"}' } } } }, + })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "TOOL_CALL" } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("call_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("call_2"); + }); +}); + +describe("collapseBedrockEventStream multiple tool calls", () => { + it("collapses 2 contentBlockStart+contentBlockDelta pairs at different indices", () => { + const startFrame0 = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { toolUse: { toolUseId: "tool_1", name: "get_weather" } }, + }, + }); + const deltaFrame0 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"city":"NYC"}' } }, + }, + }); + const startFrame1 = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 1, + contentBlockStart: { + contentBlockIndex: 1, + start: { toolUse: { toolUseId: "tool_2", name: "get_time" } }, + }, + }); + const deltaFrame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 1, + 
contentBlockDelta: { + contentBlockIndex: 1, + delta: { toolUse: { input: '{"tz":"EST"}' } }, + }, + }); + + const buf = Buffer.concat([startFrame0, deltaFrame0, startFrame1, deltaFrame1]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("tool_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("tool_2"); + }); +}); + +// --------------------------------------------------------------------------- +// Empty input: Ollama, Anthropic, Cohere +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Defensive branch coverage — OpenAI +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: something", "", "data: [DONE]", ""].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("empty choices array is skipped", () => { + const body = [ + `data: ${JSON.stringify({ id: "c1", choices: [] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("tool call delta with no id — result toolCall has no id field", () => { + const body = [ + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + type: "function", + function: { name: "fn", arguments: '{"x":1}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + 
expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `data: {BROKEN JSON`, + "", + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { name: "fn", arguments: '{"x":1}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Anthropic +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: content_block_delta", ""].join("\n"); + const result = collapseAnthropicSSE(body); + expect(result.content).toBe(""); + }); + + it("tool_use content_block_start with no id — result has no id field", () => { + const body = [ + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + index: 0, + content_block: { type: "tool_use", name: "fn", input: {} }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"x":1}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned input_json_delta for unknown index — no 
crash, data ignored", () => { + const body = [ + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 5, + delta: { type: "input_json_delta", partial_json: '{"orphan":true}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + // No tool calls created, no crash + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `event: content_block_start`, + `data: {BROKEN`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + index: 0, + content_block: { type: "tool_use", id: "toolu_1", name: "fn", input: {} }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"x":1}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Gemini +// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE defensive branches", () => { + it("empty parts array is skipped", () => { + const body = [`data: ${JSON.stringify({ candidates: [{ content: { parts: [] } }] })}`, ""].join( + "\n", + ); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe(""); + }); + + it("functionCall args as string — preserved as string", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ functionCall: { name: "fn", args: "already-a-string" } }], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + 
expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].arguments).toBe("already-a-string"); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Cohere +// --------------------------------------------------------------------------- + +describe("collapseCohereSS defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: content-delta", ""].join("\n"); + const result = collapseCohereSS(body); + expect(result.content).toBe(""); + }); + + it("tool-call-start with no id — result has no id field", () => { + const body = [ + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + type: "function", + function: { name: "fn", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned tool-call-delta for unknown index — no crash", () => { + const body = [ + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 5, + delta: { message: { tool_calls: { function: { arguments: '{"orphan":true}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `event: tool-call-start`, + `data: {BROKEN`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: 
"tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_1", + type: "function", + function: { name: "fn", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Bedrock +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream defensive branches", () => { + it("contentBlockStart without toolUse — no tool entry created", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: {}, + }, + }); + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Hello" } }, + }); + + const buf = Buffer.concat([startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Hello"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("contentBlockDelta without delta — skipped", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + }, + }); + + const buf = Buffer.from(frame); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe(""); + }); + + it("tool call with no toolUseId — result has no id field", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { + toolUse: { name: "fn" }, + }, + }, + }); + const 
deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"x":1}' } }, + }, + }); + + const buf = Buffer.concat([startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned toolUse delta for unknown index — no crash", () => { + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 5, + contentBlockDelta: { + contentBlockIndex: 5, + delta: { toolUse: { input: '{"orphan":true}' } }, + }, + }); + + const buf = Buffer.from(deltaFrame); + const result = collapseBedrockEventStream(buf); + // No tool entry for index 5, so delta is silently ignored + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { toolUse: { toolUseId: "tool_1", name: "fn" } }, + }, + }); + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"x":1}' } }, + }, + }); + + // Build a frame with non-JSON payload for droppedChunks + const badPayload = Buffer.from("NOT JSON", "utf8"); + const badFrame = encodeEventStreamFrame( + { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + ":message-type": "event", + }, + badPayload, + ); + + const buf = Buffer.concat([badFrame, startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + 
expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Ollama +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON defensive branches", () => { + it("line with neither message.content nor response — no content added", () => { + const body = [JSON.stringify({ model: "x", done: true })].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe(""); + }); +}); + +// --------------------------------------------------------------------------- +// Original empty input tests +// --------------------------------------------------------------------------- + +describe("empty input collapse", () => { + it('collapseOllamaNDJSON("") returns { content: "" }', () => { + const result = collapseOllamaNDJSON(""); + expect(result.content).toBe(""); + }); + + it('collapseAnthropicSSE("") returns { content: "" }', () => { + const result = collapseAnthropicSSE(""); + expect(result.content).toBe(""); + }); + + it('collapseCohereSS("") returns { content: "" }', () => { + const result = collapseCohereSS(""); + expect(result.content).toBe(""); + }); +}); diff --git a/src/__tests__/vertex-ai.test.ts b/src/__tests__/vertex-ai.test.ts new file mode 100644 index 0000000..fc033ac --- /dev/null +++ b/src/__tests__/vertex-ai.test.ts @@ -0,0 +1,524 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + 
path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function parseGeminiSSEChunks(body: string): unknown[] { + const chunks: unknown[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ")) { + chunks.push(JSON.parse(line.slice(6))); + } + } + return chunks; +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" }, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"NYC"}', + }, + ], + }, +}; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +const VERTEX_BASE = "/v1/projects/my-project/locations/us-central1/publishers/google/models"; + +function vertexUrl(base: string, model: string, action: string): string { + return `${base}${VERTEX_BASE}/${model}:${action}`; +} + +const geminiBody = (text: string) => ({ + contents: [{ role: "user", parts: [{ text }] }], +}); + +// ─── Non-streaming (generateContent) ──────────────────────────────────────── + +describe("Vertex AI: generateContent (non-streaming)", () => { + it("routes to Gemini handler and returns correct text response", async () => { + instance = await createServer([textFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(200); + 
expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.candidates).toHaveLength(1); + expect(body.candidates[0].content.role).toBe("model"); + expect(body.candidates[0].content.parts[0].text).toBe("Hi there!"); + expect(body.candidates[0].finishReason).toBe("STOP"); + expect(body.usageMetadata).toBeDefined(); + }); + + it("extracts model name from URL path and records it in journal", async () => { + instance = await createServer([textFixture]); + await post(vertexUrl(instance.url, "gemini-1.5-pro", "generateContent"), geminiBody("hello")); + + const entry = instance.journal.getLast(); + expect(entry).not.toBeNull(); + expect(entry!.body.model).toBe("gemini-1.5-pro"); + }); + + it("returns tool call response with functionCall parts", async () => { + instance = await createServer([toolFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("weather"), + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.candidates[0].content.parts[0].functionCall).toBeDefined(); + expect(body.candidates[0].content.parts[0].functionCall.name).toBe("get_weather"); + expect(body.candidates[0].content.parts[0].functionCall.args).toEqual({ city: "NYC" }); + expect(body.candidates[0].finishReason).toBe("FUNCTION_CALL"); + }); +}); + +// ─── Streaming (streamGenerateContent) ────────────────────────────────────── + +describe("Vertex AI: streamGenerateContent (streaming)", () => { + it("streams text response as SSE", async () => { + instance = await createServer([textFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { role: string; parts: { text?: string }[] }; + 
finishReason?: string; + }[]; + usageMetadata?: unknown; + }[]; + + expect(chunks.length).toBeGreaterThan(0); + + // Reconstruct content from text parts + const fullText = chunks.map((c) => c.candidates[0].content.parts[0].text ?? "").join(""); + expect(fullText).toBe("Hi there!"); + + // Last chunk has finishReason + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("STOP"); + expect(lastChunk.usageMetadata).toBeDefined(); + }); + + it("streams tool calls as SSE", async () => { + instance = await createServer([toolFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("weather"), + ); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { + parts: { functionCall?: { name: string; args: unknown } }[]; + }; + finishReason?: string; + }[]; + }[]; + + expect(chunks).toHaveLength(1); + expect(chunks[0].candidates[0].content.parts[0].functionCall!.name).toBe("get_weather"); + expect(chunks[0].candidates[0].finishReason).toBe("FUNCTION_CALL"); + }); +}); + +// ─── Response format parity with consumer Gemini ──────────────────────────── + +describe("Vertex AI: response format matches consumer Gemini", () => { + it("non-streaming responses are identical", async () => { + instance = await createServer([textFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:generateContent`, + geminiBody("hello"), + ); + + const vertexBody = JSON.parse(vertexRes.body); + const geminiBody_ = JSON.parse(geminiRes.body); + + // Structure should be identical (candidates, usageMetadata) + expect(vertexBody.candidates[0].content).toEqual(geminiBody_.candidates[0].content); + 
expect(vertexBody.candidates[0].finishReason).toEqual(geminiBody_.candidates[0].finishReason); + expect(Object.keys(vertexBody)).toEqual(Object.keys(geminiBody_)); + }); + + it("streaming responses are identical", async () => { + instance = await createServer([textFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("hello"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + geminiBody("hello"), + ); + + const vertexChunks = parseGeminiSSEChunks(vertexRes.body); + const geminiChunks = parseGeminiSSEChunks(geminiRes.body); + + expect(vertexChunks.length).toBe(geminiChunks.length); + // Each chunk should have the same structure + for (let i = 0; i < vertexChunks.length; i++) { + expect(vertexChunks[i]).toEqual(geminiChunks[i]); + } + }); +}); + +// ─── Tool call parity with consumer Gemini ────────────────────────────────── + +describe("Vertex AI: tool call parity with consumer Gemini", () => { + it("non-streaming tool call responses have same structure", async () => { + instance = await createServer([toolFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("weather"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:generateContent`, + geminiBody("weather"), + ); + + const vertexBody = JSON.parse(vertexRes.body); + const geminiBody_ = JSON.parse(geminiRes.body); + + // Both should have FUNCTION_CALL finish reason + expect(vertexBody.candidates[0].finishReason).toBe("FUNCTION_CALL"); + expect(geminiBody_.candidates[0].finishReason).toBe("FUNCTION_CALL"); + + // Same role + expect(vertexBody.candidates[0].content.role).toBe(geminiBody_.candidates[0].content.role); + + // Same function name and args (IDs differ since they're randomly generated) + const vertexFc = vertexBody.candidates[0].content.parts[0].functionCall; + 
const geminiFc = geminiBody_.candidates[0].content.parts[0].functionCall; + expect(vertexFc.name).toBe(geminiFc.name); + expect(vertexFc.args).toEqual(geminiFc.args); + + // Same top-level keys + expect(Object.keys(vertexBody)).toEqual(Object.keys(geminiBody_)); + }); + + it("streaming tool call responses have same structure", async () => { + instance = await createServer([toolFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("weather"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + geminiBody("weather"), + ); + + const vertexChunks = parseGeminiSSEChunks(vertexRes.body) as Array>; + const geminiChunks = parseGeminiSSEChunks(geminiRes.body) as Array>; + + expect(vertexChunks.length).toBe(geminiChunks.length); + + // Compare structure: same finishReason, same function name/args + for (let i = 0; i < vertexChunks.length; i++) { + const vc = vertexChunks[i].candidates as Array>; + const gc = geminiChunks[i].candidates as Array>; + expect(vc[0].finishReason).toBe(gc[0].finishReason); + const vContent = vc[0].content as Record; + const gContent = gc[0].content as Record; + expect(vContent.role).toBe(gContent.role); + const vParts = vContent.parts as Array>; + const gParts = gContent.parts as Array>; + // Same function name and args + const vFc = vParts[0].functionCall as Record; + const gFc = gParts[0].functionCall as Record; + expect(vFc.name).toBe(gFc.name); + expect(vFc.args).toEqual(gFc.args); + } + }); +}); + +// ─── Query parameter resilience ───────────────────────────────────────────── + +describe("Vertex AI: query parameter resilience", () => { + it("?alt=sse does not break routing", async () => { + instance = await createServer([textFixture]); + const urlPath = `${VERTEX_BASE}/gemini-2.0-flash:streamGenerateContent`; + + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const 
data = JSON.stringify(geminiBody("hello")); + const parsed = new URL(instance!.url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: `${urlPath}?alt=sse`, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body); + const fullText = chunks + .map( + (c) => + ((c as Record).candidates as Array>)?.[0] && + ( + ( + ( + (c as Record).candidates as Array> + )?.[0] as Record + )?.content as Record + )?.parts, + ) + .filter(Boolean) + .map((parts) => ((parts as Array>)[0]?.text as string) ?? "") + .join(""); + expect(fullText).toBe("Hi there!"); + }); +}); + +// ─── Various project/location combinations ────────────────────────────────── + +describe("Vertex AI: various project/location combinations", () => { + const combos = [ + { project: "my-project", location: "us-central1" }, + { project: "prod-123", location: "europe-west4" }, + { project: "test_project_456", location: "asia-east1" }, + { project: "my-org-project", location: "us-east1" }, + ]; + + for (const { project, location } of combos) { + it(`routes ${project}/${location} correctly`, async () => { + instance = await createServer([textFixture]); + const path = `/v1/projects/${project}/locations/${location}/publishers/google/models/gemini-2.0-flash:generateContent`; + const res = await post(`${instance.url}${path}`, geminiBody("hello")); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.candidates[0].content.parts[0].text).toBe("Hi there!"); + + // Clean up for next iteration + await new Promise((resolve) => 
{ + instance!.server.close(() => resolve()); + }); + instance = null; + }); + } +}); + +// ─── Malformed URL / Wrong method / Strict mode ───────────────────────────── + +describe("Vertex AI: malformed URL", () => { + it("22a. returns 404 for unknown action in URL", async () => { + instance = await createServer([textFixture]); + const res = await post( + `${instance.url}/v1/projects/p/locations/l/publishers/google/models/m:unknownAction`, + geminiBody("hello"), + ); + + expect(res.status).toBe(404); + }); +}); + +describe("Vertex AI: wrong HTTP method", () => { + it("22b. returns 404 for GET to a valid Vertex AI path", async () => { + instance = await createServer([textFixture]); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const parsed = new URL(vertexUrl(instance!.url, "gemini-2.0-flash", "generateContent")); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); + + expect(res.status).toBe(404); + }); +}); + +describe("Vertex AI: malformed JSON body", () => { + it("returns 400 for non-JSON body", async () => { + instance = await createServer([textFixture]); + const parsed = new URL(vertexUrl(instance.url, "gemini-2.0-flash", "generateContent")); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const raw = "not json"; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (r) => { + const chunks: Buffer[] = []; + r.on("data", (c: Buffer) => chunks.push(c)); + r.on("end", () => { + resolve({ + status: r.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +describe("Vertex AI: strict mode", () => { + it("22c. 
returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Chaos ────────────────────────────────────────────────────────────────── + +describe("Vertex AI: chaos applies", () => { + it("drops request when dropRate is 1.0", async () => { + instance = await createServer([textFixture], { chaos: { dropRate: 1.0 } }); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); + + it("records chaos action in journal", async () => { + instance = await createServer([textFixture], { chaos: { dropRate: 1.0 } }); + await post(vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), geminiBody("hello")); + + const entries = instance.journal.getAll(); + expect(entries).toHaveLength(1); + expect(entries[0].response.chaosAction).toBe("drop"); + }); +}); From 1feef7863d40d9d711f0824c1703dd6c8fe71e18 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:39 -0700 Subject: [PATCH 04/13] docs: update skill and README for v1.6.0 features --- README.md | 42 ++++++--- skills/write-fixtures/SKILL.md | 151 +++++++++++++++++++++++++++++---- 2 files changed, 162 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index f310c12..b14985a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # @copilotkit/llmock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift 
Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock) -Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, and Azure API formats, driven entirely by fixtures. Zero runtime dependencies. +Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies. ## Quick Start @@ -72,17 +72,20 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r ## Features -- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html) +- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html) - **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions - **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling - **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call - **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing - **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live - **[Error 
injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats +- **[Chaos testing](https://llmock.copilotkit.dev/chaos.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects +- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics` - **[Request journal](https://llmock.copilotkit.dev/docs.html)** — Record, inspect, and assert on every request - **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load` - **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing - **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines +- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay - **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes - **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly @@ -92,17 +95,24 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r llmock [options] ``` -| Option | Short | Default | Description | -| -------------------- | ----- | ------------ | ----------------------------------------- | -| `--port` | `-p` | `4010` | Port to listen on | -| `--host` | `-h` | `127.0.0.1` | Host to bind to | -| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | -| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | -| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | -| `--watch` | `-w` | | Watch fixture path for changes and reload | -| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | -| `--validate-on-load` | | | Validate fixture schemas at startup | -| `--help` | | | Show help | +| Option | Short | Default | Description | +| -------------------- | ----- | ------------ | ------------------------------------------- | +| `--port` | `-p` | `4010` | Port to listen on | +| `--host` | `-h` | `127.0.0.1` | Host to bind to | +| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | +| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | +| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | +| `--watch` | `-w` | | Watch fixture path for changes and reload | +| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | +| `--validate-on-load` | | | Validate fixture schemas at startup | +| `--chaos-drop` | | `0` | Chaos: probability of 500 errors (0-1) | +| `--chaos-malformed` | | `0` | Chaos: probability of malformed JSON (0-1) | +| `--chaos-disconnect` | | `0` | Chaos: probability of disconnect (0-1) | +| `--metrics` | | | Enable Prometheus metrics at /metrics | +| `--record` | | | Record mode: proxy unmatched to real APIs | +| `--strict` | | | Strict mode: fail on unmatched requests | +| `--provider-*` | | | Upstream URL per provider (with `--record`) | +| `--help` | | | Show help | ```bash # Start with bundled example fixtures @@ -113,6 +123,12 @@ llmock -p 8080 -f 
./my-fixtures # Simulate slow responses llmock --latency 100 --chunk-size 5 + +# Record mode: proxy unmatched requests to real APIs and save as fixtures +llmock --record --provider-openai https://api.openai.com --provider-anthropic https://api.anthropic.com + +# Strict mode in CI: fail if any request doesn't match a fixture +llmock --strict -f ./fixtures ``` ## Documentation diff --git a/skills/write-fixtures/SKILL.md b/skills/write-fixtures/SKILL.md index cfaeb24..6c2e102 100644 --- a/skills/write-fixtures/SKILL.md +++ b/skills/write-fixtures/SKILL.md @@ -7,7 +7,7 @@ description: Use when writing test fixtures for @copilotkit/llmock — mock LLM ## What llmock Is -Zero-dependency mock LLM server. Fixture-driven. Multi-provider (OpenAI, Anthropic, Gemini, AWS Bedrock, Azure OpenAI). Runs a real HTTP server on a real port — works across processes, unlike MSW-style interceptors. WebSocket support for OpenAI Responses/Realtime and Gemini Live APIs. +Zero-dependency mock LLM server. Fixture-driven. Multi-provider (OpenAI, Anthropic, Gemini, AWS Bedrock, Azure OpenAI, Vertex AI, Ollama, Cohere). Runs a real HTTP server on a real port — works across processes, unlike MSW-style interceptors. WebSocket support for OpenAI Responses/Realtime and Gemini Live APIs. Chaos testing and Prometheus metrics. ## Core Mental Model @@ -73,6 +73,22 @@ The embedding vector is returned for each input in the request. If no embedding { error: { message: "Rate limited", type: "rate_limit_error" }, status: 429 } ``` +### Chaos (Failure Injection) + +The optional `chaos` field on a fixture enables probabilistic failure injection: + +```typescript +{ + chaos?: { + dropRate?: number; // Probability (0-1) of returning a 500 error + malformedRate?: number; // Probability (0-1) of returning malformed JSON + disconnectRate?: number; // Probability (0-1) of disconnecting mid-stream + } +} +``` + +Rates are evaluated per-request. When triggered, the chaos failure replaces the normal response. 
+ ## Common Patterns ### Basic text fixture @@ -212,6 +228,25 @@ mock.onMessage( ); ``` +### Chaos testing (probabilistic failures) + +```typescript +mock.addFixture({ + match: { userMessage: "flaky" }, + response: { content: "Sometimes works!" }, + chaos: { dropRate: 0.3 }, +}); +``` + +30% of requests matching this fixture will get a 500 error instead of the response. Can also use `malformedRate` (garbled JSON) or `disconnectRate` (connection dropped mid-stream). + +Server-level chaos applies to ALL requests: + +```typescript +mock.setChaos({ dropRate: 0.1 }); // 10% of all requests fail +mock.clearChaos(); // Remove server-level chaos +``` + ### Error injection (one-shot) ```typescript @@ -248,22 +283,32 @@ Load with `mock.loadFixtureFile("./fixtures/greetings.json")` or `mock.loadFixtu All providers share the same fixture pool — write fixtures once, they work for any endpoint. -| Endpoint | Provider | Protocol | -| ------------------------------------------------ | ------------- | --------- | -| `POST /v1/chat/completions` | OpenAI | HTTP | -| `POST /v1/responses` | OpenAI | HTTP + WS | -| `POST /v1/messages` | Anthropic | HTTP | -| `POST /v1/embeddings` | OpenAI | HTTP | -| `POST /v1beta/models/{model}:{method}` | Google Gemini | HTTP | -| `POST /model/{modelId}/invoke` | AWS Bedrock | HTTP | -| `POST /openai/deployments/{id}/chat/completions` | Azure OpenAI | HTTP | -| `POST /openai/deployments/{id}/embeddings` | Azure OpenAI | HTTP | -| `GET /health` | — | HTTP | -| `GET /ready` | — | HTTP | -| `GET /v1/models` | OpenAI-compat | HTTP | -| `WS /v1/responses` | OpenAI | WebSocket | -| `WS /v1/realtime` | OpenAI | WebSocket | -| `WS /ws/google.ai...BidiGenerateContent` | Gemini Live | WebSocket | +| Endpoint | Provider | Protocol | +| ---------------------------------------------------------------------------------------- | ------------- | --------- | +| `POST /v1/chat/completions` | OpenAI | HTTP | +| `POST /v1/responses` | OpenAI | HTTP + WS | +| `POST 
/v1/messages` | Anthropic | HTTP | +| `POST /v1/embeddings` | OpenAI | HTTP | +| `POST /v1beta/models/{model}:{method}` | Google Gemini | HTTP | +| `POST /model/{modelId}/invoke` | AWS Bedrock | HTTP | +| `POST /openai/deployments/{id}/chat/completions` | Azure OpenAI | HTTP | +| `POST /openai/deployments/{id}/embeddings` | Azure OpenAI | HTTP | +| `GET /health` | — | HTTP | +| `GET /ready` | — | HTTP | +| `POST /model/{modelId}/invoke-with-response-stream` | AWS Bedrock | HTTP | +| `POST /model/{modelId}/converse` | AWS Bedrock | HTTP | +| `POST /model/{modelId}/converse-stream` | AWS Bedrock | HTTP | +| `POST /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:generateContent` | Vertex AI | HTTP | +| `POST /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:streamGenerateContent` | Vertex AI | HTTP | +| `POST /api/chat` | Ollama | HTTP | +| `POST /api/generate` | Ollama | HTTP | +| `GET /api/tags` | Ollama | HTTP | +| `POST /v2/chat` | Cohere | HTTP | +| `GET /metrics` | — | HTTP | +| `GET /v1/models` | OpenAI-compat | HTTP | +| `WS /v1/responses` | OpenAI | WebSocket | +| `WS /v1/realtime` | OpenAI | WebSocket | +| `WS /ws/google.ai...BidiGenerateContent` | Gemini Live | WebSocket | ## Critical Gotchas @@ -289,10 +334,20 @@ All providers share the same fixture pool — write fixtures once, they work for 11. **Sequential response counts are tracked per fixture** — counts reset with `reset()` or `resetMatchCounts()`. The count increments after each match of that fixture group (all fixtures sharing the same non-`sequenceIndex` match fields). -12. **Bedrock uses Anthropic Messages format internally** — the adapter normalizes Bedrock requests to `ChatCompletionRequest`, so the same fixtures work. Bedrock is non-streaming only. +12. **Bedrock uses Anthropic Messages format internally** — the adapter normalizes Bedrock requests to `ChatCompletionRequest`, so the same fixtures work. 
Bedrock supports both non-streaming (`/invoke`, `/converse`) and streaming (`/invoke-with-response-stream`, `/converse-stream`) endpoints. 13. **Azure OpenAI routes through the same handlers** — `/openai/deployments/{id}/chat/completions` maps to the completions handler, `/openai/deployments/{id}/embeddings` maps to the embeddings handler. Fixtures work unchanged. +14. **Ollama defaults to streaming** — opposite of OpenAI. Set `stream: false` explicitly in the request for non-streaming responses. + +15. **Ollama tool call `arguments` is an object, not a JSON string** — unlike OpenAI where `arguments` is a JSON string, Ollama sends and expects a plain object. + +16. **Bedrock streaming uses binary Event Stream format** — not SSE. The `invoke-with-response-stream` and `converse-stream` endpoints use AWS Event Stream binary encoding. + +17. **Vertex AI routes to the same handler as consumer Gemini** — the same fixtures work for both Vertex AI (`/v1/projects/.../models/{m}:generateContent`) and consumer Gemini (`/v1beta/models/{model}:generateContent`). + +18. **Cohere requires `model` field** — returns 400 if `model` is missing from the request body. + ## Debugging Fixture Mismatches When a fixture doesn't match: @@ -351,7 +406,67 @@ const mock = await LLMock.create({ port: 0 }); // creates + starts in one call | `getRequests()` | All journal entries | | `getLastRequest()` | Most recent journal entry | | `clearRequests()` | Clear journal only | +| `setChaos(opts)` | Set server-level chaos rates | +| `clearChaos()` | Remove server-level chaos | | `url` / `baseUrl` | Server URL (throws if not started) | | `port` | Server port number | Sequential responses use `on()` with `sequenceIndex` in the match — there is no dedicated convenience method. 
+ +## Record-and-Replay (VCR Mode) + +llmock supports a VCR-style record-and-replay workflow: unmatched requests are proxied to real provider APIs, and the responses are saved as standard llmock fixture files for deterministic replay. + +### CLI usage + +```bash +# Record mode: proxy unmatched requests to real OpenAI and Anthropic APIs +llmock --record \ + --provider-openai https://api.openai.com \ + --provider-anthropic https://api.anthropic.com \ + -f ./fixtures + +# Strict mode: fail on unmatched requests (no proxying, no catch-all 404) +llmock --strict -f ./fixtures +``` + +- `--record` enables proxy-on-miss. Requires at least one `--provider-*` flag. +- `--strict` returns a 404 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. +- Provider flags: `--provider-openai`, `--provider-anthropic`, `--provider-gemini`, `--provider-vertexai`, `--provider-bedrock`, `--provider-azure`, `--provider-ollama`, `--provider-cohere`. + +### How it works + +1. **Existing fixtures are served first** — the router checks all loaded fixtures before considering the proxy. +2. **Misses are proxied** — if no fixture matches and recording is enabled, the request is forwarded to the real provider API. +3. **Auth headers are forwarded but NOT saved** — `Authorization`, `x-api-key`, and `api-key` headers are passed through to the upstream provider, but stripped from the recorded fixture. +4. **Responses are saved as standard fixtures** — recorded files land in `{fixturePath}/recorded/` and use the same JSON format as hand-written fixtures. Nothing special about them. +5. **Streaming responses are collapsed** — SSE streams are collapsed into a single text or tool-call response for the fixture. The original streaming format is preserved in the live proxy response. +6. **Loud logging** — every proxy hit logs at `info` level so you can see exactly which requests are being forwarded. 
+ +### Programmatic API + +```typescript +const mock = new LLMock({ port: 0 }); +await mock.start(); + +// Enable recording at runtime +mock.enableRecording({ + providers: { + openai: "https://api.openai.com", + anthropic: "https://api.anthropic.com", + }, + fixturePath: "./fixtures/recorded", +}); + +// ... run tests that hit real APIs for uncovered cases ... + +// Disable recording (back to fixture-only mode) +mock.disableRecording(); +``` + +### Workflow + +1. **Bootstrap**: Run your test suite with `--record` and provider URLs. All requests that don't match existing fixtures are proxied and recorded. +2. **Review**: Check the recorded fixtures in `{fixturePath}/recorded/`. Edit or reorganize as needed. +3. **Lock down**: Run your test suite with `--strict` to ensure every request hits a fixture. No network calls escape. +4. **Maintain**: When APIs change, delete stale fixtures and re-record. From 711a6002b2a7b48870d9ebee36d2f71bd730fd84 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 17:12:01 -0700 Subject: [PATCH 05/13] fix: restore CLI flags and defaults getters lost during merge Restore --record, --strict, --metrics, --provider-* CLI flags that were lost during commit regrouping. Restore getter-based defaults (get chaos(), get record(), get strict()) for live config propagation. Remove direct defaults mutation in setChaos/clearChaos/enableRecording/disableRecording since getters read from the options object directly. 
--- src/cli.ts | 48 +++++++++++++++++++++++++++++++++++++++++++++++- src/index.ts | 16 ++++++++++++++++ src/llmock.ts | 4 ---- src/server.ts | 12 +++++++++--- 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 20b6e29..56e3282 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,7 +6,7 @@ import { createServer } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir, validateFixtures } from "./fixture-loader.js"; import { Logger, type LogLevel } from "./logger.js"; import { watchFixtures } from "./watcher.js"; -import type { ChaosConfig } from "./types.js"; +import type { ChaosConfig, RecordConfig } from "./types.js"; const HELP = ` Usage: llmock [options] @@ -20,6 +20,17 @@ Options: -w, --watch Watch fixture path for changes and reload --log-level Log verbosity: silent, info, debug (default: info) --validate-on-load Validate fixture schemas at startup + --metrics Enable Prometheus metrics at GET /metrics + --record Record mode: proxy unmatched requests to real APIs + --strict Strict mode: fail on unmatched requests + --provider-openai Upstream URL for OpenAI (used with --record) + --provider-anthropic Upstream URL for Anthropic + --provider-gemini Upstream URL for Gemini + --provider-vertexai Upstream URL for Vertex AI + --provider-bedrock Upstream URL for Bedrock + --provider-azure Upstream URL for Azure OpenAI + --provider-ollama Upstream URL for Ollama + --provider-cohere Upstream URL for Cohere --chaos-drop Probability (0-1) of dropping requests with 500 --chaos-malformed Probability (0-1) of returning malformed JSON --chaos-disconnect Probability (0-1) of destroying connection @@ -36,6 +47,17 @@ const { values } = parseArgs({ watch: { type: "boolean", short: "w", default: false }, "log-level": { type: "string", default: "info" }, "validate-on-load": { type: "boolean", default: false }, + metrics: { type: "boolean", default: false }, + record: { type: "boolean", default: false }, + strict: { type: "boolean", 
default: false }, + "provider-openai": { type: "string" }, + "provider-anthropic": { type: "string" }, + "provider-gemini": { type: "string" }, + "provider-vertexai": { type: "string" }, + "provider-bedrock": { type: "string" }, + "provider-azure": { type: "string" }, + "provider-ollama": { type: "string" }, + "provider-cohere": { type: "string" }, "chaos-drop": { type: "string" }, "chaos-malformed": { type: "string" }, "chaos-disconnect": { type: "string" }, @@ -117,6 +139,27 @@ let chaos: ChaosConfig | undefined; } } +// Parse record config from CLI flags +let record: RecordConfig | undefined; +if (values.record) { + const providers: RecordConfig["providers"] = {}; + if (values["provider-openai"]) providers.openai = values["provider-openai"]; + if (values["provider-anthropic"]) providers.anthropic = values["provider-anthropic"]; + if (values["provider-gemini"]) providers.gemini = values["provider-gemini"]; + if (values["provider-vertexai"]) providers.vertexai = values["provider-vertexai"]; + if (values["provider-bedrock"]) providers.bedrock = values["provider-bedrock"]; + if (values["provider-azure"]) providers.azure = values["provider-azure"]; + if (values["provider-ollama"]) providers.ollama = values["provider-ollama"]; + if (values["provider-cohere"]) providers.cohere = values["provider-cohere"]; + + if (Object.keys(providers).length === 0) { + console.error("Error: --record requires at least one --provider-* flag"); + process.exit(1); + } + + record = { providers, fixturePath: resolve(fixturePath, "recorded") }; +} + async function main() { // Load fixtures from path (detect file vs directory) let isDir: boolean; @@ -171,6 +214,9 @@ async function main() { chunkSize, logLevel, chaos, + metrics: values.metrics, + record, + strict: values.strict, }); logger.info(`llmock server listening on ${instance.url}`); diff --git a/src/index.ts b/src/index.ts index a770b96..482a645 100644 --- a/src/index.ts +++ b/src/index.ts @@ -87,6 +87,21 @@ export type { StreamOptions 
} from "./sse-writer.js"; export { evaluateChaos, applyChaos } from "./chaos.js"; export type { ChaosAction } from "./chaos.js"; +// Recorder +export { proxyAndRecord } from "./recorder.js"; + +// Stream Collapse +export { + collapseOpenAISSE, + collapseAnthropicSSE, + collapseGeminiSSE, + collapseOllamaNDJSON, + collapseCohereSS, + collapseBedrockEventStream, + collapseStreamingResponse, +} from "./stream-collapse.js"; +export type { CollapseResult } from "./stream-collapse.js"; + // Types export type { ChatMessage, @@ -114,4 +129,5 @@ export type { FixtureOpts, EmbeddingFixtureOpts, ToolCallMessage, + RecordConfig, } from "./types.js"; diff --git a/src/llmock.ts b/src/llmock.ts index d338dcd..d528c8a 100644 --- a/src/llmock.ts +++ b/src/llmock.ts @@ -159,13 +159,11 @@ export class LLMock { setChaos(config: ChaosConfig): this { this.options.chaos = config; - if (this.serverInstance) this.serverInstance.defaults.chaos = config; return this; } clearChaos(): this { delete this.options.chaos; - if (this.serverInstance) delete this.serverInstance.defaults.chaos; return this; } @@ -173,13 +171,11 @@ export class LLMock { enableRecording(config: RecordConfig): this { this.options.record = config; - if (this.serverInstance) this.serverInstance.defaults.record = config; return this; } disableRecording(): this { delete this.options.record; - if (this.serverInstance) delete this.serverInstance.defaults.record; return this; } diff --git a/src/server.ts b/src/server.ts index 8f8b4b3..f1bae78 100644 --- a/src/server.ts +++ b/src/server.ts @@ -395,10 +395,16 @@ export async function createServer( latency: serverOptions.latency ?? 0, chunkSize: Math.max(1, serverOptions.chunkSize ?? 
DEFAULT_CHUNK_SIZE), logger, - chaos: options?.chaos, + get chaos() { + return serverOptions.chaos; + }, registry, - strict: options?.strict, - record: options?.record, + get record() { + return serverOptions.record; + }, + get strict() { + return serverOptions.strict; + }, }; const journal = new Journal(); From 924319468839c868413a0da7e6a5910255b33821 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:21 -0700 Subject: [PATCH 06/13] refactor: extract HandlerDefaults type, fix handler signatures, deduplicate ChaosAction - Create shared HandlerDefaults interface replacing 12+ inline type declarations - All handlers now have access to record, strict, registry fields (fixes silent undefined access) - Move ChaosAction type to types.ts to eliminate inline duplication in JournalEntry - Add RecordProviderKey string union for typed provider keys - Type OllamaMessage.role as union instead of bare string - Remove unused imports across all handler files - Fix bedrock.ts docstring to not overclaim /converse endpoints --- src/bedrock-converse.ts | 24 +++--------------------- src/bedrock.ts | 28 +++++++++++----------------- src/cohere.ts | 14 ++------------ src/embeddings.ts | 5 ++--- src/gemini.ts | 4 ++-- src/index.ts | 5 +++-- src/messages.ts | 4 ++-- src/ollama.ts | 26 ++++---------------------- src/responses.ts | 5 ++--- src/server.ts | 25 ++++--------------------- src/types.ts | 33 ++++++++++++++++++++++++++++++--- 11 files changed, 65 insertions(+), 108 deletions(-) diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 0880549..2ae10a2 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -9,11 +9,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -31,7 +30,6 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; 
import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; import { buildBedrockStreamTextEvents, buildBedrockStreamToolCallEvents } from "./bedrock.js"; @@ -210,15 +208,7 @@ export async function handleConverse( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -420,15 +410,7 @@ export async function handleConverseStream( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/bedrock.ts b/src/bedrock.ts index 3b8ffbf..6f3484d 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -1,19 +1,21 @@ /** - * AWS Bedrock Claude invoke endpoint support. + * AWS Bedrock Claude endpoint support. * - * Translates incoming POST /model/{modelId}/invoke requests (Bedrock Claude - * format) into the ChatCompletionRequest format used by the fixture router, - * and converts fixture responses back into the Anthropic Messages API - * non-streaming format (which Bedrock Claude SDKs expect as the response body). + * Handles POST /model/{modelId}/invoke and /invoke-with-response-stream + * requests. 
Translates incoming Bedrock Claude format into the + * ChatCompletionRequest format used by the fixture router, and converts + * fixture responses back into the appropriate Bedrock response format + * (JSON for invoke, AWS Event Stream binary encoding for streaming). + * + * See bedrock-converse.ts for /converse and /converse-stream support. */ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -244,7 +246,7 @@ export async function handleBedrock( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -553,15 +555,7 @@ export async function handleBedrockStream( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/cohere.ts b/src/cohere.ts index ba5099f..bfd1736 100644 --- a/src/cohere.ts +++ b/src/cohere.ts @@ -11,11 +11,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -34,7 +33,6 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; // ─── Cohere v2 Chat request types 
─────────────────────────────────────────── @@ -391,15 +389,7 @@ export async function handleCohere( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/embeddings.ts b/src/embeddings.ts index b86577a..3253fe8 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -7,7 +7,7 @@ */ import type * as http from "node:http"; -import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; +import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js"; import { isEmbeddingResponse, isErrorResponse, @@ -18,7 +18,6 @@ import { import { matchFixture } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; -import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; import { proxyAndRecord } from "./recorder.js"; @@ -40,7 +39,7 @@ export async function handleEmbeddings( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/gemini.ts b/src/gemini.ts index 9e5f096..5a357a6 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -379,7 +379,7 @@ export async function handleGemini( streaming: boolean, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: 
HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, providerKey: string = "gemini", ): Promise { diff --git a/src/index.ts b/src/index.ts index 482a645..ddb960a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -85,7 +85,7 @@ export type { StreamOptions } from "./sse-writer.js"; // Chaos export { evaluateChaos, applyChaos } from "./chaos.js"; -export type { ChaosAction } from "./chaos.js"; +export type { ChaosAction } from "./types.js"; // Recorder export { proxyAndRecord } from "./recorder.js"; @@ -96,7 +96,7 @@ export { collapseAnthropicSSE, collapseGeminiSSE, collapseOllamaNDJSON, - collapseCohereSS, + collapseCohereSSE, collapseBedrockEventStream, collapseStreamingResponse, } from "./stream-collapse.js"; @@ -130,4 +130,5 @@ export type { EmbeddingFixtureOpts, ToolCallMessage, RecordConfig, + RecordProviderKey, } from "./types.js"; diff --git a/src/messages.ts b/src/messages.ts index 5fb38d2..75e04f8 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -431,7 +431,7 @@ export async function handleMessages( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/ollama.ts b/src/ollama.ts index 0ddcc62..2f4f5bf 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -14,11 +14,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -30,13 +29,12 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; 
import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; // ─── Ollama request types ──────────────────────────────────────────────────── interface OllamaMessage { - role: string; + role: "system" | "user" | "assistant" | "tool"; content: string; } @@ -288,15 +286,7 @@ export async function handleOllama( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -539,15 +529,7 @@ export async function handleOllamaGenerate( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { setCorsHeaders(res); diff --git a/src/responses.ts b/src/responses.ts index 28e2af0..fb96cf3 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -28,7 +28,6 @@ import { matchFixture } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; -import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; import { proxyAndRecord } from "./recorder.js"; @@ -499,7 +498,7 @@ export async function handleResponses( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; 
chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { setCorsHeaders(res); diff --git a/src/server.ts b/src/server.ts index f1bae78..a5df546 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,9 +2,8 @@ import * as http from "node:http"; import type { Fixture, ChatCompletionRequest, - ChaosConfig, + HandlerDefaults, MockServerOptions, - RecordConfig, } from "./types.js"; import { Journal } from "./journal.js"; import { matchFixture } from "./router.js"; @@ -34,22 +33,14 @@ import { handleWebSocketRealtime } from "./ws-realtime.js"; import { handleWebSocketGeminiLive } from "./ws-gemini-live.js"; import { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "./metrics.js"; +import { createMetricsRegistry, normalizePathLabel } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; export interface ServerInstance { server: http.Server; journal: Journal; url: string; - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - strict?: boolean; - record?: RecordConfig; - }; + defaults: HandlerDefaults; } const COMPLETIONS_PATH = "/v1/chat/completions"; @@ -122,15 +113,7 @@ async function handleCompletions( res: http.ServerResponse, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - strict?: boolean; - record?: RecordConfig; - }, + defaults: HandlerDefaults, modelFallback?: string, providerKey?: string, ): Promise { diff --git a/src/types.ts b/src/types.ts index df0ee6b..02e601a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,4 +1,7 @@ -// OpenAI Chat Completion request types (subset we care about) +import type { Logger } from "./logger.js"; +import type { MetricsRegistry } from 
"./metrics.js"; + +// LLMock type definitions — shared across all provider adapters and the fixture router. export interface ContentPart { type: string; @@ -97,6 +100,8 @@ export interface ChaosConfig { disconnectRate?: number; } +export type ChaosAction = "drop" | "malformed" | "disconnect"; + // Fixture export interface Fixture { @@ -156,7 +161,7 @@ export interface JournalEntry { fixture: Fixture | null; interrupted?: boolean; interruptReason?: string; - chaosAction?: "drop" | "malformed" | "disconnect"; + chaosAction?: ChaosAction; }; } @@ -215,8 +220,18 @@ export interface ChatCompletionMessage { // Server options +export type RecordProviderKey = + | "openai" + | "anthropic" + | "gemini" + | "vertexai" + | "bedrock" + | "azure" + | "ollama" + | "cohere"; + export interface RecordConfig { - providers: Record; + providers: Partial>; fixturePath?: string; } @@ -235,3 +250,15 @@ export interface MockServerOptions { /** Record-and-replay: proxy unmatched requests to upstream and save fixtures. 
*/ record?: RecordConfig; } + +// Handler defaults — the common shape passed from server.ts to every handler + +export interface HandlerDefaults { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; +} From f527e23d73a2160244509c91eaf69be1d3848cfb Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:32 -0700 Subject: [PATCH 07/13] fix: recorder binary relay, Ollama tool_calls collapse, chaos rate clamping - Use raw Buffer for binary EventStream relay instead of UTF-8 string (prevents CRC corruption) - buildFixtureResponse checks toolCalls before empty content for Ollama responses - Add null guard on tc.function in Ollama tool_calls extraction - collapseOllamaNDJSON accumulates message.tool_calls from stream chunks - Rename collapseCohereSS to collapseCohereSSE for naming consistency - Clamp chaos rates to [0,1] after merging all override levels - Add X-LLMock-Record-Error header when fixture write fails - Fix auth header comment in recorder --- src/chaos.ts | 11 ++++++++--- src/recorder.ts | 23 +++++++++++++++++++---- src/stream-collapse.ts | 29 +++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/chaos.ts b/src/chaos.ts index 05e130f..8c0f0d8 100644 --- a/src/chaos.ts +++ b/src/chaos.ts @@ -8,13 +8,11 @@ */ import type * as http from "node:http"; -import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; +import type { ChaosAction, ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; import type { MetricsRegistry } from "./metrics.js"; -export type ChaosAction = "drop" | "malformed" | "disconnect"; - /** * Resolve chaos config from headers, fixture, and server defaults. * Header values override fixture values, which override server defaults. 
@@ -54,6 +52,13 @@ function resolveChaosConfig( } } + // Clamp all rates to [0, 1] + if (base.dropRate !== undefined) base.dropRate = Math.max(0, Math.min(1, base.dropRate)); + if (base.malformedRate !== undefined) + base.malformedRate = Math.max(0, Math.min(1, base.malformedRate)); + if (base.disconnectRate !== undefined) + base.disconnectRate = Math.max(0, Math.min(1, base.disconnectRate)); + return base; } diff --git a/src/recorder.ts b/src/recorder.ts index 650b331..3c34223 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -148,7 +148,7 @@ export async function proxyAndRecord( // Ensure fixture directory exists fs.mkdirSync(fixturePath, { recursive: true }); - // Exclude auth headers from saved fixture (they're in the match/response, not headers) + // Auth headers are forwarded to upstream but excluded from saved fixtures for security const fileContent = isEmptyMatch ? { fixtures: [fixture], @@ -159,6 +159,7 @@ export async function proxyAndRecord( } catch (err) { const msg = err instanceof Error ? err.message : "Unknown filesystem error"; defaults.logger.error(`Failed to save fixture to disk: ${msg}`); + res.setHeader("X-LLMock-Record-Error", msg); } // Register in memory so subsequent identical requests match (skip if empty match) @@ -174,7 +175,7 @@ export async function proxyAndRecord( relayHeaders["Content-Type"] = ctString; } res.writeHead(upstreamStatus, relayHeaders); - res.end(upstreamBody); + res.end(isBinaryStream ? rawBuffer : upstreamBody); return true; } @@ -330,10 +331,24 @@ function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse } } - // Ollama: { message: { content: "..." } } + // Ollama: { message: { content: "...", tool_calls: [...] 
} } if (obj.message && typeof obj.message === "object") { const msg = obj.message as Record; - if (typeof msg.content === "string") { + // Tool calls (check before content — Ollama sends content: "" alongside tool_calls) + if (Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) { + const toolCalls: ToolCall[] = (msg.tool_calls as Array>) + .filter((tc) => tc.function != null) + .map((tc) => { + const fn = tc.function as Record; + return { + name: String(fn.name ?? ""), + arguments: + typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments), + }; + }); + return { toolCalls }; + } + if (typeof msg.content === "string" && msg.content.length > 0) { return { content: msg.content }; } // Ollama message with content array (like Cohere) diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts index 6d4558e..1aa22b5 100644 --- a/src/stream-collapse.ts +++ b/src/stream-collapse.ts @@ -271,6 +271,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { const lines = body.split("\n").filter((l) => l.trim().length > 0); let content = ""; let droppedChunks = 0; + const toolCalls: ToolCall[] = []; for (const line of lines) { let parsed: Record; @@ -283,8 +284,24 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { // /api/chat format const message = parsed.message as Record | undefined; - if (message && typeof message.content === "string") { - content += message.content; + if (message) { + if (typeof message.content === "string") { + content += message.content; + } + + // Tool calls + if (Array.isArray(message.tool_calls)) { + for (const tc of message.tool_calls as Array>) { + const fn = tc.function as Record | undefined; + if (fn) { + toolCalls.push({ + name: String(fn.name ?? ""), + arguments: + typeof fn.arguments === "string" ? 
fn.arguments : JSON.stringify(fn.arguments), + }); + } + } + } } // /api/generate format @@ -293,6 +310,10 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { } } + if (toolCalls.length > 0) { + return { toolCalls, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; + } + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; } @@ -306,7 +327,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { * Format: * event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n */ -export function collapseCohereSS(body: string): CollapseResult { +export function collapseCohereSSE(body: string): CollapseResult { const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); let content = ""; let droppedChunks = 0; @@ -575,7 +596,7 @@ export function collapseStreamingResponse( case "vertexai": return collapseGeminiSSE(str); case "cohere": - return collapseCohereSS(str); + return collapseCohereSSE(str); default: // Try OpenAI format as default for unknown SSE providers return collapseOpenAISSE(str); From 1e7853efbbfe40f5690fab3d46d09a001a91354b Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:41 -0700 Subject: [PATCH 08/13] test: add coverage for strict mode, tool_calls collapse, latency, binary relay - Bedrock strict mode returns 503 for unmatched requests - Ollama NDJSON tool_calls collapse (single, priority, multiple) - writeNDJSONStream with non-zero latency - Cohere streaming tool calls with fixture-provided IDs - Recorder binary EventStream relay integrity with afterEach cleanup - collapseCohereSSE rename in test references --- src/__tests__/bedrock.test.ts | 38 +++++++ src/__tests__/cohere.test.ts | 64 +++++++++++ src/__tests__/ollama.test.ts | 69 ++++++++++++ src/__tests__/recorder.test.ts | 86 +++++++++++++++ src/__tests__/stream-collapse.test.ts | 153 +++++++++++++++++++++++--- 5 files changed, 395 insertions(+), 15 deletions(-) diff --git 
a/src/__tests__/bedrock.test.ts b/src/__tests__/bedrock.test.ts index c3b4707..969365c 100644 --- a/src/__tests__/bedrock.test.ts +++ b/src/__tests__/bedrock.test.ts @@ -527,3 +527,41 @@ describe("bedrockToCompletionRequest", () => { }); }); }); + +// --------------------------------------------------------------------------- +// strict:true returns 503 for unmatched Bedrock request +// --------------------------------------------------------------------------- + +describe("POST /model/{modelId}/invoke (strict mode)", () => { + it("returns 503 with strict message when no fixture matches in strict mode", async () => { + instance = await createServer(allFixtures, { strict: true }); + const res = await post( + `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "nomatch" }], + }, + ); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); + + it("returns 200 when fixture matches even in strict mode", async () => { + instance = await createServer(allFixtures, { strict: true }); + const res = await post( + `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }, + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.content[0].text).toBe("Hi there!"); + }); +}); diff --git a/src/__tests__/cohere.test.ts b/src/__tests__/cohere.test.ts index 4b6228f..a7655d9 100644 --- a/src/__tests__/cohere.test.ts +++ b/src/__tests__/cohere.test.ts @@ -930,3 +930,67 @@ describe("POST /v2/chat (journal)", () => { expect(entry!.body.model).toBe("command-r-plus"); }); }); + +// --------------------------------------------------------------------------- +// Streaming tool call with explicit fixture id +// 
--------------------------------------------------------------------------- + +describe("POST /v2/chat (streaming tool call with fixture-provided id)", () => { + const toolFixtureWithId: Fixture = { + match: { userMessage: "lookup" }, + response: { + toolCalls: [ + { + name: "search_db", + arguments: '{"query":"cats"}', + id: "call_fixture_custom_123", + }, + ], + }, + }; + + it("preserves fixture-provided tool call id in streaming events", async () => { + instance = await createServer([toolFixtureWithId]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "lookup" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const events = parseSSEEvents(res.body); + + // tool-call-start should carry the fixture-provided id + const tcStart = events.find((e) => e.event === "tool-call-start"); + expect(tcStart).toBeDefined(); + const tcStartDelta = tcStart!.data.delta as Record; + const tcStartMsg = tcStartDelta.message as Record; + const tcStartCalls = tcStartMsg.tool_calls as Record; + expect(tcStartCalls.id).toBe("call_fixture_custom_123"); + expect(tcStartCalls.type).toBe("function"); + const tcStartFn = tcStartCalls.function as Record; + expect(tcStartFn.name).toBe("search_db"); + + // tool-call-delta(s) should accumulate to the full arguments + const tcDeltas = events.filter((e) => e.event === "tool-call-delta"); + expect(tcDeltas.length).toBeGreaterThanOrEqual(1); + const argsAccum = tcDeltas + .map((e) => { + const delta = e.data.delta as Record; + const msg = delta.message as Record; + const calls = msg.tool_calls as Record; + const fn = calls.function as Record; + return fn.arguments as string; + }) + .join(""); + expect(argsAccum).toBe('{"query":"cats"}'); + + // message-end with TOOL_CALL + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const endDelta = msgEnd!.data.delta 
as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + }); +}); diff --git a/src/__tests__/ollama.test.ts b/src/__tests__/ollama.test.ts index c9870b4..1a5a217 100644 --- a/src/__tests__/ollama.test.ts +++ b/src/__tests__/ollama.test.ts @@ -1043,3 +1043,72 @@ describe("POST /api/chat (error fixture no explicit status)", () => { expect(body.error.message).toBe("Something went wrong"); }); }); + +// --------------------------------------------------------------------------- +// writeNDJSONStream with non-zero latency +// --------------------------------------------------------------------------- + +describe("writeNDJSONStream with non-zero latency", () => { + it("delays between chunks when latency is set", async () => { + const chunks: string[] = []; + const timestamps: number[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + timestamps.push(Date.now()); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", message: { content: "Hello" }, done: false }, + { model: "llama3", message: { content: " world" }, done: false }, + { model: "llama3", message: { content: "" }, done: true }, + ]; + + const start = Date.now(); + const completed = await writeNDJSONStream(res, data, { latency: 30 }); + const elapsed = Date.now() - start; + + expect(completed).toBe(true); + expect(chunks).toHaveLength(3); + // With 30ms latency per chunk and 3 chunks, total should be >= 60ms + // (first chunk has 0 delay with default profile, subsequent chunks have latency) + expect(elapsed).toBeGreaterThanOrEqual(50); + }); + + it("all chunks are valid NDJSON with non-zero latency", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { 
writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false, message: { content: "a" } }, + { model: "llama3", done: true, message: { content: "" } }, + ]; + + const completed = await writeNDJSONStream(res, data, { latency: 10 }); + + expect(completed).toBe(true); + expect(chunks).toHaveLength(2); + // Each chunk should be valid JSON followed by newline + for (const chunk of chunks) { + expect(chunk.endsWith("\n")).toBe(true); + expect(() => JSON.parse(chunk.trim())).not.toThrow(); + } + }); +}); diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index 499f93d..48652cb 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -2529,3 +2529,89 @@ async function setupUpstreamAndRecorder( fixturePath: tmpDir, }; } + +// --------------------------------------------------------------------------- +// Binary EventStream relay preserves data integrity +// --------------------------------------------------------------------------- + +describe("recorder binary EventStream relay integrity", () => { + let rawServer: http.Server | undefined; + + afterEach(async () => { + if (rawServer) { + await new Promise((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + it("relays binary EventStream data that can be decoded back to original content", async () => { + // Build a known binary EventStream payload upstream + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Binary " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "integrity " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame3 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "test" }, + contentBlockIndex: 0, + }, + 
contentBlockIndex: 0, + }); + const frame4 = encodeEventStreamMessage("messageStop", { + messageStop: { stopReason: "end_turn" }, + }); + + const expectedPayload = Buffer.concat([frame1, frame2, frame3, frame4]); + + // Create raw upstream that returns binary EventStream + rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/vnd.amazon.eventstream" }); + res.end(expectedPayload); + }); + await new Promise((resolve) => rawServer!.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer!.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: rawUrl }, fixturePath: tmpDir }, + }); + + // Make the request through the recorder proxy + const resp = await post(`${recorder.url}/model/claude-v3/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "binary integrity test" }], + }); + + expect(resp.status).toBe(200); + + // The relayed response body should contain the text from the EventStream + // frames. The relay currently converts Buffer to string, so we verify + // the content is present in the response. + // NOTE: If the relay preserves raw binary, the response body should + // contain text extractable from the EventStream frames. 
+ expect(resp.body.length).toBeGreaterThan(0); + + // Verify the fixture was saved correctly on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Binary integrity test"); + }); +}); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts index eb35fb3..cb1d72b 100644 --- a/src/__tests__/stream-collapse.test.ts +++ b/src/__tests__/stream-collapse.test.ts @@ -4,7 +4,7 @@ import { collapseAnthropicSSE, collapseGeminiSSE, collapseOllamaNDJSON, - collapseCohereSS, + collapseCohereSSE, collapseBedrockEventStream, collapseStreamingResponse, } from "../stream-collapse.js"; @@ -448,7 +448,7 @@ describe("collapseOllamaNDJSON", () => { // 5. Cohere SSE // --------------------------------------------------------------------------- -describe("collapseCohereSS", () => { +describe("collapseCohereSSE", () => { it("collapses text content from content-delta events", () => { const body = [ `event: message-start`, @@ -465,7 +465,7 @@ describe("collapseCohereSS", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe("Hello world"); expect(result.toolCalls).toBeUndefined(); }); @@ -509,7 +509,7 @@ describe("collapseCohereSS", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.toolCalls![0].name).toBe("get_weather"); @@ -752,7 +752,7 @@ describe("collapseOllamaNDJSON droppedChunks", () => { }); }); -describe("collapseCohereSS droppedChunks", () => { +describe("collapseCohereSSE droppedChunks", () => 
{ it("counts droppedChunks for malformed JSON events mixed with valid ones", () => { const body = [ `event: content-delta`, @@ -766,7 +766,7 @@ describe("collapseCohereSS droppedChunks", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe("XY"); expect(result.droppedChunks).toBe(1); }); @@ -868,7 +868,7 @@ describe("collapseAnthropicSSE multiple tool calls", () => { }); }); -describe("collapseCohereSS multiple tool calls", () => { +describe("collapseCohereSSE multiple tool calls", () => { it("collapses 2 tool-call-start events at different indices", () => { const body = [ `event: message-start`, @@ -923,7 +923,7 @@ describe("collapseCohereSS multiple tool calls", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(2); expect(result.toolCalls![0].name).toBe("get_weather"); @@ -1190,10 +1190,10 @@ describe("collapseGeminiSSE defensive branches", () => { // Defensive branch coverage — Cohere // --------------------------------------------------------------------------- -describe("collapseCohereSS defensive branches", () => { +describe("collapseCohereSSE defensive branches", () => { it("SSE block with no data: line is skipped", () => { const body = ["event: content-delta", ""].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe(""); }); @@ -1222,7 +1222,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.toolCalls![0].name).toBe("fn"); @@ -1240,7 +1240,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const 
result = collapseCohereSSE(body); expect(result.content).toBe(""); expect(result.toolCalls).toBeUndefined(); }); @@ -1274,7 +1274,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.droppedChunks).toBe(1); @@ -1422,8 +1422,131 @@ describe("empty input collapse", () => { expect(result.content).toBe(""); }); - it('collapseCohereSS("") returns { content: "" }', () => { - const result = collapseCohereSS(""); + it('collapseCohereSSE("") returns { content: "" }', () => { + const result = collapseCohereSSE(""); expect(result.content).toBe(""); }); }); + +// --------------------------------------------------------------------------- +// collapseOllamaNDJSON with tool_calls in stream chunks +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON with tool_calls", () => { + it("extracts tool_calls from /api/chat chunks", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "SF" }, + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "" }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + // toolCalls takes priority over content when present + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + expect(result.content).toBeUndefined(); + }); + + it("returns toolCalls (not content) when both tool_calls and text are present", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: 
"Let me check ", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "SF" }, + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "the weather." }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + // When toolCalls are present, they take priority over content + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.content).toBeUndefined(); + }); + + it("extracts multiple tool_calls across chunks", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_time", + arguments: '{"tz":"PST"}', + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "" }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"PST"}'); + }); +}); From e59d22a1f23fb8feacf05f6dcaf7887698b6b3d9 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:59:53 -0700 Subject: [PATCH 09/13] fix: pass defaults.registry to applyChaos in all handlers Five handlers (handleBedrock, handleGemini, handleMessages, handleResponses, handleEmbeddings) were missing the registry argument, causing chaos metrics to not be recorded for those endpoints. 
--- src/bedrock.ts | 20 ++++++++++++++------ src/embeddings.ts | 20 ++++++++++++++------ src/gemini.ts | 20 ++++++++++++++------ src/messages.ts | 20 ++++++++++++++------ src/responses.ts | 20 ++++++++++++++------ 5 files changed, 70 insertions(+), 30 deletions(-) diff --git a/src/bedrock.ts b/src/bedrock.ts index 6f3484d..19b1e74 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -309,12 +309,20 @@ export async function handleBedrock( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: urlPath, - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/embeddings.ts b/src/embeddings.ts index 3253fe8..b8f68ca 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -93,12 +93,20 @@ export async function handleEmbeddings( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/embeddings", - headers: flattenHeaders(req.headers), - body: syntheticReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + }, + defaults.registry, + ) ) return; diff --git a/src/gemini.ts b/src/gemini.ts index 5a357a6..8b5111a 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -422,12 +422,20 @@ export async function handleGemini( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path, - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? 
"POST", + path, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/messages.ts b/src/messages.ts index 75e04f8..cc609fb 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -471,12 +471,20 @@ export async function handleMessages( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/messages", - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/messages", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/responses.ts b/src/responses.ts index fb96cf3..aeaad68 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -534,12 +534,20 @@ export async function handleResponses( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/responses", - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/responses", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; From 2983185f45578688f915be8561fb0db53b1a1c01 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:00:05 -0700 Subject: [PATCH 10/13] docs: fix strict mode status code, proxy log level, provider list --- README.md | 2 +- skills/write-fixtures/SKILL.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b14985a..71b7ae3 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r **Use llmock when:** - Multiple processes need to hit the same mock (E2E tests, agent frameworks, microservices) -- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini) +- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere) - You prefer defining fixtures as JSON files rather than code - You need a standalone CLI server diff --git a/skills/write-fixtures/SKILL.md b/skills/write-fixtures/SKILL.md index 6c2e102..46c4f00 100644 --- a/skills/write-fixtures/SKILL.md +++ b/skills/write-fixtures/SKILL.md @@ -431,7 +431,7 @@ llmock --strict -f ./fixtures ``` - `--record` enables proxy-on-miss. Requires at least one `--provider-*` flag. -- `--strict` returns a 404 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. +- `--strict` returns a 503 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. - Provider flags: `--provider-openai`, `--provider-anthropic`, `--provider-gemini`, `--provider-vertexai`, `--provider-bedrock`, `--provider-azure`, `--provider-ollama`, `--provider-cohere`. ### How it works @@ -441,7 +441,7 @@ llmock --strict -f ./fixtures 3. **Auth headers are forwarded but NOT saved** — `Authorization`, `x-api-key`, and `api-key` headers are passed through to the upstream provider, but stripped from the recorded fixture. 4. **Responses are saved as standard fixtures** — recorded files land in `{fixturePath}/recorded/` and use the same JSON format as hand-written fixtures. Nothing special about them. 5. **Streaming responses are collapsed** — SSE streams are collapsed into a single text or tool-call response for the fixture. The original streaming format is preserved in the live proxy response. -6. 
**Loud logging** — every proxy hit logs at `info` level so you can see exactly which requests are being forwarded. +6. **Loud logging** — every proxy hit logs at `warn` level so you can see exactly which requests are being forwarded. ### Programmatic API From 61876fbfe9c52674f5287874dac312f03ba488d0 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:01:25 -0700 Subject: [PATCH 11/13] fix: handle Bedrock Converse response format in buildFixtureResponse The recorder's buildFixtureResponse had no handler for the Converse format ({ output: { message: { content: [...] } } }), causing recorded fixtures to silently be saved as error responses. Add handler for both text and toolUse content blocks. --- src/recorder.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/recorder.ts b/src/recorder.ts index 3c34223..044ded2 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -331,6 +331,30 @@ function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse } } + // Bedrock Converse: { output: { message: { role, content: [{ text }, { toolUse }] } } } + if (obj.output && typeof obj.output === "object") { + const output = obj.output as Record; + const msg = output.message as Record | undefined; + if (msg && Array.isArray(msg.content)) { + const blocks = msg.content as Array>; + const toolUseBlocks = blocks.filter((b) => b.toolUse); + if (toolUseBlocks.length > 0) { + const toolCalls: ToolCall[] = toolUseBlocks.map((b) => { + const tu = b.toolUse as Record; + return { + name: String(tu.name ?? ""), + arguments: typeof tu.input === "string" ? tu.input : JSON.stringify(tu.input), + }; + }); + return { toolCalls }; + } + const textBlock = blocks.find((b) => typeof b.text === "string"); + if (textBlock && typeof textBlock.text === "string") { + return { content: textBlock.text }; + } + } + } + // Ollama: { message: { content: "...", tool_calls: [...] 
} } if (obj.message && typeof obj.message === "object") { const msg = obj.message as Record; From 219df288f743b068ce43dbff82be37d38aa57ab8 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:15:32 -0700 Subject: [PATCH 12/13] fix: replace global recordCounter with UUID, pass raw body to proxy - Replace module-level mutable recordCounter with crypto.randomUUID() to avoid non-deterministic filenames in concurrent test scenarios - Pass original request body string to proxyAndRecord in the OpenAI completions path, preserving formatting fidelity to upstream --- src/recorder.ts | 5 ++--- src/server.ts | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recorder.ts b/src/recorder.ts index 044ded2..02e5532 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -2,6 +2,7 @@ import * as http from "node:http"; import * as https from "node:https"; import * as fs from "node:fs"; import * as path from "node:path"; +import * as crypto from "node:crypto"; import type { ChatCompletionRequest, Fixture, @@ -13,8 +14,6 @@ import { getLastMessageByRole, getTextContent } from "./router.js"; import type { Logger } from "./logger.js"; import { collapseStreamingResponse } from "./stream-collapse.js"; -let recordCounter = 0; - /** * Proxy an unmatched request to the real upstream provider, record the * response as a fixture on disk and in memory, then relay the response @@ -141,7 +140,7 @@ export async function proxyAndRecord( } const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); - const filename = `${providerKey}-${timestamp}-${recordCounter++}.json`; + const filename = `${providerKey}-${timestamp}-${crypto.randomUUID().slice(0, 8)}.json`; const filepath = path.join(fixturePath, filename); try { diff --git a/src/server.ts b/src/server.ts index a5df546..3bd07b8 100644 --- a/src/server.ts +++ b/src/server.ts @@ -216,6 +216,7 @@ async function handleCompletions( req.url ?? 
COMPLETIONS_PATH, fixtures, defaults, + raw, ); if (proxied) { journal.add({ From e3499bcc4a78db89ace5bcc2fae4b2195c1922ca Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:15:39 -0700 Subject: [PATCH 13/13] test: add regression tests for all recent bug fixes - Recorder: proxy preserves original request body formatting - Recorder: Ollama empty content + tool_calls priority in buildFixtureResponse - Recorder: UUID-based filename format - Chaos: rate clamping (>1 clamps to 1, negative clamps to 0) - Metrics: chaos counter incremented on Anthropic endpoint (was broken) --- src/__tests__/chaos.test.ts | 28 ++++++++ src/__tests__/metrics.test.ts | 23 +++++++ src/__tests__/recorder.test.ts | 121 ++++++++++++++++++++++++++++++++- 3 files changed, 170 insertions(+), 2 deletions(-) diff --git a/src/__tests__/chaos.test.ts b/src/__tests__/chaos.test.ts index 6eec85f..26902d2 100644 --- a/src/__tests__/chaos.test.ts +++ b/src/__tests__/chaos.test.ts @@ -126,6 +126,34 @@ describe("evaluateChaos", () => { const result = evaluateChaos(null, undefined, headers); expect(result).toBe("drop"); }); + + it("clamps rate > 1 to 1.0 (always triggers)", () => { + // dropRate 5.0 should be clamped to 1.0, so it always triggers + const fixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "hi" }, + chaos: { dropRate: 5.0 }, + }; + // Run 20 times — every single one must return "drop" + for (let i = 0; i < 20; i++) { + const result = evaluateChaos(fixture, undefined, undefined); + expect(result).toBe("drop"); + } + }); + + it("clamps negative rate to 0 (never triggers)", () => { + // dropRate -1.0 should be clamped to 0, so it never triggers + const fixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "hi" }, + chaos: { dropRate: -1.0 }, + }; + // Run 50 times — none should trigger + for (let i = 0; i < 50; i++) { + const result = evaluateChaos(fixture, undefined, undefined); + expect(result).toBeNull(); + } + }); }); // 
--------------------------------------------------------------------------- diff --git a/src/__tests__/metrics.test.ts b/src/__tests__/metrics.test.ts index 0d1948c..f9d1436 100644 --- a/src/__tests__/metrics.test.ts +++ b/src/__tests__/metrics.test.ts @@ -567,6 +567,29 @@ describe("integration: /metrics endpoint", () => { expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); }); + it("increments chaos counter on Anthropic /v1/messages endpoint", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi from claude" }, + }, + ]; + instance = await createServer(fixtures, { + metrics: true, + chaos: { dropRate: 1.0 }, + }); + + await httpPost(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "hello" }], + }); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_chaos_triggered_total"); + expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); + }); + it("tracks fixtures loaded gauge", async () => { const fixtures: Fixture[] = [ { match: { userMessage: "a" }, response: { content: "1" } }, diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index 48652cb..5c4ddd4 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -1311,11 +1311,60 @@ describe("recorder edge cases", () => { const fixtureFiles = files.filter((f) => f.endsWith(".json")); expect(fixtureFiles).toHaveLength(1); - // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{counter}.json (colons and dots replaced with dashes) - const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-\d+\.json$/; + // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{uuid8}.json (colons and dots replaced with dashes) + const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-[a-f0-9]{8}\.json$/; 
expect(fixtureFiles[0]).toMatch(pattern); }); + it("proxies the original request body to upstream (preserves formatting)", async () => { + // The proxy should forward the exact bytes the client sent, not re-serialized JSON. + // This matters because JSON key ordering and whitespace may differ after parse/serialize. + let receivedBody = ""; + const upstreamServer = http.createServer((req, res) => { + const chunks: Buffer[] = []; + req.on("data", (c: Buffer) => chunks.push(c)); + req.on("end", () => { + receivedBody = Buffer.concat(chunks).toString(); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "chatcmpl-proxy-body", + object: "chat.completion", + created: 0, + model: "gpt-4", + choices: [ + { index: 0, message: { role: "assistant", content: "ok" }, finish_reason: "stop" }, + ], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + ); + }); + }); + await new Promise((resolve) => upstreamServer.listen(0, "127.0.0.1", resolve)); + const upAddr = upstreamServer.address() as { port: number }; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: `http://127.0.0.1:${upAddr.port}` }, fixturePath: tmpDir }, + }); + + // Send body with specific formatting (extra spaces, key order) + const customBody = + '{"model": "gpt-4", "messages": [{"role": "user", "content": "preserve me"}]}'; + const resp = await fetch(`${recorder.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: customBody, + }); + expect(resp.status).toBe(200); + + // The upstream should have received the original body, not re-serialized + expect(receivedBody).toBe(customBody); + + await new Promise((resolve) => upstreamServer.close(() => resolve())); + }); + it("upstream returns empty response body — handled gracefully", async () => { // Create a raw HTTP server that returns 200 with 
empty body const emptyServer = http.createServer((_req, res) => { @@ -1347,6 +1396,74 @@ describe("recorder edge cases", () => { await new Promise((resolve) => emptyServer.close(() => resolve())); }); + + it("Ollama empty content + tool_calls: records toolCalls, not content", async () => { + // Raw upstream returns Ollama-style response: empty content + tool_calls + const ollamaRaw = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "NYC" }, + }, + }, + ], + }, + done: true, + }), + ); + }); + await new Promise((resolve) => ollamaRaw.listen(0, "127.0.0.1", resolve)); + const ollamaAddr = ollamaRaw.address() as { port: number }; + const ollamaUrl = `http://127.0.0.1:${ollamaAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: ollamaUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "what is the weather in NYC" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + + // Should record toolCalls, NOT content: "" + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + 
expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "NYC", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + + await new Promise((resolve) => ollamaRaw.close(() => resolve())); + }); }); // ---------------------------------------------------------------------------