From 9184aa6de8966cc6dc022e3aff8f0a597973bdbf Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:01 -0700 Subject: [PATCH 01/13] =?UTF-8?q?chore:=20housekeeping=20=E2=80=94=20remov?= =?UTF-8?q?e=20spec=20files,=20add=20engines=20field,=20fix=20Dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../specs/2026-03-15-trust-section-design.md | 93 ------------------- package.json | 3 + 2 files changed, 3 insertions(+), 93 deletions(-) delete mode 100644 docs/superpowers/specs/2026-03-15-trust-section-design.md diff --git a/docs/superpowers/specs/2026-03-15-trust-section-design.md b/docs/superpowers/specs/2026-03-15-trust-section-design.md deleted file mode 100644 index 5282d63..0000000 --- a/docs/superpowers/specs/2026-03-15-trust-section-design.md +++ /dev/null @@ -1,93 +0,0 @@ -# Design: "Reliability" Trust Section for llmock Docs Site - -## Summary - -Add a new section to the llmock docs site (`docs/index.html`) between "Fixture-driven. Zero boilerplate." (code examples) and "llmock vs MSW" (comparison table). The section explains why users can trust that llmock's response shapes match real provider APIs, and how three-way drift detection keeps it that way. - -## Placement - -``` -Features ("Stop paying for flaky tests") -Code Examples ("Fixture-driven. Zero boilerplate.") -→ NEW: Reliability ("Verified against real APIs. Every day.") -Comparison ("llmock vs MSW") -Claude Code Integration -Real-World Usage -Footer -``` - -## Section Structure - -### Header - -- **Section label**: `RELIABILITY` -- **Headline**: "Verified against real APIs. Every day." -- **Description paragraph**: "A mock that doesn't match reality is worse than no mock — your tests pass, but production breaks. llmock runs three-way drift detection that compares SDK types, real API responses, and mock output to catch shape mismatches before you do." 
- -### Triangle Diagram - -SVG-based diagram showing three nodes arranged in a triangle: - -- **Top center**: "SDK Types" (blue border, `{ }` icon) — "What TypeScript types say the shape should be" -- **Bottom left**: "Real API" (green border, `↔` icon) — "What OpenAI, Claude, Gemini actually return" -- **Bottom right**: "llmock" (purple border, `⚙` icon) — "What the mock produces for the same request" - -Dashed connector lines between all three nodes with horizontal labels at each midpoint: - -- Left edge: "SDK = Real?" -- Right edge: "SDK = Mock?" -- Bottom edge: "Real = Mock?" - -### Diagnosis Cards (3-column grid) - -Three cards explaining the possible outcomes: - -1. **Red dot — "Mock doesn't match real"**: llmock needs updating — test fails immediately. The SDK comparison tells us why it drifted. -2. **Amber dot — "Provider changed, SDK is behind"**: Early warning — the real API has new fields that neither the SDK nor llmock know about yet. -3. **Green dot — "All three agree"**: No drift — the mock matches reality and the SDK types are current. - -Key principle: any mismatch between real API and mock is a failure, regardless of SDK state. The SDK layer diagnoses _why_ drift happened, it doesn't gate severity. - -### Drift Report Snippet - -Monospace terminal-style block showing `$ pnpm test:drift` output with three distinct examples: - -1. `[critical] LLMOCK DRIFT` — missing field (`choices[].message.refusal`: SDK has it, real has it, mock doesn't) -2. `[critical] TYPE MISMATCH` — wrong type (`content[].input`: SDK says object, real says object, mock says string) -3. `[warning] PROVIDER ADDED FIELD` — new field (`choices[].message.annotations`: only real API has it) - -Footer line: "2 critical (test fails) · 1 warning (logged) · detected before any user reported it" - -### CI Footer - -Badge showing "Daily CI" with green dot, text: "Drift tests across 4 providers run automatically every day." 
- -## Styling - -All styles must use the site's CSS custom properties (not hardcoded hex): - -- Background: `var(--bg-deep)` (page) / `var(--bg-card)` (cards) -- Borders: `var(--border)` -- Text: `var(--text-primary)` (headings) / `var(--text-secondary)` (body) / `var(--text-dim)` (labels) -- Accent: `var(--accent)` (green) -- Uses existing `.section-label`, `.section-title`, `.section-desc` CSS classes -- Section uses `class="reveal"` for scroll-triggered animation -- Triangle diagram uses inline SVG for connector lines - -## CI Cadence Change - -The drift CI workflow (`.github/workflows/test-drift.yml`) will be updated from weekly (Monday 6am UTC) to daily (6am UTC every day). The cron changes from `0 6 * * 1` to `0 6 * * *`. - -DRIFT.md and the site footer text will be updated to say "every day" instead of "every week." - -## Files to Modify - -| File | Change | -| ---------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| `docs/index.html` | Insert new section between code examples and comparison. New CSS for triangle diagram, diagnosis cards, drift report. | -| `.github/workflows/test-drift.yml` | Change cron from `0 6 * * 1` to `0 6 * * *` | -| `DRIFT.md` | Update schedule references from weekly to daily; update cost estimate in Cost section for daily cadence | - -## Validated Mockup - -The approved design is in `.superpowers/brainstorm/84286-1773621431/trust-section-v4.html`. 
diff --git a/package.json b/package.json index 9464fd6..8533538 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,9 @@ "description": "Deterministic mock LLM server for testing (OpenAI, Anthropic, Gemini)", "license": "MIT", "packageManager": "pnpm@10.28.2", + "engines": { + "node": ">=20.15.0" + }, "type": "module", "exports": { ".": { From cf3978d3996fc60193495d2c745dac124ea4cfc9 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:18 -0700 Subject: [PATCH 02/13] =?UTF-8?q?feat:=20v1.6.0=20=E2=80=94=20endpoints,?= =?UTF-8?q?=20chaos,=20metrics,=20record-and-replay?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New provider endpoints: Ollama, Cohere, Vertex AI, Bedrock Converse, Bedrock streaming (invoke-with-response-stream). New features: Prometheus metrics (/metrics), record-and-replay proxy, strict mode (503 on no-match), stream collapse, AWS EventStream binary framing, NDJSON writer, auth header redaction. Wire up missing imports (proxyAndRecord, createMetricsRegistry, normalizePathLabel), add RecordConfig type, expand MockServerOptions and ServerInstance.defaults with metrics/strict/record fields, add optional registry param to applyChaos for chaos counter tracking, add enableRecording/disableRecording to LLMock class, remove unused RecordConfig imports, deduplicate ChaosConfig import in CLI. 
--- src/aws-event-stream.ts | 156 +++++++++ src/bedrock-converse.ts | 648 ++++++++++++++++++++++++++++++++++ src/bedrock.ts | 379 +++++++++++++++++++- src/chaos.ts | 6 + src/cli.ts | 2 +- src/cohere.ts | 654 ++++++++++++++++++++++++++++++++++ src/embeddings.ts | 50 +++ src/gemini.ts | 41 ++- src/helpers.ts | 8 +- src/index.ts | 30 +- src/llmock.ts | 15 + src/messages.ts | 38 +- src/metrics.ts | 256 ++++++++++++++ src/ndjson-writer.ts | 53 +++ src/ollama.ts | 754 ++++++++++++++++++++++++++++++++++++++++ src/recorder.ts | 380 ++++++++++++++++++++ src/responses.ts | 38 +- src/server.ts | 447 +++++++++++++++++++++--- src/stream-collapse.ts | 586 +++++++++++++++++++++++++++++++ src/types.ts | 11 + src/ws-gemini-live.ts | 9 +- src/ws-realtime.ts | 11 +- src/ws-responses.ts | 9 +- 23 files changed, 4511 insertions(+), 70 deletions(-) create mode 100644 src/aws-event-stream.ts create mode 100644 src/bedrock-converse.ts create mode 100644 src/cohere.ts create mode 100644 src/metrics.ts create mode 100644 src/ndjson-writer.ts create mode 100644 src/ollama.ts create mode 100644 src/recorder.ts create mode 100644 src/stream-collapse.ts diff --git a/src/aws-event-stream.ts b/src/aws-event-stream.ts new file mode 100644 index 0000000..1021d80 --- /dev/null +++ b/src/aws-event-stream.ts @@ -0,0 +1,156 @@ +/** + * AWS Event Stream binary frame encoder. + * + * Implements the AWS binary event stream framing protocol used by Bedrock's + * streaming (invoke-with-response-stream) endpoint. Each frame carries a set of + * string headers and a raw-bytes payload, wrapped in a prelude with CRC32 + * checksums for integrity. 
+ * + * Binary frame layout: + * [total_length: 4B uint32-BE] + * [headers_length: 4B uint32-BE] + * [prelude_crc32: 4B CRC32 of first 8 bytes] + * [headers: variable] + * [payload: variable, raw JSON bytes] + * [message_crc32: 4B CRC32 of entire frame minus last 4 bytes] + */ + +import { crc32 } from "node:zlib"; +import type * as http from "node:http"; +import type { StreamingProfile } from "./types.js"; +import { delay, calculateDelay } from "./sse-writer.js"; + +// ─── Header encoding ──────────────────────────────────────────────────────── + +function encodeHeaders(headers: Record): Buffer { + const parts: Buffer[] = []; + for (const [name, value] of Object.entries(headers)) { + const nameBytes = Buffer.from(name, "utf8"); + const valueBytes = Buffer.from(value, "utf8"); + + // name_length (1 byte) + name + type (1 byte, 7 = STRING) + + // value_length (2 bytes BE) + value + const header = Buffer.alloc(1 + nameBytes.length + 1 + 2 + valueBytes.length); + let offset = 0; + header.writeUInt8(nameBytes.length, offset); + offset += 1; + nameBytes.copy(header, offset); + offset += nameBytes.length; + header.writeUInt8(7, offset); // STRING type + offset += 1; + header.writeUInt16BE(valueBytes.length, offset); + offset += 2; + valueBytes.copy(header, offset); + + parts.push(header); + } + return Buffer.concat(parts); +} + +// ─── Frame encoding ───────────────────────────────────────────────────────── + +/** + * Encode a single AWS Event Stream binary frame with the given headers and + * payload buffer. 
+ */ +export function encodeEventStreamFrame(headers: Record, payload: Buffer): Buffer { + const headersBuffer = encodeHeaders(headers); + const headersLength = headersBuffer.length; + + // prelude (8) + prelude_crc (4) + headers + payload + message_crc (4) + const totalLength = 4 + 4 + 4 + headersLength + payload.length + 4; + + const frame = Buffer.alloc(totalLength); + let offset = 0; + + // Prelude + frame.writeUInt32BE(totalLength, offset); + offset += 4; + frame.writeUInt32BE(headersLength, offset); + offset += 4; + + // Prelude CRC32 (covers first 8 bytes) + const preludeCrc = crc32(frame.subarray(0, 8)); + frame.writeUInt32BE(preludeCrc >>> 0, offset); + offset += 4; + + // Headers + headersBuffer.copy(frame, offset); + offset += headersLength; + + // Payload + payload.copy(frame, offset); + offset += payload.length; + + // Message CRC32 (covers entire frame minus last 4 bytes) + const messageCrc = crc32(frame.subarray(0, totalLength - 4)); + frame.writeUInt32BE(messageCrc >>> 0, offset); + + return frame; +} + +// ─── Convenience wrappers ─────────────────────────────────────────────────── + +/** + * Encode an event-stream message with standard AWS headers for a JSON event. + * + * Sets `:content-type` = `application/json`, `:event-type` = eventType, + * `:message-type` = `event`. + */ +export function encodeEventStreamMessage(eventType: string, jsonPayload: object): Buffer { + const headers: Record = { + ":content-type": "application/json", + ":event-type": eventType, + ":message-type": "event", + }; + const payload = Buffer.from(JSON.stringify(jsonPayload), "utf8"); + return encodeEventStreamFrame(headers, payload); +} + +/** + * Write a sequence of event-stream frames to an HTTP response with optional + * timing control. Mirrors the writeSSEStream pattern from sse-writer.ts. + * + * Returns `true` when all events are written, or `false` if interrupted. 
+ */ +export async function writeEventStream( + res: http.ServerResponse, + events: Array<{ eventType: string; payload: object }>, + options?: { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; + }, +): Promise { + const opts = options ?? {}; + const latency = opts.latency ?? 0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "application/vnd.amazon.eventstream"); + res.setHeader("Transfer-Encoding", "chunked"); + + let chunkIndex = 0; + for (const event of events) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) { + await delay(chunkDelay, signal); + } + if (signal?.aborted) return false; + if (res.writableEnded) return true; + + const frame = encodeEventStreamMessage(event.eventType, event.payload); + res.write(frame); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts new file mode 100644 index 0000000..0880549 --- /dev/null +++ b/src/bedrock-converse.ts @@ -0,0 +1,648 @@ +/** + * AWS Bedrock Converse API support. + * + * Translates incoming Converse and Converse-stream requests (Bedrock Converse + * format) into the ChatCompletionRequest format used by the fixture router, + * and converts fixture responses back into Converse API format — either a + * single JSON response or an Event Stream binary stream. 
+ */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { + generateToolUseId, + isTextResponse, + isToolCallResponse, + isErrorResponse, + flattenHeaders, +} from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import { writeEventStream } from "./aws-event-stream.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; +import { buildBedrockStreamTextEvents, buildBedrockStreamToolCallEvents } from "./bedrock.js"; + +// ─── Converse request types ───────────────────────────────────────────────── + +interface ConverseContentBlock { + text?: string; + toolUse?: { toolUseId: string; name: string; input: object }; + toolResult?: { toolUseId: string; content: { text?: string }[] }; +} + +interface ConverseMessage { + role: "user" | "assistant"; + content: ConverseContentBlock[]; +} + +interface ConverseToolSpec { + name: string; + description?: string; + inputSchema?: object; +} + +interface ConverseRequest { + messages: ConverseMessage[]; + system?: { text: string }[]; + inferenceConfig?: { maxTokens?: number; temperature?: number }; + toolConfig?: { tools: { toolSpec: ConverseToolSpec }[] }; +} + +// ─── Input conversion: Converse → ChatCompletionRequest ───────────────────── + +export function converseToCompletionRequest( + req: ConverseRequest, + modelId: string, +): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + // system field → system message + if (req.system && req.system.length > 0) { + const systemText = req.system.map((s) => s.text).join(""); + if (systemText) { + messages.push({ role: 
"system", content: systemText }); + } + } + + for (const msg of req.messages) { + if (msg.role === "user") { + // Check for toolResult blocks + const toolResults = msg.content.filter((b) => b.toolResult); + const textBlocks = msg.content.filter((b) => b.text !== undefined && !b.toolResult); + + if (toolResults.length > 0) { + for (const block of toolResults) { + const tr = block.toolResult!; + const resultContent = tr.content.map((c) => c.text ?? "").join(""); + messages.push({ + role: "tool", + content: resultContent, + tool_call_id: tr.toolUseId, + }); + } + if (textBlocks.length > 0) { + messages.push({ + role: "user", + content: textBlocks.map((b) => b.text ?? "").join(""), + }); + } + continue; + } + + // Plain user message + const text = msg.content + .filter((b) => b.text !== undefined) + .map((b) => b.text ?? "") + .join(""); + messages.push({ role: "user", content: text }); + } else if (msg.role === "assistant") { + const toolUseBlocks = msg.content.filter((b) => b.toolUse); + const textContent = msg.content + .filter((b) => b.text !== undefined) + .map((b) => b.text ?? 
"") + .join(""); + + if (toolUseBlocks.length > 0) { + messages.push({ + role: "assistant", + content: textContent || null, + tool_calls: toolUseBlocks.map((b) => ({ + id: b.toolUse!.toolUseId, + type: "function" as const, + function: { + name: b.toolUse!.name, + arguments: JSON.stringify(b.toolUse!.input), + }, + })), + }); + } else { + messages.push({ role: "assistant", content: textContent || null }); + } + } + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.toolConfig?.tools && req.toolConfig.tools.length > 0) { + tools = req.toolConfig.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.toolSpec.name, + description: t.toolSpec.description, + parameters: t.toolSpec.inputSchema, + }, + })); + } + + return { + model: modelId, + messages, + stream: false, + temperature: req.inferenceConfig?.temperature, + tools, + }; +} + +// ─── Response builders ────────────────────────────────────────────────────── + +function buildConverseTextResponse(content: string): object { + return { + output: { + message: { + role: "assistant", + content: [{ text: content }], + }, + }, + stopReason: "end_turn", + usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, + }; +} + +function buildConverseToolCallResponse(toolCalls: ToolCall[], logger: Logger): object { + return { + output: { + message: { + role: "assistant", + content: toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + toolUse: { + toolUseId: tc.id || generateToolUseId(), + name: tc.name, + input: argsObj, + }, + }; + }), + }, + }, + stopReason: "tool_use", + usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }, + }; +} + +// ─── Request handlers ─────────────────────────────────────────────────────── + +export async function handleConverse( + req: http.IncomingMessage, 
+ res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? `/model/${modelId}/converse`; + + let converseReq: ConverseRequest; + try { + converseReq = JSON.parse(raw) as ConverseRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!converseReq.messages || !Array.isArray(converseReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = converseToCompletionRequest(converseReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const body = buildConverseTextResponse(response.content); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const body = buildConverseToolCallResponse(response.toolCalls, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} + +export async function handleConverseStream( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? `/model/${modelId}/converse-stream`; + + let converseReq: ConverseRequest; + try { + converseReq = JSON.parse(raw) as ConverseRequest; + } catch { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!converseReq.messages || !Array.isArray(converseReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = converseToCompletionRequest(converseReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response — stream as Event Stream + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Tool call response — stream as Event Stream + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamToolCallEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/bedrock.ts b/src/bedrock.ts index cee4bb7..3b8ffbf 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -13,6 +13,7 @@ import type { ChatCompletionRequest, ChatMessage, Fixture, + RecordConfig, ToolCall, ToolDefinition, } from "./types.js"; @@ -26,9 +27,12 @@ import { } from "./helpers.js"; import { matchFixture } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; +import { writeEventStream } from "./aws-event-stream.js"; +import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Bedrock Claude request types ──────────────────────────────────────────── @@ -313,19 +317,48 @@ export async function handleBedrock( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + 
res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } journal.add({ method: req.method ?? "POST", path: urlPath, headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", }, }), @@ -406,3 +439,343 @@ export async function handleBedrock( }), ); } + +// ─── Streaming event builders ─────────────────────────────────────────────── + +export function buildBedrockStreamTextEvents( + content: string, + chunkSize: number, +): Array<{ eventType: string; payload: object }> { + const events: Array<{ eventType: string; payload: object }> = []; + + events.push({ + eventType: "messageStart", + payload: { role: "assistant" }, + }); + + events.push({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 0, start: {} }, + }); + + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 0, + delta: { type: "text_delta", text: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 0 }, + }); + + events.push({ + eventType: "messageStop", + payload: { stopReason: "end_turn" }, + }); + + return events; +} + +export 
function buildBedrockStreamToolCallEvents( + toolCalls: ToolCall[], + chunkSize: number, + logger: Logger, +): Array<{ eventType: string; payload: object }> { + const events: Array<{ eventType: string; payload: object }> = []; + + events.push({ + eventType: "messageStart", + payload: { role: "assistant" }, + }); + + for (let tcIdx = 0; tcIdx < toolCalls.length; tcIdx++) { + const tc = toolCalls[tcIdx]; + const toolUseId = tc.id || generateToolUseId(); + + events.push({ + eventType: "contentBlockStart", + payload: { + contentBlockIndex: tcIdx, + start: { + toolUse: { toolUseId, name: tc.name }, + }, + }, + }); + + let argsStr: string; + try { + const parsed = JSON.parse(tc.arguments || "{}"); + argsStr = JSON.stringify(parsed); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsStr = "{}"; + } + + for (let i = 0; i < argsStr.length; i += chunkSize) { + const slice = argsStr.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: tcIdx, + delta: { type: "input_json_delta", inputJSON: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: tcIdx }, + }); + } + + events.push({ + eventType: "messageStop", + payload: { stopReason: "tool_use" }, + }); + + return events; +} + +// ─── Streaming request handler ────────────────────────────────────────────── + +export async function handleBedrockStream( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + modelId: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? 
`/model/${modelId}/invoke-with-response-stream`; + + let bedrockReq: BedrockRequest; + try { + bedrockReq = JSON.parse(raw) as BedrockRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!bedrockReq.messages || !Array.isArray(bedrockReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + const completionReq = bedrockToCompletionRequest(bedrockReq, modelId); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "bedrock", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? 
"Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response — stream as Event Stream + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Tool call response — stream as Event Stream + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + const events = buildBedrockStreamToolCallEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeEventStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/chaos.ts b/src/chaos.ts index 7cdcdd3..05e130f 100644 --- a/src/chaos.ts +++ b/src/chaos.ts @@ -11,6 +11,7 @@ import type * as http from "node:http"; import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; +import type { MetricsRegistry } from "./metrics.js"; export type ChaosAction = "drop" | "malformed" | "disconnect"; @@ -106,10 +107,15 @@ export function applyChaos( rawHeaders: http.IncomingHttpHeaders, journal: Journal, context: ChaosJournalContext, + registry?: MetricsRegistry, ): boolean { const action = evaluateChaos(fixture, serverDefaults, rawHeaders); if (!action) return false; + if (registry) { + registry.incrementCounter("llmock_chaos_triggered_total", { action }); + } + switch (action) { case "drop": { journal.add({ diff --git a/src/cli.ts b/src/cli.ts index d452b48..20b6e29 100644 
--- a/src/cli.ts +++ b/src/cli.ts @@ -6,6 +6,7 @@ import { createServer } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir, validateFixtures } from "./fixture-loader.js"; import { Logger, type LogLevel } from "./logger.js"; import { watchFixtures } from "./watcher.js"; +import type { ChaosConfig } from "./types.js"; const HELP = ` Usage: llmock [options] @@ -81,7 +82,6 @@ if (Number.isNaN(chunkSize) || chunkSize < 1) { const logger = new Logger(logLevel); // Parse chaos config from CLI flags -import type { ChaosConfig } from "./types.js"; let chaos: ChaosConfig | undefined; { const dropStr = values["chaos-drop"]; diff --git a/src/cohere.ts b/src/cohere.ts new file mode 100644 index 0000000..ba5099f --- /dev/null +++ b/src/cohere.ts @@ -0,0 +1,654 @@ +/** + * Cohere v2 Chat API endpoint support. + * + * Translates incoming /v2/chat requests into the ChatCompletionRequest + * format used by the fixture router, and converts fixture responses back into + * Cohere's typed SSE streaming (or non-streaming) format. + * + * Cohere uses typed SSE events (event: + data: lines), similar to the + * Claude Messages handler in messages.ts. 
+ */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + StreamingProfile, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { + generateMessageId, + generateToolCallId, + isTextResponse, + isToolCallResponse, + isErrorResponse, + flattenHeaders, +} from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; + +// ─── Cohere v2 Chat request types ─────────────────────────────────────────── + +interface CohereMessage { + role: "user" | "assistant" | "system" | "tool"; + content: string; + tool_call_id?: string; +} + +interface CohereToolDef { + type: string; + function: { + name: string; + description?: string; + parameters?: object; + }; +} + +interface CohereRequest { + model: string; + messages: CohereMessage[]; + stream?: boolean; + tools?: CohereToolDef[]; + response_format?: { type: string; json_schema?: object }; +} + +// ─── Cohere SSE event types ───────────────────────────────────────────────── + +interface CohereSSEEvent { + type: string; + [key: string]: unknown; +} + +// ─── Zero-value usage block ───────────────────────────────────────────────── + +const ZERO_USAGE = { + billed_units: { input_tokens: 0, output_tokens: 0, search_units: 0, classifications: 0 }, + tokens: { input_tokens: 0, output_tokens: 0 }, +}; + +// ─── Input conversion: Cohere → ChatCompletionRequest ─────────────────────── + +export function cohereToCompletionRequest(req: CohereRequest): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + for (const msg of req.messages) { + if (msg.role === 
"system") { + messages.push({ role: "system", content: msg.content }); + } else if (msg.role === "user") { + messages.push({ role: "user", content: msg.content }); + } else if (msg.role === "assistant") { + messages.push({ role: "assistant", content: msg.content }); + } else if (msg.role === "tool") { + messages.push({ + role: "tool", + content: msg.content, + tool_call_id: msg.tool_call_id, + }); + } + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.tools && req.tools.length > 0) { + tools = req.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + }, + })); + } + + return { + model: req.model, + messages, + stream: req.stream, + tools, + }; +} + +// ─── Response building: fixture → Cohere v2 Chat format ───────────────────── + +// Non-streaming text response +function buildCohereTextResponse(content: string): object { + return { + id: generateMessageId(), + finish_reason: "COMPLETE", + message: { + role: "assistant", + content: [{ type: "text", text: content }], + tool_calls: [], + tool_plan: "", + citations: [], + }, + usage: ZERO_USAGE, + }; +} + +// Non-streaming tool call response +function buildCohereToolCallResponse(toolCalls: ToolCall[], logger: Logger): object { + const cohereCalls = toolCalls.map((tc) => { + // Validate arguments JSON + try { + JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + } + return { + id: tc.id || generateToolCallId(), + type: "function", + function: { + name: tc.name, + arguments: tc.arguments || "{}", + }, + }; + }); + + return { + id: generateMessageId(), + finish_reason: "TOOL_CALL", + message: { + role: "assistant", + content: [], + tool_calls: cohereCalls, + tool_plan: "", + citations: [], + }, + usage: ZERO_USAGE, + }; +} + +// ─── Streaming event builders 
─────────────────────────────────────────────── + +function buildCohereTextStreamEvents(content: string, chunkSize: number): CohereSSEEvent[] { + const msgId = generateMessageId(); + const events: CohereSSEEvent[] = []; + + // message-start + events.push({ + id: msgId, + type: "message-start", + delta: { + message: { + role: "assistant", + content: [], + tool_plan: "", + tool_calls: [], + citations: [], + }, + }, + }); + + // content-start (type: "text" only, no text field) + events.push({ + type: "content-start", + index: 0, + delta: { + message: { + content: { type: "text" }, + }, + }, + }); + + // content-delta — text chunks + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + events.push({ + type: "content-delta", + index: 0, + delta: { + message: { + content: { type: "text", text: slice }, + }, + }, + }); + } + + // content-end + events.push({ + type: "content-end", + index: 0, + }); + + // message-end + events.push({ + type: "message-end", + delta: { + finish_reason: "COMPLETE", + usage: ZERO_USAGE, + }, + }); + + return events; +} + +function buildCohereToolCallStreamEvents( + toolCalls: ToolCall[], + chunkSize: number, + logger: Logger, +): CohereSSEEvent[] { + const msgId = generateMessageId(); + const events: CohereSSEEvent[] = []; + + // message-start + events.push({ + id: msgId, + type: "message-start", + delta: { + message: { + role: "assistant", + content: [], + tool_plan: "", + tool_calls: [], + citations: [], + }, + }, + }); + + // tool-plan-delta + events.push({ + type: "tool-plan-delta", + delta: { + message: { + tool_plan: "I will use the requested tool.", + }, + }, + }); + + for (let idx = 0; idx < toolCalls.length; idx++) { + const tc = toolCalls[idx]; + const callId = tc.id || generateToolCallId(); + + // Validate arguments JSON + let argsJson: string; + try { + JSON.parse(tc.arguments || "{}"); + argsJson = tc.arguments || "{}"; + } catch { + logger.warn( + `Malformed JSON in fixture tool 
call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsJson = "{}"; + } + + // tool-call-start + events.push({ + type: "tool-call-start", + index: idx, + delta: { + message: { + tool_calls: { + id: callId, + type: "function", + function: { + name: tc.name, + arguments: "", + }, + }, + }, + }, + }); + + // tool-call-delta — chunked arguments + for (let i = 0; i < argsJson.length; i += chunkSize) { + const slice = argsJson.slice(i, i + chunkSize); + events.push({ + type: "tool-call-delta", + index: idx, + delta: { + message: { + tool_calls: { + function: { + arguments: slice, + }, + }, + }, + }, + }); + } + + // tool-call-end + events.push({ + type: "tool-call-end", + index: idx, + }); + } + + // message-end + events.push({ + type: "message-end", + delta: { + finish_reason: "TOOL_CALL", + usage: ZERO_USAGE, + }, + }); + + return events; +} + +// ─── SSE writer for Cohere typed events ───────────────────────────────────── + +interface CohereStreamOptions { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; +} + +async function writeCohereSSEStream( + res: http.ServerResponse, + events: CohereSSEEvent[], + optionsOrLatency?: number | CohereStreamOptions, +): Promise { + const opts: CohereStreamOptions = + typeof optionsOrLatency === "number" ? { latency: optionsOrLatency } : (optionsOrLatency ?? {}); + const latency = opts.latency ?? 
0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + let chunkIndex = 0; + for (const event of events) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) await delay(chunkDelay, signal); + if (signal?.aborted) return false; + if (res.writableEnded) return true; + res.write(`event: ${event.type}\ndata: ${JSON.stringify(event)}\n\n`); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} + +// ─── Request handler ──────────────────────────────────────────────────────── + +export async function handleCohere( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + let cohereReq: CohereRequest; + try { + cohereReq = JSON.parse(raw) as CohereRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Validate required model field + if (!cohereReq.model) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "model is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!cohereReq.messages || !Array.isArray(cohereReq.messages)) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = cohereToCompletionRequest(cohereReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "cohere", + req.url ?? "/v2/chat", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? 
"POST"} ${req.url ?? "/v2/chat"}`, + ); + } + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (cohereReq.stream !== true) { + const body = buildCohereTextResponse(response.content); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const events = buildCohereTextStreamEvents(response.content, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeCohereSSEStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (cohereReq.stream !== true) { + const body = buildCohereToolCallResponse(response.toolCalls, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const events = buildCohereToolCallStreamEvents(response.toolCalls, chunkSize, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeCohereSSEStream(res, events, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? 
"POST", + path: req.url ?? "/v2/chat", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/embeddings.ts b/src/embeddings.ts index d28d1e7..b86577a 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -20,6 +20,7 @@ import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Embeddings API request types ────────────────────────────────────────── @@ -157,6 +158,55 @@ export async function handleEmbeddings( return; } + // No fixture match — try record-and-replay proxy if configured + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + syntheticReq, + "openai", + req.url ?? "/v1/embeddings", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/embeddings"}`, + ); + journal.add({ + method: req.method ?? "POST", + path: req.url ?? 
"/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + response: { status: 503, fixture: null }, + }); + writeErrorResponse( + res, + 503, + JSON.stringify({ + error: { + message: "Strict mode: no fixture matched", + type: "invalid_request_error", + code: "no_fixture_match", + }, + }), + ); + return; + } + // No fixture match — generate deterministic embeddings from input text logger.warn( `No embedding fixture matched for "${combinedInput.slice(0, 80)}" — returning deterministic fallback`, diff --git a/src/gemini.ts b/src/gemini.ts index e61e34c..9e5f096 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -29,6 +29,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Gemini request types ─────────────────────────────────────────────────── @@ -380,6 +381,7 @@ export async function handleGemini( journal: Journal, defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, setCorsHeaders: (res: http.ServerResponse) => void, + providerKey: string = "gemini", ): Promise { const { logger } = defaults; setCorsHeaders(res); @@ -430,21 +432,50 @@ export async function handleGemini( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + providerKey, + path, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? 
"POST"} ${path}`); + } journal.add({ method: req.method ?? "POST", path, headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", - code: 404, - status: "NOT_FOUND", + message: strictMessage, + code: strictStatus, + status: defaults.strict ? "UNAVAILABLE" : "NOT_FOUND", }, }), ); diff --git a/src/helpers.ts b/src/helpers.ts index d141198..ae48a19 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -11,11 +11,17 @@ import type { ChatCompletion, } from "./types.js"; +const REDACTED_HEADERS = new Set(["authorization", "x-api-key", "api-key"]); + export function flattenHeaders(headers: http.IncomingHttpHeaders): Record { const flat: Record = {}; for (const [key, value] of Object.entries(headers)) { if (value === undefined) continue; - flat[key] = Array.isArray(value) ? value.join(", ") : value; + if (REDACTED_HEADERS.has(key.toLowerCase())) { + flat[key] = "[REDACTED]"; + } else { + flat[key] = Array.isArray(value) ? 
value.join(", ") : value; + } } return flat; } diff --git a/src/index.ts b/src/index.ts index 773fb16..a770b96 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,7 +24,35 @@ export type { ResponsesSSEEvent } from "./responses.js"; export { handleMessages } from "./messages.js"; export { handleGemini } from "./gemini.js"; export { handleEmbeddings } from "./embeddings.js"; -export { handleBedrock, bedrockToCompletionRequest } from "./bedrock.js"; +export { handleBedrock, bedrockToCompletionRequest, handleBedrockStream } from "./bedrock.js"; + +// Bedrock Converse +export { + handleConverse, + handleConverseStream, + converseToCompletionRequest, +} from "./bedrock-converse.js"; + +// AWS Event Stream +export { + encodeEventStreamFrame, + encodeEventStreamMessage, + writeEventStream, +} from "./aws-event-stream.js"; + +// Metrics +export { createMetricsRegistry, normalizePathLabel } from "./metrics.js"; +export type { MetricsRegistry } from "./metrics.js"; + +// NDJSON +export { writeNDJSONStream } from "./ndjson-writer.js"; +export type { NDJSONStreamOptions } from "./ndjson-writer.js"; + +// Ollama +export { handleOllama, handleOllamaGenerate, ollamaToCompletionRequest } from "./ollama.js"; + +// Cohere +export { handleCohere, cohereToCompletionRequest } from "./cohere.js"; // WebSocket export { WebSocketConnection, upgradeToWebSocket, computeAcceptKey } from "./ws-framing.js"; diff --git a/src/llmock.ts b/src/llmock.ts index eefc88f..d338dcd 100644 --- a/src/llmock.ts +++ b/src/llmock.ts @@ -6,6 +6,7 @@ import type { FixtureOpts, FixtureResponse, MockServerOptions, + RecordConfig, } from "./types.js"; import { createServer, type ServerInstance } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir } from "./fixture-loader.js"; @@ -168,6 +169,20 @@ export class LLMock { return this; } + // ---- Recording ---- + + enableRecording(config: RecordConfig): this { + this.options.record = config; + if (this.serverInstance) this.serverInstance.defaults.record 
= config; + return this; + } + + disableRecording(): this { + delete this.options.record; + if (this.serverInstance) delete this.serverInstance.defaults.record; + return this; + } + // ---- Reset ---- reset(): this { diff --git a/src/messages.ts b/src/messages.ts index bcc8f5c..5fb38d2 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -30,6 +30,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Claude Messages API request types ────────────────────────────────────── @@ -480,19 +481,50 @@ export async function handleMessages( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "anthropic", + req.url ?? "/v1/messages", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/messages", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/messages"}`, + ); + } journal.add({ method: req.method ?? "POST", path: req.url ?? 
"/v1/messages", headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", }, }), diff --git a/src/metrics.ts b/src/metrics.ts new file mode 100644 index 0000000..48b71a3 --- /dev/null +++ b/src/metrics.ts @@ -0,0 +1,256 @@ +/** + * Lightweight Prometheus metrics registry for LLMock. + * + * Zero external dependencies — implements counters, histograms, and gauges + * with Prometheus text exposition format serialization. + */ + +// --------------------------------------------------------------------------- +// Public interface +// --------------------------------------------------------------------------- + +export interface MetricsRegistry { + incrementCounter(name: string, labels: Record): void; + observeHistogram(name: string, labels: Record, value: number): void; + setGauge(name: string, labels: Record, value: number): void; + serialize(): string; + reset(): void; +} + +// --------------------------------------------------------------------------- +// Histogram bucket boundaries (Prometheus default-ish) +// --------------------------------------------------------------------------- + +const HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]; + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +/** Build a stable label key string for map lookups: `label1="v1",label2="v2"` */ +function labelKey(labels: Record): string { + const entries = Object.entries(labels).sort(([a], [b]) => a.localeCompare(b)); + if (entries.length === 0) return ""; + return entries.map(([k, v]) => `${k}="${escapeLabelValue(v)}"`).join(","); +} + +/** Escape a label value per 
Prometheus text exposition format. */ +function escapeLabelValue(v: string): string { + return v.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n"); +} + +/** Format labels for Prometheus output: `{label1="v1",label2="v2"}` */ +function formatLabels(labels: Record): string { + return `{${labelKey(labels)}}`; +} + +// --------------------------------------------------------------------------- +// Internal metric storage types +// --------------------------------------------------------------------------- + +interface CounterData { + type: "counter"; + /** Map from labelKey → value */ + series: Map; value: number }>; +} + +interface HistogramData { + type: "histogram"; + /** Map from labelKey → bucket counts, sum, count */ + series: Map< + string, + { + labels: Record; + bucketCounts: number[]; // one per HISTOGRAM_BUCKETS entry + sum: number; + count: number; + } + >; +} + +interface GaugeData { + type: "gauge"; + /** Map from labelKey → value */ + series: Map; value: number }>; +} + +type MetricData = CounterData | HistogramData | GaugeData; + +// --------------------------------------------------------------------------- +// Registry implementation +// --------------------------------------------------------------------------- + +export function createMetricsRegistry(): MetricsRegistry { + /** Ordered map: metric name → data. Insertion order preserved for stable output. 
*/ + const metrics = new Map(); + + function getOrCreateCounter(name: string): CounterData { + let data = metrics.get(name); + if (!data) { + data = { type: "counter", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "counter") throw new Error(`Metric ${name} is not a counter`); + return data as CounterData; + } + + function getOrCreateHistogram(name: string): HistogramData { + let data = metrics.get(name); + if (!data) { + data = { type: "histogram", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "histogram") throw new Error(`Metric ${name} is not a histogram`); + return data as HistogramData; + } + + function getOrCreateGauge(name: string): GaugeData { + let data = metrics.get(name); + if (!data) { + data = { type: "gauge", series: new Map() }; + metrics.set(name, data); + } + if (data.type !== "gauge") throw new Error(`Metric ${name} is not a gauge`); + return data as GaugeData; + } + + return { + incrementCounter(name: string, labels: Record): void { + const counter = getOrCreateCounter(name); + const key = labelKey(labels); + const existing = counter.series.get(key); + if (existing) { + existing.value += 1; + } else { + counter.series.set(key, { labels, value: 1 }); + } + }, + + observeHistogram(name: string, labels: Record, value: number): void { + const histogram = getOrCreateHistogram(name); + const key = labelKey(labels); + let existing = histogram.series.get(key); + if (!existing) { + existing = { + labels, + bucketCounts: new Array(HISTOGRAM_BUCKETS.length).fill(0) as number[], + sum: 0, + count: 0, + }; + histogram.series.set(key, existing); + } + // Update cumulative bucket counts + for (let i = 0; i < HISTOGRAM_BUCKETS.length; i++) { + if (value <= HISTOGRAM_BUCKETS[i]) { + existing.bucketCounts[i] += 1; + } + } + existing.sum += value; + existing.count += 1; + }, + + setGauge(name: string, labels: Record, value: number): void { + const gauge = getOrCreateGauge(name); + const key = labelKey(labels); + 
const existing = gauge.series.get(key); + if (existing) { + existing.value = value; + } else { + gauge.series.set(key, { labels, value }); + } + }, + + serialize(): string { + const lines: string[] = []; + + for (const [name, data] of metrics) { + switch (data.type) { + case "counter": { + lines.push(`# TYPE ${name} counter`); + for (const series of data.series.values()) { + lines.push(`${name}${formatLabels(series.labels)} ${series.value}`); + } + break; + } + case "histogram": { + lines.push(`# TYPE ${name} histogram`); + for (const series of data.series.values()) { + const lblStr = labelKey(series.labels); + const lblPrefix = lblStr ? `${lblStr},` : ""; + // Bucket lines + for (let i = 0; i < HISTOGRAM_BUCKETS.length; i++) { + lines.push( + `${name}_bucket{${lblPrefix}le="${HISTOGRAM_BUCKETS[i]}"} ${series.bucketCounts[i]}`, + ); + } + // +Inf bucket + lines.push(`${name}_bucket{${lblPrefix}le="+Inf"} ${series.count}`); + // Sum and count + lines.push(`${name}_sum${formatLabels(series.labels)} ${series.sum}`); + lines.push(`${name}_count${formatLabels(series.labels)} ${series.count}`); + } + break; + } + case "gauge": { + lines.push(`# TYPE ${name} gauge`); + for (const series of data.series.values()) { + lines.push(`${name}${formatLabels(series.labels)} ${series.value}`); + } + break; + } + } + } + + return lines.length > 0 ? 
lines.join("\n") + "\n" : ""; + }, + + reset(): void { + metrics.clear(); + }, + }; +} + +// --------------------------------------------------------------------------- +// Path normalization for metric labels +// --------------------------------------------------------------------------- + +// Regex patterns for parametric API routes +const BEDROCK_RE = + /^\/model\/([^/]+)\/(invoke|invoke-with-response-stream|converse|converse-stream)$/; +const GEMINI_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/; +const AZURE_RE = /^\/openai\/deployments\/([^/]+)\/(chat\/completions|embeddings)$/; +const VERTEX_RE = + /^\/v1\/projects\/([^/]+)\/locations\/([^/]+)\/publishers\/google\/models\/([^:]+):(.+)$/; + +/** + * Normalize parametric API paths to route patterns for use as metric labels. + * Replaces dynamic segments (model IDs, deployment names, etc.) with placeholders. + */ +export function normalizePathLabel(pathname: string): string { + // Bedrock: /model/{modelId}/{operation} + const bedrockMatch = pathname.match(BEDROCK_RE); + if (bedrockMatch) { + return `/model/{modelId}/${bedrockMatch[2]}`; + } + + // Gemini: /v1beta/models/{model}:{action} + const geminiMatch = pathname.match(GEMINI_RE); + if (geminiMatch) { + return `/v1beta/models/{model}:${geminiMatch[2]}`; + } + + // Azure: /openai/deployments/{id}/{operation} + const azureMatch = pathname.match(AZURE_RE); + if (azureMatch) { + return `/openai/deployments/{id}/${azureMatch[2]}`; + } + + // Vertex AI: /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:{action} + const vertexMatch = pathname.match(VERTEX_RE); + if (vertexMatch) { + return `/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:${vertexMatch[4]}`; + } + + // Static path — return as-is + return pathname; +} diff --git a/src/ndjson-writer.ts b/src/ndjson-writer.ts new file mode 100644 index 0000000..1e2ab7d --- /dev/null +++ b/src/ndjson-writer.ts @@ -0,0 +1,53 @@ +/** + * NDJSON streaming writer for Ollama 
endpoints. + * + * Mirrors writeSSEStream from sse-writer.ts but writes newline-delimited JSON + * (one JSON object per line) instead of SSE events. + */ + +import type * as http from "node:http"; +import type { StreamingProfile } from "./types.js"; +import { delay, calculateDelay } from "./sse-writer.js"; + +export interface NDJSONStreamOptions { + latency?: number; + streamingProfile?: StreamingProfile; + signal?: AbortSignal; + onChunkSent?: () => void; +} + +export async function writeNDJSONStream( + res: http.ServerResponse, + chunks: object[], + options?: NDJSONStreamOptions, +): Promise { + const opts = options ?? {}; + const latency = opts.latency ?? 0; + const profile = opts.streamingProfile; + const signal = opts.signal; + const onChunkSent = opts.onChunkSent; + + if (res.writableEnded) return true; + res.setHeader("Content-Type", "application/x-ndjson"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + let chunkIndex = 0; + for (const chunk of chunks) { + const chunkDelay = calculateDelay(chunkIndex, profile, latency); + if (chunkDelay > 0) { + await delay(chunkDelay, signal); + } + if (signal?.aborted) return false; + if (res.writableEnded) return true; + res.write(JSON.stringify(chunk) + "\n"); + onChunkSent?.(); + if (signal?.aborted) return false; + chunkIndex++; + } + + if (!res.writableEnded) { + res.end(); + } + return true; +} diff --git a/src/ollama.ts b/src/ollama.ts new file mode 100644 index 0000000..0ddcc62 --- /dev/null +++ b/src/ollama.ts @@ -0,0 +1,754 @@ +/** + * Ollama API endpoint support. + * + * Translates incoming /api/chat and /api/generate requests into the + * ChatCompletionRequest format used by the fixture router, and converts + * fixture responses back into Ollama's NDJSON streaming or non-streaming format. 
+ * + * Key differences from OpenAI: + * - Ollama defaults to stream: true (opposite of OpenAI) + * - Streaming uses NDJSON, not SSE + * - Tool call arguments are objects, not JSON strings + * - Tool calls have no id field + */ + +import type * as http from "node:http"; +import type { + ChaosConfig, + ChatCompletionRequest, + ChatMessage, + Fixture, + RecordConfig, + ToolCall, + ToolDefinition, +} from "./types.js"; +import { isTextResponse, isToolCallResponse, isErrorResponse, flattenHeaders } from "./helpers.js"; +import { matchFixture } from "./router.js"; +import { writeErrorResponse } from "./sse-writer.js"; +import { writeNDJSONStream } from "./ndjson-writer.js"; +import { createInterruptionSignal } from "./interruption.js"; +import type { Journal } from "./journal.js"; +import type { Logger } from "./logger.js"; +import { applyChaos } from "./chaos.js"; +import type { MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; + +// ─── Ollama request types ──────────────────────────────────────────────────── + +interface OllamaMessage { + role: string; + content: string; +} + +interface OllamaToolDef { + type: string; + function: { + name: string; + description?: string; + parameters?: object; + }; +} + +interface OllamaRequest { + model: string; + messages: OllamaMessage[]; + stream?: boolean; // default true! + options?: { temperature?: number; num_predict?: number }; + tools?: OllamaToolDef[]; +} + +interface OllamaGenerateRequest { + model: string; + prompt: string; + stream?: boolean; // default true! 
+ options?: { temperature?: number; num_predict?: number }; +} + +// ─── Duration fields (zeroed, required on final/non-streaming responses) ──── + +const DURATION_FIELDS = { + done_reason: "stop" as const, + total_duration: 0, + load_duration: 0, + prompt_eval_count: 0, + prompt_eval_duration: 0, + eval_count: 0, + eval_duration: 0, +}; + +// ─── Input conversion: Ollama → ChatCompletionRequest ──────────────────────── + +export function ollamaToCompletionRequest(req: OllamaRequest): ChatCompletionRequest { + const messages: ChatMessage[] = []; + + for (const msg of req.messages) { + messages.push({ + role: msg.role as ChatMessage["role"], + content: msg.content, + }); + } + + // Convert tools + let tools: ToolDefinition[] | undefined; + if (req.tools && req.tools.length > 0) { + tools = req.tools.map((t) => ({ + type: "function" as const, + function: { + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters, + }, + })); + } + + return { + model: req.model, + messages, + stream: req.stream, + temperature: req.options?.temperature, + max_tokens: req.options?.num_predict, + tools, + }; +} + +function ollamaGenerateToCompletionRequest(req: OllamaGenerateRequest): ChatCompletionRequest { + return { + model: req.model, + messages: [{ role: "user", content: req.prompt }], + stream: req.stream, + temperature: req.options?.temperature, + max_tokens: req.options?.num_predict, + }; +} + +// ─── Response builders: /api/chat ──────────────────────────────────────────── + +function buildOllamaChatTextChunks(content: string, model: string, chunkSize: number): object[] { + const chunks: object[] = []; + + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + chunks.push({ + model, + message: { role: "assistant", content: slice }, + done: false, + }); + } + + // Final chunk with done: true and all duration fields + chunks.push({ + model, + message: { role: "assistant", content: "" }, + 
done: true, + ...DURATION_FIELDS, + }); + + return chunks; +} + +function buildOllamaChatTextResponse(content: string, model: string): object { + return { + model, + message: { role: "assistant", content }, + done: true, + ...DURATION_FIELDS, + }; +} + +function buildOllamaChatToolCallChunks( + toolCalls: ToolCall[], + model: string, + logger: Logger, +): object[] { + const ollamaToolCalls = toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + function: { + name: tc.name, + arguments: argsObj, + }, + }; + }); + + // Tool calls are sent in a single chunk (no streaming of individual args) + const chunks: object[] = []; + chunks.push({ + model, + message: { + role: "assistant", + content: "", + tool_calls: ollamaToolCalls, + }, + done: false, + }); + + // Final chunk + chunks.push({ + model, + message: { role: "assistant", content: "" }, + done: true, + ...DURATION_FIELDS, + }); + + return chunks; +} + +function buildOllamaChatToolCallResponse( + toolCalls: ToolCall[], + model: string, + logger: Logger, +): object { + const ollamaToolCalls = toolCalls.map((tc) => { + let argsObj: unknown; + try { + argsObj = JSON.parse(tc.arguments || "{}"); + } catch { + logger.warn( + `Malformed JSON in fixture tool call arguments for "${tc.name}": ${tc.arguments}`, + ); + argsObj = {}; + } + return { + function: { + name: tc.name, + arguments: argsObj, + }, + }; + }); + + return { + model, + message: { + role: "assistant", + content: "", + tool_calls: ollamaToolCalls, + }, + done: true, + ...DURATION_FIELDS, + }; +} + +// ─── Response builders: /api/generate ──────────────────────────────────────── + +function buildOllamaGenerateTextChunks( + content: string, + model: string, + chunkSize: number, +): object[] { + const chunks: object[] = []; + const createdAt = new Date().toISOString(); 
+ + for (let i = 0; i < content.length; i += chunkSize) { + const slice = content.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + response: slice, + done: false, + }); + } + + // Final chunk + chunks.push({ + model, + created_at: createdAt, + response: "", + done: true, + ...DURATION_FIELDS, + context: [], + }); + + return chunks; +} + +function buildOllamaGenerateTextResponse(content: string, model: string): object { + return { + model, + created_at: new Date().toISOString(), + response: content, + done: true, + ...DURATION_FIELDS, + context: [], + }; +} + +// ─── Request handler: /api/chat ────────────────────────────────────────────── + +export async function handleOllama( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + const { logger } = defaults; + setCorsHeaders(res); + + const urlPath = req.url ?? "/api/chat"; + + let ollamaReq: OllamaRequest; + try { + ollamaReq = JSON.parse(raw) as OllamaRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!ollamaReq.messages || !Array.isArray(ollamaReq.messages)) { + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: messages array is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = ollamaToCompletionRequest(ollamaReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "ollama", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? 
defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Ollama defaults to streaming when stream is absent or true + const streaming = ollamaReq.stream !== false; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaChatTextResponse(response.content, completionReq.model); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaChatTextChunks(response.content, completionReq.model, chunkSize); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call response + if (isToolCallResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaChatToolCallResponse(response.toolCalls, completionReq.model, logger); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaChatToolCallChunks(response.toolCalls, completionReq.model, logger); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Unknown response type + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} + +// ─── Request handler: /api/generate ────────────────────────────────────────── + +export async function handleOllamaGenerate( + req: http.IncomingMessage, + res: http.ServerResponse, + raw: string, + fixtures: Fixture[], + journal: Journal, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; + }, + setCorsHeaders: (res: http.ServerResponse) => void, +): Promise { + setCorsHeaders(res); + + const urlPath = req.url ?? 
"/api/generate"; + + let generateReq: OllamaGenerateRequest; + try { + generateReq = JSON.parse(raw) as OllamaGenerateRequest; + } catch { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Malformed JSON", + type: "invalid_request_error", + }, + }), + ); + return; + } + + if (!generateReq.prompt || typeof generateReq.prompt !== "string") { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: {} as ChatCompletionRequest, + response: { status: 400, fixture: null }, + }); + writeErrorResponse( + res, + 400, + JSON.stringify({ + error: { + message: "Invalid request: prompt field is required", + type: "invalid_request_error", + }, + }), + ); + return; + } + + // Convert to ChatCompletionRequest for fixture matching + const completionReq = ollamaGenerateToCompletionRequest(generateReq); + + const fixture = matchFixture(fixtures, completionReq, journal.fixtureMatchCounts); + + if (fixture) { + journal.incrementFixtureMatchCount(fixture, fixtures); + } + + if ( + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) + ) + return; + + if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "ollama", + urlPath, + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? 
"Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error(`STRICT: No fixture matched for ${req.method ?? "POST"} ${urlPath}`); + } + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: strictStatus, fixture: null }, + }); + writeErrorResponse( + res, + strictStatus, + JSON.stringify({ + error: { + message: strictMessage, + type: "invalid_request_error", + }, + }), + ); + return; + } + + const response = fixture.response; + const latency = fixture.latency ?? defaults.latency; + const chunkSize = Math.max(1, fixture.chunkSize ?? defaults.chunkSize); + + // Ollama defaults to streaming when stream is absent or true + const streaming = generateReq.stream !== false; + + // Error response + if (isErrorResponse(response)) { + const status = response.status ?? 500; + journal.add({ + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status, fixture }, + }); + writeErrorResponse(res, status, JSON.stringify(response)); + return; + } + + // Text response (only type supported for /api/generate) + if (isTextResponse(response)) { + const journalEntry = journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 200, fixture }, + }); + if (!streaming) { + const body = buildOllamaGenerateTextResponse(response.content, completionReq.model); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(body)); + } else { + const chunks = buildOllamaGenerateTextChunks( + response.content, + completionReq.model, + chunkSize, + ); + const interruption = createInterruptionSignal(fixture); + const completed = await writeNDJSONStream(res, chunks, { + latency, + streamingProfile: fixture.streamingProfile, + signal: interruption?.signal, + onChunkSent: interruption?.tick, + }); + if (!completed) { + if (!res.writableEnded) res.destroy(); + journalEntry.response.interrupted = true; + journalEntry.response.interruptReason = interruption?.reason(); + } + interruption?.cleanup(); + } + return; + } + + // Tool call responses not supported for /api/generate — fall through to error + journal.add({ + method: req.method ?? 
"POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: 500, fixture }, + }); + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: "Fixture response did not match any known type", + type: "server_error", + }, + }), + ); +} diff --git a/src/recorder.ts b/src/recorder.ts new file mode 100644 index 0000000..650b331 --- /dev/null +++ b/src/recorder.ts @@ -0,0 +1,380 @@ +import * as http from "node:http"; +import * as https from "node:https"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import type { + ChatCompletionRequest, + Fixture, + FixtureResponse, + RecordConfig, + ToolCall, +} from "./types.js"; +import { getLastMessageByRole, getTextContent } from "./router.js"; +import type { Logger } from "./logger.js"; +import { collapseStreamingResponse } from "./stream-collapse.js"; + +let recordCounter = 0; + +/** + * Proxy an unmatched request to the real upstream provider, record the + * response as a fixture on disk and in memory, then relay the response + * back to the original client. + * + * Returns `true` if the request was proxied (provider configured), + * `false` if no upstream URL is configured for the given provider key. + */ +export async function proxyAndRecord( + req: http.IncomingMessage, + res: http.ServerResponse, + request: ChatCompletionRequest, + providerKey: string, + pathname: string, + fixtures: Fixture[], + defaults: { record?: RecordConfig; logger: Logger }, + rawBody?: string, +): Promise { + const record = defaults.record; + if (!record) return false; + + const providers = record.providers as Record; + const upstreamUrl = providers[providerKey]; + + if (!upstreamUrl) { + defaults.logger.warn(`No upstream URL configured for provider "${providerKey}" — cannot proxy`); + return false; + } + + const fixturePath = record.fixturePath ?? 
"./fixtures/recorded"; + const target = new URL(pathname, upstreamUrl); + + defaults.logger.warn(`NO FIXTURE MATCH — proxying to ${upstreamUrl}${pathname}`); + + // Forward relevant headers, strip x-llmock-* headers + const forwardHeaders: Record = {}; + const headersToForward = ["authorization", "x-api-key", "content-type", "accept"]; + for (const name of headersToForward) { + const val = req.headers[name]; + if (val !== undefined) { + forwardHeaders[name] = Array.isArray(val) ? val.join(", ") : val; + } + } + + const requestBody = rawBody ?? JSON.stringify(request); + + // Make upstream request + let upstreamStatus: number; + let upstreamHeaders: http.IncomingHttpHeaders; + let upstreamBody: string; + let rawBuffer: Buffer; + + try { + const result = await makeUpstreamRequest(target, forwardHeaders, requestBody); + upstreamStatus = result.status; + upstreamHeaders = result.headers; + upstreamBody = result.body; + rawBuffer = result.rawBuffer; + } catch (err) { + const msg = err instanceof Error ? err.message : "Unknown proxy error"; + defaults.logger.error(`Proxy request failed: ${msg}`); + res.writeHead(502, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + error: { message: `Proxy to upstream failed: ${msg}`, type: "proxy_error" }, + }), + ); + return true; + } + + // Detect streaming response and collapse if necessary + const contentType = upstreamHeaders["content-type"]; + const ctString = Array.isArray(contentType) ? contentType.join(", ") : (contentType ?? ""); + const isBinaryStream = ctString.toLowerCase().includes("application/vnd.amazon.eventstream"); + const collapsed = collapseStreamingResponse( + ctString, + providerKey, + isBinaryStream ? 
rawBuffer : upstreamBody, + ); + + let fixtureResponse: FixtureResponse; + + if (collapsed) { + // Streaming response — use collapsed result + defaults.logger.warn(`Streaming response detected (${ctString}) — collapsing to fixture`); + if (collapsed.droppedChunks && collapsed.droppedChunks > 0) { + defaults.logger.warn(`${collapsed.droppedChunks} chunk(s) dropped during stream collapse`); + } + if (collapsed.toolCalls && collapsed.toolCalls.length > 0) { + if (collapsed.content) { + defaults.logger.warn( + "Collapsed response has both content and toolCalls — preferring toolCalls", + ); + } + fixtureResponse = { toolCalls: collapsed.toolCalls }; + } else { + fixtureResponse = { content: collapsed.content ?? "" }; + } + } else { + // Non-streaming — try to parse as JSON + let parsedResponse: unknown = null; + try { + parsedResponse = JSON.parse(upstreamBody); + } catch { + // Not JSON — could be an unknown format + defaults.logger.warn("Upstream response is not valid JSON — saving raw response"); + } + fixtureResponse = buildFixtureResponse(parsedResponse, upstreamStatus); + } + + // Build the match criteria from the original request + const fixtureMatch = buildFixtureMatch(request); + + // Build and save the fixture + const fixture: Fixture = { match: fixtureMatch, response: fixtureResponse }; + + // Check if the match is empty (all undefined values) — warn but still save to disk + const matchValues = Object.values(fixtureMatch); + const isEmptyMatch = matchValues.length === 0 || matchValues.every((v) => v === undefined); + if (isEmptyMatch) { + defaults.logger.warn( + "Recorded fixture has empty match criteria — skipping in-memory registration", + ); + } + + const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); + const filename = `${providerKey}-${timestamp}-${recordCounter++}.json`; + const filepath = path.join(fixturePath, filename); + + try { + // Ensure fixture directory exists + fs.mkdirSync(fixturePath, { recursive: true }); + + // Exclude auth 
headers from saved fixture (they're in the match/response, not headers) + const fileContent = isEmptyMatch + ? { + fixtures: [fixture], + _warning: "Empty match criteria — this fixture will not match any request", + } + : { fixtures: [fixture] }; + fs.writeFileSync(filepath, JSON.stringify(fileContent, null, 2), "utf-8"); + } catch (err) { + const msg = err instanceof Error ? err.message : "Unknown filesystem error"; + defaults.logger.error(`Failed to save fixture to disk: ${msg}`); + } + + // Register in memory so subsequent identical requests match (skip if empty match) + if (!isEmptyMatch) { + fixtures.push(fixture); + } + + defaults.logger.warn(`Response recorded → ${filepath}`); + + // Relay upstream response to client + const relayHeaders: Record = {}; + if (ctString) { + relayHeaders["Content-Type"] = ctString; + } + res.writeHead(upstreamStatus, relayHeaders); + res.end(upstreamBody); + + return true; +} + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +function makeUpstreamRequest( + target: URL, + headers: Record, + body: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string; rawBuffer: Buffer }> { + return new Promise((resolve, reject) => { + const transport = target.protocol === "https:" ? https : http; + const req = transport.request( + target, + { + method: "POST", + headers: { + ...headers, + "Content-Length": Buffer.byteLength(body).toString(), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const rawBuffer = Buffer.concat(chunks); + resolve({ + status: res.statusCode ?? 
500, + headers: res.headers, + body: rawBuffer.toString(), + rawBuffer, + }); + }); + }, + ); + req.on("error", reject); + req.write(body); + req.end(); + }); +} + +/** + * Detect the response format from the parsed upstream JSON and convert + * it into an llmock FixtureResponse. + */ +function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse { + if (parsed === null || parsed === undefined) { + // Raw / unparseable response — save as error + return { + error: { message: "Upstream returned non-JSON response", type: "proxy_error" }, + status, + }; + } + + const obj = parsed as Record; + + // Error response + if (obj.error) { + const err = obj.error as Record; + return { + error: { + message: String(err.message ?? "Unknown error"), + type: String(err.type ?? "api_error"), + code: err.code ? String(err.code) : undefined, + }, + status, + }; + } + + // OpenAI embeddings: { data: [{ embedding: [...] }] } + if (Array.isArray(obj.data) && obj.data.length > 0) { + const first = obj.data[0] as Record; + if (Array.isArray(first.embedding)) { + return { embedding: first.embedding as number[] }; + } + } + + // Direct embedding: { embedding: [...] 
} + if (Array.isArray(obj.embedding)) { + return { embedding: obj.embedding as number[] }; + } + + // OpenAI chat completion: { choices: [{ message: { content, tool_calls } }] } + if (Array.isArray(obj.choices) && obj.choices.length > 0) { + const choice = obj.choices[0] as Record; + const message = choice.message as Record | undefined; + if (message) { + // Tool calls + if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { + const toolCalls: ToolCall[] = (message.tool_calls as Array>).map( + (tc) => { + const fn = tc.function as Record; + return { + name: String(fn.name), + arguments: String(fn.arguments), + }; + }, + ); + return { toolCalls }; + } + // Text content + if (typeof message.content === "string") { + return { content: message.content }; + } + } + } + + // Anthropic: { content: [{ type: "text", text: "..." }] } or tool_use + if (Array.isArray(obj.content) && obj.content.length > 0) { + const blocks = obj.content as Array>; + // Check for tool_use blocks first + const toolUseBlocks = blocks.filter((b) => b.type === "tool_use"); + if (toolUseBlocks.length > 0) { + const toolCalls: ToolCall[] = toolUseBlocks.map((b) => ({ + name: String(b.name), + arguments: typeof b.input === "string" ? b.input : JSON.stringify(b.input), + })); + return { toolCalls }; + } + // Text blocks + const textBlock = blocks.find((b) => b.type === "text"); + if (textBlock && typeof textBlock.text === "string") { + return { content: textBlock.text }; + } + } + + // Gemini: { candidates: [{ content: { parts: [{ text: "..." 
}] } }] } + if (Array.isArray(obj.candidates) && obj.candidates.length > 0) { + const candidate = obj.candidates[0] as Record; + const content = candidate.content as Record | undefined; + if (content && Array.isArray(content.parts)) { + const parts = content.parts as Array>; + // Tool calls (functionCall) + const fnCallParts = parts.filter((p) => p.functionCall); + if (fnCallParts.length > 0) { + const toolCalls: ToolCall[] = fnCallParts.map((p) => { + const fc = p.functionCall as Record; + return { + name: String(fc.name), + arguments: typeof fc.args === "string" ? fc.args : JSON.stringify(fc.args), + }; + }); + return { toolCalls }; + } + // Text + const textPart = parts.find((p) => typeof p.text === "string"); + if (textPart && typeof textPart.text === "string") { + return { content: textPart.text }; + } + } + } + + // Ollama: { message: { content: "..." } } + if (obj.message && typeof obj.message === "object") { + const msg = obj.message as Record; + if (typeof msg.content === "string") { + return { content: msg.content }; + } + // Ollama message with content array (like Cohere) + if (Array.isArray(msg.content) && msg.content.length > 0) { + const first = msg.content[0] as Record; + if (typeof first.text === "string") { + return { content: first.text }; + } + } + } + + // Fallback: unknown format — save as error + return { + error: { + message: "Could not detect response format from upstream", + type: "proxy_error", + }, + status, + }; +} + +/** + * Derive fixture match criteria from the original request. + */ +function buildFixtureMatch(request: ChatCompletionRequest): { + userMessage?: string; + inputText?: string; +} { + // Embedding request + if (request.embeddingInput) { + return { inputText: request.embeddingInput }; + } + + // Chat request — match on the last user message + const lastUser = getLastMessageByRole(request.messages ?? 
[], "user"); + if (lastUser) { + const text = getTextContent(lastUser.content); + if (text) { + return { userMessage: text }; + } + } + + return {}; +} diff --git a/src/responses.ts b/src/responses.ts index beba4ec..28e2af0 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -30,6 +30,7 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { proxyAndRecord } from "./recorder.js"; // ─── Responses API request types ──────────────────────────────────────────── @@ -544,19 +545,50 @@ export async function handleResponses( return; if (!fixture) { + if (defaults.record) { + const proxied = await proxyAndRecord( + req, + res, + completionReq, + "openai", + req.url ?? "/v1/responses", + fixtures, + defaults, + raw, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? "/v1/responses", + headers: flattenHeaders(req.headers), + body: completionReq, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/responses"}`, + ); + } journal.add({ method: req.method ?? "POST", path: req.url ?? 
"/v1/responses", headers: flattenHeaders(req.headers), body: completionReq, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", code: "no_fixture_match", }, diff --git a/src/server.ts b/src/server.ts index cef414d..8f8b4b3 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,5 +1,11 @@ import * as http from "node:http"; -import type { Fixture, ChatCompletionRequest, ChaosConfig, MockServerOptions } from "./types.js"; +import type { + Fixture, + ChatCompletionRequest, + ChaosConfig, + MockServerOptions, + RecordConfig, +} from "./types.js"; import { Journal } from "./journal.js"; import { matchFixture } from "./router.js"; import { writeSSEStream, writeErrorResponse } from "./sse-writer.js"; @@ -17,20 +23,33 @@ import { import { handleResponses } from "./responses.js"; import { handleMessages } from "./messages.js"; import { handleGemini } from "./gemini.js"; -import { handleBedrock } from "./bedrock.js"; +import { handleBedrock, handleBedrockStream } from "./bedrock.js"; +import { handleConverse, handleConverseStream } from "./bedrock-converse.js"; import { handleEmbeddings } from "./embeddings.js"; +import { handleOllama, handleOllamaGenerate } from "./ollama.js"; +import { handleCohere } from "./cohere.js"; import { upgradeToWebSocket, type WebSocketConnection } from "./ws-framing.js"; import { handleWebSocketResponses } from "./ws-responses.js"; import { handleWebSocketRealtime } from "./ws-realtime.js"; import { handleWebSocketGeminiLive } from "./ws-gemini-live.js"; import { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; +import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "./metrics.js"; +import { proxyAndRecord } from "./recorder.js"; export interface ServerInstance { server: http.Server; journal: 
Journal; url: string; - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }; + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + strict?: boolean; + record?: RecordConfig; + }; } const COMPLETIONS_PATH = "/v1/chat/completions"; @@ -40,11 +59,21 @@ const GEMINI_LIVE_PATH = "/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"; const MESSAGES_PATH = "/v1/messages"; const EMBEDDINGS_PATH = "/v1/embeddings"; +const COHERE_CHAT_PATH = "/v2/chat"; const DEFAULT_CHUNK_SIZE = 20; const GEMINI_PATH_RE = /^\/v1beta\/models\/([^:]+):(generateContent|streamGenerateContent)$/; const AZURE_DEPLOYMENT_RE = /^\/openai\/deployments\/([^/]+)\/(chat\/completions|embeddings)$/; const BEDROCK_INVOKE_RE = /^\/model\/([^/]+)\/invoke$/; +const BEDROCK_STREAM_RE = /^\/model\/([^/]+)\/invoke-with-response-stream$/; +const BEDROCK_CONVERSE_RE = /^\/model\/([^/]+)\/converse$/; +const BEDROCK_CONVERSE_STREAM_RE = /^\/model\/([^/]+)\/converse-stream$/; +const VERTEX_AI_RE = + /^\/v1\/projects\/[^/]+\/locations\/[^/]+\/publishers\/google\/models\/([^/:]+):(generateContent|streamGenerateContent)$/; + +const OLLAMA_CHAT_PATH = "/api/chat"; +const OLLAMA_GENERATE_PATH = "/api/generate"; +const OLLAMA_TAGS_PATH = "/api/tags"; const HEALTH_PATH = "/health"; const READY_PATH = "/ready"; @@ -93,8 +122,17 @@ async function handleCompletions( res: http.ServerResponse, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + strict?: boolean; + record?: RecordConfig; + }, modelFallback?: string, + providerKey?: string, ): Promise { setCorsHeaders(res); @@ -167,29 +205,70 @@ async function handleCompletions( // Apply chaos before normal response handling if ( - applyChaos(res, 
fixture, defaults.chaos, req.headers, journal, { - method, - path, - headers: flatHeaders, - body, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method, + path, + headers: flatHeaders, + body, + }, + defaults.registry, + ) ) return; if (!fixture) { + // Try record-and-replay proxy if configured + if (defaults.record && providerKey) { + const proxied = await proxyAndRecord( + req, + res, + body, + providerKey, + req.url ?? COMPLETIONS_PATH, + fixtures, + defaults, + ); + if (proxied) { + journal.add({ + method: req.method ?? "POST", + path: req.url ?? COMPLETIONS_PATH, + headers: flattenHeaders(req.headers), + body, + response: { status: res.statusCode ?? 200, fixture: null }, + }); + return; + } + } + + const strictStatus = defaults.strict ? 503 : 404; + const strictMessage = defaults.strict + ? "Strict mode: no fixture matched" + : "No fixture matched"; + if (defaults.strict) { + defaults.logger.error( + `STRICT: No fixture matched for ${req.method ?? "POST"} ${req.url ?? COMPLETIONS_PATH}`, + ); + } + journal.add({ method: req.method ?? "POST", path: req.url ?? COMPLETIONS_PATH, headers: flattenHeaders(req.headers), body, - response: { status: 404, fixture: null }, + response: { status: strictStatus, fixture: null }, }); writeErrorResponse( res, - 404, + strictStatus, JSON.stringify({ error: { - message: "No fixture matched", + message: strictMessage, type: "invalid_request_error", code: "no_fixture_match", }, @@ -310,15 +389,25 @@ export async function createServer( const host = options?.host ?? "127.0.0.1"; const port = options?.port ?? 0; const logger = new Logger(options?.logLevel ?? "silent"); + const registry = options?.metrics ? createMetricsRegistry() : undefined; + const serverOptions = options ?? {}; const defaults = { - latency: options?.latency ?? 0, - chunkSize: Math.max(1, options?.chunkSize ?? DEFAULT_CHUNK_SIZE), + latency: serverOptions.latency ?? 0, + chunkSize: Math.max(1, serverOptions.chunkSize ?? 
DEFAULT_CHUNK_SIZE), logger, chaos: options?.chaos, + registry, + strict: options?.strict, + record: options?.record, }; const journal = new Journal(); + // Set initial fixtures-loaded gauge + if (registry) { + registry.setGauge("llmock_fixtures_loaded", {}, fixtures.length); + } + const server = http.createServer((req: http.IncomingMessage, res: http.ServerResponse) => { // OPTIONS preflight if (req.method === "OPTIONS") { @@ -326,10 +415,34 @@ export async function createServer( return; } + // Record start time for metrics + const startTime = registry ? process.hrtime.bigint() : 0n; + // Parse the URL pathname (strip query string) const parsedUrl = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`); let pathname = parsedUrl.pathname; + // Instrument response completion for metrics + if (registry) { + const rawPathname = pathname; + res.on("finish", () => { + const normalizedPath = normalizePathLabel(rawPathname); + const method = req.method ?? "UNKNOWN"; + const status = String(res.statusCode); + registry.incrementCounter("llmock_requests_total", { + method, + path: normalizedPath, + status, + }); + const elapsed = Number(process.hrtime.bigint() - startTime) / 1e9; + registry.observeHistogram( + "llmock_request_duration_seconds", + { method, path: normalizedPath }, + elapsed, + ); + }); + } + // Azure OpenAI: /openai/deployments/{id}/{operation} → /v1/{operation} (chat/completions, embeddings) // Must be checked BEFORE the generic /openai/ prefix strip let azureDeploymentId: string | undefined; @@ -361,6 +474,18 @@ export async function createServer( return; } + // Prometheus metrics + if (pathname === "/metrics" && req.method === "GET") { + if (!registry) { + handleNotFound(res, "Not found"); + return; + } + setCorsHeaders(res); + res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4; charset=utf-8" }); + res.end(registry.serialize()); + return; + } + // Models listing if (pathname === MODELS_PATH && req.method === "GET") { 
setCorsHeaders(res); @@ -435,8 +560,8 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -459,8 +584,32 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); + } + res.end(); + } + }); + return; + } + + // POST /v2/chat — Cohere v2 Chat API + if (pathname === COHERE_CHAT_PATH && req.method === "POST") { + readBody(req) + .then((raw) => handleCohere(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + try { + res.write(`event: error\ndata: ${JSON.stringify({ error: { message: msg } })}\n\n`); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -540,8 +689,48 @@ export async function createServer( } else if (!res.writableEnded) { try { res.write(`data: ${JSON.stringify({ error: { message: msg } })}\n\n`); - } catch { - /* */ + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); + } + res.end(); + } + }); + return; + } + + // POST /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:(generateContent|streamGenerateContent) — Vertex AI + const vertexMatch = pathname.match(VERTEX_AI_RE); + if (vertexMatch && req.method === "POST") { + const vertexModel = vertexMatch[1]; + const streaming = vertexMatch[2] === 
"streamGenerateContent"; + readBody(req) + .then((raw) => + handleGemini( + req, + res, + raw, + vertexModel, + streaming, + fixtures, + journal, + defaults, + setCorsHeaders, + "vertexai", + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + try { + res.write(`data: ${JSON.stringify({ error: { message: msg } })}\n\n`); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } res.end(); } @@ -572,6 +761,165 @@ export async function createServer( return; } + // POST /model/{modelId}/invoke-with-response-stream — AWS Bedrock Claude streaming + const bedrockStreamMatch = pathname.match(BEDROCK_STREAM_RE); + if (bedrockStreamMatch && req.method === "POST") { + const bedrockModelId = bedrockStreamMatch[1]; + readBody(req) + .then((raw) => + handleBedrockStream( + req, + res, + raw, + bedrockModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /model/{modelId}/converse — AWS Bedrock Converse API + const converseMatch = pathname.match(BEDROCK_CONVERSE_RE); + if (converseMatch && req.method === "POST") { + const converseModelId = converseMatch[1]; + readBody(req) + .then((raw) => + handleConverse( + req, + res, + raw, + converseModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /model/{modelId}/converse-stream — AWS Bedrock Converse streaming API + const converseStreamMatch = pathname.match(BEDROCK_CONVERSE_STREAM_RE); + if (converseStreamMatch && req.method === "POST") { + const converseStreamModelId = converseStreamMatch[1]; + readBody(req) + .then((raw) => + handleConverseStream( + req, + res, + raw, + converseStreamModelId, + fixtures, + journal, + defaults, + setCorsHeaders, + ), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /api/chat — Ollama Chat API + if (pathname === OLLAMA_CHAT_PATH && req.method === "POST") { + readBody(req) + .then((raw) => handleOllama(req, res, raw, fixtures, journal, defaults, setCorsHeaders)) + .catch((err: unknown) => { + const msg = err instanceof Error ? err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // POST /api/generate — Ollama Generate API + if (pathname === OLLAMA_GENERATE_PATH && req.method === "POST") { + readBody(req) + .then((raw) => + handleOllamaGenerate(req, res, raw, fixtures, journal, defaults, setCorsHeaders), + ) + .catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ error: { message: msg, type: "server_error" } }), + ); + } else if (!res.writableEnded) { + res.destroy(); + } + }); + return; + } + + // GET /api/tags — Ollama Models listing + if (pathname === OLLAMA_TAGS_PATH && req.method === "GET") { + setCorsHeaders(res); + const modelIds = new Set(); + for (const f of fixtures) { + if (f.match.model && typeof f.match.model === "string") { + modelIds.add(f.match.model); + } + } + const ids = modelIds.size > 0 ? [...modelIds] : DEFAULT_MODELS; + const models = ids.map((name) => ({ + name, + model: name, + modified_at: new Date().toISOString(), + size: 0, + digest: "", + details: {}, + })); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ models })); + return; + } + // POST /v1/chat/completions — Chat Completions API if (pathname !== COMPLETIONS_PATH) { handleNotFound(res, "Not found"); @@ -582,33 +930,40 @@ export async function createServer( return; } - handleCompletions(req, res, fixtures, journal, defaults, azureDeploymentId).catch( - (err: unknown) => { - const msg = err instanceof Error ? err.message : "Internal error"; - if (!res.headersSent) { - writeErrorResponse( - res, - 500, - JSON.stringify({ - error: { - message: msg, - type: "server_error", - }, - }), + const completionsProvider = azureDeploymentId ? "azure" : "openai"; + handleCompletions( + req, + res, + fixtures, + journal, + defaults, + azureDeploymentId, + completionsProvider, + ).catch((err: unknown) => { + const msg = err instanceof Error ? 
err.message : "Internal error"; + if (!res.headersSent) { + writeErrorResponse( + res, + 500, + JSON.stringify({ + error: { + message: msg, + type: "server_error", + }, + }), + ); + } else if (!res.writableEnded) { + // Headers already sent (SSE stream in progress) — write error event then close + try { + res.write( + `data: ${JSON.stringify({ error: { message: msg, type: "server_error" } })}\n\n`, ); - } else if (!res.writableEnded) { - // Headers already sent (SSE stream in progress) — write error event then close - try { - res.write( - `data: ${JSON.stringify({ error: { message: msg, type: "server_error" } })}\n\n`, - ); - } catch { - // write itself failed, nothing more we can do - } - res.end(); + } catch (writeErr) { + logger.debug("Failed to write error recovery response:", writeErr); } - }, - ); + res.end(); + } + }); }); // ─── WebSocket upgrade handling ────────────────────────────────────────── diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts new file mode 100644 index 0000000..6d4558e --- /dev/null +++ b/src/stream-collapse.ts @@ -0,0 +1,586 @@ +/** + * Stream collapsing functions for record-and-replay. + * + * Each function takes a raw streaming response body (SSE, NDJSON, or binary + * EventStream) and collapses it into a non-streaming fixture response + * containing either `{ content }` or `{ toolCalls }`. + */ + +import { crc32 } from "node:zlib"; +import type { ToolCall } from "./types.js"; + +// --------------------------------------------------------------------------- +// Result type shared by all collapse functions +// --------------------------------------------------------------------------- + +export interface CollapseResult { + content?: string; + toolCalls?: ToolCall[]; + droppedChunks?: number; +} + +// --------------------------------------------------------------------------- +// 1. 
OpenAI SSE +// --------------------------------------------------------------------------- + +/** + * Collapse OpenAI Chat Completions SSE stream into a single response. + * + * Format: + * data: {"id":"chatcmpl-123","choices":[{"delta":{"content":"Hello"}}]}\n\n + * data: [DONE]\n\n + */ +export function collapseOpenAISSE(body: string): CollapseResult { + const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const line of lines) { + const dataLine = line.split("\n").find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const payload = dataLine.slice(5).trim(); + if (payload === "[DONE]") continue; + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + const choices = parsed.choices as Array> | undefined; + if (!choices || choices.length === 0) continue; + + const delta = choices[0].delta as Record | undefined; + if (!delta) continue; + + // Text content + if (typeof delta.content === "string") { + content += delta.content; + } + + // Tool calls + const toolCalls = delta.tool_calls as Array> | undefined; + if (toolCalls) { + for (const tc of toolCalls) { + const index = tc.index as number; + const fn = tc.function as Record | undefined; + + if (!toolCallMap.has(index)) { + toolCallMap.set(index, { + id: (tc.id as string) ?? "", + name: (fn?.name as string) ?? 
"", + arguments: "", + }); + } + + const entry = toolCallMap.get(index)!; + if (fn?.name && typeof fn.name === "string" && !entry.name) { + entry.name = fn.name; + } + if (tc.id && typeof tc.id === "string" && !entry.id) { + entry.id = tc.id; + } + if (fn?.arguments && typeof fn.arguments === "string") { + entry.arguments += fn.arguments; + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 2. Anthropic SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Anthropic Claude Messages SSE stream into a single response. + * + * Format: + * event: message_start\ndata: {...}\n\n + * event: content_block_delta\ndata: {"delta":{"type":"text_delta","text":"Hello"}}\n\n + */ +export function collapseAnthropicSSE(body: string): CollapseResult { + const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const block of blocks) { + const lines = block.split("\n"); + const eventLine = lines.find((l) => l.startsWith("event:")); + const dataLine = lines.find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const eventType = eventLine ? 
eventLine.slice(6).trim() : ""; + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + if (eventType === "content_block_start") { + const index = parsed.index as number; + const contentBlock = parsed.content_block as Record | undefined; + if (contentBlock?.type === "tool_use") { + toolCallMap.set(index, { + id: (contentBlock.id as string) ?? "", + name: (contentBlock.name as string) ?? "", + arguments: "", + }); + } + } + + if (eventType === "content_block_delta") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + if (!delta) continue; + + if (delta.type === "text_delta" && typeof delta.text === "string") { + content += delta.text; + } + + if (delta.type === "input_json_delta" && typeof delta.partial_json === "string") { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += delta.partial_json; + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 3. Gemini SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Gemini SSE stream into a single response. 
+ * + * Format (data-only, no event prefix, no [DONE]): + * data: {"candidates":[{"content":{"parts":[{"text":"Hello"}]}}]}\n\n + */ +export function collapseGeminiSSE(body: string): CollapseResult { + const lines = body.split("\n\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + + for (const line of lines) { + const dataLine = line.split("\n").find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + const candidates = parsed.candidates as Array> | undefined; + if (!candidates || candidates.length === 0) continue; + + const candidateContent = candidates[0].content as Record | undefined; + if (!candidateContent) continue; + + const parts = candidateContent.parts as Array> | undefined; + if (!parts || parts.length === 0) continue; + + // Handle functionCall parts + const fnCallParts = parts.filter((p) => p.functionCall); + if (fnCallParts.length > 0) { + const toolCallMap = new Map(); + for (let i = 0; i < fnCallParts.length; i++) { + const fc = fnCallParts[i].functionCall as Record; + toolCallMap.set(i, { + name: String(fc.name ?? ""), + arguments: typeof fc.args === "string" ? (fc.args as string) : JSON.stringify(fc.args), + }); + } + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + } + + if (typeof parts[0].text === "string") { + content += parts[0].text; + } + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 4. 
Ollama NDJSON +// --------------------------------------------------------------------------- + +/** + * Collapse Ollama NDJSON stream into a single response. + * + * /api/chat format: + * {"model":"llama3","message":{"role":"assistant","content":"Hello"},"done":false}\n + * + * /api/generate format: + * {"model":"llama3","response":"Hello","done":false}\n + */ +export function collapseOllamaNDJSON(body: string): CollapseResult { + const lines = body.split("\n").filter((l) => l.trim().length > 0); + let content = ""; + let droppedChunks = 0; + + for (const line of lines) { + let parsed: Record; + try { + parsed = JSON.parse(line.trim()) as Record; + } catch { + droppedChunks++; + continue; + } + + // /api/chat format + const message = parsed.message as Record | undefined; + if (message && typeof message.content === "string") { + content += message.content; + } + + // /api/generate format + else if (typeof parsed.response === "string") { + content += parsed.response; + } + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 5. Cohere SSE +// --------------------------------------------------------------------------- + +/** + * Collapse Cohere SSE stream into a single response. + * + * Format: + * event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n + */ +export function collapseCohereSS(body: string): CollapseResult { + const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const block of blocks) { + const lines = block.split("\n"); + const eventLine = lines.find((l) => l.startsWith("event:")); + const dataLine = lines.find((l) => l.startsWith("data:")); + if (!dataLine) continue; + + const eventType = eventLine ? 
eventLine.slice(6).trim() : ""; + const payload = dataLine.slice(5).trim(); + + let parsed: Record; + try { + parsed = JSON.parse(payload) as Record; + } catch { + droppedChunks++; + continue; + } + + if (eventType === "content-delta") { + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const contentObj = message?.content as Record | undefined; + if (contentObj && typeof contentObj.text === "string") { + content += contentObj.text; + } + } + + if (eventType === "tool-call-start") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const toolCalls = message?.tool_calls as Record | undefined; + if (toolCalls) { + const fn = toolCalls.function as Record | undefined; + toolCallMap.set(index, { + id: (toolCalls.id as string) ?? "", + name: (fn?.name as string) ?? "", + arguments: "", + }); + } + } + + if (eventType === "tool-call-delta") { + const index = parsed.index as number; + const delta = parsed.delta as Record | undefined; + const message = delta?.message as Record | undefined; + const toolCalls = message?.tool_calls as Record | undefined; + if (toolCalls) { + const fn = toolCalls.function as Record | undefined; + if (fn && typeof fn.arguments === "string") { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += fn.arguments; + } + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// 6. 
Bedrock EventStream (binary) +// --------------------------------------------------------------------------- + +/** + * Decode AWS Event Stream binary frames and extract JSON payloads. + * + * Binary frame layout: + * [total_length: 4B uint32-BE] + * [headers_length: 4B uint32-BE] + * [prelude_crc32: 4B] + * [headers: variable] + * [payload: variable] + * [message_crc32: 4B] + */ +function decodeEventStreamFrames( + buf: Buffer, +): Array<{ headers: Record; payload: Buffer }> { + const frames: Array<{ headers: Record; payload: Buffer }> = []; + let offset = 0; + + while (offset < buf.length) { + if (offset + 12 > buf.length) break; + + const totalLength = buf.readUInt32BE(offset); + const headersLength = buf.readUInt32BE(offset + 4); + + // Validate prelude CRC + const preludeCrc = buf.readUInt32BE(offset + 8); + const computedPreludeCrc = crc32(buf.subarray(offset, offset + 8)); + if (preludeCrc >>> 0 !== computedPreludeCrc >>> 0) { + break; // CRC mismatch — stop parsing + } + + // Parse headers + const headersStart = offset + 12; + const headersEnd = headersStart + headersLength; + const headers: Record = {}; + let hOffset = headersStart; + + while (hOffset < headersEnd) { + const nameLen = buf.readUInt8(hOffset); + hOffset += 1; + const name = buf.subarray(hOffset, hOffset + nameLen).toString("utf8"); + hOffset += nameLen; + // Skip header type byte (type 7 = STRING) + hOffset += 1; + const valueLen = buf.readUInt16BE(hOffset); + hOffset += 2; + const value = buf.subarray(hOffset, hOffset + valueLen).toString("utf8"); + hOffset += valueLen; + headers[name] = value; + } + + // Extract payload + const payloadStart = headersEnd; + const payloadEnd = offset + totalLength - 4; // minus message CRC + const payload = buf.subarray(payloadStart, payloadEnd); + + // Validate message CRC (covers entire frame minus last 4 bytes) + const messageCrc = buf.readUInt32BE(offset + totalLength - 4); + const computedMessageCrc = crc32(buf.subarray(offset, offset + totalLength - 
4)); + if (messageCrc >>> 0 !== computedMessageCrc >>> 0) { + break; // Message CRC mismatch — stop parsing + } + + frames.push({ headers, payload }); + offset += totalLength; + } + + return frames; +} + +/** + * Collapse Bedrock binary Event Stream into a single response. + * + * Each frame contains a JSON payload with event types like: + * contentBlockDelta, contentBlockStart, etc. + */ +export function collapseBedrockEventStream(body: Buffer): CollapseResult { + const frames = decodeEventStreamFrames(body); + let content = ""; + let droppedChunks = 0; + const toolCallMap = new Map(); + + for (const frame of frames) { + let parsed: Record; + try { + parsed = JSON.parse(frame.payload.toString("utf8")) as Record; + } catch { + droppedChunks++; + continue; + } + + // contentBlockStart — may initiate a tool_use block + if (parsed.contentBlockStart) { + const blockStart = parsed.contentBlockStart as Record; + const index = (parsed.contentBlockIndex ?? blockStart.contentBlockIndex) as + | number + | undefined; + const start = blockStart.start as Record | undefined; + if (start?.toolUse && index !== undefined) { + const toolUse = start.toolUse as Record; + toolCallMap.set(index, { + id: (toolUse.toolUseId as string) ?? "", + name: (toolUse.name as string) ?? "", + arguments: "", + }); + } + } + + // contentBlockDelta + if (parsed.contentBlockDelta) { + const blockDelta = parsed.contentBlockDelta as Record; + const index = (parsed.contentBlockIndex ?? 
blockDelta.contentBlockIndex) as + | number + | undefined; + const delta = blockDelta.delta as Record | undefined; + if (!delta) continue; + + // Text delta + if (typeof delta.text === "string") { + content += delta.text; + } + + // Tool use input JSON delta + if (typeof delta.toolUse === "object" && delta.toolUse !== null) { + const toolUseDelta = delta.toolUse as Record; + if (typeof toolUseDelta.input === "string" && index !== undefined) { + const entry = toolCallMap.get(index); + if (entry) { + entry.arguments += toolUseDelta.input; + } + } + } + } + } + + if (toolCallMap.size > 0) { + const sorted = Array.from(toolCallMap.entries()).sort(([a], [b]) => a - b); + return { + toolCalls: sorted.map(([, tc]) => ({ + name: tc.name, + arguments: tc.arguments, + ...(tc.id ? { id: tc.id } : {}), + })), + ...(droppedChunks > 0 ? { droppedChunks } : {}), + }; + } + + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; +} + +// --------------------------------------------------------------------------- +// Dispatch helper — pick the right collapse function by provider +// --------------------------------------------------------------------------- + +/** + * Collapse a streaming response body into a non-streaming fixture response. + * Returns null if the content type is not a known streaming format. + */ +export function collapseStreamingResponse( + contentType: string, + providerKey: string, + body: string | Buffer, +): CollapseResult | null { + const ct = contentType.toLowerCase(); + + if (ct.includes("application/vnd.amazon.eventstream")) { + const buf = typeof body === "string" ? Buffer.from(body, "binary") : body; + return collapseBedrockEventStream(buf); + } + + if (ct.includes("application/x-ndjson")) { + const str = typeof body === "string" ? body : body.toString("utf8"); + return collapseOllamaNDJSON(str); + } + + if (ct.includes("text/event-stream")) { + const str = typeof body === "string" ? 
body : body.toString("utf8"); + switch (providerKey) { + case "openai": + case "azure": + return collapseOpenAISSE(str); + case "anthropic": + return collapseAnthropicSSE(str); + case "gemini": + case "vertexai": + return collapseGeminiSSE(str); + case "cohere": + return collapseCohereSS(str); + default: + // Try OpenAI format as default for unknown SSE providers + return collapseOpenAISSE(str); + } + } + + return null; +} diff --git a/src/types.ts b/src/types.ts index 8433548..df0ee6b 100644 --- a/src/types.ts +++ b/src/types.ts @@ -215,6 +215,11 @@ export interface ChatCompletionMessage { // Server options +export interface RecordConfig { + providers: Record; + fixturePath?: string; +} + export interface MockServerOptions { port?: number; host?: string; @@ -223,4 +228,10 @@ export interface MockServerOptions { /** Log verbosity. CLI default is "info"; programmatic default (when omitted) is "silent". */ logLevel?: "silent" | "info" | "debug"; chaos?: ChaosConfig; + /** Enable Prometheus-compatible /metrics endpoint. */ + metrics?: boolean; + /** Strict mode: return 503 instead of 404 when no fixture matches. */ + strict?: boolean; + /** Record-and-replay: proxy unmatched requests to upstream and save fixtures. 
*/ + record?: RecordConfig; } diff --git a/src/ws-gemini-live.ts b/src/ws-gemini-live.ts index 88d1abb..15f70bf 100644 --- a/src/ws-gemini-live.ts +++ b/src/ws-gemini-live.ts @@ -171,7 +171,7 @@ export function handleWebSocketGeminiLive( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; const session: SessionState = { @@ -206,7 +206,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, session: SessionState, ): Promise { let parsed: GeminiLiveMessage; @@ -303,6 +303,11 @@ async function processMessage( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path, diff --git a/src/ws-realtime.ts b/src/ws-realtime.ts index 15e0608..6c9955d 100644 --- a/src/ws-realtime.ts +++ b/src/ws-realtime.ts @@ -130,7 +130,7 @@ export function handleWebSocketRealtime( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; const sessionId = generateId("sess"); @@ -176,7 +176,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; 
strict?: boolean }, session: SessionConfig, conversationItems: RealtimeItem[], ): Promise { @@ -246,7 +246,7 @@ async function handleResponseCreate( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, session: SessionConfig, conversationItems: RealtimeItem[], ): Promise { @@ -266,6 +266,11 @@ async function handleResponseCreate( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path: "/v1/realtime", diff --git a/src/ws-responses.ts b/src/ws-responses.ts index 5d73def..60ab4b7 100644 --- a/src/ws-responses.ts +++ b/src/ws-responses.ts @@ -57,7 +57,7 @@ export function handleWebSocketResponses( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): void { const { logger } = defaults; // Serialize message processing to prevent event interleaving @@ -82,7 +82,7 @@ async function processMessage( ws: WebSocketConnection, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; model: string; logger: Logger }, + defaults: { latency: number; chunkSize: number; model: string; logger: Logger; strict?: boolean }, ): Promise { let parsed: unknown; try { @@ -143,6 +143,11 @@ async function processMessage( } if (!fixture) { + if (defaults.strict) { + defaults.logger.warn(`STRICT: No fixture matched for WebSocket message`); + ws.close(1008, "Strict mode: no fixture matched"); + return; + } journal.add({ method: "WS", path: "/v1/responses", From aa527b676113830bbd0d28818728aa80f6153b9b Mon Sep 17 
00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:29 -0700 Subject: [PATCH 03/13] =?UTF-8?q?test:=201176=20tests=20=E2=80=94=20compre?= =?UTF-8?q?hensive=20coverage=20across=20all=20new=20features?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests for: Bedrock streaming, Bedrock Converse, AWS EventStream binary framing, Ollama chat/generate, Cohere v2 chat, Vertex AI, metrics endpoint, record-and-replay proxy, stream collapse, strict mode, and multi-provider recording. --- src/__tests__/aws-event-stream.test.ts | 391 +++ src/__tests__/bedrock-stream.test.ts | 1155 +++++++++ src/__tests__/cohere.test.ts | 932 +++++++ src/__tests__/drift/bedrock-stream.drift.ts | 145 ++ src/__tests__/drift/cohere.drift.ts | 213 ++ src/__tests__/drift/ollama.drift.ts | 219 ++ src/__tests__/drift/vertex-ai.drift.ts | 165 ++ src/__tests__/metrics.test.ts | 579 +++++ src/__tests__/ollama.test.ts | 1045 ++++++++ src/__tests__/recorder.test.ts | 2531 +++++++++++++++++++ src/__tests__/server.test.ts | 10 +- src/__tests__/stream-collapse.test.ts | 1429 +++++++++++ src/__tests__/vertex-ai.test.ts | 524 ++++ 13 files changed, 9333 insertions(+), 5 deletions(-) create mode 100644 src/__tests__/aws-event-stream.test.ts create mode 100644 src/__tests__/bedrock-stream.test.ts create mode 100644 src/__tests__/cohere.test.ts create mode 100644 src/__tests__/drift/bedrock-stream.drift.ts create mode 100644 src/__tests__/drift/cohere.drift.ts create mode 100644 src/__tests__/drift/ollama.drift.ts create mode 100644 src/__tests__/drift/vertex-ai.drift.ts create mode 100644 src/__tests__/metrics.test.ts create mode 100644 src/__tests__/ollama.test.ts create mode 100644 src/__tests__/recorder.test.ts create mode 100644 src/__tests__/stream-collapse.test.ts create mode 100644 src/__tests__/vertex-ai.test.ts diff --git a/src/__tests__/aws-event-stream.test.ts b/src/__tests__/aws-event-stream.test.ts new file mode 100644 index 0000000..6245fbd --- 
/dev/null +++ b/src/__tests__/aws-event-stream.test.ts @@ -0,0 +1,391 @@ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { crc32 } from "node:zlib"; +import { PassThrough } from "node:stream"; +import type * as http from "node:http"; +import { + encodeEventStreamFrame, + encodeEventStreamMessage, + writeEventStream, +} from "../aws-event-stream.js"; + +// ─── Test helpers ──────────────────────────────────────────────────────────── + +function makeMockResponse(): { + res: http.ServerResponse; + chunks: Buffer[]; + headers: () => Record; + ended: () => boolean; +} { + const stream = new PassThrough(); + const chunks: Buffer[] = []; + stream.on("data", (chunk: Buffer) => chunks.push(Buffer.from(chunk))); + + const writtenHeaders: Record = {}; + let isEnded = false; + + const res = { + setHeader(name: string, value: string) { + writtenHeaders[name] = value; + }, + writeHead(statusCode: number, headers?: Record) { + if (headers) { + for (const [k, v] of Object.entries(headers)) { + writtenHeaders[k] = v; + } + } + }, + write(data: Buffer | string) { + stream.write(data); + }, + end(data?: Buffer | string) { + if (data !== undefined) { + stream.write(data); + } + isEnded = true; + stream.end(); + }, + writableEnded: false, + } as unknown as http.ServerResponse; + + // Make writableEnded track our isEnded state + Object.defineProperty(res, "writableEnded", { + get: () => isEnded, + }); + + return { + res, + chunks, + headers: () => writtenHeaders, + ended: () => isEnded, + }; +} + +/** + * Parse the binary frame manually and return its components. 
+ */ +function parseFrame(frame: Buffer) { + const totalLength = frame.readUInt32BE(0); + const headersLength = frame.readUInt32BE(4); + const preludeCrc = frame.readUInt32BE(8); + const headersStart = 12; + const headersEnd = headersStart + headersLength; + const payloadStart = headersEnd; + const payloadEnd = totalLength - 4; + const messageCrc = frame.readUInt32BE(totalLength - 4); + + // Parse headers + const headers: Array<{ name: string; type: number; value: string }> = []; + let offset = headersStart; + while (offset < headersEnd) { + const nameLen = frame.readUInt8(offset); + offset += 1; + const name = frame.subarray(offset, offset + nameLen).toString("utf8"); + offset += nameLen; + const type = frame.readUInt8(offset); + offset += 1; + const valueLen = frame.readUInt16BE(offset); + offset += 2; + const value = frame.subarray(offset, offset + valueLen).toString("utf8"); + offset += valueLen; + headers.push({ name, type, value }); + } + + const payload = frame.subarray(payloadStart, payloadEnd); + + return { totalLength, headersLength, preludeCrc, headers, payload, messageCrc }; +} + +// ─── encodeEventStreamFrame ───────────────────────────────────────────────── + +describe("encodeEventStreamFrame", () => { + it("produces a frame whose total_length field matches actual buffer size", () => { + const headers = { ":event-type": "contentBlockDelta" }; + const payload = Buffer.from(JSON.stringify({ hello: "world" }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const totalLength = frame.readUInt32BE(0); + expect(totalLength).toBe(frame.length); + }); + + it("headers_length field matches actual serialised headers size", () => { + const headers = { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + }; + const payload = Buffer.from("{}", "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + + // Manually compute expected headers size + let expectedLen = 0; + 
for (const [name, value] of Object.entries(headers)) { + const nameBytes = Buffer.byteLength(name, "utf8"); + const valueBytes = Buffer.byteLength(value, "utf8"); + expectedLen += 1 + nameBytes + 1 + 2 + valueBytes; + } + expect(parsed.headersLength).toBe(expectedLen); + }); + + it("prelude CRC32 covers first 8 bytes correctly", () => { + const headers = { ":message-type": "event" }; + const payload = Buffer.from("test", "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const expected = crc32(frame.subarray(0, 8)); + expect(frame.readUInt32BE(8)).toBe(expected >>> 0); + }); + + it("message CRC32 covers entire frame minus last 4 bytes", () => { + const headers = { key: "val" }; + const payload = Buffer.from(JSON.stringify({ n: 42 }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const expected = crc32(frame.subarray(0, frame.length - 4)); + expect(frame.readUInt32BE(frame.length - 4)).toBe(expected >>> 0); + }); + + it("encodes each header with name_length + name + type(7) + value_length + value", () => { + const headers = { ":event-type": "chunk", ":message-type": "event" }; + const payload = Buffer.alloc(0); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + expect(parsed.headers).toHaveLength(2); + + expect(parsed.headers[0].name).toBe(":event-type"); + expect(parsed.headers[0].type).toBe(7); + expect(parsed.headers[0].value).toBe("chunk"); + + expect(parsed.headers[1].name).toBe(":message-type"); + expect(parsed.headers[1].type).toBe(7); + expect(parsed.headers[1].value).toBe("event"); + }); + + it("payload is raw bytes (not base64)", () => { + const obj = { text: "hello world" }; + const payload = Buffer.from(JSON.stringify(obj), "utf8"); + const frame = encodeEventStreamFrame({}, payload); + + const parsed = parseFrame(frame); + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded).toEqual(obj); + }); + + it("handles empty headers and empty 
payload", () => { + const frame = encodeEventStreamFrame({}, Buffer.alloc(0)); + const parsed = parseFrame(frame); + + // 4 (total) + 4 (headers_length) + 4 (prelude_crc) + 0 (headers) + 0 (payload) + 4 (msg_crc) = 16 + expect(parsed.totalLength).toBe(16); + expect(parsed.headersLength).toBe(0); + expect(parsed.headers).toHaveLength(0); + expect(parsed.payload.length).toBe(0); + }); + + it("large payload (100KB) encoding correctness", () => { + const largeString = "A".repeat(100 * 1024); + const payload = Buffer.from(JSON.stringify({ data: largeString }), "utf8"); + const frame = encodeEventStreamFrame({ ":event-type": "big" }, payload); + + const parsed = parseFrame(frame); + expect(parsed.totalLength).toBe(frame.length); + + // Verify CRCs + const expectedPrelude = crc32(frame.subarray(0, 8)); + expect(parsed.preludeCrc).toBe(expectedPrelude >>> 0); + const expectedMsg = crc32(frame.subarray(0, frame.length - 4)); + expect(parsed.messageCrc).toBe(expectedMsg >>> 0); + + // Verify payload + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded.data.length).toBe(100 * 1024); + }); + + it("handles UTF-8 multi-byte characters in headers and payload", () => { + const headers = { "x-emoji": "\u{1F600}" }; + const payload = Buffer.from(JSON.stringify({ msg: "\u{1F4A9}" }), "utf8"); + const frame = encodeEventStreamFrame(headers, payload); + + const parsed = parseFrame(frame); + expect(parsed.headers[0].value).toBe("\u{1F600}"); + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded.msg).toBe("\u{1F4A9}"); + }); +}); + +// ─── encodeEventStreamMessage ─────────────────────────────────────────────── + +describe("encodeEventStreamMessage", () => { + it("wraps JSON payload with standard AWS headers", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { delta: { text: "hi" } }); + const parsed = parseFrame(frame); + + const headerMap = Object.fromEntries(parsed.headers.map((h) => [h.name, h.value])); + 
expect(headerMap[":content-type"]).toBe("application/json"); + expect(headerMap[":event-type"]).toBe("contentBlockDelta"); + expect(headerMap[":message-type"]).toBe("event"); + }); + + it("payload is raw JSON bytes (not base64)", () => { + const obj = { delta: { text: "test" } }; + const frame = encodeEventStreamMessage("contentBlockDelta", obj); + const parsed = parseFrame(frame); + + const decoded = JSON.parse(parsed.payload.toString("utf8")); + expect(decoded).toEqual(obj); + }); + + it("round-trip: encode then parse produces identical data", () => { + const eventType = "messageStop"; + const payload = { stop_reason: "end_turn", usage: { input_tokens: 10, output_tokens: 5 } }; + const frame = encodeEventStreamMessage(eventType, payload); + const parsed = parseFrame(frame); + + // Verify structural integrity + expect(parsed.totalLength).toBe(frame.length); + const preludeCrc = crc32(frame.subarray(0, 8)); + expect(parsed.preludeCrc).toBe(preludeCrc >>> 0); + const messageCrc = crc32(frame.subarray(0, frame.length - 4)); + expect(parsed.messageCrc).toBe(messageCrc >>> 0); + + // Verify content + const headerMap = Object.fromEntries(parsed.headers.map((h) => [h.name, h.value])); + expect(headerMap[":event-type"]).toBe(eventType); + expect(JSON.parse(parsed.payload.toString("utf8"))).toEqual(payload); + }); +}); + +// ─── writeEventStream ─────────────────────────────────────────────────────── + +describe("writeEventStream", () => { + afterEach(() => { + vi.useRealTimers(); + }); + + it("sets Content-Type to application/vnd.amazon.eventstream", async () => { + const { res, headers } = makeMockResponse(); + await writeEventStream(res, []); + expect(headers()["Content-Type"]).toBe("application/vnd.amazon.eventstream"); + }); + + it("writes binary frames for each event", async () => { + const { res, chunks } = makeMockResponse(); + const events = [ + { eventType: "contentBlockDelta", payload: { delta: { text: "A" } } }, + { eventType: "contentBlockDelta", payload: { 
delta: { text: "B" } } }, + ]; + await writeEventStream(res, events); + + // Wait a tick for PassThrough to flush + await new Promise((r) => setTimeout(r, 10)); + + const output = Buffer.concat(chunks); + expect(output.length).toBeGreaterThan(0); + + // Parse the first frame from the output + const firstTotalLen = output.readUInt32BE(0); + const firstParsed = parseFrame(output.subarray(0, firstTotalLen)); + const firstPayload = JSON.parse(firstParsed.payload.toString("utf8")); + expect(firstPayload).toEqual({ delta: { text: "A" } }); + + // Parse the second frame + const secondParsed = parseFrame(output.subarray(firstTotalLen)); + const secondPayload = JSON.parse(secondParsed.payload.toString("utf8")); + expect(secondPayload).toEqual({ delta: { text: "B" } }); + }); + + it("returns true when stream completes normally", async () => { + const { res } = makeMockResponse(); + const result = await writeEventStream(res, [{ eventType: "test", payload: { data: 1 } }]); + expect(result).toBe(true); + }); + + it("calls res.end() when done", async () => { + const { res, ended } = makeMockResponse(); + await writeEventStream(res, []); + expect(ended()).toBe(true); + }); + + it("returns true immediately when res.writableEnded is already true", async () => { + const { res, headers } = makeMockResponse(); + // Force writableEnded to true + Object.defineProperty(res, "writableEnded", { get: () => true }); + const result = await writeEventStream(res, [{ eventType: "test", payload: { data: 1 } }]); + expect(result).toBe(true); + expect(headers()["Content-Type"]).toBeUndefined(); + }); + + it("supports streaming profile delays", async () => { + vi.useFakeTimers(); + const { res } = makeMockResponse(); + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + ]; + + const promise = writeEventStream(res, events, { + streamingProfile: { ttft: 100, tps: 10 }, + }); + await vi.runAllTimersAsync(); + const result = await promise; + 
expect(result).toBe(true); + }); + + it("supports latency option", async () => { + vi.useFakeTimers(); + const { res } = makeMockResponse(); + const events = [{ eventType: "test", payload: { n: 1 } }]; + + const promise = writeEventStream(res, events, { latency: 50 }); + await vi.runAllTimersAsync(); + const result = await promise; + expect(result).toBe(true); + }); + + it("stops mid-stream on abort signal and returns false", async () => { + const { res } = makeMockResponse(); + const controller = new AbortController(); + + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + { eventType: "test", payload: { n: 3 } }, + ]; + + let chunksSent = 0; + const result = await writeEventStream(res, events, { + signal: controller.signal, + onChunkSent: () => { + chunksSent++; + if (chunksSent === 1) controller.abort(); + }, + }); + + expect(result).toBe(false); + // Should have written exactly one frame before abort + expect(chunksSent).toBe(1); + }); + + it("sets Transfer-Encoding: chunked header", async () => { + const { res, headers } = makeMockResponse(); + await writeEventStream(res, [{ eventType: "test", payload: { n: 1 } }]); + expect(headers()["Transfer-Encoding"]).toBe("chunked"); + }); + + it("onChunkSent fires per event", async () => { + const { res } = makeMockResponse(); + const events = [ + { eventType: "test", payload: { n: 1 } }, + { eventType: "test", payload: { n: 2 } }, + { eventType: "test", payload: { n: 3 } }, + ]; + let count = 0; + await writeEventStream(res, events, { + onChunkSent: () => { + count++; + }, + }); + expect(count).toBe(3); + }); +}); diff --git a/src/__tests__/bedrock-stream.test.ts b/src/__tests__/bedrock-stream.test.ts new file mode 100644 index 0000000..0fa3f03 --- /dev/null +++ b/src/__tests__/bedrock-stream.test.ts @@ -0,0 +1,1155 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import { crc32 } from "node:zlib"; +import type { 
Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { converseToCompletionRequest } from "../bedrock-converse.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function postBinary( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +/** + * Parse sequential binary Event Stream frames from a buffer. 
+ */ +interface ParsedFrame { + eventType: string; + messageType: string; + payload: unknown; + preludeCrc: { expected: number; actual: number }; + messageCrc: { expected: number; actual: number }; +} + +function parseFrames(buf: Buffer): ParsedFrame[] { + const frames: ParsedFrame[] = []; + let offset = 0; + + while (offset < buf.length) { + const totalLength = buf.readUInt32BE(offset); + const frame = buf.subarray(offset, offset + totalLength); + + // Compute CRCs for later assertion + const computedPreludeCrc = crc32(frame.subarray(0, 8)) >>> 0; + const storedPreludeCrc = frame.readUInt32BE(8); + const computedMessageCrc = crc32(frame.subarray(0, totalLength - 4)) >>> 0; + const storedMessageCrc = frame.readUInt32BE(totalLength - 4); + + // Parse headers + const headersLength = frame.readUInt32BE(4); + const headersStart = 12; + const headersEnd = headersStart + headersLength; + const headers: Record = {}; + let hOffset = headersStart; + while (hOffset < headersEnd) { + const nameLen = frame.readUInt8(hOffset); + hOffset += 1; + const name = frame.subarray(hOffset, hOffset + nameLen).toString("utf8"); + hOffset += nameLen; + hOffset += 1; // type byte (7 = STRING) + const valueLen = frame.readUInt16BE(hOffset); + hOffset += 2; + const value = frame.subarray(hOffset, hOffset + valueLen).toString("utf8"); + hOffset += valueLen; + headers[name] = value; + } + + // Parse payload + const payloadStart = headersEnd; + const payloadEnd = totalLength - 4; + const payloadBuf = frame.subarray(payloadStart, payloadEnd); + let payload: unknown = null; + if (payloadBuf.length > 0) { + payload = JSON.parse(payloadBuf.toString("utf8")); + } + + frames.push({ + eventType: headers[":event-type"] ?? "", + messageType: headers[":message-type"] ?? 
"", + payload, + preludeCrc: { expected: storedPreludeCrc, actual: computedPreludeCrc }, + messageCrc: { expected: storedMessageCrc, actual: computedMessageCrc }, + }); + + offset += totalLength; + } + + return frames; +} + +function postPartialBinary( + url: string, + body: unknown, +): Promise<{ body: Buffer; aborted: boolean }> { + return new Promise((resolve) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const chunks: Buffer[] = []; + let aborted = false; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ body: Buffer.concat(chunks), aborted }); + }); + res.on("error", () => { + aborted = true; + }); + res.on("aborted", () => { + aborted = true; + }); + res.on("close", () => { + resolve({ body: Buffer.concat(chunks), aborted }); + }); + }, + ); + req.on("error", () => { + aborted = true; + resolve({ body: Buffer.concat(chunks), aborted }); + }); + req.write(data); + req.end(); + }); +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" 
}, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + ], + }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture]; + +// --- test lifecycle --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── invoke-with-response-stream ──────────────────────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response as binary Event Stream frames", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThanOrEqual(5); + + // messageStart + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + // contentBlockStart + expect(frames[1].eventType).toBe("contentBlockStart"); + expect(frames[1].payload).toEqual({ contentBlockIndex: 0, start: {} }); + + // Content delta(s) — collect text + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas.length).toBeGreaterThanOrEqual(1); + const fullText = deltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); 
+ expect(fullText).toBe("Hi there!"); + + // contentBlockStop + const stopBlock = frames.find((f) => f.eventType === "contentBlockStop"); + expect(stopBlock).toBeDefined(); + expect(stopBlock!.payload).toEqual({ contentBlockIndex: 0 }); + + // messageStop + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop).toBeDefined(); + expect(msgStop!.payload).toEqual({ stopReason: "end_turn" }); + }); + + it("returns tool call response as binary Event Stream frames", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "weather" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // messageStart + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + // contentBlockStart with toolUse + expect(frames[1].eventType).toBe("contentBlockStart"); + const startPayload = frames[1].payload as { + contentBlockIndex: number; + start: { toolUse: { toolUseId: string; name: string } }; + }; + expect(startPayload.contentBlockIndex).toBe(0); + expect(startPayload.start.toolUse.name).toBe("get_weather"); + expect(startPayload.start.toolUse.toolUseId).toBeDefined(); + + // contentBlockDelta(s) with input_json_delta + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas.length).toBeGreaterThanOrEqual(1); + const fullJson = deltas + .map((f) => (f.payload as { delta: { inputJSON: string } }).delta.inputJSON) + .join(""); + expect(JSON.parse(fullJson)).toEqual({ city: "SF" }); + + // messageStop + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); + + it("Content-Type is application/vnd.amazon.eventstream", async () => { + instance = await 
createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + }); + + it("binary frames have valid CRC32 checksums", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThan(0); + for (const frame of frames) { + expect(frame.preludeCrc.actual).toBe(frame.preludeCrc.expected); + expect(frame.messageCrc.actual).toBe(frame.messageCrc.expected); + } + }); + + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "fail" }], + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "nomatch" }], + }); + + expect(res.status).toBe(404); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const parsed = new URL(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`); + const res = await new Promise<{ status: number; body: string }>((resolve, 
reject) => { + const raw = "{not valid"; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (r) => { + const chunks: Buffer[] = []; + r.on("data", (c: Buffer) => chunks.push(c)); + r.on("end", () => { + resolve({ + status: r.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); + + expect(res.status).toBe(400); + }); +}); + +// ─── invoke-with-response-stream: missing messages ────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (missing messages)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("23. returns 400 for empty body (no messages)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("messages"); + }); +}); + +// ─── invoke-with-response-stream: multiple tool calls ─────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (multiple tool calls)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("24. 
emits correct contentBlockIndex for 2 tool calls", async () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + instance = await createServer([multiToolFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "multi-tool" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Find contentBlockStart frames + const blockStarts = frames.filter((f) => f.eventType === "contentBlockStart"); + expect(blockStarts.length).toBeGreaterThanOrEqual(2); + + // First tool at contentBlockIndex 0 + const start0 = blockStarts[0].payload as { + contentBlockIndex: number; + start: { toolUse: { name: string } }; + }; + expect(start0.contentBlockIndex).toBe(0); + expect(start0.start.toolUse.name).toBe("get_weather"); + + // Second tool at contentBlockIndex 1 + const start1 = blockStarts[1].payload as { + contentBlockIndex: number; + start: { toolUse: { name: string } }; + }; + expect(start1.contentBlockIndex).toBe(1); + expect(start1.start.toolUse.name).toBe("get_time"); + + // contentBlockStop should also have correct indices + const blockStops = frames.filter((f) => f.eventType === "contentBlockStop"); + expect(blockStops.length).toBeGreaterThanOrEqual(2); + expect((blockStops[0].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0); + expect((blockStops[1].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(1); + + // messageStop should indicate tool_use + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); +}); + +// ─── invoke-with-response-stream: interruption ───────────────────────────── + 
+describe("POST /model/{modelId}/invoke-with-response-stream (interruption)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("truncateAfterChunks truncates the stream", async () => { + const truncatedFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hello, World! This is a longer message for chunking." }, + chunkSize: 5, + truncateAfterChunks: 3, + }; + instance = await createServer([truncatedFixture]); + + const res = await postPartialBinary( + `${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }, + ); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── invoke-with-response-stream: chaos ───────────────────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (chaos)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("chaos drops requests when dropRate is 1", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }); + + // Chaos drop returns 500 with server_error + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.type).toBe("server_error"); + }); +}); + +// ─── Converse non-streaming ───────────────────────────────────────────────── + +describe("POST /model/{modelId}/converse (non-streaming)", () => { + const MODEL_ID = 
"anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response in Converse format", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.output.message.role).toBe("assistant"); + expect(body.output.message.content).toHaveLength(1); + expect(body.output.message.content[0].text).toBe("Hi there!"); + expect(body.stopReason).toBe("end_turn"); + expect(body.usage).toEqual({ inputTokens: 0, outputTokens: 0, totalTokens: 0 }); + }); + + it("returns tool call response in Converse format", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "weather" }] }], + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.output.message.role).toBe("assistant"); + expect(body.output.message.content).toHaveLength(1); + expect(body.output.message.content[0].toolUse.name).toBe("get_weather"); + expect(body.output.message.content[0].toolUse.input).toEqual({ city: "SF" }); + expect(body.output.message.content[0].toolUse.toolUseId).toBeDefined(); + expect(body.stopReason).toBe("tool_use"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(404); + }); + + it("returns 400 for missing messages", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + 
expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("chaos applies to converse endpoint", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(500); + }); +}); + +// ─── Converse streaming ───────────────────────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns text response as Event Stream", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/vnd.amazon.eventstream"); + + const frames = parseFrames(res.body); + + // Verify event sequence + expect(frames[0].eventType).toBe("messageStart"); + expect(frames[0].payload).toEqual({ role: "assistant" }); + + expect(frames[1].eventType).toBe("contentBlockStart"); + + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullText = deltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); + expect(fullText).toBe("Hi there!"); + + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "end_turn" }); + }); + + it("returns tool call response as Event Stream", async () => { + instance = await createServer(allFixtures); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "weather" }] }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + expect(frames[0].eventType).toBe("messageStart"); + 
+ const startFrame = frames.find((f) => f.eventType === "contentBlockStart"); + const startPayload = startFrame!.payload as { + contentBlockIndex: number; + start: { toolUse: { toolUseId: string; name: string } }; + }; + expect(startPayload.start.toolUse.name).toBe("get_weather"); + + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullJson = deltas + .map((f) => (f.payload as { delta: { inputJSON: string } }).delta.inputJSON) + .join(""); + expect(JSON.parse(fullJson)).toEqual({ city: "SF" }); + + const msgStop = frames.find((f) => f.eventType === "messageStop"); + expect(msgStop!.payload).toEqual({ stopReason: "tool_use" }); + }); + + it("supports streaming profile (ttft/tps)", async () => { + const profileFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi" }, + streamingProfile: { ttft: 0, tps: 10000 }, + }; + instance = await createServer([profileFixture]); + + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + expect(frames.length).toBeGreaterThan(0); + }); + + it("truncateAfterChunks interrupts the stream", async () => { + const truncatedFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hello, World! This is a longer message." 
}, + chunkSize: 5, + truncateAfterChunks: 2, + }; + instance = await createServer([truncatedFixture]); + + const res = await postPartialBinary(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); + + it("chaos applies to converse-stream endpoint", async () => { + instance = await createServer(allFixtures, { chaos: { dropRate: 1.0 } }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "hello" }] }], + }); + + expect(res.status).toBe(500); + }); +}); + +// ─── converseToCompletionRequest unit tests ───────────────────────────────── + +describe("converseToCompletionRequest", () => { + it("converts system messages", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + system: [{ text: "You are a helpful assistant." }], + }, + "anthropic.claude-3-5-sonnet", + ); + + expect(result.messages[0]).toEqual({ + role: "system", + content: "You are a helpful assistant.", + }); + expect(result.messages[1]).toEqual({ role: "user", content: "hi" }); + }); + + it("concatenates multiple system blocks", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + system: [{ text: "You are " }, { text: "a helpful assistant." 
}], + }, + "anthropic.claude-3-5-sonnet", + ); + + expect(result.messages[0]).toEqual({ + role: "system", + content: "You are a helpful assistant.", + }); + }); + + it("converts user messages with text content", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "Hello" }, { text: " World" }] }], + }, + "model-id", + ); + + expect(result.messages[0]).toEqual({ role: "user", content: "Hello World" }); + }); + + it("converts tool results in user messages", () => { + const result = converseToCompletionRequest( + { + messages: [ + { + role: "user", + content: [ + { + toolResult: { + toolUseId: "toolu_123", + content: [{ text: "72F and sunny" }], + }, + }, + { text: "Tell me more" }, + ], + }, + ], + }, + "model-id", + ); + + expect(result.messages[0]).toEqual({ + role: "tool", + content: "72F and sunny", + tool_call_id: "toolu_123", + }); + expect(result.messages[1]).toEqual({ + role: "user", + content: "Tell me more", + }); + }); + + it("converts assistant messages with toolUse blocks", () => { + const result = converseToCompletionRequest( + { + messages: [ + { role: "user", content: [{ text: "search" }] }, + { + role: "assistant", + content: [ + { text: "Let me search." 
}, + { + toolUse: { + toolUseId: "toolu_456", + name: "search", + input: { query: "cats" }, + }, + }, + ], + }, + ], + }, + "model-id", + ); + + expect(result.messages[1]).toMatchObject({ + role: "assistant", + content: "Let me search.", + tool_calls: [ + { + id: "toolu_456", + type: "function", + function: { name: "search", arguments: '{"query":"cats"}' }, + }, + ], + }); + }); + + it("converts tool definitions from toolConfig", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + toolConfig: { + tools: [ + { + toolSpec: { + name: "get_weather", + description: "Get weather for a city", + inputSchema: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }, + ], + }, + }, + "model-id", + ); + + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather for a city", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }, + }); + }); + + it("passes through inferenceConfig temperature", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + inferenceConfig: { temperature: 0.7 }, + }, + "model-id", + ); + + expect(result.temperature).toBe(0.7); + }); + + it("sets model from modelId parameter", () => { + const result = converseToCompletionRequest( + { + messages: [{ role: "user", content: [{ text: "hi" }] }], + }, + "anthropic.claude-3-5-sonnet-20241022-v2:0", + ); + + expect(result.model).toBe("anthropic.claude-3-5-sonnet-20241022-v2:0"); + }); +}); + +// ─── Converse edge cases ───────────────────────────────────────────────────── + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: 
parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +describe("POST /model/{modelId}/converse (malformed JSON)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 for malformed JSON body", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/model/${MODEL_ID}/converse`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (missing messages)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 when messages array is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, {}); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); +}); + +// ─── invoke-with-response-stream: unknown response type → 500 ────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on streaming endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-stream" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await 
post(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "embed-stream" }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── invoke-with-response-stream: malformed tool call arguments ───────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (malformed tool args)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("malformed tool call arguments fall back to empty JSON string", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-tool-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "bad-tool-args" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Find contentBlockDelta frames with inputJSON + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + const fullJson = deltas + .map((f) => { + const payload = f.payload as { delta: { inputJSON?: string } }; + return payload.delta.inputJSON ?? 
""; + }) + .join(""); + // Malformed arguments should fall back to "{}" + expect(fullJson).toBe("{}"); + }); +}); + +// ─── invoke-with-response-stream: empty content string ────────────────────── + +describe("POST /model/{modelId}/invoke-with-response-stream (empty content)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("empty content produces event sequence with zero content deltas", async () => { + const emptyContentFixture: Fixture = { + match: { userMessage: "empty-content" }, + response: { content: "" }, + }; + instance = await createServer([emptyContentFixture]); + const res = await postBinary(`${instance.url}/model/${MODEL_ID}/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "empty-content" }], + }); + + expect(res.status).toBe(200); + const frames = parseFrames(res.body); + + // Should still have messageStart, contentBlockStart, contentBlockStop, messageStop + expect(frames[0].eventType).toBe("messageStart"); + expect(frames.find((f) => f.eventType === "contentBlockStart")).toBeDefined(); + expect(frames.find((f) => f.eventType === "contentBlockStop")).toBeDefined(); + expect(frames.find((f) => f.eventType === "messageStop")).toBeDefined(); + + // Content deltas should be zero (empty string → no chunks) + const deltas = frames.filter((f) => f.eventType === "contentBlockDelta"); + expect(deltas).toHaveLength(0); + }); +}); + +// ─── converse-stream: malformed JSON → 400 ────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream (malformed JSON)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 400 for malformed JSON body", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/model/${MODEL_ID}/converse-stream`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + 
expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +// ─── Strict mode: converse and converse-stream ────────────────────────────── + +describe("POST /model/{modelId}/converse (strict mode)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 503 in strict mode when no fixture matches", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (strict mode)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 503 in strict mode when no fixture matches", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "nomatch" }] }], + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); +}); + +// ─── Unknown response type through converse and converse-stream ───────────── + +describe("POST /model/{modelId}/converse (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on converse endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-converse" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "embed-converse" }] }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + 
expect(body.error.message).toContain("did not match any known type"); + }); +}); + +describe("POST /model/{modelId}/converse-stream (unknown response type)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns 500 for embedding fixture on converse-stream endpoint", async () => { + const embeddingFixture: Fixture = { + match: { userMessage: "embed-stream" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([embeddingFixture]); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "embed-stream" }] }], + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Error fixture through converse-stream ────────────────────────────────── + +describe("POST /model/{modelId}/converse-stream (error fixture)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns error fixture with correct status through /converse-stream", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse-stream`, { + messages: [{ role: "user", content: [{ text: "fail" }] }], + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +// ─── Error fixture through /converse endpoint ─────────────────────────────── + +describe("POST /model/{modelId}/converse (error fixture)", () => { + const MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + it("returns error fixture with correct status through /converse", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/${MODEL_ID}/converse`, { + messages: [{ role: "user", content: [{ text: "fail" }] }], + }); + + expect(res.status).toBe(429); + const body = 
JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); diff --git a/src/__tests__/cohere.test.ts b/src/__tests__/cohere.test.ts new file mode 100644 index 0000000..4b6228f --- /dev/null +++ b/src/__tests__/cohere.test.ts @@ -0,0 +1,932 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { cohereToCompletionRequest } from "../cohere.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +function postWithHeaders( + url: string, + body: unknown, + extraHeaders: Record<string, string>, +): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + ...extraHeaders, + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +interface SSEEvent { + event: string; + data: Record<string, unknown>; +} + +function parseSSEEvents(body: string): SSEEvent[] { + const events: SSEEvent[] = []; + const blocks = body.split("\n\n").filter((b) => b.trim() !== ""); + for (const block of blocks) { + const lines = block.split("\n"); + let eventType = ""; + let dataStr = ""; + for (const line of lines) { + if (line.startsWith("event: ")) { + eventType = line.slice(7); + } else if (line.startsWith("data: ")) { + dataStr = line.slice(6); + } + } + if (eventType && dataStr) { + events.push({ event: eventType, data: JSON.parse(dataStr) as Record<string, unknown> }); + } + } + return events; +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "The capital of France is Paris." 
}, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + ], + }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, errorFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── Unit tests: cohereToCompletionRequest ────────────────────────────────── + +describe("cohereToCompletionRequest", () => { + it("converts basic user message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + }); + expect(result.model).toBe("command-r-plus"); + expect(result.messages).toEqual([{ role: "user", content: "hello" }]); + }); + + it("converts system message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { role: "system", content: "Be helpful" }, + { role: "user", content: "hello" }, + ], + }); + expect(result.messages[0]).toEqual({ role: "system", content: "Be helpful" }); + expect(result.messages[1]).toEqual({ role: "user", content: "hello" }); + }); + + it("converts tool message with tool_call_id", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_abc", + }, + ], + }); + expect(result.messages[0]).toEqual({ + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_abc", + }); + }); + + it("converts tools", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + tools: [ + { + 
type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }, + ], + }); + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }); + }); + + it("passes through stream field", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + stream: true, + }); + expect(result.stream).toBe(true); + }); + + it("returns undefined tools when none provided", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [{ role: "user", content: "hi" }], + }); + expect(result.tools).toBeUndefined(); + }); +}); + +// ─── Unit tests: cohereToCompletionRequest (assistant message) ─────────────── + +describe("cohereToCompletionRequest (assistant message)", () => { + it("converts assistant message", () => { + const result = cohereToCompletionRequest({ + model: "command-r-plus", + messages: [ + { role: "user", content: "hello" }, + { role: "assistant", content: "Hi there" }, + ], + }); + expect(result.messages[1]).toEqual({ role: "assistant", content: "Hi there" }); + }); +}); + +// ─── Integration tests: POST /v2/chat (non-streaming text) ───────────────── + +describe("POST /v2/chat (non-streaming text)", () => { + it("returns text response with all required fields", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.id).toMatch(/^msg_/); + 
expect(body.finish_reason).toBe("COMPLETE"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toEqual([ + { type: "text", text: "The capital of France is Paris." }, + ]); + expect(body.message.tool_calls).toEqual([]); + expect(body.message.tool_plan).toBe(""); + expect(body.message.citations).toEqual([]); + expect(body.usage.billed_units).toEqual({ + input_tokens: 0, + output_tokens: 0, + search_units: 0, + classifications: 0, + }); + expect(body.usage.tokens).toEqual({ input_tokens: 0, output_tokens: 0 }); + }); +}); + +// ─── Integration tests: POST /v2/chat (non-streaming tool call) ───────────── + +describe("POST /v2/chat (non-streaming tool call)", () => { + it("returns tool call with TOOL_CALL finish_reason", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weather" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.finish_reason).toBe("TOOL_CALL"); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].id).toMatch(/^call_/); + expect(body.message.tool_calls[0].type).toBe("function"); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[0].function.arguments).toBe('{"city":"SF"}'); + expect(body.message.content).toEqual([]); + expect(body.usage).toBeDefined(); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming text) ───────────────────── + +describe("POST /v2/chat (streaming text)", () => { + it("produces correct event sequence", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const 
events = parseSSEEvents(res.body); + expect(events.length).toBeGreaterThanOrEqual(5); + + // message-start + expect(events[0].event).toBe("message-start"); + expect(events[0].data.type).toBe("message-start"); + const msgStart = events[0].data.delta as Record; + const startMsg = msgStart.message as Record; + expect(startMsg.role).toBe("assistant"); + expect(startMsg.content).toEqual([]); + expect(startMsg.tool_plan).toBe(""); + expect(startMsg.tool_calls).toEqual([]); + expect(startMsg.citations).toEqual([]); + + // content-start (type: "text" only, no text field) + expect(events[1].event).toBe("content-start"); + expect(events[1].data.type).toBe("content-start"); + expect(events[1].data.index).toBe(0); + const csDelta = events[1].data.delta as Record; + const csMsg = csDelta.message as Record; + const csContent = csMsg.content as Record; + expect(csContent.type).toBe("text"); + expect(csContent).not.toHaveProperty("text"); + + // content-delta(s) + const contentDeltas = events.filter((e) => e.event === "content-delta"); + expect(contentDeltas.length).toBeGreaterThanOrEqual(1); + for (const cd of contentDeltas) { + expect(cd.data.type).toBe("content-delta"); + expect(cd.data.index).toBe(0); + const delta = cd.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + expect(content.type).toBe("text"); + expect(typeof content.text).toBe("string"); + } + + // Reconstruct full text from deltas + const fullText = contentDeltas + .map((cd) => { + const delta = cd.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + return content.text as string; + }) + .join(""); + expect(fullText).toBe("The capital of France is Paris."); + + // content-end + const contentEnd = events.find((e) => e.event === "content-end"); + expect(contentEnd).toBeDefined(); + expect(contentEnd!.data.type).toBe("content-end"); + expect(contentEnd!.data.index).toBe(0); + + // message-end + const msgEnd = 
events[events.length - 1]; + expect(msgEnd.event).toBe("message-end"); + expect(msgEnd.data.type).toBe("message-end"); + const endDelta = msgEnd.data.delta as Record; + expect(endDelta.finish_reason).toBe("COMPLETE"); + const usage = endDelta.usage as Record; + expect(usage.billed_units).toEqual({ + input_tokens: 0, + output_tokens: 0, + search_units: 0, + classifications: 0, + }); + expect(usage.tokens).toEqual({ input_tokens: 0, output_tokens: 0 }); + }); + + it("content-start has type:text only and no text field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const events = parseSSEEvents(res.body); + const contentStart = events.find((e) => e.event === "content-start"); + expect(contentStart).toBeDefined(); + const delta = contentStart!.data.delta as Record; + const msg = delta.message as Record; + const content = msg.content as Record; + expect(content.type).toBe("text"); + expect(Object.keys(content)).toEqual(["type"]); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming tool calls) ──────────────── + +describe("POST /v2/chat (streaming tool calls)", () => { + it("produces correct tool call event sequence", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weather" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseSSEEvents(res.body); + + // message-start + expect(events[0].event).toBe("message-start"); + + // tool-plan-delta + const planDelta = events.find((e) => e.event === "tool-plan-delta"); + expect(planDelta).toBeDefined(); + expect(planDelta!.data.type).toBe("tool-plan-delta"); + const planMsg = (planDelta!.data.delta as Record).message as Record< + string, + unknown + >; + expect(typeof 
planMsg.tool_plan).toBe("string"); + + // tool-call-start + const tcStart = events.find((e) => e.event === "tool-call-start"); + expect(tcStart).toBeDefined(); + expect(tcStart!.data.type).toBe("tool-call-start"); + expect(tcStart!.data.index).toBe(0); + const tcStartDelta = tcStart!.data.delta as Record; + const tcStartMsg = tcStartDelta.message as Record; + const tcStartCalls = tcStartMsg.tool_calls as Record; + expect(tcStartCalls.id).toMatch(/^call_/); + expect(tcStartCalls.type).toBe("function"); + const tcStartFn = tcStartCalls.function as Record; + expect(tcStartFn.name).toBe("get_weather"); + expect(tcStartFn.arguments).toBe(""); + + // tool-call-delta(s) + const tcDeltas = events.filter((e) => e.event === "tool-call-delta"); + expect(tcDeltas.length).toBeGreaterThanOrEqual(1); + const argsAccum = tcDeltas + .map((e) => { + const delta = e.data.delta as Record; + const msg = delta.message as Record; + const calls = msg.tool_calls as Record; + const fn = calls.function as Record; + return fn.arguments as string; + }) + .join(""); + expect(argsAccum).toBe('{"city":"SF"}'); + + // tool-call-end + const tcEnd = events.find((e) => e.event === "tool-call-end"); + expect(tcEnd).toBeDefined(); + expect(tcEnd!.data.type).toBe("tool-call-end"); + expect(tcEnd!.data.index).toBe(0); + + // message-end with TOOL_CALL + const msgEnd = events[events.length - 1]; + expect(msgEnd.event).toBe("message-end"); + const endDelta = msgEnd.data.delta as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + expect(endDelta.usage).toBeDefined(); + }); +}); + +// ─── Integration tests: POST /v2/chat (message-end usage) ─────────────────── + +describe("POST /v2/chat (message-end usage)", () => { + it("includes usage with both billed_units and tokens", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const events 
= parseSSEEvents(res.body); + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const delta = msgEnd!.data.delta as Record; + const usage = delta.usage as Record; + expect(usage.billed_units).toBeDefined(); + expect(usage.tokens).toBeDefined(); + const billedUnits = usage.billed_units as Record; + expect(billedUnits.input_tokens).toBe(0); + expect(billedUnits.output_tokens).toBe(0); + expect(billedUnits.search_units).toBe(0); + expect(billedUnits.classifications).toBe(0); + const tokens = usage.tokens as Record; + expect(tokens.input_tokens).toBe(0); + expect(tokens.output_tokens).toBe(0); + }); +}); + +// ─── Integration tests: POST /v2/chat (validation) ────────────────────────── + +describe("POST /v2/chat (validation)", () => { + it("returns 400 when model is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + messages: [{ role: "user", content: "hello" }], + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("model is required"); + }); + + it("returns 400 when messages array is missing", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r", + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/v2/chat`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: 
"user", content: "nomatch" }], + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming profile) ─────────────────── + +describe("POST /v2/chat (streaming profile)", () => { + it("applies streaming profile latency", async () => { + const slowFixture: Fixture = { + match: { userMessage: "slow" }, + response: { content: "AB" }, + chunkSize: 1, + streamingProfile: { ttft: 50, tps: 20, jitter: 0 }, + }; + instance = await createServer([slowFixture]); + + const start = Date.now(); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "slow" }], + stream: true, + }); + const elapsed = Date.now() - start; + + expect(res.status).toBe(200); + // Should have noticeable delay from streaming profile + expect(elapsed).toBeGreaterThanOrEqual(80); + }); +}); + +// ─── Integration tests: POST /v2/chat (interruption) ──────────────────────── + +describe("POST /v2/chat (interruption)", () => { + it("truncates after specified number of chunks", async () => { + const truncFixture: Fixture = { + match: { userMessage: "truncate" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 1, + truncateAfterChunks: 3, + }; + instance = await createServer([truncFixture]); + + const res = await new Promise<{ aborted: boolean; body: string }>((resolve) => { + const data = JSON.stringify({ + model: "command-r-plus", + messages: [{ role: "user", content: "truncate" }], + stream: true, + }); + const parsed = new URL(`${instance!.url}/v2/chat`); + const chunks: Buffer[] = []; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", 
() => { + resolve({ aborted: false, body: Buffer.concat(chunks).toString() }); + }); + res.on("aborted", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + }, + ); + req.on("error", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + req.write(data); + req.end(); + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── Integration tests: POST /v2/chat (chaos) ────────────────────────────── + +describe("POST /v2/chat (chaos)", () => { + it("drops request when chaos drop header is set to 1.0", async () => { + instance = await createServer(allFixtures); + const res = await postWithHeaders( + `${instance.url}/v2/chat`, + { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }, + { "x-llmock-chaos-drop": "1.0" }, + ); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +// ─── Integration tests: POST /v2/chat (error fixture) ─────────────────────── + +describe("POST /v2/chat (error fixture)", () => { + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "fail" }], + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +// ─── Integration tests: POST /v2/chat (streaming default) ─────────────────── + +describe("POST /v2/chat (streaming default)", () => { + it("20. 
returns non-streaming JSON when stream field is omitted", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + // stream field intentionally omitted — Cohere defaults to non-streaming + }); + + expect(res.status).toBe(200); + // Should be non-streaming JSON, NOT SSE + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.id).toMatch(/^msg_/); + expect(body.finish_reason).toBe("COMPLETE"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toEqual([ + { type: "text", text: "The capital of France is Paris." }, + ]); + }); +}); + +// ─── Integration tests: POST /v2/chat (multiple tool calls) ───────────────── + +describe("POST /v2/chat (multiple tool calls)", () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + + it("21a. non-streaming returns 2 items in tool_calls array", async () => { + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "multi-tool" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.finish_reason).toBe("TOOL_CALL"); + expect(body.message.tool_calls).toHaveLength(2); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[1].function.name).toBe("get_time"); + }); + + it("21b. 
streaming produces 2 tool-call-start events", async () => { + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "multi-tool" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const events = parseSSEEvents(res.body); + const toolCallStarts = events.filter((e) => e.event === "tool-call-start"); + expect(toolCallStarts).toHaveLength(2); + + // First tool at index 0 + expect(toolCallStarts[0].data.index).toBe(0); + const tc0Delta = toolCallStarts[0].data.delta as Record; + const tc0Msg = tc0Delta.message as Record; + const tc0Calls = tc0Msg.tool_calls as Record; + const tc0Fn = tc0Calls.function as Record; + expect(tc0Fn.name).toBe("get_weather"); + + // Second tool at index 1 + expect(toolCallStarts[1].data.index).toBe(1); + const tc1Delta = toolCallStarts[1].data.delta as Record; + const tc1Msg = tc1Delta.message as Record; + const tc1Calls = tc1Msg.tool_calls as Record; + const tc1Fn = tc1Calls.function as Record; + expect(tc1Fn.name).toBe("get_time"); + + // message-end should have TOOL_CALL finish_reason + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const endDelta = msgEnd!.data.delta as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + }); +}); + +// ─── Integration tests: POST /v2/chat (malformed tool call arguments) ─────── + +describe("POST /v2/chat (malformed tool call arguments)", () => { + it("falls back to empty string when arguments is not valid JSON", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: 
"bad-args" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("fn"); + // Cohere passes through the arguments string as-is (logs warning) + expect(body.message.tool_calls[0].function.arguments).toBe("NOT VALID JSON"); + }); +}); + +// ─── Integration tests: POST /v2/chat (strict mode) ──────────────────────── + +describe("POST /v2/chat (strict mode)", () => { + it("returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Integration tests: POST /v2/chat (unknown response type → 500) ───────── + +describe("POST /v2/chat (unknown response type)", () => { + it("returns 500 for a fixture with unrecognizable response shape", async () => { + const weirdFixture: Fixture = { + match: { userMessage: "weird" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }; + instance = await createServer([weirdFixture]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "weird" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Integration tests: POST /v2/chat (error fixture no explicit status) ──── + +describe("POST /v2/chat (error fixture no explicit status)", () => { + it("defaults to 500 when error fixture has no status", async () => { + const noStatusError: Fixture = { + match: { userMessage: "err-no-status" }, + response: { + error: { + message: "Something went 
wrong", + type: "server_error", + }, + }, + }; + instance = await createServer([noStatusError]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "err-no-status" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Something went wrong"); + }); +}); + +// ─── Integration tests: POST /v2/chat (CORS headers) ──────────────────────── + +describe("POST /v2/chat (CORS headers)", () => { + it("includes CORS headers in response", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.headers["access-control-allow-origin"]).toBe("*"); + }); +}); + +// ─── Integration tests: POST /v2/chat (journal) ──────────────────────────── + +describe("POST /v2/chat (journal)", () => { + it("records request in the journal", async () => { + instance = await createServer(allFixtures); + await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/v2/chat"); + expect(entry!.response.status).toBe(200); + expect(entry!.response.fixture).toBe(textFixture); + expect(entry!.body.model).toBe("command-r-plus"); + }); +}); diff --git a/src/__tests__/drift/bedrock-stream.drift.ts b/src/__tests__/drift/bedrock-stream.drift.ts new file mode 100644 index 0000000..01e0750 --- /dev/null +++ b/src/__tests__/drift/bedrock-stream.drift.ts @@ -0,0 +1,145 @@ +/** + * AWS Bedrock drift tests. + * + * Three-way comparison: SDK types x real API x llmock output. + * Covers invoke-with-response-stream and converse endpoints. 
+ */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const HAS_CREDENTIALS = + !!process.env.AWS_ACCESS_KEY_ID && + !!process.env.AWS_SECRET_ACCESS_KEY && + !!process.env.AWS_REGION; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Bedrock InvokeModel response shape. + * Bedrock wraps the model output in its own envelope. + */ +function bedrockInvokeResponseShape() { + return extractShape({ + body: "base64-encoded-string", + contentType: "application/json", + $metadata: { + httpStatusCode: 200, + requestId: "req-abc", + }, + }); +} + +/** + * Minimal Bedrock Converse response shape. + */ +function bedrockConverseResponseShape() { + return extractShape({ + output: { + message: { + role: "assistant", + content: [{ text: "Hello!" 
}], + }, + }, + stopReason: "end_turn", + usage: { + inputTokens: 10, + outputTokens: 5, + totalTokens: 15, + }, + metrics: { + latencyMs: 100, + }, + $metadata: { + httpStatusCode: 200, + requestId: "req-abc", + }, + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Bedrock drift", () => { + it("invoke-with-response-stream mock shape is plausible", async () => { + const sdkShape = bedrockInvokeResponseShape(); + + // Bedrock streaming uses binary event-stream framing, so we test the + // mock's JSON response shape for the non-streaming invoke endpoint. + const mockRes = await httpPost( + `${instance.url}/model/anthropic.claude-3-haiku-20240307-v1:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 10, + messages: [{ role: "user", content: "Say hello" }], + }, + ); + + expect(mockRes.status).toBe(200); + + // When real AWS credentials are available, send the same request to + // the real Bedrock API and compare shapes. For now, validate mock + // against the SDK shape as both real and expected. 
+ if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Bedrock Invoke", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("converse mock shape matches SDK expectations", async () => { + const sdkShape = bedrockConverseResponseShape(); + + const mockRes = await httpPost( + `${instance.url}/model/anthropic.claude-3-haiku-20240307-v1:0/converse`, + { + messages: [ + { + role: "user", + content: [{ text: "Say hello" }], + }, + ], + inferenceConfig: { maxTokens: 10 }, + }, + ); + + expect(mockRes.status).toBe(200); + + if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Bedrock Converse", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/drift/cohere.drift.ts b/src/__tests__/drift/cohere.drift.ts new file mode 100644 index 0000000..a4a2beb --- /dev/null +++ b/src/__tests__/drift/cohere.drift.ts @@ -0,0 +1,213 @@ +/** + * Cohere drift tests. + * + * Three-way comparison: expected shape x real API x llmock output. + * Covers /v2/chat non-streaming and streaming endpoints. 
+ * + * Requires: COHERE_API_KEY + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, parseDataOnlySSE, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const COHERE_API_KEY = process.env.COHERE_API_KEY; +const HAS_CREDENTIALS = !!COHERE_API_KEY; + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Cohere /v2/chat response shape (non-streaming). + */ +function cohereChatResponseShape() { + return extractShape({ + id: "chat-abc123", + finish_reason: "COMPLETE", + message: { + role: "assistant", + content: [{ type: "text", text: "Hello!" }], + }, + usage: { + billed_units: { + input_tokens: 10, + output_tokens: 5, + }, + tokens: { + input_tokens: 10, + output_tokens: 5, + }, + }, + }); +} + +/** + * Minimal Cohere /v2/chat streaming chunk shape. 
+ */ +function cohereChatStreamChunkShape() { + return extractShape({ + id: "chat-abc123", + type: "content-delta", + delta: { + message: { + content: { text: "Hel" }, + }, + }, + }); +} + +// --------------------------------------------------------------------------- +// Real API helpers +// --------------------------------------------------------------------------- + +async function cohereChatNonStreaming( + messages: { role: string; content: string }[], +): Promise<{ status: number; body: string }> { + const res = await fetch("https://api.cohere.com/v2/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${COHERE_API_KEY}`, + }, + body: JSON.stringify({ + model: "command-r-plus", + messages, + stream: false, + max_tokens: 10, + }), + }); + return { status: res.status, body: await res.text() }; +} + +async function cohereChatStreaming( + messages: { role: string; content: string }[], +): Promise<{ status: number; body: string }> { + const res = await fetch("https://api.cohere.com/v2/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${COHERE_API_KEY}`, + }, + body: JSON.stringify({ + model: "command-r-plus", + messages, + stream: true, + max_tokens: 10, + }), + }); + return { status: res.status, body: await res.text() }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Cohere drift", () => { + it("non-streaming /v2/chat shape matches", async () => { + const sdkShape = cohereChatResponseShape(); + const messages = [{ role: "user", content: "Say hello" }]; + + const [realRes, mockRes] = await Promise.all([ + cohereChatNonStreaming(messages), + httpPost(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages, + stream: false, + }), + ]); + + expect(realRes.status).toBe(200); + 
expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Cohere /v2/chat (non-streaming)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("streaming /v2/chat shape matches", async () => { + const sdkChunkShape = cohereChatStreamChunkShape(); + const messages = [{ role: "user", content: "Say hello" }]; + + const [realRes, mockRes] = await Promise.all([ + cohereChatStreaming(messages), + httpPost(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages, + stream: true, + }), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + // Parse SSE chunks from both responses + const realChunks = parseDataOnlySSE(realRes.body); + const mockChunks = parseDataOnlySSE(mockRes.body); + + if (realChunks.length > 0 && mockChunks.length > 0) { + // Compare first chunk shape (content-delta) + const realChunkShape = extractShape(realChunks[0]); + const mockChunkShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realChunkShape, mockChunkShape); + const report = formatDriftReport("Cohere /v2/chat (streaming first chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + + // Also compare the LAST chunk shape (has finish_reason, usage) + const sdkLastChunkShape = extractShape({ + id: "chat-abc123", + type: "message-end", + delta: { + finish_reason: "COMPLETE", + usage: { + billed_units: { input_tokens: 10, output_tokens: 5 }, + tokens: { input_tokens: 10, output_tokens: 5 }, + }, + }, + }); + + const realLastShape = extractShape(realChunks[realChunks.length - 1]); + const 
mockLastShape = extractShape(mockChunks[mockChunks.length - 1]);
+
+        const lastDiffs = triangulate(sdkLastChunkShape, realLastShape, mockLastShape);
+        const lastReport = formatDriftReport("Cohere /v2/chat (streaming last chunk)", lastDiffs);
+
+        if (shouldFail(lastDiffs)) {
+          expect.soft([], lastReport).toEqual(lastDiffs.filter((d) => d.severity === "critical"));
+        }
+      }
+    }
+  });
+});
diff --git a/src/__tests__/drift/ollama.drift.ts b/src/__tests__/drift/ollama.drift.ts
new file mode 100644
index 0000000..4e0114e
--- /dev/null
+++ b/src/__tests__/drift/ollama.drift.ts
@@ -0,0 +1,219 @@
+/**
+ * Ollama drift tests.
+ *
+ * Compares llmock's Ollama endpoint output shapes against a real local
+ * Ollama instance. Skips automatically if Ollama is not reachable.
+ *
+ * Requires: local Ollama running at http://localhost:11434
+ */
+
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import type { ServerInstance } from "../../server.js";
+import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js";
+import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js";
+
+// ---------------------------------------------------------------------------
+// Connectivity check
+// ---------------------------------------------------------------------------
+
+let OLLAMA_REACHABLE = false;
+
+async function checkOllamaConnectivity(): Promise<boolean> {
+  try {
+    const res = await fetch("http://localhost:11434/api/tags", {
+      signal: AbortSignal.timeout(3000),
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Server lifecycle
+// ---------------------------------------------------------------------------
+
+let instance: ServerInstance;
+
+beforeAll(async () => {
+  OLLAMA_REACHABLE = await checkOllamaConnectivity();
+  instance = await startDriftServer();
+});
+
+afterAll(async () => {
+  await stopDriftServer(instance);
+});
+
+// 
--------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Ollama /api/chat response shape (non-streaming final message). + */ +function ollamaChatResponseShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + message: { + role: "assistant", + content: "Hello!", + }, + done: true, + done_reason: "stop", + total_duration: 1000000, + load_duration: 100000, + prompt_eval_count: 10, + prompt_eval_duration: 500000, + eval_count: 5, + eval_duration: 400000, + }); +} + +/** + * Minimal Ollama /api/generate response shape (non-streaming). + */ +function ollamaGenerateResponseShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + response: "Hello!", + done: true, + done_reason: "stop", + total_duration: 1000000, + load_duration: 100000, + prompt_eval_count: 10, + prompt_eval_duration: 500000, + eval_count: 5, + eval_duration: 400000, + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Streaming shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Ollama /api/chat streaming chunk shape (non-final). 
+ */ +function ollamaChatStreamChunkShape() { + return extractShape({ + model: "llama3.2", + created_at: "2024-01-01T00:00:00Z", + message: { + role: "assistant", + content: "H", + }, + done: false, + }); +} + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +describe.skipIf(!OLLAMA_REACHABLE)("Ollama drift", () => { + it("/api/chat response shape matches", async () => { + const sdkShape = ollamaChatResponseShape(); + + const body = { + model: "llama3.2", + messages: [{ role: "user", content: "Say hello" }], + stream: false, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/chat", body), + httpPost(`${instance.url}/api/chat`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Ollama /api/chat", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("/api/chat streaming NDJSON chunk shapes match", async () => { + const sdkChunkShape = ollamaChatStreamChunkShape(); + + const body = { + model: "llama3.2", + messages: [{ role: "user", content: "Say hello" }], + stream: true, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/chat", body), + httpPost(`${instance.url}/api/chat`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realChunks = parseNDJSON(realRes.body); + const mockChunks = parseNDJSON(mockRes.body); + + expect(realChunks.length).toBeGreaterThan(0); + 
expect(mockChunks.length).toBeGreaterThan(0); + + // Compare first (non-final) chunk shapes + const realFirstShape = extractShape(realChunks[0]); + const mockFirstShape = extractShape(mockChunks[0]); + + const diffs = triangulate(sdkChunkShape, realFirstShape, mockFirstShape); + const report = formatDriftReport("Ollama /api/chat (streaming chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("/api/generate response shape matches", async () => { + const sdkShape = ollamaGenerateResponseShape(); + + const body = { + model: "llama3.2", + prompt: "Say hello", + stream: false, + }; + + const [realRes, mockRes] = await Promise.all([ + httpPost("http://localhost:11434/api/generate", body), + httpPost(`${instance.url}/api/generate`, body), + ]); + + expect(realRes.status).toBe(200); + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const realShape = extractShape(JSON.parse(realRes.body)); + const mockShape = extractShape(JSON.parse(mockRes.body)); + + const diffs = triangulate(sdkShape, realShape, mockShape); + const report = formatDriftReport("Ollama /api/generate", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/drift/vertex-ai.drift.ts b/src/__tests__/drift/vertex-ai.drift.ts new file mode 100644 index 0000000..358bc21 --- /dev/null +++ b/src/__tests__/drift/vertex-ai.drift.ts @@ -0,0 +1,165 @@ +/** + * Vertex AI / Gemini drift tests. + * + * Verifies that llmock's Vertex AI routing produces response shapes + * consistent with the Gemini generateContent endpoint. 
+ * + * Requires: GOOGLE_APPLICATION_CREDENTIALS or (VERTEX_AI_PROJECT + VERTEX_AI_LOCATION) + */ + +import { describe, it, expect, beforeAll, afterAll } from "vitest"; +import type { ServerInstance } from "../../server.js"; +import { extractShape, triangulate, formatDriftReport, shouldFail } from "./schema.js"; +import { httpPost, startDriftServer, stopDriftServer } from "./helpers.js"; + +// --------------------------------------------------------------------------- +// Credentials check +// --------------------------------------------------------------------------- + +const HAS_CREDENTIALS = + !!process.env.GOOGLE_APPLICATION_CREDENTIALS || + (!!process.env.VERTEX_AI_PROJECT && !!process.env.VERTEX_AI_LOCATION); + +// --------------------------------------------------------------------------- +// Server lifecycle +// --------------------------------------------------------------------------- + +let instance: ServerInstance; + +beforeAll(async () => { + instance = await startDriftServer(); +}); + +afterAll(async () => { + await stopDriftServer(instance); +}); + +// --------------------------------------------------------------------------- +// SDK shape stubs +// --------------------------------------------------------------------------- + +/** + * Minimal Gemini generateContent response shape. + * Vertex AI uses the same response format as consumer Gemini. + */ +function geminiGenerateContentShape() { + return extractShape({ + candidates: [ + { + content: { + parts: [{ text: "Hello!" 
}], + role: "model", + }, + finishReason: "STOP", + index: 0, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + modelVersion: "gemini-2.5-flash", + }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe.skipIf(!HAS_CREDENTIALS)("Vertex AI drift", () => { + it("generateContent mock shape matches Gemini format", async () => { + const sdkShape = geminiGenerateContentShape(); + + // Vertex AI routing in llmock follows the path pattern: + // /v1/projects/{project}/locations/{location}/publishers/google/models/{model}:generateContent + const mockRes = await httpPost( + `${instance.url}/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent`, + { + contents: [ + { + role: "user", + parts: [{ text: "Say hello" }], + }, + ], + generationConfig: { maxOutputTokens: 10 }, + }, + ); + + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + const mockShape = extractShape(JSON.parse(mockRes.body)); + const diffs = triangulate(sdkShape, sdkShape, mockShape); + const report = formatDriftReport("Vertex AI generateContent", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); + + it("streamGenerateContent mock shape matches Gemini SSE format", async () => { + const sdkChunkShape = extractShape({ + candidates: [ + { + content: { + parts: [{ text: "Hello" }], + role: "model", + }, + finishReason: "STOP", + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }); + + // Vertex AI streaming uses SSE with the same chunk shape as consumer Gemini + const mockRes = await httpPost( + 
`${instance.url}/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:streamGenerateContent?alt=sse`, + { + contents: [ + { + role: "user", + parts: [{ text: "Say hello" }], + }, + ], + generationConfig: { maxOutputTokens: 10 }, + }, + ); + + expect(mockRes.status).toBeLessThan(500); + + if (mockRes.status === 200) { + // Parse SSE chunks and extract shapes + const chunks = mockRes.body + .split("\n") + .filter((line: string) => line.startsWith("data: ")) + .map((line: string) => JSON.parse(line.slice(6))); + + expect(chunks.length).toBeGreaterThan(0); + + // Each chunk should have the candidates structure + for (const chunk of chunks) { + const chunkShape = extractShape(chunk); + expect(chunkShape.kind).toBe("object"); + if (chunkShape.kind === "object") { + expect(chunkShape.fields).toHaveProperty("candidates"); + } + } + + // Last chunk should match the SDK shape (has finishReason and usageMetadata) + const lastChunk = chunks[chunks.length - 1]; + const lastShape = extractShape(lastChunk); + const diffs = triangulate(sdkChunkShape, sdkChunkShape, lastShape); + const report = formatDriftReport("Vertex AI streamGenerateContent (last chunk)", diffs); + + if (shouldFail(diffs)) { + expect.soft([], report).toEqual(diffs.filter((d) => d.severity === "critical")); + } + } + }); +}); diff --git a/src/__tests__/metrics.test.ts b/src/__tests__/metrics.test.ts new file mode 100644 index 0000000..0d1948c --- /dev/null +++ b/src/__tests__/metrics.test.ts @@ -0,0 +1,579 @@ +import { describe, it, expect, afterEach, beforeEach } from "vitest"; +import http from "node:http"; +import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "../metrics.js"; +import { createServer, type ServerInstance } from "../server.js"; +import type { Fixture, ChatCompletionRequest } from "../types.js"; + +// --------------------------------------------------------------------------- +// Helpers +// 
---------------------------------------------------------------------------
+
+async function httpPost(
+  url: string,
+  body: object,
+  headers?: Record<string, string>,
+): Promise<{ status: number; body: string; headers: Record<string, string> }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      url,
+      {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          ...headers,
+        },
+      },
+      (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (c) => chunks.push(c));
+        res.on("end", () =>
+          resolve({
+            status: res.statusCode!,
+            body: Buffer.concat(chunks).toString(),
+            headers: Object.fromEntries(
+              Object.entries(res.headers).map(([k, v]) => [
+                k,
+                Array.isArray(v) ? v.join(", ") : (v ?? ""),
+              ]),
+            ),
+          }),
+        );
+      },
+    );
+    req.on("error", reject);
+    req.write(JSON.stringify(body));
+    req.end();
+  });
+}
+
+async function httpGet(
+  url: string,
+): Promise<{ status: number; body: string; headers: Record<string, string> }> {
+  return new Promise((resolve, reject) => {
+    const req = http.request(url, { method: "GET" }, (res) => {
+      const chunks: Buffer[] = [];
+      res.on("data", (c) => chunks.push(c));
+      res.on("end", () =>
+        resolve({
+          status: res.statusCode!,
+          body: Buffer.concat(chunks).toString(),
+          headers: Object.fromEntries(
+            Object.entries(res.headers).map(([k, v]) => [
+              k,
+              Array.isArray(v) ? v.join(", ") : (v ?? 
""), + ]), + ), + }), + ); + }); + req.on("error", reject); + req.end(); + }); +} + +function chatRequest(userContent: string): ChatCompletionRequest { + return { + model: "gpt-4", + messages: [{ role: "user", content: userContent }], + }; +} + +// --------------------------------------------------------------------------- +// Unit tests: MetricsRegistry +// --------------------------------------------------------------------------- + +describe("MetricsRegistry", () => { + let registry: MetricsRegistry; + + beforeEach(() => { + registry = createMetricsRegistry(); + }); + + describe("Counter", () => { + it("increments and serializes correct value", () => { + registry.incrementCounter("http_requests_total", { method: "POST" }); + registry.incrementCounter("http_requests_total", { method: "POST" }); + registry.incrementCounter("http_requests_total", { method: "POST" }); + const output = registry.serialize(); + expect(output).toContain('http_requests_total{method="POST"} 3'); + }); + + it("tracks different label combos separately", () => { + registry.incrementCounter("http_requests_total", { method: "POST", path: "/a" }); + registry.incrementCounter("http_requests_total", { method: "POST", path: "/a" }); + registry.incrementCounter("http_requests_total", { method: "GET", path: "/b" }); + const output = registry.serialize(); + expect(output).toContain('http_requests_total{method="POST",path="/a"} 2'); + expect(output).toContain('http_requests_total{method="GET",path="/b"} 1'); + }); + }); + + describe("Histogram", () => { + it("observes values with cumulative buckets, +Inf = count", () => { + // Observe values: 0.003, 0.05, 1.5 + registry.observeHistogram("request_duration_seconds", {}, 0.003); + registry.observeHistogram("request_duration_seconds", {}, 0.05); + registry.observeHistogram("request_duration_seconds", {}, 1.5); + const output = registry.serialize(); + + // Bucket 0.005: 1 observation (0.003) + 
expect(output).toContain('request_duration_seconds_bucket{le="0.005"} 1'); + // Bucket 0.01: 1 observation (cumulative, still just 0.003) + expect(output).toContain('request_duration_seconds_bucket{le="0.01"} 1'); + // Bucket 0.05: 2 observations (0.003, 0.05) + expect(output).toContain('request_duration_seconds_bucket{le="0.05"} 2'); + // Bucket 0.1: 2 observations + expect(output).toContain('request_duration_seconds_bucket{le="0.1"} 2'); + // Bucket 2.5: 3 observations (all) + expect(output).toContain('request_duration_seconds_bucket{le="2.5"} 3'); + // +Inf = count = 3 + expect(output).toContain('request_duration_seconds_bucket{le="+Inf"} 3'); + }); + + it("has correct _sum and _count suffixes", () => { + registry.observeHistogram("request_duration_seconds", {}, 0.5); + registry.observeHistogram("request_duration_seconds", {}, 1.5); + const output = registry.serialize(); + expect(output).toContain("request_duration_seconds_sum{} 2"); + expect(output).toContain("request_duration_seconds_count{} 2"); + }); + + it("tracks labels separately in histograms", () => { + registry.observeHistogram("req_dur", { method: "POST" }, 0.01); + registry.observeHistogram("req_dur", { method: "GET" }, 5.0); + const output = registry.serialize(); + // POST: bucket le=0.01 should have 1 + expect(output).toContain('req_dur_bucket{method="POST",le="0.01"} 1'); + // POST: +Inf should have 1 + expect(output).toContain('req_dur_bucket{method="POST",le="+Inf"} 1'); + // GET: bucket le=0.01 should have 0 + expect(output).toContain('req_dur_bucket{method="GET",le="0.01"} 0'); + // GET: bucket le=5 should have 1 + expect(output).toContain('req_dur_bucket{method="GET",le="5"} 1'); + // GET: +Inf should have 1 + expect(output).toContain('req_dur_bucket{method="GET",le="+Inf"} 1'); + }); + }); + + describe("Histogram edge: value > all buckets", () => { + it("28. 
only +Inf increments when value exceeds all bucket bounds", () => { + registry.observeHistogram("big_value_hist", {}, 100); + const output = registry.serialize(); + + // All finite buckets should have 0 + for (const b of [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) { + expect(output).toContain(`big_value_hist_bucket{le="${b}"} 0`); + } + // Only +Inf should have 1 + expect(output).toContain('big_value_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("big_value_hist_count{} 1"); + expect(output).toContain("big_value_hist_sum{} 100"); + }); + }); + + describe("Empty registry serialization", () => { + it("29. returns empty string from fresh registry", () => { + const freshRegistry = createMetricsRegistry(); + expect(freshRegistry.serialize()).toBe(""); + }); + }); + + describe("Type mismatch errors", () => { + it("throws when observing histogram on a counter name", () => { + registry.incrementCounter("foo", {}); + expect(() => registry.observeHistogram("foo", {}, 0.5)).toThrow( + "Metric foo is not a histogram", + ); + }); + + it("throws when incrementing counter on a histogram name", () => { + registry.observeHistogram("bar", {}, 0.5); + expect(() => registry.incrementCounter("bar", {})).toThrow("Metric bar is not a counter"); + }); + }); + + describe("Gauge type mismatch errors", () => { + it("throws when incrementing counter on a gauge name", () => { + registry.setGauge("x", {}, 1); + expect(() => registry.incrementCounter("x", {})).toThrow("Metric x is not a counter"); + }); + + it("throws when observing histogram on a gauge name", () => { + registry.setGauge("y", {}, 1); + expect(() => registry.observeHistogram("y", {}, 0.5)).toThrow("Metric y is not a histogram"); + }); + + it("throws when setting gauge on a counter name", () => { + registry.incrementCounter("z", {}); + expect(() => registry.setGauge("z", {}, 1)).toThrow("Metric z is not a gauge"); + }); + }); + + describe("Histogram value exactly 0", () => { + it("observe 0, verify it lands 
in 0.005 bucket", () => { + registry.observeHistogram("zero_hist", {}, 0); + const output = registry.serialize(); + // 0 <= 0.005, so the 0.005 bucket should have 1 + expect(output).toContain('zero_hist_bucket{le="0.005"} 1'); + expect(output).toContain('zero_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("zero_hist_sum{} 0"); + expect(output).toContain("zero_hist_count{} 1"); + }); + }); + + describe("Histogram negative value", () => { + it("observe -1, verify it lands in ALL finite buckets (cumulative), +Inf/count/sum correct", () => { + registry.observeHistogram("neg_hist", {}, -1); + const output = registry.serialize(); + // -1 <= every positive bucket boundary, so all finite buckets should have 1 + for (const b of [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]) { + expect(output).toContain(`neg_hist_bucket{le="${b}"} 1`); + } + expect(output).toContain('neg_hist_bucket{le="+Inf"} 1'); + expect(output).toContain("neg_hist_count{} 1"); + expect(output).toContain("neg_hist_sum{} -1"); + }); + }); + + describe("Counter with empty labels serialization format", () => { + it("serializes counter with empty labels as name{} value", () => { + registry.incrementCounter("empty_label_counter", {}); + const output = registry.serialize(); + expect(output).toContain("empty_label_counter{} 1"); + }); + }); + + describe("Label value escaping", () => { + it("escapes backslash, double-quote, and newline in label values", () => { + registry.incrementCounter("escaped_metric", { val: 'back\\slash "quoted" new\nline' }); + const output = registry.serialize(); + expect(output).toContain('val="back\\\\slash \\"quoted\\" new\\nline"'); + }); + }); + + describe("Label sort order stability", () => { + it("maps {b:2,a:1} and {a:1,b:2} to the same series", () => { + registry.incrementCounter("sorted_counter", { b: "2", a: "1" }); + registry.incrementCounter("sorted_counter", { a: "1", b: "2" }); + const output = registry.serialize(); + // Should be one series with value 
2, not two series with value 1 + expect(output).toContain('sorted_counter{a="1",b="2"} 2'); + // Should not contain a separate series with value 1 + expect(output).not.toMatch(/sorted_counter\{[^}]*\} 1/); + }); + }); + + describe("Gauge", () => { + it("sets and updates value", () => { + registry.setGauge("fixtures_loaded", {}, 5); + let output = registry.serialize(); + expect(output).toContain("fixtures_loaded{} 5"); + + registry.setGauge("fixtures_loaded", {}, 10); + output = registry.serialize(); + expect(output).toContain("fixtures_loaded{} 10"); + // Old value should not be present + expect(output).not.toMatch(/fixtures_loaded\{\} 5/); + }); + }); + + describe("serialize()", () => { + it("produces valid Prometheus text exposition format", () => { + registry.incrementCounter("my_counter", { env: "test" }); + registry.setGauge("my_gauge", {}, 42); + const output = registry.serialize(); + + // Should contain TYPE lines + expect(output).toMatch(/^# TYPE my_counter counter$/m); + expect(output).toMatch(/^# TYPE my_gauge gauge$/m); + // Metric lines + expect(output).toContain('my_counter{env="test"} 1'); + expect(output).toContain("my_gauge{} 42"); + }); + }); + + describe("reset()", () => { + it("clears all metrics", () => { + registry.incrementCounter("c", {}); + registry.observeHistogram("h", {}, 0.5); + registry.setGauge("g", {}, 1); + registry.reset(); + const output = registry.serialize(); + expect(output).toBe(""); + }); + }); + + describe("histogram→gauge type mismatch", () => { + it("throws when setting gauge on a histogram name", () => { + registry.observeHistogram("x", {}, 0.5); + expect(() => registry.setGauge("x", {}, 1)).toThrow("Metric x is not a gauge"); + }); + }); + + describe("Gauge with non-empty labels", () => { + it("serializes gauge with labels correctly", () => { + registry.setGauge("g", { region: "us" }, 42); + const output = registry.serialize(); + expect(output).toContain('g{region="us"} 42'); + }); + }); + + describe("Gauge multi-series", 
() => { + it("tracks multiple label combos independently", () => { + registry.setGauge("g", { region: "us" }, 10); + registry.setGauge("g", { region: "eu" }, 20); + const output = registry.serialize(); + expect(output).toContain('g{region="us"} 10'); + expect(output).toContain('g{region="eu"} 20'); + }); + }); + + describe("reset then re-accumulate", () => { + it("counter restarts from zero after reset", () => { + registry.incrementCounter("c", {}); + registry.reset(); + registry.incrementCounter("c", {}); + const output = registry.serialize(); + expect(output).toContain("c{} 1"); + expect(output).not.toMatch(/c\{\} 2/); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Unit tests: normalizePathLabel +// --------------------------------------------------------------------------- + +describe("normalizePathLabel", () => { + it("normalizes Bedrock invoke path", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/invoke")).toBe( + "/model/{modelId}/invoke", + ); + }); + + it("normalizes Bedrock invoke-with-response-stream", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/invoke-with-response-stream")).toBe( + "/model/{modelId}/invoke-with-response-stream", + ); + }); + + it("normalizes Bedrock converse", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/converse")).toBe( + "/model/{modelId}/converse", + ); + }); + + it("normalizes Bedrock converse-stream", () => { + expect(normalizePathLabel("/model/anthropic.claude-3-haiku/converse-stream")).toBe( + "/model/{modelId}/converse-stream", + ); + }); + + it("normalizes Gemini generateContent path", () => { + expect(normalizePathLabel("/v1beta/models/gemini-2.0-flash:generateContent")).toBe( + "/v1beta/models/{model}:generateContent", + ); + }); + + it("normalizes Gemini streamGenerateContent path", () => { + expect(normalizePathLabel("/v1beta/models/gemini-2.0-flash:streamGenerateContent")).toBe( + 
"/v1beta/models/{model}:streamGenerateContent", + ); + }); + + it("normalizes Azure deployment path", () => { + expect(normalizePathLabel("/openai/deployments/my-gpt4/chat/completions")).toBe( + "/openai/deployments/{id}/chat/completions", + ); + }); + + it("normalizes Azure deployment embeddings path", () => { + expect(normalizePathLabel("/openai/deployments/my-gpt4/embeddings")).toBe( + "/openai/deployments/{id}/embeddings", + ); + }); + + it("normalizes Vertex AI path", () => { + expect( + normalizePathLabel( + "/v1/projects/my-proj/locations/us-central1/publishers/google/models/gemini:generateContent", + ), + ).toBe("/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:generateContent"); + }); + + it("leaves static /api/chat unchanged", () => { + expect(normalizePathLabel("/api/chat")).toBe("/api/chat"); + }); + + it("leaves static /v1/chat/completions unchanged", () => { + expect(normalizePathLabel("/v1/chat/completions")).toBe("/v1/chat/completions"); + }); + + it("leaves static /v1/messages unchanged", () => { + expect(normalizePathLabel("/v1/messages")).toBe("/v1/messages"); + }); + + it("leaves static /v1/embeddings unchanged", () => { + expect(normalizePathLabel("/v1/embeddings")).toBe("/v1/embeddings"); + }); + + it("partial match: /model/foo/unknown-op returns as-is", () => { + expect(normalizePathLabel("/model/foo/unknown-op")).toBe("/model/foo/unknown-op"); + }); + + it("empty string returns empty string", () => { + expect(normalizePathLabel("")).toBe(""); + }); + + it("normalizes Vertex AI streamGenerateContent path", () => { + expect( + normalizePathLabel( + "/v1/projects/my-proj/locations/us-central1/publishers/google/models/gemini:streamGenerateContent", + ), + ).toBe("/v1/projects/{p}/locations/{l}/publishers/google/models/{m}:streamGenerateContent"); + }); +}); + +describe("MetricsRegistry: all three types serialized together", () => { + it("counter + histogram + gauge all appear in serialize output", () => { + const reg = 
createMetricsRegistry();
+    reg.incrementCounter("c_total", { env: "test" });
+    reg.observeHistogram("h_seconds", { op: "read" }, 0.05);
+    reg.setGauge("g_loaded", {}, 7);
+
+    const output = reg.serialize();
+    expect(output).toContain("# TYPE c_total counter");
+    expect(output).toContain('c_total{env="test"} 1');
+    expect(output).toContain("# TYPE h_seconds histogram");
+    expect(output).toContain('h_seconds_bucket{op="read",le="0.05"} 1');
+    expect(output).toContain("# TYPE g_loaded gauge");
+    expect(output).toContain("g_loaded{} 7");
+  });
+});
+
+describe("MetricsRegistry: status label in counter output", () => {
+  it("status label appears correctly in serialized counter", () => {
+    const reg = createMetricsRegistry();
+    reg.incrementCounter("llmock_requests_total", { status: "200", path: "/v1/chat/completions" });
+    reg.incrementCounter("llmock_requests_total", { status: "200", path: "/v1/chat/completions" });
+    reg.incrementCounter("llmock_requests_total", { status: "404", path: "/v1/chat/completions" });
+
+    const output = reg.serialize();
+    expect(output).toContain('llmock_requests_total{path="/v1/chat/completions",status="200"} 2');
+    expect(output).toContain('llmock_requests_total{path="/v1/chat/completions",status="404"} 1');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Integration tests: /metrics endpoint through the server
+// ---------------------------------------------------------------------------
+
+let instance: ServerInstance | null = null;
+
+afterEach(async () => {
+  if (instance) {
+    await new Promise<void>((resolve) => instance!.server.close(() => resolve()));
+    instance = null;
+  }
+});
+
+describe("integration: /metrics endpoint", () => {
+  it("returns 404 when metrics disabled (default)", async () => {
+    const fixtures: Fixture[] = [
+      {
+        match: { userMessage: "hello" },
+        response: { content: "hi" },
+      },
+    ];
+    instance = await createServer(fixtures);
+    const res = await 
httpGet(`${instance.url}/metrics`); + expect(res.status).toBe(404); + }); + + it("returns 200 with correct content-type when metrics enabled", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + const res = await httpGet(`${instance.url}/metrics`); + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/plain; version=0.0.4; charset=utf-8"); + }); + + it("increments counters after sending requests", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + + // Send two requests + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_requests_total"); + // Should have count of 2 for the completions path + expect(res.body).toMatch(/llmock_requests_total\{[^}]*path="\/v1\/chat\/completions"[^}]*\} 2/); + }); + + it("records histogram bucket distribution after a request", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { metrics: true }); + + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + // Should have histogram buckets + expect(res.body).toContain("llmock_request_duration_seconds_bucket"); + expect(res.body).toContain("llmock_request_duration_seconds_count"); + expect(res.body).toContain("llmock_request_duration_seconds_sum"); + // +Inf bucket should equal count + const infMatch = res.body.match( + /llmock_request_duration_seconds_bucket\{[^}]*le="\+Inf"\} (\d+)/, + ); + const 
countMatch = res.body.match(/llmock_request_duration_seconds_count\{[^}]*\} (\d+)/); + expect(infMatch).not.toBeNull(); + expect(countMatch).not.toBeNull(); + expect(infMatch![1]).toBe(countMatch![1]); + }); + + it("increments chaos counter when chaos triggers", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi" }, + }, + ]; + instance = await createServer(fixtures, { + metrics: true, + chaos: { dropRate: 1.0 }, // 100% drop + }); + + await httpPost(`${instance.url}/v1/chat/completions`, chatRequest("hello")); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_chaos_triggered_total"); + expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); + }); + + it("tracks fixtures loaded gauge", async () => { + const fixtures: Fixture[] = [ + { match: { userMessage: "a" }, response: { content: "1" } }, + { match: { userMessage: "b" }, response: { content: "2" } }, + ]; + instance = await createServer(fixtures, { metrics: true }); + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_fixtures_loaded{} 2"); + }); +}); diff --git a/src/__tests__/ollama.test.ts b/src/__tests__/ollama.test.ts new file mode 100644 index 0000000..c9870b4 --- /dev/null +++ b/src/__tests__/ollama.test.ts @@ -0,0 +1,1045 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { ollamaToCompletionRequest } from "../ollama.js"; +import { writeNDJSONStream } from "../ndjson-writer.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = 
http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function get( + url: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); +} + +function postRaw(url: string, raw: string): Promise<{ status: number; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); +} + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" }, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"NYC"}', + }, + ], + }, +}; + +const modelFixture: Fixture = { + match: { model: "llama3", userMessage: "greet" }, + response: { content: "Hello from Ollama!" }, +}; + +const errorFixture: Fixture = { + match: { userMessage: "fail" }, + response: { + error: { + message: "Rate limited", + type: "rate_limit_error", + }, + status: 429, + }, +}; + +const allFixtures: Fixture[] = [textFixture, toolFixture, modelFixture, errorFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── Unit tests: ollamaToCompletionRequest ────────────────────────────────── + +describe("ollamaToCompletionRequest", () => { + it("converts basic chat request", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hello" }], + }); + expect(result.model).toBe("llama3"); + expect(result.messages).toEqual([{ role: "user", content: "hello" }]); + }); + + it("passes through stream field", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + stream: false, + }); + expect(result.stream).toBe(false); + }); + + it("converts options to temperature and max_tokens", () => { + const result = ollamaToCompletionRequest({ + model: 
"llama3", + messages: [{ role: "user", content: "hi" }], + options: { temperature: 0.7, num_predict: 100 }, + }); + expect(result.temperature).toBe(0.7); + expect(result.max_tokens).toBe(100); + }); + + it("converts tools", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + tools: [ + { + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }, + ], + }); + expect(result.tools).toHaveLength(1); + expect(result.tools![0]).toEqual({ + type: "function", + function: { + name: "get_weather", + description: "Get weather", + parameters: { type: "object", properties: { city: { type: "string" } } }, + }, + }); + }); + + it("returns undefined tools when none provided", () => { + const result = ollamaToCompletionRequest({ + model: "llama3", + messages: [{ role: "user", content: "hi" }], + }); + expect(result.tools).toBeUndefined(); + }); +}); + +// ─── Unit tests: NDJSON writer ────────────────────────────────────────────── + +describe("writeNDJSONStream", () => { + it("writes correct NDJSON format", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false }, + { model: "llama3", done: true }, + ]; + const completed = await writeNDJSONStream(res, data); + + expect(completed).toBe(true); + expect(chunks).toHaveLength(2); + expect(chunks[0]).toBe('{"model":"llama3","done":false}\n'); + expect(chunks[1]).toBe('{"model":"llama3","done":true}\n'); + }); + + it("respects abort signal for interruption", async () => { + const chunks: string[] = []; + const controller = new AbortController(); + const res = { + 
writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + // Abort after first chunk + controller.abort(); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false }, + { model: "llama3", done: false }, + { model: "llama3", done: true }, + ]; + const completed = await writeNDJSONStream(res, data, { signal: controller.signal }); + + expect(completed).toBe(false); + expect(chunks).toHaveLength(1); + }); + + it("applies streaming profile latency", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [{ done: false }, { done: true }]; + const start = Date.now(); + await writeNDJSONStream(res, data, { + streamingProfile: { ttft: 50, tps: 100, jitter: 0 }, + }); + const elapsed = Date.now() - start; + + // Should have at least some delay from the streaming profile + expect(elapsed).toBeGreaterThanOrEqual(40); // ttft ~50ms + 1/100 tps ~10ms + expect(chunks).toHaveLength(2); + }); +}); + +// ─── Integration tests: POST /api/chat (non-streaming) ───────────────────── + +describe("POST /api/chat (non-streaming)", () => { + it("returns text response with all final fields", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.model).toBe("llama3"); + expect(body.message.role).toBe("assistant"); + expect(body.message.content).toBe("Hi there!"); + 
expect(body.done).toBe(true); + expect(body.done_reason).toBe("stop"); + expect(body.total_duration).toBe(0); + expect(body.load_duration).toBe(0); + expect(body.prompt_eval_count).toBe(0); + expect(body.prompt_eval_duration).toBe(0); + expect(body.eval_count).toBe(0); + expect(body.eval_duration).toBe(0); + }); + + it("returns tool call with arguments as object and no id", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "weather" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.done).toBe(true); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + // Arguments must be an OBJECT, not a JSON string + expect(body.message.tool_calls[0].function.arguments).toEqual({ city: "NYC" }); + // No id field on tool calls + expect(body.message.tool_calls[0].id).toBeUndefined(); + }); +}); + +// ─── Integration tests: POST /api/chat (streaming) ────────────────────────── + +describe("POST /api/chat (streaming)", () => { + it("streams NDJSON when stream is absent (default streaming)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + // stream field intentionally omitted — Ollama defaults to true + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body); + expect(chunks.length).toBeGreaterThan(1); + + // All non-final chunks should have done: false + const nonFinal = chunks.slice(0, -1); + for (const chunk of nonFinal) { + expect((chunk as { done: boolean }).done).toBe(false); + } + + // Final chunk should have done: true and all duration fields + const final = chunks[chunks.length - 1] as Record; + 
expect(final.done).toBe(true); + expect(final.done_reason).toBe("stop"); + expect(final.total_duration).toBe(0); + expect(final.load_duration).toBe(0); + expect(final.prompt_eval_count).toBe(0); + expect(final.prompt_eval_duration).toBe(0); + expect(final.eval_count).toBe(0); + expect(final.eval_duration).toBe(0); + }); + + it("streams NDJSON when stream is explicitly true", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body); + expect(chunks.length).toBeGreaterThan(1); + }); + + it("reconstructs full text from streaming chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { content: string }; + done: boolean; + }>; + const fullText = chunks + .filter((c) => !c.done) + .map((c) => c.message.content) + .join(""); + expect(fullText).toBe("Hi there!"); + }); + + it("streams tool call with arguments as object", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "weather" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { tool_calls?: Array<{ function: { name: string; arguments: unknown } }> }; + done: boolean; + }>; + const toolChunk = chunks.find((c) => c.message.tool_calls && c.message.tool_calls.length > 0); + expect(toolChunk).toBeDefined(); + expect(toolChunk!.message.tool_calls![0].function.name).toBe("get_weather"); + expect(toolChunk!.message.tool_calls![0].function.arguments).toEqual({ 
city: "NYC" }); + }); + + it("uses fixture chunkSize for text streaming", async () => { + const bigChunkFixture: Fixture = { + match: { userMessage: "bigchunk" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 5, + }; + instance = await createServer([bigChunkFixture], { chunkSize: 2 }); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "bigchunk" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as Array<{ + message: { content: string }; + done: boolean; + }>; + // 10 chars / chunkSize 5 = 2 content chunks + 1 final = 3 total + expect(chunks).toHaveLength(3); + expect(chunks[0].message.content).toBe("ABCDE"); + expect(chunks[1].message.content).toBe("FGHIJ"); + expect(chunks[2].done).toBe(true); + }); +}); + +// ─── Integration tests: POST /api/chat (streaming profile) ───────────────── + +describe("POST /api/chat (streaming profile)", () => { + it("applies streaming profile latency", async () => { + const slowFixture: Fixture = { + match: { userMessage: "slow" }, + response: { content: "AB" }, + chunkSize: 1, + streamingProfile: { ttft: 50, tps: 20, jitter: 0 }, + }; + instance = await createServer([slowFixture]); + + const start = Date.now(); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "slow" }], + stream: true, + }); + const elapsed = Date.now() - start; + + expect(res.status).toBe(200); + // Should have noticeable delay: ttft 50ms + at least 2 chunks at 20tps (50ms each) + final + expect(elapsed).toBeGreaterThanOrEqual(80); + }); +}); + +// ─── Integration tests: POST /api/chat (interruption) ─────────────────────── + +describe("POST /api/chat (interruption)", () => { + it("truncates after specified number of chunks", async () => { + const truncFixture: Fixture = { + match: { userMessage: "truncate" }, + response: { content: "ABCDEFGHIJ" }, + chunkSize: 1, + truncateAfterChunks: 3, + }; + instance = await 
createServer([truncFixture]); + + // Use a custom request that tolerates abrupt socket close + const res = await new Promise<{ aborted: boolean; body: string }>((resolve) => { + const data = JSON.stringify({ + model: "llama3", + messages: [{ role: "user", content: "truncate" }], + stream: true, + }); + const parsed = new URL(`${instance!.url}/api/chat`); + const chunks: Buffer[] = []; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ aborted: false, body: Buffer.concat(chunks).toString() }); + }); + res.on("aborted", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + }, + ); + req.on("error", () => { + resolve({ aborted: true, body: Buffer.concat(chunks).toString() }); + }); + req.write(data); + req.end(); + }); + + // Stream was truncated — res.destroy() causes abrupt close + expect(res.aborted).toBe(true); + + // Journal should record interruption + await new Promise((r) => setTimeout(r, 50)); + const entry = instance.journal.getLast(); + expect(entry!.response.interrupted).toBe(true); + expect(entry!.response.interruptReason).toBe("truncateAfterChunks"); + }); +}); + +// ─── Integration tests: POST /api/chat (chaos) ───────────────────────────── + +describe("POST /api/chat (chaos)", () => { + it("drops request when chaos drop header is set to 1.0", async () => { + instance = await createServer(allFixtures); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const data = JSON.stringify({ + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + const parsed = new URL(`${instance!.url}/api/chat`); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + 
path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + "x-llmock-chaos-drop": "1.0", + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +// ─── Integration tests: POST /api/chat (error handling) ───────────────────── + +describe("POST /api/chat (error handling)", () => { + it("returns error fixture with correct status", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "fail" }], + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); + + it("returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "nomatch" }], + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); + + it("returns 400 when messages array is missing from /api/chat", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + stream: false, + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: messages array is required"); + }); + + it("returns 400 when prompt is missing from /api/generate", async () => { + instance = await 
createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + stream: false, + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Invalid request: prompt field is required"); + }); + + it("returns 400 for malformed JSON", async () => { + instance = await createServer(allFixtures); + const res = await postRaw(`${instance.url}/api/chat`, "{not valid"); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +// ─── Integration tests: POST /api/generate (non-streaming) ───────────────── + +describe("POST /api/generate (non-streaming)", () => { + it("returns text in response field (not message)", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.model).toBe("llama3"); + expect(body.response).toBe("Hi there!"); + expect(body.done).toBe(true); + expect(body.done_reason).toBe("stop"); + expect(body.context).toEqual([]); + expect(body.created_at).toBeDefined(); + // Should NOT have message field + expect(body.message).toBeUndefined(); + }); +}); + +// ─── Integration tests: POST /api/generate (error/chaos/strict/no-match) ──── + +describe("POST /api/generate (error fixture)", () => { + it("19a. returns error fixture through /api/generate", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "fail", + stream: false, + }); + + expect(res.status).toBe(429); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Rate limited"); + }); +}); + +describe("POST /api/generate (chaos)", () => { + it("19b. 
drops request with chaos-drop header", async () => { + instance = await createServer(allFixtures); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const data = JSON.stringify({ + model: "llama3", + prompt: "hello", + stream: false, + }); + const parsed = new URL(`${instance!.url}/api/generate`); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + "x-llmock-chaos-drop": "1.0", + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); +}); + +describe("POST /api/generate (strict mode)", () => { + it("19c. returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +describe("POST /api/generate (no fixture match)", () => { + it("19d. 
returns 404 when no fixture matches", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "nomatch_xyz", + stream: false, + }); + + expect(res.status).toBe(404); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("No fixture matched"); + }); +}); + +// ─── Integration tests: POST /api/generate (streaming) ────────────────────── + +describe("POST /api/generate (streaming)", () => { + it("streams NDJSON with response field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + // stream omitted — defaults to true + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + + const chunks = parseNDJSON(res.body) as Array<{ + model: string; + created_at: string; + response: string; + done: boolean; + }>; + expect(chunks.length).toBeGreaterThan(1); + + // Non-final chunks use response field + const nonFinal = chunks.slice(0, -1); + for (const chunk of nonFinal) { + expect(chunk.response).toBeDefined(); + expect(chunk.done).toBe(false); + expect(chunk.created_at).toBeDefined(); + // Should NOT have message field + expect((chunk as Record).message).toBeUndefined(); + } + + // Reconstruct text + const fullText = nonFinal.map((c) => c.response).join(""); + expect(fullText).toBe("Hi there!"); + + // Final chunk + const final = chunks[chunks.length - 1] as Record; + expect(final.done).toBe(true); + expect(final.response).toBe(""); + expect(final.done_reason).toBe("stop"); + expect(final.context).toEqual([]); + }); + + it("defaults to streaming when stream field is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + }); + + expect(res.headers["content-type"]).toBe("application/x-ndjson"); + }); 
+}); + +// ─── Integration tests: GET /api/tags ─────────────────────────────────────── + +describe("GET /api/tags", () => { + it("returns model list from fixtures", async () => { + instance = await createServer(allFixtures); + const res = await get(`${instance.url}/api/tags`); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.models).toBeDefined(); + expect(Array.isArray(body.models)).toBe(true); + // modelFixture has model: "llama3", so it should appear + const names = body.models.map((m: { name: string }) => m.name); + expect(names).toContain("llama3"); + }); + + it("returns default models when no fixture has model match", async () => { + const noModelFixtures: Fixture[] = [ + { match: { userMessage: "hi" }, response: { content: "hello" } }, + ]; + instance = await createServer(noModelFixtures); + const res = await get(`${instance.url}/api/tags`); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.models.length).toBeGreaterThan(0); + // Default models should include standard ones + const names = body.models.map((m: { name: string }) => m.name); + expect(names).toContain("gpt-4"); + }); +}); + +// ─── Integration tests: journal ───────────────────────────────────────────── + +describe("POST /api/chat (journal)", () => { + it("records request in the journal", async () => { + instance = await createServer(allFixtures); + await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/api/chat"); + expect(entry!.response.status).toBe(200); + expect(entry!.response.fixture).toBe(textFixture); + expect(entry!.body.model).toBe("llama3"); + }); +}); + +describe("POST /api/generate (journal)", () => { + it("records request in the journal", async () => { + 
instance = await createServer(allFixtures); + await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "hello", + stream: false, + }); + + expect(instance.journal.size).toBe(1); + const entry = instance.journal.getLast(); + expect(entry!.path).toBe("/api/generate"); + expect(entry!.response.status).toBe(200); + }); +}); + +// ─── Integration tests: malformed tool call arguments ─────────────────────── + +describe("POST /api/chat (malformed tool call arguments)", () => { + it("falls back to empty object when arguments is not valid JSON", async () => { + const badArgsFixture: Fixture = { + match: { userMessage: "bad-args" }, + response: { + toolCalls: [{ name: "fn", arguments: "NOT VALID JSON" }], + }, + }; + instance = await createServer([badArgsFixture]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "bad-args" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(1); + expect(body.message.tool_calls[0].function.name).toBe("fn"); + // Malformed JSON falls back to empty object + expect(body.message.tool_calls[0].function.arguments).toEqual({}); + }); +}); + +// ─── Integration tests: tool call on /api/generate → 500 ─────────────────── + +describe("POST /api/generate (tool call fixture)", () => { + it("returns 500 'unknown type' for tool call fixtures on /api/generate", async () => { + const tcFixture: Fixture = { + match: { userMessage: "tool-gen" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }; + instance = await createServer([tcFixture]); + const res = await post(`${instance.url}/api/generate`, { + model: "llama3", + prompt: "tool-gen", + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("did not match any known type"); + }); +}); + +// ─── Integration tests: CORS 
──────────────────────────────────────────────── + +describe("POST /api/chat (CORS)", () => { + it("includes CORS headers", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.headers["access-control-allow-origin"]).toBe("*"); + }); +}); + +// ─── Integration tests: strict mode → 503 ────────────────────────────────── + +describe("POST /api/chat (strict mode)", () => { + it("returns 503 in strict mode with no matching fixture", async () => { + instance = await createServer([], { strict: true }); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello" }], + stream: false, + }); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Integration tests: multiple tool calls ───────────────────────────────── + +describe("POST /api/chat (multiple tool calls)", () => { + it("returns 2 tool calls in a single non-streaming response", async () => { + const multiToolFixture: Fixture = { + match: { userMessage: "multi-tool" }, + response: { + toolCalls: [ + { name: "get_weather", arguments: '{"city":"NYC"}' }, + { name: "get_time", arguments: '{"tz":"EST"}' }, + ], + }, + }; + instance = await createServer([multiToolFixture]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "multi-tool" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.tool_calls).toHaveLength(2); + expect(body.message.tool_calls[0].function.name).toBe("get_weather"); + expect(body.message.tool_calls[0].function.arguments).toEqual({ city: "NYC" }); + expect(body.message.tool_calls[1].function.name).toBe("get_time"); + 
expect(body.message.tool_calls[1].function.arguments).toEqual({ tz: "EST" }); + }); +}); + +// ─── Integration tests: error fixture with no explicit status ─────────────── + +describe("POST /api/chat (error fixture no explicit status)", () => { + it("defaults to 500 when error fixture has no status", async () => { + const noStatusError: Fixture = { + match: { userMessage: "err-no-status" }, + response: { + error: { + message: "Something went wrong", + type: "server_error", + }, + }, + }; + instance = await createServer([noStatusError]); + const res = await post(`${instance.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "err-no-status" }], + stream: false, + }); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Something went wrong"); + }); +}); diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts new file mode 100644 index 0000000..499f93d --- /dev/null +++ b/src/__tests__/recorder.test.ts @@ -0,0 +1,2531 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import type { Fixture, FixtureFile } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; +import { proxyAndRecord } from "../recorder.js"; +import type { RecordConfig } from "../types.js"; +import { Logger } from "../logger.js"; +import { LLMock } from "../llmock.js"; +import { encodeEventStreamMessage } from "../aws-event-stream.js"; + +// --------------------------------------------------------------------------- +// HTTP helpers +// --------------------------------------------------------------------------- + +function post( + url: string, + body: unknown, + headers?: Record, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = 
JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + ...headers, + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function get( + url: string, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname + parsed.search, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); +} + +// --------------------------------------------------------------------------- +// Test state +// --------------------------------------------------------------------------- + +let upstream: ServerInstance | undefined; +let recorder: ServerInstance | undefined; +let tmpDir: string | undefined; + +afterEach(async () => { + if (recorder) { + await new Promise((resolve) => recorder!.server.close(() => resolve())); + recorder = undefined; + } + if (upstream) { + await new Promise((resolve) => upstream!.server.close(() => resolve())); + upstream = undefined; + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } +}); + +// --------------------------------------------------------------------------- +// Unit tests — proxyAndRecord function directly +// --------------------------------------------------------------------------- + +describe("proxyAndRecord", () => { + it("returns false when provider is not configured", async () => { + const fixtures: Fixture[] = []; + const logger = new Logger("silent"); + const record: RecordConfig = { providers: {} }; + + // Create a mock req/res pair — we just need them to exist, + // proxyAndRecord should return false before using them + const { req, res } = createMockReqRes(); + + const result = await proxyAndRecord( + req, + res, + { model: "gpt-4", messages: [{ role: "user", content: "hello" }] }, + "openai", + "/v1/chat/completions", + fixtures, + { record, logger }, + ); + + expect(result).toBe(false); + }); + + it("returns false when record config is undefined", async () => { + const fixtures: Fixture[] = []; + const logger = new Logger("silent"); + + const { req, res } = createMockReqRes(); + + const result = await proxyAndRecord( + req, + res, + { model: "gpt-4", messages: [{ role: "user", content: "hello" }] }, + "openai", + 
"/v1/chat/completions", + fixtures, + { record: undefined, logger }, + ); + + expect(result).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — upstream mock + recording proxy +// --------------------------------------------------------------------------- + +describe("recorder integration", () => { + it("proxies unmatched request to upstream and returns correct response", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("Paris is the capital of France."); + }); + + it("saves fixture file to disk with correct format", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + }); + + // Check that a fixture file was created + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Validate fixture content + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + expect(fixtureContent.fixtures[0].match.userMessage).toBe("What is the capital of France?"); + expect((fixtureContent.fixtures[0].response as { content: string }).content).toBe( + "Paris is the capital of France.", + ); + }); + + it("recorded fixture is reused for subsequent identical requests", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // First request — proxied + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + // Second request — should match the recorded fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("Paris is the capital of France."); + + // Only one fixture file should exist (no second proxy) + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records journal entry for proxied request", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." 
}, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }); + + // Check journal + const journalResp = await get(`${recorderUrl}/v1/_requests`); + const entries = JSON.parse(journalResp.body); + expect(entries.length).toBeGreaterThanOrEqual(1); + }); + + it("does not save auth headers in fixture file", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + await post( + `${recorderUrl}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + }, + { Authorization: "Bearer sk-secret-key-12345" }, + ); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + const content = fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"); + + // The fixture file should not contain any auth headers/secrets + expect(content).not.toContain("sk-secret-key-12345"); + expect(content).not.toContain("Authorization"); + expect(content).not.toContain("authorization"); + }); + + it("records tool call response from upstream", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "weather" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }], + }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the weather?" 
}], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.tool_calls).toBeDefined(); + expect(body.choices[0].message.tool_calls[0].function.name).toBe("get_weather"); + + // Check saved fixture has toolCalls + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { toolCalls: unknown[] }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + }); + + it("records embedding response from upstream", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder( + [ + { + match: { inputText: "hello world" }, + response: { embedding: [0.1, 0.2, 0.3] }, + }, + ], + "openai", + ); + + const resp = await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "hello world", + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.data[0].embedding).toEqual([0.1, 0.2, 0.3]); + + // Check saved fixture + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { embedding: number[] }; + expect(savedResponse.embedding).toEqual([0.1, 0.2, 0.3]); + }); + + it("records upstream error status as error fixture", async () => { + // Upstream with no matching fixture for our request → 404 + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "something else entirely" }, + response: { content: 
"not what we asked" }, + }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unmatched request" }], + }); + + // The upstream returns 404 (no fixture match), which gets proxied + // The recorder should save an error fixture + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + error: { message: string }; + status?: number; + }; + expect(savedResponse.error).toBeDefined(); + expect(savedResponse.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming upstream → collapsed fixture +// --------------------------------------------------------------------------- + +describe("recorder streaming collapse", () => { + it("collapses OpenAI SSE streaming response to non-streaming fixture", async () => { + // Upstream has a fixture; when recorder proxies with stream:true, + // upstream returns SSE, recorder should collapse it + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // Send request with stream: true — upstream llmock will return SSE + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + stream: true, + }); + + expect(resp.status).toBe(200); + // The recorder relays the raw SSE to the client + // But the saved fixture should be collapsed + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Paris is the capital of France."); + }); + + it("collapsed streaming fixture works on replay (second request matches)", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "capital of France" }, + response: { content: "Paris is the capital of France." }, + }, + ]); + + // First request — stream:true, proxied to upstream, collapsed on save + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" }], + stream: true, + }); + + // Second request — non-streaming, should match the collapsed fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the capital of France?" 
}], + }); + + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("Paris is the capital of France."); + }); + + it("collapses streaming tool call response to fixture with toolCalls", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "weather" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"Paris"}' }], + }, + }, + ]); + + // Send streaming request + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "What is the weather?" }], + stream: true, + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + + // Check saved fixture has toolCalls (not SSE) + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { toolCalls: unknown[] }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — multi-provider proxy routing +// --------------------------------------------------------------------------- + +describe("recorder multi-provider routing", () => { + it("proxies Anthropic messages request to anthropic upstream", async () => { + // Upstream for Anthropic + const anthropicUpstream = await createServer( + [ + { + match: { userMessage: "bonjour" }, + response: { content: "Salut!" 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { + providers: { anthropic: anthropicUpstream.url }, + fixturePath: tmpDir, + }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "bonjour" }], + }); + + expect(resp.status).toBe(200); + // Anthropic handler translates to/from Claude format; the upstream + // is another llmock so it returns OpenAI format which gets proxied raw + const body = JSON.parse(resp.body); + // The proxied response should contain content + expect(body).toBeDefined(); + + // Fixture file created on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + // Clean up the extra upstream + await new Promise((resolve) => anthropicUpstream.server.close(() => resolve())); + }); + + it("unconfigured provider returns 404 (no proxy)", async () => { + // Only openai provider configured, not gemini + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "test" }, + response: { content: "ok" }, + }, + ]); + + // Send a Gemini-format request — no upstream configured for gemini + const resp = await post(`${recorderUrl}/v1beta/models/gemini-pro:generateContent`, { + contents: [{ parts: [{ text: "hello gemini" }], role: "user" }], + }); + + // Should get 404 — no fixture and no gemini upstream + expect(resp.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — strict mode +// --------------------------------------------------------------------------- + +describe("recorder strict mode", () => { + it("strict mode without recording: unmatched request returns 503 with error logged", async () => { + recorder = await 
createServer([], { + port: 0, + strict: true, + logLevel: "debug", + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "no fixture here" }], + }); + + expect(resp.status).toBe(503); + const body = JSON.parse(resp.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); + + it("record + strict: proxy succeeds when upstream is available", async () => { + await setupUpstreamAndRecorder([ + { + match: { userMessage: "hello" }, + response: { content: "world" }, + }, + ]); + + // Override to also set strict on the recorder + // Need to create a new recorder with both record + strict + await new Promise((resolve) => recorder!.server.close(() => resolve())); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + strict: true, + record: { providers: { openai: upstream!.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("world"); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — enableRecording / disableRecording on LLMock +// --------------------------------------------------------------------------- + +describe("LLMock enableRecording / disableRecording", () => { + let mock: LLMock; + let upstreamServer: ServerInstance; + + afterEach(async () => { + if (mock) { + try { + await mock.stop(); + } catch { + // ignore if not started + } + } + if (upstreamServer) { + await new Promise((resolve) => upstreamServer.server.close(() => resolve())); + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + it("enableRecording allows proxying; 
disableRecording returns to 404", async () => { + // Set up upstream + upstreamServer = await createServer( + [ + { + match: { userMessage: "hello" }, + response: { content: "from upstream" }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + mock = new LLMock(); + const url = await mock.start(); + + // Without recording: request gets 404 + const resp1 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp1.status).toBe(404); + + // Enable recording + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + + // Now request should proxy to upstream + const resp2 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.content).toBe("from upstream"); + + // Disable recording + mock.disableRecording(); + + // Recorded fixture should still work (it was added to memory) + const resp3 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "hello" }], + }); + expect(resp3.status).toBe(200); + const body3 = JSON.parse(resp3.body); + expect(body3.choices[0].message.content).toBe("from upstream"); + + // A different message should 404 (no recording, no fixture) + const resp4 = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "something else" }], + }); + expect(resp4.status).toBe(404); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — multi-provider recording (Gemini, Ollama, Cohere, Bedrock, Vertex AI) +// --------------------------------------------------------------------------- + +describe("recorder multi-provider recording", () => { + let servers: http.Server[] = []; + + 
afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function trackServer(si: ServerInstance): ServerInstance { + servers.push(si.server); + return si; + } + + it("records Gemini generateContent request through full proxy", async () => { + const geminiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test gemini" }, response: { content: "Gemini says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: geminiUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "test gemini" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + // Fixture file saved with gemini prefix + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("gemini-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test gemini"); + }); + + it("records Ollama /api/chat request through full proxy", async () => { + const ollamaUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test ollama" }, response: { content: "Ollama says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: ollamaUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "test ollama" }], + stream: false, + }); + + 
expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("ollama-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test ollama"); + }); + + it("records Cohere /v2/chat request through full proxy", async () => { + const cohereUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test cohere" }, response: { content: "Cohere says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { cohere: cohereUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "test cohere" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("cohere-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test cohere"); + }); + + it("records Bedrock /model/{id}/invoke request through full proxy", async () => { + const bedrockUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test bedrock" }, response: { content: "Bedrock says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: bedrockUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/model/claude-v3/invoke`, { + 
anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "test bedrock" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("bedrock-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("test bedrock"); + }); + + it("records Vertex AI request through vertexai provider key", async () => { + // Vertex AI now uses "vertexai" as the provider key + const vertexUpstream = trackServer( + await createServer( + [{ match: { userMessage: "test vertex" }, response: { content: "Vertex says hello" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { vertexai: vertexUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post( + `${recorder.url}/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash:generateContent`, + { contents: [{ parts: [{ text: "test vertex" }], role: "user" }] }, + ); + + expect(resp.status).toBe(200); + + // Uses vertexai prefix (separate provider key from gemini) + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("vertexai-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records Anthropic streaming request through handleMessages", async () => { + const anthropicUpstream = trackServer( + await createServer( + [ + { + match: { userMessage: "stream anthropic" }, + response: { content: "Anthropic streamed" }, + }, + ], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { 
anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "stream anthropic" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.startsWith("anthropic-") && f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("records multiple providers simultaneously", async () => { + const openaiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "multi openai" }, response: { content: "OpenAI multi" } }], + { port: 0 }, + ), + ); + const geminiUpstream = trackServer( + await createServer( + [{ match: { userMessage: "multi gemini" }, response: { content: "Gemini multi" } }], + { port: 0 }, + ), + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: openaiUpstream.url, gemini: geminiUpstream.url }, + fixturePath: tmpDir, + }, + }); + + // OpenAI request + const resp1 = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "multi openai" }], + }); + expect(resp1.status).toBe(200); + + // Gemini request + const resp2 = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "multi gemini" }], role: "user" }], + }); + expect(resp2.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const openaiFixtures = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + const geminiFixtures = files.filter((f) => f.startsWith("gemini-") && f.endsWith(".json")); + expect(openaiFixtures).toHaveLength(1); + expect(geminiFixtures).toHaveLength(1); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming recording through full 
server +// --------------------------------------------------------------------------- + +describe("recorder streaming through full server", () => { + it("OpenAI streaming request collapses and saves fixture with correct content", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "stream test" }, + response: { content: "Streamed content from upstream" }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "stream test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + // SSE data relayed to client + expect(resp.body).toContain("data:"); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Streamed content from upstream"); + }); + + it("streaming tool call recording preserves toolCalls in fixture", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "stream tools" }, + response: { + toolCalls: [{ name: "search", arguments: '{"query":"test"}' }], + }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "stream tools" }], + stream: true, + tools: [{ type: "function", function: { name: "search", parameters: {} } }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = 
fixtureContent.fixtures[0].response as { + toolCalls: Array<{ name: string; arguments: string }>; + }; + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + expect(savedResponse.toolCalls[0].name).toBe("search"); + expect(savedResponse.toolCalls[0].arguments).toBe('{"query":"test"}'); + }); +}); + +// --------------------------------------------------------------------------- +// End-to-end replay verification +// --------------------------------------------------------------------------- + +describe("recorder end-to-end replay", () => { + it("record → verify fixture on disk → replay from fixture (not proxy)", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "replay test" }, + response: { content: "Replay this content" }, + }, + ]); + + // First request — proxied to upstream + const resp1 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "replay test" }], + }); + expect(resp1.status).toBe(200); + + // Verify fixture file on disk + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBe("replay test"); + expect((fixtureContent.fixtures[0].response as { content: string }).content).toBe( + "Replay this content", + ); + + // Clear journal to distinguish proxy vs fixture-match + await fetch(`${recorderUrl}/v1/_requests`, { method: "DELETE" }); + + // Second request — should match recorded fixture + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "replay test" }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + 
expect(body2.choices[0].message.content).toBe("Replay this content"); + + // Journal should show the request was served with a fixture match (not null) + const journalResp = await get(`${recorderUrl}/v1/_requests`); + const entries = JSON.parse(journalResp.body); + expect(entries).toHaveLength(1); + expect(entries[0].response.fixture).not.toBeNull(); + + // Still only one fixture file (no second proxy) + const files2 = fs.readdirSync(fixturePath); + const fixtureFiles2 = files2.filter((f) => f.endsWith(".json")); + expect(fixtureFiles2).toHaveLength(1); + }); + + it("record tool call → replay → toolCalls match", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder([ + { + match: { userMessage: "tool replay" }, + response: { + toolCalls: [{ name: "get_weather", arguments: '{"city":"NYC"}' }], + }, + }, + ]); + + // Record + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "tool replay" }], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + + // Replay + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "tool replay" }], + tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }], + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.choices[0].message.tool_calls).toBeDefined(); + expect(body2.choices[0].message.tool_calls[0].function.name).toBe("get_weather"); + expect(body2.choices[0].message.tool_calls[0].function.arguments).toBe('{"city":"NYC"}'); + }); + + it("record embedding → replay → embedding vector matches", async () => { + const { recorderUrl } = await setupUpstreamAndRecorder( + [{ match: { inputText: "embed replay" }, response: { embedding: [0.5, 0.6, 0.7] } }], + "openai", + ); + + // Record + await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "embed replay", + }); + 
+ // Replay + const resp2 = await post(`${recorderUrl}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "embed replay", + }); + expect(resp2.status).toBe(200); + const body2 = JSON.parse(resp2.body); + expect(body2.data[0].embedding).toEqual([0.5, 0.6, 0.7]); + }); +}); + +// --------------------------------------------------------------------------- +// Edge cases +// --------------------------------------------------------------------------- + +describe("recorder edge cases", () => { + it("upstream 500 error recorded as error fixture and replayed", async () => { + // Upstream returns error for any request + upstream = await createServer( + [ + { + match: { userMessage: "trigger error" }, + response: { + error: { message: "Internal server error", type: "server_error" }, + status: 500, + }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "trigger error" }], + }); + + expect(resp.status).toBe(500); + + // Fixture file created with error response + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + error: { message: string }; + status?: number; + }; + expect(savedResponse.error).toBeDefined(); + expect(savedResponse.status).toBe(500); + + // Replay: second identical request matches the recorded error fixture + const resp2 = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "trigger error" }], + }); + 
expect(resp2.status).toBe(500); + }); + + it("empty match _warning field assertion: present in saved file, NOT in memory", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + // Upstream matches everything via predicate + match: { predicate: () => true }, + response: { content: "empty match response" }, + }, + ]); + + // Send a request with only a system message (no user message → empty match) + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Saved file should have _warning field + const fileContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ); + expect(fileContent._warning).toBeDefined(); + expect(fileContent._warning).toContain("Empty match"); + + // In-memory fixtures should NOT have been augmented (empty match skipped) + // Send same request again — it should proxy again (not match from memory) + const resp2 = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + // Should still return 200 (proxied again since empty match wasn't added to memory) + expect(resp2.status).toBe(200); + + // Now TWO fixture files on disk (proxied twice) + const files2 = fs.readdirSync(fixturePath); + const fixtureFiles2 = files2.filter((f) => f.endsWith(".json")); + expect(fixtureFiles2).toHaveLength(2); + }); + + it("default fixturePath: omit fixturePath from config, verify default path used", async () => { + upstream = await createServer( + [{ match: { userMessage: "default path" }, response: { content: "default path response" } }], + { port: 0 }, + ); + + // Create recorder with no fixturePath — should 
default to "./fixtures/recorded" + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url } }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "default path" }], + }); + + expect(resp.status).toBe(200); + + // Check the default path + const defaultPath = path.resolve("./fixtures/recorded"); + expect(fs.existsSync(defaultPath)).toBe(true); + const files = fs.readdirSync(defaultPath); + const fixtureFiles = files.filter((f) => f.startsWith("openai-") && f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + // Clean up the default path files we just created + for (const f of fixtureFiles) { + fs.unlinkSync(path.join(defaultPath, f)); + } + // Remove dir if empty + try { + fs.rmdirSync(defaultPath); + } catch { + // ignore — might not be empty if other tests ran + } + }); + + it("request with system-only messages (no user message) derives empty match", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { + // Upstream matches everything via predicate + match: { predicate: () => true }, + response: { content: "system only response" }, + }, + ]); + + const resp = await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "system", content: "You are a helpful assistant" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // The match should have no userMessage (no user message in request) + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures[0].match.userMessage).toBeUndefined(); + }); + + it("recording path created automatically (mkdirSync recursive)", async () => { + upstream = await createServer( + [{ 
match: { userMessage: "auto dir" }, response: { content: "dir created" } }], + { port: 0 }, + ); + + // Use a nested path that doesn't exist + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + const nestedPath = path.join(tmpDir, "nested", "deep", "fixtures"); + + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstream.url }, fixturePath: nestedPath }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "auto dir" }], + }); + + expect(resp.status).toBe(200); + + // Nested directory was created + expect(fs.existsSync(nestedPath)).toBe(true); + const files = fs.readdirSync(nestedPath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + }); + + it("fixture file naming follows {provider}-{ISO-timestamp}.json format", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { match: { userMessage: "naming test" }, response: { content: "named" } }, + ]); + + await post(`${recorderUrl}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "naming test" }], + }); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{counter}.json (colons and dots replaced with dashes) + const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-\d+\.json$/; + expect(fixtureFiles[0]).toMatch(pattern); + }); + + it("upstream returns empty response body — handled gracefully", async () => { + // Create a raw HTTP server that returns 200 with empty body + const emptyServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(""); + }); + await new Promise((resolve) => emptyServer.listen(0, "127.0.0.1", resolve)); + const emptyAddr = 
emptyServer.address() as { port: number }; + const emptyUrl = `http://127.0.0.1:${emptyAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: emptyUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "empty body test" }], + }); + + // Should not crash — returns the upstream status + expect(resp.status).toBe(200); + + // Fixture file should still be created (with error/fallback response) + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + await new Promise((resolve) => emptyServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Strict mode thorough tests +// --------------------------------------------------------------------------- + +describe("recorder strict mode thorough", () => { + it("strict mode + recording but provider not configured: 503 returned", async () => { + // Only anthropic configured, but request goes to openai endpoint + const anthropicUpstream = await createServer( + [{ match: { userMessage: "strict test" }, response: { content: "ok" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + strict: true, + record: { providers: { anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + // OpenAI endpoint — no openai provider configured + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "strict test" }], + }); + + expect(resp.status).toBe(503); + const body = JSON.parse(resp.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + + await new Promise((resolve) => 
anthropicUpstream.server.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// enableRecording / disableRecording lifecycle (extended) +// --------------------------------------------------------------------------- + +describe("LLMock enableRecording / disableRecording lifecycle", () => { + let mock: LLMock; + let upstreamServer: ServerInstance; + + afterEach(async () => { + if (mock) { + try { + await mock.stop(); + } catch { + // ignore + } + } + if (upstreamServer) { + await new Promise((resolve) => upstreamServer.server.close(() => resolve())); + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + it("recorded fixtures persist on disk after disableRecording", async () => { + upstreamServer = await createServer( + [{ match: { userMessage: "persist test" }, response: { content: "persisted" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + mock = new LLMock(); + const url = await mock.start(); + + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + + await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "persist test" }], + }); + + mock.disableRecording(); + + // Fixture files still on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + // And the fixture is usable — request still matches from in-memory fixture + const resp = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "persist test" }], + }); + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("persisted"); + }); + + it("re-enable recording after disable works for new requests", async () => { + upstreamServer = await createServer( + 
[ + { match: { userMessage: "first" }, response: { content: "first response" } }, + { match: { userMessage: "second" }, response: { content: "second response" } }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + mock = new LLMock(); + const url = await mock.start(); + + // First recording session + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "first" }], + }); + mock.disableRecording(); + + // Second recording session + mock.enableRecording({ + providers: { openai: upstreamServer.url }, + fixturePath: tmpDir, + }); + const resp = await post(`${url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "second" }], + }); + expect(resp.status).toBe(200); + mock.disableRecording(); + + // Both fixtures on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(2); + }); +}); + +// --------------------------------------------------------------------------- +// Auth header tests (extended) +// --------------------------------------------------------------------------- + +describe("recorder auth header handling", () => { + it("x-api-key (Anthropic) forwarded to upstream but not saved in fixture", async () => { + const anthropicUpstream = await createServer( + [{ match: { userMessage: "api key test" }, response: { content: "ok" } }], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: anthropicUpstream.url }, fixturePath: tmpDir }, + }); + + await post( + `${recorder.url}/v1/messages`, + { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "api key test" }], + }, + { "x-api-key": "sk-ant-secret-123" }, + 
); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles.length).toBeGreaterThanOrEqual(1); + + const content = fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"); + expect(content).not.toContain("sk-ant-secret-123"); + expect(content).not.toContain("x-api-key"); + + await new Promise((resolve) => anthropicUpstream.server.close(() => resolve())); + }); + + it("multiple auth header types all absent from fixture", async () => { + const { recorderUrl, fixturePath } = await setupUpstreamAndRecorder([ + { match: { userMessage: "multi auth" }, response: { content: "multi auth ok" } }, + ]); + + await post( + `${recorderUrl}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "multi auth" }], + }, + { + Authorization: "Bearer sk-openai-secret", + "x-api-key": "sk-anthropic-secret", + "api-key": "azure-secret-key", + }, + ); + + const files = fs.readdirSync(fixturePath); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + const content = fs.readFileSync(path.join(fixturePath, fixtureFiles[0]), "utf-8"); + + expect(content).not.toContain("sk-openai-secret"); + expect(content).not.toContain("sk-anthropic-secret"); + expect(content).not.toContain("azure-secret-key"); + expect(content).not.toContain("Authorization"); + expect(content).not.toContain("authorization"); + expect(content).not.toContain("x-api-key"); + expect(content).not.toContain("api-key"); + }); + + it("custom non-auth headers from client are NOT forwarded to upstream", async () => { + // We'll verify by checking that the upstream doesn't receive custom headers. + // Create a raw upstream that echoes back received headers. 
+ let receivedHeaders: http.IncomingHttpHeaders = {}; + const echoServer = http.createServer((req, res) => { + receivedHeaders = req.headers; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + choices: [{ message: { role: "assistant", content: "echo" }, index: 0 }], + model: "gpt-4", + }), + ); + }); + await new Promise((resolve) => echoServer.listen(0, "127.0.0.1", resolve)); + const echoAddr = echoServer.address() as { port: number }; + const echoUrl = `http://127.0.0.1:${echoAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: echoUrl }, fixturePath: tmpDir }, + }); + + await post( + `${recorder.url}/v1/chat/completions`, + { + model: "gpt-4", + messages: [{ role: "user", content: "header test" }], + }, + { + Authorization: "Bearer sk-test", + "X-Custom-Header": "should-not-forward", + "X-Request-Id": "req-123", + }, + ); + + // Authorization is forwarded, custom headers are not + expect(receivedHeaders["authorization"]).toBe("Bearer sk-test"); + expect(receivedHeaders["x-custom-header"]).toBeUndefined(); + expect(receivedHeaders["x-request-id"]).toBeUndefined(); + + await new Promise((resolve) => echoServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Upstream connection failure → 502 +// --------------------------------------------------------------------------- + +describe("recorder upstream connection failure", () => { + it("returns 502 when upstream is unreachable", async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { + providers: { openai: "http://127.0.0.1:1" }, + fixturePath: tmpDir, + }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unreachable 
upstream" }], + }); + + expect(resp.status).toBe(502); + const body = JSON.parse(resp.body); + expect(body.error.type).toBe("proxy_error"); + }); +}); + +// --------------------------------------------------------------------------- +// Filesystem write failure — response still relayed +// --------------------------------------------------------------------------- + +describe("recorder filesystem write failure", () => { + it("relays response to client even when fixture write fails", async () => { + upstream = await createServer( + [{ match: { userMessage: "fs fail" }, response: { content: "still works" } }], + { port: 0 }, + ); + + // Use a path that cannot be a directory (a regular file) + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + const blockedPath = path.join(tmpDir, "blocked"); + fs.writeFileSync(blockedPath, "i am a file not a directory"); + + recorder = await createServer([], { + port: 0, + logLevel: "silent", + record: { + providers: { openai: upstream.url }, + fixturePath: blockedPath, + }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "fs fail" }], + }); + + // Response still relayed to client + expect(resp.status).toBe(200); + const body = JSON.parse(resp.body); + expect(body.choices[0].message.content).toBe("still works"); + }); +}); + +// --------------------------------------------------------------------------- +// buildFixtureResponse for non-OpenAI formats +// --------------------------------------------------------------------------- + +describe("recorder buildFixtureResponse non-OpenAI formats", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function createRawUpstream(responseBody: object): Promise<{ url: string; server: http.Server }> { + return new Promise((resolve) => { + const srv = 
http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(responseBody)); + }); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address() as { port: number }; + servers.push(srv); + resolve({ url: `http://127.0.0.1:${addr.port}`, server: srv }); + }); + }); + } + + it("records Anthropic format (content array with type/text)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + id: "msg_123", + type: "message", + role: "assistant", + content: [{ type: "text", text: "Bonjour from Anthropic" }], + stop_reason: "end_turn", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "hello anthropic" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Bonjour from Anthropic"); + }); + + it("records Gemini format (candidates array)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + candidates: [ + { + content: { role: "model", parts: [{ text: "Hello from Gemini" }] }, + finishReason: "STOP", + }, + ], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ 
parts: [{ text: "hello gemini" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Gemini"); + }); + + it("records Ollama format (message object)", async () => { + const { url: upstreamUrl } = await createRawUpstream({ + model: "llama3", + message: { role: "assistant", content: "Hello from Ollama" }, + done: true, + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello ollama" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Ollama"); + }); +}); + +// --------------------------------------------------------------------------- +// Content + toolCalls coexistence +// --------------------------------------------------------------------------- + +describe("recorder content + toolCalls coexistence", () => { + it("saves toolCalls when both content and tool_calls are in OpenAI response", async () => { + // Create raw upstream returning both content and tool_calls + const rawServer = http.createServer((_req, res) => { + 
res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "chatcmpl-coexist", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "I'll look that up for you.", + tool_calls: [ + { + id: "call_coex", + type: "function", + function: { name: "search", arguments: '{"q":"test"}' }, + }, + ], + }, + }, + ], + model: "gpt-4", + }), + ); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "coexist test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { content?: string; toolCalls?: Array<{ name: string; arguments: string }> }; + }>; + }; + // toolCalls should win + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("search"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Non-OpenAI streaming through recorder +// --------------------------------------------------------------------------- + +describe("recorder non-OpenAI streaming", () => { + it("collapses Anthropic SSE streaming to fixture content", async 
() => { + // Create a raw upstream that returns Anthropic SSE format + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + res.write( + `event: message_start\ndata: ${JSON.stringify({ type: "message_start", message: { id: "msg_s", role: "assistant" } })}\n\n`, + ); + res.write( + `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Streamed " } })}\n\n`, + ); + res.write( + `event: content_block_delta\ndata: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Anthropic" } })}\n\n`, + ); + res.write(`event: message_stop\ndata: ${JSON.stringify({ type: "message_stop" })}\n\n`); + res.end(); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "stream anthropic test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Streamed Anthropic"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests — streaming 
through recorder: Gemini SSE + Ollama NDJSON +// --------------------------------------------------------------------------- + +describe("recorder streaming collapse: Gemini SSE", () => { + it("collapses Gemini SSE streaming response to non-streaming fixture", async () => { + // Create upstream with gemini provider + upstream = await createServer( + [ + { + match: { userMessage: "hello gemini" }, + response: { content: "Gemini says hello back." }, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Gemini request + const resp = await post( + `${recorder.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + { + contents: [{ parts: [{ text: "hello gemini" }], role: "user" }], + }, + ); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Gemini says hello back."); + }); +}); + +describe("recorder streaming collapse: Cohere SSE", () => { + it("collapses Cohere SSE streaming response to non-streaming fixture", async () => { + upstream = await createServer( + [ + { + match: { userMessage: "hello cohere" }, + response: { content: "Cohere says hello." 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { cohere: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Cohere request + const resp = await post(`${recorder.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "hello cohere" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Cohere says hello."); + }); +}); + +describe("recorder streaming collapse: Ollama NDJSON", () => { + it("collapses Ollama NDJSON streaming response to non-streaming fixture", async () => { + upstream = await createServer( + [ + { + match: { userMessage: "hello ollama" }, + response: { content: "Ollama says hi." 
}, + }, + ], + { port: 0 }, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: upstream.url }, fixturePath: tmpDir }, + }); + + // Send streaming Ollama request (stream defaults to true) + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "hello ollama" }], + }); + + expect(resp.status).toBe(200); + + // Check saved fixture + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + expect(fixtureContent.fixtures).toHaveLength(1); + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + expect(savedResponse.content).toBe("Ollama says hi."); + }); +}); + +// --------------------------------------------------------------------------- +// buildFixtureResponse format detection +// --------------------------------------------------------------------------- + +describe("buildFixtureResponse format detection", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + function createRawUpstreamWithStatus( + responseBody: object | string, + status: number = 200, + contentType: string = "application/json", + ): Promise<{ url: string; server: http.Server }> { + return new Promise((resolve) => { + const srv = http.createServer((_req, res) => { + res.writeHead(status, { "Content-Type": contentType }); + res.end(typeof responseBody === "string" ? 
responseBody : JSON.stringify(responseBody)); + }); + srv.listen(0, "127.0.0.1", () => { + const addr = srv.address() as { port: number }; + servers.push(srv); + resolve({ url: `http://127.0.0.1:${addr.port}`, server: srv }); + }); + }); + } + + it("detects Anthropic tool_use format and saves toolCalls", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + content: [ + { + type: "tool_use", + id: "toolu_123", + name: "get_weather", + input: { city: "SF" }, + }, + ], + role: "assistant", + stop_reason: "tool_use", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { anthropic: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/messages`, { + model: "claude-3-sonnet", + max_tokens: 100, + messages: [{ role: "user", content: "tool use format test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + // Should be toolCalls, NOT content + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "SF", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + }); + + it("detects Gemini functionCall format and saves toolCalls", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + candidates: [ + { + content: { + 
parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "SF" }, + }, + }, + ], + }, + }, + ], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { gemini: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1beta/models/gemini-2.0-flash:generateContent`, { + contents: [{ parts: [{ text: "gemini tool call test" }], role: "user" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "SF", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + }); + + it("unknown format falls back to error response", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + custom: "data", + status: "ok", + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "unknown format test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => 
f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + error?: { message: string; type: string }; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.error).toBeDefined(); + expect(fixtureContent.fixtures[0].response.error!.message).toContain( + "Could not detect response format", + ); + expect(fixtureContent.fixtures[0].response.error!.type).toBe("proxy_error"); + }); + + it("detects direct embedding format (top-level embedding array)", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus({ + embedding: [0.1, 0.2, 0.3], + }); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/embeddings`, { + model: "text-embedding-3-small", + input: "direct embedding test", + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { embedding?: number[] }; + }>; + }; + expect(fixtureContent.fixtures[0].response.embedding).toEqual([0.1, 0.2, 0.3]); + }); + + it("preserves error code field from upstream error response", async () => { + const { url: upstreamUrl } = await createRawUpstreamWithStatus( + { + error: { + message: "Rate limited", + type: "rate_limit_error", + code: "rate_limit", + }, + }, + 429, + ); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: upstreamUrl }, fixturePath: tmpDir }, + }); + + const resp = await 
post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "rate limit test" }], + }); + + expect(resp.status).toBe(429); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + error?: { message: string; type: string; code?: string }; + status?: number; + }; + }>; + }; + expect(fixtureContent.fixtures[0].response.error).toBeDefined(); + expect(fixtureContent.fixtures[0].response.error!.message).toBe("Rate limited"); + expect(fixtureContent.fixtures[0].response.error!.type).toBe("rate_limit_error"); + expect(fixtureContent.fixtures[0].response.error!.code).toBe("rate_limit"); + expect(fixtureContent.fixtures[0].response.status).toBe(429); + }); +}); + +// --------------------------------------------------------------------------- +// Bedrock EventStream binary through recorder +// --------------------------------------------------------------------------- + +describe("recorder Bedrock EventStream binary", () => { + it("collapses Bedrock binary EventStream to text fixture", async () => { + // Create a raw upstream returning application/vnd.amazon.eventstream binary + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/vnd.amazon.eventstream" }); + + // Write binary EventStream frames using encodeEventStreamMessage + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Hello " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "from Bedrock" }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame3 = encodeEventStreamMessage("messageStop", { + messageStop: 
{ stopReason: "end_turn" }, + }); + + res.write(frame1); + res.write(frame2); + res.write(frame3); + res.end(); + }); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/model/claude-v3/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "bedrock binary test" }], + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Hello from Bedrock"); + + await new Promise((resolve) => rawServer.close(() => resolve())); + }); +}); + +// --------------------------------------------------------------------------- +// Streaming edge cases — droppedChunks and content+toolCalls coexistence +// --------------------------------------------------------------------------- + +describe("recorder streaming edge cases", () => { + let servers: http.Server[] = []; + + afterEach(async () => { + for (const s of servers) { + await new Promise((resolve) => s.close(() => resolve())); + } + servers = []; + }); + + it("streaming with malformed chunks: fixture still saved with surviving content", async () => { + // Create a raw upstream that returns SSE with malformed chunks mixed in + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + 
res.write( + `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "Hello" } }] })}\n\n`, + ); + res.write(`data: {MALFORMED JSON!!!\n\n`); + res.write( + `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: " World" } }] })}\n\n`, + ); + res.write(`data: [DONE]\n\n`); + res.end(); + }); + servers.push(rawServer); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "droppedchunks test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { content: string }; + // Surviving content from non-malformed chunks + expect(savedResponse.content).toBe("Hello World"); + }); + + it("streaming with content + toolCalls: fixture saves toolCalls (not content)", async () => { + // Create a raw upstream that returns SSE with both text and tool call deltas + const rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "text/event-stream" }); + res.write( + `data: ${JSON.stringify({ + id: "c1", + choices: [{ delta: { content: "Calling tool..." 
} }], + })}\n\n`, + ); + res.write( + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_abc", + type: "function", + function: { name: "get_weather", arguments: '{"city":"SF"}' }, + }, + ], + }, + }, + ], + })}\n\n`, + ); + res.write(`data: [DONE]\n\n`); + res.end(); + }); + servers.push(rawServer); + await new Promise((resolve) => rawServer.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: rawUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "content+tools test" }], + stream: true, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as FixtureFile; + const savedResponse = fixtureContent.fixtures[0].response as { + toolCalls?: Array<{ name: string; arguments: string }>; + content?: string; + }; + // When toolCalls exist, they win over content + expect(savedResponse.toolCalls).toBeDefined(); + expect(savedResponse.toolCalls).toHaveLength(1); + expect(savedResponse.toolCalls![0].name).toBe("get_weather"); + }); +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function createMockReqRes(): { req: http.IncomingMessage; res: http.ServerResponse } { + // Create minimal mock objects — only needed for type compatibility, + // proxyAndRecord returns false before touching them in these test cases 
+  const req = Object.create(http.IncomingMessage.prototype) as http.IncomingMessage;
+  req.headers = {};
+  const res = Object.create(http.ServerResponse.prototype) as http.ServerResponse;
+  return { req, res };
+}
+
+async function setupUpstreamAndRecorder(
+  upstreamFixtures: Fixture[],
+  providerKey: string = "openai",
+): Promise<{ upstreamUrl: string; recorderUrl: string; fixturePath: string }> {
+  // Create upstream "real API" server
+  upstream = await createServer(upstreamFixtures, { port: 0 });
+
+  // Create temp directory for recorded fixtures
+  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-"));
+
+  // Create recording llmock (no fixtures — everything proxies)
+  const providers: Record<string, string> = {};
+  providers[providerKey] = upstream.url;
+
+  recorder = await createServer([], {
+    port: 0,
+    record: { providers, fixturePath: tmpDir },
+  });
+
+  return {
+    upstreamUrl: upstream.url,
+    recorderUrl: recorder.url,
+    fixturePath: tmpDir,
+  };
+}
diff --git a/src/__tests__/server.test.ts b/src/__tests__/server.test.ts
index 4993444..3a61f4d 100644
--- a/src/__tests__/server.test.ts
+++ b/src/__tests__/server.test.ts
@@ -789,7 +789,7 @@ describe("journal", () => {
     );
 
     const entry = instance.journal.getLast();
-    expect(entry!.headers["authorization"]).toBe("Bearer sk-test");
+    expect(entry!.headers["authorization"]).toBe("[REDACTED]");
   });
 });
 
@@ -1016,7 +1016,7 @@ describe("header forwarding in journal", () => {
     const entry = instance.journal.getLast();
     expect(entry).not.toBeNull();
-    expect(entry!.headers["authorization"]).toBe("Bearer test-key");
+    expect(entry!.headers["authorization"]).toBe("[REDACTED]");
     expect(entry!.headers["x-custom-header"]).toBe("custom-value");
     expect(entry!.headers["content-type"]).toBe("application/json");
   });
@@ -1055,7 +1055,7 @@ describe("header forwarding in journal", () => {
     const entries = JSON.parse(res.body);
     expect(entries).toHaveLength(1);
-    expect(entries[0].headers["authorization"]).toBe("Bearer api-key-123");
+    
expect(entries[0].headers["authorization"]).toBe("[REDACTED]"); expect(entries[0].headers["x-request-id"]).toBe("req-abc-def"); expect(entries[0].headers["content-type"]).toBe("application/json"); expect(entries[0].headers["host"]).toBeDefined(); @@ -1075,8 +1075,8 @@ describe("header forwarding in journal", () => { const entries = instance.journal.getAll(); expect(entries).toHaveLength(2); - expect(entries[0].headers["authorization"]).toBe("Bearer key-one"); - expect(entries[1].headers["authorization"]).toBe("Bearer key-two"); + expect(entries[0].headers["authorization"]).toBe("[REDACTED]"); + expect(entries[1].headers["authorization"]).toBe("[REDACTED]"); }); }); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts new file mode 100644 index 0000000..eb35fb3 --- /dev/null +++ b/src/__tests__/stream-collapse.test.ts @@ -0,0 +1,1429 @@ +import { describe, it, expect } from "vitest"; +import { + collapseOpenAISSE, + collapseAnthropicSSE, + collapseGeminiSSE, + collapseOllamaNDJSON, + collapseCohereSS, + collapseBedrockEventStream, + collapseStreamingResponse, +} from "../stream-collapse.js"; +import { encodeEventStreamMessage, encodeEventStreamFrame } from "../aws-event-stream.js"; + +// --------------------------------------------------------------------------- +// 1. OpenAI SSE +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE", () => { + it("collapses text content from SSE chunks", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { role: "assistant" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: "Hello" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: " world" } }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-123", choices: [{ delta: { content: "!" 
} }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("Hello world!"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool calls with merged arguments", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_abc", + type: "function", + function: { name: "get_weather", arguments: '{"ci' }, + }, + ], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'ty":"Pa' }, + }, + ], + }, + }, + ], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-456", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { arguments: 'ris"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("call_abc"); + expect(result.content).toBeUndefined(); + }); + + it("handles multiple tool calls", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-789", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { name: "func_a", arguments: '{"x":1}' }, + }, + { + index: 1, + id: "call_2", + type: "function", + function: { name: "func_b", arguments: '{"y":2}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("func_a"); + expect(result.toolCalls![1].name).toBe("func_b"); + }); + + it("returns empty content for empty stream", () => { + 
const body = "data: [DONE]\n\n"; + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "A" } }] })}`, + "", + `data: {INVALID JSON!!!`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "B" } }] })}`, + "", + `data: also broken`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-d1", choices: [{ delta: { content: "C" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("ABC"); + expect(result.droppedChunks).toBe(2); + }); + + it("choices with no delta property are skipped (continue)", () => { + const body = [ + `data: ${JSON.stringify({ id: "chatcmpl-nd", choices: [{ finish_reason: "stop" }] })}`, + "", + `data: ${JSON.stringify({ id: "chatcmpl-nd", choices: [{ delta: { content: "OK" } }] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.content).toBe("OK"); + }); + + it("captures both text deltas and tool call deltas in same stream", () => { + const body = [ + `data: ${JSON.stringify({ + id: "chatcmpl-mix", + choices: [{ delta: { content: "Calling tool..." 
} }], + })}`, + "", + `data: ${JSON.stringify({ + id: "chatcmpl-mix", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_mix", + type: "function", + function: { name: "lookup", arguments: '{"q":"test"}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + // When tool calls exist, they win over content + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("lookup"); + expect(result.toolCalls![0].arguments).toBe('{"q":"test"}'); + }); +}); + +// --------------------------------------------------------------------------- +// 2. Anthropic SSE +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE", () => { + it("collapses text content from SSE chunks", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ type: "message_start", message: { id: "msg_123", role: "assistant" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "text", text: "" } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hello" } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: " world" } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Hello world"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool use with input_json_delta", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ 
type: "message_start", message: { id: "msg_456" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "toolu_abc", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"ci' } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: 'ty":"Paris"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("toolu_abc"); + expect(result.content).toBeUndefined(); + }); + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "Hi" } })}`, + "", + `event: content_block_delta`, + `data: {BROKEN JSON`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: " there" } })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.content).toBe("Hi there"); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// 3. 
Gemini SSE +// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE", () => { + it("collapses text content from data-only SSE", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Hello" }] } }] })}`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: " world" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("Hello world"); + }); + + it("handles empty candidates gracefully", () => { + const body = `data: ${JSON.stringify({ candidates: [] })}\n\n`; + const result = collapseGeminiSSE(body); + expect(result.content).toBe(""); + }); + + it("collapses functionCall parts into toolCalls", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "Paris" }, + }, + }, + ], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(JSON.parse(result.toolCalls![0].arguments)).toEqual({ city: "Paris" }); + expect(result.content).toBeUndefined(); + }); + it("counts droppedChunks for malformed JSON mixed with valid chunks", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "X" }] } }] })}`, + "", + `data: NOT VALID JSON AT ALL`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "Y" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("XY"); + expect(result.droppedChunks).toBe(1); + }); + + it("includes droppedChunks in functionCall return path (bug fix)", () => { + const body = [ + `data: NOT VALID JSON`, + "", + `data: ${JSON.stringify({ + 
candidates: [ + { + content: { + role: "model", + parts: [ + { + functionCall: { + name: "get_weather", + args: { city: "Paris" }, + }, + }, + ], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.droppedChunks).toBe(1); + }); + + it("candidate with no content property is skipped (continue)", () => { + const body = [ + `data: ${JSON.stringify({ candidates: [{ finishReason: "SAFETY" }] })}`, + "", + `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "OK" }] } }] })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe("OK"); + }); +}); + +// --------------------------------------------------------------------------- +// 4. Ollama NDJSON +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON", () => { + it("collapses /api/chat format (message.content)", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "Hello" }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: " world" }, + done: false, + }), + JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Hello world"); + }); + + it("collapses /api/generate format (response field)", () => { + const body = [ + JSON.stringify({ model: "llama3", response: "Hello", done: false }), + JSON.stringify({ model: "llama3", response: " world", done: false }), + JSON.stringify({ model: "llama3", response: "", done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("Hello world"); + }); +}); + +// 
--------------------------------------------------------------------------- +// 5. Cohere SSE +// --------------------------------------------------------------------------- + +describe("collapseCohereSS", () => { + it("collapses text content from content-delta events", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "Hello" } } } })}`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: " world" } } } })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "COMPLETE" } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe("Hello world"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("collapses tool calls from tool-call events", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_xyz", + type: "function", + function: { name: "get_weather", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"city"' } } } }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: ':"Paris"}' } } } }, + })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "TOOL_CALL" } })}`, + "", + 
].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("call_xyz"); + expect(result.content).toBeUndefined(); + }); +}); + +// --------------------------------------------------------------------------- +// 6. Bedrock EventStream (binary) +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream", () => { + it("collapses text content from binary event frames", () => { + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Hello" }, + }, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: " world" }, + }, + }); + + const buf = Buffer.concat([frame1, frame2]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Hello world"); + }); + + it("handles empty buffer", () => { + const result = collapseBedrockEventStream(Buffer.alloc(0)); + expect(result.content).toBe(""); + }); + + it("collapses tool call from contentBlockStart + contentBlockDelta with toolUse", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { + toolUse: { + toolUseId: "tool_123", + name: "get_weather", + }, + }, + }, + }); + const deltaFrame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { + toolUse: { input: '{"ci' }, + }, + }, + }); + const deltaFrame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { + toolUse: { input: 'ty":"Paris"}' }, + }, + }, + }); + + const buf = 
Buffer.concat([startFrame, deltaFrame1, deltaFrame2]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"Paris"}'); + expect(result.toolCalls![0].id).toBe("tool_123"); + }); + + it("stops parsing gracefully on corrupted prelude CRC", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Good" }, + }, + }); + const badFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Bad" }, + }, + }); + // Corrupt the prelude CRC (bytes 8-11) of the bad frame + const badFrameBuf = Buffer.from(badFrame); + badFrameBuf.writeUInt32BE(0xdeadbeef, 8); + + const buf = Buffer.concat([goodFrame, badFrameBuf]); + const result = collapseBedrockEventStream(buf); + // Should parse the good frame but stop at the corrupted one + expect(result.content).toBe("Good"); + }); +}); + +// --------------------------------------------------------------------------- +// collapseStreamingResponse dispatch +// --------------------------------------------------------------------------- + +describe("collapseStreamingResponse", () => { + it("returns null for application/json (not streaming)", () => { + const result = collapseStreamingResponse("application/json", "openai", '{"choices":[]}'); + expect(result).toBeNull(); + }); + + it("dispatches text/event-stream to OpenAI for openai provider", () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "hi" } }] })}\n\ndata: [DONE]\n\n`; + const result = collapseStreamingResponse("text/event-stream", "openai", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Anthropic for anthropic provider", () => { + const body = [ + `event: content_block_delta`, + 
`data: ${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "hi" } })}`, + "", + ].join("\n"); + const result = collapseStreamingResponse("text/event-stream", "anthropic", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Gemini for gemini provider", () => { + const body = `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "hi" }] } }] })}\n\n`; + const result = collapseStreamingResponse("text/event-stream", "gemini", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches application/x-ndjson to Ollama", () => { + const body = JSON.stringify({ + model: "m", + message: { role: "assistant", content: "hi" }, + done: true, + }); + const result = collapseStreamingResponse("application/x-ndjson", "ollama", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches text/event-stream to Cohere for cohere provider", () => { + const body = [ + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "hi" } } } })}`, + "", + ].join("\n"); + const result = collapseStreamingResponse("text/event-stream", "cohere", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it("dispatches application/vnd.amazon.eventstream to Bedrock", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "hi" } }, + }); + const result = collapseStreamingResponse( + "application/vnd.amazon.eventstream", + "bedrock", + frame, + ); + expect(result).not.toBeNull(); + expect(result!.content).toBe("hi"); + }); + + it('dispatches text/event-stream with "azure" to OpenAI collapse', () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "azure-hi" } }] })}\n\ndata: [DONE]\n\n`; + const result 
= collapseStreamingResponse("text/event-stream", "azure", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("azure-hi"); + }); + + it('dispatches text/event-stream with "vertexai" to Gemini collapse', () => { + const body = `data: ${JSON.stringify({ candidates: [{ content: { parts: [{ text: "vertex-hi" }] } }] })}\n\n`; + const result = collapseStreamingResponse("text/event-stream", "vertexai", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("vertex-hi"); + }); + + it('dispatches text/event-stream with "unknown-provider" to OpenAI collapse (fallback)', () => { + const body = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "fallback-hi" } }] })}\n\ndata: [DONE]\n\n`; + const result = collapseStreamingResponse("text/event-stream", "unknown-provider", body); + expect(result).not.toBeNull(); + expect(result!.content).toBe("fallback-hi"); + }); + + it("Bedrock: string body through collapseStreamingResponse (not Buffer)", () => { + // Build a valid frame and convert to binary string + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "str-body" } }, + }); + const binaryStr = frame.toString("binary"); + const result = collapseStreamingResponse( + "application/vnd.amazon.eventstream", + "bedrock", + binaryStr, + ); + expect(result).not.toBeNull(); + expect(result!.content).toBe("str-body"); + }); + + it("collapseStreamingResponse with Buffer input for non-Bedrock SSE provider", () => { + const sseStr = `data: ${JSON.stringify({ id: "c1", choices: [{ delta: { content: "buf-hi" } }] })}\n\ndata: [DONE]\n\n`; + const buf = Buffer.from(sseStr, "utf8"); + const result = collapseStreamingResponse("text/event-stream", "openai", buf); + expect(result).not.toBeNull(); + expect(result!.content).toBe("buf-hi"); + }); +}); + +// --------------------------------------------------------------------------- +// droppedChunks: Ollama, Cohere, Bedrock +// 
--------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON droppedChunks", () => { + it("counts droppedChunks for malformed JSON lines mixed with valid ones", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "A" }, + done: false, + }), + "NOT VALID JSON", + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "B" }, + done: false, + }), + "{also broken", + JSON.stringify({ model: "llama3", message: { role: "assistant", content: "" }, done: true }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe("AB"); + expect(result.droppedChunks).toBe(2); + }); +}); + +describe("collapseCohereSS droppedChunks", () => { + it("counts droppedChunks for malformed JSON events mixed with valid ones", () => { + const body = [ + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "X" } } } })}`, + "", + `event: content-delta`, + `data: {BROKEN`, + "", + `event: content-delta`, + `data: ${JSON.stringify({ type: "content-delta", index: 0, delta: { message: { content: { type: "text", text: "Y" } } } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe("XY"); + expect(result.droppedChunks).toBe(1); + }); +}); + +describe("collapseBedrockEventStream droppedChunks", () => { + it("counts droppedChunks for valid frame with malformed JSON payload", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Good" } }, + }); + + // Build a frame with non-JSON payload + const badPayload = Buffer.from("NOT JSON AT ALL", "utf8"); + const badFrame = encodeEventStreamFrame( + { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + ":message-type": "event", + }, + badPayload, + ); + + const goodFrame2 = 
encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: " data" } }, + }); + + const buf = Buffer.concat([goodFrame, badFrame, goodFrame2]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Good data"); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Message CRC validation +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream message CRC validation", () => { + it("stops parsing on corrupted message CRC", () => { + const goodFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Good" } }, + }); + const badFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Bad" } }, + }); + // Corrupt the message CRC (last 4 bytes) of the bad frame + const badFrameBuf = Buffer.from(badFrame); + badFrameBuf.writeUInt32BE(0xdeadbeef, badFrameBuf.length - 4); + + const buf = Buffer.concat([goodFrame, badFrameBuf]); + const result = collapseBedrockEventStream(buf); + // Should parse the good frame but stop at the corrupted one + expect(result.content).toBe("Good"); + }); +}); + +// --------------------------------------------------------------------------- +// Multiple tool calls: Anthropic, Cohere, Bedrock +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE multiple tool calls", () => { + it("collapses 2 tool_use blocks at different content_block indices", () => { + const body = [ + `event: message_start`, + `data: ${JSON.stringify({ type: "message_start", message: { id: "msg_multi" } })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 0, content_block: { type: "tool_use", id: "toolu_1", name: "get_weather", input: {} } })}`, + "", + `event: content_block_delta`, + `data: 
${JSON.stringify({ type: "content_block_delta", index: 0, delta: { type: "input_json_delta", partial_json: '{"city":"NYC"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 0 })}`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ type: "content_block_start", index: 1, content_block: { type: "tool_use", id: "toolu_2", name: "get_time", input: {} } })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ type: "content_block_delta", index: 1, delta: { type: "input_json_delta", partial_json: '{"tz":"EST"}' } })}`, + "", + `event: content_block_stop`, + `data: ${JSON.stringify({ type: "content_block_stop", index: 1 })}`, + "", + `event: message_stop`, + `data: ${JSON.stringify({ type: "message_stop" })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("toolu_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("toolu_2"); + }); +}); + +describe("collapseCohereSS multiple tool calls", () => { + it("collapses 2 tool-call-start events at different indices", () => { + const body = [ + `event: message-start`, + `data: ${JSON.stringify({ type: "message-start", delta: { message: { role: "assistant" } } })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_1", + type: "function", + function: { name: "get_weather", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: 
'{"city":"NYC"}' } } } }, + })}`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 1, + delta: { + message: { + tool_calls: { + id: "call_2", + type: "function", + function: { name: "get_time", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 1, + delta: { message: { tool_calls: { function: { arguments: '{"tz":"EST"}' } } } }, + })}`, + "", + `event: message-end`, + `data: ${JSON.stringify({ type: "message-end", delta: { finish_reason: "TOOL_CALL" } })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("call_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("call_2"); + }); +}); + +describe("collapseBedrockEventStream multiple tool calls", () => { + it("collapses 2 contentBlockStart+contentBlockDelta pairs at different indices", () => { + const startFrame0 = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { toolUse: { toolUseId: "tool_1", name: "get_weather" } }, + }, + }); + const deltaFrame0 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"city":"NYC"}' } }, + }, + }); + const startFrame1 = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 1, + contentBlockStart: { + contentBlockIndex: 1, + start: { toolUse: { toolUseId: "tool_2", name: "get_time" } }, + }, + }); + const deltaFrame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 1, + 
contentBlockDelta: { + contentBlockIndex: 1, + delta: { toolUse: { input: '{"tz":"EST"}' } }, + }, + }); + + const buf = Buffer.concat([startFrame0, deltaFrame0, startFrame1, deltaFrame1]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"NYC"}'); + expect(result.toolCalls![0].id).toBe("tool_1"); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"EST"}'); + expect(result.toolCalls![1].id).toBe("tool_2"); + }); +}); + +// --------------------------------------------------------------------------- +// Empty input: Ollama, Anthropic, Cohere +// --------------------------------------------------------------------------- + +// --------------------------------------------------------------------------- +// Defensive branch coverage — OpenAI +// --------------------------------------------------------------------------- + +describe("collapseOpenAISSE defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: something", "", "data: [DONE]", ""].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("empty choices array is skipped", () => { + const body = [ + `data: ${JSON.stringify({ id: "c1", choices: [] })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + const result = collapseOpenAISSE(body); + expect(result.content).toBe(""); + }); + + it("tool call delta with no id — result toolCall has no id field", () => { + const body = [ + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + type: "function", + function: { name: "fn", arguments: '{"x":1}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + 
expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `data: {BROKEN JSON`, + "", + `data: ${JSON.stringify({ + id: "c1", + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + id: "call_1", + type: "function", + function: { name: "fn", arguments: '{"x":1}' }, + }, + ], + }, + }, + ], + })}`, + "", + "data: [DONE]", + "", + ].join("\n"); + + const result = collapseOpenAISSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Anthropic +// --------------------------------------------------------------------------- + +describe("collapseAnthropicSSE defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: content_block_delta", ""].join("\n"); + const result = collapseAnthropicSSE(body); + expect(result.content).toBe(""); + }); + + it("tool_use content_block_start with no id — result has no id field", () => { + const body = [ + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + index: 0, + content_block: { type: "tool_use", name: "fn", input: {} }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"x":1}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned input_json_delta for unknown index — no 
crash, data ignored", () => { + const body = [ + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 5, + delta: { type: "input_json_delta", partial_json: '{"orphan":true}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + // No tool calls created, no crash + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `event: content_block_start`, + `data: {BROKEN`, + "", + `event: content_block_start`, + `data: ${JSON.stringify({ + type: "content_block_start", + index: 0, + content_block: { type: "tool_use", id: "toolu_1", name: "fn", input: {} }, + })}`, + "", + `event: content_block_delta`, + `data: ${JSON.stringify({ + type: "content_block_delta", + index: 0, + delta: { type: "input_json_delta", partial_json: '{"x":1}' }, + })}`, + "", + ].join("\n"); + + const result = collapseAnthropicSSE(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Gemini +// --------------------------------------------------------------------------- + +describe("collapseGeminiSSE defensive branches", () => { + it("empty parts array is skipped", () => { + const body = [`data: ${JSON.stringify({ candidates: [{ content: { parts: [] } }] })}`, ""].join( + "\n", + ); + + const result = collapseGeminiSSE(body); + expect(result.content).toBe(""); + }); + + it("functionCall args as string — preserved as string", () => { + const body = [ + `data: ${JSON.stringify({ + candidates: [ + { + content: { + role: "model", + parts: [{ functionCall: { name: "fn", args: "already-a-string" } }], + }, + finishReason: "FUNCTION_CALL", + }, + ], + })}`, + "", + ].join("\n"); + + const result = collapseGeminiSSE(body); + 
expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].arguments).toBe("already-a-string"); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Cohere +// --------------------------------------------------------------------------- + +describe("collapseCohereSS defensive branches", () => { + it("SSE block with no data: line is skipped", () => { + const body = ["event: content-delta", ""].join("\n"); + const result = collapseCohereSS(body); + expect(result.content).toBe(""); + }); + + it("tool-call-start with no id — result has no id field", () => { + const body = [ + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: "tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + type: "function", + function: { name: "fn", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned tool-call-delta for unknown index — no crash", () => { + const body = [ + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 5, + delta: { message: { tool_calls: { function: { arguments: '{"orphan":true}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const body = [ + `event: tool-call-start`, + `data: {BROKEN`, + "", + `event: tool-call-start`, + `data: ${JSON.stringify({ + type: 
"tool-call-start", + index: 0, + delta: { + message: { + tool_calls: { + id: "call_1", + type: "function", + function: { name: "fn", arguments: "" }, + }, + }, + }, + })}`, + "", + `event: tool-call-delta`, + `data: ${JSON.stringify({ + type: "tool-call-delta", + index: 0, + delta: { message: { tool_calls: { function: { arguments: '{"x":1}' } } } }, + })}`, + "", + ].join("\n"); + + const result = collapseCohereSS(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Bedrock +// --------------------------------------------------------------------------- + +describe("collapseBedrockEventStream defensive branches", () => { + it("contentBlockStart without toolUse — no tool entry created", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: {}, + }, + }); + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { delta: { text: "Hello" } }, + }); + + const buf = Buffer.concat([startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe("Hello"); + expect(result.toolCalls).toBeUndefined(); + }); + + it("contentBlockDelta without delta — skipped", () => { + const frame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + }, + }); + + const buf = Buffer.from(frame); + const result = collapseBedrockEventStream(buf); + expect(result.content).toBe(""); + }); + + it("tool call with no toolUseId — result has no id field", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { + toolUse: { name: "fn" }, + }, + }, + }); + const 
deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"x":1}' } }, + }, + }); + + const buf = Buffer.concat([startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("fn"); + expect(result.toolCalls![0]).not.toHaveProperty("id"); + }); + + it("orphaned toolUse delta for unknown index — no crash", () => { + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 5, + contentBlockDelta: { + contentBlockIndex: 5, + delta: { toolUse: { input: '{"orphan":true}' } }, + }, + }); + + const buf = Buffer.from(deltaFrame); + const result = collapseBedrockEventStream(buf); + // No tool entry for index 5, so delta is silently ignored + expect(result.content).toBe(""); + expect(result.toolCalls).toBeUndefined(); + }); + + it("droppedChunks returned alongside toolCalls", () => { + const startFrame = encodeEventStreamMessage("contentBlockStart", { + contentBlockIndex: 0, + contentBlockStart: { + contentBlockIndex: 0, + start: { toolUse: { toolUseId: "tool_1", name: "fn" } }, + }, + }); + const deltaFrame = encodeEventStreamMessage("contentBlockDelta", { + contentBlockIndex: 0, + contentBlockDelta: { + contentBlockIndex: 0, + delta: { toolUse: { input: '{"x":1}' } }, + }, + }); + + // Build a frame with non-JSON payload for droppedChunks + const badPayload = Buffer.from("NOT JSON", "utf8"); + const badFrame = encodeEventStreamFrame( + { + ":content-type": "application/json", + ":event-type": "contentBlockDelta", + ":message-type": "event", + }, + badPayload, + ); + + const buf = Buffer.concat([badFrame, startFrame, deltaFrame]); + const result = collapseBedrockEventStream(buf); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + 
expect(result.droppedChunks).toBe(1); + }); +}); + +// --------------------------------------------------------------------------- +// Defensive branch coverage — Ollama +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON defensive branches", () => { + it("line with neither message.content nor response — no content added", () => { + const body = [JSON.stringify({ model: "x", done: true })].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.content).toBe(""); + }); +}); + +// --------------------------------------------------------------------------- +// Original empty input tests +// --------------------------------------------------------------------------- + +describe("empty input collapse", () => { + it('collapseOllamaNDJSON("") returns { content: "" }', () => { + const result = collapseOllamaNDJSON(""); + expect(result.content).toBe(""); + }); + + it('collapseAnthropicSSE("") returns { content: "" }', () => { + const result = collapseAnthropicSSE(""); + expect(result.content).toBe(""); + }); + + it('collapseCohereSS("") returns { content: "" }', () => { + const result = collapseCohereSS(""); + expect(result.content).toBe(""); + }); +}); diff --git a/src/__tests__/vertex-ai.test.ts b/src/__tests__/vertex-ai.test.ts new file mode 100644 index 0000000..fc033ac --- /dev/null +++ b/src/__tests__/vertex-ai.test.ts @@ -0,0 +1,524 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + 
path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +function parseGeminiSSEChunks(body: string): unknown[] { + const chunks: unknown[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ")) { + chunks.push(JSON.parse(line.slice(6))); + } + } + return chunks; +} + +// --- fixtures --- + +const textFixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "Hi there!" }, +}; + +const toolFixture: Fixture = { + match: { userMessage: "weather" }, + response: { + toolCalls: [ + { + name: "get_weather", + arguments: '{"city":"NYC"}', + }, + ], + }, +}; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +const VERTEX_BASE = "/v1/projects/my-project/locations/us-central1/publishers/google/models"; + +function vertexUrl(base: string, model: string, action: string): string { + return `${base}${VERTEX_BASE}/${model}:${action}`; +} + +const geminiBody = (text: string) => ({ + contents: [{ role: "user", parts: [{ text }] }], +}); + +// ─── Non-streaming (generateContent) ──────────────────────────────────────── + +describe("Vertex AI: generateContent (non-streaming)", () => { + it("routes to Gemini handler and returns correct text response", async () => { + instance = await createServer([textFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(200); + 
expect(res.headers["content-type"]).toBe("application/json"); + + const body = JSON.parse(res.body); + expect(body.candidates).toHaveLength(1); + expect(body.candidates[0].content.role).toBe("model"); + expect(body.candidates[0].content.parts[0].text).toBe("Hi there!"); + expect(body.candidates[0].finishReason).toBe("STOP"); + expect(body.usageMetadata).toBeDefined(); + }); + + it("extracts model name from URL path and records it in journal", async () => { + instance = await createServer([textFixture]); + await post(vertexUrl(instance.url, "gemini-1.5-pro", "generateContent"), geminiBody("hello")); + + const entry = instance.journal.getLast(); + expect(entry).not.toBeNull(); + expect(entry!.body.model).toBe("gemini-1.5-pro"); + }); + + it("returns tool call response with functionCall parts", async () => { + instance = await createServer([toolFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("weather"), + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.candidates[0].content.parts[0].functionCall).toBeDefined(); + expect(body.candidates[0].content.parts[0].functionCall.name).toBe("get_weather"); + expect(body.candidates[0].content.parts[0].functionCall.args).toEqual({ city: "NYC" }); + expect(body.candidates[0].finishReason).toBe("FUNCTION_CALL"); + }); +}); + +// ─── Streaming (streamGenerateContent) ────────────────────────────────────── + +describe("Vertex AI: streamGenerateContent (streaming)", () => { + it("streams text response as SSE", async () => { + instance = await createServer([textFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { role: string; parts: { text?: string }[] }; + 
finishReason?: string; + }[]; + usageMetadata?: unknown; + }[]; + + expect(chunks.length).toBeGreaterThan(0); + + // Reconstruct content from text parts + const fullText = chunks.map((c) => c.candidates[0].content.parts[0].text ?? "").join(""); + expect(fullText).toBe("Hi there!"); + + // Last chunk has finishReason + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("STOP"); + expect(lastChunk.usageMetadata).toBeDefined(); + }); + + it("streams tool calls as SSE", async () => { + instance = await createServer([toolFixture]); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("weather"), + ); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { + parts: { functionCall?: { name: string; args: unknown } }[]; + }; + finishReason?: string; + }[]; + }[]; + + expect(chunks).toHaveLength(1); + expect(chunks[0].candidates[0].content.parts[0].functionCall!.name).toBe("get_weather"); + expect(chunks[0].candidates[0].finishReason).toBe("FUNCTION_CALL"); + }); +}); + +// ─── Response format parity with consumer Gemini ──────────────────────────── + +describe("Vertex AI: response format matches consumer Gemini", () => { + it("non-streaming responses are identical", async () => { + instance = await createServer([textFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:generateContent`, + geminiBody("hello"), + ); + + const vertexBody = JSON.parse(vertexRes.body); + const geminiBody_ = JSON.parse(geminiRes.body); + + // Structure should be identical (candidates, usageMetadata) + expect(vertexBody.candidates[0].content).toEqual(geminiBody_.candidates[0].content); + 
expect(vertexBody.candidates[0].finishReason).toEqual(geminiBody_.candidates[0].finishReason); + expect(Object.keys(vertexBody)).toEqual(Object.keys(geminiBody_)); + }); + + it("streaming responses are identical", async () => { + instance = await createServer([textFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("hello"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + geminiBody("hello"), + ); + + const vertexChunks = parseGeminiSSEChunks(vertexRes.body); + const geminiChunks = parseGeminiSSEChunks(geminiRes.body); + + expect(vertexChunks.length).toBe(geminiChunks.length); + // Each chunk should have the same structure + for (let i = 0; i < vertexChunks.length; i++) { + expect(vertexChunks[i]).toEqual(geminiChunks[i]); + } + }); +}); + +// ─── Tool call parity with consumer Gemini ────────────────────────────────── + +describe("Vertex AI: tool call parity with consumer Gemini", () => { + it("non-streaming tool call responses have same structure", async () => { + instance = await createServer([toolFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("weather"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:generateContent`, + geminiBody("weather"), + ); + + const vertexBody = JSON.parse(vertexRes.body); + const geminiBody_ = JSON.parse(geminiRes.body); + + // Both should have FUNCTION_CALL finish reason + expect(vertexBody.candidates[0].finishReason).toBe("FUNCTION_CALL"); + expect(geminiBody_.candidates[0].finishReason).toBe("FUNCTION_CALL"); + + // Same role + expect(vertexBody.candidates[0].content.role).toBe(geminiBody_.candidates[0].content.role); + + // Same function name and args (IDs differ since they're randomly generated) + const vertexFc = vertexBody.candidates[0].content.parts[0].functionCall; + 
const geminiFc = geminiBody_.candidates[0].content.parts[0].functionCall; + expect(vertexFc.name).toBe(geminiFc.name); + expect(vertexFc.args).toEqual(geminiFc.args); + + // Same top-level keys + expect(Object.keys(vertexBody)).toEqual(Object.keys(geminiBody_)); + }); + + it("streaming tool call responses have same structure", async () => { + instance = await createServer([toolFixture]); + + const vertexRes = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "streamGenerateContent"), + geminiBody("weather"), + ); + const geminiRes = await post( + `${instance.url}/v1beta/models/gemini-2.0-flash:streamGenerateContent`, + geminiBody("weather"), + ); + + const vertexChunks = parseGeminiSSEChunks(vertexRes.body) as Array>; + const geminiChunks = parseGeminiSSEChunks(geminiRes.body) as Array>; + + expect(vertexChunks.length).toBe(geminiChunks.length); + + // Compare structure: same finishReason, same function name/args + for (let i = 0; i < vertexChunks.length; i++) { + const vc = vertexChunks[i].candidates as Array>; + const gc = geminiChunks[i].candidates as Array>; + expect(vc[0].finishReason).toBe(gc[0].finishReason); + const vContent = vc[0].content as Record; + const gContent = gc[0].content as Record; + expect(vContent.role).toBe(gContent.role); + const vParts = vContent.parts as Array>; + const gParts = gContent.parts as Array>; + // Same function name and args + const vFc = vParts[0].functionCall as Record; + const gFc = gParts[0].functionCall as Record; + expect(vFc.name).toBe(gFc.name); + expect(vFc.args).toEqual(gFc.args); + } + }); +}); + +// ─── Query parameter resilience ───────────────────────────────────────────── + +describe("Vertex AI: query parameter resilience", () => { + it("?alt=sse does not break routing", async () => { + instance = await createServer([textFixture]); + const urlPath = `${VERTEX_BASE}/gemini-2.0-flash:streamGenerateContent`; + + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const 
data = JSON.stringify(geminiBody("hello")); + const parsed = new URL(instance!.url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: `${urlPath}?alt=sse`, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body); + const fullText = chunks + .map( + (c) => + ((c as Record).candidates as Array>)?.[0] && + ( + ( + ( + (c as Record).candidates as Array> + )?.[0] as Record + )?.content as Record + )?.parts, + ) + .filter(Boolean) + .map((parts) => ((parts as Array>)[0]?.text as string) ?? "") + .join(""); + expect(fullText).toBe("Hi there!"); + }); +}); + +// ─── Various project/location combinations ────────────────────────────────── + +describe("Vertex AI: various project/location combinations", () => { + const combos = [ + { project: "my-project", location: "us-central1" }, + { project: "prod-123", location: "europe-west4" }, + { project: "test_project_456", location: "asia-east1" }, + { project: "my-org-project", location: "us-east1" }, + ]; + + for (const { project, location } of combos) { + it(`routes ${project}/${location} correctly`, async () => { + instance = await createServer([textFixture]); + const path = `/v1/projects/${project}/locations/${location}/publishers/google/models/gemini-2.0-flash:generateContent`; + const res = await post(`${instance.url}${path}`, geminiBody("hello")); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.candidates[0].content.parts[0].text).toBe("Hi there!"); + + // Clean up for next iteration + await new Promise((resolve) => 
{ + instance!.server.close(() => resolve()); + }); + instance = null; + }); + } +}); + +// ─── Malformed URL / Wrong method / Strict mode ───────────────────────────── + +describe("Vertex AI: malformed URL", () => { + it("22a. returns 404 for unknown action in URL", async () => { + instance = await createServer([textFixture]); + const res = await post( + `${instance.url}/v1/projects/p/locations/l/publishers/google/models/m:unknownAction`, + geminiBody("hello"), + ); + + expect(res.status).toBe(404); + }); +}); + +describe("Vertex AI: wrong HTTP method", () => { + it("22b. returns 404 for GET to a valid Vertex AI path", async () => { + instance = await createServer([textFixture]); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const parsed = new URL(vertexUrl(instance!.url, "gemini-2.0-flash", "generateContent")); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "GET", + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.end(); + }); + + expect(res.status).toBe(404); + }); +}); + +describe("Vertex AI: malformed JSON body", () => { + it("returns 400 for non-JSON body", async () => { + instance = await createServer([textFixture]); + const parsed = new URL(vertexUrl(instance.url, "gemini-2.0-flash", "generateContent")); + const res = await new Promise<{ status: number; body: string }>((resolve, reject) => { + const raw = "not json"; + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(raw), + }, + }, + (r) => { + const chunks: Buffer[] = []; + r.on("data", (c: Buffer) => chunks.push(c)); + r.on("end", () => { + resolve({ + status: r.statusCode ?? 0, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(raw); + req.end(); + }); + + expect(res.status).toBe(400); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Malformed JSON"); + }); +}); + +describe("Vertex AI: strict mode", () => { + it("22c. 
returns 503 in strict mode with no fixtures", async () => { + instance = await createServer([], { strict: true }); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toContain("no fixture matched"); + }); +}); + +// ─── Chaos ────────────────────────────────────────────────────────────────── + +describe("Vertex AI: chaos applies", () => { + it("drops request when dropRate is 1.0", async () => { + instance = await createServer([textFixture], { chaos: { dropRate: 1.0 } }); + const res = await post( + vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), + geminiBody("hello"), + ); + + expect(res.status).toBe(500); + const body = JSON.parse(res.body); + expect(body.error.code).toBe("chaos_drop"); + }); + + it("records chaos action in journal", async () => { + instance = await createServer([textFixture], { chaos: { dropRate: 1.0 } }); + await post(vertexUrl(instance.url, "gemini-2.0-flash", "generateContent"), geminiBody("hello")); + + const entries = instance.journal.getAll(); + expect(entries).toHaveLength(1); + expect(entries[0].response.chaosAction).toBe("drop"); + }); +}); From 1feef7863d40d9d711f0824c1703dd6c8fe71e18 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 15:56:39 -0700 Subject: [PATCH 04/13] docs: update skill and README for v1.6.0 features --- README.md | 42 ++++++--- skills/write-fixtures/SKILL.md | 151 +++++++++++++++++++++++++++++---- 2 files changed, 162 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index f310c12..b14985a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # @copilotkit/llmock [![Unit Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-unit.yml) [![Drift 
Tests](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml/badge.svg)](https://github.com/CopilotKit/llmock/actions/workflows/test-drift.yml) [![npm version](https://img.shields.io/npm/v/@copilotkit/llmock)](https://www.npmjs.com/package/@copilotkit/llmock) -Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, and Azure API formats, driven entirely by fixtures. Zero runtime dependencies. +Deterministic mock LLM server for testing. A real HTTP server on a real port — not an in-process interceptor — so every process in your stack (Playwright, Next.js, agent workers, microservices) can point at it via `OPENAI_BASE_URL` / `ANTHROPIC_BASE_URL` and get reproducible, instant responses. Streams SSE in real OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, and Cohere API formats, driven entirely by fixtures. Zero runtime dependencies. ## Quick Start @@ -72,17 +72,20 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r ## Features -- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html) +- **[Multi-provider support](https://llmock.copilotkit.dev/compatible-providers.html)** — [OpenAI Chat Completions](https://llmock.copilotkit.dev/chat-completions.html), [OpenAI Responses](https://llmock.copilotkit.dev/responses-api.html), [Anthropic Claude](https://llmock.copilotkit.dev/claude-messages.html), [Google Gemini](https://llmock.copilotkit.dev/gemini.html), [AWS Bedrock](https://llmock.copilotkit.dev/aws-bedrock.html) (streaming + Converse), [Azure OpenAI](https://llmock.copilotkit.dev/azure-openai.html), [Vertex AI](https://llmock.copilotkit.dev/vertex-ai.html), [Ollama](https://llmock.copilotkit.dev/ollama.html), [Cohere](https://llmock.copilotkit.dev/cohere.html) - **[Embeddings API](https://llmock.copilotkit.dev/embeddings.html)** — OpenAI-compatible embedding responses with configurable dimensions - **[Structured output / JSON mode](https://llmock.copilotkit.dev/structured-output.html)** — `response_format`, `json_schema`, and function calling - **[Sequential responses](https://llmock.copilotkit.dev/sequential-responses.html)** — Stateful multi-turn fixtures that return different responses on each call - **[Streaming physics](https://llmock.copilotkit.dev/streaming-physics.html)** — Configurable `ttft`, `tps`, and `jitter` for realistic timing - **[WebSocket APIs](https://llmock.copilotkit.dev/websocket.html)** — OpenAI Responses WS, Realtime API, and Gemini Live - **[Error 
injection](https://llmock.copilotkit.dev/error-injection.html)** — One-shot errors, rate limiting, and provider-specific error formats +- **[Chaos testing](https://llmock.copilotkit.dev/chaos.html)** — Probabilistic failure injection: 500 errors, malformed JSON, mid-stream disconnects +- **[Prometheus metrics](https://llmock.copilotkit.dev/metrics.html)** — Request counts, latencies, and fixture match rates at `/metrics` - **[Request journal](https://llmock.copilotkit.dev/docs.html)** — Record, inspect, and assert on every request - **[Fixture validation](https://llmock.copilotkit.dev/fixtures.html)** — Schema validation at load time with `--validate-on-load` - **CLI with hot-reload** — Standalone server with `--watch` for live fixture editing - **[Docker + Helm](https://llmock.copilotkit.dev/docker.html)** — Container image and Helm chart for CI/CD pipelines +- **Record-and-replay** — VCR-style proxy-on-miss records real API responses as fixtures for deterministic replay - **[Drift detection](https://llmock.copilotkit.dev/drift-detection.html)** — Daily CI runs against real APIs to catch response format changes - **Claude Code integration** — `/write-fixtures` skill teaches your AI assistant how to write fixtures correctly @@ -92,17 +95,24 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r llmock [options] ``` -| Option | Short | Default | Description | -| -------------------- | ----- | ------------ | ----------------------------------------- | -| `--port` | `-p` | `4010` | Port to listen on | -| `--host` | `-h` | `127.0.0.1` | Host to bind to | -| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | -| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | -| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | -| `--watch` | `-w` | | Watch fixture path for changes and reload | -| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | -| `--validate-on-load` | | | Validate fixture schemas at startup | -| `--help` | | | Show help | +| Option | Short | Default | Description | +| -------------------- | ----- | ------------ | ------------------------------------------- | +| `--port` | `-p` | `4010` | Port to listen on | +| `--host` | `-h` | `127.0.0.1` | Host to bind to | +| `--fixtures` | `-f` | `./fixtures` | Path to fixtures directory or file | +| `--latency` | `-l` | `0` | Latency between SSE chunks (ms) | +| `--chunk-size` | `-c` | `20` | Characters per SSE chunk | +| `--watch` | `-w` | | Watch fixture path for changes and reload | +| `--log-level` | | `info` | Log verbosity: `silent`, `info`, `debug` | +| `--validate-on-load` | | | Validate fixture schemas at startup | +| `--chaos-drop` | | `0` | Chaos: probability of 500 errors (0-1) | +| `--chaos-malformed` | | `0` | Chaos: probability of malformed JSON (0-1) | +| `--chaos-disconnect` | | `0` | Chaos: probability of disconnect (0-1) | +| `--metrics` | | | Enable Prometheus metrics at /metrics | +| `--record` | | | Record mode: proxy unmatched to real APIs | +| `--strict` | | | Strict mode: fail on unmatched requests | +| `--provider-*` | | | Upstream URL per provider (with `--record`) | +| `--help` | | | Show help | ```bash # Start with bundled example fixtures @@ -113,6 +123,12 @@ llmock -p 8080 -f 
./my-fixtures # Simulate slow responses llmock --latency 100 --chunk-size 5 + +# Record mode: proxy unmatched requests to real APIs and save as fixtures +llmock --record --provider-openai https://api.openai.com --provider-anthropic https://api.anthropic.com + +# Strict mode in CI: fail if any request doesn't match a fixture +llmock --strict -f ./fixtures ``` ## Documentation diff --git a/skills/write-fixtures/SKILL.md b/skills/write-fixtures/SKILL.md index cfaeb24..6c2e102 100644 --- a/skills/write-fixtures/SKILL.md +++ b/skills/write-fixtures/SKILL.md @@ -7,7 +7,7 @@ description: Use when writing test fixtures for @copilotkit/llmock — mock LLM ## What llmock Is -Zero-dependency mock LLM server. Fixture-driven. Multi-provider (OpenAI, Anthropic, Gemini, AWS Bedrock, Azure OpenAI). Runs a real HTTP server on a real port — works across processes, unlike MSW-style interceptors. WebSocket support for OpenAI Responses/Realtime and Gemini Live APIs. +Zero-dependency mock LLM server. Fixture-driven. Multi-provider (OpenAI, Anthropic, Gemini, AWS Bedrock, Azure OpenAI, Vertex AI, Ollama, Cohere). Runs a real HTTP server on a real port — works across processes, unlike MSW-style interceptors. WebSocket support for OpenAI Responses/Realtime and Gemini Live APIs. Chaos testing and Prometheus metrics. ## Core Mental Model @@ -73,6 +73,22 @@ The embedding vector is returned for each input in the request. If no embedding { error: { message: "Rate limited", type: "rate_limit_error" }, status: 429 } ``` +### Chaos (Failure Injection) + +The optional `chaos` field on a fixture enables probabilistic failure injection: + +```typescript +{ + chaos?: { + dropRate?: number; // Probability (0-1) of returning a 500 error + malformedRate?: number; // Probability (0-1) of returning malformed JSON + disconnectRate?: number; // Probability (0-1) of disconnecting mid-stream + } +} +``` + +Rates are evaluated per-request. When triggered, the chaos failure replaces the normal response. 
+ ## Common Patterns ### Basic text fixture @@ -212,6 +228,25 @@ mock.onMessage( ); ``` +### Chaos testing (probabilistic failures) + +```typescript +mock.addFixture({ + match: { userMessage: "flaky" }, + response: { content: "Sometimes works!" }, + chaos: { dropRate: 0.3 }, +}); +``` + +30% of requests matching this fixture will get a 500 error instead of the response. Can also use `malformedRate` (garbled JSON) or `disconnectRate` (connection dropped mid-stream). + +Server-level chaos applies to ALL requests: + +```typescript +mock.setChaos({ dropRate: 0.1 }); // 10% of all requests fail +mock.clearChaos(); // Remove server-level chaos +``` + ### Error injection (one-shot) ```typescript @@ -248,22 +283,32 @@ Load with `mock.loadFixtureFile("./fixtures/greetings.json")` or `mock.loadFixtu All providers share the same fixture pool — write fixtures once, they work for any endpoint. -| Endpoint | Provider | Protocol | -| ------------------------------------------------ | ------------- | --------- | -| `POST /v1/chat/completions` | OpenAI | HTTP | -| `POST /v1/responses` | OpenAI | HTTP + WS | -| `POST /v1/messages` | Anthropic | HTTP | -| `POST /v1/embeddings` | OpenAI | HTTP | -| `POST /v1beta/models/{model}:{method}` | Google Gemini | HTTP | -| `POST /model/{modelId}/invoke` | AWS Bedrock | HTTP | -| `POST /openai/deployments/{id}/chat/completions` | Azure OpenAI | HTTP | -| `POST /openai/deployments/{id}/embeddings` | Azure OpenAI | HTTP | -| `GET /health` | — | HTTP | -| `GET /ready` | — | HTTP | -| `GET /v1/models` | OpenAI-compat | HTTP | -| `WS /v1/responses` | OpenAI | WebSocket | -| `WS /v1/realtime` | OpenAI | WebSocket | -| `WS /ws/google.ai...BidiGenerateContent` | Gemini Live | WebSocket | +| Endpoint | Provider | Protocol | +| ---------------------------------------------------------------------------------------- | ------------- | --------- | +| `POST /v1/chat/completions` | OpenAI | HTTP | +| `POST /v1/responses` | OpenAI | HTTP + WS | +| `POST 
/v1/messages` | Anthropic | HTTP | +| `POST /v1/embeddings` | OpenAI | HTTP | +| `POST /v1beta/models/{model}:{method}` | Google Gemini | HTTP | +| `POST /model/{modelId}/invoke` | AWS Bedrock | HTTP | +| `POST /openai/deployments/{id}/chat/completions` | Azure OpenAI | HTTP | +| `POST /openai/deployments/{id}/embeddings` | Azure OpenAI | HTTP | +| `GET /health` | — | HTTP | +| `GET /ready` | — | HTTP | +| `POST /model/{modelId}/invoke-with-response-stream` | AWS Bedrock | HTTP | +| `POST /model/{modelId}/converse` | AWS Bedrock | HTTP | +| `POST /model/{modelId}/converse-stream` | AWS Bedrock | HTTP | +| `POST /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:generateContent` | Vertex AI | HTTP | +| `POST /v1/projects/{p}/locations/{l}/publishers/google/models/{m}:streamGenerateContent` | Vertex AI | HTTP | +| `POST /api/chat` | Ollama | HTTP | +| `POST /api/generate` | Ollama | HTTP | +| `GET /api/tags` | Ollama | HTTP | +| `POST /v2/chat` | Cohere | HTTP | +| `GET /metrics` | — | HTTP | +| `GET /v1/models` | OpenAI-compat | HTTP | +| `WS /v1/responses` | OpenAI | WebSocket | +| `WS /v1/realtime` | OpenAI | WebSocket | +| `WS /ws/google.ai...BidiGenerateContent` | Gemini Live | WebSocket | ## Critical Gotchas @@ -289,10 +334,20 @@ All providers share the same fixture pool — write fixtures once, they work for 11. **Sequential response counts are tracked per fixture** — counts reset with `reset()` or `resetMatchCounts()`. The count increments after each match of that fixture group (all fixtures sharing the same non-`sequenceIndex` match fields). -12. **Bedrock uses Anthropic Messages format internally** — the adapter normalizes Bedrock requests to `ChatCompletionRequest`, so the same fixtures work. Bedrock is non-streaming only. +12. **Bedrock uses Anthropic Messages format internally** — the adapter normalizes Bedrock requests to `ChatCompletionRequest`, so the same fixtures work. 
Bedrock supports both non-streaming (`/invoke`, `/converse`) and streaming (`/invoke-with-response-stream`, `/converse-stream`) endpoints. 13. **Azure OpenAI routes through the same handlers** — `/openai/deployments/{id}/chat/completions` maps to the completions handler, `/openai/deployments/{id}/embeddings` maps to the embeddings handler. Fixtures work unchanged. +14. **Ollama defaults to streaming** — opposite of OpenAI. Set `stream: false` explicitly in the request for non-streaming responses. + +15. **Ollama tool call `arguments` is an object, not a JSON string** — unlike OpenAI where `arguments` is a JSON string, Ollama sends and expects a plain object. + +16. **Bedrock streaming uses binary Event Stream format** — not SSE. The `invoke-with-response-stream` and `converse-stream` endpoints use AWS Event Stream binary encoding. + +17. **Vertex AI routes to the same handler as consumer Gemini** — the same fixtures work for both Vertex AI (`/v1/projects/.../models/{m}:generateContent`) and consumer Gemini (`/v1beta/models/{model}:generateContent`). + +18. **Cohere requires `model` field** — returns 400 if `model` is missing from the request body. + ## Debugging Fixture Mismatches When a fixture doesn't match: @@ -351,7 +406,67 @@ const mock = await LLMock.create({ port: 0 }); // creates + starts in one call | `getRequests()` | All journal entries | | `getLastRequest()` | Most recent journal entry | | `clearRequests()` | Clear journal only | +| `setChaos(opts)` | Set server-level chaos rates | +| `clearChaos()` | Remove server-level chaos | | `url` / `baseUrl` | Server URL (throws if not started) | | `port` | Server port number | Sequential responses use `on()` with `sequenceIndex` in the match — there is no dedicated convenience method. 
+ +## Record-and-Replay (VCR Mode) + +llmock supports a VCR-style record-and-replay workflow: unmatched requests are proxied to real provider APIs, and the responses are saved as standard llmock fixture files for deterministic replay. + +### CLI usage + +```bash +# Record mode: proxy unmatched requests to real OpenAI and Anthropic APIs +llmock --record \ + --provider-openai https://api.openai.com \ + --provider-anthropic https://api.anthropic.com \ + -f ./fixtures + +# Strict mode: fail on unmatched requests (no proxying, no catch-all 404) +llmock --strict -f ./fixtures +``` + +- `--record` enables proxy-on-miss. Requires at least one `--provider-*` flag. +- `--strict` returns a 404 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. +- Provider flags: `--provider-openai`, `--provider-anthropic`, `--provider-gemini`, `--provider-vertexai`, `--provider-bedrock`, `--provider-azure`, `--provider-ollama`, `--provider-cohere`. + +### How it works + +1. **Existing fixtures are served first** — the router checks all loaded fixtures before considering the proxy. +2. **Misses are proxied** — if no fixture matches and recording is enabled, the request is forwarded to the real provider API. +3. **Auth headers are forwarded but NOT saved** — `Authorization`, `x-api-key`, and `api-key` headers are passed through to the upstream provider, but stripped from the recorded fixture. +4. **Responses are saved as standard fixtures** — recorded files land in `{fixturePath}/recorded/` and use the same JSON format as hand-written fixtures. Nothing special about them. +5. **Streaming responses are collapsed** — SSE streams are collapsed into a single text or tool-call response for the fixture. The original streaming format is preserved in the live proxy response. +6. **Loud logging** — every proxy hit logs at `info` level so you can see exactly which requests are being forwarded. 
+ +### Programmatic API + +```typescript +const mock = new LLMock({ port: 0 }); +await mock.start(); + +// Enable recording at runtime +mock.enableRecording({ + providers: { + openai: "https://api.openai.com", + anthropic: "https://api.anthropic.com", + }, + fixturePath: "./fixtures/recorded", +}); + +// ... run tests that hit real APIs for uncovered cases ... + +// Disable recording (back to fixture-only mode) +mock.disableRecording(); +``` + +### Workflow + +1. **Bootstrap**: Run your test suite with `--record` and provider URLs. All requests that don't match existing fixtures are proxied and recorded. +2. **Review**: Check the recorded fixtures in `{fixturePath}/recorded/`. Edit or reorganize as needed. +3. **Lock down**: Run your test suite with `--strict` to ensure every request hits a fixture. No network calls escape. +4. **Maintain**: When APIs change, delete stale fixtures and re-record. From 711a6002b2a7b48870d9ebee36d2f71bd730fd84 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 17:12:01 -0700 Subject: [PATCH 05/13] fix: restore CLI flags and defaults getters lost during merge Restore --record, --strict, --metrics, --provider-* CLI flags that were lost during commit regrouping. Restore getter-based defaults (get chaos(), get record(), get strict()) for live config propagation. Remove direct defaults mutation in setChaos/clearChaos/enableRecording/disableRecording since getters read from the options object directly. 
--- src/cli.ts | 48 +++++++++++++++++++++++++++++++++++++++++++++++- src/index.ts | 16 ++++++++++++++++ src/llmock.ts | 4 ---- src/server.ts | 12 +++++++++--- 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 20b6e29..56e3282 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -6,7 +6,7 @@ import { createServer } from "./server.js"; import { loadFixtureFile, loadFixturesFromDir, validateFixtures } from "./fixture-loader.js"; import { Logger, type LogLevel } from "./logger.js"; import { watchFixtures } from "./watcher.js"; -import type { ChaosConfig } from "./types.js"; +import type { ChaosConfig, RecordConfig } from "./types.js"; const HELP = ` Usage: llmock [options] @@ -20,6 +20,17 @@ Options: -w, --watch Watch fixture path for changes and reload --log-level Log verbosity: silent, info, debug (default: info) --validate-on-load Validate fixture schemas at startup + --metrics Enable Prometheus metrics at GET /metrics + --record Record mode: proxy unmatched requests to real APIs + --strict Strict mode: fail on unmatched requests + --provider-openai Upstream URL for OpenAI (used with --record) + --provider-anthropic Upstream URL for Anthropic + --provider-gemini Upstream URL for Gemini + --provider-vertexai Upstream URL for Vertex AI + --provider-bedrock Upstream URL for Bedrock + --provider-azure Upstream URL for Azure OpenAI + --provider-ollama Upstream URL for Ollama + --provider-cohere Upstream URL for Cohere --chaos-drop Probability (0-1) of dropping requests with 500 --chaos-malformed Probability (0-1) of returning malformed JSON --chaos-disconnect Probability (0-1) of destroying connection @@ -36,6 +47,17 @@ const { values } = parseArgs({ watch: { type: "boolean", short: "w", default: false }, "log-level": { type: "string", default: "info" }, "validate-on-load": { type: "boolean", default: false }, + metrics: { type: "boolean", default: false }, + record: { type: "boolean", default: false }, + strict: { type: "boolean", 
default: false }, + "provider-openai": { type: "string" }, + "provider-anthropic": { type: "string" }, + "provider-gemini": { type: "string" }, + "provider-vertexai": { type: "string" }, + "provider-bedrock": { type: "string" }, + "provider-azure": { type: "string" }, + "provider-ollama": { type: "string" }, + "provider-cohere": { type: "string" }, "chaos-drop": { type: "string" }, "chaos-malformed": { type: "string" }, "chaos-disconnect": { type: "string" }, @@ -117,6 +139,27 @@ let chaos: ChaosConfig | undefined; } } +// Parse record config from CLI flags +let record: RecordConfig | undefined; +if (values.record) { + const providers: RecordConfig["providers"] = {}; + if (values["provider-openai"]) providers.openai = values["provider-openai"]; + if (values["provider-anthropic"]) providers.anthropic = values["provider-anthropic"]; + if (values["provider-gemini"]) providers.gemini = values["provider-gemini"]; + if (values["provider-vertexai"]) providers.vertexai = values["provider-vertexai"]; + if (values["provider-bedrock"]) providers.bedrock = values["provider-bedrock"]; + if (values["provider-azure"]) providers.azure = values["provider-azure"]; + if (values["provider-ollama"]) providers.ollama = values["provider-ollama"]; + if (values["provider-cohere"]) providers.cohere = values["provider-cohere"]; + + if (Object.keys(providers).length === 0) { + console.error("Error: --record requires at least one --provider-* flag"); + process.exit(1); + } + + record = { providers, fixturePath: resolve(fixturePath, "recorded") }; +} + async function main() { // Load fixtures from path (detect file vs directory) let isDir: boolean; @@ -171,6 +214,9 @@ async function main() { chunkSize, logLevel, chaos, + metrics: values.metrics, + record, + strict: values.strict, }); logger.info(`llmock server listening on ${instance.url}`); diff --git a/src/index.ts b/src/index.ts index a770b96..482a645 100644 --- a/src/index.ts +++ b/src/index.ts @@ -87,6 +87,21 @@ export type { StreamOptions 
} from "./sse-writer.js"; export { evaluateChaos, applyChaos } from "./chaos.js"; export type { ChaosAction } from "./chaos.js"; +// Recorder +export { proxyAndRecord } from "./recorder.js"; + +// Stream Collapse +export { + collapseOpenAISSE, + collapseAnthropicSSE, + collapseGeminiSSE, + collapseOllamaNDJSON, + collapseCohereSS, + collapseBedrockEventStream, + collapseStreamingResponse, +} from "./stream-collapse.js"; +export type { CollapseResult } from "./stream-collapse.js"; + // Types export type { ChatMessage, @@ -114,4 +129,5 @@ export type { FixtureOpts, EmbeddingFixtureOpts, ToolCallMessage, + RecordConfig, } from "./types.js"; diff --git a/src/llmock.ts b/src/llmock.ts index d338dcd..d528c8a 100644 --- a/src/llmock.ts +++ b/src/llmock.ts @@ -159,13 +159,11 @@ export class LLMock { setChaos(config: ChaosConfig): this { this.options.chaos = config; - if (this.serverInstance) this.serverInstance.defaults.chaos = config; return this; } clearChaos(): this { delete this.options.chaos; - if (this.serverInstance) delete this.serverInstance.defaults.chaos; return this; } @@ -173,13 +171,11 @@ export class LLMock { enableRecording(config: RecordConfig): this { this.options.record = config; - if (this.serverInstance) this.serverInstance.defaults.record = config; return this; } disableRecording(): this { delete this.options.record; - if (this.serverInstance) delete this.serverInstance.defaults.record; return this; } diff --git a/src/server.ts b/src/server.ts index 8f8b4b3..f1bae78 100644 --- a/src/server.ts +++ b/src/server.ts @@ -395,10 +395,16 @@ export async function createServer( latency: serverOptions.latency ?? 0, chunkSize: Math.max(1, serverOptions.chunkSize ?? 
DEFAULT_CHUNK_SIZE), logger, - chaos: options?.chaos, + get chaos() { + return serverOptions.chaos; + }, registry, - strict: options?.strict, - record: options?.record, + get record() { + return serverOptions.record; + }, + get strict() { + return serverOptions.strict; + }, }; const journal = new Journal(); From 924319468839c868413a0da7e6a5910255b33821 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:21 -0700 Subject: [PATCH 06/13] refactor: extract HandlerDefaults type, fix handler signatures, deduplicate ChaosAction - Create shared HandlerDefaults interface replacing 12+ inline type declarations - All handlers now have access to record, strict, registry fields (fixes silent undefined access) - Move ChaosAction type to types.ts to eliminate inline duplication in JournalEntry - Add RecordProviderKey string union for typed provider keys - Type OllamaMessage.role as union instead of bare string - Remove unused imports across all handler files - Fix bedrock.ts docstring to not overclaim /converse endpoints --- src/bedrock-converse.ts | 24 +++--------------------- src/bedrock.ts | 28 +++++++++++----------------- src/cohere.ts | 14 ++------------ src/embeddings.ts | 5 ++--- src/gemini.ts | 4 ++-- src/index.ts | 5 +++-- src/messages.ts | 4 ++-- src/ollama.ts | 26 ++++---------------------- src/responses.ts | 5 ++--- src/server.ts | 25 ++++--------------------- src/types.ts | 33 ++++++++++++++++++++++++++++++--- 11 files changed, 65 insertions(+), 108 deletions(-) diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 0880549..2ae10a2 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -9,11 +9,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -31,7 +30,6 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; 
import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; import { buildBedrockStreamTextEvents, buildBedrockStreamToolCallEvents } from "./bedrock.js"; @@ -210,15 +208,7 @@ export async function handleConverse( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -420,15 +410,7 @@ export async function handleConverseStream( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/bedrock.ts b/src/bedrock.ts index 3b8ffbf..6f3484d 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -1,19 +1,21 @@ /** - * AWS Bedrock Claude invoke endpoint support. + * AWS Bedrock Claude endpoint support. * - * Translates incoming POST /model/{modelId}/invoke requests (Bedrock Claude - * format) into the ChatCompletionRequest format used by the fixture router, - * and converts fixture responses back into the Anthropic Messages API - * non-streaming format (which Bedrock Claude SDKs expect as the response body). + * Handles POST /model/{modelId}/invoke and /invoke-with-response-stream + * requests. 
Translates incoming Bedrock Claude format into the + * ChatCompletionRequest format used by the fixture router, and converts + * fixture responses back into the appropriate Bedrock response format + * (JSON for invoke, AWS Event Stream binary encoding for streaming). + * + * See bedrock-converse.ts for /converse and /converse-stream support. */ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -244,7 +246,7 @@ export async function handleBedrock( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -553,15 +555,7 @@ export async function handleBedrockStream( modelId: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/cohere.ts b/src/cohere.ts index ba5099f..bfd1736 100644 --- a/src/cohere.ts +++ b/src/cohere.ts @@ -11,11 +11,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -34,7 +33,6 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; // ─── Cohere v2 Chat request types 
─────────────────────────────────────────── @@ -391,15 +389,7 @@ export async function handleCohere( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/embeddings.ts b/src/embeddings.ts index b86577a..3253fe8 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -7,7 +7,7 @@ */ import type * as http from "node:http"; -import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; +import type { ChatCompletionRequest, Fixture, HandlerDefaults } from "./types.js"; import { isEmbeddingResponse, isErrorResponse, @@ -18,7 +18,6 @@ import { import { matchFixture } from "./router.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; -import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; import { proxyAndRecord } from "./recorder.js"; @@ -40,7 +39,7 @@ export async function handleEmbeddings( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/gemini.ts b/src/gemini.ts index 9e5f096..5a357a6 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -379,7 +379,7 @@ export async function handleGemini( streaming: boolean, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: 
HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, providerKey: string = "gemini", ): Promise { diff --git a/src/index.ts b/src/index.ts index 482a645..ddb960a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -85,7 +85,7 @@ export type { StreamOptions } from "./sse-writer.js"; // Chaos export { evaluateChaos, applyChaos } from "./chaos.js"; -export type { ChaosAction } from "./chaos.js"; +export type { ChaosAction } from "./types.js"; // Recorder export { proxyAndRecord } from "./recorder.js"; @@ -96,7 +96,7 @@ export { collapseAnthropicSSE, collapseGeminiSSE, collapseOllamaNDJSON, - collapseCohereSS, + collapseCohereSSE, collapseBedrockEventStream, collapseStreamingResponse, } from "./stream-collapse.js"; @@ -130,4 +130,5 @@ export type { EmbeddingFixtureOpts, ToolCallMessage, RecordConfig, + RecordProviderKey, } from "./types.js"; diff --git a/src/messages.ts b/src/messages.ts index 5fb38d2..75e04f8 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -431,7 +431,7 @@ export async function handleMessages( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; diff --git a/src/ollama.ts b/src/ollama.ts index 0ddcc62..2f4f5bf 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -14,11 +14,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, - RecordConfig, + HandlerDefaults, ToolCall, ToolDefinition, } from "./types.js"; @@ -30,13 +29,12 @@ import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; import type { Logger } from "./logger.js"; 
import { applyChaos } from "./chaos.js"; -import type { MetricsRegistry } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; // ─── Ollama request types ──────────────────────────────────────────────────── interface OllamaMessage { - role: string; + role: "system" | "user" | "assistant" | "tool"; content: string; } @@ -288,15 +286,7 @@ export async function handleOllama( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { const { logger } = defaults; @@ -539,15 +529,7 @@ export async function handleOllamaGenerate( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - record?: RecordConfig; - strict?: boolean; - }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { setCorsHeaders(res); diff --git a/src/responses.ts b/src/responses.ts index 28e2af0..fb96cf3 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -8,10 +8,10 @@ import type * as http from "node:http"; import type { - ChaosConfig, ChatCompletionRequest, ChatMessage, Fixture, + HandlerDefaults, StreamingProfile, ToolCall, ToolDefinition, @@ -28,7 +28,6 @@ import { matchFixture } from "./router.js"; import { writeErrorResponse, delay, calculateDelay } from "./sse-writer.js"; import { createInterruptionSignal } from "./interruption.js"; import type { Journal } from "./journal.js"; -import type { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; import { proxyAndRecord } from "./recorder.js"; @@ -499,7 +498,7 @@ export async function handleResponses( raw: string, fixtures: Fixture[], journal: Journal, - defaults: { latency: number; 
chunkSize: number; logger: Logger; chaos?: ChaosConfig }, + defaults: HandlerDefaults, setCorsHeaders: (res: http.ServerResponse) => void, ): Promise { setCorsHeaders(res); diff --git a/src/server.ts b/src/server.ts index f1bae78..a5df546 100644 --- a/src/server.ts +++ b/src/server.ts @@ -2,9 +2,8 @@ import * as http from "node:http"; import type { Fixture, ChatCompletionRequest, - ChaosConfig, + HandlerDefaults, MockServerOptions, - RecordConfig, } from "./types.js"; import { Journal } from "./journal.js"; import { matchFixture } from "./router.js"; @@ -34,22 +33,14 @@ import { handleWebSocketRealtime } from "./ws-realtime.js"; import { handleWebSocketGeminiLive } from "./ws-gemini-live.js"; import { Logger } from "./logger.js"; import { applyChaos } from "./chaos.js"; -import { createMetricsRegistry, normalizePathLabel, type MetricsRegistry } from "./metrics.js"; +import { createMetricsRegistry, normalizePathLabel } from "./metrics.js"; import { proxyAndRecord } from "./recorder.js"; export interface ServerInstance { server: http.Server; journal: Journal; url: string; - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - strict?: boolean; - record?: RecordConfig; - }; + defaults: HandlerDefaults; } const COMPLETIONS_PATH = "/v1/chat/completions"; @@ -122,15 +113,7 @@ async function handleCompletions( res: http.ServerResponse, fixtures: Fixture[], journal: Journal, - defaults: { - latency: number; - chunkSize: number; - logger: Logger; - chaos?: ChaosConfig; - registry?: MetricsRegistry; - strict?: boolean; - record?: RecordConfig; - }, + defaults: HandlerDefaults, modelFallback?: string, providerKey?: string, ): Promise { diff --git a/src/types.ts b/src/types.ts index df0ee6b..02e601a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,4 +1,7 @@ -// OpenAI Chat Completion request types (subset we care about) +import type { Logger } from "./logger.js"; +import type { MetricsRegistry } from 
"./metrics.js"; + +// LLMock type definitions — shared across all provider adapters and the fixture router. export interface ContentPart { type: string; @@ -97,6 +100,8 @@ export interface ChaosConfig { disconnectRate?: number; } +export type ChaosAction = "drop" | "malformed" | "disconnect"; + // Fixture export interface Fixture { @@ -156,7 +161,7 @@ export interface JournalEntry { fixture: Fixture | null; interrupted?: boolean; interruptReason?: string; - chaosAction?: "drop" | "malformed" | "disconnect"; + chaosAction?: ChaosAction; }; } @@ -215,8 +220,18 @@ export interface ChatCompletionMessage { // Server options +export type RecordProviderKey = + | "openai" + | "anthropic" + | "gemini" + | "vertexai" + | "bedrock" + | "azure" + | "ollama" + | "cohere"; + export interface RecordConfig { - providers: Record; + providers: Partial>; fixturePath?: string; } @@ -235,3 +250,15 @@ export interface MockServerOptions { /** Record-and-replay: proxy unmatched requests to upstream and save fixtures. 
*/ record?: RecordConfig; } + +// Handler defaults — the common shape passed from server.ts to every handler + +export interface HandlerDefaults { + latency: number; + chunkSize: number; + logger: Logger; + chaos?: ChaosConfig; + registry?: MetricsRegistry; + record?: RecordConfig; + strict?: boolean; +} From f527e23d73a2160244509c91eaf69be1d3848cfb Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:32 -0700 Subject: [PATCH 07/13] fix: recorder binary relay, Ollama tool_calls collapse, chaos rate clamping - Use raw Buffer for binary EventStream relay instead of UTF-8 string (prevents CRC corruption) - buildFixtureResponse checks toolCalls before empty content for Ollama responses - Add null guard on tc.function in Ollama tool_calls extraction - collapseOllamaNDJSON accumulates message.tool_calls from stream chunks - Rename collapseCohereSS to collapseCohereSSE for naming consistency - Clamp chaos rates to [0,1] after merging all override levels - Add X-LLMock-Record-Error header when fixture write fails - Fix auth header comment in recorder --- src/chaos.ts | 11 ++++++++--- src/recorder.ts | 23 +++++++++++++++++++---- src/stream-collapse.ts | 29 +++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/chaos.ts b/src/chaos.ts index 05e130f..8c0f0d8 100644 --- a/src/chaos.ts +++ b/src/chaos.ts @@ -8,13 +8,11 @@ */ import type * as http from "node:http"; -import type { ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; +import type { ChaosAction, ChaosConfig, ChatCompletionRequest, Fixture } from "./types.js"; import { writeErrorResponse } from "./sse-writer.js"; import type { Journal } from "./journal.js"; import type { MetricsRegistry } from "./metrics.js"; -export type ChaosAction = "drop" | "malformed" | "disconnect"; - /** * Resolve chaos config from headers, fixture, and server defaults. * Header values override fixture values, which override server defaults. 
@@ -54,6 +52,13 @@ function resolveChaosConfig( } } + // Clamp all rates to [0, 1] + if (base.dropRate !== undefined) base.dropRate = Math.max(0, Math.min(1, base.dropRate)); + if (base.malformedRate !== undefined) + base.malformedRate = Math.max(0, Math.min(1, base.malformedRate)); + if (base.disconnectRate !== undefined) + base.disconnectRate = Math.max(0, Math.min(1, base.disconnectRate)); + return base; } diff --git a/src/recorder.ts b/src/recorder.ts index 650b331..3c34223 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -148,7 +148,7 @@ export async function proxyAndRecord( // Ensure fixture directory exists fs.mkdirSync(fixturePath, { recursive: true }); - // Exclude auth headers from saved fixture (they're in the match/response, not headers) + // Auth headers are forwarded to upstream but excluded from saved fixtures for security const fileContent = isEmptyMatch ? { fixtures: [fixture], @@ -159,6 +159,7 @@ export async function proxyAndRecord( } catch (err) { const msg = err instanceof Error ? err.message : "Unknown filesystem error"; defaults.logger.error(`Failed to save fixture to disk: ${msg}`); + res.setHeader("X-LLMock-Record-Error", msg); } // Register in memory so subsequent identical requests match (skip if empty match) @@ -174,7 +175,7 @@ export async function proxyAndRecord( relayHeaders["Content-Type"] = ctString; } res.writeHead(upstreamStatus, relayHeaders); - res.end(upstreamBody); + res.end(isBinaryStream ? rawBuffer : upstreamBody); return true; } @@ -330,10 +331,24 @@ function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse } } - // Ollama: { message: { content: "..." } } + // Ollama: { message: { content: "...", tool_calls: [...] 
} } if (obj.message && typeof obj.message === "object") { const msg = obj.message as Record; - if (typeof msg.content === "string") { + // Tool calls (check before content — Ollama sends content: "" alongside tool_calls) + if (Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) { + const toolCalls: ToolCall[] = (msg.tool_calls as Array>) + .filter((tc) => tc.function != null) + .map((tc) => { + const fn = tc.function as Record; + return { + name: String(fn.name ?? ""), + arguments: + typeof fn.arguments === "string" ? fn.arguments : JSON.stringify(fn.arguments), + }; + }); + return { toolCalls }; + } + if (typeof msg.content === "string" && msg.content.length > 0) { return { content: msg.content }; } // Ollama message with content array (like Cohere) diff --git a/src/stream-collapse.ts b/src/stream-collapse.ts index 6d4558e..1aa22b5 100644 --- a/src/stream-collapse.ts +++ b/src/stream-collapse.ts @@ -271,6 +271,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { const lines = body.split("\n").filter((l) => l.trim().length > 0); let content = ""; let droppedChunks = 0; + const toolCalls: ToolCall[] = []; for (const line of lines) { let parsed: Record; @@ -283,8 +284,24 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { // /api/chat format const message = parsed.message as Record | undefined; - if (message && typeof message.content === "string") { - content += message.content; + if (message) { + if (typeof message.content === "string") { + content += message.content; + } + + // Tool calls + if (Array.isArray(message.tool_calls)) { + for (const tc of message.tool_calls as Array>) { + const fn = tc.function as Record | undefined; + if (fn) { + toolCalls.push({ + name: String(fn.name ?? ""), + arguments: + typeof fn.arguments === "string" ? 
fn.arguments : JSON.stringify(fn.arguments), + }); + } + } + } } // /api/generate format @@ -293,6 +310,10 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { } } + if (toolCalls.length > 0) { + return { toolCalls, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; + } + return { content, ...(droppedChunks > 0 ? { droppedChunks } : {}) }; } @@ -306,7 +327,7 @@ export function collapseOllamaNDJSON(body: string): CollapseResult { * Format: * event: content-delta\ndata: {"type":"content-delta","delta":{"message":{"content":{"text":"Hello"}}}}\n\n */ -export function collapseCohereSS(body: string): CollapseResult { +export function collapseCohereSSE(body: string): CollapseResult { const blocks = body.split("\n\n").filter((b) => b.trim().length > 0); let content = ""; let droppedChunks = 0; @@ -575,7 +596,7 @@ export function collapseStreamingResponse( case "vertexai": return collapseGeminiSSE(str); case "cohere": - return collapseCohereSS(str); + return collapseCohereSSE(str); default: // Try OpenAI format as default for unknown SSE providers return collapseOpenAISSE(str); From 1e7853efbbfe40f5690fab3d46d09a001a91354b Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:52:41 -0700 Subject: [PATCH 08/13] test: add coverage for strict mode, tool_calls collapse, latency, binary relay - Bedrock strict mode returns 503 for unmatched requests - Ollama NDJSON tool_calls collapse (single, priority, multiple) - writeNDJSONStream with non-zero latency - Cohere streaming tool calls with fixture-provided IDs - Recorder binary EventStream relay integrity with afterEach cleanup - collapseCohereSSE rename in test references --- src/__tests__/bedrock.test.ts | 38 +++++++ src/__tests__/cohere.test.ts | 64 +++++++++++ src/__tests__/ollama.test.ts | 69 ++++++++++++ src/__tests__/recorder.test.ts | 86 +++++++++++++++ src/__tests__/stream-collapse.test.ts | 153 +++++++++++++++++++++++--- 5 files changed, 395 insertions(+), 15 deletions(-) diff --git 
a/src/__tests__/bedrock.test.ts b/src/__tests__/bedrock.test.ts index c3b4707..969365c 100644 --- a/src/__tests__/bedrock.test.ts +++ b/src/__tests__/bedrock.test.ts @@ -527,3 +527,41 @@ describe("bedrockToCompletionRequest", () => { }); }); }); + +// --------------------------------------------------------------------------- +// strict:true returns 503 for unmatched Bedrock request +// --------------------------------------------------------------------------- + +describe("POST /model/{modelId}/invoke (strict mode)", () => { + it("returns 503 with strict message when no fixture matches in strict mode", async () => { + instance = await createServer(allFixtures, { strict: true }); + const res = await post( + `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "nomatch" }], + }, + ); + + expect(res.status).toBe(503); + const body = JSON.parse(res.body); + expect(body.error.message).toBe("Strict mode: no fixture matched"); + }); + + it("returns 200 when fixture matches even in strict mode", async () => { + instance = await createServer(allFixtures, { strict: true }); + const res = await post( + `${instance.url}/model/anthropic.claude-3-5-sonnet-20241022-v2:0/invoke`, + { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 512, + messages: [{ role: "user", content: "hello" }], + }, + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.content[0].text).toBe("Hi there!"); + }); +}); diff --git a/src/__tests__/cohere.test.ts b/src/__tests__/cohere.test.ts index 4b6228f..a7655d9 100644 --- a/src/__tests__/cohere.test.ts +++ b/src/__tests__/cohere.test.ts @@ -930,3 +930,67 @@ describe("POST /v2/chat (journal)", () => { expect(entry!.body.model).toBe("command-r-plus"); }); }); + +// --------------------------------------------------------------------------- +// Streaming tool call with explicit fixture id +// 
--------------------------------------------------------------------------- + +describe("POST /v2/chat (streaming tool call with fixture-provided id)", () => { + const toolFixtureWithId: Fixture = { + match: { userMessage: "lookup" }, + response: { + toolCalls: [ + { + name: "search_db", + arguments: '{"query":"cats"}', + id: "call_fixture_custom_123", + }, + ], + }, + }; + + it("preserves fixture-provided tool call id in streaming events", async () => { + instance = await createServer([toolFixtureWithId]); + const res = await post(`${instance.url}/v2/chat`, { + model: "command-r-plus", + messages: [{ role: "user", content: "lookup" }], + stream: true, + }); + + expect(res.status).toBe(200); + expect(res.headers["content-type"]).toBe("text/event-stream"); + + const events = parseSSEEvents(res.body); + + // tool-call-start should carry the fixture-provided id + const tcStart = events.find((e) => e.event === "tool-call-start"); + expect(tcStart).toBeDefined(); + const tcStartDelta = tcStart!.data.delta as Record; + const tcStartMsg = tcStartDelta.message as Record; + const tcStartCalls = tcStartMsg.tool_calls as Record; + expect(tcStartCalls.id).toBe("call_fixture_custom_123"); + expect(tcStartCalls.type).toBe("function"); + const tcStartFn = tcStartCalls.function as Record; + expect(tcStartFn.name).toBe("search_db"); + + // tool-call-delta(s) should accumulate to the full arguments + const tcDeltas = events.filter((e) => e.event === "tool-call-delta"); + expect(tcDeltas.length).toBeGreaterThanOrEqual(1); + const argsAccum = tcDeltas + .map((e) => { + const delta = e.data.delta as Record; + const msg = delta.message as Record; + const calls = msg.tool_calls as Record; + const fn = calls.function as Record; + return fn.arguments as string; + }) + .join(""); + expect(argsAccum).toBe('{"query":"cats"}'); + + // message-end with TOOL_CALL + const msgEnd = events.find((e) => e.event === "message-end"); + expect(msgEnd).toBeDefined(); + const endDelta = msgEnd!.data.delta 
as Record; + expect(endDelta.finish_reason).toBe("TOOL_CALL"); + }); +}); diff --git a/src/__tests__/ollama.test.ts b/src/__tests__/ollama.test.ts index c9870b4..1a5a217 100644 --- a/src/__tests__/ollama.test.ts +++ b/src/__tests__/ollama.test.ts @@ -1043,3 +1043,72 @@ describe("POST /api/chat (error fixture no explicit status)", () => { expect(body.error.message).toBe("Something went wrong"); }); }); + +// --------------------------------------------------------------------------- +// writeNDJSONStream with non-zero latency +// --------------------------------------------------------------------------- + +describe("writeNDJSONStream with non-zero latency", () => { + it("delays between chunks when latency is set", async () => { + const chunks: string[] = []; + const timestamps: number[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + timestamps.push(Date.now()); + return true; + }, + end: () => { + (res as { writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", message: { content: "Hello" }, done: false }, + { model: "llama3", message: { content: " world" }, done: false }, + { model: "llama3", message: { content: "" }, done: true }, + ]; + + const start = Date.now(); + const completed = await writeNDJSONStream(res, data, { latency: 30 }); + const elapsed = Date.now() - start; + + expect(completed).toBe(true); + expect(chunks).toHaveLength(3); + // With 30ms latency per chunk and 3 chunks, total should be >= 60ms + // (first chunk has 0 delay with default profile, subsequent chunks have latency) + expect(elapsed).toBeGreaterThanOrEqual(50); + }); + + it("all chunks are valid NDJSON with non-zero latency", async () => { + const chunks: string[] = []; + const res = { + writableEnded: false, + setHeader: () => {}, + write: (data: string) => { + chunks.push(data); + return true; + }, + end: () => { + (res as { 
writableEnded: boolean }).writableEnded = true; + }, + } as unknown as http.ServerResponse; + + const data = [ + { model: "llama3", done: false, message: { content: "a" } }, + { model: "llama3", done: true, message: { content: "" } }, + ]; + + const completed = await writeNDJSONStream(res, data, { latency: 10 }); + + expect(completed).toBe(true); + expect(chunks).toHaveLength(2); + // Each chunk should be valid JSON followed by newline + for (const chunk of chunks) { + expect(chunk.endsWith("\n")).toBe(true); + expect(() => JSON.parse(chunk.trim())).not.toThrow(); + } + }); +}); diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index 499f93d..48652cb 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -2529,3 +2529,89 @@ async function setupUpstreamAndRecorder( fixturePath: tmpDir, }; } + +// --------------------------------------------------------------------------- +// Binary EventStream relay preserves data integrity +// --------------------------------------------------------------------------- + +describe("recorder binary EventStream relay integrity", () => { + let rawServer: http.Server | undefined; + + afterEach(async () => { + if (rawServer) { + await new Promise((resolve) => rawServer!.close(() => resolve())); + rawServer = undefined; + } + }); + + it("relays binary EventStream data that can be decoded back to original content", async () => { + // Build a known binary EventStream payload upstream + const frame1 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "Binary " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame2 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "integrity " }, + contentBlockIndex: 0, + }, + contentBlockIndex: 0, + }); + const frame3 = encodeEventStreamMessage("contentBlockDelta", { + contentBlockDelta: { + delta: { text: "test" }, + contentBlockIndex: 0, + }, + 
contentBlockIndex: 0, + }); + const frame4 = encodeEventStreamMessage("messageStop", { + messageStop: { stopReason: "end_turn" }, + }); + + const expectedPayload = Buffer.concat([frame1, frame2, frame3, frame4]); + + // Create raw upstream that returns binary EventStream + rawServer = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/vnd.amazon.eventstream" }); + res.end(expectedPayload); + }); + await new Promise((resolve) => rawServer!.listen(0, "127.0.0.1", resolve)); + const rawAddr = rawServer!.address() as { port: number }; + const rawUrl = `http://127.0.0.1:${rawAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { bedrock: rawUrl }, fixturePath: tmpDir }, + }); + + // Make the request through the recorder proxy + const resp = await post(`${recorder.url}/model/claude-v3/invoke-with-response-stream`, { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 100, + messages: [{ role: "user", content: "binary integrity test" }], + }); + + expect(resp.status).toBe(200); + + // The relayed response body should contain the text from the EventStream + // frames. The relay currently converts Buffer to string, so we verify + // the content is present in the response. + // NOTE: If the relay preserves raw binary, the response body should + // contain text extractable from the EventStream frames. 
+ expect(resp.body.length).toBeGreaterThan(0); + + // Verify the fixture was saved correctly on disk + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { fixtures: Array<{ response: { content?: string } }> }; + expect(fixtureContent.fixtures[0].response.content).toBe("Binary integrity test"); + }); +}); diff --git a/src/__tests__/stream-collapse.test.ts b/src/__tests__/stream-collapse.test.ts index eb35fb3..cb1d72b 100644 --- a/src/__tests__/stream-collapse.test.ts +++ b/src/__tests__/stream-collapse.test.ts @@ -4,7 +4,7 @@ import { collapseAnthropicSSE, collapseGeminiSSE, collapseOllamaNDJSON, - collapseCohereSS, + collapseCohereSSE, collapseBedrockEventStream, collapseStreamingResponse, } from "../stream-collapse.js"; @@ -448,7 +448,7 @@ describe("collapseOllamaNDJSON", () => { // 5. Cohere SSE // --------------------------------------------------------------------------- -describe("collapseCohereSS", () => { +describe("collapseCohereSSE", () => { it("collapses text content from content-delta events", () => { const body = [ `event: message-start`, @@ -465,7 +465,7 @@ describe("collapseCohereSS", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe("Hello world"); expect(result.toolCalls).toBeUndefined(); }); @@ -509,7 +509,7 @@ describe("collapseCohereSS", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.toolCalls![0].name).toBe("get_weather"); @@ -752,7 +752,7 @@ describe("collapseOllamaNDJSON droppedChunks", () => { }); }); -describe("collapseCohereSS droppedChunks", () => { +describe("collapseCohereSSE droppedChunks", () => 
{ it("counts droppedChunks for malformed JSON events mixed with valid ones", () => { const body = [ `event: content-delta`, @@ -766,7 +766,7 @@ describe("collapseCohereSS droppedChunks", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe("XY"); expect(result.droppedChunks).toBe(1); }); @@ -868,7 +868,7 @@ describe("collapseAnthropicSSE multiple tool calls", () => { }); }); -describe("collapseCohereSS multiple tool calls", () => { +describe("collapseCohereSSE multiple tool calls", () => { it("collapses 2 tool-call-start events at different indices", () => { const body = [ `event: message-start`, @@ -923,7 +923,7 @@ describe("collapseCohereSS multiple tool calls", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(2); expect(result.toolCalls![0].name).toBe("get_weather"); @@ -1190,10 +1190,10 @@ describe("collapseGeminiSSE defensive branches", () => { // Defensive branch coverage — Cohere // --------------------------------------------------------------------------- -describe("collapseCohereSS defensive branches", () => { +describe("collapseCohereSSE defensive branches", () => { it("SSE block with no data: line is skipped", () => { const body = ["event: content-delta", ""].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.content).toBe(""); }); @@ -1222,7 +1222,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.toolCalls![0].name).toBe("fn"); @@ -1240,7 +1240,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const 
result = collapseCohereSSE(body); expect(result.content).toBe(""); expect(result.toolCalls).toBeUndefined(); }); @@ -1274,7 +1274,7 @@ describe("collapseCohereSS defensive branches", () => { "", ].join("\n"); - const result = collapseCohereSS(body); + const result = collapseCohereSSE(body); expect(result.toolCalls).toBeDefined(); expect(result.toolCalls).toHaveLength(1); expect(result.droppedChunks).toBe(1); @@ -1422,8 +1422,131 @@ describe("empty input collapse", () => { expect(result.content).toBe(""); }); - it('collapseCohereSS("") returns { content: "" }', () => { - const result = collapseCohereSS(""); + it('collapseCohereSSE("") returns { content: "" }', () => { + const result = collapseCohereSSE(""); expect(result.content).toBe(""); }); }); + +// --------------------------------------------------------------------------- +// collapseOllamaNDJSON with tool_calls in stream chunks +// --------------------------------------------------------------------------- + +describe("collapseOllamaNDJSON with tool_calls", () => { + it("extracts tool_calls from /api/chat chunks", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "SF" }, + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "" }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + // toolCalls takes priority over content when present + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + expect(result.content).toBeUndefined(); + }); + + it("returns toolCalls (not content) when both tool_calls and text are present", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: 
"Let me check ", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "SF" }, + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "the weather." }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + // When toolCalls are present, they take priority over content + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(1); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.content).toBeUndefined(); + }); + + it("extracts multiple tool_calls across chunks", () => { + const body = [ + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: '{"city":"SF"}', + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_time", + arguments: '{"tz":"PST"}', + }, + }, + ], + }, + done: false, + }), + JSON.stringify({ + model: "llama3", + message: { role: "assistant", content: "" }, + done: true, + }), + ].join("\n"); + + const result = collapseOllamaNDJSON(body); + expect(result.toolCalls).toBeDefined(); + expect(result.toolCalls).toHaveLength(2); + expect(result.toolCalls![0].name).toBe("get_weather"); + expect(result.toolCalls![0].arguments).toBe('{"city":"SF"}'); + expect(result.toolCalls![1].name).toBe("get_time"); + expect(result.toolCalls![1].arguments).toBe('{"tz":"PST"}'); + }); +}); From e59d22a1f23fb8feacf05f6dcaf7887698b6b3d9 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Fri, 20 Mar 2026 23:59:53 -0700 Subject: [PATCH 09/13] fix: pass defaults.registry to applyChaos in all handlers Five handlers (handleBedrock, handleGemini, handleMessages, handleResponses, handleEmbeddings) were missing the registry argument, causing chaos metrics to not be recorded for those endpoints. 
--- src/bedrock.ts | 20 ++++++++++++++------ src/embeddings.ts | 20 ++++++++++++++------ src/gemini.ts | 20 ++++++++++++++------ src/messages.ts | 20 ++++++++++++++------ src/responses.ts | 20 ++++++++++++++------ 5 files changed, 70 insertions(+), 30 deletions(-) diff --git a/src/bedrock.ts b/src/bedrock.ts index 6f3484d..19b1e74 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -309,12 +309,20 @@ export async function handleBedrock( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: urlPath, - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: urlPath, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/embeddings.ts b/src/embeddings.ts index 3253fe8..b8f68ca 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -93,12 +93,20 @@ export async function handleEmbeddings( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/embeddings", - headers: flattenHeaders(req.headers), - body: syntheticReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/embeddings", + headers: flattenHeaders(req.headers), + body: syntheticReq, + }, + defaults.registry, + ) ) return; diff --git a/src/gemini.ts b/src/gemini.ts index 5a357a6..8b5111a 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -422,12 +422,20 @@ export async function handleGemini( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path, - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? 
"POST", + path, + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/messages.ts b/src/messages.ts index 75e04f8..cc609fb 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -471,12 +471,20 @@ export async function handleMessages( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/messages", - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/messages", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; diff --git a/src/responses.ts b/src/responses.ts index fb96cf3..aeaad68 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -534,12 +534,20 @@ export async function handleResponses( } if ( - applyChaos(res, fixture, defaults.chaos, req.headers, journal, { - method: req.method ?? "POST", - path: req.url ?? "/v1/responses", - headers: flattenHeaders(req.headers), - body: completionReq, - }) + applyChaos( + res, + fixture, + defaults.chaos, + req.headers, + journal, + { + method: req.method ?? "POST", + path: req.url ?? "/v1/responses", + headers: flattenHeaders(req.headers), + body: completionReq, + }, + defaults.registry, + ) ) return; From 2983185f45578688f915be8561fb0db53b1a1c01 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:00:05 -0700 Subject: [PATCH 10/13] docs: fix strict mode status code, proxy log level, provider list --- README.md | 2 +- skills/write-fixtures/SKILL.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b14985a..71b7ae3 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ MSW can't intercept any of those calls. 
llmock can — it's a real server on a r **Use llmock when:** - Multiple processes need to hit the same mock (E2E tests, agent frameworks, microservices) -- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini) +- You want multi-provider SSE format out of the box (OpenAI, Claude, Gemini, Bedrock, Azure, Vertex AI, Ollama, Cohere) - You prefer defining fixtures as JSON files rather than code - You need a standalone CLI server diff --git a/skills/write-fixtures/SKILL.md b/skills/write-fixtures/SKILL.md index 6c2e102..46c4f00 100644 --- a/skills/write-fixtures/SKILL.md +++ b/skills/write-fixtures/SKILL.md @@ -431,7 +431,7 @@ llmock --strict -f ./fixtures ``` - `--record` enables proxy-on-miss. Requires at least one `--provider-*` flag. -- `--strict` returns a 404 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. +- `--strict` returns a 503 error for unmatched requests instead of proxying, even if `--record` is set. Use this in CI to ensure all requests hit fixtures. - Provider flags: `--provider-openai`, `--provider-anthropic`, `--provider-gemini`, `--provider-vertexai`, `--provider-bedrock`, `--provider-azure`, `--provider-ollama`, `--provider-cohere`. ### How it works @@ -441,7 +441,7 @@ llmock --strict -f ./fixtures 3. **Auth headers are forwarded but NOT saved** — `Authorization`, `x-api-key`, and `api-key` headers are passed through to the upstream provider, but stripped from the recorded fixture. 4. **Responses are saved as standard fixtures** — recorded files land in `{fixturePath}/recorded/` and use the same JSON format as hand-written fixtures. Nothing special about them. 5. **Streaming responses are collapsed** — SSE streams are collapsed into a single text or tool-call response for the fixture. The original streaming format is preserved in the live proxy response. -6. 
**Loud logging** — every proxy hit logs at `info` level so you can see exactly which requests are being forwarded. +6. **Loud logging** — every proxy hit logs at `warn` level so you can see exactly which requests are being forwarded. ### Programmatic API From 61876fbfe9c52674f5287874dac312f03ba488d0 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:01:25 -0700 Subject: [PATCH 11/13] fix: handle Bedrock Converse response format in buildFixtureResponse The recorder's buildFixtureResponse had no handler for the Converse format ({ output: { message: { content: [...] } } }), causing recorded fixtures to silently be saved as error responses. Add handler for both text and toolUse content blocks. --- src/recorder.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/recorder.ts b/src/recorder.ts index 3c34223..044ded2 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -331,6 +331,30 @@ function buildFixtureResponse(parsed: unknown, status: number): FixtureResponse } } + // Bedrock Converse: { output: { message: { role, content: [{ text }, { toolUse }] } } } + if (obj.output && typeof obj.output === "object") { + const output = obj.output as Record; + const msg = output.message as Record | undefined; + if (msg && Array.isArray(msg.content)) { + const blocks = msg.content as Array>; + const toolUseBlocks = blocks.filter((b) => b.toolUse); + if (toolUseBlocks.length > 0) { + const toolCalls: ToolCall[] = toolUseBlocks.map((b) => { + const tu = b.toolUse as Record; + return { + name: String(tu.name ?? ""), + arguments: typeof tu.input === "string" ? tu.input : JSON.stringify(tu.input), + }; + }); + return { toolCalls }; + } + const textBlock = blocks.find((b) => typeof b.text === "string"); + if (textBlock && typeof textBlock.text === "string") { + return { content: textBlock.text }; + } + } + } + // Ollama: { message: { content: "...", tool_calls: [...] 
} } if (obj.message && typeof obj.message === "object") { const msg = obj.message as Record; From 219df288f743b068ce43dbff82be37d38aa57ab8 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:15:32 -0700 Subject: [PATCH 12/13] fix: replace global recordCounter with UUID, pass raw body to proxy - Replace module-level mutable recordCounter with crypto.randomUUID() to avoid non-deterministic filenames in concurrent test scenarios - Pass original request body string to proxyAndRecord in the OpenAI completions path, preserving formatting fidelity to upstream --- src/recorder.ts | 5 ++--- src/server.ts | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/recorder.ts b/src/recorder.ts index 044ded2..02e5532 100644 --- a/src/recorder.ts +++ b/src/recorder.ts @@ -2,6 +2,7 @@ import * as http from "node:http"; import * as https from "node:https"; import * as fs from "node:fs"; import * as path from "node:path"; +import * as crypto from "node:crypto"; import type { ChatCompletionRequest, Fixture, @@ -13,8 +14,6 @@ import { getLastMessageByRole, getTextContent } from "./router.js"; import type { Logger } from "./logger.js"; import { collapseStreamingResponse } from "./stream-collapse.js"; -let recordCounter = 0; - /** * Proxy an unmatched request to the real upstream provider, record the * response as a fixture on disk and in memory, then relay the response @@ -141,7 +140,7 @@ export async function proxyAndRecord( } const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); - const filename = `${providerKey}-${timestamp}-${recordCounter++}.json`; + const filename = `${providerKey}-${timestamp}-${crypto.randomUUID().slice(0, 8)}.json`; const filepath = path.join(fixturePath, filename); try { diff --git a/src/server.ts b/src/server.ts index a5df546..3bd07b8 100644 --- a/src/server.ts +++ b/src/server.ts @@ -216,6 +216,7 @@ async function handleCompletions( req.url ?? 
COMPLETIONS_PATH, fixtures, defaults, + raw, ); if (proxied) { journal.add({ From e3499bcc4a78db89ace5bcc2fae4b2195c1922ca Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Sat, 21 Mar 2026 00:15:39 -0700 Subject: [PATCH 13/13] test: add regression tests for all recent bug fixes - Recorder: proxy preserves original request body formatting - Recorder: Ollama empty content + tool_calls priority in buildFixtureResponse - Recorder: UUID-based filename format - Chaos: rate clamping (>1 clamps to 1, negative clamps to 0) - Metrics: chaos counter incremented on Anthropic endpoint (was broken) --- src/__tests__/chaos.test.ts | 28 ++++++++ src/__tests__/metrics.test.ts | 23 +++++++ src/__tests__/recorder.test.ts | 121 ++++++++++++++++++++++++++++++++- 3 files changed, 170 insertions(+), 2 deletions(-) diff --git a/src/__tests__/chaos.test.ts b/src/__tests__/chaos.test.ts index 6eec85f..26902d2 100644 --- a/src/__tests__/chaos.test.ts +++ b/src/__tests__/chaos.test.ts @@ -126,6 +126,34 @@ describe("evaluateChaos", () => { const result = evaluateChaos(null, undefined, headers); expect(result).toBe("drop"); }); + + it("clamps rate > 1 to 1.0 (always triggers)", () => { + // dropRate 5.0 should be clamped to 1.0, so it always triggers + const fixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "hi" }, + chaos: { dropRate: 5.0 }, + }; + // Run 20 times — every single one must return "drop" + for (let i = 0; i < 20; i++) { + const result = evaluateChaos(fixture, undefined, undefined); + expect(result).toBe("drop"); + } + }); + + it("clamps negative rate to 0 (never triggers)", () => { + // dropRate -1.0 should be clamped to 0, so it never triggers + const fixture: Fixture = { + match: { userMessage: "hello" }, + response: { content: "hi" }, + chaos: { dropRate: -1.0 }, + }; + // Run 50 times — none should trigger + for (let i = 0; i < 50; i++) { + const result = evaluateChaos(fixture, undefined, undefined); + expect(result).toBeNull(); + } + }); }); // 
--------------------------------------------------------------------------- diff --git a/src/__tests__/metrics.test.ts b/src/__tests__/metrics.test.ts index 0d1948c..f9d1436 100644 --- a/src/__tests__/metrics.test.ts +++ b/src/__tests__/metrics.test.ts @@ -567,6 +567,29 @@ describe("integration: /metrics endpoint", () => { expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); }); + it("increments chaos counter on Anthropic /v1/messages endpoint", async () => { + const fixtures: Fixture[] = [ + { + match: { userMessage: "hello" }, + response: { content: "hi from claude" }, + }, + ]; + instance = await createServer(fixtures, { + metrics: true, + chaos: { dropRate: 1.0 }, + }); + + await httpPost(`${instance.url}/v1/messages`, { + model: "claude-3-5-sonnet-20241022", + max_tokens: 1024, + messages: [{ role: "user", content: "hello" }], + }); + + const res = await httpGet(`${instance.url}/metrics`); + expect(res.body).toContain("llmock_chaos_triggered_total"); + expect(res.body).toMatch(/llmock_chaos_triggered_total\{[^}]*action="drop"[^}]*\} 1/); + }); + it("tracks fixtures loaded gauge", async () => { const fixtures: Fixture[] = [ { match: { userMessage: "a" }, response: { content: "1" } }, diff --git a/src/__tests__/recorder.test.ts b/src/__tests__/recorder.test.ts index 48652cb..5c4ddd4 100644 --- a/src/__tests__/recorder.test.ts +++ b/src/__tests__/recorder.test.ts @@ -1311,11 +1311,60 @@ describe("recorder edge cases", () => { const fixtureFiles = files.filter((f) => f.endsWith(".json")); expect(fixtureFiles).toHaveLength(1); - // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{counter}.json (colons and dots replaced with dashes) - const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-\d+\.json$/; + // Pattern: openai-YYYY-MM-DDTHH-MM-SS-mmmZ-{uuid8}.json (colons and dots replaced with dashes) + const pattern = /^openai-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-\d{3}Z-[a-f0-9]{8}\.json$/; 
expect(fixtureFiles[0]).toMatch(pattern); }); + it("proxies the original request body to upstream (preserves formatting)", async () => { + // The proxy should forward the exact bytes the client sent, not re-serialized JSON. + // This matters because JSON key ordering and whitespace may differ after parse/serialize. + let receivedBody = ""; + const upstreamServer = http.createServer((req, res) => { + const chunks: Buffer[] = []; + req.on("data", (c: Buffer) => chunks.push(c)); + req.on("end", () => { + receivedBody = Buffer.concat(chunks).toString(); + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + id: "chatcmpl-proxy-body", + object: "chat.completion", + created: 0, + model: "gpt-4", + choices: [ + { index: 0, message: { role: "assistant", content: "ok" }, finish_reason: "stop" }, + ], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + ); + }); + }); + await new Promise((resolve) => upstreamServer.listen(0, "127.0.0.1", resolve)); + const upAddr = upstreamServer.address() as { port: number }; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { openai: `http://127.0.0.1:${upAddr.port}` }, fixturePath: tmpDir }, + }); + + // Send body with specific formatting (extra spaces, key order) + const customBody = + '{"model": "gpt-4", "messages": [{"role": "user", "content": "preserve me"}]}'; + const resp = await fetch(`${recorder.url}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: customBody, + }); + expect(resp.status).toBe(200); + + // The upstream should have received the original body, not re-serialized + expect(receivedBody).toBe(customBody); + + await new Promise((resolve) => upstreamServer.close(() => resolve())); + }); + it("upstream returns empty response body — handled gracefully", async () => { // Create a raw HTTP server that returns 200 with 
empty body const emptyServer = http.createServer((_req, res) => { @@ -1347,6 +1396,74 @@ describe("recorder edge cases", () => { await new Promise((resolve) => emptyServer.close(() => resolve())); }); + + it("Ollama empty content + tool_calls: records toolCalls, not content", async () => { + // Raw upstream returns Ollama-style response: empty content + tool_calls + const ollamaRaw = http.createServer((_req, res) => { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ + model: "llama3", + message: { + role: "assistant", + content: "", + tool_calls: [ + { + function: { + name: "get_weather", + arguments: { city: "NYC" }, + }, + }, + ], + }, + done: true, + }), + ); + }); + await new Promise((resolve) => ollamaRaw.listen(0, "127.0.0.1", resolve)); + const ollamaAddr = ollamaRaw.address() as { port: number }; + const ollamaUrl = `http://127.0.0.1:${ollamaAddr.port}`; + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "llmock-record-")); + recorder = await createServer([], { + port: 0, + record: { providers: { ollama: ollamaUrl }, fixturePath: tmpDir }, + }); + + const resp = await post(`${recorder.url}/api/chat`, { + model: "llama3", + messages: [{ role: "user", content: "what is the weather in NYC" }], + stream: false, + }); + + expect(resp.status).toBe(200); + + const files = fs.readdirSync(tmpDir); + const fixtureFiles = files.filter((f) => f.endsWith(".json")); + expect(fixtureFiles).toHaveLength(1); + + const fixtureContent = JSON.parse( + fs.readFileSync(path.join(tmpDir, fixtureFiles[0]), "utf-8"), + ) as { + fixtures: Array<{ + response: { + content?: string; + toolCalls?: Array<{ name: string; arguments: string }>; + }; + }>; + }; + + // Should record toolCalls, NOT content: "" + expect(fixtureContent.fixtures[0].response.toolCalls).toBeDefined(); + expect(fixtureContent.fixtures[0].response.toolCalls).toHaveLength(1); + expect(fixtureContent.fixtures[0].response.toolCalls![0].name).toBe("get_weather"); + 
expect(JSON.parse(fixtureContent.fixtures[0].response.toolCalls![0].arguments)).toEqual({ + city: "NYC", + }); + expect(fixtureContent.fixtures[0].response.content).toBeUndefined(); + + await new Promise((resolve) => ollamaRaw.close(() => resolve())); + }); }); // ---------------------------------------------------------------------------