Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ import {
getMaxTokens,
hasMiddleOutTransform,
} from '@/lib/providers/openrouter/request-helpers';
import { logCacheDiagnostics } from '@/lib/providers/cache-debug';

export const maxDuration = 800;

Expand Down Expand Up @@ -481,6 +482,8 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
userByok
);

logCacheDiagnostics(requestBodyParsed, originalModelIdLowerCased, taskId ?? null);

let response: Response;
if (requestBodyParsed.kind === 'chat_completions' && provider.id === 'martian') {
response = await grokCodeFastOptimizedRequest(
Expand Down
52 changes: 52 additions & 0 deletions src/lib/processUsage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,58 @@ async function processTokenData(
);
}
usageStats = genStats;

// Log cache token reconciliation for Anthropic models with tools
if (
usageContext.api_kind === 'chat_completions' &&
usageContext.has_tools &&
usageContext.requested_model.startsWith('anthropic/')
) {
console.log(
`[CacheDiag:response]`,
JSON.stringify({
sessionId: usageContext.session_id,
model: usageStats.model,
source: 'generation',
messageId: usageStats.messageId,
upstreamId: usageStats.upstream_id,
inputTokens: usageStats.inputTokens,
cacheHitTokens: usageStats.cacheHitTokens,
cacheWriteTokens: usageStats.cacheWriteTokens,
outputTokens: usageStats.outputTokens,
cost_mUsd: usageStats.cost_mUsd,
cacheDiscount_mUsd: usageStats.cacheDiscount_mUsd,
inferenceProvider: usageStats.inference_provider,
})
);
}
}

// Log inline-only usage for Anthropic models with tools (no generation data)
if (
!generation &&
usageContext.api_kind === 'chat_completions' &&
usageContext.has_tools &&
usageContext.requested_model.startsWith('anthropic/')
) {
console.log(
`[CacheDiag:response]`,
JSON.stringify({
sessionId: usageContext.session_id,
model: usageStats.model,
source: 'inline',
messageId: usageStats.messageId,
upstreamId: usageStats.upstream_id,
inputTokens: usageStats.inputTokens,
cacheHitTokens: usageStats.cacheHitTokens,
cacheWriteTokens: usageStats.cacheWriteTokens,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Inline fallback always reports zero cache writes

On the source: 'inline' path this logs usageStats.cacheWriteTokens, but processOpenRouterUsage() still hardcodes that field to 0 and OpenRouterUsage does not read prompt_tokens_details.cache_write_tokens. When fetchGeneration() misses, this diagnostic will hide non-zero cache writes instead of surfacing them.

outputTokens: usageStats.outputTokens,
cost_mUsd: usageStats.cost_mUsd,
cacheDiscount_mUsd: usageStats.cacheDiscount_mUsd ?? null,
inferenceProvider: usageStats.inference_provider,
generationLookupFailed: true,
})
);
}

if (usageStats.inputTokens - usageStats.cacheHitTokens > 100000)
Expand Down
123 changes: 123 additions & 0 deletions src/lib/providers/cache-debug.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import crypto from 'crypto';
import type { GatewayRequest } from '@/lib/providers/openrouter/types';
import { isAnthropicModel } from '@/lib/providers/anthropic';

/**
* Logs a structured diagnostic payload for Anthropic chat_completions
* requests to help debug cache hit/miss behavior.
*
* Call this AFTER all body mutations (tracking IDs, reasoning dedup,
* cache breakpoints, provider-specific logic) and BEFORE forwarding upstream.
*/
export function logCacheDiagnostics(
request: GatewayRequest,
requestedModel: string,
sessionId: string | null
) {
if (request.kind !== 'chat_completions') return;
if (!isAnthropicModel(requestedModel)) return;
const messages = request.body.messages;
if (!Array.isArray(messages) || messages.length === 0) return;
const hasTools = (request.body.tools?.length ?? 0) > 0;
if (!hasTools) return;

try {
// Find the breakpoint message (the one with cache_control set by addCacheBreakpoints)
let breakpointIndex = -1;
let breakpointRole = '<none>';
let breakpointContentLength = 0;

for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
const content = msg.content;
let hasCacheControl = false;

if (Array.isArray(content)) {
hasCacheControl = content.some(
(part: unknown) =>
typeof part === 'object' &&
part !== null &&
'cache_control' in part &&
typeof (part as Record<string, unknown>).cache_control === 'object' &&
(part as Record<string, unknown>).cache_control !== null &&
'type' in
((part as Record<string, unknown>).cache_control as Record<string, unknown>) &&
((part as Record<string, unknown>).cache_control as Record<string, unknown>).type ===
'ephemeral'
);
breakpointContentLength = JSON.stringify(content).length;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: contentLen can be wrong when no breakpoint is found

breakpointContentLength is updated before you know whether the current message actually has cache_control. If a request reaches this logger without any breakpoint, the payload ends up with index: -1 / role: '<none>' but a non-zero contentLen copied from the last inspected message, which makes the diagnostic misleading.

} else if (typeof content === 'string') {
breakpointContentLength = content.length;
}

if (hasCacheControl) {
breakpointIndex = i;
breakpointRole = msg.role;
break;
}
}

// Message structure summary
const roleCounts: Record<string, number> = {};
let totalContentBytes = 0;
for (const msg of messages) {
roleCounts[msg.role] = (roleCounts[msg.role] || 0) + 1;

Check failure

Code scanning / CodeQL

Remote property injection High

A property name to write to depends on a
user-provided value
.
const c = msg.content;
if (typeof c === 'string') {
totalContentBytes += c.length;
} else if (Array.isArray(c)) {
totalContentBytes += JSON.stringify(c).length;
}
}

// Count reasoning_details entries (residual after dedup)
let reasoningDetailCount = 0;
for (const msg of messages) {
if ('reasoning_details' in msg && Array.isArray(msg.reasoning_details)) {
reasoningDetailCount += msg.reasoning_details.length;
}
}

// Prefix hash: SHA256 of messages[0..breakpointIndex] serialized.
// This is the content that SHOULD be cached across consecutive requests.
// If this hash changes between requests in the same session, the cache misses.
let prefixHash = '<no-breakpoint>';
let prefixBytes = 0;
if (breakpointIndex >= 0) {
const prefix = messages.slice(0, breakpointIndex + 1);
const prefixJson = JSON.stringify(prefix);
prefixBytes = prefixJson.length;
prefixHash = crypto.createHash('sha256').update(prefixJson).digest('hex').slice(0, 16);
}

// Full body hash (for dedup / correlation)
const bodyJson = JSON.stringify(request.body);
const bodyBytes = bodyJson.length;
const bodyHash = crypto.createHash('sha256').update(bodyJson).digest('hex').slice(0, 16);

console.log(
`[CacheDiag]`,
JSON.stringify({
sessionId: sessionId ?? '<none>',
model: request.body.model,
msgCount: messages.length,
roles: roleCounts,
reasoningDetails: reasoningDetailCount,
breakpoint: {
index: breakpointIndex,
role: breakpointRole,
contentLen: breakpointContentLength,
},
promptCacheKey: 'prompt_cache_key' in request.body && !!request.body.prompt_cache_key,
prefixHash,
prefixBytes,
bodyHash,
bodyBytes,
totalContentBytes,
})
);
} catch (err) {
// Never let diagnostic logging break the request
console.warn('[CacheDiag] error:', err);
}
}
12 changes: 12 additions & 0 deletions src/scripts/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
// Load environment variables before any other imports
import '../lib/load-env';

// Shim 'server-only' for CLI scripts. Next.js strips this at build time, but
// tsx/Node.js doesn't — pre-populate the require cache with an empty module so
// transitive imports of 'server-only' (e.g. via config.server.ts) don't throw.
import Module from 'node:module';
// NOTE(review): assumes a CJS `require` is in scope when this script runs
// (tsx/CJS interop). Under pure ESM execution `require` is undefined — confirm
// the script runner, or consider Module.createRequire as a portable alternative.
const serverOnlyResolved = require.resolve('server-only');
// Module._cache is Node's private CJS module cache; the `as unknown as` cast is
// needed because the field is not part of the public type definitions.
// NOTE(review): private API — verify it still backs the require cache on the
// Node version used by these scripts.
(Module as unknown as { _cache: Record<string, unknown> })._cache[serverOnlyResolved] = {
  // Minimal stand-in for a loaded CJS module record: same id/filename as the
  // resolved package entry, marked loaded, with empty exports so importing
  // 'server-only' becomes a no-op instead of throwing its client-side guard.
  id: serverOnlyResolved,
  filename: serverOnlyResolved,
  loaded: true,
  exports: {},
};

// get all folders in the src/scripts directory excluding './lib'
import { readdirSync } from 'fs';
import { join } from 'path';
Expand Down
Loading
Loading