Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ import {
getMaxTokens,
hasMiddleOutTransform,
} from '@/lib/providers/openrouter/request-helpers';
import { logCacheDiagnostics } from '@/lib/providers/cache-debug';

export const maxDuration = 800;

Expand Down Expand Up @@ -481,6 +482,8 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
userByok
);

logCacheDiagnostics(requestBodyParsed, originalModelIdLowerCased, taskId ?? null);

let response: Response;
if (requestBodyParsed.kind === 'chat_completions' && provider.id === 'martian') {
response = await grokCodeFastOptimizedRequest(
Expand Down
52 changes: 52 additions & 0 deletions src/lib/processUsage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,58 @@ async function processTokenData(
);
}
usageStats = genStats;

// Log cache token reconciliation for Anthropic models with tools
if (
usageContext.api_kind === 'chat_completions' &&
usageContext.has_tools &&
usageContext.requested_model.startsWith('anthropic/')
) {
console.log(
`[CacheDiag:response]`,
JSON.stringify({
sessionId: usageContext.session_id,
model: usageStats.model,
source: 'generation',
messageId: usageStats.messageId,
upstreamId: usageStats.upstream_id,
inputTokens: usageStats.inputTokens,
cacheHitTokens: usageStats.cacheHitTokens,
cacheWriteTokens: usageStats.cacheWriteTokens,
outputTokens: usageStats.outputTokens,
cost_mUsd: usageStats.cost_mUsd,
cacheDiscount_mUsd: usageStats.cacheDiscount_mUsd,
inferenceProvider: usageStats.inference_provider,
})
);
}
}

// Log inline-only usage for Anthropic models with tools (no generation data)
if (
!generation &&
usageContext.api_kind === 'chat_completions' &&
usageContext.has_tools &&
usageContext.requested_model.startsWith('anthropic/')
) {
console.log(
`[CacheDiag:response]`,
JSON.stringify({
sessionId: usageContext.session_id,
model: usageStats.model,
source: 'inline',
messageId: usageStats.messageId,
upstreamId: usageStats.upstream_id,
inputTokens: usageStats.inputTokens,
cacheHitTokens: usageStats.cacheHitTokens,
cacheWriteTokens: usageStats.cacheWriteTokens,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Inline fallback always reports zero cache writes

On the source: 'inline' path this logs usageStats.cacheWriteTokens, but processOpenRouterUsage() still hardcodes that field to 0 and OpenRouterUsage does not read prompt_tokens_details.cache_write_tokens. When fetchGeneration() misses, this diagnostic will hide non-zero cache writes instead of surfacing them.

outputTokens: usageStats.outputTokens,
cost_mUsd: usageStats.cost_mUsd,
cacheDiscount_mUsd: usageStats.cacheDiscount_mUsd ?? null,
inferenceProvider: usageStats.inference_provider,
generationLookupFailed: true,
})
);
}

if (usageStats.inputTokens - usageStats.cacheHitTokens > 100000)
Expand Down
123 changes: 123 additions & 0 deletions src/lib/providers/cache-debug.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import crypto from 'crypto';
import type { GatewayRequest } from '@/lib/providers/openrouter/types';
import { isAnthropicModel } from '@/lib/providers/anthropic';

/**
* Logs a structured diagnostic payload for Anthropic chat_completions
* requests to help debug cache hit/miss behavior.
*
* Call this AFTER all body mutations (tracking IDs, reasoning dedup,
* cache breakpoints, provider-specific logic) and BEFORE forwarding upstream.
*/
export function logCacheDiagnostics(
request: GatewayRequest,
requestedModel: string,
sessionId: string | null
) {
if (request.kind !== 'chat_completions') return;
if (!isAnthropicModel(requestedModel)) return;
const messages = request.body.messages;
if (!Array.isArray(messages) || messages.length === 0) return;
const hasTools = (request.body.tools?.length ?? 0) > 0;
if (!hasTools) return;

try {
// Find the breakpoint message (the one with cache_control set by addCacheBreakpoints)
let breakpointIndex = -1;
let breakpointRole = '<none>';
let breakpointContentLength = 0;

for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
const content = msg.content;
let hasCacheControl = false;

if (Array.isArray(content)) {
hasCacheControl = content.some(
(part: unknown) =>
typeof part === 'object' &&
part !== null &&
'cache_control' in part &&
typeof (part as Record<string, unknown>).cache_control === 'object' &&
(part as Record<string, unknown>).cache_control !== null &&
'type' in
((part as Record<string, unknown>).cache_control as Record<string, unknown>) &&
((part as Record<string, unknown>).cache_control as Record<string, unknown>).type ===
'ephemeral'
);
breakpointContentLength = JSON.stringify(content).length;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: contentLen can be wrong when no breakpoint is found

breakpointContentLength is updated before you know whether the current message actually has cache_control. If a request reaches this logger without any breakpoint, the payload ends up with index: -1 / role: '<none>' but a non-zero contentLen copied from the last inspected message, which makes the diagnostic misleading.

} else if (typeof content === 'string') {
breakpointContentLength = content.length;
}

if (hasCacheControl) {
breakpointIndex = i;
breakpointRole = msg.role;
break;
}
}

// Message structure summary
const roleCounts: Record<string, number> = {};
let totalContentBytes = 0;
for (const msg of messages) {
roleCounts[msg.role] = (roleCounts[msg.role] || 0) + 1;

Check failure

Code scanning / CodeQL

Remote property injection High

A property name to write to depends on a
user-provided value
.
const c = msg.content;
if (typeof c === 'string') {
totalContentBytes += c.length;
} else if (Array.isArray(c)) {
totalContentBytes += JSON.stringify(c).length;
}
}

// Count reasoning_details entries (residual after dedup)
let reasoningDetailCount = 0;
for (const msg of messages) {
if ('reasoning_details' in msg && Array.isArray(msg.reasoning_details)) {
reasoningDetailCount += msg.reasoning_details.length;
}
}

// Prefix hash: SHA256 of messages[0..breakpointIndex] serialized.
// This is the content that SHOULD be cached across consecutive requests.
// If this hash changes between requests in the same session, the cache misses.
let prefixHash = '<no-breakpoint>';
let prefixBytes = 0;
if (breakpointIndex >= 0) {
const prefix = messages.slice(0, breakpointIndex + 1);
const prefixJson = JSON.stringify(prefix);
prefixBytes = prefixJson.length;
prefixHash = crypto.createHash('sha256').update(prefixJson).digest('hex').slice(0, 16);
}

// Full body hash (for dedup / correlation)
const bodyJson = JSON.stringify(request.body);
const bodyBytes = bodyJson.length;
const bodyHash = crypto.createHash('sha256').update(bodyJson).digest('hex').slice(0, 16);

console.log(
`[CacheDiag]`,
JSON.stringify({
sessionId: sessionId ?? '<none>',
model: request.body.model,
msgCount: messages.length,
roles: roleCounts,
reasoningDetails: reasoningDetailCount,
breakpoint: {
index: breakpointIndex,
role: breakpointRole,
contentLen: breakpointContentLength,
},
promptCacheKey: 'prompt_cache_key' in request.body && !!request.body.prompt_cache_key,
prefixHash,
prefixBytes,
bodyHash,
bodyBytes,
totalContentBytes,
})
);
} catch (err) {
// Never let diagnostic logging break the request
console.warn('[CacheDiag] error:', err);
}
}
12 changes: 12 additions & 0 deletions src/scripts/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
// Load environment variables before any other imports
import '../lib/load-env';

// Shim 'server-only' for CLI scripts. Next.js strips this at build time, but
// tsx/Node.js doesn't — pre-populate the require cache with an empty module so
// transitive imports of 'server-only' (e.g. via config.server.ts) don't throw.
import Module from 'node:module';
// NOTE(review): assumes a CJS `require` is in scope when this script runs
// (tsx/CJS interop). Under pure ESM execution `require` is undefined — confirm
// the script runner, or consider Module.createRequire as a portable alternative.
const serverOnlyResolved = require.resolve('server-only');
// Module._cache is Node's private CJS module cache; the `as unknown as` cast is
// needed because the field is not part of the public type definitions.
// NOTE(review): private API — verify it still backs the require cache on the
// Node version used by these scripts.
(Module as unknown as { _cache: Record<string, unknown> })._cache[serverOnlyResolved] = {
  // Minimal stand-in for a loaded CJS module record: same id/filename as the
  // resolved package entry, marked loaded, with empty exports so importing
  // 'server-only' becomes a no-op instead of throwing its client-side guard.
  id: serverOnlyResolved,
  filename: serverOnlyResolved,
  loaded: true,
  exports: {},
};

// get all folders in the src/scripts directory excluding './lib'
import { readdirSync } from 'fs';
import { join } from 'path';
Expand Down
Loading
Loading