Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/endpoints/inference/openrouter/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import { BaseEndpoint } from "../../base";
import { OpenRouterClient, OpenRouterError } from "../../../services/openrouter";
import { logPnL } from "../../../services/pricing";
import { lookupModel, getSimilarModels } from "../../../services/model-cache";
import { tokenTypeParam } from "../../schema";
import type { AppContext, ChatCompletionRequest, UsageRecord } from "../../../types";

Expand Down Expand Up @@ -128,6 +129,27 @@ export class OpenRouterChat extends BaseEndpoint {
return this.errorResponse(c, "model and messages are required", 400);
}

// Belt-and-suspenders model validation: middleware handles the primary rejection
// pre-payment, but validate again here in case the cache was degraded at that time
// and has since been populated, or in case the middleware was bypassed.
const modelResult = await lookupModel(request.model, c.env.OPENROUTER_API_KEY, log);
if (modelResult.valid && modelResult.degraded) {
log.warn("Model cache degraded at chat handler — cannot confirm model validity", {
model: request.model,
});
} else if (!modelResult.valid) {
const suggestions = getSimilarModels(request.model, 3);
return c.json(
{
error: modelResult.error,
code: "invalid_model",
model: request.model,
...(suggestions.length > 0 ? { suggestions } : {}),
},
400
);
}

const client = new OpenRouterClient(c.env.OPENROUTER_API_KEY, log);

try {
Expand Down Expand Up @@ -189,6 +211,23 @@ export class OpenRouterChat extends BaseEndpoint {
const { response, usage } = await client.createChatCompletion(request);
const durationMs = Date.now() - startTime;

// validateChatResponse guarantees .choices is an array, but it may be empty.
if (response.choices.length === 0) {
log.error("OpenRouter returned no choices", {
model: response.model || request.model,
});
return this.errorResponse(c, "OpenRouter returned no choices", 502);
}

// Log a warning if the first choice has empty content (valid but unexpected).
const firstChoice = response.choices[0];
if (firstChoice?.message?.content === "") {
log.warn("OpenRouter returned empty content in first choice", {
model: response.model || request.model,
finishReason: firstChoice.finish_reason,
});
}

// Log PnL
if (x402.priceEstimate) {
logPnL(
Expand Down
2 changes: 2 additions & 0 deletions src/endpoints/inference/openrouter/list-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ export class OpenRouterListModels extends FreeEndpoint {
const client = new OpenRouterClient(c.env.OPENROUTER_API_KEY, log);

try {
// getModels() runs validateModelsResponse() which guarantees .data is an
// array and every model has .id (string) and .pricing with string fields.
const response = await client.getModels();

const models = response.data.map((model) => ({
Expand Down
12 changes: 10 additions & 2 deletions src/middleware/x402.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,16 @@ export function x402Middleware(
// Pre-payment model validation: reject unknown models before issuing 402
if (c.env.OPENROUTER_API_KEY) {
const modelResult = await lookupModel(chatRequest.model, c.env.OPENROUTER_API_KEY, log);
if (!modelResult.valid) {
return c.json({ error: modelResult.error, code: "invalid_model" }, 400);
if (modelResult.valid && modelResult.degraded) {
// Cache was empty after refresh attempt — allow the request but warn operators
log.warn("Model cache degraded at middleware — skipping pre-payment model validation", {
model: chatRequest.model,
});
} else if (!modelResult.valid) {
return c.json(
{ error: modelResult.error, code: "invalid_model", model: chatRequest.model },
400
);
}
// Use live registry pricing if available, otherwise fall through to hardcoded table
priceEstimate = estimateChatPayment(chatRequest, tokenType, log, modelResult.pricing);
Expand Down
128 changes: 124 additions & 4 deletions src/services/model-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,24 @@ const FETCH_TIMEOUT_MS = 3_000;

/**
 * Discriminated union result from lookupModel.
 *
 * valid: true  — the model may be used. `pricing` is present when the model
 *                was found in the registry; `degraded` is set when the cache
 *                was empty after a refresh attempt, meaning validity could
 *                not actually be confirmed and the request was allowed
 *                permissively.
 * valid: false — the model was rejected; `error` carries the reason.
 */
export type ModelLookupResult =
  | { valid: true; pricing?: ModelPricing; degraded?: true }
  | { valid: false; error: string };

/** Cache state reported by getCacheStatus() — see that function's doc for exact criteria */
export type CacheState = "warm" | "cold" | "degraded";

/** Snapshot of the model-cache's health, returned by getCacheStatus() */
export interface CacheStatus {
  /** warm = populated and fresh; cold = never fetched or empty; degraded = last fetch failed */
  state: CacheState;
  /** Number of models currently in the registry */
  modelCount: number;
  /** Timestamp (ms since epoch) of the last successful fetch, or null if never fetched */
  lastRefreshed: number | null;
  /** Timestamp (ms since epoch) of the last failed fetch attempt, or null if no failures */
  lastFailedAt: number | null;
}

// =============================================================================
// Module-level Cache
// =============================================================================
Expand Down Expand Up @@ -91,6 +106,8 @@ async function doRefresh(apiKey: string, logger: Logger): Promise<void> {

try {
const client = new OpenRouterClient(apiKey, logger);
// getModels() runs validateModelsResponse() which guarantees .data is an
// array and every model has .id (string) and .pricing with string fields.
const modelsResponse = await client.getModels(controller.signal);

modelRegistry.clear();
Expand Down Expand Up @@ -127,6 +144,78 @@ async function doRefresh(apiKey: string, logger: Logger): Promise<void> {
// Public API
// =============================================================================

/**
 * Report the cache's current state without triggering a refresh.
 *
 * "warm"     — registry populated and the TTL has not expired.
 * "cold"     — never fetched successfully, or empty with no recorded failure.
 * "degraded" — a fetch failure is on record AND the registry is empty or the
 *              last successful fetch is older than the TTL.
 *              (Assumes a successful refresh clears lastFailedAt — confirm
 *              against doRefresh.)
 */
export function getCacheStatus(): CacheStatus {
  const modelCount = modelRegistry.size;
  const now = Date.now();

  // Named predicates make the branch order below easier to audit.
  const isFresh = fetchedAt !== null && now - fetchedAt <= CACHE_TTL_MS;
  const isStale = fetchedAt !== null && now - fetchedAt > CACHE_TTL_MS;

  let state: CacheState;
  if (lastFailedAt !== null && (modelCount === 0 || isStale)) {
    state = "degraded";
  } else if (isFresh && modelCount > 0) {
    state = "warm";
  } else {
    state = "cold";
  }

  return { state, modelCount, lastRefreshed: fetchedAt, lastFailedAt };
}

/**
 * Find model IDs in the registry that are similar to the given model ID.
 *
 * Similarity strategy:
 *   1. If modelId contains "/", match other models with the same provider
 *      prefix. The match is case-insensitive so a user-typed
 *      "OpenAI/gpt-4o" still finds "openai/..." entries (consistent with
 *      step 2, which was already case-insensitive).
 *   2. Otherwise fall back to a case-insensitive prefix match on the full ID
 *      (e.g., "gpt" matches "gpt-4"), using at least 3 characters so a very
 *      short ID doesn't match half the registry.
 *   3. No structural match — return the first few registry entries. These may
 *      be unrelated; callers should present them as "here are some valid
 *      models" rather than close matches.
 *
 * @param modelId    The (possibly invalid) model ID the caller supplied.
 * @param maxResults Upper bound on returned suggestions; non-positive yields [].
 * @returns Up to maxResults registry IDs, never including modelId itself.
 */
export function getSimilarModels(modelId: string, maxResults = 3): string[] {
  if (modelRegistry.size === 0) {
    return [];
  }

  // Clamp so a negative maxResults can't turn slice(0, n) into
  // "drop n items from the end" and return nearly the whole registry.
  const limit = Math.max(0, maxResults);
  if (limit === 0) {
    return [];
  }

  const allModels = Array.from(modelRegistry.keys()).sort();

  // 1. Provider prefix match (e.g., "openai/" from "openai/gpt-4o").
  const slashIdx = modelId.indexOf("/");
  if (slashIdx !== -1) {
    const providerPrefix = modelId.slice(0, slashIdx + 1).toLowerCase();
    const providerMatches = allModels.filter(
      (id) => id.toLowerCase().startsWith(providerPrefix) && id !== modelId
    );
    if (providerMatches.length > 0) {
      return providerMatches.slice(0, limit);
    }
  }

  // 2. Full-string prefix match on at least 3 chars / half the ID length.
  const prefixLen = Math.max(3, Math.floor(modelId.length / 2));
  const prefix = modelId.slice(0, prefixLen).toLowerCase();
  const prefixMatches = allModels.filter(
    (id) => id.toLowerCase().startsWith(prefix) && id !== modelId
  );
  if (prefixMatches.length > 0) {
    return prefixMatches.slice(0, limit);
  }

  // 3. Fallback hints — first limit models, possibly unrelated.
  return allModels.filter((id) => id !== modelId).slice(0, limit);
}

/**
* Look up a model by ID, refreshing the cache if stale.
*
Expand All @@ -147,10 +236,10 @@ export async function lookupModel(
await refreshCache(apiKey, logger);
}

// If the cache is still empty (e.g., fetch failed), be permissive
// If the cache is still empty (e.g., fetch failed), be permissive but signal degraded state
if (modelRegistry.size === 0) {
logger.debug("Model cache empty after refresh attempt -- allowing request", { modelId });
return { valid: true };
logger.debug("Model cache empty after refresh attempt -- allowing request (degraded)", { modelId });
return { valid: true, degraded: true };
}

const cached = modelRegistry.get(modelId);
Expand All @@ -164,3 +253,34 @@ export async function lookupModel(

return { valid: true, pricing: cached };
}

// =============================================================================
// Test Helpers (not part of public API)
// =============================================================================

/**
* Seed the model registry with test data and reset internal state.
* Exported for unit tests only — not intended for production use.
*/
export function _seedCacheForTesting(
models: Array<{ id: string; pricing: ModelPricing }>,
options?: { simulateFailure?: boolean }
): void {
modelRegistry.clear();
for (const m of models) {
modelRegistry.set(m.id, m.pricing);
}
fetchedAt = models.length > 0 ? Date.now() : null;
lastFailedAt = options?.simulateFailure ? Date.now() : null;
}

/**
 * Restore the cache to its pristine, never-fetched state.
 * Exported for unit tests only — not intended for production use.
 */
export function _resetCacheForTesting(): void {
  // Order is irrelevant; these are independent module-level resets.
  inflightRefresh = null;
  lastFailedAt = null;
  fetchedAt = null;
  modelRegistry.clear();
}
Loading
Loading