Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions src/endpoints/inference/openrouter/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import { BaseEndpoint } from "../../base";
import { OpenRouterClient, OpenRouterError } from "../../../services/openrouter";
import { logPnL } from "../../../services/pricing";
import { lookupModel, getSimilarModels } from "../../../services/model-cache";
import { tokenTypeParam } from "../../schema";
import type { AppContext, ChatCompletionRequest, UsageRecord } from "../../../types";

Expand Down Expand Up @@ -128,6 +129,27 @@ export class OpenRouterChat extends BaseEndpoint {
return this.errorResponse(c, "model and messages are required", 400);
}

// Belt-and-suspenders model validation: middleware handles the primary rejection
// pre-payment, but validate again here in case the cache was degraded at that time
// and has since been populated, or in case the middleware was bypassed.
const modelResult = await lookupModel(request.model, c.env.OPENROUTER_API_KEY, log);
if (modelResult.valid && modelResult.degraded) {
log.warn("Model cache degraded at chat handler — cannot confirm model validity", {
model: request.model,
});
} else if (!modelResult.valid) {
const suggestions = getSimilarModels(request.model, 3);
return c.json(
{
error: modelResult.error,
code: "invalid_model",
model: request.model,
...(suggestions.length > 0 ? { suggestions } : {}),
},
400
);
}

const client = new OpenRouterClient(c.env.OPENROUTER_API_KEY, log);

try {
Expand Down Expand Up @@ -189,6 +211,23 @@ export class OpenRouterChat extends BaseEndpoint {
const { response, usage } = await client.createChatCompletion(request);
const durationMs = Date.now() - startTime;

// validateChatResponse guarantees .choices is an array, but it may be empty.
if (response.choices.length === 0) {
log.error("OpenRouter returned no choices", {
model: response.model || request.model,
});
return this.errorResponse(c, "OpenRouter returned no choices", 502);
}

// Log a warning if the first choice has empty content (valid but unexpected).
const firstChoice = response.choices[0];
if (firstChoice?.message?.content === "") {
log.warn("OpenRouter returned empty content in first choice", {
model: response.model || request.model,
finishReason: firstChoice.finish_reason,
});
}

// Log PnL
if (x402.priceEstimate) {
logPnL(
Expand Down
2 changes: 2 additions & 0 deletions src/endpoints/inference/openrouter/list-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ export class OpenRouterListModels extends FreeEndpoint {
const client = new OpenRouterClient(c.env.OPENROUTER_API_KEY, log);

try {
// getModels() runs validateModelsResponse() which guarantees .data is an
// array and every model has .id (string) and .pricing with string fields.
const response = await client.getModels();

const models = response.data.map((model) => ({
Expand Down
12 changes: 10 additions & 2 deletions src/middleware/x402.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,16 @@ export function x402Middleware(
// Pre-payment model validation: reject unknown models before issuing 402
if (c.env.OPENROUTER_API_KEY) {
const modelResult = await lookupModel(chatRequest.model, c.env.OPENROUTER_API_KEY, log);
if (!modelResult.valid) {
return c.json({ error: modelResult.error, code: "invalid_model" }, 400);
if (modelResult.valid && modelResult.degraded) {
// Cache was empty after refresh attempt — allow the request but warn operators
log.warn("Model cache degraded at middleware — skipping pre-payment model validation", {
model: chatRequest.model,
});
} else if (!modelResult.valid) {
return c.json(
{ error: modelResult.error, code: "invalid_model", model: chatRequest.model },
400
);
}
// Use live registry pricing if available, otherwise fall through to hardcoded table
priceEstimate = estimateChatPayment(chatRequest, tokenType, log, modelResult.pricing);
Expand Down
128 changes: 124 additions & 4 deletions src/services/model-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,24 @@ const FETCH_TIMEOUT_MS = 3_000;

/**
 * Discriminated union result from lookupModel.
 *
 * valid: true  — the model may be used. `pricing` is present when the model
 *                was found in the registry; `degraded` is set when the cache
 *                was empty after a refresh attempt, meaning validity could
 *                not actually be confirmed and the request was allowed
 *                permissively.
 * valid: false — the model was rejected; `error` carries the reason.
 */
export type ModelLookupResult =
  | { valid: true; pricing?: ModelPricing; degraded?: true }
  | { valid: false; error: string };

/** Cache state reported by getCacheStatus() — see that function's doc for exact criteria */
export type CacheState = "warm" | "cold" | "degraded";

/** Snapshot of the model-cache's health, returned by getCacheStatus() */
export interface CacheStatus {
  /** warm = populated and fresh; cold = never fetched or empty; degraded = last fetch failed */
  state: CacheState;
  /** Number of models currently in the registry */
  modelCount: number;
  /** Timestamp (ms since epoch) of the last successful fetch, or null if never fetched */
  lastRefreshed: number | null;
  /** Timestamp (ms since epoch) of the last failed fetch attempt, or null if no failures */
  lastFailedAt: number | null;
}

// =============================================================================
// Module-level Cache
// =============================================================================
Expand Down Expand Up @@ -91,6 +106,8 @@ async function doRefresh(apiKey: string, logger: Logger): Promise<void> {

try {
const client = new OpenRouterClient(apiKey, logger);
// getModels() runs validateModelsResponse() which guarantees .data is an
// array and every model has .id (string) and .pricing with string fields.
const modelsResponse = await client.getModels(controller.signal);

modelRegistry.clear();
Expand Down Expand Up @@ -127,6 +144,78 @@ async function doRefresh(apiKey: string, logger: Logger): Promise<void> {
// Public API
// =============================================================================

/**
 * Report the cache's current state without triggering a refresh.
 *
 * "warm"     — registry populated and the TTL has not expired.
 * "cold"     — never fetched successfully, or empty with no recorded failure.
 * "degraded" — a fetch failure is on record AND the registry is empty or the
 *              last successful fetch is older than the TTL.
 *              (Assumes a successful refresh clears lastFailedAt — confirm
 *              against doRefresh.)
 */
export function getCacheStatus(): CacheStatus {
  const modelCount = modelRegistry.size;
  const now = Date.now();

  // Named predicates make the branch order below easier to audit.
  const isFresh = fetchedAt !== null && now - fetchedAt <= CACHE_TTL_MS;
  const isStale = fetchedAt !== null && now - fetchedAt > CACHE_TTL_MS;

  let state: CacheState;
  if (lastFailedAt !== null && (modelCount === 0 || isStale)) {
    state = "degraded";
  } else if (isFresh && modelCount > 0) {
    state = "warm";
  } else {
    state = "cold";
  }

  return { state, modelCount, lastRefreshed: fetchedAt, lastFailedAt };
}

/**
 * Find model IDs in the registry that are similar to the given model ID.
 *
 * Similarity strategy:
 *   1. If modelId contains "/", match other models with the same provider
 *      prefix. The match is case-insensitive so a user-typed
 *      "OpenAI/gpt-4o" still finds "openai/..." entries (consistent with
 *      step 2, which was already case-insensitive).
 *   2. Otherwise fall back to a case-insensitive prefix match on the full ID
 *      (e.g., "gpt" matches "gpt-4"), using at least 3 characters so a very
 *      short ID doesn't match half the registry.
 *   3. No structural match — return the first few registry entries. These may
 *      be unrelated; callers should present them as "here are some valid
 *      models" rather than close matches.
 *
 * @param modelId    The (possibly invalid) model ID the caller supplied.
 * @param maxResults Upper bound on returned suggestions; non-positive yields [].
 * @returns Up to maxResults registry IDs, never including modelId itself.
 */
export function getSimilarModels(modelId: string, maxResults = 3): string[] {
  if (modelRegistry.size === 0) {
    return [];
  }

  // Clamp so a negative maxResults can't turn slice(0, n) into
  // "drop n items from the end" and return nearly the whole registry.
  const limit = Math.max(0, maxResults);
  if (limit === 0) {
    return [];
  }

  const allModels = Array.from(modelRegistry.keys()).sort();

  // 1. Provider prefix match (e.g., "openai/" from "openai/gpt-4o").
  const slashIdx = modelId.indexOf("/");
  if (slashIdx !== -1) {
    const providerPrefix = modelId.slice(0, slashIdx + 1).toLowerCase();
    const providerMatches = allModels.filter(
      (id) => id.toLowerCase().startsWith(providerPrefix) && id !== modelId
    );
    if (providerMatches.length > 0) {
      return providerMatches.slice(0, limit);
    }
  }

  // 2. Full-string prefix match on at least 3 chars / half the ID length.
  const prefixLen = Math.max(3, Math.floor(modelId.length / 2));
  const prefix = modelId.slice(0, prefixLen).toLowerCase();
  const prefixMatches = allModels.filter(
    (id) => id.toLowerCase().startsWith(prefix) && id !== modelId
  );
  if (prefixMatches.length > 0) {
    return prefixMatches.slice(0, limit);
  }

  // 3. Fallback hints — first limit models, possibly unrelated.
  return allModels.filter((id) => id !== modelId).slice(0, limit);
}

/**
* Look up a model by ID, refreshing the cache if stale.
*
Expand All @@ -147,10 +236,10 @@ export async function lookupModel(
await refreshCache(apiKey, logger);
}

// If the cache is still empty (e.g., fetch failed), be permissive
// If the cache is still empty (e.g., fetch failed), be permissive but signal degraded state
if (modelRegistry.size === 0) {
logger.debug("Model cache empty after refresh attempt -- allowing request", { modelId });
return { valid: true };
logger.debug("Model cache empty after refresh attempt -- allowing request (degraded)", { modelId });
return { valid: true, degraded: true };
}

const cached = modelRegistry.get(modelId);
Expand All @@ -164,3 +253,34 @@ export async function lookupModel(

return { valid: true, pricing: cached };
}

// =============================================================================
// Test Helpers (not part of public API)
// =============================================================================

/**
* Seed the model registry with test data and reset internal state.
* Exported for unit tests only — not intended for production use.
*/
export function _seedCacheForTesting(
models: Array<{ id: string; pricing: ModelPricing }>,
options?: { simulateFailure?: boolean }
): void {
modelRegistry.clear();
for (const m of models) {
modelRegistry.set(m.id, m.pricing);
}
fetchedAt = models.length > 0 ? Date.now() : null;
lastFailedAt = options?.simulateFailure ? Date.now() : null;
}

/**
 * Restore the cache to its pristine, never-fetched state.
 * Exported for unit tests only — not intended for production use.
 */
export function _resetCacheForTesting(): void {
  // Order is irrelevant; these are independent module-level resets.
  inflightRefresh = null;
  lastFailedAt = null;
  fetchedAt = null;
  modelRegistry.clear();
}
Loading
Loading