From 86c2009faeb41b625f5ba1bebd846747c22b1e69 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 21 Feb 2026 15:31:09 +0000 Subject: [PATCH 1/5] fix: prevent and surface context-length errors in AI chat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-pronged fix for AI_APICallError when conversations exceed model context windows (e.g. OpenRouter 400k token limit): **Prevention (proactive truncation)** - Move modelMessages conversion to after system prompt is built so we have accurate token budgets before calling streamText - Use determineMessagesToInclude() from ai-context-calculator to trim the oldest messages first, reserving 75% of the context window for input and leaving 25% headroom for output tokens and estimator inaccuracies - Log a warning when truncation occurs with before/after message counts - Add OpenRouter provider context windows to getContextWindowSize so common models (DeepSeek, Qwen, Llama, Mistral, Gemini, Claude) get accurate limits instead of the 200k fallback **Better errors (when context still exceeds after truncation)** - Detect context-length errors in the route error handler and return HTTP 413 with error key 'context_length_exceeded' instead of a generic 500 - Add isContextLengthError() helper to error-messages.ts that matches provider-specific phrasing (OpenRouter, Anthropic, OpenAI, etc.) - getAIErrorMessage() now returns a clear, actionable message for context errors: "The conversation is too long for this model's context window. Older messages have been trimmed to fit — try sending your message again." 
- Replace hardcoded error-message logic in SidebarChatTab with getAIErrorMessage() so all three chat surfaces handle errors consistently https://claude.ai/code/session_011dBcfJNRsawEMzZLb67Z7m --- apps/web/src/app/api/ai/chat/route.ts | 62 ++++++++++++++++--- .../ai-assistant/SidebarChatTab.tsx | 12 +--- apps/web/src/lib/ai/shared/error-messages.ts | 25 ++++++++ .../src/monitoring/ai-context-calculator.ts | 29 ++++++++- 4 files changed, 108 insertions(+), 20 deletions(-) diff --git a/apps/web/src/app/api/ai/chat/route.ts b/apps/web/src/app/api/ai/chat/route.ts index 9a9dacfb2..d3da692be 100644 --- a/apps/web/src/app/api/ai/chat/route.ts +++ b/apps/web/src/app/api/ai/chat/route.ts @@ -71,6 +71,13 @@ import { } from '@/lib/ai/core/stream-abort-registry'; import { validateUserMessageFileParts, hasFileParts } from '@/lib/ai/core/validate-image-parts'; import { hasVisionCapability } from '@/lib/ai/core/model-capabilities'; +import { + determineMessagesToInclude, + getContextWindowSize, + estimateSystemPromptTokens, + estimateToolDefinitionTokens, +} from '@pagespace/lib/ai-context-calculator'; +import { isContextLengthError } from '@/lib/ai/shared/error-messages'; // Allow streaming responses up to 5 minutes for complex AI agent interactions @@ -741,13 +748,10 @@ export async function POST(request: Request) { }); } - // Convert UIMessages to ModelMessages for the AI model - // First sanitize messages to remove tool parts without results (prevents "input-available" state errors) + // Sanitize messages to remove tool parts without results (prevents "input-available" state errors) // NOTE: We use database-loaded messages, NOT messages from client + // modelMessages is computed after system prompt is built so we can apply context truncation const sanitizedMessages = sanitizeMessagesForModel(conversationHistory); - const modelMessages = convertToModelMessages(sanitizedMessages, { - tools: filteredTools // Use original tools - no wrapping needed - }); // Fetch user 
personalization for AI system prompt injection const personalization = await getUserPersonalization(userId); @@ -818,8 +822,43 @@ } loggers.ai.debug('AI Chat API: Tools configured for Page AI', { toolCount: Object.keys(filteredTools).length }); + + // Context-length guard: proactively truncate oldest messages to fit within the model's context window. + // This prevents AI_APICallError from providers when a conversation grows too long. + // We build modelMessages here (after system prompt) so we have accurate token budgeting. + const fullSystemPrompt = systemPrompt + timestampSystemPrompt + pageTreePrompt; + const contextWindow = getContextWindowSize(currentModel, currentProvider); + const systemPromptTokens = estimateSystemPromptTokens(fullSystemPrompt); + // Cast needed because filteredTools is a ToolSet (Vercel AI SDK type) but calculator expects plain object + const toolTokens = estimateToolDefinitionTokens(filteredTools as Record<string, unknown>); + // Reserve 25% headroom for output tokens and tokenizer inaccuracies + const inputBudget = Math.floor(contextWindow * 0.75); + const { includedMessages, wasTruncated } = determineMessagesToInclude( + sanitizedMessages, + inputBudget, + systemPromptTokens, + toolTokens + ); + + if (wasTruncated) { + loggers.ai.warn('AI Chat API: Conversation truncated to fit context window', { + originalMessageCount: sanitizedMessages.length, + includedMessageCount: includedMessages.length, + model: currentModel, + provider: currentProvider, + contextWindow, + inputBudget, + systemPromptTokens, + toolTokens, + }); + } + + const modelMessages = convertToModelMessages(includedMessages, { + tools: filteredTools // Use original tools - no wrapping needed + }); + loggers.ai.info('AI Chat API: Starting streamText for Page AI', { model: currentModel, pageName: page.title }); - + // Create UI message stream with visual content injection support // This handles the case where tools return visual content that 
needs to be injected into the stream let result; @@ -1199,8 +1238,15 @@ export async function POST(request: Request) { }); // Return a proper error response - return NextResponse.json({ - error: 'Failed to process chat request. Please try again.' + const errorMsg = error instanceof Error ? error.message : ''; + if (isContextLengthError(errorMsg)) { + return NextResponse.json( + { error: 'context_length_exceeded', details: errorMsg }, + { status: 413 } + ); + } + return NextResponse.json({ + error: 'Failed to process chat request. Please try again.' }, { status: 500 }); } } diff --git a/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx b/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx index 31a33a047..9e9fa3b61 100644 --- a/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx +++ b/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx @@ -1,5 +1,6 @@ import React, { useEffect, useState, useRef, useMemo, useCallback } from 'react'; import { UIMessage } from 'ai'; +import { getAIErrorMessage } from '@/lib/ai/shared/error-messages'; import { usePathname } from 'next/navigation'; import { Button } from '@/components/ui/button'; import { ChatInput, type ChatInputRef } from '@/components/ai/chat/input'; @@ -787,16 +788,7 @@ const SidebarChatTab: React.FC = () => { {error && showError && (

- {error.message?.includes('Unauthorized') || error.message?.includes('401') - ? 'Authentication failed. Please refresh the page and try again.' - : (error.message?.toLowerCase().includes('rate') || - error.message?.toLowerCase().includes('limit') || - error.message?.includes('429') || - error.message?.includes('402') || - error.message?.includes('Failed after') || - error.message?.includes('Provider returned error')) - ? 'Free tier rate limit hit. Please try again in a few seconds or subscribe for premium models and access.' - : 'Something went wrong. Please try again.'} + {getAIErrorMessage(error.message)}