From 86c2009faeb41b625f5ba1bebd846747c22b1e69 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 21 Feb 2026 15:31:09 +0000 Subject: [PATCH 1/5] fix: prevent and surface context-length errors in AI chat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-pronged fix for AI_APICallError when conversations exceed model context windows (e.g. OpenRouter 400k token limit): **Prevention (proactive truncation)** - Move modelMessages conversion to after system prompt is built so we have accurate token budgets before calling streamText - Use determineMessagesToInclude() from ai-context-calculator to trim the oldest messages first, reserving 75% of the context window for input and leaving 25% headroom for output tokens and estimator inaccuracies - Log a warning when truncation occurs with before/after message counts - Add OpenRouter provider context windows to getContextWindowSize so common models (DeepSeek, Qwen, Llama, Mistral, Gemini, Claude) get accurate limits instead of the 200k fallback **Better errors (when context still exceeds after truncation)** - Detect context-length errors in the route error handler and return HTTP 413 with error key 'context_length_exceeded' instead of a generic 500 - Add isContextLengthError() helper to error-messages.ts that matches provider-specific phrasing (OpenRouter, Anthropic, OpenAI, etc.) - getAIErrorMessage() now returns a clear, actionable message for context errors: "The conversation is too long for this model's context window. Older messages have been trimmed to fit — try sending your message again." 
- Replace hardcoded error-message logic in SidebarChatTab with getAIErrorMessage() so all three chat surfaces handle errors consistently https://claude.ai/code/session_011dBcfJNRsawEMzZLb67Z7m --- apps/web/src/app/api/ai/chat/route.ts | 62 ++++++++++++++++--- .../ai-assistant/SidebarChatTab.tsx | 12 +--- apps/web/src/lib/ai/shared/error-messages.ts | 25 ++++++++ .../src/monitoring/ai-context-calculator.ts | 29 ++++++++- 4 files changed, 108 insertions(+), 20 deletions(-) diff --git a/apps/web/src/app/api/ai/chat/route.ts b/apps/web/src/app/api/ai/chat/route.ts index 9a9dacfb2..d3da692be 100644 --- a/apps/web/src/app/api/ai/chat/route.ts +++ b/apps/web/src/app/api/ai/chat/route.ts @@ -71,6 +71,13 @@ import { } from '@/lib/ai/core/stream-abort-registry'; import { validateUserMessageFileParts, hasFileParts } from '@/lib/ai/core/validate-image-parts'; import { hasVisionCapability } from '@/lib/ai/core/model-capabilities'; +import { + determineMessagesToInclude, + getContextWindowSize, + estimateSystemPromptTokens, + estimateToolDefinitionTokens, +} from '@pagespace/lib/ai-context-calculator'; +import { isContextLengthError } from '@/lib/ai/shared/error-messages'; // Allow streaming responses up to 5 minutes for complex AI agent interactions @@ -741,13 +748,10 @@ export async function POST(request: Request) { }); } - // Convert UIMessages to ModelMessages for the AI model - // First sanitize messages to remove tool parts without results (prevents "input-available" state errors) + // Sanitize messages to remove tool parts without results (prevents "input-available" state errors) // NOTE: We use database-loaded messages, NOT messages from client + // modelMessages is computed after system prompt is built so we can apply context truncation const sanitizedMessages = sanitizeMessagesForModel(conversationHistory); - const modelMessages = convertToModelMessages(sanitizedMessages, { - tools: filteredTools // Use original tools - no wrapping needed - }); // Fetch user 
personalization for AI system prompt injection const personalization = await getUserPersonalization(userId); @@ -818,8 +822,43 @@ } loggers.ai.debug('AI Chat API: Tools configured for Page AI', { toolCount: Object.keys(filteredTools).length }); + + // Context-length guard: proactively truncate oldest messages to fit within the model's context window. + // This prevents AI_APICallError from providers when a conversation grows too long. + // We build modelMessages here (after system prompt) so we have accurate token budgeting. + const fullSystemPrompt = systemPrompt + timestampSystemPrompt + pageTreePrompt; + const contextWindow = getContextWindowSize(currentModel, currentProvider); + const systemPromptTokens = estimateSystemPromptTokens(fullSystemPrompt); + // Cast needed because filteredTools is a ToolSet (Vercel AI SDK type) but calculator expects plain object + const toolTokens = estimateToolDefinitionTokens(filteredTools as Record<string, unknown>); + // Reserve 25% headroom for output tokens and tokenizer inaccuracies + const inputBudget = Math.floor(contextWindow * 0.75); + const { includedMessages, wasTruncated } = determineMessagesToInclude( + sanitizedMessages, + inputBudget, + systemPromptTokens, + toolTokens + ); + + if (wasTruncated) { + loggers.ai.warn('AI Chat API: Conversation truncated to fit context window', { + originalMessageCount: sanitizedMessages.length, + includedMessageCount: includedMessages.length, + model: currentModel, + provider: currentProvider, + contextWindow, + inputBudget, + systemPromptTokens, + toolTokens, + }); + } + + const modelMessages = convertToModelMessages(includedMessages, { + tools: filteredTools // Use original tools - no wrapping needed + }); + loggers.ai.info('AI Chat API: Starting streamText for Page AI', { model: currentModel, pageName: page.title }); - + // Create UI message stream with visual content injection support // This handles the case where tools return visual content that 
needs to be injected into the stream let result; @@ -1199,8 +1238,15 @@ export async function POST(request: Request) { }); // Return a proper error response - return NextResponse.json({ - error: 'Failed to process chat request. Please try again.' + const errorMsg = error instanceof Error ? error.message : ''; + if (isContextLengthError(errorMsg)) { + return NextResponse.json( + { error: 'context_length_exceeded', details: errorMsg }, + { status: 413 } + ); + } + return NextResponse.json({ + error: 'Failed to process chat request. Please try again.' }, { status: 500 }); } } diff --git a/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx b/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx index 31a33a047..9e9fa3b61 100644 --- a/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx +++ b/apps/web/src/components/layout/right-sidebar/ai-assistant/SidebarChatTab.tsx @@ -1,5 +1,6 @@ import React, { useEffect, useState, useRef, useMemo, useCallback } from 'react'; import { UIMessage } from 'ai'; +import { getAIErrorMessage } from '@/lib/ai/shared/error-messages'; import { usePathname } from 'next/navigation'; import { Button } from '@/components/ui/button'; import { ChatInput, type ChatInputRef } from '@/components/ai/chat/input'; @@ -787,16 +788,7 @@ const SidebarChatTab: React.FC = () => { {error && showError && (

- {error.message?.includes('Unauthorized') || error.message?.includes('401') - ? 'Authentication failed. Please refresh the page and try again.' - : (error.message?.toLowerCase().includes('rate') || - error.message?.toLowerCase().includes('limit') || - error.message?.includes('429') || - error.message?.includes('402') || - error.message?.includes('Failed after') || - error.message?.includes('Provider returned error')) - ? 'Free tier rate limit hit. Please try again in a few seconds or subscribe for premium models and access.' - : 'Something went wrong. Please try again.'} + {getAIErrorMessage(error.message)}