5 changes: 0 additions & 5 deletions src/api/providers/__tests__/minimax.spec.ts
@@ -343,11 +343,6 @@ describe("MiniMaxHandler", () => {
expect.objectContaining({
role: "user",
content: [{ type: "text", text: "Merged message" }],
providerOptions: {
anthropic: {
cacheControl: { type: "ephemeral" },
},
},
}),
]),
)
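Note: with the provider no longer attaching cacheControl, the merged-message assertion in minimax.spec.ts keeps only the role and content matchers. A minimal sketch of the surviving expectation, assuming a vitest mock named mockStreamText wraps the AI SDK's streamText call (the mock name and the outer toHaveBeenCalledWith wrapper are illustrative, not copied from the test file):

```ts
// Hypothetical surrounding assertion; only the inner objectContaining is visible in the diff above.
expect(mockStreamText).toHaveBeenCalledWith(
	expect.objectContaining({
		messages: expect.arrayContaining([
			expect.objectContaining({
				role: "user",
				content: [{ type: "text", text: "Merged message" }],
			}),
		]),
	}),
)
```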
54 changes: 0 additions & 54 deletions src/api/providers/anthropic-vertex.ts
@@ -119,37 +119,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
anthropicProviderOptions.disableParallelToolUse = true
}

/**
* Vertex API has specific limitations for prompt caching:
* 1. Maximum of 4 blocks can have cache_control
* 2. Only text blocks can be cached (images and other content types cannot)
* 3. Cache control can only be applied to user messages, not assistant messages
*
* Our caching strategy:
* - Cache the system prompt (1 block)
* - Cache the last text block of the second-to-last user message (1 block)
* - Cache the last text block of the last user message (1 block)
* This ensures we stay under the 4-block limit while maintaining effective caching
* for the most relevant context.
*/
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -241,29 +210,6 @@ }
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
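The doc comment removed above explains why breakpoint targets were computed against the original Anthropic messages: convertToAiSdkMessages() can split one Anthropic user message into two AI SDK messages, shifting indices. A hedged illustration of that split, with shapes abbreviated rather than the converter's exact output:

```ts
// One Anthropic user message that carries a tool_result followed by text...
const originalUserMessage = {
	role: "user",
	content: [
		{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" },
		{ type: "text", text: "Now summarize it." },
	],
}

// ...becomes two AI SDK messages after conversion (part details elided), so the
// original index of a user message no longer matches its converted index:
const converted = [
	{ role: "tool" /* tool-result part for toolu_123 */ },
	{ role: "user" /* the trailing text block */ },
]
```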
43 changes: 0 additions & 43 deletions src/api/providers/anthropic.ts
@@ -105,26 +105,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
anthropicProviderOptions.disableParallelToolUse = true
}

// Apply cache control to user messages
// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -216,29 +196,6 @@ }
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
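Both Anthropic handlers (the Vertex handler above and the direct API handler here) delete the same selection code. Condensed from the removed lines, the behavior, which per the bedrock.ts comment retained below is now applied centrally in Task.ts, amounted to tagging the last two user messages with an ephemeral cacheControl provider option. A self-contained sketch of that removed behavior; the helper name and MinimalMessage type are illustrative, not the actual Task.ts API:

```ts
// Illustrative sketch only; the real centralized logic lives in Task.ts, which is not part of this diff.
type MinimalMessage = { role: string; providerOptions?: Record<string, Record<string, unknown>> }

const ANTHROPIC_CACHE_OPTION = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

function markLastTwoUserMessages(messages: MinimalMessage[]): void {
	// Collect indices of user-role messages, then take the last two (write-to-cache + read-from-cache).
	const userIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[],
	)
	for (const idx of userIndices.slice(-2)) {
		// Merge rather than overwrite any providerOptions already on the message.
		messages[idx].providerOptions = {
			...messages[idx].providerOptions,
			...ANTHROPIC_CACHE_OPTION,
		}
	}
}
```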
84 changes: 2 additions & 82 deletions src/api/providers/bedrock.ts
@@ -252,67 +252,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
}

// Prompt caching: use AI SDK's cachePoint mechanism
// The AI SDK's @ai-sdk/amazon-bedrock supports cachePoint in providerOptions per message.
//
// Strategy: Bedrock allows up to 4 cache checkpoints. We use them as:
// 1. System prompt (via systemProviderOptions below)
// 2-4. Up to 3 user messages in the conversation history
//
// For the message cache points, we target the last 2 user messages (matching
// Anthropic's strategy: write-to-cache + read-from-cache) PLUS an earlier "anchor"
// user message near the middle of the conversation. This anchor ensures the 20-block
// lookback window has a stable cache entry to hit, covering all assistant/tool messages
// between the anchor and the recent messages.
//
// We identify targets in the ORIGINAL Anthropic messages (before AI SDK conversion)
// because convertToAiSdkMessages() splits user messages containing tool_results into
// separate "tool" + "user" role messages, which would skew naive counting.
// Determine whether to enable prompt caching for the system prompt.
// Per-message cache breakpoints are applied centrally in Task.ts.
const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))

if (usePromptCache) {
const cachePointOption = { bedrock: { cachePoint: { type: "default" as const } } }

// Find all user message indices in the original (pre-conversion) message array.
const originalUserIndices = filteredMessages.reduce<number[]>(
(acc, msg, idx) => ("role" in msg && msg.role === "user" ? [...acc, idx] : acc),
[],
)

// Select up to 3 user messages for cache points (system prompt uses the 4th):
// - Last user message: write to cache for next request
// - Second-to-last user message: read from cache for current request
// - An "anchor" message earlier in the conversation for 20-block window coverage
const targetOriginalIndices = new Set<number>()
const numUserMsgs = originalUserIndices.length

if (numUserMsgs >= 1) {
// Always cache the last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 1])
}
if (numUserMsgs >= 2) {
// Cache the second-to-last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 2])
}
if (numUserMsgs >= 5) {
// Add an anchor cache point roughly in the first third of user messages.
// This ensures that the 20-block lookback from the second-to-last breakpoint
// can find a stable cache entry, covering all the assistant and tool messages
// in the middle of the conversation. We pick the user message at ~1/3 position.
const anchorIdx = Math.floor(numUserMsgs / 3)
// Only add if it's not already one of the last-2 targets
if (!targetOriginalIndices.has(originalUserIndices[anchorIdx])) {
targetOriginalIndices.add(originalUserIndices[anchorIdx])
}
}

// Apply cachePoint to the correct AI SDK messages by walking both arrays in parallel.
// A single original user message with tool_results becomes [tool-role msg, user-role msg]
// in the AI SDK array, while a plain user message becomes [user-role msg].
if (targetOriginalIndices.size > 0) {
this.applyCachePointsToAiSdkMessages(aiSdkMessages, targetOriginalIndices, cachePointOption)
}
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -706,29 +649,6 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
)
}

/**
* Apply cachePoint providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache points land on the right message.
*/
private applyCachePointsToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cachePointOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cachePointOption,
}
}
}
}

/************************************************************************************
*
* AMAZON REGIONS
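Bedrock's removed logic was the most elaborate: besides the system-prompt checkpoint, it placed up to three cachePoint markers, namely the last two user messages plus an earlier anchor for the 20-block lookback window. Condensed from the removed code into a self-contained sketch (the helper name and message type are illustrative; the centralized Task.ts logic is not shown in this diff):

```ts
// Condensed sketch of the selection logic removed from the Bedrock handler; per-message
// cache points are now applied centrally in Task.ts (not shown in this diff).
type MinimalMessage = { role: string; providerOptions?: Record<string, Record<string, unknown>> }

const BEDROCK_CACHE_POINT = { bedrock: { cachePoint: { type: "default" as const } } }

function applyBedrockCachePoints(messages: MinimalMessage[]): void {
	const userIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[],
	)
	const targets = new Set<number>()
	const n = userIndices.length
	if (n >= 1) targets.add(userIndices[n - 1]) // last user message: write to cache
	if (n >= 2) targets.add(userIndices[n - 2]) // second-to-last: read from cache
	if (n >= 5) {
		// Anchor roughly a third of the way in, so the 20-block lookback from the
		// second-to-last checkpoint still finds a stable cache entry.
		targets.add(userIndices[Math.floor(n / 3)])
	}
	// The system prompt consumes the fourth of Bedrock's four checkpoints (via systemProviderOptions).
	for (const idx of targets) {
		messages[idx].providerOptions = { ...messages[idx].providerOptions, ...BEDROCK_CACHE_POINT }
	}
}
```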
32 changes: 0 additions & 32 deletions src/api/providers/minimax.ts
@@ -89,23 +89,6 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
anthropicProviderOptions.disableParallelToolUse = true
}

const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
const userMsgIndices = mergedMessages.reduce(
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(aiSdkMessages, targetIndices, cacheProviderOption)
}

const requestOptions = {
model: this.client(modelConfig.id),
system: systemPrompt,
@@ -187,21 +170,6 @@ }
}
}

private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
