5 changes: 0 additions & 5 deletions src/api/providers/__tests__/minimax.spec.ts
@@ -343,11 +343,6 @@ describe("MiniMaxHandler", () => {
expect.objectContaining({
role: "user",
content: [{ type: "text", text: "Merged message" }],
providerOptions: {
anthropic: {
cacheControl: { type: "ephemeral" },
},
},
}),
]),
)
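Note: with the provider no longer attaching cacheControl, the merged-message assertion in minimax.spec.ts keeps only the role and content matchers. A minimal sketch of the surviving expectation, assuming a vitest mock named mockStreamText wraps the AI SDK's streamText call (the mock name and the outer toHaveBeenCalledWith wrapper are illustrative, not copied from the test file):

```ts
// Hypothetical surrounding assertion; only the inner objectContaining is visible in the diff above.
expect(mockStreamText).toHaveBeenCalledWith(
	expect.objectContaining({
		messages: expect.arrayContaining([
			expect.objectContaining({
				role: "user",
				content: [{ type: "text", text: "Merged message" }],
			}),
		]),
	}),
)
```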
54 changes: 0 additions & 54 deletions src/api/providers/anthropic-vertex.ts
@@ -119,37 +119,6 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
anthropicProviderOptions.disableParallelToolUse = true
}

/**
* Vertex API has specific limitations for prompt caching:
* 1. Maximum of 4 blocks can have cache_control
* 2. Only text blocks can be cached (images and other content types cannot)
* 3. Cache control can only be applied to user messages, not assistant messages
*
* Our caching strategy:
* - Cache the system prompt (1 block)
* - Cache the last text block of the second-to-last user message (1 block)
* - Cache the last text block of the last user message (1 block)
* This ensures we stay under the 4-block limit while maintaining effective caching
* for the most relevant context.
*/
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -241,29 +210,6 @@ }
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in vertexModels ? (modelId as VertexModelId) : vertexDefaultModelId
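The doc comment removed above explains why breakpoint targets were computed against the original Anthropic messages: convertToAiSdkMessages() can split one Anthropic user message into two AI SDK messages, shifting indices. A hedged illustration of that split, with shapes abbreviated rather than the converter's exact output:

```ts
// One Anthropic user message that carries a tool_result followed by text...
const originalUserMessage = {
	role: "user",
	content: [
		{ type: "tool_result", tool_use_id: "toolu_123", content: "file contents" },
		{ type: "text", text: "Now summarize it." },
	],
}

// ...becomes two AI SDK messages after conversion (part details elided), so the
// original index of a user message no longer matches its converted index:
const converted = [
	{ role: "tool" /* tool-result part for toolu_123 */ },
	{ role: "user" /* the trailing text block */ },
]
```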
43 changes: 0 additions & 43 deletions src/api/providers/anthropic.ts
@@ -105,26 +105,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
anthropicProviderOptions.disableParallelToolUse = true
}

// Apply cache control to user messages
// Strategy: cache the last 2 user messages (write-to-cache + read-from-cache)
const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

const userMsgIndices = messages.reduce(
(acc, msg, index) => ("role" in msg && msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(messages as ModelMessage[], targetIndices, cacheProviderOption)
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -216,29 +196,6 @@ }
}
}

/**
* Apply cacheControl providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache control lands on the right message.
*/
private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
let id = modelId && modelId in anthropicModels ? (modelId as AnthropicModelId) : anthropicDefaultModelId
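Both Anthropic handlers (the Vertex handler above and the direct API handler here) delete the same selection code. Condensed from the removed lines, the behavior, which per the bedrock.ts comment retained below is now applied centrally in Task.ts, amounted to tagging the last two user messages with an ephemeral cacheControl provider option. A self-contained sketch of that removed behavior; the helper name and MinimalMessage type are illustrative, not the actual Task.ts API:

```ts
// Illustrative sketch only; the real centralized logic lives in Task.ts, which is not part of this diff.
type MinimalMessage = { role: string; providerOptions?: Record<string, Record<string, unknown>> }

const ANTHROPIC_CACHE_OPTION = { anthropic: { cacheControl: { type: "ephemeral" as const } } }

function markLastTwoUserMessages(messages: MinimalMessage[]): void {
	// Collect indices of user-role messages, then take the last two (write-to-cache + read-from-cache).
	const userIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[],
	)
	for (const idx of userIndices.slice(-2)) {
		// Merge rather than overwrite any providerOptions already on the message.
		messages[idx].providerOptions = {
			...messages[idx].providerOptions,
			...ANTHROPIC_CACHE_OPTION,
		}
	}
}
```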
84 changes: 2 additions & 82 deletions src/api/providers/bedrock.ts
@@ -252,67 +252,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
}

// Prompt caching: use AI SDK's cachePoint mechanism
// The AI SDK's @ai-sdk/amazon-bedrock supports cachePoint in providerOptions per message.
//
// Strategy: Bedrock allows up to 4 cache checkpoints. We use them as:
// 1. System prompt (via systemProviderOptions below)
// 2-4. Up to 3 user messages in the conversation history
//
// For the message cache points, we target the last 2 user messages (matching
// Anthropic's strategy: write-to-cache + read-from-cache) PLUS an earlier "anchor"
// user message near the middle of the conversation. This anchor ensures the 20-block
// lookback window has a stable cache entry to hit, covering all assistant/tool messages
// between the anchor and the recent messages.
//
// We identify targets in the ORIGINAL Anthropic messages (before AI SDK conversion)
// because convertToAiSdkMessages() splits user messages containing tool_results into
// separate "tool" + "user" role messages, which would skew naive counting.
// Determine whether to enable prompt caching for the system prompt.
// Per-message cache breakpoints are applied centrally in Task.ts.
const usePromptCache = Boolean(this.options.awsUsePromptCache && this.supportsAwsPromptCache(modelConfig))

if (usePromptCache) {
const cachePointOption = { bedrock: { cachePoint: { type: "default" as const } } }

// Find all user message indices in the original (pre-conversion) message array.
const originalUserIndices = filteredMessages.reduce<number[]>(
(acc, msg, idx) => ("role" in msg && msg.role === "user" ? [...acc, idx] : acc),
[],
)

// Select up to 3 user messages for cache points (system prompt uses the 4th):
// - Last user message: write to cache for next request
// - Second-to-last user message: read from cache for current request
// - An "anchor" message earlier in the conversation for 20-block window coverage
const targetOriginalIndices = new Set<number>()
const numUserMsgs = originalUserIndices.length

if (numUserMsgs >= 1) {
// Always cache the last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 1])
}
if (numUserMsgs >= 2) {
// Cache the second-to-last user message
targetOriginalIndices.add(originalUserIndices[numUserMsgs - 2])
}
if (numUserMsgs >= 5) {
// Add an anchor cache point roughly in the first third of user messages.
// This ensures that the 20-block lookback from the second-to-last breakpoint
// can find a stable cache entry, covering all the assistant and tool messages
// in the middle of the conversation. We pick the user message at ~1/3 position.
const anchorIdx = Math.floor(numUserMsgs / 3)
// Only add if it's not already one of the last-2 targets
if (!targetOriginalIndices.has(originalUserIndices[anchorIdx])) {
targetOriginalIndices.add(originalUserIndices[anchorIdx])
}
}

// Apply cachePoint to the correct AI SDK messages by walking both arrays in parallel.
// A single original user message with tool_results becomes [tool-role msg, user-role msg]
// in the AI SDK array, while a plain user message becomes [user-role msg].
if (targetOriginalIndices.size > 0) {
this.applyCachePointsToAiSdkMessages(aiSdkMessages, targetOriginalIndices, cachePointOption)
}
}

// Build streamText request
// Cast providerOptions to any to bypass strict JSONObject typing — the AI SDK accepts the correct runtime values
const requestOptions: Parameters<typeof streamText>[0] = {
@@ -706,29 +649,6 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
)
}

/**
* Apply cachePoint providerOptions to the correct AI SDK messages by walking
* the original Anthropic messages and converted AI SDK messages in parallel.
*
* convertToAiSdkMessages() can split a single Anthropic user message (containing
* tool_results + text) into 2 AI SDK messages (tool role + user role). This method
* accounts for that split so cache points land on the right message.
*/
private applyCachePointsToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cachePointOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cachePointOption,
}
}
}
}

/************************************************************************************
*
* AMAZON REGIONS
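Bedrock's removed logic was the most elaborate: besides the system-prompt checkpoint, it placed up to three cachePoint markers, namely the last two user messages plus an earlier anchor for the 20-block lookback window. Condensed from the removed code into a self-contained sketch (the helper name and message type are illustrative; the centralized Task.ts logic is not shown in this diff):

```ts
// Condensed sketch of the selection logic removed from the Bedrock handler; per-message
// cache points are now applied centrally in Task.ts (not shown in this diff).
type MinimalMessage = { role: string; providerOptions?: Record<string, Record<string, unknown>> }

const BEDROCK_CACHE_POINT = { bedrock: { cachePoint: { type: "default" as const } } }

function applyBedrockCachePoints(messages: MinimalMessage[]): void {
	const userIndices = messages.reduce<number[]>(
		(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
		[],
	)
	const targets = new Set<number>()
	const n = userIndices.length
	if (n >= 1) targets.add(userIndices[n - 1]) // last user message: write to cache
	if (n >= 2) targets.add(userIndices[n - 2]) // second-to-last: read from cache
	if (n >= 5) {
		// Anchor roughly a third of the way in, so the 20-block lookback from the
		// second-to-last checkpoint still finds a stable cache entry.
		targets.add(userIndices[Math.floor(n / 3)])
	}
	// The system prompt consumes the fourth of Bedrock's four checkpoints (via systemProviderOptions).
	for (const idx of targets) {
		messages[idx].providerOptions = { ...messages[idx].providerOptions, ...BEDROCK_CACHE_POINT }
	}
}
```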
32 changes: 0 additions & 32 deletions src/api/providers/minimax.ts
@@ -89,23 +89,6 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand
anthropicProviderOptions.disableParallelToolUse = true
}

const cacheProviderOption = { anthropic: { cacheControl: { type: "ephemeral" as const } } }
const userMsgIndices = mergedMessages.reduce(
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)

const targetIndices = new Set<number>()
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastUserMsgIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

if (lastUserMsgIndex >= 0) targetIndices.add(lastUserMsgIndex)
if (secondLastUserMsgIndex >= 0) targetIndices.add(secondLastUserMsgIndex)

if (targetIndices.size > 0) {
this.applyCacheControlToAiSdkMessages(aiSdkMessages, targetIndices, cacheProviderOption)
}

const requestOptions = {
model: this.client(modelConfig.id),
system: systemPrompt,
@@ -187,21 +170,6 @@ }
}
}

private applyCacheControlToAiSdkMessages(
aiSdkMessages: { role: string; providerOptions?: Record<string, Record<string, unknown>> }[],
targetIndices: Set<number>,
cacheProviderOption: Record<string, Record<string, unknown>>,
): void {
for (const idx of targetIndices) {
if (idx >= 0 && idx < aiSdkMessages.length) {
aiSdkMessages[idx].providerOptions = {
...aiSdkMessages[idx].providerOptions,
...cacheProviderOption,
}
}
}
}

getModel() {
const modelId = this.options.apiModelId
