Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/types/src/provider-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ const openRouterSchema = baseProviderSettingsSchema.extend({
openRouterModelId: z.string().optional(),
openRouterBaseUrl: z.string().optional(),
openRouterSpecificProvider: z.string().optional(),
openRouterExcludeLowQuantization: z.boolean().optional(),
})

const bedrockSchema = apiModelIdProviderModelSchema.extend({
Expand Down
132 changes: 132 additions & 0 deletions src/api/providers/__tests__/openrouter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1166,4 +1166,136 @@ describe("OpenRouterHandler", () => {
)
})
})

describe("quantization filter", () => {
	// Quantization levels the handler is expected to allow when the low-bit filter is on.
	const highPrecisionQuantizations = ["fp16", "bf16", "fp8", "int8"]

	// Token usage reported by the stubbed stream; a fresh object is built for every use.
	const stubUsage = () => ({ inputTokens: 10, outputTokens: 20, totalTokens: 30 })

	/**
	 * Stubs `streamText` with a single text-delta chunk and drains
	 * `handler.createMessage`, so that `mockStreamText` records the arguments
	 * the handler passed to it.
	 */
	async function drainCreateMessage(handler: OpenRouterHandler): Promise<void> {
		async function* singleChunkStream() {
			yield { type: "text-delta", text: "test", id: "1" }
		}

		mockStreamText.mockReturnValue({
			fullStream: singleChunkStream(),
			usage: Promise.resolve(stubUsage()),
			totalUsage: Promise.resolve(stubUsage()),
		})

		for await (const _ of handler.createMessage("test", [{ role: "user", content: "test" }])) {
			// consume
		}
	}

	it("includes quantizations in providerOptions when openRouterExcludeLowQuantization is enabled", async () => {
		await drainCreateMessage(
			new OpenRouterHandler({
				openRouterApiKey: "test-key",
				openRouterModelId: "openai/gpt-4o",
				openRouterExcludeLowQuantization: true,
			}),
		)

		const expectedProviderOptions = {
			openrouter: {
				provider: {
					quantizations: highPrecisionQuantizations,
				},
			},
		}
		expect(mockStreamText).toHaveBeenCalledWith(
			expect.objectContaining({ providerOptions: expectedProviderOptions }),
		)
	})

	it("does not include quantizations in providerOptions when openRouterExcludeLowQuantization is disabled", async () => {
		await drainCreateMessage(
			new OpenRouterHandler({
				openRouterApiKey: "test-key",
				openRouterModelId: "openai/gpt-4o",
				openRouterExcludeLowQuantization: false,
			}),
		)

		// With neither provider pinning nor the filter active, no provider options are sent.
		expect(mockStreamText).toHaveBeenCalledWith(expect.objectContaining({ providerOptions: undefined }))
	})

	it("combines quantizations with specific provider routing", async () => {
		await drainCreateMessage(
			new OpenRouterHandler({
				openRouterApiKey: "test-key",
				openRouterModelId: "openai/gpt-4o",
				openRouterExcludeLowQuantization: true,
				openRouterSpecificProvider: "DeepInfra",
			}),
		)

		// Pinning fields and the quantization allowlist must live in the same provider object.
		const expectedProviderOptions = {
			openrouter: {
				provider: {
					order: ["DeepInfra"],
					only: ["DeepInfra"],
					allow_fallbacks: false,
					quantizations: highPrecisionQuantizations,
				},
			},
		}
		expect(mockStreamText).toHaveBeenCalledWith(
			expect.objectContaining({ providerOptions: expectedProviderOptions }),
		)
	})

	it("includes quantizations in completePrompt when openRouterExcludeLowQuantization is enabled", async () => {
		const handler = new OpenRouterHandler({
			openRouterApiKey: "test-key",
			openRouterModelId: "openai/gpt-4o",
			openRouterExcludeLowQuantization: true,
		})
		mockGenerateText.mockResolvedValue({ text: "test" })

		await handler.completePrompt("test prompt")

		const expectedProviderOptions = {
			openrouter: {
				provider: {
					quantizations: highPrecisionQuantizations,
				},
			},
		}
		expect(mockGenerateText).toHaveBeenCalledWith(
			expect.objectContaining({ providerOptions: expectedProviderOptions }),
		)
	})
})
})
81 changes: 43 additions & 38 deletions src/api/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,25 +155,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

const tools = convertToolsForAiSdk(metadata?.tools)

const providerOptions:
| {
openrouter?: {
provider?: { order: string[]; only: string[]; allow_fallbacks: boolean }
}
}
| undefined =
this.options.openRouterSpecificProvider &&
this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME
? {
openrouter: {
provider: {
order: [this.options.openRouterSpecificProvider],
only: [this.options.openRouterSpecificProvider],
allow_fallbacks: false,
},
},
}
: undefined
const providerOptions = this.buildProviderOptions()

let accumulatedReasoningText = ""

Expand Down Expand Up @@ -281,6 +263,47 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
return { id, info, topP: isDeepSeekR1 ? 0.95 : undefined, ...params }
}

/**
 * Assembles the `providerOptions` value for OpenRouter requests from the
 * handler's settings.
 *
 * - When `openRouterSpecificProvider` is set to something other than
 *   `OPENROUTER_DEFAULT_PROVIDER_NAME`, routing is pinned to that provider
 *   (`order`, `only`, and `allow_fallbacks: false`).
 * - When `openRouterExcludeLowQuantization` is truthy, a `quantizations`
 *   allowlist is added.
 *
 * @returns The options wrapped under the `openrouter` key, or `undefined`
 * when neither setting contributes a routing field.
 */
private buildProviderOptions():
	| {
			openrouter?: {
				provider?: {
					order?: string[]
					only?: string[]
					allow_fallbacks?: boolean
					quantizations?: string[]
				}
			}
	  }
	| undefined {
	const { openRouterSpecificProvider: pinnedProvider, openRouterExcludeLowQuantization: highPrecisionOnly } =
		this.options

	const routing: {
		order?: string[]
		only?: string[]
		allow_fallbacks?: boolean
		quantizations?: string[]
	} = {}

	// Testing the value inside the guard narrows it, so no non-null assertion is needed.
	if (pinnedProvider && pinnedProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME) {
		routing.order = [pinnedProvider]
		routing.only = [pinnedProvider]
		routing.allow_fallbacks = false
	}

	if (highPrecisionOnly) {
		// NOTE(review): this is an allowlist. If OpenRouter's quantization enum also
		// includes values such as "fp32" or "unknown", providers reporting those are
		// filtered out too — confirm against the provider routing docs that this is intended.
		routing.quantizations = ["fp16", "bf16", "fp8", "int8"]
	}

	// An empty routing object means neither setting applied; send no provider options at all.
	return Object.keys(routing).length > 0 ? { openrouter: { provider: routing } } : undefined
}

async completePrompt(prompt: string): Promise<string> {
let { id: modelId, maxTokens, temperature, topP, reasoning } = await this.fetchModel()

Expand All @@ -298,25 +321,7 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH

const openrouter = this.createOpenRouterProvider({ reasoning, headers })

const providerOptions:
| {
openrouter?: {
provider?: { order: string[]; only: string[]; allow_fallbacks: boolean }
}
}
| undefined =
this.options.openRouterSpecificProvider &&
this.options.openRouterSpecificProvider !== OPENROUTER_DEFAULT_PROVIDER_NAME
? {
openrouter: {
provider: {
order: [this.options.openRouterSpecificProvider],
only: [this.options.openRouterSpecificProvider],
allow_fallbacks: false,
},
},
}
: undefined
const providerOptions = this.buildProviderOptions()

try {
const result = await generateText({
Expand Down
12 changes: 12 additions & 0 deletions webview-ui/src/components/settings/providers/OpenRouter.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ export const OpenRouter = ({
)}
</div>
)}
<div>
<Checkbox
checked={apiConfiguration?.openRouterExcludeLowQuantization ?? false}
onChange={(checked: boolean) => {
setApiConfigurationField("openRouterExcludeLowQuantization", checked)
}}>
{t("settings:providers.openRouter.excludeLowQuantization.label")}
</Checkbox>
<div className="text-sm text-vscode-descriptionForeground mt-1 ml-6">
{t("settings:providers.openRouter.excludeLowQuantization.description")}
</div>
</div>
<ModelPicker
apiConfiguration={apiConfiguration}
setApiConfigurationField={setApiConfigurationField}
Expand Down
4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/ca/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/de/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/en/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,10 @@
"title": "OpenRouter Provider Routing",
"description": "OpenRouter routes requests to the best available providers for your model. By default, requests are load balanced across the top providers to maximize uptime. However, you can choose a specific provider to use for this model.",
"learnMore": "Learn more about provider routing"
},
"excludeLowQuantization": {
"label": "Exclude low-bit quantization (FP4/FP6/Int4)",
"description": "Only allow higher precision providers (FP8/FP16/BF16/Int8). Helps prevent garbled CJK (Chinese/Japanese/Korean) text output from aggressively quantized models."
}
},
"customModel": {
Expand Down
4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/es/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/fr/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/hi/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/id/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/it/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/ja/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/ko/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/nl/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions webview-ui/src/i18n/locales/pl/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading