Skip to content

Commit 2eb015d

Browse files
sbe-arg and ConradIrwin authored
Allow changing the context window size for Ollama (zed-industries#44506)
Release Notes: - Changed how the context window is set for Ollama: it is now configured at the provider level instead of per model. --------- Co-authored-by: Conrad Irwin <conrad.irwin@gmail.com>
1 parent 9c102a5 commit 2eb015d

File tree

5 files changed

+149
-33
lines changed

5 files changed

+149
-33
lines changed

crates/language_models/src/provider/ollama.rs

Lines changed: 129 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pub struct OllamaSettings {
4545
pub api_url: String,
4646
pub auto_discover: bool,
4747
pub available_models: Vec<AvailableModel>,
48+
pub context_window: Option<u64>,
4849
}
4950

5051
pub struct OllamaLanguageModelProvider {
@@ -246,14 +247,20 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
246247
let settings = OllamaLanguageModelProvider::settings(cx);
247248

248249
// Add models from the Ollama API
249-
if settings.auto_discover {
250-
for model in self.state.read(cx).fetched_models.iter() {
251-
models.insert(model.name.clone(), model.clone());
250+
for model in self.state.read(cx).fetched_models.iter() {
251+
let mut model = model.clone();
252+
if let Some(context_window) = settings.context_window {
253+
model.max_tokens = context_window;
252254
}
255+
models.insert(model.name.clone(), model);
253256
}
254257

255258
// Override with available models from settings
256-
merge_settings_into_models(&mut models, &settings.available_models);
259+
merge_settings_into_models(
260+
&mut models,
261+
&settings.available_models,
262+
settings.context_window,
263+
);
257264

258265
let mut models = models
259266
.into_values()
@@ -604,6 +611,7 @@ fn map_to_language_model_completion_events(
604611
struct ConfigurationView {
605612
api_key_editor: Entity<InputField>,
606613
api_url_editor: Entity<InputField>,
614+
context_window_editor: Entity<InputField>,
607615
state: Entity<State>,
608616
}
609617

@@ -617,6 +625,14 @@ impl ConfigurationView {
617625
input
618626
});
619627

628+
let context_window_editor = cx.new(|cx| {
629+
let input = InputField::new(window, cx, "8192").label("Context Window");
630+
if let Some(context_window) = OllamaLanguageModelProvider::settings(cx).context_window {
631+
input.set_text(&context_window.to_string(), window, cx);
632+
}
633+
input
634+
});
635+
620636
cx.observe(&state, |_, _, cx| {
621637
cx.notify();
622638
})
@@ -625,6 +641,7 @@ impl ConfigurationView {
625641
Self {
626642
api_key_editor,
627643
api_url_editor,
644+
context_window_editor,
628645
state,
629646
}
630647
}
@@ -712,7 +729,57 @@ impl ConfigurationView {
712729
cx.notify();
713730
}
714731

715-
fn render_instructions(cx: &mut Context<Self>) -> Div {
732+
fn save_context_window(&mut self, cx: &mut Context<Self>) {
733+
let context_window_str = self
734+
.context_window_editor
735+
.read(cx)
736+
.text(cx)
737+
.trim()
738+
.to_string();
739+
let current_context_window = OllamaLanguageModelProvider::settings(cx).context_window;
740+
741+
if let Ok(context_window) = context_window_str.parse::<u64>() {
742+
if Some(context_window) != current_context_window {
743+
let fs = <dyn Fs>::global(cx);
744+
update_settings_file(fs, cx, move |settings, _| {
745+
settings
746+
.language_models
747+
.get_or_insert_default()
748+
.ollama
749+
.get_or_insert_default()
750+
.context_window = Some(context_window);
751+
});
752+
}
753+
} else if context_window_str.is_empty() && current_context_window.is_some() {
754+
let fs = <dyn Fs>::global(cx);
755+
update_settings_file(fs, cx, move |settings, _| {
756+
settings
757+
.language_models
758+
.get_or_insert_default()
759+
.ollama
760+
.get_or_insert_default()
761+
.context_window = None;
762+
});
763+
}
764+
}
765+
766+
fn reset_context_window(&mut self, window: &mut Window, cx: &mut Context<Self>) {
767+
self.context_window_editor
768+
.update(cx, |input, cx| input.set_text("", window, cx));
769+
let fs = <dyn Fs>::global(cx);
770+
update_settings_file(fs, cx, |settings, _cx| {
771+
if let Some(settings) = settings
772+
.language_models
773+
.as_mut()
774+
.and_then(|models| models.ollama.as_mut())
775+
{
776+
settings.context_window = None;
777+
}
778+
});
779+
cx.notify();
780+
}
781+
782+
fn render_instructions(cx: &App) -> Div {
716783
v_flex()
717784
.gap_2()
718785
.child(Label::new(
@@ -774,6 +841,56 @@ impl ConfigurationView {
774841
}
775842
}
776843

844+
fn render_context_window_editor(&self, cx: &Context<Self>) -> Div {
845+
let settings = OllamaLanguageModelProvider::settings(cx);
846+
let custom_context_window_set = settings.context_window.is_some();
847+
848+
if custom_context_window_set {
849+
h_flex()
850+
.p_3()
851+
.justify_between()
852+
.rounded_md()
853+
.border_1()
854+
.border_color(cx.theme().colors().border)
855+
.bg(cx.theme().colors().elevated_surface_background)
856+
.child(
857+
h_flex()
858+
.gap_2()
859+
.child(Icon::new(IconName::Check).color(Color::Success))
860+
.child(v_flex().gap_1().child(Label::new(format!(
861+
"Context Window: {}",
862+
settings.context_window.unwrap()
863+
)))),
864+
)
865+
.child(
866+
Button::new("reset-context-window", "Reset")
867+
.label_size(LabelSize::Small)
868+
.icon(IconName::Undo)
869+
.icon_size(IconSize::Small)
870+
.icon_position(IconPosition::Start)
871+
.layer(ElevationIndex::ModalSurface)
872+
.on_click(
873+
cx.listener(|this, _, window, cx| {
874+
this.reset_context_window(window, cx)
875+
}),
876+
),
877+
)
878+
} else {
879+
v_flex()
880+
.on_action(
881+
cx.listener(|this, _: &menu::Confirm, _window, cx| {
882+
this.save_context_window(cx)
883+
}),
884+
)
885+
.child(self.context_window_editor.clone())
886+
.child(
887+
Label::new("Default: Model specific")
888+
.size(LabelSize::Small)
889+
.color(Color::Muted),
890+
)
891+
}
892+
}
893+
777894
fn render_api_url_editor(&self, cx: &Context<Self>) -> Div {
778895
let api_url = OllamaLanguageModelProvider::api_url(cx);
779896
let custom_api_url_set = api_url != OLLAMA_API_URL;
@@ -823,6 +940,7 @@ impl Render for ConfigurationView {
823940
.gap_2()
824941
.child(Self::render_instructions(cx))
825942
.child(self.render_api_url_editor(cx))
943+
.child(self.render_context_window_editor(cx))
826944
.child(self.render_api_key_editor(cx))
827945
.child(
828946
h_flex()
@@ -910,10 +1028,13 @@ impl Render for ConfigurationView {
9101028
fn merge_settings_into_models(
9111029
models: &mut HashMap<String, ollama::Model>,
9121030
available_models: &[AvailableModel],
1031+
context_window: Option<u64>,
9131032
) {
9141033
for setting_model in available_models {
9151034
if let Some(model) = models.get_mut(&setting_model.name) {
916-
model.max_tokens = setting_model.max_tokens;
1035+
if context_window.is_none() {
1036+
model.max_tokens = setting_model.max_tokens;
1037+
}
9171038
model.display_name = setting_model.display_name.clone();
9181039
model.keep_alive = setting_model.keep_alive.clone();
9191040
model.supports_tools = setting_model.supports_tools;
@@ -925,7 +1046,7 @@ fn merge_settings_into_models(
9251046
ollama::Model {
9261047
name: setting_model.name.clone(),
9271048
display_name: setting_model.display_name.clone(),
928-
max_tokens: setting_model.max_tokens,
1049+
max_tokens: context_window.unwrap_or(setting_model.max_tokens),
9291050
keep_alive: setting_model.keep_alive.clone(),
9301051
supports_tools: setting_model.supports_tools,
9311052
supports_vision: setting_model.supports_images,
@@ -1003,7 +1124,7 @@ mod tests {
10031124
},
10041125
];
10051126

1006-
merge_settings_into_models(&mut models, &available_models);
1127+
merge_settings_into_models(&mut models, &available_models, None);
10071128

10081129
let model_1_5b = models
10091130
.get("qwen2.5-coder:1.5b")

crates/language_models/src/settings.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ impl settings::Settings for AllLanguageModelSettings {
8181
api_url: ollama.api_url.unwrap(),
8282
auto_discover: ollama.auto_discover.unwrap_or(true),
8383
available_models: ollama.available_models.unwrap_or_default(),
84+
context_window: ollama.context_window,
8485
},
8586
open_router: OpenRouterSettings {
8687
api_url: open_router.api_url.unwrap(),

crates/ollama/src/ollama.rs

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,9 @@ pub struct Model {
2020
pub supports_thinking: Option<bool>,
2121
}
2222

23-
fn get_max_tokens(name: &str) -> u64 {
24-
/// Default context length for unknown models.
23+
fn get_max_tokens(_name: &str) -> u64 {
2524
const DEFAULT_TOKENS: u64 = 4096;
26-
/// Magic number. Lets many Ollama models work with ~16GB of ram.
27-
/// Models that support context beyond 16k such as codestral (32k) or devstral (128k) will be clamped down to 16k
28-
const MAXIMUM_TOKENS: u64 = 16384;
29-
30-
match name.split(':').next().unwrap() {
31-
"granite-code" | "phi" | "tinyllama" => 2048,
32-
"llama2" | "stablelm2" | "vicuna" | "yi" => 4096,
33-
"aya" | "codegemma" | "gemma" | "gemma2" | "llama3" | "starcoder" => 8192,
34-
"codellama" | "starcoder2" => 16384,
35-
"codestral" | "dolphin-mixtral" | "llava" | "magistral" | "mistral" | "mixstral"
36-
| "qwen2" | "qwen2.5-coder" => 32768,
37-
"cogito" | "command-r" | "deepseek-coder-v2" | "deepseek-r1" | "deepseek-v3"
38-
| "devstral" | "gemma3" | "gpt-oss" | "granite3.3" | "llama3.1" | "llama3.2"
39-
| "llama3.3" | "mistral-nemo" | "phi3" | "phi3.5" | "phi4" | "qwen3" | "yi-coder" => 128000,
40-
"qwen3-coder" => 256000,
41-
_ => DEFAULT_TOKENS,
42-
}
43-
.clamp(1, MAXIMUM_TOKENS)
25+
DEFAULT_TOKENS
4426
}
4527

4628
impl Model {

crates/settings_content/src/language_model.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ pub struct OllamaSettingsContent {
9999
pub api_url: Option<String>,
100100
pub auto_discover: Option<bool>,
101101
pub available_models: Option<Vec<OllamaAvailableModel>>,
102+
pub context_window: Option<u64>,
102103
}
103104

104105
#[with_fallible_options]

docs/src/ai/llm-providers.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -423,14 +423,23 @@ models are available.
423423

424424
#### Ollama Context Length {#ollama-context}
425425

426-
Zed has pre-configured maximum context lengths (`max_tokens`) to match the capabilities of common models.
427-
Zed API requests to Ollama include this as the `num_ctx` parameter, but the default values do not exceed `16384` so users with ~16GB of RAM are able to use most models out of the box.
428-
429-
See [get_max_tokens in ollama.rs](https://github.com/zed-industries/zed/blob/main/crates/ollama/src/ollama.rs) for a complete set of defaults.
426+
Zed API requests to Ollama include the context length as the `num_ctx` parameter. By default, Zed uses a context length of `4096` tokens for all Ollama models.
430427

431428
> **Note**: Token counts displayed in the Agent Panel are only estimates and will differ from the model's native tokenizer.
432429
433-
Depending on your hardware or use-case you may wish to limit or increase the context length for a specific model via settings.json:
430+
You can set a context length for all Ollama models using the `context_window` setting. This can also be configured in the Ollama provider settings UI:
431+
432+
```json [settings]
433+
{
434+
"language_models": {
435+
"ollama": {
436+
"context_window": 8192
437+
}
438+
}
439+
}
440+
```
441+
442+
Alternatively, you can configure the context length per-model using the `max_tokens` field in `available_models`:
434443

435444
```json [settings]
436445
{
@@ -452,6 +461,8 @@ Depending on your hardware or use-case you may wish to limit or increase the con
452461
}
453462
```
454463

464+
> **Note**: If `context_window` is set, it overrides any per-model `max_tokens` values.
465+
455466
If you specify a context length that is too large for your hardware, Ollama will log an error.
456467
You can watch these logs by running: `tail -f ~/.ollama/logs/ollama.log` (macOS) or `journalctl -u ollama -f` (Linux).
457468
Depending on the memory available on your machine, you may need to adjust the context length to a smaller value.

0 commit comments

Comments
 (0)