From 7b129d65272c8ed1b4ac980640a9942ff12c38d9 Mon Sep 17 00:00:00 2001 From: Gautam Sirdeshmukh Date: Mon, 6 Apr 2026 13:08:27 -0400 Subject: [PATCH] fix: inject GPT-OSS stop tokens when not specified to prevent tool call failures --- src/strands/models/openai.py | 11 ++++++++++- tests/strands/models/test_openai.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index 73484e924..06c478ff3 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -36,6 +36,10 @@ "too many total text bytes", ] +# Stop tokens for GPT-OSS models to enforce generation boundaries +# https://github.com/openai/harmony/blob/main/src/registry.rs +_GPT_OSS_STOP_TOKENS = ["<|call|>", "<|return|>", "<|end|>"] + class Client(Protocol): """Protocol defining the OpenAI-compatible interface for the underlying provider client.""" @@ -467,6 +471,11 @@ def format_request( TypeError: If a message contains a content block type that cannot be converted to an OpenAI-compatible format. """ + params = cast(dict[str, Any], self.config.get("params", {})) + # Inject default GPT-OSS stop tokens unless the user has explicitly provided their own + if "gpt-oss" in cast(str, self.config.get("model_id", "")).lower() and "stop" not in params: + params = {**params, "stop": _GPT_OSS_STOP_TOKENS} + return { "messages": self.format_request_messages( messages, system_prompt, system_prompt_content=system_prompt_content @@ -486,7 +495,7 @@ def format_request( for tool_spec in tool_specs or [] ], **(self._format_request_tool_choice(tool_choice)), - **cast(dict[str, Any], self.config.get("params", {})), + **params, } def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent: diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py index 747e1123a..238c409d6 100644 --- a/tests/strands/models/test_openai.py +++ b/tests/strands/models/test_openai.py @@ -627,6 +627,25 @@ def test_format_request(model, messages, tool_specs, system_prompt): assert tru_request == exp_request +@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b"]) +def test_format_request_gpt_oss_injects_stop_tokens(model_id, model, messages, tool_specs, system_prompt): + tru_request = model.format_request(messages, tool_specs, system_prompt) + assert tru_request["stop"] == ["<|call|>", "<|return|>", "<|end|>"] + + +@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b"]) +def test_format_request_gpt_oss_preserves_explicit_stop_tokens(model_id, model, messages, tool_specs, system_prompt): + model.update_config(params={"max_tokens": 1, "stop": ["<|end|>"]}) + + tru_request = model.format_request(messages, tool_specs, system_prompt) + assert tru_request["stop"] == ["<|end|>"] + + +def test_format_request_non_gpt_oss_no_stop_tokens(model, messages, tool_specs, system_prompt): + tru_request = model.format_request(messages, tool_specs, system_prompt) + assert "stop" not in tru_request + + def test_format_request_with_tool_choice_auto(model, messages, tool_specs, system_prompt): tool_choice = {"auto": {}} tru_request = model.format_request(messages, tool_specs, system_prompt, tool_choice)