From 7b129d65272c8ed1b4ac980640a9942ff12c38d9 Mon Sep 17 00:00:00 2001
From: Gautam Sirdeshmukh <gsird@amazon.com>
Date: Mon, 6 Apr 2026 13:08:27 -0400
Subject: [PATCH] fix: inject GPT-OSS stop tokens when not specified to prevent
 tool call failures

---
 src/strands/models/openai.py        | 11 ++++++++++-
 tests/strands/models/test_openai.py | 19 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py
index 73484e924..06c478ff3 100644
--- a/src/strands/models/openai.py
+++ b/src/strands/models/openai.py
@@ -36,6 +36,10 @@
     "too many total text bytes",
 ]
 
+# Stop tokens for GPT-OSS models to enforce generation boundaries
+# https://github.com/openai/harmony/blob/main/src/registry.rs
+_GPT_OSS_STOP_TOKENS = ["<|call|>", "<|return|>", "<|end|>"]
+
 
 class Client(Protocol):
     """Protocol defining the OpenAI-compatible interface for the underlying provider client."""
@@ -467,6 +471,11 @@ def format_request(
             TypeError: If a message contains a content block type that cannot be converted to an OpenAI-compatible
                 format.
         """
+        params = cast(dict[str, Any], self.config.get("params") or {})  # tolerate params unset or None
+        # Inject a copy of the default GPT-OSS stop tokens unless the user has explicitly provided their own
+        if "gpt-oss" in cast(str, self.config.get("model_id", "")).lower() and "stop" not in params:
+            params = {**params, "stop": list(_GPT_OSS_STOP_TOKENS)}
+
         return {
             "messages": self.format_request_messages(
                 messages, system_prompt, system_prompt_content=system_prompt_content
@@ -486,7 +495,7 @@ def format_request(
                 for tool_spec in tool_specs or []
             ],
             **(self._format_request_tool_choice(tool_choice)),
-            **cast(dict[str, Any], self.config.get("params", {})),
+            **params,
         }
 
     def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py
index 747e1123a..238c409d6 100644
--- a/tests/strands/models/test_openai.py
+++ b/tests/strands/models/test_openai.py
@@ -627,6 +627,25 @@ def test_format_request(model, messages, tool_specs, system_prompt):
     assert tru_request == exp_request
 
 
+@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b", "openai.gpt-oss-20b", "OpenAI.GPT-OSS-120B"])
+def test_format_request_gpt_oss_injects_stop_tokens(model_id, model, messages, tool_specs, system_prompt):
+    tru_request = model.format_request(messages, tool_specs, system_prompt)
+    assert tru_request["stop"] == ["<|call|>", "<|return|>", "<|end|>"]  # id match is a case-insensitive substring
+
+
+@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b"])
+def test_format_request_gpt_oss_preserves_explicit_stop_tokens(model_id, model, messages, tool_specs, system_prompt):
+    model.update_config(params={"max_tokens": 1, "stop": ["<|end|>"]})  # user-supplied stop overrides defaults
+
+    tru_request = model.format_request(messages, tool_specs, system_prompt)
+    assert tru_request["stop"] == ["<|end|>"]
+
+
+def test_format_request_non_gpt_oss_no_stop_tokens(model, messages, tool_specs, system_prompt):
+    tru_request = model.format_request(messages, tool_specs, system_prompt)
+    assert "stop" not in tru_request  # NOTE(review): assumes the default model fixture id lacks "gpt-oss" - confirm
+
+
 def test_format_request_with_tool_choice_auto(model, messages, tool_specs, system_prompt):
     tool_choice = {"auto": {}}
     tru_request = model.format_request(messages, tool_specs, system_prompt, tool_choice)