Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/strands/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
"too many total text bytes",
]

# Default stop tokens for GPT-OSS models, used to enforce generation boundaries.
# They are injected into the request only when the caller has not supplied a
# "stop" param of their own (OpenAI caps requests at 4 stop sequences, so
# unconditionally appending these could push a user's request over the limit).
# Token definitions: https://github.com/openai/harmony/blob/main/src/registry.rs
_GPT_OSS_STOP_TOKENS = ["<|call|>", "<|return|>", "<|end|>"]


class Client(Protocol):
"""Protocol defining the OpenAI-compatible interface for the underlying provider client."""
Expand Down Expand Up @@ -467,6 +471,11 @@ def format_request(
TypeError: If a message contains a content block type that cannot be converted to an OpenAI-compatible
format.
"""
params = cast(dict[str, Any], self.config.get("params", {}))
# Inject default GPT-OSS stop tokens unless the user has explicitly provided their own
if "gpt-oss" in cast(str, self.config.get("model_id", "")).lower() and "stop" not in params:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this something we should always inject? any potential downsides, ie reasons not to do it? 🤔

or should the customer just pass this param for their custom self hosting?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my main hesitation is, this model provider is the gateway to all models that use openai chat shapes. so if we start injecting this, would/could it negatively impact others?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only inject the defaults when no stop tokens are specified at all. If the user passes their own, or if pre-configured middleware overrides with its own defaults (like what happens for request to the main Bedrock/Mantle endpoint), we don't touch them.

We should NOT inject these tokens every time because OpenAI only allows a max of 4 stop sequences (ref). So, if the user has already passed some of their own, or if pre-configured middleware overrides with default stop tokens, appending these 3 defaults will almost certainly exceed the limit and cause errors.

params = {**params, "stop": _GPT_OSS_STOP_TOKENS}

return {
"messages": self.format_request_messages(
messages, system_prompt, system_prompt_content=system_prompt_content
Expand All @@ -486,7 +495,7 @@ def format_request(
for tool_spec in tool_specs or []
],
**(self._format_request_tool_choice(tool_choice)),
**cast(dict[str, Any], self.config.get("params", {})),
**params,
}

def format_chunk(self, event: dict[str, Any], **kwargs: Any) -> StreamEvent:
Expand Down
19 changes: 19 additions & 0 deletions tests/strands/models/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,25 @@ def test_format_request(model, messages, tool_specs, system_prompt):
assert tru_request == exp_request


@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b"])
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Consider adding a parametrized test case with mixed-case model IDs (e.g., "openai.GPT-OSS-120B") to validate the .lower() normalization, since this is a key part of the matching logic.

@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b", "openai.GPT-OSS-120B"])
def test_format_request_gpt_oss_injects_stop_tokens(model_id, model, messages, tool_specs, system_prompt):
    ...

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Official model ID is lowercase

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: consider adopting the same fixture pattern: @pytest.fixture

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

model_id is already defined as a pytest.fixture (set to the dummy id "m1") — this parametrize marker overrides the fixture with new values. To your point, I suppose we could set up a fixture for model_id_gpt_oss_120b — I can follow up with some cosmetic upgrades for the tests.

def test_format_request_gpt_oss_injects_stop_tokens(model_id, model, messages, tool_specs, system_prompt):
    """GPT-OSS model IDs get the default Harmony stop tokens injected."""
    request = model.format_request(messages, tool_specs, system_prompt)

    expected_stop = ["<|call|>", "<|return|>", "<|end|>"]
    assert request["stop"] == expected_stop


@pytest.mark.parametrize("model_id", ["openai.gpt-oss-120b"])
def test_format_request_gpt_oss_preserves_explicit_stop_tokens(model_id, model, messages, tool_specs, system_prompt):
    """User-supplied stop tokens are passed through untouched for GPT-OSS models."""
    model.update_config(params={"max_tokens": 1, "stop": ["<|end|>"]})

    request = model.format_request(messages, tool_specs, system_prompt)

    assert request["stop"] == ["<|end|>"]


def test_format_request_non_gpt_oss_no_stop_tokens(model, messages, tool_specs, system_prompt):
    """Models whose ID does not contain 'gpt-oss' get no stop tokens injected."""
    request = model.format_request(messages, tool_specs, system_prompt)

    assert "stop" not in request


def test_format_request_with_tool_choice_auto(model, messages, tool_specs, system_prompt):
tool_choice = {"auto": {}}
tru_request = model.format_request(messages, tool_specs, system_prompt, tool_choice)
Expand Down
Loading