Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \

# Supported models
- `gpt-5.4`
- `gpt-5.4-mini`
- `gpt-5.2`
- `gpt-5.1`
- `gpt-5`
Expand All @@ -127,7 +128,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \

- `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)<br>
GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.<br>
The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. GPT-5.4 supports `none`, `low`, `medium`, `high`, and `xhigh`.
The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. `gpt-5.4` supports `none`, `low`, `medium`, `high`, and `xhigh`, while `gpt-5.4-mini` supports `low`, `medium`, `high`, and `xhigh`.

### Thinking summaries

Expand Down
7 changes: 7 additions & 0 deletions chatmock/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ class ModelSpec:
allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
variant_efforts=("xhigh", "high", "medium", "low", "none"),
),
ModelSpec(
public_id="gpt-5.4-mini",
upstream_id="gpt-5.4-mini",
aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"),
allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
variant_efforts=("xhigh", "high", "medium", "low"),
),
ModelSpec(
public_id="gpt-5.3-codex",
upstream_id="gpt-5.3-codex",
Expand Down
6 changes: 6 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,28 @@ class ModelRegistryTests(unittest.TestCase):
def test_normalizes_aliases(self) -> None:
    """Alias spellings resolve to their canonical public model ids."""
    cases = (
        ("gpt5", "gpt-5"),
        ("gpt5.4", "gpt-5.4"),
        ("gpt5.4-mini", "gpt-5.4-mini"),
        ("codex", "codex-mini-latest"),
    )
    for alias, canonical in cases:
        self.assertEqual(normalize_model_name(alias), canonical)

def test_strips_reasoning_suffixes(self) -> None:
    """Effort suffixes are stripped; unrelated suffixes are left intact."""
    cases = (
        ("gpt-5.4-high", "gpt-5.4"),
        ("gpt-5.4-mini-high", "gpt-5.4-mini"),
        ("gpt-5.2_codemirror", "gpt-5.2_codemirror"),
        ("gpt-5.1-codex:max", "gpt-5.1-codex:max"),
        ("gpt-5.1-codex:high", "gpt-5.1-codex"),
    )
    for raw, expected in cases:
        self.assertEqual(normalize_model_name(raw), expected)

def test_allowed_efforts_follow_registry(self) -> None:
    """Each model's allowed reasoning efforts match its registry spec."""
    full_range = frozenset(("none", "low", "medium", "high", "xhigh"))
    no_none = frozenset(("low", "medium", "high", "xhigh"))
    classic = frozenset(("low", "medium", "high"))
    self.assertEqual(allowed_efforts_for_model("gpt-5.4"), full_range)
    self.assertEqual(allowed_efforts_for_model("gpt-5.4-mini"), no_none)
    self.assertEqual(allowed_efforts_for_model("gpt-5.1-codex"), classic)

def test_public_models_include_variants(self) -> None:
    """Exposed model list carries effort variants only where allowed."""
    model_ids = list_public_models(expose_reasoning_models=True)
    expected_present = (
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5.4-none",
        "gpt-5.4-mini-xhigh",
        "gpt-5.1-codex-max-xhigh",
    )
    for model_id in expected_present:
        self.assertIn(model_id, model_ids)
    self.assertNotIn("gpt-5.4-mini-none", model_ids)
    self.assertNotIn("codex-mini-high", model_ids)

Expand Down
12 changes: 8 additions & 4 deletions tests/test_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@ def test_openai_models_list(self) -> None:
response = self.client.get("/v1/models")
body = response.get_json()
self.assertEqual(response.status_code, 200)
self.assertIn("gpt-5.4", [item["id"] for item in body["data"]])
model_ids = [item["id"] for item in body["data"]]
self.assertIn("gpt-5.4", model_ids)
self.assertIn("gpt-5.4-mini", model_ids)

def test_ollama_tags_list(self) -> None:
    """The Ollama-compatible tags endpoint lists both gpt-5.4 variants."""
    resp = self.client.get("/api/tags")
    payload = resp.get_json()
    self.assertEqual(resp.status_code, 200)
    names = [entry["name"] for entry in payload["models"]]
    self.assertIn("gpt-5.4", names)
    self.assertIn("gpt-5.4-mini", names)

@patch("chatmock.routes_openai.start_upstream_request")
def test_chat_completions(self, mock_start) -> None:
Expand All @@ -54,12 +58,12 @@ def test_chat_completions(self, mock_start) -> None:
)
response = self.client.post(
"/v1/chat/completions",
json={"model": "gpt5.4", "messages": [{"role": "user", "content": "hi"}]},
json={"model": "gpt5.4-mini", "messages": [{"role": "user", "content": "hi"}]},
)
body = response.get_json()
self.assertEqual(response.status_code, 200)
self.assertEqual(body["choices"][0]["message"]["content"], "hello")
self.assertEqual(body["model"], "gpt5.4")
self.assertEqual(body["model"], "gpt5.4-mini")

@patch("chatmock.routes_ollama.start_upstream_request")
def test_ollama_chat(self, mock_start) -> None:
Expand Down
Loading