From 4d8ffec8bc47ea2b1dac2ee523c11a2376303292 Mon Sep 17 00:00:00 2001 From: Ra's al Ghul Date: Thu, 19 Mar 2026 21:14:17 -0400 Subject: [PATCH] feat: Add gpt-5.4-mini. This just adds gpt-5.4-mini at (low/med/high/xhigh) reasoning levels. --- README.md | 3 ++- chatmock/model_registry.py | 7 +++++++ tests/test_models.py | 6 ++++++ tests/test_routes.py | 12 ++++++++---- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 61ef96c..0b246aa 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \ # Supported models - `gpt-5.4` +- `gpt-5.4-mini` - `gpt-5.2` - `gpt-5.1` - `gpt-5` @@ -127,7 +128,7 @@ curl http://127.0.0.1:8000/v1/chat/completions \ - `--reasoning-effort` (choice of none,minimal,low,medium,high,xhigh)
 GPT-5 has a configurable amount of "effort" it can put into thinking, which may cause it to take more time for a response to return, but may overall give a smarter answer. Applying this parameter after `serve` forces the server to use this reasoning effort by default, unless overridden by the API request with a different effort set. The default reasoning effort without setting this parameter is `medium`.
- The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. GPT-5.4 supports `none`, `low`, `medium`, `high`, and `xhigh`. + The `gpt-5.1` family (including codex) supports `low`, `medium`, and `high` while `gpt-5.1-codex-max` adds `xhigh`. The `gpt-5.2` and `gpt-5.3` families (including codex) support `low`, `medium`, `high`, and `xhigh`. `gpt-5.4` supports `none`, `low`, `medium`, `high`, and `xhigh`, while `gpt-5.4-mini` supports `low`, `medium`, `high`, and `xhigh`. ### Thinking summaries diff --git a/chatmock/model_registry.py b/chatmock/model_registry.py index bf01e94..b171883 100644 --- a/chatmock/model_registry.py +++ b/chatmock/model_registry.py @@ -47,6 +47,13 @@ class ModelSpec: allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")), variant_efforts=("xhigh", "high", "medium", "low", "none"), ), + ModelSpec( + public_id="gpt-5.4-mini", + upstream_id="gpt-5.4-mini", + aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"), + allowed_efforts=frozenset(("low", "medium", "high", "xhigh")), + variant_efforts=("xhigh", "high", "medium", "low"), + ), ModelSpec( public_id="gpt-5.3-codex", upstream_id="gpt-5.3-codex", diff --git a/tests/test_models.py b/tests/test_models.py index 8eeae9e..4d690cf 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -9,22 +9,28 @@ class ModelRegistryTests(unittest.TestCase): def test_normalizes_aliases(self) -> None: self.assertEqual(normalize_model_name("gpt5"), "gpt-5") self.assertEqual(normalize_model_name("gpt5.4"), "gpt-5.4") + self.assertEqual(normalize_model_name("gpt5.4-mini"), "gpt-5.4-mini") self.assertEqual(normalize_model_name("codex"), "codex-mini-latest") def test_strips_reasoning_suffixes(self) -> None: self.assertEqual(normalize_model_name("gpt-5.4-high"), "gpt-5.4") + self.assertEqual(normalize_model_name("gpt-5.4-mini-high"), 
"gpt-5.4-mini") self.assertEqual(normalize_model_name("gpt-5.2_codemirror"), "gpt-5.2_codemirror") self.assertEqual(normalize_model_name("gpt-5.1-codex:max"), "gpt-5.1-codex:max") self.assertEqual(normalize_model_name("gpt-5.1-codex:high"), "gpt-5.1-codex") def test_allowed_efforts_follow_registry(self) -> None: self.assertEqual(allowed_efforts_for_model("gpt-5.4"), frozenset(("none", "low", "medium", "high", "xhigh"))) + self.assertEqual(allowed_efforts_for_model("gpt-5.4-mini"), frozenset(("low", "medium", "high", "xhigh"))) self.assertEqual(allowed_efforts_for_model("gpt-5.1-codex"), frozenset(("low", "medium", "high"))) def test_public_models_include_variants(self) -> None: model_ids = list_public_models(expose_reasoning_models=True) self.assertIn("gpt-5.4", model_ids) + self.assertIn("gpt-5.4-mini", model_ids) self.assertIn("gpt-5.4-none", model_ids) + self.assertIn("gpt-5.4-mini-xhigh", model_ids) + self.assertNotIn("gpt-5.4-mini-none", model_ids) self.assertIn("gpt-5.1-codex-max-xhigh", model_ids) self.assertNotIn("codex-mini-high", model_ids) diff --git a/tests/test_routes.py b/tests/test_routes.py index 159fe09..b0d3422 100644 --- a/tests/test_routes.py +++ b/tests/test_routes.py @@ -33,13 +33,17 @@ def test_openai_models_list(self) -> None: response = self.client.get("/v1/models") body = response.get_json() self.assertEqual(response.status_code, 200) - self.assertIn("gpt-5.4", [item["id"] for item in body["data"]]) + model_ids = [item["id"] for item in body["data"]] + self.assertIn("gpt-5.4", model_ids) + self.assertIn("gpt-5.4-mini", model_ids) def test_ollama_tags_list(self) -> None: response = self.client.get("/api/tags") body = response.get_json() self.assertEqual(response.status_code, 200) - self.assertIn("gpt-5.4", [item["name"] for item in body["models"]]) + model_names = [item["name"] for item in body["models"]] + self.assertIn("gpt-5.4", model_names) + self.assertIn("gpt-5.4-mini", model_names) 
@patch("chatmock.routes_openai.start_upstream_request") def test_chat_completions(self, mock_start) -> None: @@ -54,12 +58,12 @@ def test_chat_completions(self, mock_start) -> None: ) response = self.client.post( "/v1/chat/completions", - json={"model": "gpt5.4", "messages": [{"role": "user", "content": "hi"}]}, + json={"model": "gpt5.4-mini", "messages": [{"role": "user", "content": "hi"}]}, ) body = response.get_json() self.assertEqual(response.status_code, 200) self.assertEqual(body["choices"][0]["message"]["content"], "hello") - self.assertEqual(body["model"], "gpt5.4") + self.assertEqual(body["model"], "gpt5.4-mini") @patch("chatmock.routes_ollama.start_upstream_request") def test_ollama_chat(self, mock_start) -> None: