From 0dd7f5ccbeb0faa75a13e867c5883eb350fdbc12 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Thu, 29 Jan 2026 23:45:11 +0100 Subject: [PATCH 1/5] [Services] Add default probes if model is set #3522 --- .../_internal/core/models/configurations.py | 39 ++++++++++++++-- .../core/models/test_configurations.py | 45 +++++++++++++++++++ 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py index 3b2c7812b9..f6c1b385a2 100644 --- a/src/dstack/_internal/core/models/configurations.py +++ b/src/dstack/_internal/core/models/configurations.py @@ -56,6 +56,8 @@ DEFAULT_PROBE_METHOD = "get" MAX_PROBE_URL_LEN = 2048 DEFAULT_REPLICA_GROUP_NAME = "0" +DEFAULT_MODEL_PROBE_TIMEOUT = 30 +DEFAULT_MODEL_PROBE_URL = "/v1/chat/completions" class RunConfigurationType(str, Enum): @@ -851,9 +853,9 @@ class ServiceConfigurationParams(CoreModel): ] = None rate_limits: Annotated[list[RateLimit], Field(description="Rate limiting rules")] = [] probes: Annotated[ - list[ProbeConfig], + Optional[list[ProbeConfig]], Field(description="List of probes used to determine job health"), - ] = [] + ] = None # None = omitted (may get default when model is set); [] = explicit empty replicas: Annotated[ Optional[Union[List[ReplicaGroup], Range[int]]], @@ -895,7 +897,9 @@ def validate_rate_limits(cls, v: list[RateLimit]) -> list[RateLimit]: return v @validator("probes") - def validate_probes(cls, v: list[ProbeConfig]) -> list[ProbeConfig]: + def validate_probes(cls, v: Optional[list[ProbeConfig]]) -> Optional[list[ProbeConfig]]: + if v is None: + return v if has_duplicates(v): # Using a custom validator instead of Field(unique_items=True) to avoid Pydantic bug: # https://github.com/pydantic/pydantic/issues/3765 @@ -932,6 +936,35 @@ def validate_replicas( ) return v + @root_validator() + def set_default_probes_for_model(cls, values): + model = values.get("model") + probes = values.get("probes") + if model is not None and probes is None: + body = orjson.dumps( + { + "model": model.name, + "messages": [{"role": "user", "content": "hi"}], + "max_tokens": 1, + } + ).decode("utf-8") + values["probes"] = [ + ProbeConfig( + type="http", + method="post", + url=DEFAULT_MODEL_PROBE_URL, + headers=[ + HTTPHeaderSpec(name="Content-Type", value="application/json"), + ], + body=body, + timeout=DEFAULT_MODEL_PROBE_TIMEOUT, + ) + ] + elif probes is None: + # Probes omitted and model not set: normalize to empty list for downstream. 
+ values["probes"] = [] + return values + @root_validator() def validate_scaling(cls, values): scaling = values.get("scaling") diff --git a/src/tests/_internal/core/models/test_configurations.py b/src/tests/_internal/core/models/test_configurations.py index 65eec62642..1ff025ea2f 100644 --- a/src/tests/_internal/core/models/test_configurations.py +++ b/src/tests/_internal/core/models/test_configurations.py @@ -5,6 +5,8 @@ from dstack._internal.core.errors import ConfigurationError from dstack._internal.core.models.common import RegistryAuth from dstack._internal.core.models.configurations import ( + DEFAULT_MODEL_PROBE_TIMEOUT, + DEFAULT_MODEL_PROBE_URL, DevEnvironmentConfigurationParams, RepoSpec, parse_run_configuration, @@ -13,6 +15,49 @@ class TestParseConfiguration: + def test_service_model_sets_default_probes_when_probes_omitted(self): + conf = { + "type": "service", + "commands": ["python3 -m http.server"], + "port": 8000, + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + } + parsed = parse_run_configuration(conf) + assert len(parsed.probes) == 1 + probe = parsed.probes[0] + assert probe.type == "http" + assert probe.method == "post" + assert probe.url == DEFAULT_MODEL_PROBE_URL + assert probe.timeout == DEFAULT_MODEL_PROBE_TIMEOUT + assert len(probe.headers) == 1 + assert probe.headers[0].name == "Content-Type" + assert probe.headers[0].value == "application/json" + assert "meta-llama/Meta-Llama-3.1-8B-Instruct" in (probe.body or "") + assert "max_tokens" in (probe.body or "") + + def test_service_model_does_not_override_explicit_probes(self): + conf = { + "type": "service", + "commands": ["python3 -m http.server"], + "port": 8000, + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "probes": [{"type": "http", "url": "/health"}], + } + parsed = parse_run_configuration(conf) + assert len(parsed.probes) == 1 + assert parsed.probes[0].url == "/health" + + def test_service_model_explicit_empty_probes_no_default(self): + conf = { + "type": "service", + "commands": ["python3 -m http.server"], + "port": 8000, + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "probes": [], + } + parsed = parse_run_configuration(conf) + assert len(parsed.probes) == 0 + def test_services_replicas_and_scaling(self): def test_conf(replicas: Any, scaling: Optional[Any] = None): conf = { From a10565cd4f8edddfef658090cebb0112ce6ee2d8 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Thu, 29 Jan 2026 23:53:13 +0100 Subject: [PATCH 2/5] Fixing `pyright` --- src/dstack/_internal/cli/services/configurators/run.py | 2 +- .../_internal/server/services/jobs/configurators/base.py | 2 +- src/dstack/_internal/server/services/runs/spec.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dstack/_internal/cli/services/configurators/run.py b/src/dstack/_internal/cli/services/configurators/run.py index fc76fe43ed..1077eff8a9 100644 --- a/src/dstack/_internal/cli/services/configurators/run.py +++ b/src/dstack/_internal/cli/services/configurators/run.py @@ -354,7 +354,7 @@ def interpolate_env(self, conf: RunConfigurationT): password=interpolator.interpolate_or_error(conf.registry_auth.password), ) if isinstance(conf, ServiceConfiguration): - for probe in conf.probes: + for probe in conf.probes or []: for header in probe.headers: header.value = interpolator.interpolate_or_error(header.value) if probe.url: diff --git a/src/dstack/_internal/server/services/jobs/configurators/base.py b/src/dstack/_internal/server/services/jobs/configurators/base.py index df6738a774..44579bf81c 100644 --- 
a/src/dstack/_internal/server/services/jobs/configurators/base.py +++ b/src/dstack/_internal/server/services/jobs/configurators/base.py @@ -394,7 +394,7 @@ def _service_port(self) -> Optional[int]: def _probes(self) -> list[ProbeSpec]: if isinstance(self.run_spec.configuration, ServiceConfiguration): - return list(map(_probe_config_to_spec, self.run_spec.configuration.probes)) + return list(map(_probe_config_to_spec, self.run_spec.configuration.probes or [])) return [] diff --git a/src/dstack/_internal/server/services/runs/spec.py b/src/dstack/_internal/server/services/runs/spec.py index db81eb724a..ad2fcef1ff 100644 --- a/src/dstack/_internal/server/services/runs/spec.py +++ b/src/dstack/_internal/server/services/runs/spec.py @@ -94,13 +94,13 @@ def validate_run_spec_and_set_defaults( raise ServerClientError( "Scheduled services with autoscaling to zero are not supported" ) - if len(run_spec.configuration.probes) > settings.MAX_PROBES_PER_JOB: + if len(run_spec.configuration.probes or []) > settings.MAX_PROBES_PER_JOB: raise ServerClientError( f"Cannot configure more than {settings.MAX_PROBES_PER_JOB} probes" ) if any( p.timeout is not None and p.timeout > settings.MAX_PROBE_TIMEOUT - for p in run_spec.configuration.probes + for p in (run_spec.configuration.probes or []) ): raise ServerClientError( f"Probe timeout cannot be longer than {settings.MAX_PROBE_TIMEOUT}s" From c4058228fcb8e8f06ad318201575e4ff90ded407 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Feb 2026 20:35:40 +0100 Subject: [PATCH 3/5] PR review feedback --- docs/docs/concepts/services.md | 19 +++- .../_internal/core/models/configurations.py | 29 ------ .../services/jobs/configurators/base.py | 34 ++++++- .../core/models/test_configurations.py | 24 ++--- .../jobs/configurators/test_service.py | 98 +++++++++++++++++++ 5 files changed, 159 insertions(+), 45 deletions(-) create mode 100644 src/tests/_internal/server/services/jobs/configurators/test_service.py diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index fed9f7cb39..9f4e5900dc 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -233,6 +233,16 @@ Setting the minimum number of replicas to `0` allows the service to scale down t ??? info "Disaggregated serving" Native support for disaggregated prefill and decode, allowing both worker types to run within a single service, is coming soon. +### Model + +If the service is running a chat model with an OpenAI-compatible interface (i.e., `/v1/chat/completions`), +set the [`model`](../reference/dstack.yml/service.md#model) property to make the model accessible via `dstack`'s +global OpenAI-compatible endpoint, and also accessible via `dstack`'s UI. + +When `model` is set, `dstack` automatically configures [`probes`](#probes) to verify model health. +To customize or disable this, set `probes` explicitly. + + ### Authorization By default, the service enables authorization, meaning the service endpoint requires a `dstack` user token. @@ -290,7 +300,7 @@ $ dstack ps --verbose -??? info "Probe statuses" +??? info "Status" The following symbols are used for probe statuses: - `×` — the last probe execution failed. @@ -328,6 +338,13 @@ Probes are executed for each service replica while the replica is `running`. A p +??? info "Model" + If you set the [`model`](#model) property but don't explicitly configure `probes`, + `dstack` automatically configures a default probe that tests the model using the `/v1/chat/completions` API. 
+    This default probe sends a minimal chat completion request to verify the model is responding correctly.
+
+    To disable probes entirely when `model` is set, explicitly set `probes` to an empty list.
+
 See the [reference](../reference/dstack.yml/service.md#probes) for more probe configuration options.
 
 ### Path prefix { #path-prefix }
diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py
index f6c1b385a2..7c5d442e44 100644
--- a/src/dstack/_internal/core/models/configurations.py
+++ b/src/dstack/_internal/core/models/configurations.py
@@ -936,35 +936,6 @@ def validate_replicas(
             )
         return v
 
-    @root_validator()
-    def set_default_probes_for_model(cls, values):
-        model = values.get("model")
-        probes = values.get("probes")
-        if model is not None and probes is None:
-            body = orjson.dumps(
-                {
-                    "model": model.name,
-                    "messages": [{"role": "user", "content": "hi"}],
-                    "max_tokens": 1,
-                }
-            ).decode("utf-8")
-            values["probes"] = [
-                ProbeConfig(
-                    type="http",
-                    method="post",
-                    url=DEFAULT_MODEL_PROBE_URL,
-                    headers=[
-                        HTTPHeaderSpec(name="Content-Type", value="application/json"),
-                    ],
-                    body=body,
-                    timeout=DEFAULT_MODEL_PROBE_TIMEOUT,
-                )
-            ]
-        elif probes is None:
-            # Probes omitted and model not set: normalize to empty list for downstream.
-            values["probes"] = []
-        return values
-
     @root_validator()
     def validate_scaling(cls, values):
         scaling = values.get("scaling")
diff --git a/src/dstack/_internal/server/services/jobs/configurators/base.py b/src/dstack/_internal/server/services/jobs/configurators/base.py
index 44579bf81c..cb914fe94d 100644
--- a/src/dstack/_internal/server/services/jobs/configurators/base.py
+++ b/src/dstack/_internal/server/services/jobs/configurators/base.py
@@ -5,18 +5,22 @@
 from pathlib import PurePosixPath
 from typing import Dict, List, Optional
 
+import orjson
 from cachetools import TTLCache, cached
 
 from dstack._internal import settings
 from dstack._internal.core.errors import DockerRegistryError, ServerClientError
 from dstack._internal.core.models.common import RegistryAuth
 from dstack._internal.core.models.configurations import (
+    DEFAULT_MODEL_PROBE_TIMEOUT,
+    DEFAULT_MODEL_PROBE_URL,
     DEFAULT_PROBE_INTERVAL,
     DEFAULT_PROBE_METHOD,
     DEFAULT_PROBE_READY_AFTER,
     DEFAULT_PROBE_URL,
     DEFAULT_REPLICA_GROUP_NAME,
     LEGACY_REPO_DIR,
+    HTTPHeaderSpec,
     PortMapping,
     ProbeConfig,
     PythonVersion,
@@ -394,7 +398,13 @@ def _service_port(self) -> Optional[int]:
 
     def _probes(self) -> list[ProbeSpec]:
         if isinstance(self.run_spec.configuration, ServiceConfiguration):
-            return list(map(_probe_config_to_spec, self.run_spec.configuration.probes or []))
+            probes = self.run_spec.configuration.probes
+            if probes is not None:
+                return list(map(_probe_config_to_spec, probes))
+            # Generate default probe if model is set
+            model = self.run_spec.configuration.model
+            if model is not None:
+                return [_default_model_probe_spec(model.name)]
         return []
 
 
@@ -447,6 +457,28 @@ def _probe_config_to_spec(c: ProbeConfig) -> ProbeSpec:
     )
 
 
+def _default_model_probe_spec(model_name: str) -> ProbeSpec:
+    body = orjson.dumps(
+        {
+            "model": model_name,
+            "messages": [{"role": "user", "content": "hi"}],
+            "max_tokens": 1,
+        }
+    ).decode("utf-8")
+    return ProbeSpec(
+        type="http",
+        method="post",
+        url=DEFAULT_MODEL_PROBE_URL,
+        headers=[
+            HTTPHeaderSpec(name="Content-Type", value="application/json"),
+        ],
+        body=body,
+        timeout=DEFAULT_MODEL_PROBE_TIMEOUT,
+        interval=DEFAULT_PROBE_INTERVAL,
+        ready_after=DEFAULT_PROBE_READY_AFTER,
+    )
+
+
 def 
_join_shell_commands(commands: List[str]) -> str: for i, cmd in enumerate(commands): cmd = cmd.strip() diff --git a/src/tests/_internal/core/models/test_configurations.py b/src/tests/_internal/core/models/test_configurations.py index 1ff025ea2f..44c31f5cbb 100644 --- a/src/tests/_internal/core/models/test_configurations.py +++ b/src/tests/_internal/core/models/test_configurations.py @@ -5,17 +5,18 @@ from dstack._internal.core.errors import ConfigurationError from dstack._internal.core.models.common import RegistryAuth from dstack._internal.core.models.configurations import ( - DEFAULT_MODEL_PROBE_TIMEOUT, - DEFAULT_MODEL_PROBE_URL, DevEnvironmentConfigurationParams, RepoSpec, + ServiceConfiguration, parse_run_configuration, ) from dstack._internal.core.models.resources import Range class TestParseConfiguration: - def test_service_model_sets_default_probes_when_probes_omitted(self): + def test_service_model_probes_none_when_omitted(self): + """When model is set but probes omitted, probes should remain None. + The default probe is generated server-side in the job configurator.""" conf = { "type": "service", "commands": ["python3 -m http.server"], @@ -23,17 +24,8 @@ def test_service_model_sets_default_probes_when_probes_omitted(self): "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", } parsed = parse_run_configuration(conf) - assert len(parsed.probes) == 1 - probe = parsed.probes[0] - assert probe.type == "http" - assert probe.method == "post" - assert probe.url == DEFAULT_MODEL_PROBE_URL - assert probe.timeout == DEFAULT_MODEL_PROBE_TIMEOUT - assert len(probe.headers) == 1 - assert probe.headers[0].name == "Content-Type" - assert probe.headers[0].value == "application/json" - assert "meta-llama/Meta-Llama-3.1-8B-Instruct" in (probe.body or "") - assert "max_tokens" in (probe.body or "") + assert isinstance(parsed, ServiceConfiguration) + assert parsed.probes is None def test_service_model_does_not_override_explicit_probes(self): conf = { @@ -44,6 +36,8 @@ def test_service_model_does_not_override_explicit_probes(self): "probes": [{"type": "http", "url": "/health"}], } parsed = parse_run_configuration(conf) + assert isinstance(parsed, ServiceConfiguration) + assert parsed.probes is not None assert len(parsed.probes) == 1 assert parsed.probes[0].url == "/health" @@ -56,6 +50,8 @@ def test_service_model_explicit_empty_probes_no_default(self): "probes": [], } parsed = parse_run_configuration(conf) + assert isinstance(parsed, ServiceConfiguration) + assert parsed.probes is not None assert len(parsed.probes) == 0 def test_services_replicas_and_scaling(self): diff --git a/src/tests/_internal/server/services/jobs/configurators/test_service.py b/src/tests/_internal/server/services/jobs/configurators/test_service.py new file mode 100644 index 0000000000..b52ee297a5 --- /dev/null +++ b/src/tests/_internal/server/services/jobs/configurators/test_service.py @@ -0,0 +1,98 @@ +import pytest + +from dstack._internal.core.models.configurations import ( + DEFAULT_MODEL_PROBE_TIMEOUT, + DEFAULT_MODEL_PROBE_URL, + ProbeConfig, + ServiceConfiguration, +) +from dstack._internal.core.models.services import OpenAIChatModel +from dstack._internal.server.services.jobs.configurators.service import ServiceJobConfigurator +from dstack._internal.server.testing.common import get_run_spec + + +@pytest.mark.asyncio +@pytest.mark.usefixtures("image_config_mock") +class TestProbes: + async def test_default_probe_when_model_set(self): + """When model is set but probes omitted, a default model probe should be generated.""" + 
configuration = ServiceConfiguration( + port=80, + image="debian", + model=OpenAIChatModel( + name="meta-llama/Meta-Llama-3.1-8B-Instruct", + format="openai", + ), + ) + run_spec = get_run_spec(run_name="run", repo_id="id", configuration=configuration) + configurator = ServiceJobConfigurator(run_spec) + + job_specs = await configurator.get_job_specs(replica_num=0) + + assert len(job_specs) == 1 + probes = job_specs[0].probes + assert len(probes) == 1 + probe = probes[0] + assert probe.type == "http" + assert probe.method == "post" + assert probe.url == DEFAULT_MODEL_PROBE_URL + assert probe.timeout == DEFAULT_MODEL_PROBE_TIMEOUT + assert len(probe.headers) == 1 + assert probe.headers[0].name == "Content-Type" + assert probe.headers[0].value == "application/json" + assert "meta-llama/Meta-Llama-3.1-8B-Instruct" in (probe.body or "") + assert "max_tokens" in (probe.body or "") + + async def test_explicit_probes_not_overridden(self): + """When probes are explicitly set, they should be used as-is.""" + configuration = ServiceConfiguration( + port=80, + image="debian", + model=OpenAIChatModel( + name="meta-llama/Meta-Llama-3.1-8B-Instruct", + format="openai", + ), + probes=[ProbeConfig(type="http", url="/health")], + ) + run_spec = get_run_spec(run_name="run", repo_id="id", configuration=configuration) + configurator = ServiceJobConfigurator(run_spec) + + job_specs = await configurator.get_job_specs(replica_num=0) + + assert len(job_specs) == 1 + probes = job_specs[0].probes + assert len(probes) == 1 + assert probes[0].url == "/health" + + async def test_explicit_empty_probes(self): + """When probes is explicitly set to empty list, no probes should be generated.""" + configuration = ServiceConfiguration( + port=80, + image="debian", + model=OpenAIChatModel( + name="meta-llama/Meta-Llama-3.1-8B-Instruct", + format="openai", + ), + probes=[], + ) + run_spec = get_run_spec(run_name="run", repo_id="id", configuration=configuration) + configurator = ServiceJobConfigurator(run_spec) + + job_specs = await configurator.get_job_specs(replica_num=0) + + assert len(job_specs) == 1 + assert len(job_specs[0].probes) == 0 + + async def test_no_probe_when_no_model(self): + """When neither model nor probes are set, no probes should be generated.""" + configuration = ServiceConfiguration( + port=80, + image="debian", + ) + run_spec = get_run_spec(run_name="run", repo_id="id", configuration=configuration) + configurator = ServiceJobConfigurator(run_spec) + + job_specs = await configurator.get_job_specs(replica_num=0) + + assert len(job_specs) == 1 + assert len(job_specs[0].probes) == 0 From 5f81c696625d298e0d9736dec671aa42453be8db Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Feb 2026 20:39:05 +0100 Subject: [PATCH 4/5] PR review feedback --- src/dstack/_internal/core/models/configurations.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/dstack/_internal/core/models/configurations.py b/src/dstack/_internal/core/models/configurations.py index 7c5d442e44..601cf11a81 100644 --- a/src/dstack/_internal/core/models/configurations.py +++ b/src/dstack/_internal/core/models/configurations.py @@ -854,7 +854,11 @@ class ServiceConfigurationParams(CoreModel): rate_limits: Annotated[list[RateLimit], Field(description="Rate limiting rules")] = [] probes: Annotated[ Optional[list[ProbeConfig]], - Field(description="List of probes used to determine job health"), + Field( + description="The list of probes to determine service health. 
" + "If `model` is set, defaults to a `/v1/chat/completions` probe. " + "Set explicitly to override." + ), ] = None # None = omitted (may get default when model is set); [] = explicit empty replicas: Annotated[ From 3e2586747ee26fd8765a14dff15c5c6518b35558 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Mon, 2 Feb 2026 20:48:10 +0100 Subject: [PATCH 5/5] Resolved conflict --- docs/docs/concepts/services.md | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 9f4e5900dc..d40984866b 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -233,16 +233,6 @@ Setting the minimum number of replicas to `0` allows the service to scale down t ??? info "Disaggregated serving" Native support for disaggregated prefill and decode, allowing both worker types to run within a single service, is coming soon. -### Model - -If the service is running a chat model with an OpenAI-compatible interface (i.e., `/v1/chat/completions`), -set the [`model`](../reference/dstack.yml/service.md#model) property to make the model accessible via `dstack`'s -global OpenAI-compatible endpoint, and also accessible via `dstack`'s UI. - -When `model` is set, `dstack` automatically configures [`probes`](#probes) to verify model health. -To customize or disable this, set `probes` explicitly. - - ### Authorization By default, the service enables authorization, meaning the service endpoint requires a `dstack` user token. @@ -341,8 +331,6 @@ Probes are executed for each service replica while the replica is `running`. A p ??? info "Model" If you set the [`model`](#model) property but don't explicitly configure `probes`, `dstack` automatically configures a default probe that tests the model using the `/v1/chat/completions` API. - This default probe sends a minimal chat completion request to verify the model is responding correctly. - To disable probes entirely when `model` is set, explicitly set `probes` to an empty list. See the [reference](../reference/dstack.yml/service.md#probes) for more probe configuration options. @@ -442,6 +430,9 @@ Limits apply to the whole service (all replicas) and per client (by IP). Clients If the service runs a model with an OpenAI-compatible interface, you can set the [`model`](#model) property to make the model accessible through `dstack`'s chat UI on the `Models` page. In this case, `dstack` will use the service's `/v1/chat/completions` service. +When `model` is set, `dstack` automatically configures [`probes`](#probes) to verify model health. +To customize or disable this, set `probes` explicitly. + ### Resources If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a