-
Notifications
You must be signed in to change notification settings - Fork 211
[Services] Add default probes if model is set #3524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,6 +56,8 @@ | |
| DEFAULT_PROBE_METHOD = "get" | ||
| MAX_PROBE_URL_LEN = 2048 | ||
| DEFAULT_REPLICA_GROUP_NAME = "0" | ||
| DEFAULT_MODEL_PROBE_TIMEOUT = 30 | ||
| DEFAULT_MODEL_PROBE_URL = "/v1/chat/completions" | ||
|
|
||
|
|
||
| class RunConfigurationType(str, Enum): | ||
|
|
@@ -851,9 +853,9 @@ class ServiceConfigurationParams(CoreModel): | |
| ] = None | ||
| rate_limits: Annotated[list[RateLimit], Field(description="Rate limiting rules")] = [] | ||
| probes: Annotated[ | ||
| list[ProbeConfig], | ||
| Optional[list[ProbeConfig]], | ||
| Field(description="List of probes used to determine job health"), | ||
| ] = [] | ||
| ] = None # None = omitted (may get default when model is set); [] = explicit empty | ||
|
|
||
| replicas: Annotated[ | ||
| Optional[Union[List[ReplicaGroup], Range[int]]], | ||
|
|
@@ -895,7 +897,9 @@ def validate_rate_limits(cls, v: list[RateLimit]) -> list[RateLimit]: | |
| return v | ||
|
|
||
| @validator("probes") | ||
| def validate_probes(cls, v: list[ProbeConfig]) -> list[ProbeConfig]: | ||
| def validate_probes(cls, v: Optional[list[ProbeConfig]]) -> Optional[list[ProbeConfig]]: | ||
| if v is None: | ||
| return v | ||
| if has_duplicates(v): | ||
| # Using a custom validator instead of Field(unique_items=True) to avoid Pydantic bug: | ||
| # https://github.com/pydantic/pydantic/issues/3765 | ||
|
|
@@ -932,6 +936,35 @@ def validate_replicas( | |
| ) | ||
| return v | ||
|
|
||
| @root_validator() | ||
| def set_default_probes_for_model(cls, values): | ||
|
Comment on lines
+939
to
+940
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sets the default on the client side, and we prefer server-side defaults. Consider handling the default probe in |
||
| model = values.get("model") | ||
| probes = values.get("probes") | ||
| if model is not None and probes is None: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (nit)
This check also passes for models that are declared using the full syntax that are not OpenAI-compatible or not chat models (once we support those). Such models don't necessarily provide A better check would be |
||
| body = orjson.dumps( | ||
| { | ||
| "model": model.name, | ||
| "messages": [{"role": "user", "content": "hi"}], | ||
| "max_tokens": 1, | ||
| } | ||
| ).decode("utf-8") | ||
| values["probes"] = [ | ||
| ProbeConfig( | ||
| type="http", | ||
| method="post", | ||
| url=DEFAULT_MODEL_PROBE_URL, | ||
| headers=[ | ||
| HTTPHeaderSpec(name="Content-Type", value="application/json"), | ||
| ], | ||
| body=body, | ||
| timeout=DEFAULT_MODEL_PROBE_TIMEOUT, | ||
| ) | ||
| ] | ||
| elif probes is None: | ||
| # Probes omitted and model not set: normalize to empty list for downstream. | ||
| values["probes"] = [] | ||
| return values | ||
|
|
||
| @root_validator() | ||
| def validate_scaling(cls, values): | ||
| scaling = values.get("scaling") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider documenting the default probe either here or in
model. And optionally inconcepts/services.md