Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions docs/internal/cloud-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,21 @@ Storage auth uses GCP Workload Identity via Application Default Credentials (ADC
config:
values:
launch:
cache_redis_onprem_url: redis://my-memorystore-instance.redis.cache.googleapis.com:6379/0
cache_redis_gcp_url: redis://my-memorystore-instance.redis.cache.googleapis.com:6379/0
```

Alternatively, `cache_redis_aws_url` is also accepted (the on-prem fallback path in `config.py` accepts it with a log warning). The same Redis instance serves as both the caching layer and the Celery broker.
`cache_redis_gcp_url` is the preferred field for GCP — it maps directly to GCP Memorystore and triggers the GCP branch in `HostedModelInferenceServiceConfig.cache_redis_url`. The same Redis instance serves as both the caching layer and the Celery broker.

#### Database (Cloud SQL)

GCP uses environment variables for DB credentials injected via a Kubernetes secret — the same pattern as on-prem. There is no GCP Secret Manager integration; use `kubernetesDatabaseSecretName` to mount the secret:

```yaml
secrets:
kubernetesDatabaseSecretName: llm-engine-postgres-credentials
```

The K8s secret must contain keys that the chart injects as env vars: `DB_HOST`, `DB_HOST_RO`, `DB_PORT`, `DB_USER`, `DB_PASSWORD`, `DB_NAME`. `get_engine_url` in `db/base.py` reads these directly when `cloud_provider == "gcp"`.

#### Service Account (GCP Workload Identity)

Expand Down
7 changes: 7 additions & 0 deletions model-engine/model_engine_server/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class HostedModelInferenceServiceConfig:
None # Not an env var because the redis cache info is already here
)
cache_redis_onprem_url: Optional[str] = None # For on-prem Redis (e.g., redis://redis:6379/0)
cache_redis_gcp_url: Optional[str] = (
None # For GCP Memorystore (e.g., redis://MEMORYSTORE_HOST:6379/0)
)
sglang_repository: Optional[str] = None

@classmethod
Expand Down Expand Up @@ -106,6 +109,10 @@ def cache_redis_url(self) -> str:
redis_port = getattr(infra_config(), "redis_port", 6379)
return f"redis://{redis_host}:{redis_port}/0"

if cloud_provider == "gcp":
assert self.cache_redis_gcp_url, "cache_redis_gcp_url required for GCP"
return self.cache_redis_gcp_url

if self.cache_redis_aws_url:
assert cloud_provider == "aws", "cache_redis_aws_url is only for AWS"
if self.cache_redis_aws_secret_name:
Expand Down
14 changes: 14 additions & 0 deletions model-engine/model_engine_server/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ def get_engine_url(

engine_url = f"postgresql://{user}:{password}@{host}:{port}/{dbname}"

elif infra_config().cloud_provider == "gcp":
user = os.environ.get("DB_USER", "postgres")
password = os.environ.get("DB_PASSWORD", "postgres")
host = (
os.environ.get("DB_HOST_RO")
if read_only
else os.environ.get("DB_HOST", "localhost")
)
Comment on lines +75 to +79
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 host will be None when DB_HOST_RO is unset

When read_only=True and the DB_HOST_RO environment variable is not set, os.environ.get("DB_HOST_RO") returns None, so host will be None. This produces a broken connection string that will fail at connection time.

The on-prem branch on line 65 handles this correctly with an or fallback:

host = os.environ.get("DB_HOST_RO") or os.environ.get("DB_HOST", "localhost")

The GCP branch should use the same fallback pattern to avoid None hosts:

Suggested change
host = (
os.environ.get("DB_HOST_RO")
if read_only
else os.environ.get("DB_HOST", "localhost")
)
host = (
os.environ.get("DB_HOST_RO")
or os.environ.get("DB_HOST", "localhost")
if read_only
else os.environ.get("DB_HOST", "localhost")
)

Or more simply, match the on-prem pattern exactly if the read_only distinction is not required.

Prompt To Fix With AI
This is a comment left during a code review.
Path: model-engine/model_engine_server/db/base.py
Line: 75-79

Comment:
**`host` will be `None` when `DB_HOST_RO` is unset**

When `read_only=True` and the `DB_HOST_RO` environment variable is not set, `os.environ.get("DB_HOST_RO")` returns `None`, so `host` will be `None`. This produces a broken connection string that will fail at connection time.

The on-prem branch on line 65 handles this correctly with an `or` fallback:
```python
host = os.environ.get("DB_HOST_RO") or os.environ.get("DB_HOST", "localhost")
```

The GCP branch should use the same fallback pattern to avoid `None` hosts:

```suggestion
            host = (
                os.environ.get("DB_HOST_RO")
                or os.environ.get("DB_HOST", "localhost")
                if read_only
                else os.environ.get("DB_HOST", "localhost")
            )
```

Or more simply, match the on-prem pattern exactly if the `read_only` distinction is not required.

How can I resolve this? If you propose a fix, please make it concise.

port = os.environ.get("DB_PORT", "5432")
dbname = os.environ.get("DB_NAME", "llm_engine")
logger.info(f"Connecting to db {host}:{port}, name {dbname}")

engine_url = f"postgresql://{user}:{password}@{host}:{port}/{dbname}"

elif infra_config().cloud_provider == "azure":
client = SecretClient(
vault_url=f"https://{os.environ.get('KEYVAULT_NAME')}.vault.azure.net",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ set -eo pipefail
# Examples:
#
# # 1. Published versions (base image and pip version match)
# ./build_and_upload_image.sh my-tag vllm
# ./build_and_upload_image.sh my-tag vllm # → tag: 0.17.0-my-tag
# ./build_and_upload_image.sh "" vllm # → tag: 0.17.0 (version-only)
# ./build_and_upload_image.sh my-tag vllm --vllm-version=0.15.1
#
# # 2. Newer pip version on older base image (e.g. 0.16.0 wheel on 0.15.1 base)
Expand Down Expand Up @@ -105,8 +106,8 @@ VLLM_OMNI_VERSION=${VLLM_OMNI_VERSION:-"0.16.0"}
VLLM_OMNI_SOURCE_DIR=""
VLLM_OMNI_SOURCE_REF=""

if [ -z "$1" ]; then
echo "Must supply the user-provided tag"
if [ "$#" -lt 1 ]; then
echo "Must supply the user-provided tag (pass empty string \"\" for a version-only tag)"
exit 1;
fi

Expand Down Expand Up @@ -264,9 +265,9 @@ fi

# Construct image tag based on vllm version and user tag
if [ "$BUILD_TARGET" == "vllm_omni" ]; then
IMAGE_TAG="${VLLM_VERSION}-omni-${USER_TAG}"
IMAGE_TAG="${VLLM_VERSION}-omni${USER_TAG:+-$USER_TAG}"
else
IMAGE_TAG="${VLLM_VERSION}-${USER_TAG}"
IMAGE_TAG="${VLLM_VERSION}${USER_TAG:+-$USER_TAG}"
fi

# if build target = vllm use vllm otherwise use vllm_batch
Expand Down
70 changes: 70 additions & 0 deletions model-engine/tests/unit/api/test_dependencies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from unittest.mock import MagicMock, patch

import pytest
from model_engine_server.api.dependencies import _get_external_interfaces
from model_engine_server.common.config import HostedModelInferenceServiceConfig
from model_engine_server.infra.gateways import (
GCSFileStorageGateway,
GCSFilesystemGateway,
Expand Down Expand Up @@ -119,3 +121,71 @@ def test_gcp_provider_selects_gcp_implementations():
external_interfaces.resource_gateway.queue_delegate,
RedisQueueEndpointResourceDelegate,
)


def test_gcp_cache_redis_url_returns_gcp_url():
    """Test that cache_redis_url returns cache_redis_gcp_url when cloud_provider is gcp."""
    with patch("model_engine_server.common.config.infra_config") as infra_config_mock:
        # Simulate a GCP deployment so the property takes the GCP branch.
        fake_infra = MagicMock()
        fake_infra.cloud_provider = "gcp"
        infra_config_mock.return_value = fake_infra

        # Required-but-irrelevant fields; only cache_redis_gcp_url matters here.
        base_fields = dict(
            gateway_namespace="ns",
            endpoint_namespace="ns",
            billing_queue_arn="arn",
            sqs_profile="default",
            sqs_queue_policy_template="{}",
            sqs_queue_tag_template="{}",
            model_primitive_host="localhost",
            cloud_file_llm_fine_tune_repository="gs://bucket/ft",
            hf_user_fine_tuned_weights_prefix="gs://bucket/weights",
            istio_enabled=False,
            dd_trace_enabled=False,
            tgi_repository="repo/tgi",
            vllm_repository="repo/vllm",
            lightllm_repository="repo/lightllm",
            tensorrt_llm_repository="repo/tensorrt",
            batch_inference_vllm_repository="repo/batch",
            user_inference_base_repository="repo/base",
            user_inference_pytorch_repository="repo/pytorch",
            user_inference_tensorflow_repository="repo/tf",
            docker_image_layer_cache_repository="repo/cache",
            sensitive_log_mode=False,
        )
        config = HostedModelInferenceServiceConfig(
            cache_redis_gcp_url="redis://10.0.0.1:6379/0",
            **base_fields,
        )

        # The GCP branch must hand the configured URL back verbatim.
        assert config.cache_redis_url == "redis://10.0.0.1:6379/0"


def test_gcp_cache_redis_url_raises_when_not_set():
    """Test that cache_redis_url raises AssertionError for GCP when cache_redis_gcp_url is not set."""
    with patch("model_engine_server.common.config.infra_config") as infra_config_mock:
        # Force the GCP code path in the cache_redis_url property.
        fake_infra = MagicMock()
        fake_infra.cloud_provider = "gcp"
        infra_config_mock.return_value = fake_infra

        # Build a config that deliberately omits cache_redis_gcp_url.
        config = HostedModelInferenceServiceConfig(
            **dict(
                gateway_namespace="ns",
                endpoint_namespace="ns",
                billing_queue_arn="arn",
                sqs_profile="default",
                sqs_queue_policy_template="{}",
                sqs_queue_tag_template="{}",
                model_primitive_host="localhost",
                cloud_file_llm_fine_tune_repository="gs://bucket/ft",
                hf_user_fine_tuned_weights_prefix="gs://bucket/weights",
                istio_enabled=False,
                dd_trace_enabled=False,
                tgi_repository="repo/tgi",
                vllm_repository="repo/vllm",
                lightllm_repository="repo/lightllm",
                tensorrt_llm_repository="repo/tensorrt",
                batch_inference_vllm_repository="repo/batch",
                user_inference_base_repository="repo/base",
                user_inference_pytorch_repository="repo/pytorch",
                user_inference_tensorflow_repository="repo/tf",
                docker_image_layer_cache_repository="repo/cache",
                sensitive_log_mode=False,
            )
        )

        # Accessing the property without a GCP URL must trip the assertion.
        with pytest.raises(AssertionError, match="cache_redis_gcp_url required for GCP"):
            _ = config.cache_redis_url