diff --git a/docs/internal/cloud-matrix.md b/docs/internal/cloud-matrix.md index 2d92615a5..d17b43afe 100644 --- a/docs/internal/cloud-matrix.md +++ b/docs/internal/cloud-matrix.md @@ -355,10 +355,21 @@ Storage auth uses GCP Workload Identity via Application Default Credentials (ADC config: values: launch: - cache_redis_onprem_url: redis://my-memorystore-instance.redis.cache.googleapis.com:6379/0 + cache_redis_gcp_url: redis://10.0.0.3:6379/0 # Memorystore exposes a private IP endpoint ``` -Alternatively, `cache_redis_aws_url` is also accepted (the on-prem fallback path in `config.py` accepts it with a log warning). The same Redis instance serves as both the caching layer and the Celery broker. +`cache_redis_gcp_url` is the preferred field for GCP — it maps directly to GCP Memorystore and triggers the GCP branch in `HostedModelInferenceServiceConfig.cache_redis_url`. The same Redis instance serves as both the caching layer and the Celery broker. + +#### Database (Cloud SQL) + +GCP uses environment variables for DB credentials injected via a Kubernetes secret — the same pattern as on-prem. There is no GCP Secret Manager integration; use `kubernetesDatabaseSecretName` to mount the secret: + +```yaml +secrets: + kubernetesDatabaseSecretName: llm-engine-postgres-credentials +``` + +The K8s secret must contain keys that the chart injects as env vars: `DB_HOST`, `DB_HOST_RO`, `DB_PORT`, `DB_USER`, `DB_PASSWORD`, `DB_NAME`. `get_engine_url` in `db/base.py` reads these directly when `cloud_provider == "gcp"`. 
#### Service Account (GCP Workload Identity) diff --git a/model-engine/model_engine_server/common/config.py b/model-engine/model_engine_server/common/config.py index 902c1a898..9e6e39b81 100644 --- a/model-engine/model_engine_server/common/config.py +++ b/model-engine/model_engine_server/common/config.py @@ -77,6 +77,9 @@ class HostedModelInferenceServiceConfig: None # Not an env var because the redis cache info is already here ) cache_redis_onprem_url: Optional[str] = None # For on-prem Redis (e.g., redis://redis:6379/0) + cache_redis_gcp_url: Optional[str] = ( + None # For GCP Memorystore (e.g., redis://MEMORYSTORE_HOST:6379/0) + ) sglang_repository: Optional[str] = None @classmethod @@ -106,6 +109,10 @@ def cache_redis_url(self) -> str: redis_port = getattr(infra_config(), "redis_port", 6379) return f"redis://{redis_host}:{redis_port}/0" + if cloud_provider == "gcp": + assert self.cache_redis_gcp_url, "cache_redis_gcp_url required for GCP" + return self.cache_redis_gcp_url + if self.cache_redis_aws_url: assert cloud_provider == "aws", "cache_redis_aws_url is only for AWS" if self.cache_redis_aws_secret_name: diff --git a/model-engine/model_engine_server/db/base.py b/model-engine/model_engine_server/db/base.py index f5ea49e7c..3d2f884a3 100644 --- a/model-engine/model_engine_server/db/base.py +++ b/model-engine/model_engine_server/db/base.py @@ -69,6 +69,20 @@ def get_engine_url( engine_url = f"postgresql://{user}:{password}@{host}:{port}/{dbname}" + elif infra_config().cloud_provider == "gcp": + user = os.environ.get("DB_USER", "postgres") + password = os.environ.get("DB_PASSWORD", "postgres") + host = ( + os.environ.get("DB_HOST_RO") + if read_only + else os.environ.get("DB_HOST", "localhost") + ) + port = os.environ.get("DB_PORT", "5432") + dbname = os.environ.get("DB_NAME", "llm_engine") + logger.info(f"Connecting to db {host}:{port}, name {dbname}") + + engine_url = f"postgresql://{user}:{password}@{host}:{port}/{dbname}" + elif infra_config().cloud_provider 
== "azure": client = SecretClient( vault_url=f"https://{os.environ.get('KEYVAULT_NAME')}.vault.azure.net", diff --git a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh index 82f2b9567..ea7124c24 100755 --- a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh +++ b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh @@ -38,7 +38,8 @@ set -eo pipefail # Examples: # # # 1. Published versions (base image and pip version match) -# ./build_and_upload_image.sh my-tag vllm +# ./build_and_upload_image.sh my-tag vllm # → tag: 0.17.0-my-tag +# ./build_and_upload_image.sh "" vllm # → tag: 0.17.0 (version-only) # ./build_and_upload_image.sh my-tag vllm --vllm-version=0.15.1 # # # 2. Newer pip version on older base image (e.g. 0.16.0 wheel on 0.15.1 base) @@ -105,8 +106,8 @@ VLLM_OMNI_VERSION=${VLLM_OMNI_VERSION:-"0.16.0"} VLLM_OMNI_SOURCE_DIR="" VLLM_OMNI_SOURCE_REF="" -if [ -z "$1" ]; then - echo "Must supply the user-provided tag" +if [ "$#" -lt 1 ]; then + echo "Must supply the user-provided tag (pass empty string \"\" for a version-only tag)" exit 1; fi @@ -264,9 +265,9 @@ fi # Construct image tag based on vllm version and user tag if [ "$BUILD_TARGET" == "vllm_omni" ]; then - IMAGE_TAG="${VLLM_VERSION}-omni-${USER_TAG}" + IMAGE_TAG="${VLLM_VERSION}-omni${USER_TAG:+-$USER_TAG}" else - IMAGE_TAG="${VLLM_VERSION}-${USER_TAG}" + IMAGE_TAG="${VLLM_VERSION}${USER_TAG:+-$USER_TAG}" fi # if build target = vllm use vllm otherwise use vllm_batch diff --git a/model-engine/tests/unit/api/test_dependencies.py b/model-engine/tests/unit/api/test_dependencies.py index 15491c368..a2712827e 100644 --- a/model-engine/tests/unit/api/test_dependencies.py +++ b/model-engine/tests/unit/api/test_dependencies.py @@ -1,6 +1,8 @@ from unittest.mock import MagicMock, patch +import pytest from model_engine_server.api.dependencies import 
_get_external_interfaces +from model_engine_server.common.config import HostedModelInferenceServiceConfig from model_engine_server.infra.gateways import ( GCSFileStorageGateway, GCSFilesystemGateway, @@ -119,3 +121,71 @@ def test_gcp_provider_selects_gcp_implementations(): external_interfaces.resource_gateway.queue_delegate, RedisQueueEndpointResourceDelegate, ) + + +def test_gcp_cache_redis_url_returns_gcp_url(): + """Test that cache_redis_url returns cache_redis_gcp_url when cloud_provider is gcp.""" + with patch("model_engine_server.common.config.infra_config") as mock_infra_config: + mock_infra = MagicMock() + mock_infra.cloud_provider = "gcp" + mock_infra_config.return_value = mock_infra + + config = HostedModelInferenceServiceConfig( + gateway_namespace="ns", + endpoint_namespace="ns", + billing_queue_arn="arn", + sqs_profile="default", + sqs_queue_policy_template="{}", + sqs_queue_tag_template="{}", + model_primitive_host="localhost", + cloud_file_llm_fine_tune_repository="gs://bucket/ft", + hf_user_fine_tuned_weights_prefix="gs://bucket/weights", + istio_enabled=False, + dd_trace_enabled=False, + tgi_repository="repo/tgi", + vllm_repository="repo/vllm", + lightllm_repository="repo/lightllm", + tensorrt_llm_repository="repo/tensorrt", + batch_inference_vllm_repository="repo/batch", + user_inference_base_repository="repo/base", + user_inference_pytorch_repository="repo/pytorch", + user_inference_tensorflow_repository="repo/tf", + docker_image_layer_cache_repository="repo/cache", + sensitive_log_mode=False, + cache_redis_gcp_url="redis://10.0.0.1:6379/0", + ) + assert config.cache_redis_url == "redis://10.0.0.1:6379/0" + + +def test_gcp_cache_redis_url_raises_when_not_set(): + """Test that cache_redis_url raises AssertionError for GCP when cache_redis_gcp_url is not set.""" + with patch("model_engine_server.common.config.infra_config") as mock_infra_config: + mock_infra = MagicMock() + mock_infra.cloud_provider = "gcp" + mock_infra_config.return_value = 
mock_infra + + config = HostedModelInferenceServiceConfig( + gateway_namespace="ns", + endpoint_namespace="ns", + billing_queue_arn="arn", + sqs_profile="default", + sqs_queue_policy_template="{}", + sqs_queue_tag_template="{}", + model_primitive_host="localhost", + cloud_file_llm_fine_tune_repository="gs://bucket/ft", + hf_user_fine_tuned_weights_prefix="gs://bucket/weights", + istio_enabled=False, + dd_trace_enabled=False, + tgi_repository="repo/tgi", + vllm_repository="repo/vllm", + lightllm_repository="repo/lightllm", + tensorrt_llm_repository="repo/tensorrt", + batch_inference_vllm_repository="repo/batch", + user_inference_base_repository="repo/base", + user_inference_pytorch_repository="repo/pytorch", + user_inference_tensorflow_repository="repo/tf", + docker_image_layer_cache_repository="repo/cache", + sensitive_log_mode=False, + ) + with pytest.raises(AssertionError, match="cache_redis_gcp_url required for GCP"): + _ = config.cache_redis_url