Skip to content

Commit c4900cc

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Route gemini-3 model traffic to global region in local agent scraping
PiperOrigin-RevId: 893194564
1 parent 0e5037d commit c4900cc

4 files changed

Lines changed: 165 additions & 23 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 93 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -44,7 +44,6 @@
4444
import pandas as pd
4545
import pytest
4646

47-
4847
_TEST_PROJECT = "test-project"
4948
_TEST_LOCATION = "us-central1"
5049

@@ -2079,6 +2078,99 @@ def test_has_tool_call_with_agent_event(self):
20792078

20802079

20812080
@pytest.mark.usefixtures("google_auth_mock")
2081+
class TestRunAgent:
2082+
"""Unit tests for the _run_agent function."""
2083+
2084+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2085+
def test_run_agent_rewrites_gemini_3_model_name(
2086+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2087+
):
2088+
mock_execute_inference_concurrently.return_value = []
2089+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2090+
model_name="gemini-3-preview"
2091+
)
2092+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2093+
with mock.patch.dict(os.environ, clear=True):
2094+
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
2095+
2096+
def mock_execute(*args, **kwargs):
2097+
assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
2098+
return []
2099+
2100+
mock_execute_inference_concurrently.side_effect = mock_execute
2101+
2102+
_evals_common._run_agent(
2103+
api_client=mock_api_client_fixture,
2104+
agent_engine=mock.Mock(),
2105+
agent=None,
2106+
prompt_dataset=prompt_dataset,
2107+
user_simulator_config=user_simulator_config,
2108+
allow_cross_region_model=True,
2109+
)
2110+
2111+
assert (
2112+
user_simulator_config.model_name
2113+
== f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
2114+
)
2115+
assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"
2116+
2117+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2118+
def test_run_agent_raises_error_if_gemini_3_and_allow_cross_region_model_false(
2119+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2120+
):
2121+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2122+
model_name="gemini-3-preview"
2123+
)
2124+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2125+
with mock.patch.dict(os.environ, clear=True):
2126+
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
2127+
2128+
with pytest.raises(
2129+
ValueError,
2130+
match="The model 'gemini-3-preview' is currently only available in the 'global' region.",
2131+
):
2132+
_evals_common._run_agent(
2133+
api_client=mock_api_client_fixture,
2134+
agent_engine=mock.Mock(),
2135+
agent=None,
2136+
prompt_dataset=prompt_dataset,
2137+
user_simulator_config=user_simulator_config,
2138+
allow_cross_region_model=False,
2139+
)
2140+
2141+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2142+
def test_run_agent_rewrites_gemini_3_model_name_empty_env(
2143+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2144+
):
2145+
mock_execute_inference_concurrently.return_value = []
2146+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2147+
model_name="gemini-3-preview"
2148+
)
2149+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2150+
with mock.patch.dict(os.environ, clear=True):
2151+
2152+
def mock_execute(*args, **kwargs):
2153+
assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
2154+
return []
2155+
2156+
mock_execute_inference_concurrently.side_effect = mock_execute
2157+
2158+
_evals_common._run_agent(
2159+
api_client=mock_api_client_fixture,
2160+
agent_engine=mock.Mock(),
2161+
agent=None,
2162+
prompt_dataset=prompt_dataset,
2163+
user_simulator_config=user_simulator_config,
2164+
allow_cross_region_model=True,
2165+
)
2166+
2167+
assert (
2168+
user_simulator_config.model_name
2169+
== f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
2170+
)
2171+
assert "GOOGLE_CLOUD_LOCATION" not in os.environ
2172+
2173+
20822174
class TestRunAgentInternal:
20832175
"""Unit tests for the _run_agent_internal function."""
20842176

vertexai/_genai/_evals_common.py

Lines changed: 59 additions & 22 deletions
Original file line number · Diff line number · Diff line change
@@ -1143,6 +1143,7 @@ def _execute_inference(
11431143
prompt_template: Optional[Union[str, types.PromptTemplateOrDict]] = None,
11441144
location: Optional[str] = None,
11451145
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1146+
allow_cross_region_model: bool = False,
11461147
) -> pd.DataFrame:
11471148
"""Executes inference on a given dataset using the specified model.
11481149
@@ -1250,6 +1251,7 @@ def _execute_inference(
12501251
agent=agent,
12511252
prompt_dataset=prompt_dataset,
12521253
user_simulator_config=user_simulator_config,
1254+
allow_cross_region_model=allow_cross_region_model,
12531255
)
12541256
end_time = time.time()
12551257
logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)
@@ -1823,6 +1825,7 @@ def _run_agent_internal(
18231825
agent: Optional[LlmAgent],
18241826
prompt_dataset: pd.DataFrame,
18251827
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1828+
allow_cross_region_model: bool = False,
18261829
) -> pd.DataFrame:
18271830
"""Runs an agent."""
18281831
raw_responses = _run_agent(
@@ -1831,6 +1834,7 @@ def _run_agent_internal(
18311834
agent=agent,
18321835
prompt_dataset=prompt_dataset,
18331836
user_simulator_config=user_simulator_config,
1837+
allow_cross_region_model=allow_cross_region_model,
18341838
)
18351839
processed_intermediate_events = []
18361840
processed_responses = []
@@ -1872,6 +1876,7 @@ def _run_agent(
18721876
agent: Optional[LlmAgent],
18731877
prompt_dataset: pd.DataFrame,
18741878
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1879+
allow_cross_region_model: bool = False,
18751880
) -> list[
18761881
Union[
18771882
list[dict[str, Any]],
@@ -1880,28 +1885,60 @@ def _run_agent(
18801885
]
18811886
]:
18821887
"""Internal helper to run inference using Gemini model with concurrency."""
1883-
if agent_engine:
1884-
return _execute_inference_concurrently(
1885-
api_client=api_client,
1886-
agent_engine=agent_engine,
1887-
prompt_dataset=prompt_dataset,
1888-
progress_desc="Agent Run",
1889-
gemini_config=None,
1890-
user_simulator_config=None,
1891-
inference_fn=_execute_agent_run_with_retry,
1892-
)
1893-
elif agent:
1894-
return _execute_inference_concurrently(
1895-
api_client=api_client,
1896-
agent=agent,
1897-
prompt_dataset=prompt_dataset,
1898-
progress_desc="Local Agent Run",
1899-
gemini_config=None,
1900-
user_simulator_config=user_simulator_config,
1901-
inference_fn=_execute_local_agent_run_with_retry,
1902-
)
1903-
else:
1904-
raise ValueError("Neither agent_engine nor agent is provided.")
1888+
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
1889+
location_overridden = False
1890+
1891+
if user_simulator_config and user_simulator_config.model_name:
1892+
model_name = user_simulator_config.model_name
1893+
if model_name.startswith("gemini-3") and "/" not in model_name:
1894+
current_location = original_location or api_client.location or "us-central1"
1895+
if current_location != "global" and not allow_cross_region_model:
1896+
raise ValueError(
1897+
f"The model '{model_name}' is currently only available in the"
1898+
" 'global' region. Because this request originated in"
1899+
f" '{current_location}', you must explicitly set "
1900+
"allow_cross_region_model=True to allow your data to be routed outside"
1901+
" of your request's region."
1902+
)
1903+
1904+
logger.warning(
1905+
"Model %s is only available in the global region. Routing to global.",
1906+
model_name,
1907+
)
1908+
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
1909+
if original_location != "global":
1910+
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
1911+
location_overridden = True
1912+
1913+
try:
1914+
if agent_engine:
1915+
return _execute_inference_concurrently(
1916+
api_client=api_client,
1917+
agent_engine=agent_engine,
1918+
prompt_dataset=prompt_dataset,
1919+
progress_desc="Agent Run",
1920+
gemini_config=None,
1921+
user_simulator_config=None,
1922+
inference_fn=_execute_agent_run_with_retry,
1923+
)
1924+
elif agent:
1925+
return _execute_inference_concurrently(
1926+
api_client=api_client,
1927+
agent=agent,
1928+
prompt_dataset=prompt_dataset,
1929+
progress_desc="Local Agent Run",
1930+
gemini_config=None,
1931+
user_simulator_config=user_simulator_config,
1932+
inference_fn=_execute_local_agent_run_with_retry,
1933+
)
1934+
else:
1935+
raise ValueError("Neither agent_engine nor agent is provided.")
1936+
finally:
1937+
if location_overridden:
1938+
if original_location is None:
1939+
del os.environ["GOOGLE_CLOUD_LOCATION"]
1940+
else:
1941+
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
19051942

19061943

19071944
def _execute_agent_run_with_retry(

vertexai/_genai/evals.py

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1736,6 +1736,7 @@ def run_inference(
17361736
- dest: The destination path for storage of the inference results.
17371737
- prompt_template: The template string to use for constructing prompts.
17381738
- generate_content_config: The config for the Gemini generate content call.
1739+
- allow_cross_region_model: Opt-in flag to authorize cross-region routing for LLM models.
17391740
17401741
Returns:
17411742
The evaluation dataset.
@@ -1771,6 +1772,7 @@ def run_inference(
17711772
location=location,
17721773
config=config.generate_content_config,
17731774
user_simulator_config=getattr(config, "user_simulator_config", None),
1775+
allow_cross_region_model=getattr(config, "allow_cross_region_model", False),
17741776
)
17751777

17761778
def evaluate(
@@ -2216,6 +2218,8 @@ def create_evaluation_run(
22162218
If `agent_info` is provided without `inference_configs`, this config is used
22172219
to automatically construct the inference configuration. If not specified,
22182220
or if `max_turn` is not set, `max_turn` defaults to 5.
2221+
The `model_name` inside this config can be either a full model path or a
2222+
short model name, e.g. `gemini-3-preview-flash`.
22192223
inference_configs: The candidate to inference config map for the evaluation run.
22202224
The key is the candidate name, and the value is the inference config.
22212225
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
@@ -3486,6 +3490,8 @@ async def create_evaluation_run(
34863490
If `agent_info` is provided without `inference_configs`, this config is used
34873491
to automatically construct the inference configuration. If not specified,
34883492
or if `max_turn` is not set, `max_turn` defaults to 5.
3493+
The `model_name` inside this config can be either a full model path or a
3494+
short model name, e.g. `gemini-3-preview-flash`.
34893495
inference_configs: The candidate to inference config map for the evaluation run.
34903496
The key is the candidate name, and the value is the inference config.
34913497
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,

vertexai/_genai/types/common.py

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -16011,6 +16011,10 @@ class EvalRunInferenceConfig(_common.BaseModel):
1601116011
description="""Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
1601216012
conversation plans, user simulation will be triggered.""",
1601316013
)
16014+
allow_cross_region_model: Optional[bool] = Field(
16015+
default=None,
16016+
description="""Opt-in flag to authorize cross-region routing for LLM models.""",
16017+
)
1601416018

1601516019

1601616020
class EvalRunInferenceConfigDict(TypedDict, total=False):
@@ -16029,6 +16033,9 @@ class EvalRunInferenceConfigDict(TypedDict, total=False):
1602916033
"""Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
1603016034
conversation plans, user simulation will be triggered."""
1603116035

16036+
allow_cross_region_model: Optional[bool]
16037+
"""Opt-in flag to authorize cross-region routing for LLM models."""
16038+
1603216039

1603316040
EvalRunInferenceConfigOrDict = Union[EvalRunInferenceConfig, EvalRunInferenceConfigDict]
1603416041

0 commit comments

Comments (0)