Skip to content

Commit c4900cc

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Route gemini-3 model traffic to global region in local agent scraping
PiperOrigin-RevId: 893194564
1 parent 0e5037d commit c4900cc

4 files changed

Lines changed: 165 additions & 23 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 93 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -44,7 +44,6 @@
4444
import pandas as pd
4545
import pytest
4646

47-
4847
_TEST_PROJECT = "test-project"
4948
_TEST_LOCATION = "us-central1"
5049

@@ -2079,6 +2078,99 @@ def test_has_tool_call_with_agent_event(self):
20792078

20802079

20812080
@pytest.mark.usefixtures("google_auth_mock")
2081+
class TestRunAgent:
2082+
"""Unit tests for the _run_agent function."""
2083+
2084+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2085+
def test_run_agent_rewrites_gemini_3_model_name(
2086+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2087+
):
2088+
mock_execute_inference_concurrently.return_value = []
2089+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2090+
model_name="gemini-3-preview"
2091+
)
2092+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2093+
with mock.patch.dict(os.environ, clear=True):
2094+
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
2095+
2096+
def mock_execute(*args, **kwargs):
2097+
assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
2098+
return []
2099+
2100+
mock_execute_inference_concurrently.side_effect = mock_execute
2101+
2102+
_evals_common._run_agent(
2103+
api_client=mock_api_client_fixture,
2104+
agent_engine=mock.Mock(),
2105+
agent=None,
2106+
prompt_dataset=prompt_dataset,
2107+
user_simulator_config=user_simulator_config,
2108+
allow_cross_region_model=True,
2109+
)
2110+
2111+
assert (
2112+
user_simulator_config.model_name
2113+
== f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
2114+
)
2115+
assert os.environ.get("GOOGLE_CLOUD_LOCATION") == "us-central1"
2116+
2117+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2118+
def test_run_agent_raises_error_if_gemini_3_and_allow_cross_region_model_false(
2119+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2120+
):
2121+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2122+
model_name="gemini-3-preview"
2123+
)
2124+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2125+
with mock.patch.dict(os.environ, clear=True):
2126+
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"
2127+
2128+
with pytest.raises(
2129+
ValueError,
2130+
match="The model 'gemini-3-preview' is currently only available in the 'global' region.",
2131+
):
2132+
_evals_common._run_agent(
2133+
api_client=mock_api_client_fixture,
2134+
agent_engine=mock.Mock(),
2135+
agent=None,
2136+
prompt_dataset=prompt_dataset,
2137+
user_simulator_config=user_simulator_config,
2138+
allow_cross_region_model=False,
2139+
)
2140+
2141+
@mock.patch.object(_evals_common, "_execute_inference_concurrently")
2142+
def test_run_agent_rewrites_gemini_3_model_name_empty_env(
2143+
self, mock_execute_inference_concurrently, mock_api_client_fixture
2144+
):
2145+
mock_execute_inference_concurrently.return_value = []
2146+
user_simulator_config = vertexai_genai_types.evals.UserSimulatorConfig(
2147+
model_name="gemini-3-preview"
2148+
)
2149+
prompt_dataset = pd.DataFrame({"prompt": ["prompt1"]})
2150+
with mock.patch.dict(os.environ, clear=True):
2151+
2152+
def mock_execute(*args, **kwargs):
2153+
assert os.environ["GOOGLE_CLOUD_LOCATION"] == "global"
2154+
return []
2155+
2156+
mock_execute_inference_concurrently.side_effect = mock_execute
2157+
2158+
_evals_common._run_agent(
2159+
api_client=mock_api_client_fixture,
2160+
agent_engine=mock.Mock(),
2161+
agent=None,
2162+
prompt_dataset=prompt_dataset,
2163+
user_simulator_config=user_simulator_config,
2164+
allow_cross_region_model=True,
2165+
)
2166+
2167+
assert (
2168+
user_simulator_config.model_name
2169+
== f"projects/{mock_api_client_fixture.project}/locations/global/publishers/google/models/gemini-3-preview"
2170+
)
2171+
assert "GOOGLE_CLOUD_LOCATION" not in os.environ
2172+
2173+
20822174
class TestRunAgentInternal:
20832175
"""Unit tests for the _run_agent_internal function."""
20842176

vertexai/_genai/_evals_common.py

Lines changed: 59 additions & 22 deletions
Original file line number · Diff line number · Diff line change
@@ -1143,6 +1143,7 @@ def _execute_inference(
11431143
prompt_template: Optional[Union[str, types.PromptTemplateOrDict]] = None,
11441144
location: Optional[str] = None,
11451145
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1146+
allow_cross_region_model: bool = False,
11461147
) -> pd.DataFrame:
11471148
"""Executes inference on a given dataset using the specified model.
11481149
@@ -1250,6 +1251,7 @@ def _execute_inference(
12501251
agent=agent,
12511252
prompt_dataset=prompt_dataset,
12521253
user_simulator_config=user_simulator_config,
1254+
allow_cross_region_model=allow_cross_region_model,
12531255
)
12541256
end_time = time.time()
12551257
logger.info("Agent Run completed in %.2f seconds.", end_time - start_time)
@@ -1823,6 +1825,7 @@ def _run_agent_internal(
18231825
agent: Optional[LlmAgent],
18241826
prompt_dataset: pd.DataFrame,
18251827
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1828+
allow_cross_region_model: bool = False,
18261829
) -> pd.DataFrame:
18271830
"""Runs an agent."""
18281831
raw_responses = _run_agent(
@@ -1831,6 +1834,7 @@ def _run_agent_internal(
18311834
agent=agent,
18321835
prompt_dataset=prompt_dataset,
18331836
user_simulator_config=user_simulator_config,
1837+
allow_cross_region_model=allow_cross_region_model,
18341838
)
18351839
processed_intermediate_events = []
18361840
processed_responses = []
@@ -1872,6 +1876,7 @@ def _run_agent(
18721876
agent: Optional[LlmAgent],
18731877
prompt_dataset: pd.DataFrame,
18741878
user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
1879+
allow_cross_region_model: bool = False,
18751880
) -> list[
18761881
Union[
18771882
list[dict[str, Any]],
@@ -1880,28 +1885,60 @@ def _run_agent(
18801885
]
18811886
]:
18821887
"""Internal helper to run inference using Gemini model with concurrency."""
1883-
if agent_engine:
1884-
return _execute_inference_concurrently(
1885-
api_client=api_client,
1886-
agent_engine=agent_engine,
1887-
prompt_dataset=prompt_dataset,
1888-
progress_desc="Agent Run",
1889-
gemini_config=None,
1890-
user_simulator_config=None,
1891-
inference_fn=_execute_agent_run_with_retry,
1892-
)
1893-
elif agent:
1894-
return _execute_inference_concurrently(
1895-
api_client=api_client,
1896-
agent=agent,
1897-
prompt_dataset=prompt_dataset,
1898-
progress_desc="Local Agent Run",
1899-
gemini_config=None,
1900-
user_simulator_config=user_simulator_config,
1901-
inference_fn=_execute_local_agent_run_with_retry,
1902-
)
1903-
else:
1904-
raise ValueError("Neither agent_engine nor agent is provided.")
1888+
original_location = os.environ.get("GOOGLE_CLOUD_LOCATION")
1889+
location_overridden = False
1890+
1891+
if user_simulator_config and user_simulator_config.model_name:
1892+
model_name = user_simulator_config.model_name
1893+
if model_name.startswith("gemini-3") and "/" not in model_name:
1894+
current_location = original_location or api_client.location or "us-central1"
1895+
if current_location != "global" and not allow_cross_region_model:
1896+
raise ValueError(
1897+
f"The model '{model_name}' is currently only available in the"
1898+
" 'global' region. Because this request originated in"
1899+
f" '{current_location}', you must explicitly set "
1900+
"allow_cross_region_model=True to allow your data to be routed outside"
1901+
" of your request's region."
1902+
)
1903+
1904+
logger.warning(
1905+
"Model %s is only available in the global region. Routing to global.",
1906+
model_name,
1907+
)
1908+
user_simulator_config.model_name = f"projects/{api_client.project}/locations/global/publishers/google/models/{model_name}"
1909+
if original_location != "global":
1910+
os.environ["GOOGLE_CLOUD_LOCATION"] = "global"
1911+
location_overridden = True
1912+
1913+
try:
1914+
if agent_engine:
1915+
return _execute_inference_concurrently(
1916+
api_client=api_client,
1917+
agent_engine=agent_engine,
1918+
prompt_dataset=prompt_dataset,
1919+
progress_desc="Agent Run",
1920+
gemini_config=None,
1921+
user_simulator_config=None,
1922+
inference_fn=_execute_agent_run_with_retry,
1923+
)
1924+
elif agent:
1925+
return _execute_inference_concurrently(
1926+
api_client=api_client,
1927+
agent=agent,
1928+
prompt_dataset=prompt_dataset,
1929+
progress_desc="Local Agent Run",
1930+
gemini_config=None,
1931+
user_simulator_config=user_simulator_config,
1932+
inference_fn=_execute_local_agent_run_with_retry,
1933+
)
1934+
else:
1935+
raise ValueError("Neither agent_engine nor agent is provided.")
1936+
finally:
1937+
if location_overridden:
1938+
if original_location is None:
1939+
del os.environ["GOOGLE_CLOUD_LOCATION"]
1940+
else:
1941+
os.environ["GOOGLE_CLOUD_LOCATION"] = original_location
19051942

19061943

19071944
def _execute_agent_run_with_retry(

vertexai/_genai/evals.py

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1736,6 +1736,7 @@ def run_inference(
17361736
- dest: The destination path for storage of the inference results.
17371737
- prompt_template: The template string to use for constructing prompts.
17381738
- generate_content_config: The config for the Gemini generate content call.
1739+
- allow_cross_region_model: Opt-in flag to authorize cross-region routing for LLM models.
17391740
17401741
Returns:
17411742
The evaluation dataset.
@@ -1771,6 +1772,7 @@ def run_inference(
17711772
location=location,
17721773
config=config.generate_content_config,
17731774
user_simulator_config=getattr(config, "user_simulator_config", None),
1775+
allow_cross_region_model=getattr(config, "allow_cross_region_model", False),
17741776
)
17751777

17761778
def evaluate(
@@ -2216,6 +2218,8 @@ def create_evaluation_run(
22162218
If `agent_info` is provided without `inference_configs`, this config is used
22172219
to automatically construct the inference configuration. If not specified,
22182220
or if `max_turn` is not set, `max_turn` defaults to 5.
2221+
The `model_name` inside this config can be either a full model path or a
2222+
short model name, e.g. `gemini-3-preview-flash`.
22192223
inference_configs: The candidate to inference config map for the evaluation run.
22202224
The key is the candidate name, and the value is the inference config.
22212225
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,
@@ -3486,6 +3490,8 @@ async def create_evaluation_run(
34863490
If `agent_info` is provided without `inference_configs`, this config is used
34873491
to automatically construct the inference configuration. If not specified,
34883492
or if `max_turn` is not set, `max_turn` defaults to 5.
3493+
The `model_name` inside this config can be either a full model path or a
3494+
short model name, e.g. `gemini-3-preview-flash`.
34893495
inference_configs: The candidate to inference config map for the evaluation run.
34903496
The key is the candidate name, and the value is the inference config.
34913497
If provided, `agent_info` must be None. If omitted and `agent_info` is provided,

vertexai/_genai/types/common.py

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -16011,6 +16011,10 @@ class EvalRunInferenceConfig(_common.BaseModel):
1601116011
description="""Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
1601216012
conversation plans, user simulation will be triggered.""",
1601316013
)
16014+
allow_cross_region_model: Optional[bool] = Field(
16015+
default=None,
16016+
description="""Opt-in flag to authorize cross-region routing for LLM models.""",
16017+
)
1601416018

1601516019

1601616020
class EvalRunInferenceConfigDict(TypedDict, total=False):
@@ -16029,6 +16033,9 @@ class EvalRunInferenceConfigDict(TypedDict, total=False):
1602916033
"""Configuration for user simulation in multi-turn agent scraping. If provided, and the dataset contains
1603016034
conversation plans, user simulation will be triggered."""
1603116035

16036+
allow_cross_region_model: Optional[bool]
16037+
"""Opt-in flag to authorize cross-region routing for LLM models."""
16038+
1603216039

1603316040
EvalRunInferenceConfigOrDict = Union[EvalRunInferenceConfig, EvalRunInferenceConfigDict]
1603416041

0 commit comments

Comments (0)