Skip to content

Commit d4efe88

Browse files
jsondai authored and copybara-github committed
chore: GenAI Client(evals) - fix single turn metrics on Agent Scraping CUJ
PiperOrigin-RevId: 884665619
1 parent be869ca commit d4efe88

3 files changed

Lines changed: 417 additions & 202 deletions

File tree

tests/unit/vertexai/genai/test_evals.py

Lines changed: 149 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1313,6 +1313,31 @@ def test_run_inference_with_agent_engine_and_session_inputs_dict(
13131313
]
13141314
],
13151315
"response": ["agent response"],
1316+
"agent_data": [
1317+
{
1318+
"agents": None,
1319+
"turns": [
1320+
{
1321+
"events": [
1322+
{
1323+
"author": "model",
1324+
"content": {
1325+
"parts": [{"text": "intermediate1"}]
1326+
},
1327+
},
1328+
{
1329+
"author": "model",
1330+
"content": {
1331+
"parts": [{"text": "agent response"}]
1332+
},
1333+
},
1334+
],
1335+
"turn_id": "turn_0",
1336+
"turn_index": 0,
1337+
}
1338+
],
1339+
}
1340+
],
13161341
}
13171342
),
13181343
)
@@ -1392,6 +1417,31 @@ def test_run_inference_with_agent_engine_and_session_inputs_literal_string(
13921417
]
13931418
],
13941419
"response": ["agent response"],
1420+
"agent_data": [
1421+
{
1422+
"agents": None,
1423+
"turns": [
1424+
{
1425+
"events": [
1426+
{
1427+
"author": "model",
1428+
"content": {
1429+
"parts": [{"text": "intermediate1"}]
1430+
},
1431+
},
1432+
{
1433+
"author": "model",
1434+
"content": {
1435+
"parts": [{"text": "agent response"}]
1436+
},
1437+
},
1438+
],
1439+
"turn_id": "turn_0",
1440+
"turn_index": 0,
1441+
}
1442+
],
1443+
}
1444+
],
13951445
}
13961446
),
13971447
)
@@ -1571,6 +1621,72 @@ def run_async_side_effect(*args, **kwargs):
15711621
],
15721622
],
15731623
"response": ["agent response", "agent response 2"],
1624+
"agent_data": [
1625+
{
1626+
"agents": {
1627+
"mock_agent": {
1628+
"agent_id": "mock_agent",
1629+
"agent_resource_name": None,
1630+
"agent_type": "Mock",
1631+
"instruction": "mock instruction",
1632+
"description": "mock description",
1633+
"tools": [],
1634+
}
1635+
},
1636+
"turns": [
1637+
{
1638+
"events": [
1639+
{
1640+
"author": "model",
1641+
"content": {
1642+
"parts": [{"text": "intermediate1"}]
1643+
},
1644+
},
1645+
{
1646+
"author": "model",
1647+
"content": {
1648+
"parts": [{"text": "agent response"}]
1649+
},
1650+
},
1651+
],
1652+
"turn_id": "turn_0",
1653+
"turn_index": 0,
1654+
}
1655+
],
1656+
},
1657+
{
1658+
"agents": {
1659+
"mock_agent": {
1660+
"agent_id": "mock_agent",
1661+
"agent_resource_name": None,
1662+
"agent_type": "Mock",
1663+
"instruction": "mock instruction",
1664+
"description": "mock description",
1665+
"tools": [],
1666+
}
1667+
},
1668+
"turns": [
1669+
{
1670+
"events": [
1671+
{
1672+
"author": "model",
1673+
"content": {
1674+
"parts": [{"text": "intermediate2"}]
1675+
},
1676+
},
1677+
{
1678+
"author": "model",
1679+
"content": {
1680+
"parts": [{"text": "agent response 2"}]
1681+
},
1682+
},
1683+
],
1684+
"turn_id": "turn_0",
1685+
"turn_index": 0,
1686+
}
1687+
],
1688+
},
1689+
],
15741690
}
15751691
)
15761692
pd.testing.assert_frame_equal(
@@ -1952,6 +2068,31 @@ def test_run_agent_internal_success(self, mock_run_agent):
19522068
]
19532069
],
19542070
"response": ["final response"],
2071+
"agent_data": [
2072+
{
2073+
"agents": None,
2074+
"turns": [
2075+
{
2076+
"events": [
2077+
{
2078+
"author": "model",
2079+
"content": {
2080+
"parts": [{"text": "intermediate1"}]
2081+
},
2082+
},
2083+
{
2084+
"author": "model",
2085+
"content": {
2086+
"parts": [{"text": "final response"}]
2087+
},
2088+
},
2089+
],
2090+
"turn_id": "turn_0",
2091+
"turn_index": 0,
2092+
}
2093+
],
2094+
}
2095+
],
19552096
}
19562097
)
19572098
pd.testing.assert_frame_equal(result_df, expected_df)
@@ -2144,24 +2285,24 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
21442285
assert not result_df["intermediate_events"][0]
21452286

21462287

2147-
class TestIsMultiTurnAgentRun:
2148-
"""Unit tests for the _is_multi_turn_agent_run function."""
2288+
class TestIsMultiTurnAgentSimulation:
2289+
"""Unit tests for the _is_multi_turn_agent_simulation function."""
21492290

2150-
def test_is_multi_turn_agent_run_with_config(self):
2291+
def test_is_multi_turn_agent_simulation_with_config(self):
21512292
config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro")
2152-
assert _evals_common._is_multi_turn_agent_run(
2293+
assert _evals_common._is_multi_turn_agent_simulation(
21532294
user_simulator_config=config, prompt_dataset=pd.DataFrame()
21542295
)
21552296

2156-
def test_is_multi_turn_agent_run_with_conversation_plan(self):
2297+
def test_is_multi_turn_agent_simulation_with_conversation_plan(self):
21572298
prompt_dataset = pd.DataFrame({"conversation_plan": ["plan"]})
2158-
assert _evals_common._is_multi_turn_agent_run(
2299+
assert _evals_common._is_multi_turn_agent_simulation(
21592300
user_simulator_config=None, prompt_dataset=prompt_dataset
21602301
)
21612302

2162-
def test_is_multi_turn_agent_run_false(self):
2303+
def test_is_multi_turn_agent_simulation_false(self):
21632304
prompt_dataset = pd.DataFrame({"prompt": ["prompt"]})
2164-
assert not _evals_common._is_multi_turn_agent_run(
2305+
assert not _evals_common._is_multi_turn_agent_simulation(
21652306
user_simulator_config=None, prompt_dataset=prompt_dataset
21662307
)
21672308

0 commit comments

Comments
 (0)