From bbb449c8db03a1aff22009d122cae1122bbf80d7 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Tue, 17 Mar 2026 12:45:50 -0700 Subject: [PATCH] chore: GenAI Client(evals) - refactor evaluation data handling for Agent-based evals PiperOrigin-RevId: 885160026 --- tests/unit/vertexai/genai/test_evals.py | 296 +++++++++++++++---- vertexai/_genai/_evals_common.py | 234 +++++++++------ vertexai/_genai/_evals_data_converters.py | 4 +- vertexai/_genai/_evals_metric_handlers.py | 327 ++++++++++---------- vertexai/_genai/types/evals.py | 344 +++++++++------------- 5 files changed, 710 insertions(+), 495 deletions(-) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 2b534d4740..517a0ab33a 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -258,7 +258,10 @@ def test_eval_evaluate_with_agent_info(self, mock_execute_evaluation): dataset = vertexai_genai_types.EvaluationDataset( eval_dataset_df=pd.DataFrame([{"prompt": "p1", "response": "r1"}]) ) - agent_info = {"agent1": {"name": "agent1", "instruction": "instruction1"}} + agent_info = { + "name": "agent_system", + "agents": {"agent1": {"agent_id": "agent1", "instruction": "instruction1"}}, + } self.client.evals.evaluate( dataset=dataset, metrics=[vertexai_genai_types.Metric(name="exact_match")], @@ -1313,6 +1316,31 @@ def test_run_inference_with_agent_engine_and_session_inputs_dict( ] ], "response": ["agent response"], + "agent_data": [ + { + "agents": None, + "turns": [ + { + "events": [ + { + "author": "model", + "content": { + "parts": [{"text": "intermediate1"}] + }, + }, + { + "author": "model", + "content": { + "parts": [{"text": "agent response"}] + }, + }, + ], + "turn_id": "turn_0", + "turn_index": 0, + } + ], + } + ], } ), ) @@ -1392,6 +1420,31 @@ def test_run_inference_with_agent_engine_and_session_inputs_literal_string( ] ], "response": ["agent response"], + "agent_data": [ + { + "agents": None, + "turns": [ + { + "events": [ + { + "author": "model", + "content": { + "parts": [{"text": "intermediate1"}] + }, + }, + { + "author": "model", + "content": { + "parts": [{"text": "agent response"}] + }, + }, + ], + "turn_id": "turn_0", + "turn_index": 0, + } + ], + } + ], } ), ) @@ -1571,6 +1624,72 @@ def run_async_side_effect(*args, **kwargs): ], ], "response": ["agent response", "agent response 2"], + "agent_data": [ + { + "agents": { + "mock_agent": { + "agent_id": "mock_agent", + "agent_type": "Mock", + "instruction": "mock instruction", + "description": "mock description", + "tools": [], + "sub_agents": [], + } + }, + "turns": [ + { + "events": [ + { + "author": "model", + "content": { + "parts": [{"text": "intermediate1"}] + }, + }, + { + "author": "model", + "content": { + "parts": [{"text": "agent response"}] + }, + }, + ], + "turn_id": "turn_0", + "turn_index": 0, + } + ], + }, + { + "agents": { + "mock_agent": { + "agent_id": "mock_agent", + "agent_type": "Mock", + "instruction": "mock instruction", + "description": "mock description", + "tools": [], + "sub_agents": [], + } + }, + "turns": [ + { + "events": [ + { + "author": "model", + "content": { + "parts": [{"text": "intermediate2"}] + }, + }, + { + "author": "model", + "content": { + "parts": [{"text": "agent response 2"}] + }, + }, + ], + "turn_id": "turn_0", + "turn_index": 0, + } + ], + }, + ], } ) pd.testing.assert_frame_equal( @@ -1952,6 +2071,31 @@ def test_run_agent_internal_success(self, mock_run_agent): ] ], "response": ["final response"], + "agent_data": [ + { + "agents": None, + "turns": [ + { + "events": [ + { + "author": "model", + "content": { + "parts": [{"text": "intermediate1"}] + }, + }, + { + "author": "model", + "content": { + "parts": [{"text": "final response"}] + }, + }, + ], + "turn_id": "turn_0", + "turn_index": 0, + } + ], + } + ], } ) pd.testing.assert_frame_equal(result_df, expected_df) @@ -2144,24 +2288,24 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent): assert not result_df["intermediate_events"][0] -class TestIsMultiTurnAgentRun: - """Unit tests for the _is_multi_turn_agent_run function.""" +class TestIsMultiTurnAgentSimulation: + """Unit tests for the _is_multi_turn_agent_simulation function.""" - def test_is_multi_turn_agent_run_with_config(self): + def test_is_multi_turn_agent_simulation_with_config(self): config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro") - assert _evals_common._is_multi_turn_agent_run( + assert _evals_common._is_multi_turn_agent_simulation( user_simulator_config=config, prompt_dataset=pd.DataFrame() ) - def test_is_multi_turn_agent_run_with_conversation_plan(self): + def test_is_multi_turn_agent_simulation_with_conversation_plan(self): prompt_dataset = pd.DataFrame({"conversation_plan": ["plan"]}) - assert _evals_common._is_multi_turn_agent_run( + assert _evals_common._is_multi_turn_agent_simulation( user_simulator_config=None, prompt_dataset=prompt_dataset ) - def test_is_multi_turn_agent_run_false(self): + def test_is_multi_turn_agent_simulation_false(self): prompt_dataset = pd.DataFrame({"prompt": ["prompt"]}) - assert not _evals_common._is_multi_turn_agent_run( + assert not _evals_common._is_multi_turn_agent_simulation( user_simulator_config=None, prompt_dataset=prompt_dataset ) @@ -3327,15 +3471,21 @@ def test_agent_info_creation(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - description="description1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + description="description1", + tools=[tool], + ) + }, ) - assert agent_info.name == "agent1" - assert agent_info.instruction == "instruction1" - assert agent_info.description == "description1" - assert agent_info.tool_declarations == [tool] + assert agent_info.name == "agent_system" + assert "agent1" in agent_info.agents + assert agent_info.agents["agent1"].instruction == "instruction1" + assert agent_info.agents["agent1"].description == "description1" + assert agent_info.agents["agent1"].tools == [tool] @mock.patch.object(genai_types.FunctionDeclaration, "from_callable_with_api_option") def test_load_from_agent(self, mock_from_callable): @@ -3351,6 +3501,7 @@ def my_search_tool(query: str) -> str: mock_agent.instruction = "mock instruction" mock_agent.description = "mock description" mock_agent.tools = [my_search_tool] + mock_agent.sub_agents = [] agent_info = vertexai_genai_types.evals.AgentInfo.load_from_agent( agent=mock_agent, @@ -3358,15 +3509,15 @@ def my_search_tool(query: str) -> str: ) assert agent_info.name == "mock_agent" - assert agent_info.instruction == "mock instruction" - assert agent_info.description == "mock description" + assert agent_info.agents["mock_agent"].instruction == "mock instruction" + assert agent_info.agents["mock_agent"].description == "mock description" assert ( agent_info.agent_resource_name == "projects/123/locations/abc/reasoningEngines/456" ) - assert len(agent_info.tool_declarations) == 1 - assert isinstance(agent_info.tool_declarations[0], genai_types.Tool) - assert agent_info.tool_declarations[0].function_declarations == [ + assert len(agent_info.agents["mock_agent"].tools) == 1 + assert isinstance(agent_info.agents["mock_agent"].tools[0], genai_types.Tool) + assert agent_info.agents["mock_agent"].tools[0].function_declarations == [ mock_function_declaration ] mock_from_callable.assert_called_once_with(callable=my_search_tool) @@ -3482,7 +3633,9 @@ def test_no_conflict_with_inference_configs(self): dataset = vertexai_genai_types.EvaluationDataset( eval_dataset_df=pd.DataFrame([{"agent_data": {"turns": []}}]) ) - inference_configs = {"cand1": {"agent_configs": {"agent1": {"name": "agent1"}}}} + inference_configs = { + "cand1": {"agent_configs": {"agent1": {"agent_id": "agent1"}}} + } _evals_utils._validate_dataset_agent_data(dataset, inference_configs) def test_no_conflict_if_inference_configs_has_no_agent_configs(self): @@ -3535,9 +3688,14 @@ def test_eval_case_with_agent_eval_fields(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[tool], + ) + }, ) intermediate_events = [ vertexai_genai_types.evals.Event( @@ -4407,9 +4565,14 @@ def test_eval_case_to_agent_data(self): ] ) agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[tool], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[tool], + ) + }, ) intermediate_events = [ vertexai_genai_types.evals.Event( @@ -4417,6 +4580,7 @@ def test_eval_case_to_agent_data(self): content=genai_types.Content( parts=[genai_types.Part(text="intermediate event")] ), + author="agent1", ) ] eval_case = vertexai_genai_types.EvalCase( @@ -4432,13 +4596,19 @@ def test_eval_case_to_agent_data(self): agent_data = ( _evals_metric_handlers.PredefinedMetricHandler._eval_case_to_agent_data( - eval_case + eval_case, + eval_case.prompt, + eval_case.responses[0].response, ) ) - assert agent_data.agent_config.developer_instruction.text == "instruction1" - assert agent_data.agent_config.legacy_tools.tool == [tool] - assert agent_data.events.event[0].parts[0].text == "intermediate event" + assert "agent1" in agent_data.agents + assert agent_data.agents["agent1"].instruction == "instruction1" + assert agent_data.agents["agent1"].tools == [tool] + assert len(agent_data.turns[0].events) == 3 + assert ( + agent_data.turns[0].events[1].content.parts[0].text == "intermediate event" + ) def test_eval_case_to_agent_data_events_only(self): intermediate_events = [ @@ -4466,8 +4636,10 @@ def test_eval_case_to_agent_data_events_only(self): ) ) - assert agent_data.agent_config is None - assert agent_data.events.event[0].parts[0].text == "intermediate event" + assert agent_data.agents is None + assert ( + agent_data.turns[0].events[0].content.parts[0].text == "intermediate event" + ) def test_eval_case_to_agent_data_empty_event_content(self): intermediate_events = [ @@ -4493,14 +4665,19 @@ def test_eval_case_to_agent_data_empty_event_content(self): ) ) - assert agent_data.agent_config is None - assert not agent_data.events.event + assert agent_data.agents is None + assert agent_data.turns[0].events[0].content is None def test_eval_case_to_agent_data_empty_intermediate_events_list(self): agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[], + ) + }, ) eval_case = vertexai_genai_types.EvalCase( @@ -4519,13 +4696,18 @@ def test_eval_case_to_agent_data_empty_intermediate_events_list(self): ) ) - assert not agent_data.events.event + assert agent_data.turns is None def test_eval_case_to_agent_data_agent_info_empty_tools(self): agent_info = vertexai_genai_types.evals.AgentInfo( - name="agent1", - instruction="instruction1", - tool_declarations=[], + name="agent_system", + agents={ + "agent1": vertexai_genai_types.evals.AgentConfig( + agent_id="agent1", + instruction="instruction1", + tools=[], + ) + }, ) eval_case = vertexai_genai_types.EvalCase( prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]), @@ -4544,8 +4726,8 @@ def test_eval_case_to_agent_data_agent_info_empty_tools(self): ) ) - assert agent_data.agent_config.developer_instruction.text == "instruction1" - assert not agent_data.agent_config.legacy_tools.tool + assert agent_data.agents["agent1"].instruction == "instruction1" + assert not agent_data.agents["agent1"].tools def test_eval_case_to_agent_data_agent_info_empty(self): intermediate_events = [ @@ -4573,7 +4755,7 @@ def test_eval_case_to_agent_data_agent_info_empty(self): ) ) - assert agent_data.agent_config is None + assert agent_data.agents is None @mock.patch.object(_evals_metric_handlers.logger, "warning") def test_tool_use_quality_metric_no_tool_call_logs_warning( @@ -5102,10 +5284,15 @@ def test_execute_evaluation_with_agent_info( ] } agent_info = { - "name": "agent1", - "instruction": "instruction1", - "description": "description1", - "tool_declarations": [tool], + "name": "agent_system", + "agents": { + "agent1": { + "agent_id": "agent1", + "instruction": "instruction1", + "description": "description1", + "tools": [tool], + } + }, } result = _evals_common._execute_evaluation( @@ -5117,9 +5304,10 @@ def test_execute_evaluation_with_agent_info( assert isinstance(result, vertexai_genai_types.EvaluationResult) assert len(result.eval_case_results) == 1 - assert result.agent_info.name == "agent1" - assert result.agent_info.instruction == "instruction1" - assert result.agent_info.tool_declarations == [ + assert result.agent_info.name == "agent_system" + assert "agent1" in result.agent_info.agents + assert result.agent_info.agents["agent1"].instruction == "instruction1" + assert result.agent_info.agents["agent1"].tools == [ genai_types.Tool( function_declarations=[ genai_types.FunctionDeclaration( diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index 9fe93230eb..5cef72e641 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -346,14 +346,7 @@ def _resolve_inference_configs( if agent_info_pydantic and agent_info_pydantic.name: inference_configs = {} inference_configs[agent_info_pydantic.name] = ( - types.EvaluationRunInferenceConfig( - agent_config=types.EvaluationRunAgentConfig( - developer_instruction=genai_types.Content( - parts=[genai_types.Part(text=agent_info_pydantic.instruction)] - ), - tools=agent_info_pydantic.tool_declarations, - ) - ) + types.EvaluationRunInferenceConfig(agent_configs=agent_info_pydantic.agents) ) # Resolve prompt template data if inference_configs: @@ -1604,93 +1597,111 @@ def _get_session_inputs(row: pd.Series) -> types.evals.SessionInput: ) -def _is_multi_turn_agent_run( +def _is_multi_turn_agent_simulation( user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None, prompt_dataset: pd.DataFrame = None, ) -> bool: - """Checks if the agent run is multi-turn.""" + """Checks if the agent run is a multi-turn user simulation.""" return ( user_simulator_config is not None or "conversation_plan" in prompt_dataset.columns ) -def _run_agent_internal( - api_client: BaseApiClient, - agent_engine: Optional[Union[str, types.AgentEngine]], - agent: Optional[LlmAgent], - prompt_dataset: pd.DataFrame, - user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None, -) -> pd.DataFrame: - """Runs an agent.""" - raw_responses = _run_agent( - api_client=api_client, - agent_engine=agent_engine, - agent=agent, - prompt_dataset=prompt_dataset, - user_simulator_config=user_simulator_config, - ) - processed_intermediate_events = [] - processed_responses = [] - processed_agent_data = [] - agent_data_agents = None - if agent: - agent_data_agents = types.evals.AgentData._get_agents_map(agent) +def _process_multi_turn_agent_response( + resp_item: Any, + agent_data_agents: Optional[dict[str, Any]], +) -> Optional[Union[str, dict[str, Any]]]: + """Processes a multi-turn agent response.""" + if isinstance(resp_item, dict) and "error" in resp_item: + return json.dumps(resp_item) + return types.evals.AgentData( + turns=resp_item, + agents=agent_data_agents, + ).model_dump(exclude_unset=True) + + +def _process_single_turn_agent_response( + resp_item: Any, + agent_data_agents: Optional[dict[str, Any]], +) -> tuple[ + Optional[Union[str, dict[str, Any]]], + list[dict[str, Any]], + Optional[Union[str, dict[str, Any]]], +]: + """Processes a single-turn agent response.""" + intermediate_events_row: list[dict[str, Any]] = [] + response_row: Optional[Union[str, dict[str, Any]]] = None + agent_data_row: Optional[Union[str, dict[str, Any]]] = None - for resp_item in raw_responses: - intermediate_events_row: list[dict[str, Any]] = [] - response_row: Optional[Union[str, dict[str, Any]]] = None - agent_data_row: Optional[Union[str, dict[str, Any]]] = None + if isinstance(resp_item, list): + try: + response_row = resp_item[-1]["content"]["parts"][0]["text"] + for intermediate_event in resp_item[:-1]: + intermediate_events_row.append( + { + "event_id": intermediate_event.get("id"), + "content": intermediate_event.get("content"), + "creation_timestamp": intermediate_event.get("timestamp"), + "author": intermediate_event.get("author"), + } + ) + # Construct AgentData natively for single-turn runs + agent_events = [] + for event_dict in resp_item: + content_dict = event_dict.get("content") + content_obj = None + if content_dict: + content_obj = genai_types.Content.model_validate(content_dict) + + agent_events.append( + types.evals.AgentEvent( + author=event_dict.get("author", "model"), + content=content_obj, + ) + ) - if _is_multi_turn_agent_run(user_simulator_config, prompt_dataset): - if isinstance(resp_item, dict) and "error" in resp_item: - agent_data_row = json.dumps(resp_item) - else: - # TODO: Migrate single turn agent run result to AgentData. - agent_data_row = types.evals.AgentData( - turns=resp_item, - agents=agent_data_agents, - ).model_dump() + turn = types.evals.ConversationTurn( + turn_index=0, + turn_id="turn_0", + events=agent_events, + ) + agent_data_row = types.evals.AgentData( + turns=[turn], + agents=agent_data_agents, + ).model_dump(exclude_unset=True) + except Exception as e: # pylint: disable=broad-exception-caught + error_payload = { + "error": ( + f"Failed to parse agent run response {str(resp_item)} to " + f"agent data: {e}" + ), + } + response_row = json.dumps(error_payload) + agent_data_row = json.dumps(error_payload) + elif isinstance(resp_item, dict) and "error" in resp_item: + response_row = json.dumps(resp_item) + else: + error_payload = { + "error": "Unexpected response type from agent run", + "response_type": str(type(resp_item)), + "details": str(resp_item), + } + response_row = json.dumps(error_payload) - else: - if isinstance(resp_item, list): - try: - response_row = resp_item[-1]["content"]["parts"][0]["text"] - for intermediate_event in resp_item[:-1]: - intermediate_events_row.append( - { - "event_id": intermediate_event.get("id"), - "content": intermediate_event.get("content"), - "creation_timestamp": intermediate_event.get( - "timestamp" - ), - "author": intermediate_event.get("author"), - } - ) - except Exception as e: # pylint: disable=broad-exception-caught - error_payload = { - "error": ( - f"Failed to parse agent run response {str(resp_item)} to " - f"agent data: {e}" - ), - } - response_row = json.dumps(error_payload) - elif isinstance(resp_item, dict) and "error" in resp_item: - response_row = json.dumps(resp_item) - else: - error_payload = { - "error": "Unexpected response type from agent run", - "response_type": str(type(resp_item)), - "details": str(resp_item), - } - response_row = json.dumps(error_payload) + return response_row, intermediate_events_row, agent_data_row - processed_intermediate_events.append(intermediate_events_row) - processed_responses.append(response_row) - processed_agent_data.append(agent_data_row) +def _create_agent_results_dataframe( + prompt_dataset: pd.DataFrame, + processed_responses: list[Any], + processed_intermediate_events: list[Any], + processed_agent_data: list[Any], + is_user_simulation: bool, +) -> pd.DataFrame: + """Creates a DataFrame from the processed agent responses.""" df_dict: dict[str, Any] = {} - if _is_multi_turn_agent_run(user_simulator_config, prompt_dataset): + if is_user_simulation: df_dict[AGENT_DATA] = processed_agent_data if len(processed_agent_data) != len(prompt_dataset): raise RuntimeError( @@ -1705,6 +1716,7 @@ def _run_agent_internal( else: df_dict[_evals_constant.INTERMEDIATE_EVENTS] = processed_intermediate_events df_dict[_evals_constant.RESPONSE] = processed_responses + df_dict[AGENT_DATA] = processed_agent_data if len(processed_responses) != len(prompt_dataset) or len( processed_responses ) != len(processed_intermediate_events): @@ -1730,6 +1742,55 @@ def _run_agent_internal( return results_df +def _run_agent_internal( + api_client: BaseApiClient, + agent_engine: Optional[Union[str, types.AgentEngine]], + agent: Optional[LlmAgent], + prompt_dataset: pd.DataFrame, + user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None, +) -> pd.DataFrame: + """Runs an agent.""" + raw_responses = _run_agent( + api_client=api_client, + agent_engine=agent_engine, + agent=agent, + prompt_dataset=prompt_dataset, + user_simulator_config=user_simulator_config, + ) + processed_intermediate_events = [] + processed_responses = [] + processed_agent_data = [] + agent_data_agents = None + if agent: + agent_data_agents = types.evals.AgentData.get_agents_map(agent) + + is_user_simulation = _is_multi_turn_agent_simulation( + user_simulator_config, prompt_dataset + ) + + for resp_item in raw_responses: + if is_user_simulation: + agent_data_row = _process_multi_turn_agent_response( + resp_item, agent_data_agents + ) + processed_agent_data.append(agent_data_row) + else: + response_row, intermediate_events_row, agent_data_row = ( + _process_single_turn_agent_response(resp_item, agent_data_agents) + ) + processed_responses.append(response_row) + processed_intermediate_events.append(intermediate_events_row) + processed_agent_data.append(agent_data_row) + + return _create_agent_results_dataframe( + prompt_dataset, + processed_responses, + processed_intermediate_events, + processed_agent_data, + is_user_simulation, + ) + + def _run_agent( api_client: BaseApiClient, agent_engine: Optional[Union[str, types.AgentEngine]], @@ -2163,12 +2224,17 @@ def _get_agent_info_from_inference_configs( else None ) instruction = di.parts[0].text if di and di.parts and di.parts[0].text else None + tools = agent_config.tools if agent_config and agent_config.tools else None + return types.evals.AgentInfo( name=candidate_names[0], - instruction=instruction, - tool_declarations=( - agent_config.tools if agent_config and agent_config.tools else None - ), + agents={ + "agent_0": types.evals.AgentConfig( + instruction=instruction, + tools=tools, + ) + }, + root_agent_id="agent_0", ) diff --git a/vertexai/_genai/_evals_data_converters.py b/vertexai/_genai/_evals_data_converters.py index 89b3cb9852..33f18b1519 100644 --- a/vertexai/_genai/_evals_data_converters.py +++ b/vertexai/_genai/_evals_data_converters.py @@ -810,6 +810,7 @@ def merge_evaluation_datasets( "conversation_history", "intermediate_events", "agent_data", + "agent_info", }, exclude_none=True, ) @@ -834,6 +835,7 @@ def merge_evaluation_datasets( "conversation_history", "intermediate_events", "agent_data", + "agent_info", }, exclude_none=True, ) @@ -865,7 +867,7 @@ def merge_evaluation_datasets( reference=base_eval_case.reference, system_instruction=base_eval_case.system_instruction, conversation_history=base_eval_case.conversation_history, - agent_info=agent_info, + agent_info=agent_info or base_eval_case.agent_info, agent_data=base_eval_case.agent_data, intermediate_events=base_eval_case.intermediate_events, **eval_case_custom_columns, diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py index 47e543fa10..b7dc349e68 100644 --- a/vertexai/_genai/_evals_metric_handlers.py +++ b/vertexai/_genai/_evals_metric_handlers.py @@ -93,6 +93,72 @@ def _extract_text_from_content( return text_accumulator if any_text_part_found else None +def _get_prompt_from_eval_case( + eval_case: types.EvalCase, +) -> Optional[genai_types.Content]: + """Extracts prompt content from eval_case.prompt or starting_prompt.""" + if eval_case.prompt: + return eval_case.prompt + + user_scenario = getattr(eval_case, "user_scenario", None) + if user_scenario and user_scenario.starting_prompt: + return genai_types.Content( + parts=[genai_types.Part(text=user_scenario.starting_prompt)] + ) + + return None + + +def _get_response_from_eval_case( + eval_case: types.EvalCase, response_index: int, metric_name: str +) -> Optional[genai_types.Content]: + """Extracts response content from eval_case.responses.""" + response_content = None + if eval_case.responses and response_index < len(eval_case.responses): + response_content = eval_case.responses[response_index].response + + return response_content + + +def _value_to_content_list(value: Any) -> list[genai_types.Content]: + """Converts a value to a list of Content objects.""" + if isinstance(value, genai_types.Content): + return [value] + if isinstance(value, types.ResponseCandidate): + return [value.response] if value.response else [] + if isinstance(value, list) and value: + if isinstance(value[0], genai_types.Content): + return value + if isinstance(value[0], types.evals.Message): + history_texts = [] + for msg_obj in value: + msg_text = _extract_text_from_content(msg_obj.content) + if msg_text: + role = msg_obj.content.role or msg_obj.author or "user" + history_texts.append(f"{role}: {msg_text}") + return [ + genai_types.Content( + parts=[genai_types.Part(text="\n".join(history_texts))] + ) + ] + return [genai_types.Content(parts=[genai_types.Part(text=json.dumps(value))])] + if isinstance(value, dict): + return [genai_types.Content(parts=[genai_types.Part(text=json.dumps(value))])] + return [genai_types.Content(parts=[genai_types.Part(text=str(value))])] + + +def _get_autorater_config(metric: types.Metric) -> dict[str, Any]: + """Extracts autorater config settings from a metric.""" + autorater_config: dict[str, Any] = {} + if metric.judge_model: + autorater_config["autorater_model"] = metric.judge_model + if metric.judge_model_generation_config: + autorater_config["generation_config"] = metric.judge_model_generation_config + if metric.judge_model_sampling_count: + autorater_config["sampling_count"] = metric.judge_model_sampling_count + return autorater_config + + def _default_aggregate_scores( metric_name: str, eval_case_metric_results: list[types.EvalCaseMetricResult], @@ -213,17 +279,13 @@ def _build_request_payload( ) -> dict[str, Any]: """Builds the request parameters for evaluate instances.""" request_payload = {} - if response_index >= len(eval_case.responses): - raise IndexError( - f"response_index {response_index} out of bounds for eval_case with" - f" {len(eval_case.responses)} responses." - ) - if eval_case.responses is None: - raise ValueError( - f"No responses found for eval_case with ID {eval_case.eval_case_id}." - ) - current_response_candidate = eval_case.responses[response_index] - if _extract_text_from_content(current_response_candidate.response) is None: + + response_content = _get_response_from_eval_case( + eval_case, response_index, self.metric.name + ) + prediction_text = _extract_text_from_content(response_content) + + if prediction_text is None: raise ValueError( f"Response text missing for candidate {response_index} in eval_case" f" {eval_case.eval_case_id or 'Unknown ID'}." @@ -248,9 +310,7 @@ def _build_request_payload( }, "instances": [ { - "prediction": _extract_text_from_content( - current_response_candidate.response - ), + "prediction": prediction_text, "reference": _extract_text_from_content( eval_case.reference.response ), @@ -262,9 +322,7 @@ def _build_request_payload( "metric_spec": {}, "instances": [ { - "prediction": _extract_text_from_content( - current_response_candidate.response - ), + "prediction": prediction_text, "reference": _extract_text_from_content( eval_case.reference.response ), @@ -350,18 +408,13 @@ def _build_request_payload( if hasattr(self.metric, "target_language"): target_language = self.metric.target_language - if response_index >= len(eval_case.responses): - raise IndexError( - f"response_index {response_index} out of bounds for eval_case with" - f" {len(eval_case.responses)} responses." - ) + response_content = _get_response_from_eval_case( + eval_case, response_index, self.metric.name + ) + prediction_text = _extract_text_from_content(response_content) + prompt_text = _extract_text_from_content(_get_prompt_from_eval_case(eval_case)) - if eval_case.responses is None: - raise ValueError( - f"No responses found for eval_case with ID {eval_case.eval_case_id}." - ) - current_response_candidate = eval_case.responses[response_index] - if _extract_text_from_content(current_response_candidate.response) is None: + if prediction_text is None: raise ValueError( f"Response text missing for candidate {response_index} in eval_case" f" {eval_case.eval_case_id or 'Unknown ID'}." @@ -375,7 +428,7 @@ def _build_request_payload( "Reference text missing for eval_case" f" {eval_case.eval_case_id or 'Unknown ID'}." ) - if _extract_text_from_content(eval_case.prompt) is None: + if prompt_text is None: raise ValueError( "Prompt text (source for translation) missing for eval_case" f" {eval_case.eval_case_id or 'Unknown ID'}." @@ -388,11 +441,9 @@ def _build_request_payload( "target_language": target_language, }, "instance": { - "prediction": _extract_text_from_content( - current_response_candidate.response - ), + "prediction": prediction_text, "reference": _extract_text_from_content(eval_case.reference.response), - "source": _extract_text_from_content(eval_case.prompt), + "source": prompt_text, }, } return request_payload @@ -528,10 +579,11 @@ def _build_rubric_based_input( rubrics_list = [] parsed_rubrics = [types.evals.Rubric(**r) for r in rubrics_list] + extracted_prompt = _get_prompt_from_eval_case(eval_case) rubric_enhanced_contents = { "prompt": ( - [eval_case.prompt.model_dump(mode="json", exclude_none=True)] - if eval_case.prompt + [extracted_prompt.model_dump(mode="json", exclude_none=True)] + if extracted_prompt else None ), "response": [response_content.model_dump(mode="json", exclude_none=True)], @@ -561,8 +613,9 @@ def _build_pointwise_input( self, eval_case: types.EvalCase, response_content: genai_types.Content ) -> dict[str, Any]: """Builds the payload for a standard pointwise LLM metric.""" + extracted_prompt = _get_prompt_from_eval_case(eval_case) instance_data = { - "prompt": eval_case.prompt, + "prompt": extracted_prompt, "response": response_content, } template_obj = types.PromptTemplate(text=self.metric.prompt_template) @@ -573,46 +626,8 @@ def _build_pointwise_input( content_map_values = {} for key, value in instance_data.items(): - content_list_to_serialize = [] - if isinstance(value, genai_types.Content): - content_list_to_serialize = [value] - elif isinstance(value, types.ResponseCandidate): - if value.response: # pytype: disable=attribute-error - content_list_to_serialize = [value.response] - elif isinstance(value, list) and value: - if isinstance(value[0], genai_types.Content): - content_list_to_serialize = value - elif isinstance(value[0], types.evals.Message): - history_texts = [] - for msg_obj in value: - msg_text = _extract_text_from_content(msg_obj.content) - if msg_text: - role = msg_obj.content.role or msg_obj.author or "user" - history_texts.append(f"{role}: {msg_text}") - content_list_to_serialize = [ - genai_types.Content( - parts=[genai_types.Part(text="\n".join(history_texts))] - ) - ] - else: - content_list_to_serialize = [ - genai_types.Content( - parts=[genai_types.Part(text=json.dumps(value))] - ) - ] - elif isinstance(value, dict): - content_list_to_serialize = [ - genai_types.Content( - parts=[genai_types.Part(text=json.dumps(value))] - ) - ] - else: - content_list_to_serialize = [ - genai_types.Content(parts=[genai_types.Part(text=str(value))]) - ] - content_map_values[key] = types.ContentMapContents( - contents=content_list_to_serialize + contents=_value_to_content_list(value) ) instance_payload = types.PointwiseMetricInstance( @@ -638,15 +653,7 @@ def _build_pointwise_input( def _add_autorater_config(self, payload: dict[str, Any]) -> None: """Adds autorater config to the request payload if specified.""" - autorater_config: dict[str, Any] = {} - if self.metric.judge_model: - autorater_config["autorater_model"] = self.metric.judge_model - if self.metric.judge_model_generation_config: - autorater_config["generation_config"] = ( - self.metric.judge_model_generation_config - ) - if self.metric.judge_model_sampling_count: - autorater_config["sampling_count"] = self.metric.judge_model_sampling_count + autorater_config = _get_autorater_config(self.metric) if not autorater_config: return @@ -663,10 +670,10 @@ def _build_request_payload( self, eval_case: types.EvalCase, response_index: int ) -> dict[str, Any]: """Builds the request parameters for evaluate instances request.""" - if not eval_case.responses or response_index >= len(eval_case.responses): - raise IndexError(f"response_index {response_index} is out of bounds.") + response_content = _get_response_from_eval_case( + eval_case, response_index, self.metric.name + ) - response_content = eval_case.responses[response_index].response if not response_content: raise ValueError( f"Response content missing for candidate {response_index}." @@ -804,26 +811,36 @@ def get_metric_result( eval_case.model_dump(exclude_none=True), ) - if response_index >= len(eval_case.responses): + try: + response_content = _get_response_from_eval_case( + eval_case, response_index, metric_name + ) + except ValueError as e: + return types.EvalCaseMetricResult( + metric_name=metric_name, + error_message=str(e), + ) + + if not response_content: return types.EvalCaseMetricResult( - metric_name=self.metric.name, + metric_name=metric_name, error_message=( - f"response_index {response_index} out of bounds for EvalCase" - f" {eval_case.eval_case_id or 'Unknown ID'}." + f"No response found for candidate {response_index} in EvalCase" + f" {eval_case.eval_case_id}." ), ) - if not eval_case.responses: - raise ValueError(f"EvalCase {eval_case.eval_case_id} has no responses.") - - current_response_candidate = eval_case.responses[response_index] - instance_for_custom_fn = eval_case.model_dump( exclude={"responses"}, mode="json", exclude_none=True ) - instance_for_custom_fn["response"] = current_response_candidate.model_dump( + instance_for_custom_fn["response"] = response_content.model_dump( mode="json", exclude_none=True - ).get("response") + ) + extracted_prompt = _get_prompt_from_eval_case(eval_case) + if extracted_prompt: + instance_for_custom_fn["prompt"] = extracted_prompt.model_dump( + mode="json", exclude_none=True + ) error_msg = None score = None @@ -906,60 +923,74 @@ def _content_to_instance_data( @staticmethod def _eval_case_to_agent_data( eval_case: types.EvalCase, + prompt_content: Optional[genai_types.Content] = None, + response_content: Optional[genai_types.Content] = None, ) -> Optional[types.evals.AgentData]: - """Converts an EvalCase object to an AgentData object.""" + """Converts an EvalCase object to a single turn AgentData object.""" if getattr(eval_case, "agent_data", None): return eval_case.agent_data - if not eval_case.agent_info and not eval_case.intermediate_events: + if ( + not eval_case.agent_info + and not eval_case.intermediate_events + and not prompt_content + and not response_content + ): return None - tools = None - developer_instruction = None - agent_config = None - tool_declarations = [] - event_contents = [] + agents_map = None if eval_case.agent_info: - agent_info = eval_case.agent_info - if agent_info.instruction: - developer_instruction = types.evals.InstanceData( - text=agent_info.instruction - ) - if agent_info.tool_declarations: - tool_declarations = agent_info.tool_declarations - tools = types.evals.Tools(tool=tool_declarations) - - if tools or developer_instruction: - agent_config = types.evals.AgentConfig( - legacy_tools=tools, - developer_instruction=developer_instruction, + agents_map = eval_case.agent_info.agents + + events = [] + if prompt_content: + events.append( + types.evals.AgentEvent( + author="user", + content=prompt_content, ) + ) if eval_case.intermediate_events: - event_contents = [ - event.content - for event in eval_case.intermediate_events - if event.content + for event in eval_case.intermediate_events: + events.append( + types.evals.AgentEvent( + author=event.author, + content=event.content, + event_time=event.creation_timestamp, + ) + ) + + if response_content: + events.append( + types.evals.AgentEvent( + author="model", + content=response_content, + ) + ) + + turns = None + if events: + turns = [ + types.evals.ConversationTurn( + turn_index=0, + turn_id="turn_0", + events=events, + ) ] - events = types.evals.Events(event=event_contents) return types.evals.AgentData( - agent_config=agent_config, - events=events, + agents=agents_map, + turns=turns, ) def _build_request_payload( self, eval_case: types.EvalCase, response_index: int ) -> dict[str, Any]: """Builds the request parameters for evaluate instances request.""" - if ( - not eval_case.responses or response_index >= len(eval_case.responses) - ) and not getattr(eval_case, "agent_data", None): - raise IndexError(f"response_index {response_index} is out of bounds.") - - response_content = None - if eval_case.responses and response_index < len(eval_case.responses): - response_content = eval_case.responses[response_index].response + response_content = _get_response_from_eval_case( + eval_case, response_index, self.metric.name + ) if not response_content and not getattr(eval_case, "agent_data", None): raise ValueError( @@ -980,21 +1011,22 @@ def _build_request_payload( eval_case.reference.response ) + extracted_prompt = _get_prompt_from_eval_case(eval_case) prompt_instance_data = None if self.metric.name is not None and self.metric.name.startswith("multi_turn"): prompt_contents = [] if eval_case.conversation_history: for message in eval_case.conversation_history: prompt_contents.append(message.content) - if eval_case.prompt: - prompt_contents.append(eval_case.prompt) + if extracted_prompt: + prompt_contents.append(extracted_prompt) prompt_instance_data = types.evals.InstanceData( contents=types.evals.InstanceDataContents(contents=prompt_contents) ) else: prompt_instance_data = PredefinedMetricHandler._content_to_instance_data( - eval_case.prompt + extracted_prompt ) other_data_map: dict[str, Any] = {} @@ -1023,22 +1055,16 @@ def _build_request_payload( if other_data_map else None ), - agent_data=PredefinedMetricHandler._eval_case_to_agent_data(eval_case), + agent_data=PredefinedMetricHandler._eval_case_to_agent_data( + eval_case, extracted_prompt, response_content + ), ) request_payload: dict[str, Any] = { "instance": instance_payload, } - autorater_config: dict[str, Any] = {} - if self.metric.judge_model: - autorater_config["autorater_model"] = self.metric.judge_model - if self.metric.judge_model_generation_config: - autorater_config["generation_config"] = ( - self.metric.judge_model_generation_config - ) - if self.metric.judge_model_sampling_count: - autorater_config["sampling_count"] = self.metric.judge_model_sampling_count + autorater_config = _get_autorater_config(self.metric) if autorater_config: request_payload["autorater_config"] = genai_types.AutoraterConfig( **autorater_config @@ -1155,10 +1181,10 @@ def _build_request_payload( self, eval_case: types.EvalCase, response_index: int ) -> dict[str, Any]: """Builds the request parameters for evaluate instances request.""" - if not eval_case.responses or response_index >= len(eval_case.responses): - raise IndexError(f"response_index {response_index} is out of bounds.") + response_content = _get_response_from_eval_case( + eval_case, response_index, self.metric.name + ) - response_content = eval_case.responses[response_index].response if not response_content: raise ValueError( f"Response content missing for candidate {response_index}." @@ -1170,8 +1196,9 @@ def _build_request_payload( eval_case.reference.response ) + extracted_prompt = _get_prompt_from_eval_case(eval_case) prompt_instance_data = PredefinedMetricHandler._content_to_instance_data( - eval_case.prompt + extracted_prompt ) instance_payload = types.EvaluationInstance( diff --git a/vertexai/_genai/types/evals.py b/vertexai/_genai/types/evals.py index b95b4e320d..03271f755d 100644 --- a/vertexai/_genai/types/evals.py +++ b/vertexai/_genai/types/evals.py @@ -36,83 +36,6 @@ class Importance(_common.CaseInSensitiveEnum): """Low importance.""" -class Tools(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Represents a list of tools for an agent. - """ - - tool: Optional[list[genai_types.Tool]] = Field( - default=None, - description="""List of tools: each tool can have multiple function declarations.""", - ) - - -class ToolsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Represents a list of tools for an agent. - """ - - tool: Optional[list[genai_types.ToolDict]] - """List of tools: each tool can have multiple function declarations.""" - - -ToolsOrDict = Union[Tools, ToolsDict] - - -class InstanceDataContents(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - List of standard Content messages from Gemini API. - """ - - contents: Optional[list[genai_types.Content]] = Field( - default=None, description="""Repeated contents.""" - ) - - -class InstanceDataContentsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - List of standard Content messages from Gemini API. - """ - - contents: Optional[list[genai_types.ContentDict]] - """Repeated contents.""" - - -InstanceDataContentsOrDict = Union[InstanceDataContents, InstanceDataContentsDict] - - -class InstanceData(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Instance data used to populate placeholders in a metric prompt template. - """ - - text: Optional[str] = Field(default=None, description="""Text data.""") - contents: Optional[InstanceDataContents] = Field( - default=None, description="""List of Gemini content data.""" - ) - - -class InstanceDataDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Instance data used to populate placeholders in a metric prompt template. - """ - - text: Optional[str] - """Text data.""" - - contents: Optional[InstanceDataContentsDict] - """List of Gemini content data.""" - - -InstanceDataOrDict = Union[InstanceData, InstanceDataDict] - - class AgentConfig(_common.BaseModel): """Represents configuration for an Agent.""" @@ -122,10 +45,6 @@ class AgentConfig(_common.BaseModel): This ID is used to refer to this agent, e.g., in AgentEvent.author, or in the `sub_agents` field. It must be unique within the `agents` map.""", ) - agent_resource_name: Optional[str] = Field( - default=None, - description="""The Agent Engine resource name, formatted as `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""", - ) agent_type: Optional[str] = Field( default=None, description="""The type or class of the agent (e.g., "LlmAgent", "RouterAgent", @@ -152,24 +71,13 @@ class AgentConfig(_common.BaseModel): description="""The list of valid agent IDs that this agent can delegate to. This defines the directed edges in the multi-agent system graph topology.""", ) - tools_text: Optional[str] = Field( - default=None, - description="""A JSON string containing a list of tools available to an agent.""", - ) - legacy_tools: Optional[Tools] = Field( - default=None, description="""List of tools.""" - ) - developer_instruction: Optional[InstanceData] = Field( - default=None, - description="""A field containing instructions from the developer for the agent.""", - ) @staticmethod def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: """Gets tool declarations from an agent. Args: - agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. + agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type. Returns: The tool declarations of the agent. @@ -188,25 +96,26 @@ def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: return tool_declarations @classmethod - def from_agent( - cls, agent: Any, agent_resource_name: Optional[str] = None - ) -> "AgentConfig": - """Creates an AgentConfig from an ADK agent object. + def from_agent(cls, agent: Any) -> "AgentConfig": + """Creates an AgentConfig from an ADK agent. Args: - agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - agent_resource_name: Optional. The agent engine resource name. + agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type. + agent_resource_name: Optional. The agent engine resource name for the deployed agent. Returns: - An AgentConfig object populated with the agent's metadata. + An AgentConfig populated with the agent's metadata for evaluation. """ return cls( # pytype: disable=missing-parameter - agent_id=getattr(agent, "name", "agent_0") or "agent_0", - agent_resource_name=agent_resource_name, + agent_id=getattr(agent, "name", None), agent_type=agent.__class__.__name__, description=getattr(agent, "description", None), instruction=getattr(agent, "instruction", None), tools=AgentConfig._get_tool_declarations_from_agent(agent), + sub_agents=[ + getattr(sub_agent, "name", None) + for sub_agent in getattr(agent, "sub_agents", []) + ], ) @@ -218,9 +127,6 @@ class AgentConfigDict(TypedDict, total=False): This ID is used to refer to this agent, e.g., in AgentEvent.author, or in the `sub_agents` field. It must be unique within the `agents` map.""" - agent_resource_name: Optional[str] - """The Agent Engine resource name, formatted as `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""" - agent_type: Optional[str] """The type or class of the agent (e.g., "LlmAgent", "RouterAgent", "ToolUseAgent"). Useful for the autorater to understand the expected @@ -243,15 +149,6 @@ class AgentConfigDict(TypedDict, total=False): """The list of valid agent IDs that this agent can delegate to. This defines the directed edges in the multi-agent system graph topology.""" - tools_text: Optional[str] - """A JSON string containing a list of tools available to an agent.""" - - legacy_tools: Optional[ToolsDict] - """List of tools.""" - - developer_instruction: Optional[InstanceDataDict] - """A field containing instructions from the developer for the agent.""" - AgentConfigOrDict = Union[AgentConfig, AgentConfigDict] @@ -339,30 +236,6 @@ class ConversationTurnDict(TypedDict, total=False): ConversationTurnOrDict = Union[ConversationTurn, ConversationTurnDict] -class Events(_common.BaseModel): - """This field is experimental and will be removed in future versions. - - Represents a list of events for an agent. - """ - - event: Optional[list[genai_types.Content]] = Field( - default=None, description="""A list of events.""" - ) - - -class EventsDict(TypedDict, total=False): - """This field is experimental and will be removed in future versions. - - Represents a list of events for an agent. - """ - - event: Optional[list[genai_types.ContentDict]] - """A list of events.""" - - -EventsOrDict = Union[Events, EventsDict] - - class AgentData(_common.BaseModel): """Represents data specific to multi-turn agent evaluations.""" @@ -378,20 +251,13 @@ class AgentData(_common.BaseModel): Each turn represents a logical execution cycle (e.g., User Input -> Agent Response).""", ) - agent_config: Optional[AgentConfig] = Field( - default=None, description="""Agent configuration.""" - ) - events_text: Optional[str] = Field( - default=None, description="""A JSON string containing a sequence of events.""" - ) - events: Optional[Events] = Field(default=None, description="""A list of events.""") @classmethod - def _get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]: + def get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]: """Recursively gets all agent configs from an agent and its sub-agents. Args: - agent: The agent to get the agent info from. + agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type. Returns: A dict mapping agent_id to AgentConfig. @@ -401,7 +267,7 @@ def _get_agents_map(cls, agent: Any) -> dict[str, AgentConfig]: agents_map = {agent_id: agent_config} for sub_agent in getattr(agent, "sub_agents", []): - agents_map.update(cls._get_agents_map(sub_agent)) + agents_map.update(cls.get_agents_map(sub_agent)) return agents_map @@ -419,7 +285,7 @@ def from_session(cls, agent: Any, session_history: list[Any]) -> "AgentData": Returns: An AgentData object containing the segmented history and agent config. """ - agents_map = cls._get_agents_map(agent) + agents_map = cls.get_agents_map(agent) agent_id = getattr(agent, "name", "agent_0") or "agent_0" turns: list[ConversationTurn] = [] @@ -510,21 +376,12 @@ class AgentDataDict(TypedDict, total=False): Each turn represents a logical execution cycle (e.g., User Input -> Agent Response).""" - agent_config: Optional[AgentConfigDict] - """Agent configuration.""" - - events_text: Optional[str] - """A JSON string containing a sequence of events.""" - - events: Optional[EventsDict] - """A list of events.""" - AgentDataOrDict = Union[AgentData, AgentDataDict] class AgentInfo(_common.BaseModel): - """The agent info of an agent, used for agent eval.""" + """The agent info of an agent system, used for agent evaluation.""" agent_resource_name: Optional[str] = Field( default=None, @@ -532,53 +389,30 @@ class AgentInfo(_common.BaseModel): `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""", ) name: Optional[str] = Field( - default=None, description="""Agent name, used as an identifier.""" + default=None, description="""Agent candidate name, used as an identifier.""" ) - instruction: Optional[str] = Field( - default=None, description="""Agent developer instruction.""" - ) - description: Optional[str] = Field( - default=None, description="""Agent description.""" + agents: Optional[dict[str, AgentConfig]] = Field( + default=None, + description="""A map containing the static configurations for each agent in the system. + Key: agent_id (matches the `author` field in events). + Value: The static configuration of the agent.""", ) - tool_declarations: Optional[genai_types.ToolListUnion] = Field( - default=None, description="""List of tools used by the Agent.""" + root_agent_id: Optional[str] = Field( + default=None, description="""The agent ID of the root agent.""" ) - @staticmethod - def _get_tool_declarations_from_agent(agent: Any) -> genai_types.ToolListUnion: - """Gets tool declarations from an agent. - - Args: - agent: The agent to get the tool declarations from. Data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - - Returns: - The tool declarations of the agent. - """ - tool_declarations: genai_types.ToolListUnion = [] - for tool in agent.tools: - tool_declarations.append( - { - "function_declarations": [ - genai_types.FunctionDeclaration.from_callable_with_api_option( - callable=tool - ) - ] - } - ) - return tool_declarations - @classmethod def load_from_agent( cls, agent: Any, agent_resource_name: Optional[str] = None ) -> "AgentInfo": - """Loads agent info from an agent. + """Loads agent info from an ADK agent. Args: - agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type, use Any to avoid dependency on ADK. - agent_resource_name: Optional. The agent engine resource name. + agent: The agent to get the agent info from, data type is google.adk.agents.LLMAgent type. + agent_resource_name: Optional. The agent engine resource name for the deployed agent. Returns: - The agent info of the agent. + The agent info of the agent system. Example: ``` @@ -595,30 +429,27 @@ def load_from_agent( return cls( # pytype: disable=missing-parameter name=agent.name, agent_resource_name=agent_resource_name, - instruction=agent.instruction, - description=agent.description, - tool_declarations=AgentInfo._get_tool_declarations_from_agent(agent), + agents=AgentData.get_agents_map(agent), ) class AgentInfoDict(TypedDict, total=False): - """The agent info of an agent, used for agent eval.""" + """The agent info of an agent system, used for agent evaluation.""" agent_resource_name: Optional[str] """The agent engine used to run agent. Agent engine resource name in str type, with format `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""" name: Optional[str] - """Agent name, used as an identifier.""" - - instruction: Optional[str] - """Agent developer instruction.""" + """Agent candidate name, used as an identifier.""" - description: Optional[str] - """Agent description.""" + agents: Optional[dict[str, AgentConfigDict]] + """A map containing the static configurations for each agent in the system. + Key: agent_id (matches the `author` field in events). + Value: The static configuration of the agent.""" - tool_declarations: Optional[genai_types.ToolListUnionDict] - """List of tools used by the Agent.""" + root_agent_id: Optional[str] + """The agent ID of the root agent.""" AgentInfoOrDict = Union[AgentInfo, AgentInfoDict] @@ -854,6 +685,107 @@ class MessageDict(TypedDict, total=False): MessageOrDict = Union[Message, MessageDict] +class Events(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Represents a list of events for an agent. + """ + + event: Optional[list[genai_types.Content]] = Field( + default=None, description="""A list of events.""" + ) + + +class EventsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Represents a list of events for an agent. + """ + + event: Optional[list[genai_types.ContentDict]] + """A list of events.""" + + +EventsOrDict = Union[Events, EventsDict] + + +class InstanceDataContents(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + List of standard Content messages from Gemini API. + """ + + contents: Optional[list[genai_types.Content]] = Field( + default=None, description="""Repeated contents.""" + ) + + +class InstanceDataContentsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + List of standard Content messages from Gemini API. + """ + + contents: Optional[list[genai_types.ContentDict]] + """Repeated contents.""" + + +InstanceDataContentsOrDict = Union[InstanceDataContents, InstanceDataContentsDict] + + +class InstanceData(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Instance data used to populate placeholders in a metric prompt template. + """ + + text: Optional[str] = Field(default=None, description="""Text data.""") + contents: Optional[InstanceDataContents] = Field( + default=None, description="""List of Gemini content data.""" + ) + + +class InstanceDataDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Instance data used to populate placeholders in a metric prompt template. + """ + + text: Optional[str] + """Text data.""" + + contents: Optional[InstanceDataContentsDict] + """List of Gemini content data.""" + + +InstanceDataOrDict = Union[InstanceData, InstanceDataDict] + + +class Tools(_common.BaseModel): + """This field is experimental and will be removed in future versions. + + Represents a list of tools for an agent. + """ + + tool: Optional[list[genai_types.Tool]] = Field( + default=None, + description="""List of tools: each tool can have multiple function declarations.""", + ) + + +class ToolsDict(TypedDict, total=False): + """This field is experimental and will be removed in future versions. + + Represents a list of tools for an agent. + """ + + tool: Optional[list[genai_types.ToolDict]] + """List of tools: each tool can have multiple function declarations.""" + + +ToolsOrDict = Union[Tools, ToolsDict] + + class RubricContentProperty(_common.BaseModel): """Defines criteria based on a specific property."""