diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 0c57dffb39..d20eca5d16 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -1542,7 +1542,7 @@ def _parse_response_from_openai( ) ) case "mcp_call": - call_id = item.id + call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or "" contents.append( Content.from_mcp_server_tool_call( call_id=call_id, @@ -1932,27 +1932,7 @@ def _parse_chunk_from_openai( raw_representation=event_item, ) ) - result_output = ( - getattr(event_item, "result", None) - or getattr(event_item, "output", None) - or getattr(event_item, "outputs", None) - ) - parsed_output: list[Content] | None = None - if result_output: - normalized = ( # pyright: ignore[reportUnknownVariableType] - result_output - if isinstance(result_output, Sequence) - and not isinstance(result_output, (str, bytes, MutableMapping)) - else [result_output] - ) - parsed_output = [Content.from_dict(output_item) for output_item in normalized] # pyright: ignore[reportArgumentType,reportUnknownVariableType] - contents.append( - Content.from_mcp_server_tool_result( - call_id=call_id, - output=parsed_output, - raw_representation=event_item, - ) - ) + # Result deferred to response.output_item.done case "code_interpreter_call": # ResponseOutputCodeInterpreterCall call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None) outputs: list[Content] = [] @@ -2222,6 +2202,21 @@ def _get_ann_value(key: str) -> Any: ) else: logger.debug("Unparsed annotation type in streaming: %s", ann_type) + case "response.output_item.done": + done_item = event.item + if getattr(done_item, "type", None) == "mcp_call": + call_id = getattr(done_item, "id", None) or getattr(done_item, "call_id", None) or "" + output_text = getattr(done_item, "output", None) + parsed_output: list[Content] | None = ( + [Content.from_text(text=output_text)] if isinstance(output_text, str) else None + ) + contents.append( + Content.from_mcp_server_tool_result( + call_id=call_id, + output=parsed_output, + raw_representation=done_item, + ) + ) case _: logger.debug("Unparsed event of type: %s: %s", event.type, event) diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 6a2c9f5173..997d02ca01 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1184,11 +1184,13 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None: assert result_content.output is not None -def test_parse_chunk_from_openai_with_mcp_call_result() -> None: - """Test _parse_chunk_from_openai with MCP call output.""" +def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None: + """Test that response.output_item.added for mcp_call emits only the call, not the result. + + The result is deferred to response.output_item.done. + """ client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - # Mock event with MCP call that has output mock_event = MagicMock() mock_event.type = "response.output_item.added" @@ -1199,8 +1201,9 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None: mock_item.name = "fetch_resource" mock_item.server_label = "ResourceServer" mock_item.arguments = {"resource_id": "123"} - # Use proper content structure that _parse_content can handle - mock_item.result = [{"type": "text", "text": "test result"}] + mock_item.result = None + mock_item.output = None + mock_item.outputs = None mock_event.item = mock_item mock_event.output_index = 0 @@ -1209,18 +1212,124 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None: update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) - # Should have both call and result in contents - assert len(update.contents) == 2 - call_content, result_content = update.contents + # Should have only the call content — result is deferred + assert len(update.contents) == 1 + call_content = update.contents[0] assert call_content.type == "mcp_server_tool_call" assert call_content.call_id in ["mcp_call_456", "call_456"] assert call_content.tool_name == "fetch_resource" + # No result should be emitted at this point + result_contents = [c for c in update.contents if c.type == "mcp_server_tool_result"] + assert len(result_contents) == 0 + + +def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None: + """Test that response.output_item.done for mcp_call emits mcp_server_tool_result with output.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock() + mock_item.type = "mcp_call" + mock_item.id = "mcp_call_456" + mock_item.output = "The weather in Seattle is 72F and sunny." + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "mcp_call_456" + assert result_content.output is not None + assert len(result_content.output) == 1 + assert result_content.output[0].text == "The weather in Seattle is 72F and sunny." + assert result_content.raw_representation is mock_item + + +def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None: + """Test that response.output_item.done for mcp_call with no output emits result with None output.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock() + mock_item.type = "mcp_call" + mock_item.id = "mcp_call_789" + mock_item.output = None + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "mcp_call_789" + assert result_content.output is None + assert result_content.raw_representation is mock_item + + +def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() -> None: + """Test that response.output_item.done for mcp_call falls back to call_id when id is missing.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock(spec=[]) + mock_item.type = "mcp_call" + mock_item.call_id = "mcp_fallback_123" + mock_item.output = "fallback result" + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "mcp_fallback_123" + assert result_content.output is not None + assert result_content.output[0].text == "fallback result" + assert result_content.raw_representation is mock_item + + +def test_parse_chunk_from_openai_with_mcp_output_item_done_no_id_fallback() -> None: + """Test that response.output_item.done for mcp_call falls back to empty string when neither id nor call_id exist.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock(spec=[]) + mock_item.type = "mcp_call" + mock_item.output = "some result" + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + assert result_content.type == "mcp_server_tool_result" - assert result_content.call_id in ["mcp_call_456", "call_456"] - # Verify the output was parsed + assert result_content.call_id == "" assert result_content.output is not None + assert result_content.output[0].text == "some result" + assert result_content.raw_representation is mock_item def test_prepare_message_for_openai_with_function_approval_response() -> None: