From e0858c96c3073e14c13c1ff43bcf0a7c432684e1 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 20 Mar 2026 19:39:08 +0000 Subject: [PATCH 1/5] Fix streaming path to deliver mcp_server_tool_result content (#4814) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove premature mcp_server_tool_result emission from the response.output_item.added/mcp_call handler — at that point the MCP server has not yet responded and output is always None. Add a handler for response.mcp_call.completed that emits mcp_server_tool_result with the actual tool output, matching the non-streaming path behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../openai/_responses_client.py | 38 +++++----- .../openai/test_openai_responses_client.py | 74 ++++++++++++++++--- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 0c57dffb39..567581e2b3 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -1932,27 +1932,7 @@ def _parse_chunk_from_openai( raw_representation=event_item, ) ) - result_output = ( - getattr(event_item, "result", None) - or getattr(event_item, "output", None) - or getattr(event_item, "outputs", None) - ) - parsed_output: list[Content] | None = None - if result_output: - normalized = ( # pyright: ignore[reportUnknownVariableType] - result_output - if isinstance(result_output, Sequence) - and not isinstance(result_output, (str, bytes, MutableMapping)) - else [result_output] - ) - parsed_output = [Content.from_dict(output_item) for output_item in normalized] # pyright: ignore[reportArgumentType,reportUnknownVariableType] - contents.append( - Content.from_mcp_server_tool_result( - call_id=call_id, - output=parsed_output, - raw_representation=event_item, - ) - ) + # Result deferred to response.mcp_call.completed case "code_interpreter_call": # ResponseOutputCodeInterpreterCall call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None) outputs: list[Content] = [] @@ -2222,6 +2202,22 @@ def _get_ann_value(key: str) -> Any: ) else: logger.debug("Unparsed annotation type in streaming: %s", ann_type) + case "response.mcp_call.completed": + item = event.item + call_id = getattr(item, "id", None) or "" + output_text = getattr(item, "output", None) + parsed_output: list[Content] | None = ( + [Content.from_text(text=output_text)] + if isinstance(output_text, str) + else None + ) + contents.append( + Content.from_mcp_server_tool_result( + call_id=call_id, + output=parsed_output, + raw_representation=event, + ) + ) case _: logger.debug("Unparsed event of type: %s: %s", event.type, event) diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 6a2c9f5173..7f4fbb05f4 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1184,11 +1184,13 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None: assert result_content.output is not None -def test_parse_chunk_from_openai_with_mcp_call_result() -> None: - """Test _parse_chunk_from_openai with MCP call output.""" +def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None: + """Test that response.output_item.added for mcp_call emits only the call, not the result. + + The result is deferred to response.mcp_call.completed. + """ client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") - # Mock event with MCP call that has output mock_event = MagicMock() mock_event.type = "response.output_item.added" @@ -1199,8 +1201,9 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None: mock_item.name = "fetch_resource" mock_item.server_label = "ResourceServer" mock_item.arguments = {"resource_id": "123"} - # Use proper content structure that _parse_content can handle - mock_item.result = [{"type": "text", "text": "test result"}] + mock_item.result = None + mock_item.output = None + mock_item.outputs = None mock_event.item = mock_item mock_event.output_index = 0 @@ -1209,18 +1212,69 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None: update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) - # Should have both call and result in contents - assert len(update.contents) == 2 - call_content, result_content = update.contents + # Should have only the call content — result is deferred + assert len(update.contents) == 1 + call_content = update.contents[0] assert call_content.type == "mcp_server_tool_call" assert call_content.call_id in ["mcp_call_456", "call_456"] assert call_content.tool_name == "fetch_resource" + # No result should be emitted at this point + result_contents = [c for c in update.contents if c.type == "mcp_server_tool_result"] + assert len(result_contents) == 0 + + +def test_parse_chunk_from_openai_with_mcp_call_completed() -> None: + """Test that response.mcp_call.completed emits mcp_server_tool_result with output.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.mcp_call.completed" + + mock_item = MagicMock() + mock_item.type = "mcp_call" + mock_item.id = "mcp_call_456" + mock_item.output = "The weather in Seattle is 72F and sunny." + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + assert result_content.type == "mcp_server_tool_result" - assert result_content.call_id in ["mcp_call_456", "call_456"] - # Verify the output was parsed + assert result_content.call_id == "mcp_call_456" assert result_content.output is not None + assert len(result_content.output) == 1 + assert result_content.output[0].text == "The weather in Seattle is 72F and sunny." + + +def test_parse_chunk_from_openai_with_mcp_call_completed_no_output() -> None: + """Test that response.mcp_call.completed with no output emits result with None output.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.mcp_call.completed" + + mock_item = MagicMock() + mock_item.type = "mcp_call" + mock_item.id = "mcp_call_789" + mock_item.output = None + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "mcp_call_789" + assert result_content.output is None def test_prepare_message_for_openai_with_function_approval_response() -> None: From 6bfb8703c109c03c733fb40b6a83170cbebcc991 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 20 Mar 2026 19:39:22 +0000 Subject: [PATCH 2/5] Fix streaming path to deliver mcp_server_tool_result content (#4814) Stop eagerly emitting mcp_server_tool_result on response.output_item.added (when output is always None). Instead, handle response.output_item.done for mcp_call items, which carries the full McpCall with populated output. This matches the non-streaming path which guards with 'if item.output is not None' before emitting the result. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../openai/_responses_client.py | 31 +++++++++---------- .../openai/test_openai_responses_client.py | 12 +++---- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 567581e2b3..e5823329ff 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -1932,7 +1932,7 @@ def _parse_chunk_from_openai( raw_representation=event_item, ) ) - # Result deferred to response.mcp_call.completed + # Result deferred to response.output_item.done case "code_interpreter_call": # ResponseOutputCodeInterpreterCall call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None) outputs: list[Content] = [] @@ -2202,22 +2202,21 @@ def _get_ann_value(key: str) -> Any: ) else: logger.debug("Unparsed annotation type in streaming: %s", ann_type) - case "response.mcp_call.completed": - item = event.item - call_id = getattr(item, "id", None) or "" - output_text = getattr(item, "output", None) - parsed_output: list[Content] | None = ( - [Content.from_text(text=output_text)] - if isinstance(output_text, str) - else None - ) - contents.append( - Content.from_mcp_server_tool_result( - call_id=call_id, - output=parsed_output, - raw_representation=event, + case "response.output_item.done": + done_item = event.item + if getattr(done_item, "type", None) == "mcp_call": + call_id = getattr(done_item, "id", None) or "" + output_text = getattr(done_item, "output", None) + parsed_output: list[Content] | None = ( + [Content.from_text(text=output_text)] if isinstance(output_text, str) else None + ) + contents.append( + Content.from_mcp_server_tool_result( + call_id=call_id, + output=parsed_output, + raw_representation=event, + ) ) - ) case _: logger.debug("Unparsed event of type: %s: %s", event.type, event) diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 7f4fbb05f4..056568fc72 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1225,12 +1225,12 @@ def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None: assert len(result_contents) == 0 -def test_parse_chunk_from_openai_with_mcp_call_completed() -> None: - """Test that response.mcp_call.completed emits mcp_server_tool_result with output.""" +def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None: + """Test that response.output_item.done for mcp_call emits mcp_server_tool_result with output.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") mock_event = MagicMock() - mock_event.type = "response.mcp_call.completed" + mock_event.type = "response.output_item.done" mock_item = MagicMock() mock_item.type = "mcp_call" @@ -1252,12 +1252,12 @@ def test_parse_chunk_from_openai_with_mcp_call_completed() -> None: assert result_content.output[0].text == "The weather in Seattle is 72F and sunny." -def test_parse_chunk_from_openai_with_mcp_call_completed_no_output() -> None: - """Test that response.mcp_call.completed with no output emits result with None output.""" +def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None: + """Test that response.output_item.done for mcp_call with no output emits result with None output.""" client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") mock_event = MagicMock() - mock_event.type = "response.mcp_call.completed" + mock_event.type = "response.output_item.done" mock_item = MagicMock() mock_item.type = "mcp_call" From 866265afe72b919b7187afe7028833d451a45863 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 20 Mar 2026 19:51:47 +0000 Subject: [PATCH 3/5] Fix test docstring to match actual implementation event name Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../packages/core/tests/openai/test_openai_responses_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 056568fc72..e670ff2629 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1187,7 +1187,7 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None: def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None: """Test that response.output_item.added for mcp_call emits only the call, not the result. - The result is deferred to response.mcp_call.completed. + The result is deferred to response.output_item.done. """ client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") From 45f5b6140dc9713941e5c4873137bd6f866f8609 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 20 Mar 2026 19:59:41 +0000 Subject: [PATCH 4/5] Address review: call_id fallback and raw_representation consistency (#4814) - Add call_id fallback in response.output_item.done mcp_call handler to match the output_item.added handler pattern - Use done_item instead of event for raw_representation to keep consistent with other output_item branches and non-streaming path - Add test for call_id fallback when id attribute is missing - Add raw_representation assertions to existing done handler tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../openai/_responses_client.py | 4 +-- .../openai/test_openai_responses_client.py | 28 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index e5823329ff..d46987a9fb 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -2205,7 +2205,7 @@ def _get_ann_value(key: str) -> Any: case "response.output_item.done": done_item = event.item if getattr(done_item, "type", None) == "mcp_call": - call_id = getattr(done_item, "id", None) or "" + call_id = getattr(done_item, "id", None) or getattr(done_item, "call_id", None) or "" output_text = getattr(done_item, "output", None) parsed_output: list[Content] | None = ( [Content.from_text(text=output_text)] if isinstance(output_text, str) else None @@ -2214,7 +2214,7 @@ def _get_ann_value(key: str) -> Any: Content.from_mcp_server_tool_result( call_id=call_id, output=parsed_output, - raw_representation=event, + raw_representation=done_item, ) ) case _: diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index e670ff2629..02a141f8f6 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1250,6 +1250,7 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None: assert result_content.output is not None assert len(result_content.output) == 1 assert result_content.output[0].text == "The weather in Seattle is 72F and sunny." + assert result_content.raw_representation is mock_item def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None: @@ -1275,6 +1276,33 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None: assert result_content.type == "mcp_server_tool_result" assert result_content.call_id == "mcp_call_789" assert result_content.output is None + assert result_content.raw_representation is mock_item + + +def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() -> None: + """Test that response.output_item.done for mcp_call falls back to call_id when id is missing.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock(spec=[]) + mock_item.type = "mcp_call" + mock_item.call_id = "mcp_fallback_123" + mock_item.output = "fallback result" + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "mcp_fallback_123" + assert result_content.output is not None + assert result_content.output[0].text == "fallback result" def test_prepare_message_for_openai_with_function_approval_response() -> None: From 05341221d81ba9c5b1ee8ffc8ff96928306c5562 Mon Sep 17 00:00:00 2001 From: Copilot Date: Fri, 20 Mar 2026 20:05:32 +0000 Subject: [PATCH 5/5] Address review: call_id fallback for non-streaming path and test coverage (#4814) - Apply defensive call_id fallback (getattr with id/call_id/empty) to non-streaming mcp_call path for consistency with streaming path - Add raw_representation assertion to call_id fallback test - Add test for empty-string fallback when neither id nor call_id exist Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../openai/_responses_client.py | 2 +- .../openai/test_openai_responses_client.py | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index d46987a9fb..d20eca5d16 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -1542,7 +1542,7 @@ def _parse_response_from_openai( ) ) case "mcp_call": - call_id = item.id + call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or "" contents.append( Content.from_mcp_server_tool_call( call_id=call_id, diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 02a141f8f6..997d02ca01 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -1303,6 +1303,33 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() -> assert result_content.call_id == "mcp_fallback_123" assert result_content.output is not None assert result_content.output[0].text == "fallback result" + assert result_content.raw_representation is mock_item + + +def test_parse_chunk_from_openai_with_mcp_output_item_done_no_id_fallback() -> None: + """Test that response.output_item.done for mcp_call falls back to empty string when neither id nor call_id exist.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + mock_event = MagicMock() + mock_event.type = "response.output_item.done" + + mock_item = MagicMock(spec=[]) + mock_item.type = "mcp_call" + mock_item.output = "some result" + mock_event.item = mock_item + + function_call_ids: dict[int, tuple[str, str]] = {} + + update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids) + + assert len(update.contents) == 1 + result_content = update.contents[0] + + assert result_content.type == "mcp_server_tool_result" + assert result_content.call_id == "" + assert result_content.output is not None + assert result_content.output[0].text == "some result" + assert result_content.raw_representation is mock_item def test_prepare_message_for_openai_with_function_approval_response() -> None: