From e0858c96c3073e14c13c1ff43bcf0a7c432684e1 Mon Sep 17 00:00:00 2001
From: Copilot <copilot@github.com>
Date: Fri, 20 Mar 2026 19:39:08 +0000
Subject: [PATCH 1/5] Fix streaming path to deliver mcp_server_tool_result
 content (#4814)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove premature mcp_server_tool_result emission from the
response.output_item.added/mcp_call handler — at that point the MCP
server has not yet responded and output is always None.

Add a handler for response.mcp_call.completed that emits
mcp_server_tool_result with the actual tool output, matching the
non-streaming path behavior.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../openai/_responses_client.py               | 38 +++++-----
 .../openai/test_openai_responses_client.py    | 74 ++++++++++++++++---
 2 files changed, 81 insertions(+), 31 deletions(-)

diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py
index 0c57dffb39..567581e2b3 100644
--- a/python/packages/core/agent_framework/openai/_responses_client.py
+++ b/python/packages/core/agent_framework/openai/_responses_client.py
@@ -1932,27 +1932,7 @@ def _parse_chunk_from_openai(
                                 raw_representation=event_item,
                             )
                         )
-                        result_output = (
-                            getattr(event_item, "result", None)
-                            or getattr(event_item, "output", None)
-                            or getattr(event_item, "outputs", None)
-                        )
-                        parsed_output: list[Content] | None = None
-                        if result_output:
-                            normalized = (  # pyright: ignore[reportUnknownVariableType]
-                                result_output
-                                if isinstance(result_output, Sequence)
-                                and not isinstance(result_output, (str, bytes, MutableMapping))
-                                else [result_output]
-                            )
-                            parsed_output = [Content.from_dict(output_item) for output_item in normalized]  # pyright: ignore[reportArgumentType,reportUnknownVariableType]
-                        contents.append(
-                            Content.from_mcp_server_tool_result(
-                                call_id=call_id,
-                                output=parsed_output,
-                                raw_representation=event_item,
-                            )
-                        )
+                        # Result deferred to response.mcp_call.completed
                     case "code_interpreter_call":  # ResponseOutputCodeInterpreterCall
                         call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None)
                         outputs: list[Content] = []
@@ -2222,6 +2202,22 @@ def _get_ann_value(key: str) -> Any:
                         )
                 else:
                     logger.debug("Unparsed annotation type in streaming: %s", ann_type)
+            case "response.mcp_call.completed":
+                item = event.item
+                call_id = getattr(item, "id", None) or ""
+                output_text = getattr(item, "output", None)
+                parsed_output: list[Content] | None = (
+                    [Content.from_text(text=output_text)]
+                    if isinstance(output_text, str)
+                    else None
+                )
+                contents.append(
+                    Content.from_mcp_server_tool_result(
+                        call_id=call_id,
+                        output=parsed_output,
+                        raw_representation=event,
+                    )
+                )
             case _:
                 logger.debug("Unparsed event of type: %s: %s", event.type, event)
 
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index 6a2c9f5173..7f4fbb05f4 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -1184,11 +1184,13 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None:
     assert result_content.output is not None
 
 
-def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
-    """Test _parse_chunk_from_openai with MCP call output."""
+def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None:
+    """Test that response.output_item.added for mcp_call emits only the call, not the result.
+
+    The result is deferred to response.mcp_call.completed.
+    """
     client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
 
-    # Mock event with MCP call that has output
     mock_event = MagicMock()
     mock_event.type = "response.output_item.added"
 
@@ -1199,8 +1201,9 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
     mock_item.name = "fetch_resource"
     mock_item.server_label = "ResourceServer"
     mock_item.arguments = {"resource_id": "123"}
-    # Use proper content structure that _parse_content can handle
-    mock_item.result = [{"type": "text", "text": "test result"}]
+    mock_item.result = None
+    mock_item.output = None
+    mock_item.outputs = None
 
     mock_event.item = mock_item
     mock_event.output_index = 0
@@ -1209,18 +1212,69 @@ def test_parse_chunk_from_openai_with_mcp_call_result() -> None:
 
     update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
 
-    # Should have both call and result in contents
-    assert len(update.contents) == 2
-    call_content, result_content = update.contents
+    # Should have only the call content — result is deferred
+    assert len(update.contents) == 1
+    call_content = update.contents[0]
 
     assert call_content.type == "mcp_server_tool_call"
     assert call_content.call_id in ["mcp_call_456", "call_456"]
     assert call_content.tool_name == "fetch_resource"
 
+    # No result should be emitted at this point
+    result_contents = [c for c in update.contents if c.type == "mcp_server_tool_result"]
+    assert len(result_contents) == 0
+
+
+def test_parse_chunk_from_openai_with_mcp_call_completed() -> None:
+    """Test that response.mcp_call.completed emits mcp_server_tool_result with output."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_event = MagicMock()
+    mock_event.type = "response.mcp_call.completed"
+
+    mock_item = MagicMock()
+    mock_item.type = "mcp_call"
+    mock_item.id = "mcp_call_456"
+    mock_item.output = "The weather in Seattle is 72F and sunny."
+    mock_event.item = mock_item
+
+    function_call_ids: dict[int, tuple[str, str]] = {}
+
+    update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
+
+    assert len(update.contents) == 1
+    result_content = update.contents[0]
+
     assert result_content.type == "mcp_server_tool_result"
-    assert result_content.call_id in ["mcp_call_456", "call_456"]
-    # Verify the output was parsed
+    assert result_content.call_id == "mcp_call_456"
     assert result_content.output is not None
+    assert len(result_content.output) == 1
+    assert result_content.output[0].text == "The weather in Seattle is 72F and sunny."
+
+
+def test_parse_chunk_from_openai_with_mcp_call_completed_no_output() -> None:
+    """Test that response.mcp_call.completed with no output emits result with None output."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_event = MagicMock()
+    mock_event.type = "response.mcp_call.completed"
+
+    mock_item = MagicMock()
+    mock_item.type = "mcp_call"
+    mock_item.id = "mcp_call_789"
+    mock_item.output = None
+    mock_event.item = mock_item
+
+    function_call_ids: dict[int, tuple[str, str]] = {}
+
+    update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
+
+    assert len(update.contents) == 1
+    result_content = update.contents[0]
+
+    assert result_content.type == "mcp_server_tool_result"
+    assert result_content.call_id == "mcp_call_789"
+    assert result_content.output is None
 
 
 def test_prepare_message_for_openai_with_function_approval_response() -> None:

From 6bfb8703c109c03c733fb40b6a83170cbebcc991 Mon Sep 17 00:00:00 2001
From: Copilot <copilot@github.com>
Date: Fri, 20 Mar 2026 19:39:22 +0000
Subject: [PATCH 2/5] Fix streaming path to deliver mcp_server_tool_result
 content (#4814)

Stop eagerly emitting mcp_server_tool_result on response.output_item.added
(when output is always None). Instead, handle response.output_item.done for
mcp_call items, which carries the full McpCall with populated output.

This matches the non-streaming path which guards with 'if item.output is not
None' before emitting the result.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../openai/_responses_client.py               | 31 +++++++++----------
 .../openai/test_openai_responses_client.py    | 12 +++----
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py
index 567581e2b3..e5823329ff 100644
--- a/python/packages/core/agent_framework/openai/_responses_client.py
+++ b/python/packages/core/agent_framework/openai/_responses_client.py
@@ -1932,7 +1932,7 @@ def _parse_chunk_from_openai(
                                 raw_representation=event_item,
                             )
                         )
-                        # Result deferred to response.mcp_call.completed
+                        # Result deferred to response.output_item.done
                     case "code_interpreter_call":  # ResponseOutputCodeInterpreterCall
                         call_id = getattr(event_item, "call_id", None) or getattr(event_item, "id", None)
                         outputs: list[Content] = []
@@ -2202,22 +2202,21 @@ def _get_ann_value(key: str) -> Any:
                         )
                 else:
                     logger.debug("Unparsed annotation type in streaming: %s", ann_type)
-            case "response.mcp_call.completed":
-                item = event.item
-                call_id = getattr(item, "id", None) or ""
-                output_text = getattr(item, "output", None)
-                parsed_output: list[Content] | None = (
-                    [Content.from_text(text=output_text)]
-                    if isinstance(output_text, str)
-                    else None
-                )
-                contents.append(
-                    Content.from_mcp_server_tool_result(
-                        call_id=call_id,
-                        output=parsed_output,
-                        raw_representation=event,
+            case "response.output_item.done":
+                done_item = event.item
+                if getattr(done_item, "type", None) == "mcp_call":
+                    call_id = getattr(done_item, "id", None) or ""
+                    output_text = getattr(done_item, "output", None)
+                    parsed_output: list[Content] | None = (
+                        [Content.from_text(text=output_text)] if isinstance(output_text, str) else None
+                    )
+                    contents.append(
+                        Content.from_mcp_server_tool_result(
+                            call_id=call_id,
+                            output=parsed_output,
+                            raw_representation=event,
+                        )
                     )
-                )
             case _:
                 logger.debug("Unparsed event of type: %s: %s", event.type, event)
 
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index 7f4fbb05f4..056568fc72 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -1225,12 +1225,12 @@ def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None:
     assert len(result_contents) == 0
 
 
-def test_parse_chunk_from_openai_with_mcp_call_completed() -> None:
-    """Test that response.mcp_call.completed emits mcp_server_tool_result with output."""
+def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None:
+    """Test that response.output_item.done for mcp_call emits mcp_server_tool_result with output."""
     client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
 
     mock_event = MagicMock()
-    mock_event.type = "response.mcp_call.completed"
+    mock_event.type = "response.output_item.done"
 
     mock_item = MagicMock()
     mock_item.type = "mcp_call"
@@ -1252,12 +1252,12 @@ def test_parse_chunk_from_openai_with_mcp_call_completed() -> None:
     assert result_content.output[0].text == "The weather in Seattle is 72F and sunny."
 
 
-def test_parse_chunk_from_openai_with_mcp_call_completed_no_output() -> None:
-    """Test that response.mcp_call.completed with no output emits result with None output."""
+def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None:
+    """Test that response.output_item.done for mcp_call with no output emits result with None output."""
     client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
 
     mock_event = MagicMock()
-    mock_event.type = "response.mcp_call.completed"
+    mock_event.type = "response.output_item.done"
 
     mock_item = MagicMock()
     mock_item.type = "mcp_call"

From 866265afe72b919b7187afe7028833d451a45863 Mon Sep 17 00:00:00 2001
From: Copilot <copilot@github.com>
Date: Fri, 20 Mar 2026 19:51:47 +0000
Subject: [PATCH 3/5] Fix test docstring to match actual implementation event
 name

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../packages/core/tests/openai/test_openai_responses_client.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index 056568fc72..e670ff2629 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -1187,7 +1187,7 @@ def test_parse_response_from_openai_with_mcp_server_tool_result() -> None:
 def test_parse_chunk_from_openai_with_mcp_call_added_defers_result() -> None:
     """Test that response.output_item.added for mcp_call emits only the call, not the result.
 
-    The result is deferred to response.mcp_call.completed.
+    The result is deferred to response.output_item.done.
     """
     client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
 

From 45f5b6140dc9713941e5c4873137bd6f866f8609 Mon Sep 17 00:00:00 2001
From: Copilot <copilot@github.com>
Date: Fri, 20 Mar 2026 19:59:41 +0000
Subject: [PATCH 4/5] Address review: call_id fallback and raw_representation
 consistency (#4814)

- Add call_id fallback in response.output_item.done mcp_call handler to
  match the output_item.added handler pattern
- Use done_item instead of event for raw_representation to keep
  consistent with other output_item branches and non-streaming path
- Add test for call_id fallback when id attribute is missing
- Add raw_representation assertions to existing done handler tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../openai/_responses_client.py               |  4 +--
 .../openai/test_openai_responses_client.py    | 28 +++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py
index e5823329ff..d46987a9fb 100644
--- a/python/packages/core/agent_framework/openai/_responses_client.py
+++ b/python/packages/core/agent_framework/openai/_responses_client.py
@@ -2205,7 +2205,7 @@ def _get_ann_value(key: str) -> Any:
             case "response.output_item.done":
                 done_item = event.item
                 if getattr(done_item, "type", None) == "mcp_call":
-                    call_id = getattr(done_item, "id", None) or ""
+                    call_id = getattr(done_item, "id", None) or getattr(done_item, "call_id", None) or ""
                     output_text = getattr(done_item, "output", None)
                     parsed_output: list[Content] | None = (
                         [Content.from_text(text=output_text)] if isinstance(output_text, str) else None
@@ -2214,7 +2214,7 @@ def _get_ann_value(key: str) -> Any:
                         Content.from_mcp_server_tool_result(
                             call_id=call_id,
                             output=parsed_output,
-                            raw_representation=event,
+                            raw_representation=done_item,
                         )
                     )
             case _:
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index e670ff2629..02a141f8f6 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -1250,6 +1250,7 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done() -> None:
     assert result_content.output is not None
     assert len(result_content.output) == 1
     assert result_content.output[0].text == "The weather in Seattle is 72F and sunny."
+    assert result_content.raw_representation is mock_item
 
 
 def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None:
@@ -1275,6 +1276,33 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done_no_output() -> None:
     assert result_content.type == "mcp_server_tool_result"
     assert result_content.call_id == "mcp_call_789"
     assert result_content.output is None
+    assert result_content.raw_representation is mock_item
+
+
+def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() -> None:
+    """Test that response.output_item.done for mcp_call falls back to call_id when id is missing."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_event = MagicMock()
+    mock_event.type = "response.output_item.done"
+
+    mock_item = MagicMock(spec=[])
+    mock_item.type = "mcp_call"
+    mock_item.call_id = "mcp_fallback_123"
+    mock_item.output = "fallback result"
+    mock_event.item = mock_item
+
+    function_call_ids: dict[int, tuple[str, str]] = {}
+
+    update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
+
+    assert len(update.contents) == 1
+    result_content = update.contents[0]
+
+    assert result_content.type == "mcp_server_tool_result"
+    assert result_content.call_id == "mcp_fallback_123"
+    assert result_content.output is not None
+    assert result_content.output[0].text == "fallback result"
 
 
 def test_prepare_message_for_openai_with_function_approval_response() -> None:

From 05341221d81ba9c5b1ee8ffc8ff96928306c5562 Mon Sep 17 00:00:00 2001
From: Copilot <copilot@github.com>
Date: Fri, 20 Mar 2026 20:05:32 +0000
Subject: [PATCH 5/5] Address review: call_id fallback for non-streaming path
 and test coverage (#4814)

- Apply defensive call_id fallback (getattr with id/call_id/empty) to
  non-streaming mcp_call path for consistency with streaming path
- Add raw_representation assertion to call_id fallback test
- Add test for empty-string fallback when neither id nor call_id exist

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../openai/_responses_client.py               |  2 +-
 .../openai/test_openai_responses_client.py    | 27 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py
index d46987a9fb..d20eca5d16 100644
--- a/python/packages/core/agent_framework/openai/_responses_client.py
+++ b/python/packages/core/agent_framework/openai/_responses_client.py
@@ -1542,7 +1542,7 @@ def _parse_response_from_openai(
                         )
                     )
                 case "mcp_call":
-                    call_id = item.id
+                    call_id = getattr(item, "id", None) or getattr(item, "call_id", None) or ""
                     contents.append(
                         Content.from_mcp_server_tool_call(
                             call_id=call_id,
diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py
index 02a141f8f6..997d02ca01 100644
--- a/python/packages/core/tests/openai/test_openai_responses_client.py
+++ b/python/packages/core/tests/openai/test_openai_responses_client.py
@@ -1303,6 +1303,33 @@ def test_parse_chunk_from_openai_with_mcp_output_item_done_call_id_fallback() ->
     assert result_content.call_id == "mcp_fallback_123"
     assert result_content.output is not None
     assert result_content.output[0].text == "fallback result"
+    assert result_content.raw_representation is mock_item
+
+
+def test_parse_chunk_from_openai_with_mcp_output_item_done_no_id_fallback() -> None:
+    """Test that response.output_item.done for mcp_call falls back to empty string when neither id nor call_id exist."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_event = MagicMock()
+    mock_event.type = "response.output_item.done"
+
+    mock_item = MagicMock(spec=[])
+    mock_item.type = "mcp_call"
+    mock_item.output = "some result"
+    mock_event.item = mock_item
+
+    function_call_ids: dict[int, tuple[str, str]] = {}
+
+    update = client._parse_chunk_from_openai(mock_event, options={}, function_call_ids=function_call_ids)
+
+    assert len(update.contents) == 1
+    result_content = update.contents[0]
+
+    assert result_content.type == "mcp_server_tool_result"
+    assert result_content.call_id == ""
+    assert result_content.output is not None
+    assert result_content.output[0].text == "some result"
+    assert result_content.raw_representation is mock_item
 
 
 def test_prepare_message_for_openai_with_function_approval_response() -> None: