diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index f294414edf..c9153a937f 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -465,6 +465,12 @@ class SPANDATA: Example: "ResearchAssistant" """ + GEN_AI_CONVERSATION_ID = "gen_ai.conversation.id" + """ + The unique identifier for the conversation/thread with the AI model. + Example: "conv_abc123" + """ + GEN_AI_CHOICE = "gen_ai.choice" """ The model's response message. diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index fd3bc284e9..92baf705e7 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -2,6 +2,7 @@ from functools import wraps import sentry_sdk +from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.utils import capture_internal_exceptions, reraise @@ -34,7 +35,13 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": with sentry_sdk.isolation_scope(): # Clone agent because agent invocation spans are attached per run. agent = args[0].clone() + with agent_workflow_span(agent): + # Stash conversation ID on the cloned agent so downstream span helpers can read it. NOTE(review): unlike the streaming path, this does not set GEN_AI_CONVERSATION_ID on the workflow span itself — confirm the transaction data is populated as the tests expect + conversation_id = kwargs.get("conversation_id") + if conversation_id: + agent._sentry_conversation_id = conversation_id + args = (agent, *args[1:]) try: run_result = await original_func(*args, **kwargs) @@ -91,10 +98,19 @@ def wrapper(*args: "Any", **kwargs: "Any") -> "Any": # Clone agent because agent invocation spans are attached per run. 
agent = args[0].clone() + # Capture conversation_id from kwargs if provided + conversation_id = kwargs.get("conversation_id") + if conversation_id: + agent._sentry_conversation_id = conversation_id + # Start workflow span immediately (before run_streamed returns) workflow_span = agent_workflow_span(agent) workflow_span.__enter__() + # Set conversation ID on workflow span early so it's captured even on errors + if conversation_id: + workflow_span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) + # Store span on agent for cleanup agent._sentry_workflow_span = workflow_span diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index c099f133f4..57244a6992 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -46,6 +46,7 @@ def update_ai_client_span( span: "sentry_sdk.tracing.Span", response: "Any", response_model: "Optional[str]" = None, + agent: "Optional[Agent]" = None, ) -> None: """Update AI client span with response data (works for streaming and non-streaming).""" if hasattr(response, "usage") and response.usage: @@ -59,3 +60,9 @@ def update_ai_client_span( span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) elif hasattr(response, "model") and response.model: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, str(response.model)) + + # Set conversation ID from agent if available + if agent: + conv_id = getattr(agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py index aa89da1610..e12dce4e3e 100644 --- a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -51,3 +51,8 @@ def update_execute_tool_span( if should_send_default_pii(): 
span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, result) + + # Add conversation ID from agent + conv_id = getattr(agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py index c514505b17..e4e02e9ec4 100644 --- a/sentry_sdk/integrations/openai_agents/spans/handoff.py +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -18,3 +18,8 @@ def handoff_span( origin=SPAN_ORIGIN, ) as span: span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") + + # Add conversation ID from agent + conv_id = getattr(from_agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index c3a3a04dc9..27f9fdab25 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -95,6 +95,11 @@ def update_invoke_agent_span( span, SPANDATA.GEN_AI_RESPONSE_TEXT, output, unpack=False ) + # Add conversation ID from agent + conv_id = getattr(agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + span.__exit__(None, None, None) delattr(context, "_sentry_agent_span") diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 4bf212b8f3..892cc95672 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -2710,3 +2710,182 @@ def mock_get_model(agent, run_config): # Verify streaming flag is set assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True + + +@pytest.mark.asyncio +async def test_conversation_id_on_all_spans( + sentry_init, 
capture_events, test_agent, mock_model_response +): + """ + Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). + """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + result = await agents.Runner.run( + test_agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + # Verify workflow span (transaction) has conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_test_123" + ) + + # Verify invoke_agent span has conversation_id + assert invoke_agent_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + + # Verify ai_client span has conversation_id + assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" + + +@pytest.mark.asyncio +async def test_conversation_id_on_tool_span(sentry_init, capture_events, test_agent): + """ + Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). 
+ """ + + @agents.function_tool + def simple_tool(message: str) -> str: + """A simple tool""" + return f"Result: {message}" + + agent_with_tool = test_agent.clone(tools=[simple_tool]) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="simple_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_456", + ) + + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Done", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, input_tokens=15, output_tokens=10, total_tokens=25 + ), + response_id="resp_final_789", + ) + + mock_get_response.side_effect = [tool_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("description", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["data"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span (transaction) should have the same conversation_id + assert ( + transaction["contexts"]["trace"]["data"]["gen_ai.conversation.id"] + == "conv_tool_test_456" + ) + + 
+@pytest.mark.asyncio +async def test_no_conversation_id_when_not_provided( + sentry_init, capture_events, test_agent, mock_model_response +): + """ + Test that gen_ai.conversation.id is not set when not passed to Runner.run(). + """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.return_value = mock_model_response + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + # Don't pass conversation_id + result = await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + (transaction,) = events + spans = transaction["spans"] + invoke_agent_span, ai_client_span = spans + + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in transaction["contexts"]["trace"].get( + "data", {} + ) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("data", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("data", {})