diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py
index 5c05968d31..04d2f7e214 100644
--- a/src/google/adk/telemetry/tracing.py
+++ b/src/google/adk/telemetry/tracing.py
@@ -744,6 +744,14 @@ def trace_generate_content_result(span: Span | None, llm_response: LlmResponse):
       span.set_attribute(
           GEN_AI_USAGE_OUTPUT_TOKENS, usage_metadata.candidates_token_count
       )
+    # getattr tolerates usage-metadata types that lack the field, without
+    # masking an AttributeError raised inside set_attribute itself.
+    reasoning_tokens = getattr(usage_metadata, 'thoughts_token_count', None)
+    if reasoning_tokens is not None:
+      span.set_attribute(
+          'gen_ai.usage.experimental.reasoning_tokens',
+          reasoning_tokens,
+      )
 
   otel_logger.emit(
       LogRecord(
@@ -768,25 +776,37 @@ def trace_inference_result(
   gc_span = None
   if isinstance(span, GenerateContentSpan):
     gc_span = span
-    span = gc_span.span
+    otel_span = gc_span.span
+  else:
+    otel_span = span
 
-  if span is None:
+  if otel_span is None:
     return
 
   if llm_response.partial:
     return
 
   if finish_reason := llm_response.finish_reason:
-    span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason.lower()])
+    otel_span.set_attribute(
+        GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason.lower()]
+    )
   if usage_metadata := llm_response.usage_metadata:
     if usage_metadata.prompt_token_count is not None:
-      span.set_attribute(
+      otel_span.set_attribute(
           GEN_AI_USAGE_INPUT_TOKENS, usage_metadata.prompt_token_count
       )
     if usage_metadata.candidates_token_count is not None:
-      span.set_attribute(
+      otel_span.set_attribute(
           GEN_AI_USAGE_OUTPUT_TOKENS, usage_metadata.candidates_token_count
       )
+    # getattr tolerates usage-metadata types that lack the field, without
+    # masking an AttributeError raised inside set_attribute itself.
+    reasoning_tokens = getattr(usage_metadata, 'thoughts_token_count', None)
+    if reasoning_tokens is not None:
+      otel_span.set_attribute(
+          'gen_ai.usage.experimental.reasoning_tokens',
+          reasoning_tokens,
+      )
 
   if is_experimental_semconv() and isinstance(gc_span, GenerateContentSpan):
     set_operation_details_attributes_from_response(
diff --git a/tests/unittests/telemetry/test_spans.py b/tests/unittests/telemetry/test_spans.py
index c4bd485fba..5612f0239a 100644
--- a/tests/unittests/telemetry/test_spans.py
+++ b/tests/unittests/telemetry/test_spans.py
@@ -28,6 +28,7 @@
 from google.adk.telemetry.tracing import ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS
 from google.adk.telemetry.tracing import trace_agent_invocation
 from google.adk.telemetry.tracing import trace_call_llm
+from google.adk.telemetry.tracing import trace_generate_content_result
 from google.adk.telemetry.tracing import trace_inference_result
 from google.adk.telemetry.tracing import trace_merged_tool_calls
 from google.adk.telemetry.tracing import trace_send_data
@@ -1284,3 +1285,107 @@ def test_trace_tool_call_with_standard_error(
       mock.call('error.type', 'ValueError')
       in mock_span_fixture.set_attribute.call_args_list
   )
+
+
+def test_trace_inference_result_with_thinking_tokens(mock_span_fixture):
+  """Test trace_inference_result exports thoughts_token_count."""
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(
+          total_token_count=110,
+          prompt_token_count=50,
+          candidates_token_count=10,
+          thoughts_token_count=50,
+      ),
+  )
+
+  trace_inference_result(mock_span_fixture, llm_response)
+
+  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
+  mock_span_fixture.set_attribute.assert_any_call(
+      GEN_AI_USAGE_OUTPUT_TOKENS, 10
+  )
+  mock_span_fixture.set_attribute.assert_any_call(
+      'gen_ai.usage.experimental.reasoning_tokens', 50
+  )
+
+
+def test_trace_inference_result_without_thinking_tokens(mock_span_fixture):
+  """Test trace_inference_result works when thoughts_token_count is None."""
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(
+          total_token_count=60,
+          prompt_token_count=50,
+          candidates_token_count=10,
+      ),
+  )
+
+  trace_inference_result(mock_span_fixture, llm_response)
+
+  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
+  mock_span_fixture.set_attribute.assert_any_call(
+      GEN_AI_USAGE_OUTPUT_TOKENS, 10
+  )
+  # Verify reasoning_tokens is NOT set when thoughts_token_count is None
+  reasoning_calls = [
+      call
+      for call in mock_span_fixture.set_attribute.call_args_list
+      if call.args[0] == 'gen_ai.usage.experimental.reasoning_tokens'
+  ]
+  assert not reasoning_calls
+
+
+def test_trace_generate_content_result_with_thinking_tokens(mock_span_fixture):
+  """Test trace_generate_content_result exports thoughts_token_count."""
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(
+          total_token_count=110,
+          prompt_token_count=50,
+          candidates_token_count=10,
+          thoughts_token_count=50,
+      ),
+  )
+
+  trace_generate_content_result(mock_span_fixture, llm_response)
+
+  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
+  mock_span_fixture.set_attribute.assert_any_call(
+      GEN_AI_USAGE_OUTPUT_TOKENS, 10
+  )
+  mock_span_fixture.set_attribute.assert_any_call(
+      'gen_ai.usage.experimental.reasoning_tokens', 50
+  )
+
+
+def test_trace_generate_content_result_without_thinking_tokens(
+    mock_span_fixture,
+):
+  """Test trace_generate_content_result omits unset thoughts_token_count."""
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(
+          total_token_count=60,
+          prompt_token_count=50,
+          candidates_token_count=10,
+      ),
+  )
+
+  trace_generate_content_result(mock_span_fixture, llm_response)
+
+  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
+  mock_span_fixture.set_attribute.assert_any_call(
+      GEN_AI_USAGE_OUTPUT_TOKENS, 10
+  )
+  # Verify reasoning_tokens is NOT set when thoughts_token_count is None
+  reasoning_calls = [
+      call
+      for call in mock_span_fixture.set_attribute.call_args_list
+      if call.args[0] == 'gen_ai.usage.experimental.reasoning_tokens'
+  ]
+  assert not reasoning_calls