From 30523d649f55876f336ef07d7d4a33391ec3acb5 Mon Sep 17 00:00:00 2001 From: Pavan Sudheendra Date: Fri, 6 Feb 2026 08:52:40 +0000 Subject: [PATCH] fix: prevent duplicate eval logs & metrics from showing up Skip util-genai scope spans so we don't process already-instrumented spans Signed-off-by: Pavan Sudheendra --- .../processor/langsmith_span_processor.py | 38 +++++++++-------- .../genai/processor/openlit_span_processor.py | 36 ++++++++-------- .../processor/traceloop_span_processor.py | 41 ++++++++----------- 3 files changed, 54 insertions(+), 61 deletions(-) diff --git a/util/opentelemetry-util-genai-langsmith-translator/src/opentelemetry/util/genai/processor/langsmith_span_processor.py b/util/opentelemetry-util-genai-langsmith-translator/src/opentelemetry/util/genai/processor/langsmith_span_processor.py index 91916d20..edeba758 100644 --- a/util/opentelemetry-util-genai-langsmith-translator/src/opentelemetry/util/genai/processor/langsmith_span_processor.py +++ b/util/opentelemetry-util-genai-langsmith-translator/src/opentelemetry/util/genai/processor/langsmith_span_processor.py @@ -483,6 +483,20 @@ def _should_skip_span( if not span or not span.name: return True + # Skip spans created by the util-genai library itself. + # These spans are already properly instrumented and should not be + # processed again by the langsmith translator to avoid duplicate + # evaluations, metrics, and logs. 
+ scope = getattr(span, "instrumentation_scope", None) + scope_name = getattr(scope, "name", "") if scope else "" + if scope_name.startswith("opentelemetry.util.genai"): + _logger.debug( + "[LANGSMITH_PROCESSOR] Skipping util-genai span (scope=%s): %s", + scope_name, + span.name, + ) + return True + # Skip synthetic spans we created (check span ID in set) if span_id and span_id in self._synthetic_span_ids: _logger.debug( @@ -597,6 +611,13 @@ def on_end(self, span: ReadableSpan) -> None: ) # Convert ns to seconds # type: ignore[attr-defined] # Use handler.finish() for full functionality + # This will: + # 1. Set end_time if not set + # 2. Determine sample_for_evaluation + # 3. Call _emitter.on_end() - which handles ReadableSpan gracefully + # 4. Call _notify_completion() - triggers evaluation callbacks + # Note: Do NOT call handler.evaluate_agent() after finish() + # as finish() already triggers evaluations via _notify_completion() try: handler.finish(invocation) _logger.debug( @@ -606,23 +627,6 @@ def on_end(self, span: ReadableSpan) -> None: trace_id, ) - # If this invocation is an AgentInvocation, explicitly - # trigger agent-level evaluations - if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined] - try: - handler.evaluate_agent(invocation) - _logger.debug( - "[LANGSMITH_PROCESSOR] Agent invocation evaluated: %s", - span.name, - ) - except ( - Exception - ) as eval_err: # pragma: no cover - defensive - _logger.warning( - "[LANGSMITH_PROCESSOR] Failed to evaluate AgentInvocation: %s", - eval_err, - ) - except Exception as stop_err: _logger.warning( "[LANGSMITH_PROCESSOR] handler.finish failed: %s", diff --git a/util/opentelemetry-util-genai-openlit-translator/src/opentelemetry/util/genai/processor/openlit_span_processor.py b/util/opentelemetry-util-genai-openlit-translator/src/opentelemetry/util/genai/processor/openlit_span_processor.py index 710e1302..49bcb016 100644 --- 
a/util/opentelemetry-util-genai-openlit-translator/src/opentelemetry/util/genai/processor/openlit_span_processor.py +++ b/util/opentelemetry-util-genai-openlit-translator/src/opentelemetry/util/genai/processor/openlit_span_processor.py @@ -461,6 +461,20 @@ def _should_skip_span( if not span or not span.name: return True + # Skip spans created by the util-genai library itself. + # These spans are already properly instrumented and should not be + # processed again by the openlit translator to avoid duplicate + # evaluations, metrics, and logs. + scope = getattr(span, "instrumentation_scope", None) + scope_name = getattr(scope, "name", "") if scope else "" + if scope_name.startswith("opentelemetry.util.genai"): + _logger.debug( + "[OPENLIT_PROCESSOR] Skipping util-genai span (scope=%s): %s", + scope_name, + span.name, + ) + return True + # Skip synthetic spans we created (check span ID in set) if span_id and span_id in self._synthetic_span_ids: _logger.debug( @@ -583,6 +597,8 @@ def on_end(self, span: ReadableSpan) -> None: # 2. Determine sample_for_evaluation # 3. Call _emitter.on_end() - which handles ReadableSpan gracefully # 4. Call _notify_completion() - triggers evaluation callbacks + # Note: Do NOT call handler.evaluate_agent() after finish() + # as finish() already triggers evaluations via _notify_completion() try: handler.finish(invocation) _logger.debug( @@ -592,26 +608,6 @@ def on_end(self, span: ReadableSpan) -> None: trace_id, ) - # If this invocation is an AgentInvocation (for example, - # an OpenLit span representing an agent call), explicitly - # trigger agent-level evaluations so that - # gen_ai.evaluation.result events can be attached to the - # agent span itself, mirroring the Traceloop behavior. 
- if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined] - try: - handler.evaluate_agent(invocation) - _logger.debug( - "[OPENLIT_PROCESSOR] Agent invocation evaluated: %s", - span.name, - ) - except ( - Exception - ) as eval_err: # pragma: no cover - defensive - _logger.warning( - "[OPENLIT_PROCESSOR] Failed to evaluate AgentInvocation: %s", - eval_err, - ) - except Exception as stop_err: _logger.warning( "[OPENLIT_PROCESSOR] handler.finish failed: %s", diff --git a/util/opentelemetry-util-genai-traceloop-translator/src/opentelemetry/util/genai/processor/traceloop_span_processor.py b/util/opentelemetry-util-genai-traceloop-translator/src/opentelemetry/util/genai/processor/traceloop_span_processor.py index 7f53f95e..9fb8e251 100644 --- a/util/opentelemetry-util-genai-traceloop-translator/src/opentelemetry/util/genai/processor/traceloop_span_processor.py +++ b/util/opentelemetry-util-genai-traceloop-translator/src/opentelemetry/util/genai/processor/traceloop_span_processor.py @@ -482,6 +482,20 @@ def _should_skip_span( if not span or not span.name: return True + # Skip spans created by the util-genai library itself. + # These spans are already properly instrumented and should not be + # processed again by the traceloop translator to avoid duplicate + # evaluations, metrics, and logs. + scope = getattr(span, "instrumentation_scope", None) + scope_name = getattr(scope, "name", "") if scope else "" + if scope_name.startswith("opentelemetry.util.genai"): + _logger.debug( + "[TL_PROCESSOR] Skipping util-genai span (scope=%s): %s", + scope_name, + span.name, + ) + return True + # Skip synthetic spans we created (check span ID in set) if span_id and span_id in self._synthetic_span_ids: _logger.debug( @@ -619,6 +633,9 @@ def on_end(self, span: ReadableSpan) -> None: # Close the invocation to trigger core lifecycle handling # This will call the appropriate stop_* method and emit spans/metrics. 
+ # Note: handler.finish() already triggers evaluations via the completion + # callback mechanism (_notify_completion -> on_completion), so no explicit + # evaluate_agent() call is needed for AgentInvocation types. handler = self.telemetry_handler or get_telemetry_handler() try: handler.finish(invocation) @@ -628,25 +645,6 @@ def on_end(self, span: ReadableSpan) -> None: getattr(invocation, "sample_for_evaluation", None), ) - # If this invocation represents an agent call (invoke_agent), - # explicitly trigger agent-level evaluations so that - # gen_ai.evaluation.result events can be attached to the - # agent span itself, in addition to any LLM-level evaluations. - if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined] - try: - handler.evaluate_agent(invocation) - _logger.debug( - "[TL_PROCESSOR] Agent invocation evaluated: %s", - span.name, - ) - except ( - Exception - ) as eval_err: # pragma: no cover - defensive - _logger.warning( - "[TL_PROCESSOR] Failed to evaluate AgentInvocation: %s", - eval_err, - ) - except Exception as stop_err: _logger.warning( "[TL_PROCESSOR] Failed to finish invocation: %s", @@ -778,9 +776,6 @@ def _is_llm_span(self, span: ReadableSpan) -> bool: if span.attributes and "_traceloop_translated" in span.attributes: return False - # CRITICAL: Exclude evaluation-related spans (prevent recursive evaluation) - # Deepeval creates spans like "Run evaluate()", "Bias", "Toxicity", etc. - # These should NEVER be queued for evaluation span_name = span.name or "" for exclude_pattern in _EXCLUDE_SPAN_PATTERNS: if exclude_pattern.lower() in span_name.lower(): @@ -1528,8 +1523,6 @@ def _convert_langchain_to_genai_messages( # Extract content and convert to parts content = getattr(lc_msg, "content", "") - # CRITICAL 1: Check if content is a JSON string with LangChain serialization format - # Basically only use the "content" of the incoming traceloop entity input/output if ( isinstance(content, str) and content.startswith("{")