Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,20 @@ def _should_skip_span(
if not span or not span.name:
return True

# Skip spans created by the util-genai library itself.
# These spans are already properly instrumented and should not be
# processed again by the langsmith translator to avoid duplicate
# evaluations, metrics, and logs.
scope = getattr(span, "instrumentation_scope", None)
scope_name = getattr(scope, "name", "") if scope else ""
if scope_name.startswith("opentelemetry.util.genai"):
_logger.debug(
"[LANGSMITH_PROCESSOR] Skipping util-genai span (scope=%s): %s",
scope_name,
span.name,
)
return True

# Skip synthetic spans we created (check span ID in set)
if span_id and span_id in self._synthetic_span_ids:
_logger.debug(
Expand Down Expand Up @@ -597,6 +611,13 @@ def on_end(self, span: ReadableSpan) -> None:
) # Convert ns to seconds # type: ignore[attr-defined]

# Use handler.finish() for full functionality
# This will:
# 1. Set end_time if not set
# 2. Determine sample_for_evaluation
# 3. Call _emitter.on_end() - which handles ReadableSpan gracefully
# 4. Call _notify_completion() - triggers evaluation callbacks
# Note: Do NOT call handler.evaluate_agent() after finish(),
# as finish() already triggers evaluations via _notify_completion().
try:
handler.finish(invocation)
_logger.debug(
Expand All @@ -606,23 +627,6 @@ def on_end(self, span: ReadableSpan) -> None:
trace_id,
)

# If this invocation is an AgentInvocation, explicitly
# trigger agent-level evaluations
if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined]
try:
handler.evaluate_agent(invocation)
_logger.debug(
"[LANGSMITH_PROCESSOR] Agent invocation evaluated: %s",
span.name,
)
except (
Exception
) as eval_err: # pragma: no cover - defensive
_logger.warning(
"[LANGSMITH_PROCESSOR] Failed to evaluate AgentInvocation: %s",
eval_err,
)

except Exception as stop_err:
_logger.warning(
"[LANGSMITH_PROCESSOR] handler.finish failed: %s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,20 @@ def _should_skip_span(
if not span or not span.name:
return True

# Skip spans created by the util-genai library itself.
# These spans are already properly instrumented and should not be
# processed again by the openlit translator to avoid duplicate
# evaluations, metrics, and logs.
scope = getattr(span, "instrumentation_scope", None)
scope_name = getattr(scope, "name", "") if scope else ""
if scope_name.startswith("opentelemetry.util.genai"):
_logger.debug(
"[OPENLIT_PROCESSOR] Skipping util-genai span (scope=%s): %s",
scope_name,
span.name,
)
return True

# Skip synthetic spans we created (check span ID in set)
if span_id and span_id in self._synthetic_span_ids:
_logger.debug(
Expand Down Expand Up @@ -583,6 +597,8 @@ def on_end(self, span: ReadableSpan) -> None:
# 2. Determine sample_for_evaluation
# 3. Call _emitter.on_end() - which handles ReadableSpan gracefully
# 4. Call _notify_completion() - triggers evaluation callbacks
# Note: Do NOT call handler.evaluate_agent() after finish(),
# as finish() already triggers evaluations via _notify_completion().
try:
handler.finish(invocation)
_logger.debug(
Expand All @@ -592,26 +608,6 @@ def on_end(self, span: ReadableSpan) -> None:
trace_id,
)

# If this invocation is an AgentInvocation (for example,
# an OpenLit span representing an agent call), explicitly
# trigger agent-level evaluations so that
# gen_ai.evaluation.result events can be attached to the
# agent span itself, mirroring the Traceloop behavior.
if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined]
try:
handler.evaluate_agent(invocation)
_logger.debug(
"[OPENLIT_PROCESSOR] Agent invocation evaluated: %s",
span.name,
)
except (
Exception
) as eval_err: # pragma: no cover - defensive
_logger.warning(
"[OPENLIT_PROCESSOR] Failed to evaluate AgentInvocation: %s",
eval_err,
)

except Exception as stop_err:
_logger.warning(
"[OPENLIT_PROCESSOR] handler.finish failed: %s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,20 @@ def _should_skip_span(
if not span or not span.name:
return True

# Skip spans created by the util-genai library itself.
# These spans are already properly instrumented and should not be
# processed again by the traceloop translator to avoid duplicate
# evaluations, metrics, and logs.
scope = getattr(span, "instrumentation_scope", None)
scope_name = getattr(scope, "name", "") if scope else ""
if scope_name.startswith("opentelemetry.util.genai"):
_logger.debug(
"[TL_PROCESSOR] Skipping util-genai span (scope=%s): %s",
scope_name,
span.name,
)
return True

# Skip synthetic spans we created (check span ID in set)
if span_id and span_id in self._synthetic_span_ids:
_logger.debug(
Expand Down Expand Up @@ -619,6 +633,9 @@ def on_end(self, span: ReadableSpan) -> None:

# Close the invocation to trigger core lifecycle handling
# This will call the appropriate stop_* method and emit spans/metrics.
# Note: handler.finish() already triggers evaluations via the completion
# callback mechanism (_notify_completion -> on_completion), so no explicit
# evaluate_agent() call is needed for AgentInvocation types.
handler = self.telemetry_handler or get_telemetry_handler()
try:
handler.finish(invocation)
Expand All @@ -628,25 +645,6 @@ def on_end(self, span: ReadableSpan) -> None:
getattr(invocation, "sample_for_evaluation", None),
)

# If this invocation represents an agent call (invoke_agent),
# explicitly trigger agent-level evaluations so that
# gen_ai.evaluation.result events can be attached to the
# agent span itself, in addition to any LLM-level evaluations.
if isinstance(invocation, AgentInvocation): # type: ignore[attr-defined]
try:
handler.evaluate_agent(invocation)
_logger.debug(
"[TL_PROCESSOR] Agent invocation evaluated: %s",
span.name,
)
except (
Exception
) as eval_err: # pragma: no cover - defensive
_logger.warning(
"[TL_PROCESSOR] Failed to evaluate AgentInvocation: %s",
eval_err,
)

except Exception as stop_err:
_logger.warning(
"[TL_PROCESSOR] Failed to finish invocation: %s",
Expand Down Expand Up @@ -778,9 +776,6 @@ def _is_llm_span(self, span: ReadableSpan) -> bool:
if span.attributes and "_traceloop_translated" in span.attributes:
return False

# CRITICAL: Exclude evaluation-related spans (prevent recursive evaluation)
# Deepeval creates spans like "Run evaluate()", "Bias", "Toxicity", etc.
# These should NEVER be queued for evaluation
span_name = span.name or ""
for exclude_pattern in _EXCLUDE_SPAN_PATTERNS:
if exclude_pattern.lower() in span_name.lower():
Expand Down Expand Up @@ -1528,8 +1523,6 @@ def _convert_langchain_to_genai_messages(
# Extract content and convert to parts
content = getattr(lc_msg, "content", "")

# CRITICAL 1: Check if content is a JSON string with LangChain serialization format
# Basically only use the "content" of the incoming traceloop entity input/output
if (
isinstance(content, str)
and content.startswith("{")
Expand Down