From 58eb627341fdb037753b05db794f32bc3454db09 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Mon, 23 Mar 2026 13:01:44 -0700 Subject: [PATCH 01/18] Add provider agnostic tracing --- sdks/python/src/agent_control/__init__.py | 14 +++++ .../src/agent_control/telemetry/__init__.py | 27 +++++++++ .../src/agent_control/telemetry/event_sink.py | 33 +++++++++++ .../agent_control/telemetry/trace_context.py | 53 +++++++++++++++++ sdks/python/src/agent_control/tracing.py | 22 ++++++- sdks/python/tests/test_event_sink.py | 59 +++++++++++++++++++ sdks/python/tests/test_trace_context.py | 48 +++++++++++++++ sdks/python/tests/test_tracing.py | 46 +++++++++++++++ 8 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 sdks/python/src/agent_control/telemetry/__init__.py create mode 100644 sdks/python/src/agent_control/telemetry/event_sink.py create mode 100644 sdks/python/src/agent_control/telemetry/trace_context.py create mode 100644 sdks/python/tests/test_event_sink.py create mode 100644 sdks/python/tests/test_trace_context.py diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index 33658fb4..364e76bc 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -107,6 +107,14 @@ async def handle_input(user_message: str) -> str: is_otel_available, with_trace, ) +from .telemetry import ( + clear_control_event_sink, + clear_trace_context_provider, + emit_control_events, + get_trace_context_from_provider, + set_control_event_sink, + set_trace_context_provider, +) from .validation import ensure_agent_name # Module logger @@ -1305,6 +1313,12 @@ async def main(): "get_current_span_id", "with_trace", "is_otel_available", + "set_trace_context_provider", + "get_trace_context_from_provider", + "clear_trace_context_provider", + "set_control_event_sink", + "emit_control_events", + "clear_control_event_sink", # Observability "init_observability", "add_event", diff --git 
a/sdks/python/src/agent_control/telemetry/__init__.py b/sdks/python/src/agent_control/telemetry/__init__.py new file mode 100644 index 00000000..8933553d --- /dev/null +++ b/sdks/python/src/agent_control/telemetry/__init__.py @@ -0,0 +1,27 @@ +"""Telemetry interfaces for provider-agnostic tracing and event emission.""" + +from .event_sink import ( + ControlEventSink, + clear_control_event_sink, + emit_control_events, + set_control_event_sink, +) +from .trace_context import ( + TraceContext, + TraceContextProvider, + clear_trace_context_provider, + get_trace_context_from_provider, + set_trace_context_provider, +) + +__all__ = [ + "ControlEventSink", + "TraceContext", + "TraceContextProvider", + "clear_control_event_sink", + "clear_trace_context_provider", + "emit_control_events", + "get_trace_context_from_provider", + "set_control_event_sink", + "set_trace_context_provider", +] diff --git a/sdks/python/src/agent_control/telemetry/event_sink.py b/sdks/python/src/agent_control/telemetry/event_sink.py new file mode 100644 index 00000000..b36e9c13 --- /dev/null +++ b/sdks/python/src/agent_control/telemetry/event_sink.py @@ -0,0 +1,33 @@ +"""Provider-agnostic sink for merged control execution events.""" + +from collections.abc import Callable + +from agent_control_models import ControlExecutionEvent + +ControlEventSink = Callable[[list[ControlExecutionEvent]], None] + +_control_event_sink: ControlEventSink | None = None + + +def set_control_event_sink(sink: ControlEventSink | None) -> None: + """Register a sink for merged control execution events.""" + global _control_event_sink + _control_event_sink = sink + + +def emit_control_events(events: list[ControlExecutionEvent]) -> None: + """Emit merged control execution events to the registered sink.""" + if not events or _control_event_sink is None: + return + + try: + _control_event_sink(events) + except Exception: + # Sink failures should not break control evaluation. 
+ pass + + +def clear_control_event_sink() -> None: + """Clear the registered control event sink.""" + global _control_event_sink + _control_event_sink = None diff --git a/sdks/python/src/agent_control/telemetry/trace_context.py b/sdks/python/src/agent_control/telemetry/trace_context.py new file mode 100644 index 00000000..82c4326e --- /dev/null +++ b/sdks/python/src/agent_control/telemetry/trace_context.py @@ -0,0 +1,53 @@ +"""Provider-agnostic trace context interface for external tracing systems.""" + +from collections.abc import Callable +from typing import TypedDict + + +class TraceContext(TypedDict): + """Resolved trace context for a control evaluation.""" + + trace_id: str + span_id: str + + +TraceContextProvider = Callable[[], TraceContext | None] + +_trace_context_provider: TraceContextProvider | None = None + + +def set_trace_context_provider(provider: TraceContextProvider | None) -> None: + """Register a provider that returns the current trace context.""" + global _trace_context_provider + _trace_context_provider = provider + + +def get_trace_context_from_provider() -> TraceContext | None: + """Return trace context from the registered provider, if any.""" + if _trace_context_provider is None: + return None + + try: + trace_context = _trace_context_provider() + except Exception: + # Provider failures should not break control evaluation. 
+ return None + + if trace_context is None: + return None + + trace_id = trace_context.get("trace_id") + span_id = trace_context.get("span_id") + if not isinstance(trace_id, str) or not isinstance(span_id, str): + return None + + return { + "trace_id": trace_id, + "span_id": span_id, + } + + +def clear_trace_context_provider() -> None: + """Clear the registered trace context provider.""" + global _trace_context_provider + _trace_context_provider = None diff --git a/sdks/python/src/agent_control/tracing.py b/sdks/python/src/agent_control/tracing.py index 473b5633..47696b15 100644 --- a/sdks/python/src/agent_control/tracing.py +++ b/sdks/python/src/agent_control/tracing.py @@ -31,6 +31,8 @@ from contextlib import contextmanager from contextvars import ContextVar, Token +from .telemetry.trace_context import get_trace_context_from_provider + # Context variables for trace/span propagation _trace_id_var: ContextVar[str | None] = ContextVar("trace_id", default=None) _span_id_var: ContextVar[str | None] = ContextVar("span_id", default=None) @@ -94,8 +96,9 @@ def get_trace_and_span_ids() -> tuple[str, str]: Priority: 1. Context variable (set by with_trace or explicitly) - 2. OpenTelemetry context (if OTEL is installed and active) - 3. Generate new OTEL-compatible IDs + 2. External provider + 3. OpenTelemetry context (if OTEL is installed and active) + 4. 
Generate new OTEL-compatible IDs Returns: Tuple of (trace_id, span_id) - both are hex strings @@ -114,6 +117,11 @@ def get_trace_and_span_ids() -> tuple[str, str]: if trace_id is not None and span_id is not None: return trace_id, span_id + # Try external provider + trace_context = get_trace_context_from_provider() + if trace_context: + return trace_context["trace_id"], trace_context["span_id"] + # Try OpenTelemetry context otel_trace_id, otel_span_id = _get_otel_ids() @@ -136,6 +144,11 @@ def get_current_trace_id() -> str | None: if trace_id is not None: return trace_id + # Try external provider + trace_context = get_trace_context_from_provider() + if trace_context: + return trace_context["trace_id"] + # Try OpenTelemetry otel_trace_id, _ = _get_otel_ids() return otel_trace_id @@ -153,6 +166,11 @@ def get_current_span_id() -> str | None: if span_id is not None: return span_id + # Try external provider + trace_context = get_trace_context_from_provider() + if trace_context: + return trace_context["span_id"] + # Try OpenTelemetry _, otel_span_id = _get_otel_ids() return otel_span_id diff --git a/sdks/python/tests/test_event_sink.py b/sdks/python/tests/test_event_sink.py new file mode 100644 index 00000000..8013f4d6 --- /dev/null +++ b/sdks/python/tests/test_event_sink.py @@ -0,0 +1,59 @@ +"""Tests for the telemetry merged control event sink interface.""" + +from datetime import UTC, datetime + +from agent_control.telemetry.event_sink import ( + clear_control_event_sink, + emit_control_events, + set_control_event_sink, +) +from agent_control_models import ControlExecutionEvent + + +def _event() -> ControlExecutionEvent: + return ControlExecutionEvent( + control_execution_id="ce-1", + trace_id="a" * 32, + span_id="b" * 16, + agent_name="test-agent", + control_id=1, + control_name="pii_check", + check_stage="pre", + applies_to="llm_call", + action="allow", + matched=False, + confidence=0.95, + timestamp=datetime.now(UTC), + metadata={}, + ) + + +def teardown_function() 
-> None: + clear_control_event_sink() + + +def test_emit_control_events_calls_registered_sink() -> None: + seen: list[list[ControlExecutionEvent]] = [] + + def _sink(events: list[ControlExecutionEvent]) -> None: + seen.append(events) + + event = _event() + set_control_event_sink(_sink) + + emit_control_events([event]) + + assert seen == [[event]] + + +def test_emit_control_events_noops_without_sink() -> None: + emit_control_events([_event()]) + + +def test_emit_control_events_swallows_sink_failures() -> None: + def _sink(_events: list[ControlExecutionEvent]) -> None: + raise RuntimeError("boom") + + set_control_event_sink(_sink) + + emit_control_events([_event()]) diff --git a/sdks/python/tests/test_trace_context.py b/sdks/python/tests/test_trace_context.py new file mode 100644 index 00000000..9df234c6 --- /dev/null +++ b/sdks/python/tests/test_trace_context.py @@ -0,0 +1,48 @@ +"""Tests for the telemetry trace context provider interface.""" + +from agent_control.telemetry.trace_context import ( + clear_trace_context_provider, + get_trace_context_from_provider, + set_trace_context_provider, +) + + +def teardown_function() -> None: + clear_trace_context_provider() + + +def test_get_trace_context_from_provider_returns_registered_context() -> None: + set_trace_context_provider( + lambda: { + "trace_id": "a" * 32, + "span_id": "b" * 16, + } + ) + + assert get_trace_context_from_provider() == { + "trace_id": "a" * 32, + "span_id": "b" * 16, + } + + +def test_get_trace_context_from_provider_returns_none_when_unset() -> None: + assert get_trace_context_from_provider() is None + + +def test_get_trace_context_from_provider_swallows_provider_failures() -> None: + def _raising_provider(): + raise RuntimeError("boom") + + set_trace_context_provider(_raising_provider) + + assert get_trace_context_from_provider() is None + + +def test_get_trace_context_from_provider_returns_none_for_invalid_shape() -> None: + set_trace_context_provider( # type: ignore[arg-type] + lambda: { + 
"trace_id": "a" * 32, + } + ) + + assert get_trace_context_from_provider() is None diff --git a/sdks/python/tests/test_tracing.py b/sdks/python/tests/test_tracing.py index 175cb7c4..97397b8d 100644 --- a/sdks/python/tests/test_tracing.py +++ b/sdks/python/tests/test_tracing.py @@ -2,6 +2,7 @@ import pytest +from agent_control.telemetry.trace_context import clear_trace_context_provider, set_trace_context_provider from agent_control.tracing import ( _generate_span_id, _generate_trace_id, @@ -17,6 +18,10 @@ ) +def teardown_function() -> None: + clear_trace_context_provider() + + class TestIdGeneration: """Tests for trace and span ID generation.""" @@ -132,6 +137,30 @@ def test_get_current_ids_without_context(self): assert trace_id is None or isinstance(trace_id, str) assert span_id is None or isinstance(span_id, str) + def test_get_current_trace_id_uses_provider(self): + """Test that get_current_trace_id uses external provider before OTEL fallback.""" + expected_trace = "a" * 32 + set_trace_context_provider( + lambda: { + "trace_id": expected_trace, + "span_id": "b" * 16, + } + ) + + assert get_current_trace_id() == expected_trace + + def test_get_current_span_id_uses_provider(self): + """Test that get_current_span_id uses external provider before OTEL fallback.""" + expected_span = "b" * 16 + set_trace_context_provider( + lambda: { + "trace_id": "a" * 32, + "span_id": expected_span, + } + ) + + assert get_current_span_id() == expected_span + class TestWithTraceContextManager: """Tests for the with_trace context manager.""" @@ -237,6 +266,23 @@ def test_get_trace_and_span_ids_uses_context(self): assert trace_id == expected_trace assert span_id == expected_span + def test_get_trace_and_span_ids_uses_provider_before_otel(self): + """Test that an external provider is checked before OTEL fallback.""" + expected_trace = "c" * 32 + expected_span = "d" * 16 + + set_trace_context_provider( + lambda: { + "trace_id": expected_trace, + "span_id": expected_span, + } + ) + + 
trace_id, span_id = get_trace_and_span_ids() + + assert trace_id == expected_trace + assert span_id == expected_span + class TestOtelAvailability: """Tests for OpenTelemetry availability detection.""" From 3d39706d53742bb7185ce751d953db4a8be0fa76 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Mon, 23 Mar 2026 13:38:26 -0700 Subject: [PATCH 02/18] fix linting --- sdks/python/src/agent_control/__init__.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index 364e76bc..c03a3f66 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -78,11 +78,7 @@ async def handle_input(user_message: str) -> str: from ._control_registry import ( clear as clear_step_registry, ) - -# Import client and operations modules from .client import AgentControlClient - -# Import control decorator from .control_decorators import ControlSteerError, ControlViolationError, control from .evaluation import check_evaluation_with_local, evaluate_controls from .observability import ( @@ -98,15 +94,6 @@ async def handle_input(user_message: str) -> str: shutdown_observability, sync_shutdown_observability, ) - -# Import tracing and observability -from .tracing import ( - get_current_span_id, - get_current_trace_id, - get_trace_and_span_ids, - is_otel_available, - with_trace, -) from .telemetry import ( clear_control_event_sink, clear_trace_context_provider, @@ -115,6 +102,13 @@ async def handle_input(user_message: str) -> str: set_control_event_sink, set_trace_context_provider, ) +from .tracing import ( + get_current_span_id, + get_current_trace_id, + get_trace_and_span_ids, + is_otel_available, + with_trace, +) from .validation import ensure_agent_name # Module logger From c3241c1c954b09e0e25f412c7396c5d3fdf31c6e Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Mon, 23 Mar 2026 13:50:40 -0700 Subject: [PATCH 03/18] add 
test --- sdks/python/tests/test_trace_context.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sdks/python/tests/test_trace_context.py b/sdks/python/tests/test_trace_context.py index 9df234c6..e1305711 100644 --- a/sdks/python/tests/test_trace_context.py +++ b/sdks/python/tests/test_trace_context.py @@ -29,6 +29,12 @@ def test_get_trace_context_from_provider_returns_none_when_unset() -> None: assert get_trace_context_from_provider() is None +def test_get_trace_context_from_provider_returns_none_when_provider_returns_none() -> None: + set_trace_context_provider(lambda: None) + + assert get_trace_context_from_provider() is None + + def test_get_trace_context_from_provider_swallows_provider_failures() -> None: def _raising_provider(): raise RuntimeError("boom") From 55e57b594520889957d2d179dd76f05c375ac369 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 26 Mar 2026 14:42:22 -0700 Subject: [PATCH 04/18] draft --- models/src/agent_control_models/evaluation.py | 5 + sdks/python/src/agent_control/__init__.py | 2 + sdks/python/src/agent_control/evaluation.py | 115 ++++--- .../src/agent_control/telemetry/__init__.py | 2 + .../src/agent_control/telemetry/event_sink.py | 5 + .../tests/test_observability_updates.py | 303 +++++++++++++----- .../endpoints/evaluation.py | 68 ++-- .../tests/test_evaluation_error_handling.py | 61 +++- 8 files changed, 400 insertions(+), 161 deletions(-) diff --git a/models/src/agent_control_models/evaluation.py b/models/src/agent_control_models/evaluation.py index 07ab4810..8b0dd9c2 100644 --- a/models/src/agent_control_models/evaluation.py +++ b/models/src/agent_control_models/evaluation.py @@ -6,6 +6,7 @@ from .agent import AGENT_NAME_MIN_LENGTH, AGENT_NAME_PATTERN, Step, normalize_agent_name from .base import BaseModel from .controls import ControlMatch +from .observability import ControlExecutionEvent class EvaluationRequest(BaseModel): @@ -127,6 +128,10 @@ class EvaluationResponse(BaseModel): default=None, description="List 
of controls that were evaluated but did not match (if any)", ) + events: list[ControlExecutionEvent] | None = Field( + default=None, + description="Control execution events produced during evaluation (if any)", + ) class EvaluationResult(EvaluationResponse): diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index c03a3f66..d57ca0d9 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -99,6 +99,7 @@ async def handle_input(user_message: str) -> str: clear_trace_context_provider, emit_control_events, get_trace_context_from_provider, + has_control_event_sink, set_control_event_sink, set_trace_context_provider, ) @@ -1311,6 +1312,7 @@ async def main(): "get_trace_context_from_provider", "clear_trace_context_provider", "set_control_event_sink", + "has_control_event_sink", "emit_control_events", "clear_control_event_sink", # Observability diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 55f5efc1..70695b92 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -20,6 +20,7 @@ from ._state import state from .client import AgentControlClient from .observability import add_event, get_logger, is_observability_enabled +from .telemetry import emit_control_events, has_control_event_sink from .validation import ensure_agent_name _logger = get_logger(__name__) @@ -53,15 +54,15 @@ def _map_applies_to(step_type: str) -> Literal["llm_call", "tool_call"]: return "tool_call" if step_type == "tool" else "llm_call" -def _emit_local_events( +def _build_local_events( local_result: "EvaluationResponse", request: "EvaluationRequest", local_controls: list["_ControlAdapter"], trace_id: str | None, span_id: str | None, agent_name: str | None, -) -> None: - """Emit observability events for locally-evaluated controls. 
+) -> list[ControlExecutionEvent]: + """Build observability events for locally-evaluated controls. Mirrors the server's _emit_observability_events() so that SDK-evaluated controls are visible in the observability pipeline. @@ -69,11 +70,9 @@ def _emit_local_events( When trace_id/span_id are missing, fallback all-zero IDs are used so events are still recorded (but clearly marked as uncorrelated). - Only runs when observability is enabled. + Returns a list of local events. Fallback IDs are applied when trace context + is missing so the events can still be correlated within the SDK pipeline. """ - if not is_observability_enabled(): - return - global _trace_warning_logged # noqa: PLW0603 if not trace_id or not span_id: if not _trace_warning_logged: @@ -90,8 +89,9 @@ def _emit_local_events( control_lookup = {c.id: c for c in local_controls} now = datetime.now(UTC) resolved_agent_name = agent_name or request.agent_name + events: list[ControlExecutionEvent] = [] - def _emit_matches(matches: list[ControlMatch] | None, matched: bool) -> None: + def _append_matches(matches: list[ControlMatch] | None, matched: bool) -> None: if not matches: return for match in matches: @@ -104,7 +104,7 @@ def _emit_matches(matches: list[ControlMatch] | None, matched: bool) -> None: ctrl.control ) event_metadata.update(identity_metadata) - add_event( + events.append( ControlExecutionEvent( control_execution_id=match.control_execution_id, trace_id=trace_id, @@ -125,9 +125,19 @@ def _emit_matches(matches: list[ControlMatch] | None, matched: bool) -> None: ) ) - _emit_matches(local_result.matches, matched=True) - _emit_matches(local_result.errors, matched=False) - _emit_matches(local_result.non_matches, matched=False) + _append_matches(local_result.matches, matched=True) + _append_matches(local_result.errors, matched=False) + _append_matches(local_result.non_matches, matched=False) + return events + + +def _deliver_oss_events(events: list[ControlExecutionEvent]) -> None: + """Send events through 
the existing OSS SDK observability path.""" + if not is_observability_enabled(): + return + + for event in events: + add_event(event) async def check_evaluation( @@ -236,6 +246,10 @@ def _merge_results( if local_result.non_matches or server_result.non_matches: non_matches = (local_result.non_matches or []) + (server_result.non_matches or []) + events: list[ControlExecutionEvent] | None = None + if local_result.events or server_result.events: + events = (local_result.events or []) + (server_result.events or []) + reason = None if local_result.reason and server_result.reason: reason = f"{local_result.reason}; {server_result.reason}" @@ -251,6 +265,7 @@ def _merge_results( matches=matches if matches else None, errors=errors if errors else None, non_matches=non_matches if non_matches else None, + events=events if events else None, ) @@ -366,16 +381,14 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if not parse_errors: return result combined_errors = (result.errors or []) + parse_errors - return EvaluationResult( - is_safe=result.is_safe, - confidence=result.confidence, - reason=result.reason, - matches=result.matches, - errors=combined_errors, - non_matches=result.non_matches, + return result.model_copy( + update={ + "errors": combined_errors, + } ) local_result: EvaluationResponse | None = None + merged_emission_enabled = has_control_event_sink() applicable_local_controls = _get_applicable_controls( local_controls, request, @@ -385,7 +398,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: engine = ControlEngine(applicable_local_controls, context="sdk") local_result = await engine.process(request) - _emit_local_events( + local_events = _build_local_events( local_result, request, applicable_local_controls, @@ -393,18 +406,16 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: span_id, agent_name=event_agent_name, ) + local_result = local_result.model_copy(update={"events": local_events or None}) + + 
if not merged_emission_enabled: + _deliver_oss_events(local_events) if not local_result.is_safe: - return _with_parse_errors( - EvaluationResult( - is_safe=local_result.is_safe, - confidence=local_result.confidence, - reason=local_result.reason, - matches=local_result.matches, - errors=local_result.errors, - non_matches=local_result.non_matches, - ) - ) + result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) + if merged_emission_enabled and result.events: + emit_control_events(result.events) + return result if _has_applicable_prefiltered_server_controls(server_control_payloads, request): request_payload = request.model_dump(mode="json", exclude_none=True) @@ -413,6 +424,8 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: headers["X-Trace-Id"] = trace_id if span_id: headers["X-Span-Id"] = span_id + if merged_emission_enabled: + headers["X-Agent-Control-Merge-Events"] = "true" response = await client.http_client.post( "/api/v1/evaluation", @@ -423,32 +436,28 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: server_result = EvaluationResponse.model_validate(response.json()) if local_result is not None: - return _with_parse_errors(_merge_results(local_result, server_result)) - - return _with_parse_errors( - EvaluationResult( - is_safe=server_result.is_safe, - confidence=server_result.confidence, - reason=server_result.reason, - matches=server_result.matches, - errors=server_result.errors, - non_matches=server_result.non_matches, - ) - ) + result = _with_parse_errors(_merge_results(local_result, server_result)) + if merged_emission_enabled and result.events: + emit_control_events(result.events) + return result - if local_result is not None: - return _with_parse_errors( - EvaluationResult( - is_safe=local_result.is_safe, - confidence=local_result.confidence, - reason=local_result.reason, - matches=local_result.matches, - errors=local_result.errors, - non_matches=local_result.non_matches, - ) 
+ result = _with_parse_errors( + EvaluationResult.model_validate(server_result.model_dump()) ) + if merged_emission_enabled and result.events: + emit_control_events(result.events) + return result - return _with_parse_errors(EvaluationResult(is_safe=True, confidence=1.0)) + if local_result is not None: + result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) + if merged_emission_enabled and result.events: + emit_control_events(result.events) + return result + + result = _with_parse_errors(EvaluationResult(is_safe=True, confidence=1.0)) + if merged_emission_enabled and result.events: + emit_control_events(result.events) + return result async def evaluate_controls( diff --git a/sdks/python/src/agent_control/telemetry/__init__.py b/sdks/python/src/agent_control/telemetry/__init__.py index 8933553d..6e40b8a2 100644 --- a/sdks/python/src/agent_control/telemetry/__init__.py +++ b/sdks/python/src/agent_control/telemetry/__init__.py @@ -4,6 +4,7 @@ ControlEventSink, clear_control_event_sink, emit_control_events, + has_control_event_sink, set_control_event_sink, ) from .trace_context import ( @@ -22,6 +23,7 @@ "clear_trace_context_provider", "emit_control_events", "get_trace_context_from_provider", + "has_control_event_sink", "set_control_event_sink", "set_trace_context_provider", ] diff --git a/sdks/python/src/agent_control/telemetry/event_sink.py b/sdks/python/src/agent_control/telemetry/event_sink.py index b36e9c13..19062604 100644 --- a/sdks/python/src/agent_control/telemetry/event_sink.py +++ b/sdks/python/src/agent_control/telemetry/event_sink.py @@ -27,6 +27,11 @@ def emit_control_events(events: list[ControlExecutionEvent]) -> None: pass +def has_control_event_sink() -> bool: + """Return whether a merged control event sink is currently registered.""" + return _control_event_sink is not None + + def clear_control_event_sink() -> None: """Clear the registered control event sink.""" global _control_event_sink diff --git 
a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index cdaaa6ce..b3e654b8 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -6,7 +6,8 @@ from agent_control import evaluation from agent_control.evaluation import ( _ControlAdapter, - _emit_local_events, + _build_local_events, + _deliver_oss_events, _map_applies_to, _merge_results, ) @@ -103,14 +104,49 @@ def test_still_combines_matches_and_errors(self): assert len(result.matches) == 2 assert len(result.errors) == 1 + def test_combines_events(self): + from agent_control_models import ControlExecutionEvent + + ev1 = ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name="agent-000000000001", + control_id=1, + control_name="ctrl-1", + check_stage="pre", + applies_to="llm_call", + action="allow", + matched=False, + confidence=1.0, + ) + ev2 = ControlExecutionEvent( + trace_id="c" * 32, + span_id="d" * 16, + agent_name="agent-000000000001", + control_id=2, + control_name="ctrl-2", + check_stage="pre", + applies_to="llm_call", + action="deny", + matched=True, + confidence=1.0, + ) + + local = self._make_response(events=[ev1]) + server = self._make_response(events=[ev2]) + + result = _merge_results(local, server) + assert result.events is not None + assert [event.control_id for event in result.events] == [1, 2] + # ============================================================================= -# _emit_local_events tests +# local event build/delivery tests # ============================================================================= class TestEmitLocalEvents: - """Tests for _emit_local_events helper.""" + """Tests for local event build/delivery helpers.""" def _make_control_adapter(self, id, name, evaluator_name="regex", selector_path="input"): """Create a _ControlAdapter for testing.""" @@ -153,75 +189,97 @@ def _make_request(self, step_type="llm"): stage="pre", ) - def 
test_emits_events_when_observability_enabled(self): - """Should call add_event for each match/error/non_match.""" - from agent_control.evaluation import _emit_local_events - + def test_builds_events(self): + """Should build one event per match/error/non_match.""" ctrl = self._make_control_adapter(1, "ctrl-1") match = self._make_match(1, "ctrl-1") non_match = self._make_match(2, "ctrl-2", matched=False) response = self._make_response(matches=[match], non_matches=[non_match]) request = self._make_request() + events = _build_local_events( + response, + request, + [ctrl, self._make_control_adapter(2, "ctrl-2")], + "trace123", + "span456", + "test-agent", + ) + assert len(events) == 2 + event = events[0] + assert event.trace_id == "trace123" + assert event.span_id == "span456" + assert event.agent_name == "test-agent" + assert event.matched is True + assert event.evaluator_name == "regex" + assert event.selector_path == "input" + + def test_delivers_events_when_observability_enabled(self): + """Should call add_event for each built event when OSS delivery is enabled.""" + from agent_control_models import ControlExecutionEvent + + built_events = [ + ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name="agent-000000000001", + control_id=1, + control_name="ctrl-1", + check_stage="pre", + applies_to="llm_call", + action="allow", + matched=False, + confidence=1.0, + ) + ] + with patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.add_event") as mock_add: - _emit_local_events( - response, request, - [ctrl, self._make_control_adapter(2, "ctrl-2")], - "trace123", "span456", "test-agent", - ) - assert mock_add.call_count == 2 - # Verify event fields for the match - event = mock_add.call_args_list[0][0][0] - assert event.trace_id == "trace123" - assert event.span_id == "span456" - assert event.agent_name == "test-agent" - assert event.matched is True - assert event.evaluator_name == "regex" - 
assert event.selector_path == "input" - - def test_skips_when_observability_disabled(self): + _deliver_oss_events(built_events) + mock_add.assert_called_once_with(built_events[0]) + + def test_skips_delivery_when_observability_disabled(self): """Should not call add_event when observability is disabled.""" - from agent_control.evaluation import _emit_local_events + from agent_control_models import ControlExecutionEvent - ctrl = self._make_control_adapter(1, "ctrl-1") - match = self._make_match(1, "ctrl-1") - response = self._make_response(matches=[match]) - request = self._make_request() + built_events = [ + ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name="agent-000000000001", + control_id=1, + control_name="ctrl-1", + check_stage="pre", + applies_to="llm_call", + action="allow", + matched=False, + confidence=1.0, + ) + ] with patch("agent_control.evaluation.is_observability_enabled", return_value=False), \ patch("agent_control.evaluation.add_event") as mock_add: - _emit_local_events( - response, request, [ctrl], - "trace123", "span456", "test-agent", - ) + _deliver_oss_events(built_events) mock_add.assert_not_called() def test_maps_tool_step_to_tool_call(self): """Should set applies_to='tool_call' for tool steps.""" - from agent_control.evaluation import _emit_local_events - ctrl = self._make_control_adapter(1, "ctrl-1") match = self._make_match(1, "ctrl-1") response = self._make_response(matches=[match]) request = self._make_request(step_type="tool") - with patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch("agent_control.evaluation.add_event") as mock_add: - _emit_local_events( - response, request, [ctrl], - "trace123", "span456", "test-agent", - ) - event = mock_add.call_args_list[0][0][0] - assert event.applies_to == "tool_call" + built_events = _build_local_events( + response, request, [ctrl], "trace123", "span456", "test-agent" + ) + assert built_events[0].applies_to == "tool_call" def 
test_uses_fallback_ids_when_trace_context_missing(self): - """Should emit events with all-zero fallback IDs when trace context is absent.""" + """Should build events with all-zero fallback IDs when trace context is absent.""" import agent_control.evaluation as eval_mod from agent_control.evaluation import ( _FALLBACK_SPAN_ID, _FALLBACK_TRACE_ID, - _emit_local_events, ) ctrl = self._make_control_adapter(1, "ctrl-1") @@ -232,15 +290,12 @@ def test_uses_fallback_ids_when_trace_context_missing(self): # Reset the once-only warning flag so the warning fires in this test eval_mod._trace_warning_logged = False - with patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch("agent_control.evaluation.add_event") as mock_add, \ - patch("agent_control.evaluation._logger") as mock_logger: - _emit_local_events( - response, request, [ctrl], - None, None, "test-agent", + with patch("agent_control.evaluation._logger") as mock_logger: + built_events = _build_local_events( + response, request, [ctrl], None, None, "test-agent" ) - assert mock_add.call_count == 1 - event = mock_add.call_args_list[0][0][0] + assert len(built_events) == 1 + event = built_events[0] assert event.trace_id == _FALLBACK_TRACE_ID assert event.span_id == _FALLBACK_SPAN_ID assert event.trace_id == "0" * 32 @@ -277,9 +332,7 @@ def test_composite_control_emits_representative_leaf_metadata(self): request = self._make_request() # When: emitting local observability events - with patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch("agent_control.evaluation.add_event") as mock_add: - _emit_local_events( + built_events = _build_local_events( response, request, [ctrl], @@ -287,7 +340,7 @@ def test_composite_control_emits_representative_leaf_metadata(self): "span456", "test-agent", ) - event = mock_add.call_args_list[0][0][0] + event = built_events[0] # Then: the first leaf becomes the event identity and full context is preserved assert 
event.evaluator_name == "regex" @@ -301,8 +354,6 @@ def test_composite_control_emits_representative_leaf_metadata(self): def test_fallback_warning_logged_only_once(self): """The missing-trace-context warning should fire only on the first call.""" import agent_control.evaluation as eval_mod - from agent_control.evaluation import _emit_local_events - ctrl = self._make_control_adapter(1, "ctrl-1") match = self._make_match(1, "ctrl-1") response = self._make_response(matches=[match]) @@ -310,11 +361,9 @@ def test_fallback_warning_logged_only_once(self): eval_mod._trace_warning_logged = False - with patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch("agent_control.evaluation.add_event"), \ - patch("agent_control.evaluation._logger") as mock_logger: - _emit_local_events(response, request, [ctrl], None, None, "agent-test-a1") - _emit_local_events(response, request, [ctrl], None, None, "agent-test-a1") + with patch("agent_control.evaluation._logger") as mock_logger: + _build_local_events(response, request, [ctrl], None, None, "agent-test-a1") + _build_local_events(response, request, [ctrl], None, None, "agent-test-a1") assert mock_logger.warning.call_count == 1 @@ -373,7 +422,7 @@ async def test_emits_events_when_trace_context_provided(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation._emit_local_events") as mock_emit: + patch("agent_control.evaluation._deliver_oss_events") as mock_deliver: result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -385,16 +434,15 @@ async def test_emits_events_when_trace_context_provided(self): event_agent_name="test-agent", ) - mock_emit.assert_called_once() - call_args = mock_emit.call_args - assert call_args[0][2] is not None # local_controls - assert call_args[0][3] == "abc123" # trace_id - assert 
call_args[0][4] == "def456" # span_id - assert call_args.kwargs["agent_name"] == "test-agent" + mock_deliver.assert_called_once() # Also verify non_matches propagated assert result.non_matches is not None assert len(result.non_matches) == 1 + assert result.events is not None + assert len(result.events) == 1 + assert result.events[0].trace_id == "abc123" + assert result.events[0].span_id == "def456" @pytest.mark.asyncio async def test_emits_events_without_trace_context(self): @@ -427,8 +475,8 @@ async def test_emits_events_without_trace_context(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation._emit_local_events") as mock_emit: - await evaluation.check_evaluation_with_local( + patch("agent_control.evaluation._deliver_oss_events") as mock_deliver: + result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", step=step, @@ -436,10 +484,10 @@ async def test_emits_events_without_trace_context(self): controls=controls, # No trace_id/span_id ) - mock_emit.assert_called_once() - call_args = mock_emit.call_args - assert call_args[0][3] is None # trace_id passed as None - assert call_args[0][4] is None # span_id passed as None + mock_deliver.assert_called_once() + assert result.events is not None + assert result.events[0].trace_id == "0" * 32 + assert result.events[0].span_id == "0" * 16 @pytest.mark.asyncio async def test_forwards_trace_headers_to_server(self): @@ -492,6 +540,109 @@ async def test_forwards_trace_headers_to_server(self): assert headers["X-Trace-Id"] == "aaaa1111bbbb2222cccc3333dddd4444" assert headers["X-Span-Id"] == "eeee5555ffff6666" + @pytest.mark.asyncio + async def test_merged_event_sink_emits_once_after_merge(self): + """When a sink is registered, local/server events should merge and emit once.""" + from agent_control_models import ( + ControlExecutionEvent, + 
ControlMatch, + EvaluationResponse, + EvaluatorResult, + Step, + ) + + local_response = EvaluationResponse( + is_safe=True, + confidence=1.0, + matches=[ + ControlMatch( + control_id=1, + control_name="local-ctrl", + action="allow", + result=EvaluatorResult(matched=False, confidence=0.8), + ) + ], + ) + server_event = ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name="agent-000000000001", + control_id=2, + control_name="server-ctrl", + check_stage="pre", + applies_to="llm_call", + action="allow", + matched=False, + confidence=0.4, + ) + mock_http_response = MagicMock() + mock_http_response.raise_for_status = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 0.9, + "matches": None, + "errors": None, + "non_matches": None, + "events": [server_event.model_dump(mode="json")], + } + + controls = [ + { + "id": 1, + "name": "local-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "sdk", + }, + }, + { + "id": 2, + "name": "server-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "server", + }, + }, + ] + + mock_engine = MagicMock() + mock_engine.process = AsyncMock(return_value=local_response) + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", input="hello") + + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ + patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ + patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + 
patch("agent_control.evaluation.add_event") as mock_add: + result = await evaluation.check_evaluation_with_local( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + controls=controls, + ) + + mock_add.assert_not_called() + mock_emit.assert_called_once() + merged_events = mock_emit.call_args.args[0] + assert len(merged_events) == 2 + assert {event.control_id for event in merged_events} == {1, 2} + headers = client.http_client.post.call_args.kwargs["headers"] + assert headers["X-Agent-Control-Merge-Events"] == "true" + assert result.events is not None + assert len(result.events) == 2 + # ============================================================================= # control_decorators non_matches dict conversion diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index c92ea315..128c9e08 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -156,6 +156,7 @@ async def evaluate( db: AsyncSession = Depends(get_async_db), x_trace_id: str | None = Header(default=None, alias="X-Trace-Id"), x_span_id: str | None = Header(default=None, alias="X-Span-Id"), + x_merge_events: str | None = Header(default=None, alias="X-Agent-Control-Merge-Events"), ) -> EvaluationResponse: """Analyze content for safety and control violations. 
@@ -238,30 +239,35 @@ async def evaluate( # Calculate total execution time total_duration_ms = (time.perf_counter() - start_time) * 1000 - # Emit observability events if enabled - if observability_settings.enabled: + merge_events_requested = (x_merge_events or "").lower() == "true" + response_events = _build_observability_events( + response=raw_response, + request=request, + trace_id=trace_id, + span_id=span_id, + agent_name=agent_name, + applies_to=applies_to, + control_lookup=control_lookup, + total_duration_ms=total_duration_ms, + ) + + # OSS keeps server-side ingestion as the default. Enterprise merged mode + # returns events to the SDK and skips this server-side delivery step. + if observability_settings.enabled and not merge_events_requested: # Get ingestor from app.state (None if not initialized) try: ingestor = get_event_ingestor(req) except RuntimeError: ingestor = None + await _ingest_observability_events(response_events, ingestor) - await _emit_observability_events( - response=raw_response, - request=request, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - applies_to=applies_to, - control_lookup=control_lookup, - total_duration_ms=total_duration_ms, - ingestor=ingestor, - ) - - return _sanitize_evaluation_response(raw_response) + sanitized = _sanitize_evaluation_response(raw_response) + if response_events: + sanitized = sanitized.model_copy(update={"events": response_events}) + return sanitized -async def _emit_observability_events( +def _build_observability_events( response: EvaluationResponse, request: EvaluationRequest, trace_id: str, @@ -270,9 +276,8 @@ async def _emit_observability_events( applies_to: Literal["llm_call", "tool_call"], control_lookup: dict, total_duration_ms: float, - ingestor: EventIngestor | None, -) -> None: - """Create and enqueue observability events for all evaluated controls. + ) -> list[ControlExecutionEvent]: + """Create observability events for all evaluated controls. 
Uses control_execution_id from the engine response to ensure correlation between SDK logs and server observability events. @@ -379,11 +384,20 @@ async def _emit_observability_events( ) ) - # Ingest events - if events and ingestor: - result = await ingestor.ingest(events) - if result.dropped > 0: - _logger.warning( - f"Dropped {result.dropped} observability events, " - f"processed {result.processed}" - ) + return events + + +async def _ingest_observability_events( + events: list[ControlExecutionEvent], + ingestor: EventIngestor | None, +) -> None: + """Ingest server-side observability events when OSS batching is active.""" + if not events or ingestor is None: + return + + result = await ingestor.ingest(events) + if result.dropped > 0: + _logger.warning( + f"Dropped {result.dropped} observability events, " + f"processed {result.processed}" + ) diff --git a/server/tests/test_evaluation_error_handling.py b/server/tests/test_evaluation_error_handling.py index 942dca66..a5abdb00 100644 --- a/server/tests/test_evaluation_error_handling.py +++ b/server/tests/test_evaluation_error_handling.py @@ -3,7 +3,13 @@ import uuid from unittest.mock import AsyncMock, MagicMock -from agent_control_models import ControlMatch, EvaluationRequest, EvaluatorResult, Step +from agent_control_models import ( + ControlExecutionEvent, + ControlMatch, + EvaluationRequest, + EvaluatorResult, + Step, +) from fastapi.testclient import TestClient from agent_control_server.endpoints.evaluation import ( @@ -198,8 +204,10 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani lambda _config: mock_evaluator, ) - emit_mock = AsyncMock() - monkeypatch.setattr(evaluation_module, "_emit_observability_events", emit_mock) + build_mock = MagicMock(return_value=[]) + ingest_mock = AsyncMock() + monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) + monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) 
monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) # When: sending an evaluation request @@ -220,8 +228,8 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani assert data["errors"][0]["result"]["error"] == SAFE_EVALUATOR_ERROR # And: observability receives the raw engine response with unsanitized diagnostics - emit_mock.assert_awaited_once() - raw_response = emit_mock.await_args.kwargs["response"] + build_mock.assert_called_once() + raw_response = build_mock.call_args.kwargs["response"] assert raw_response.errors is not None raw_error = raw_response.errors[0] assert raw_error.control_name == control_name @@ -229,6 +237,7 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani raw_trace = raw_error.result.metadata["condition_trace"] assert raw_trace["error"] == "RuntimeError: Simulated evaluator crash" assert raw_trace["message"] == "Evaluation failed: RuntimeError: Simulated evaluator crash" + ingest_mock.assert_awaited_once() def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: @@ -372,3 +381,45 @@ async def ingest(self, events): # type: ignore[no-untyped-def] del app.state.event_ingestor else: app.state.event_ingestor = previous_ingestor + + +def test_evaluation_returns_events_and_skips_ingest_for_merge_mode( + client: TestClient, monkeypatch +) -> None: + """Merged-event mode should return events without ingesting them server-side.""" + agent_name, _ = create_and_assign_policy(client) + + import agent_control_server.endpoints.evaluation as evaluation_module + + event = ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name=agent_name, + control_id=1, + control_name="test-control", + check_stage="pre", + applies_to="llm_call", + action="deny", + matched=True, + confidence=0.9, + ) + build_mock = MagicMock(return_value=[event]) + ingest_mock = AsyncMock() + monkeypatch.setattr(evaluation_module, "_build_observability_events", 
build_mock) + monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) + monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) + + payload = Step(type="llm", name="test-step", input="x", output=None) + req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") + resp = client.post( + "/api/v1/evaluation", + json=req.model_dump(mode="json"), + headers={"X-Agent-Control-Merge-Events": "true"}, + ) + + assert resp.status_code == 200 + body = resp.json() + assert body["events"] is not None + assert len(body["events"]) == 1 + assert body["events"][0]["control_execution_id"] == event.control_execution_id + ingest_mock.assert_not_awaited() From 61cd78875e531a0de12298295c92bdf7c328e641 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 26 Mar 2026 14:52:12 -0700 Subject: [PATCH 05/18] address comments --- sdks/python/src/agent_control/evaluation.py | 20 ++++-- .../agent_control/telemetry/trace_context.py | 2 + .../tests/test_observability_updates.py | 72 ++++++++++++++++++- sdks/python/tests/test_trace_context.py | 11 +++ 4 files changed, 96 insertions(+), 9 deletions(-) diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 55f5efc1..d76af177 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -20,6 +20,7 @@ from ._state import state from .client import AgentControlClient from .observability import add_event, get_logger, is_observability_enabled +from .tracing import get_trace_and_span_ids from .validation import ensure_agent_name _logger = get_logger(__name__) @@ -291,6 +292,13 @@ async def check_evaluation_with_local( httpx.HTTPError: If server request fails """ normalized_name = ensure_agent_name(agent_name) + resolved_trace_id = trace_id + resolved_span_id = span_id + if trace_id is None or span_id is None: + current_trace_id, current_span_id = get_trace_and_span_ids() + 
resolved_trace_id = trace_id or current_trace_id + resolved_span_id = span_id or current_span_id + # Partition controls by local flag local_controls: list[_ControlAdapter] = [] parse_errors: list[ControlMatch] = [] @@ -389,8 +397,8 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: local_result, request, applicable_local_controls, - trace_id, - span_id, + resolved_trace_id, + resolved_span_id, agent_name=event_agent_name, ) @@ -409,10 +417,10 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if _has_applicable_prefiltered_server_controls(server_control_payloads, request): request_payload = request.model_dump(mode="json", exclude_none=True) headers: dict[str, str] = {} - if trace_id: - headers["X-Trace-Id"] = trace_id - if span_id: - headers["X-Span-Id"] = span_id + if resolved_trace_id: + headers["X-Trace-Id"] = resolved_trace_id + if resolved_span_id: + headers["X-Span-Id"] = resolved_span_id response = await client.http_client.post( "/api/v1/evaluation", diff --git a/sdks/python/src/agent_control/telemetry/trace_context.py b/sdks/python/src/agent_control/telemetry/trace_context.py index 82c4326e..a871fb29 100644 --- a/sdks/python/src/agent_control/telemetry/trace_context.py +++ b/sdks/python/src/agent_control/telemetry/trace_context.py @@ -40,6 +40,8 @@ def get_trace_context_from_provider() -> TraceContext | None: span_id = trace_context.get("span_id") if not isinstance(trace_id, str) or not isinstance(span_id, str): return None + if not trace_id or not span_id: + return None return { "trace_id": trace_id, diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index cdaaa6ce..bb11a5ae 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -10,6 +10,10 @@ _map_applies_to, _merge_results, ) +from agent_control.telemetry.trace_context import ( + clear_trace_context_provider, + set_trace_context_provider, 
+) from agent_control_models import ControlDefinition # ============================================================================= @@ -326,6 +330,9 @@ def test_fallback_warning_logged_only_once(self): class TestCheckEvaluationWithLocal: """Tests for check_evaluation_with_local event emission and non_matches.""" + def teardown_method(self) -> None: + clear_trace_context_provider() + @pytest.mark.asyncio async def test_emits_events_when_trace_context_provided(self): """Should emit observability events when trace_id and span_id are passed.""" @@ -398,7 +405,7 @@ async def test_emits_events_when_trace_context_provided(self): @pytest.mark.asyncio async def test_emits_events_without_trace_context(self): - """Should still emit events when trace_id/span_id not provided (fallback IDs).""" + """Should resolve trace context from the provider when IDs are omitted.""" from agent_control_models import EvaluationResponse, Step mock_response = EvaluationResponse( @@ -424,6 +431,12 @@ async def test_emits_events_without_trace_context(self): client = MagicMock() client.http_client = AsyncMock() step = Step(type="llm", name="test-step", input="hello") + set_trace_context_provider( + lambda: { + "trace_id": "a" * 32, + "span_id": "b" * 16, + } + ) with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ @@ -438,8 +451,8 @@ async def test_emits_events_without_trace_context(self): ) mock_emit.assert_called_once() call_args = mock_emit.call_args - assert call_args[0][3] is None # trace_id passed as None - assert call_args[0][4] is None # span_id passed as None + assert call_args[0][3] == "a" * 32 + assert call_args[0][4] == "b" * 16 @pytest.mark.asyncio async def test_forwards_trace_headers_to_server(self): @@ -492,6 +505,59 @@ async def test_forwards_trace_headers_to_server(self): assert headers["X-Trace-Id"] == "aaaa1111bbbb2222cccc3333dddd4444" assert headers["X-Span-Id"] == 
"eeee5555ffff6666" + @pytest.mark.asyncio + async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): + """Server POST should resolve trace headers from the provider when omitted.""" + from agent_control_models import Step + + controls = [{ + "id": 1, + "name": "server-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "deny"}, + "execution": "server", + }, + }] + + mock_http_response = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 1.0, + "matches": None, + "errors": None, + "non_matches": None, + } + mock_http_response.raise_for_status = MagicMock() + + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", input="hello") + set_trace_context_provider( + lambda: { + "trace_id": "c" * 32, + "span_id": "d" * 16, + } + ) + + with patch("agent_control.evaluation.list_evaluators", return_value=["regex"]): + await evaluation.check_evaluation_with_local( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + controls=controls, + ) + + call_kwargs = client.http_client.post.call_args + headers = call_kwargs.kwargs.get("headers", {}) + assert headers["X-Trace-Id"] == "c" * 32 + assert headers["X-Span-Id"] == "d" * 16 + # ============================================================================= # control_decorators non_matches dict conversion diff --git a/sdks/python/tests/test_trace_context.py b/sdks/python/tests/test_trace_context.py index e1305711..f08306e0 100644 --- a/sdks/python/tests/test_trace_context.py +++ b/sdks/python/tests/test_trace_context.py @@ -52,3 +52,14 @@ def test_get_trace_context_from_provider_returns_none_for_invalid_shape() -> Non ) assert get_trace_context_from_provider() is None + + +def 
test_get_trace_context_from_provider_returns_none_for_empty_ids() -> None: + set_trace_context_provider( + lambda: { + "trace_id": "", + "span_id": "", + } + ) + + assert get_trace_context_from_provider() is None From ff1e3440c3ac7bce17bbf085b396b10de6b0543e Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 31 Mar 2026 12:27:28 -0700 Subject: [PATCH 06/18] update docstring --- sdks/python/src/agent_control/evaluation.py | 66 +++++++++++++++++-- .../src/agent_control/evaluation_events.py | 37 ++++++++++- .../src/agent_control/telemetry/event_sink.py | 33 +++++++++- .../tests/test_observability_updates.py | 35 ++++++---- .../endpoints/evaluation.py | 47 +++++++++++-- 5 files changed, 190 insertions(+), 28 deletions(-) diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index e63940c6..b9de8d1c 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -17,7 +17,7 @@ from ._state import state from .client import AgentControlClient -from .evaluation_events import build_control_execution_events, deliver_oss_events +from .evaluation_events import build_control_execution_events, enqueue_observability_events from .telemetry import emit_control_events, has_control_event_sink from .tracing import get_trace_and_span_ids from .validation import ensure_agent_name @@ -49,7 +49,20 @@ def _get_applicable_controls( def _build_server_control_lookup( server_control_payloads: list[dict[str, Any]], ) -> dict[int, ControlDefinition]: - """Return best-effort parsed server control definitions keyed by control ID.""" + """Build a best-effort lookup of server control definitions. + + The merged-event path reconstructs server-side events in the SDK after the + server returns a lightweight ``EvaluationResponse``. This helper parses the + cached server control payloads so the shared event builder can reconstruct + those events locally. 
+ + Args: + server_control_payloads: Raw cached server control payloads. + + Returns: + A mapping of control ID to parsed ``ControlDefinition`` for every + payload that can be parsed locally. + """ control_lookup: dict[int, ControlDefinition] = {} for control in server_control_payloads: @@ -107,7 +120,20 @@ def _merge_results( local_result: EvaluationResponse, server_result: EvaluationResponse, ) -> EvaluationResult: - """Merge local and server evaluation results.""" + """Merge local and server evaluation results into one SDK-facing result. + + This helper merges only evaluation semantics. Event reconstruction happens + later so the response shape can stay lightweight regardless of which event + ingestion path is used. + + Args: + local_result: Evaluation response produced by SDK-local controls. + server_result: Evaluation response produced by server-side controls. + + Returns: + A merged ``EvaluationResult`` with combined matches, errors, + non-matches, and the strictest safety/confidence outcome. + """ is_safe = local_result.is_safe and server_result.is_safe confidence = min(local_result.confidence, server_result.confidence) @@ -173,7 +199,33 @@ async def check_evaluation_with_local( span_id: str | None = None, event_agent_name: str | None = None, ) -> EvaluationResult: - """Check if agent interaction is safe, running local controls first.""" + """Evaluate controls with local-first execution and configurable event flow. + + This is the main decision boundary between the two supported event + ingestion styles: + - default behavior: local events are reconstructed and queued immediately in + the SDK, while server-side events are still emitted by the server + - merged-event behavior: local and server events are reconstructed in the + SDK and emitted once through a registered sink + + In both cases, the evaluation result itself stays lightweight and event + reconstruction happens after evaluation completes. + + Args: + client: Configured AgentControl client. 
+ agent_name: Agent name to evaluate against. + step: Step payload to evaluate. + stage: Evaluation stage, ``pre`` or ``post``. + controls: Cached control payloads used to split local vs server + execution. + trace_id: Optional explicit trace ID. + span_id: Optional explicit span ID. + event_agent_name: Optional override for the agent name stamped on + reconstructed events. + + Returns: + A merged evaluation result across local and server execution. + """ normalized_name = ensure_agent_name(agent_name) resolved_trace_id = trace_id resolved_span_id = span_id @@ -264,7 +316,9 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if applicable_local_controls: engine = ControlEngine(applicable_local_controls, context="sdk") local_result = await engine.process(request) - local_control_lookup = {control.id: control.control for control in applicable_local_controls} + local_control_lookup = { + control.id: control.control for control in applicable_local_controls + } local_events = build_control_execution_events( local_result, request, @@ -275,7 +329,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: ) if not merged_emission_enabled: - deliver_oss_events(local_events) + enqueue_observability_events(local_events) if not local_result.is_safe: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) diff --git a/sdks/python/src/agent_control/evaluation_events.py b/sdks/python/src/agent_control/evaluation_events.py index f74e1d32..c21ac08a 100644 --- a/sdks/python/src/agent_control/evaluation_events.py +++ b/sdks/python/src/agent_control/evaluation_events.py @@ -125,7 +125,28 @@ def build_control_execution_events( span_id: str | None, agent_name: str | None, ) -> list[ControlExecutionEvent]: - """Construct final ControlExecutionEvents from an evaluation response.""" + """Reconstruct control execution events from an evaluation response. 
+ + This is the shared reconstruction step used by both supported ingestion + styles: + - the default SDK observability path, where reconstructed local events are + queued into the existing SDK batcher + - the merged-event path, where local and server events are reconstructed in + the SDK and emitted together through a registered sink + + Args: + response: Evaluation response containing matches, errors, and + non-matches. + request: Original evaluation request used to derive stage and + ``applies_to``. + control_lookup: Parsed controls keyed by control ID. + trace_id: Optional trace ID for correlation. + span_id: Optional span ID for correlation. + agent_name: Optional override for the agent name stamped on events. + + Returns: + A list of reconstructed ``ControlExecutionEvent`` objects. + """ resolved_trace_id, resolved_span_id = _resolve_event_trace_context(trace_id, span_id) resolved_agent_name = agent_name or request.agent_name now = datetime.now(UTC) @@ -170,8 +191,18 @@ def build_control_execution_events( return events -def deliver_oss_events(events: list[ControlExecutionEvent]) -> None: - """Send reconstructed events through the existing OSS SDK observability path.""" +def enqueue_observability_events(events: list[ControlExecutionEvent]) -> None: + """Enqueue reconstructed events through the existing SDK observability path. + + This preserves the default SDK behavior of forwarding local events through + the existing observability batcher rather than a custom merged-event sink. + + Args: + events: Reconstructed control execution events to enqueue. + + Returns: + None. 
+ """ if not is_observability_enabled(): return diff --git a/sdks/python/src/agent_control/telemetry/event_sink.py b/sdks/python/src/agent_control/telemetry/event_sink.py index 19062604..cb9910c3 100644 --- a/sdks/python/src/agent_control/telemetry/event_sink.py +++ b/sdks/python/src/agent_control/telemetry/event_sink.py @@ -10,13 +10,33 @@ def set_control_event_sink(sink: ControlEventSink | None) -> None: - """Register a sink for merged control execution events.""" + """Register a sink for merged control execution events. + + Registering a sink enables the optional merged-event path, where the SDK + reconstructs local and server events and emits them together after merging + results. + + Args: + sink: Sink callback to receive merged control execution events, or + ``None`` to clear the current sink. + + Returns: + None. + """ global _control_event_sink _control_event_sink = sink def emit_control_events(events: list[ControlExecutionEvent]) -> None: - """Emit merged control execution events to the registered sink.""" + """Emit merged control execution events to the registered sink. + + Args: + events: Merged control execution events to emit. + + Returns: + None. Sink failures are swallowed so evaluation behavior is not changed + by telemetry issues. + """ if not events or _control_event_sink is None: return @@ -28,7 +48,14 @@ def emit_control_events(events: list[ControlExecutionEvent]) -> None: def has_control_event_sink() -> bool: - """Return whether a merged control event sink is currently registered.""" + """Return whether the optional merged-event path is enabled. + + Args: + None. + + Returns: + ``True`` when a merged control event sink has been registered. 
+ """ return _control_event_sink is not None diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index 3bb8b4cb..3d8c284c 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -7,7 +7,7 @@ from agent_control.evaluation import _ControlAdapter, _merge_results from agent_control.evaluation_events import ( build_control_execution_events, - deliver_oss_events, + enqueue_observability_events, map_applies_to, ) from agent_control.telemetry.trace_context import ( @@ -183,7 +183,7 @@ def test_composite_control_uses_representative_observability_identity(self): assert event.metadata["all_evaluators"] == ["regex"] assert event.metadata["all_selector_paths"] == ["input", "output"] - def test_deliver_oss_events_uses_existing_batcher(self): + def test_enqueue_observability_events_uses_existing_batcher(self): from agent_control_models import ControlExecutionEvent events = [ @@ -203,7 +203,7 @@ def test_deliver_oss_events_uses_existing_batcher(self): with patch("agent_control.evaluation_events.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation_events.add_event") as mock_add: - deliver_oss_events(events) + enqueue_observability_events(events) mock_add.assert_called_once_with(events[0]) @@ -253,7 +253,7 @@ async def test_delivers_local_events_in_oss_mode(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation.deliver_oss_events") as mock_deliver: + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -265,8 +265,8 @@ async def test_delivers_local_events_in_oss_mode(self): event_agent_name="test-agent", ) - mock_deliver.assert_called_once() - delivered_events = 
mock_deliver.call_args.args[0] + mock_enqueue.assert_called_once() + delivered_events = mock_enqueue.call_args.args[0] assert len(delivered_events) == 1 assert delivered_events[0].trace_id == "abc123" assert delivered_events[0].span_id == "def456" @@ -275,9 +275,20 @@ async def test_delivers_local_events_in_oss_mode(self): @pytest.mark.asyncio async def test_resolves_provider_trace_context_for_local_events(self): - from agent_control_models import EvaluationResponse, Step + from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step - mock_response = EvaluationResponse(is_safe=True, confidence=1.0) + mock_response = EvaluationResponse( + is_safe=True, + confidence=1.0, + non_matches=[ + ControlMatch( + control_id=1, + control_name="test-ctrl", + action="allow", + result=EvaluatorResult(matched=False, confidence=0.1), + ) + ], + ) mock_engine = MagicMock() mock_engine.process = AsyncMock(return_value=mock_response) controls = [{ @@ -300,7 +311,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation.deliver_oss_events") as mock_deliver: + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -309,7 +320,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): controls=controls, ) - delivered_events = mock_deliver.call_args.args[0] + delivered_events = mock_enqueue.call_args.args[0] assert delivered_events[0].trace_id == "a" * 32 assert delivered_events[0].span_id == "b" * 16 @@ -500,7 +511,7 @@ async def test_merged_event_sink_emits_reconstructed_local_and_server_events_onc patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ 
patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ patch("agent_control.evaluation.emit_control_events") as mock_emit, \ - patch("agent_control.evaluation.deliver_oss_events") as mock_deliver: + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -512,7 +523,7 @@ async def test_merged_event_sink_emits_reconstructed_local_and_server_events_onc event_agent_name="test-agent", ) - mock_deliver.assert_not_called() + mock_enqueue.assert_not_called() mock_emit.assert_called_once() merged_events = mock_emit.call_args.args[0] assert len(merged_events) == 2 diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index cd71ab07..68a315b4 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -274,10 +274,24 @@ def _build_observability_events( control_lookup: dict, total_duration_ms: float, ) -> list[ControlExecutionEvent]: - """Create observability events for all evaluated controls. - - Uses control_execution_id from the engine response to ensure correlation - between SDK logs and server observability events. + """Build observability events for all evaluated controls. + + This preserves the existing server-side event shape while allowing the + merged-event path to skip server-side ingestion and keep the response + lightweight. + + Args: + response: Raw evaluation response from the engine. + request: Original evaluation request. + trace_id: Trace ID to stamp on emitted events. + span_id: Span ID to stamp on emitted events. + agent_name: Agent name to stamp on emitted events. + applies_to: Observability applies_to value derived from the step type. + control_lookup: Controls keyed by control ID. + total_duration_ms: Total request execution duration in milliseconds. 
+ + Returns: + A list of reconstructed server-side control execution events. """ events: list[ControlExecutionEvent] = [] now = datetime.now(UTC) @@ -398,3 +412,28 @@ async def _ingest_observability_events( f"Dropped {result.dropped} observability events, " f"processed {result.processed}" ) + + +async def _emit_observability_events( + response: EvaluationResponse, + request: EvaluationRequest, + trace_id: str, + span_id: str, + agent_name: str, + applies_to: Literal["llm_call", "tool_call"], + control_lookup: dict, + total_duration_ms: float, + ingestor: EventIngestor | None, +) -> None: + """Backward-compatible wrapper around build + ingest observability helpers.""" + events = _build_observability_events( + response=response, + request=request, + trace_id=trace_id, + span_id=span_id, + agent_name=agent_name, + applies_to=applies_to, + control_lookup=control_lookup, + total_duration_ms=total_duration_ms, + ) + await _ingest_observability_events(events, ingestor) From a8c40c5c372f4b020475328de330404294ee2248 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 31 Mar 2026 12:34:57 -0700 Subject: [PATCH 07/18] TS sdk fix --- sdks/typescript/src/generated/funcs/evaluation-evaluate.ts | 5 +++++ .../models/operations/evaluate-api-v1-evaluation-post.ts | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts index 47ec39e4..81862c54 100644 --- a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts +++ b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts @@ -109,6 +109,11 @@ async function $do( const headers = new Headers(compactMap({ "Content-Type": "application/json", Accept: "application/json", + "X-Agent-Control-Merge-Events": encodeSimple( + "X-Agent-Control-Merge-Events", + payload["X-Agent-Control-Merge-Events"], + { explode: false, charEncoding: "none" }, + ), "X-Span-Id": encodeSimple("X-Span-Id", payload["X-Span-Id"], { 
explode: false, charEncoding: "none", diff --git a/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts b/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts index 026e4065..204841a6 100644 --- a/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts +++ b/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts @@ -9,6 +9,7 @@ import * as models from "../index.js"; export type EvaluateApiV1EvaluationPostRequest = { xTraceId?: string | null | undefined; xSpanId?: string | null | undefined; + xAgentControlMergeEvents?: string | null | undefined; body: models.EvaluationRequest; }; @@ -16,6 +17,7 @@ export type EvaluateApiV1EvaluationPostRequest = { export type EvaluateApiV1EvaluationPostRequest$Outbound = { "X-Trace-Id"?: string | null | undefined; "X-Span-Id"?: string | null | undefined; + "X-Agent-Control-Merge-Events"?: string | null | undefined; body: models.EvaluationRequest$Outbound; }; @@ -27,12 +29,14 @@ export const EvaluateApiV1EvaluationPostRequest$outboundSchema: z.ZodMiniType< z.object({ xTraceId: z.optional(z.nullable(z.string())), xSpanId: z.optional(z.nullable(z.string())), + xAgentControlMergeEvents: z.optional(z.nullable(z.string())), body: models.EvaluationRequest$outboundSchema, }), z.transform((v) => { return remap$(v, { xTraceId: "X-Trace-Id", xSpanId: "X-Span-Id", + xAgentControlMergeEvents: "X-Agent-Control-Merge-Events", }); }), ); From f58778e19a1ceb534b8f2fbd71ebbeba2e6e4cfd Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 31 Mar 2026 19:19:05 -0700 Subject: [PATCH 08/18] address comments --- sdks/python/src/agent_control/evaluation.py | 101 ++++++++--- sdks/python/tests/test_evaluation.py | 1 + .../tests/test_observability_updates.py | 165 ++++++++++++++---- 3 files changed, 208 insertions(+), 59 deletions(-) diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 
b9de8d1c..bc26bf37 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -18,6 +18,7 @@ from ._state import state from .client import AgentControlClient from .evaluation_events import build_control_execution_events, enqueue_observability_events +from .observability import is_observability_enabled from .telemetry import emit_control_events, has_control_event_sink from .tracing import get_trace_and_span_ids from .validation import ensure_agent_name @@ -173,8 +174,36 @@ async def check_evaluation( step: Step, stage: Literal["pre", "post"], ) -> EvaluationResult: - """Check if agent interaction is safe.""" + """Check if agent interaction is safe through the public SDK helper. + + This helper preserves the default server-only evaluation path, but it also + participates in the optional merged-event flow when a control event sink is + registered. In that mode, the SDK asks the server to skip final event + ingestion, reconstructs server events from the lightweight response, and + emits them through the registered sink before returning the parsed result. + + Args: + client: Configured AgentControl client. + agent_name: Agent name to evaluate against. + step: Step payload to evaluate. + stage: Evaluation stage, ``pre`` or ``post``. + + Returns: + The parsed evaluation result returned by the server. 
+ """ normalized_name = ensure_agent_name(agent_name) + merged_emission_enabled = has_control_event_sink() + trace_id = None + span_id = None + headers: dict[str, str] | None = None + + if merged_emission_enabled: + trace_id, span_id = get_trace_and_span_ids() + headers = { + "X-Trace-Id": trace_id, + "X-Span-Id": span_id, + "X-Agent-Control-Merge-Events": "true", + } request = EvaluationRequest( agent_name=normalized_name, @@ -183,10 +212,28 @@ async def check_evaluation( ) request_payload = request.model_dump(mode="json") - response = await client.http_client.post("/api/v1/evaluation", json=request_payload) + response = await client.http_client.post( + "/api/v1/evaluation", + json=request_payload, + headers=headers, + ) response.raise_for_status() - return cast(EvaluationResult, EvaluationResult.from_dict(response.json())) + evaluation_response = EvaluationResponse.model_validate(response.json()) + + if merged_emission_enabled: + server_control_lookup = _build_server_control_lookup(state.server_controls or []) + server_events = build_control_execution_events( + evaluation_response, + request, + server_control_lookup, + trace_id, + span_id, + normalized_name, + ) + emit_control_events(server_events) + + return cast(EvaluationResult, EvaluationResult.from_dict(evaluation_response.model_dump())) async def check_evaluation_with_local( @@ -305,6 +352,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: return result.model_copy(update={"errors": combined_errors}) merged_emission_enabled = has_control_event_sink() + should_reconstruct_local_events = merged_emission_enabled or is_observability_enabled() local_result: EvaluationResponse | None = None local_events = [] @@ -316,20 +364,21 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if applicable_local_controls: engine = ControlEngine(applicable_local_controls, context="sdk") local_result = await engine.process(request) - local_control_lookup = { - control.id: 
control.control for control in applicable_local_controls - } - local_events = build_control_execution_events( - local_result, - request, - local_control_lookup, - resolved_trace_id, - resolved_span_id, - event_agent_name, - ) + if should_reconstruct_local_events: + local_control_lookup = { + control.id: control.control for control in applicable_local_controls + } + local_events = build_control_execution_events( + local_result, + request, + local_control_lookup, + resolved_trace_id, + resolved_span_id, + event_agent_name, + ) - if not merged_emission_enabled: - enqueue_observability_events(local_events) + if not merged_emission_enabled: + enqueue_observability_events(local_events) if not local_result.is_safe: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) @@ -354,15 +403,17 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: ) response.raise_for_status() server_result = EvaluationResponse.model_validate(response.json()) - server_control_lookup = _build_server_control_lookup(server_control_payloads) - server_events = build_control_execution_events( - server_result, - request, - server_control_lookup, - resolved_trace_id, - resolved_span_id, - event_agent_name, - ) + server_events = [] + if merged_emission_enabled: + server_control_lookup = _build_server_control_lookup(server_control_payloads) + server_events = build_control_execution_events( + server_result, + request, + server_control_lookup, + resolved_trace_id, + resolved_span_id, + event_agent_name, + ) if local_result is not None: result = _with_parse_errors(_merge_results(local_result, server_result)) diff --git a/sdks/python/tests/test_evaluation.py b/sdks/python/tests/test_evaluation.py index e9842313..4c7a647b 100644 --- a/sdks/python/tests/test_evaluation.py +++ b/sdks/python/tests/test_evaluation.py @@ -66,6 +66,7 @@ def json(self) -> dict[str, object]: }, "stage": "pre", }, + headers=None, ) diff --git 
a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index 3d8c284c..5b81be21 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -326,6 +326,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): @pytest.mark.asyncio async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): + """Server POST should resolve trace headers from the provider when omitted.""" from agent_control_models import Step controls = [{ @@ -355,7 +356,12 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): client.http_client = AsyncMock() client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - set_trace_context_provider(lambda: {"trace_id": "c" * 32, "span_id": "d" * 16}) + set_trace_context_provider( + lambda: { + "trace_id": "c" * 32, + "span_id": "d" * 16, + } + ) with patch("agent_control.evaluation.list_evaluators", return_value=["regex"]): await evaluation.check_evaluation_with_local( @@ -366,56 +372,147 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): controls=controls, ) - headers = client.http_client.post.call_args.kwargs["headers"] - assert headers["X-Trace-Id"] == "c" * 32 - assert headers["X-Span-Id"] == "d" * 16 - # Verify POST was called with headers call_kwargs = client.http_client.post.call_args headers = call_kwargs.kwargs.get("headers", {}) - assert headers["X-Trace-Id"] == "aaaa1111bbbb2222cccc3333dddd4444" - assert headers["X-Span-Id"] == "eeee5555ffff6666" + assert headers["X-Trace-Id"] == "c" * 32 + assert headers["X-Span-Id"] == "d" * 16 + +class TestCheckEvaluation: @pytest.mark.asyncio - async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): - """Server POST should resolve trace headers from the provider when omitted.""" + async def 
test_default_path_keeps_server_only_behavior(self): from agent_control_models import Step + mock_http_response = MagicMock() + mock_http_response.raise_for_status = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 0.9, + "matches": None, + "errors": None, + "non_matches": None, + } + + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", input="hello") + + with patch("agent_control.evaluation.has_control_event_sink", return_value=False), \ + patch("agent_control.evaluation.emit_control_events") as mock_emit: + result = await evaluation.check_evaluation( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + ) + + call_kwargs = client.http_client.post.call_args.kwargs + assert call_kwargs["headers"] is None + mock_emit.assert_not_called() + assert result.is_safe is True + assert result.confidence == 0.9 + + @pytest.mark.asyncio + async def test_merged_event_sink_emits_reconstructed_server_events(self): + from agent_control_models import Step + + controls = [ + { + "id": 2, + "name": "server-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "server", + }, + } + ] + + mock_http_response = MagicMock() + mock_http_response.raise_for_status = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 0.9, + "matches": [ + { + "control_id": 2, + "control_name": "server-ctrl", + "action": "allow", + "control_execution_id": "ce-server", + "result": {"matched": True, "confidence": 0.4}, + } + ], + "errors": None, + "non_matches": None, + } + + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", 
input="hello") + + with patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ + patch("agent_control.evaluation.get_trace_and_span_ids", return_value=("a" * 32, "b" * 16)), \ + patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + patch.object(evaluation.state, "server_controls", controls): + result = await evaluation.check_evaluation( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + ) + + headers = client.http_client.post.call_args.kwargs["headers"] + assert headers["X-Trace-Id"] == "a" * 32 + assert headers["X-Span-Id"] == "b" * 16 + assert headers["X-Agent-Control-Merge-Events"] == "true" + + mock_emit.assert_called_once() + emitted_events = mock_emit.call_args.args[0] + assert len(emitted_events) == 1 + assert emitted_events[0].control_id == 2 + assert emitted_events[0].trace_id == "a" * 32 + assert emitted_events[0].span_id == "b" * 16 + assert emitted_events[0].metadata["primary_evaluator"] == "regex" + assert result.matches is not None + assert len(result.matches) == 1 + + @pytest.mark.asyncio + async def test_skips_local_event_reconstruction_when_nothing_consumes_events(self): + from agent_control_models import EvaluationResponse, Step + controls = [{ "id": 1, - "name": "server-ctrl", + "name": "local-ctrl", "control": { "condition": { "evaluator": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, - "action": {"decision": "deny"}, - "execution": "server", + "action": {"decision": "allow"}, + "execution": "sdk", }, }] - mock_http_response = MagicMock() - mock_http_response.json.return_value = { - "is_safe": True, - "confidence": 1.0, - "matches": None, - "errors": None, - "non_matches": None, - } - mock_http_response.raise_for_status = MagicMock() + mock_response = EvaluationResponse(is_safe=True, confidence=1.0) + mock_engine = MagicMock() + mock_engine.process = AsyncMock(return_value=mock_response) client = MagicMock() client.http_client = 
AsyncMock() - client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - set_trace_context_provider( - lambda: { - "trace_id": "c" * 32, - "span_id": "d" * 16, - } - ) - with patch("agent_control.evaluation.list_evaluators", return_value=["regex"]): - await evaluation.check_evaluation_with_local( + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ + patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.has_control_event_sink", return_value=False), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=False), \ + patch("agent_control.evaluation.build_control_execution_events") as mock_build, \ + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", step=step, @@ -423,10 +520,10 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): controls=controls, ) - call_kwargs = client.http_client.post.call_args - headers = call_kwargs.kwargs.get("headers", {}) - assert headers["X-Trace-Id"] == "c" * 32 - assert headers["X-Span-Id"] == "d" * 16 + mock_build.assert_not_called() + mock_enqueue.assert_not_called() + assert result.is_safe is True + assert result.confidence == 1.0 # ============================================================================= From c0d9a576692f0e8f31f1afcae105d6f56d761207 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Tue, 31 Mar 2026 20:12:00 -0700 Subject: [PATCH 09/18] ensure control sink exists for merged mode --- sdks/python/src/agent_control/__init__.py | 2 + sdks/python/src/agent_control/evaluation.py | 30 ++++++++++++- sdks/python/tests/test_init_step_merge.py | 2 + .../tests/test_observability_updates.py | 44 +++++++++++++++++-- sdks/python/tests/test_shutdown.py | 3 ++ 5 files changed, 75 
insertions(+), 6 deletions(-) diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index d57ca0d9..dffdde05 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -477,6 +477,7 @@ async def handle(message: str): # Re-init behavior: always stop existing loop before mutating shared agent/session globals. _stop_policy_refresh_loop() + clear_control_event_sink() # Configure logging if provided (do this early before any logging happens) if log_config: @@ -647,6 +648,7 @@ def _reset_state() -> None: state.server_controls = None state.server_url = None state.api_key = None + clear_control_event_sink() async def ashutdown() -> None: diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index bc26bf37..170a46ef 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -117,6 +117,32 @@ def _has_applicable_prefiltered_server_controls( ) +def _is_merged_event_mode_enabled(agent_name: str) -> bool: + """Return whether SDK-side merged event emission is safe for this request. + + Merged reconstruction depends on initialized SDK session state: + a registered sink, an initialized agent, and a cached server-control + snapshot for the same agent. If any of those prerequisites are missing, + evaluation falls back to the default behavior where the server remains the + final emitter for server-side events. + + Args: + agent_name: Normalized agent name for the current request. + + Returns: + ``True`` when the current SDK session has enough state to reconstruct + and emit merged events safely. 
+ """ + if not has_control_event_sink(): + return False + + current_agent = state.current_agent + if current_agent is None or current_agent.agent_name != agent_name: + return False + + return state.server_controls is not None + + def _merge_results( local_result: EvaluationResponse, server_result: EvaluationResponse, @@ -192,7 +218,7 @@ async def check_evaluation( The parsed evaluation result returned by the server. """ normalized_name = ensure_agent_name(agent_name) - merged_emission_enabled = has_control_event_sink() + merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name) trace_id = None span_id = None headers: dict[str, str] | None = None @@ -351,7 +377,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: combined_errors = (result.errors or []) + parse_errors return result.model_copy(update={"errors": combined_errors}) - merged_emission_enabled = has_control_event_sink() + merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name) should_reconstruct_local_events = merged_emission_enabled or is_observability_enabled() local_result: EvaluationResponse | None = None diff --git a/sdks/python/tests/test_init_step_merge.py b/sdks/python/tests/test_init_step_merge.py index 669e1236..cbe66fd9 100644 --- a/sdks/python/tests/test_init_step_merge.py +++ b/sdks/python/tests/test_init_step_merge.py @@ -20,9 +20,11 @@ class DoesNotExist: ... 
@pytest.fixture(autouse=True) def _clean_registry() -> Generator[None, None, None]: """Ensure each test starts with an empty step registry.""" + agent_control._reset_state() clear() yield clear() + agent_control._reset_state() def test_init_passes_merged_steps_to_register_agent( diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index 5b81be21..8dfe53b8 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -398,7 +398,7 @@ async def test_default_path_keeps_server_only_behavior(self): client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation.has_control_event_sink", return_value=False), \ + with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=False), \ patch("agent_control.evaluation.emit_control_events") as mock_emit: result = await evaluation.check_evaluation( client=client, @@ -455,7 +455,7 @@ async def test_merged_event_sink_emits_reconstructed_server_events(self): client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ + with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ patch("agent_control.evaluation.get_trace_and_span_ids", return_value=("a" * 32, "b" * 16)), \ patch("agent_control.evaluation.emit_control_events") as mock_emit, \ patch.object(evaluation.state, "server_controls", controls): @@ -508,7 +508,7 @@ async def test_skips_local_event_reconstruction_when_nothing_consumes_events(sel with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation.has_control_event_sink", 
return_value=False), \ + patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=False), \ patch("agent_control.evaluation.is_observability_enabled", return_value=False), \ patch("agent_control.evaluation.build_control_execution_events") as mock_build, \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: @@ -525,6 +525,42 @@ async def test_skips_local_event_reconstruction_when_nothing_consumes_events(sel assert result.is_safe is True assert result.confidence == 1.0 + @pytest.mark.asyncio + async def test_check_evaluation_falls_back_when_initialized_agent_does_not_match(self): + from agent_control_models import Step + + mock_http_response = MagicMock() + mock_http_response.raise_for_status = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 0.9, + "matches": None, + "errors": None, + "non_matches": None, + } + + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", input="hello") + + with patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ + patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + patch.object(evaluation.state, "current_agent", MagicMock(agent_name="agent-000000000002")), \ + patch.object(evaluation.state, "server_controls", []): + result = await evaluation.check_evaluation( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + ) + + call_kwargs = client.http_client.post.call_args.kwargs + assert call_kwargs["headers"] is None + mock_emit.assert_not_called() + assert result.is_safe is True + assert result.confidence == 0.9 + # ============================================================================= # control_decorators non_matches dict conversion @@ -606,7 +642,7 @@ async def test_merged_event_sink_emits_reconstructed_local_and_server_events_onc with 
patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ + patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ patch("agent_control.evaluation.emit_control_events") as mock_emit, \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( diff --git a/sdks/python/tests/test_shutdown.py b/sdks/python/tests/test_shutdown.py index 073745ed..1ee128d9 100644 --- a/sdks/python/tests/test_shutdown.py +++ b/sdks/python/tests/test_shutdown.py @@ -15,6 +15,7 @@ import agent_control.observability as obs_mod from agent_control._state import state from agent_control.observability import EventBatcher +from agent_control.telemetry.event_sink import has_control_event_sink, set_control_event_sink def _make_started_batcher() -> EventBatcher: @@ -64,6 +65,7 @@ def test_shutdown_resets_state(self): state.server_controls = [{"name": "test"}] state.server_url = "http://localhost:8000" state.api_key = "key" + set_control_event_sink(lambda events: None) agent_control.shutdown() @@ -72,6 +74,7 @@ def test_shutdown_resets_state(self): assert state.server_controls is None assert state.server_url is None assert state.api_key is None + assert has_control_event_sink() is False def test_shutdown_idempotent(self): agent_control.shutdown() From 33fcc181d2e10299d6593723f48b95fb1add860f Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Wed, 1 Apr 2026 11:57:44 -0700 Subject: [PATCH 10/18] refactor this PR to only have merge mode --- sdks/python/src/agent_control/__init__.py | 16 ++-- sdks/python/src/agent_control/_state.py | 1 + sdks/python/src/agent_control/evaluation.py | 61 +++++++------ .../src/agent_control/evaluation_events.py | 10 +-- .../src/agent_control/telemetry/__init__.py | 15 +--- 
.../src/agent_control/telemetry/event_sink.py | 65 -------------- sdks/python/tests/test_event_sink.py | 59 ------------- sdks/python/tests/test_init_step_merge.py | 23 +++++ .../tests/test_observability_updates.py | 85 +++++++++++++------ sdks/python/tests/test_shutdown.py | 5 +- .../endpoints/evaluation.py | 27 +++--- .../tests/test_evaluation_error_handling.py | 23 +++++ 12 files changed, 171 insertions(+), 219 deletions(-) delete mode 100644 sdks/python/src/agent_control/telemetry/event_sink.py delete mode 100644 sdks/python/tests/test_event_sink.py diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index dffdde05..b0a009dc 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -95,12 +95,8 @@ async def handle_input(user_message: str) -> str: sync_shutdown_observability, ) from .telemetry import ( - clear_control_event_sink, clear_trace_context_provider, - emit_control_events, get_trace_context_from_provider, - has_control_event_sink, - set_control_event_sink, set_trace_context_provider, ) from .tracing import ( @@ -400,6 +396,7 @@ def init( observability_enabled: bool | None = None, log_config: dict[str, Any] | None = None, policy_refresh_interval_seconds: int = 60, + merge_events: bool = False, **kwargs: object ) -> Agent: """ @@ -430,6 +427,9 @@ def init( {"enabled": True, "span_start": True, "span_end": True, "control_eval": True} policy_refresh_interval_seconds: Interval for background policy refresh loop. Defaults to 60 seconds. Set to 0 to disable background refresh. + merge_events: Whether to merge local and server event creation in the + SDK before enqueueing through the built-in observability path. + Defaults to False. **kwargs: Additional metadata to store with the agent Returns: @@ -477,7 +477,6 @@ async def handle(message: str): # Re-init behavior: always stop existing loop before mutating shared agent/session globals. 
_stop_policy_refresh_loop() - clear_control_event_sink() # Configure logging if provided (do this early before any logging happens) if log_config: @@ -501,6 +500,7 @@ async def handle(message: str): state.current_agent = next_agent state.server_url = server_url or os.getenv('AGENT_CONTROL_URL') or 'http://localhost:8000' state.api_key = api_key + state.merge_events = merge_events # Merge auto-discovered steps from @control() decorators with explicit steps. # Explicit steps take precedence when (type, name) collides. @@ -648,7 +648,7 @@ def _reset_state() -> None: state.server_controls = None state.server_url = None state.api_key = None - clear_control_event_sink() + state.merge_events = False async def ashutdown() -> None: @@ -1313,10 +1313,6 @@ async def main(): "set_trace_context_provider", "get_trace_context_from_provider", "clear_trace_context_provider", - "set_control_event_sink", - "has_control_event_sink", - "emit_control_events", - "clear_control_event_sink", # Observability "init_observability", "add_event", diff --git a/sdks/python/src/agent_control/_state.py b/sdks/python/src/agent_control/_state.py index 143bb2d3..a0f9eb7b 100644 --- a/sdks/python/src/agent_control/_state.py +++ b/sdks/python/src/agent_control/_state.py @@ -24,6 +24,7 @@ def __init__(self) -> None: self.server_controls: list[dict[str, Any]] | None = None self.server_url: str | None = None self.api_key: str | None = None + self.merge_events: bool = False # Singleton state instance diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 170a46ef..6dae427e 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -19,7 +19,6 @@ from .client import AgentControlClient from .evaluation_events import build_control_execution_events, enqueue_observability_events from .observability import is_observability_enabled -from .telemetry import emit_control_events, has_control_event_sink from 
.tracing import get_trace_and_span_ids from .validation import ensure_agent_name @@ -117,29 +116,40 @@ def _has_applicable_prefiltered_server_controls( ) -def _is_merged_event_mode_enabled(agent_name: str) -> bool: - """Return whether SDK-side merged event emission is safe for this request. +def _is_merged_event_mode_enabled( + agent_name: str, + client: AgentControlClient | None = None, +) -> bool: + """Return whether SDK-side merged event creation is safe for this request. - Merged reconstruction depends on initialized SDK session state: - a registered sink, an initialized agent, and a cached server-control - snapshot for the same agent. If any of those prerequisites are missing, - evaluation falls back to the default behavior where the server remains the - final emitter for server-side events. + Merged event creation is a session-scoped option that depends on + initialized SDK state: an initialized agent, cached server controls for the + same agent, and observability enabled so the merged batch will actually be + consumed by the built-in queue/Postgres path. Args: agent_name: Normalized agent name for the current request. + client: Optional client used for the current request. When provided, + merged mode is allowed only if it targets the active initialized + session server. Returns: ``True`` when the current SDK session has enough state to reconstruct - and emit merged events safely. + and enqueue merged events safely. 
""" - if not has_control_event_sink(): + if not state.merge_events or not is_observability_enabled(): return False current_agent = state.current_agent if current_agent is None or current_agent.agent_name != agent_name: return False + if client is not None and state.server_url is not None: + normalized_state_server_url = state.server_url.rstrip("/") + normalized_client_server_url = client.base_url.rstrip("/") + if normalized_client_server_url != normalized_state_server_url: + return False + return state.server_controls is not None @@ -202,11 +212,12 @@ async def check_evaluation( ) -> EvaluationResult: """Check if agent interaction is safe through the public SDK helper. - This helper preserves the default server-only evaluation path, but it also - participates in the optional merged-event flow when a control event sink is - registered. In that mode, the SDK asks the server to skip final event - ingestion, reconstructs server events from the lightweight response, and - emits them through the registered sink before returning the parsed result. + This helper preserves the default server-only evaluation path, but it can + also participate in merged event creation when the initialized SDK session + has ``merge_events`` enabled. In that mode, the SDK asks the server to skip + final event ingestion, reconstructs server events from the lightweight + response, and enqueues them through the existing observability pipeline + before returning the parsed result. Args: client: Configured AgentControl client. @@ -218,7 +229,7 @@ async def check_evaluation( The parsed evaluation result returned by the server. 
""" normalized_name = ensure_agent_name(agent_name) - merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name) + merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name, client) trace_id = None span_id = None headers: dict[str, str] | None = None @@ -257,7 +268,7 @@ async def check_evaluation( span_id, normalized_name, ) - emit_control_events(server_events) + enqueue_observability_events(server_events) return cast(EvaluationResult, EvaluationResult.from_dict(evaluation_response.model_dump())) @@ -275,11 +286,11 @@ async def check_evaluation_with_local( """Evaluate controls with local-first execution and configurable event flow. This is the main decision boundary between the two supported event - ingestion styles: + creation styles: - default behavior: local events are reconstructed and queued immediately in the SDK, while server-side events are still emitted by the server - merged-event behavior: local and server events are reconstructed in the - SDK and emitted once through a registered sink + SDK and enqueued once through the built-in observability path In both cases, the evaluation result itself stays lightweight and event reconstruction happens after evaluation completes. 
@@ -377,8 +388,8 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: combined_errors = (result.errors or []) + parse_errors return result.model_copy(update={"errors": combined_errors}) - merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name) - should_reconstruct_local_events = merged_emission_enabled or is_observability_enabled() + merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name, client) + should_reconstruct_local_events = is_observability_enabled() local_result: EvaluationResponse | None = None local_events = [] @@ -409,7 +420,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if not local_result.is_safe: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) if merged_emission_enabled: - emit_control_events(local_events) + enqueue_observability_events(local_events) return result if _has_applicable_prefiltered_server_controls(server_control_payloads, request): @@ -444,18 +455,18 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if local_result is not None: result = _with_parse_errors(_merge_results(local_result, server_result)) if merged_emission_enabled: - emit_control_events(local_events + server_events) + enqueue_observability_events(local_events + server_events) return result result = _with_parse_errors(EvaluationResult.model_validate(server_result.model_dump())) if merged_emission_enabled: - emit_control_events(server_events) + enqueue_observability_events(server_events) return result if local_result is not None: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) if merged_emission_enabled: - emit_control_events(local_events) + enqueue_observability_events(local_events) return result return _with_parse_errors(EvaluationResult(is_safe=True, confidence=1.0)) diff --git a/sdks/python/src/agent_control/evaluation_events.py b/sdks/python/src/agent_control/evaluation_events.py index 
c21ac08a..0f25d442 100644 --- a/sdks/python/src/agent_control/evaluation_events.py +++ b/sdks/python/src/agent_control/evaluation_events.py @@ -127,12 +127,12 @@ def build_control_execution_events( ) -> list[ControlExecutionEvent]: """Reconstruct control execution events from an evaluation response. - This is the shared reconstruction step used by both supported ingestion - styles: + This is the shared reconstruction step used by both supported event + creation styles: - the default SDK observability path, where reconstructed local events are queued into the existing SDK batcher - the merged-event path, where local and server events are reconstructed in - the SDK and emitted together through a registered sink + the SDK and queued together through the existing SDK batcher Args: response: Evaluation response containing matches, errors, and @@ -194,8 +194,8 @@ def build_control_execution_events( def enqueue_observability_events(events: list[ControlExecutionEvent]) -> None: """Enqueue reconstructed events through the existing SDK observability path. - This preserves the default SDK behavior of forwarding local events through - the existing observability batcher rather than a custom merged-event sink. + This preserves the built-in SDK behavior of forwarding events through the + existing observability batcher. Args: events: Reconstructed control execution events to enqueue. 
diff --git a/sdks/python/src/agent_control/telemetry/__init__.py b/sdks/python/src/agent_control/telemetry/__init__.py index 6e40b8a2..c488d4a2 100644 --- a/sdks/python/src/agent_control/telemetry/__init__.py +++ b/sdks/python/src/agent_control/telemetry/__init__.py @@ -1,12 +1,4 @@ -"""Telemetry interfaces for provider-agnostic tracing and event emission.""" - -from .event_sink import ( - ControlEventSink, - clear_control_event_sink, - emit_control_events, - has_control_event_sink, - set_control_event_sink, -) +"""Telemetry interfaces for provider-agnostic tracing.""" from .trace_context import ( TraceContext, TraceContextProvider, @@ -16,14 +8,9 @@ ) __all__ = [ - "ControlEventSink", "TraceContext", "TraceContextProvider", - "clear_control_event_sink", "clear_trace_context_provider", - "emit_control_events", "get_trace_context_from_provider", - "has_control_event_sink", - "set_control_event_sink", "set_trace_context_provider", ] diff --git a/sdks/python/src/agent_control/telemetry/event_sink.py b/sdks/python/src/agent_control/telemetry/event_sink.py deleted file mode 100644 index cb9910c3..00000000 --- a/sdks/python/src/agent_control/telemetry/event_sink.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Provider-agnostic sink for merged control execution events.""" - -from collections.abc import Callable - -from agent_control_models import ControlExecutionEvent - -ControlEventSink = Callable[[list[ControlExecutionEvent]], None] - -_control_event_sink: ControlEventSink | None = None - - -def set_control_event_sink(sink: ControlEventSink | None) -> None: - """Register a sink for merged control execution events. - - Registering a sink enables the optional merged-event path, where the SDK - reconstructs local and server events and emits them together after merging - results. - - Args: - sink: Sink callback to receive merged control execution events, or - ``None`` to clear the current sink. - - Returns: - None. 
- """ - global _control_event_sink - _control_event_sink = sink - - -def emit_control_events(events: list[ControlExecutionEvent]) -> None: - """Emit merged control execution events to the registered sink. - - Args: - events: Merged control execution events to emit. - - Returns: - None. Sink failures are swallowed so evaluation behavior is not changed - by telemetry issues. - """ - if not events or _control_event_sink is None: - return - - try: - _control_event_sink(events) - except Exception: - # Sink failures should not break control evaluation. - pass - - -def has_control_event_sink() -> bool: - """Return whether the optional merged-event path is enabled. - - Args: - None. - - Returns: - ``True`` when a merged control event sink has been registered. - """ - return _control_event_sink is not None - - -def clear_control_event_sink() -> None: - """Clear the registered control event sink.""" - global _control_event_sink - _control_event_sink = None diff --git a/sdks/python/tests/test_event_sink.py b/sdks/python/tests/test_event_sink.py deleted file mode 100644 index 8013f4d6..00000000 --- a/sdks/python/tests/test_event_sink.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Tests for the telemetry merged control event sink interface.""" - -from datetime import UTC, datetime - -from agent_control.telemetry.event_sink import ( - clear_control_event_sink, - emit_control_events, - set_control_event_sink, -) -from agent_control_models import ControlExecutionEvent - - -def _event() -> ControlExecutionEvent: - return ControlExecutionEvent( - control_execution_id="ce-1", - trace_id="a" * 32, - span_id="b" * 16, - agent_name="test-agent", - control_id=1, - control_name="pii_check", - check_stage="pre", - applies_to="llm_call", - action="allow", - matched=False, - confidence=0.95, - timestamp=datetime.now(UTC), - metadata={}, - ) - - -def teardown_function() -> None: - clear_control_event_sink() - - -def test_emit_control_events_calls_registered_sink() -> None: - seen: 
list[list[ControlExecutionEvent]] = [] - - def _sink(events: list[ControlExecutionEvent]) -> None: - seen.append(events) - - event = _event() - set_control_event_sink(_sink) - - emit_control_events([event]) - - assert seen == [[event]] - - -def test_emit_control_events_noops_without_sink() -> None: - emit_control_events([_event()]) - - -def test_emit_control_events_swallows_sink_failures() -> None: - def _sink(_events: list[ControlExecutionEvent]) -> None: - raise RuntimeError("boom") - - set_control_event_sink(_sink) - - emit_control_events([_event()]) diff --git a/sdks/python/tests/test_init_step_merge.py b/sdks/python/tests/test_init_step_merge.py index cbe66fd9..4b6b0f5d 100644 --- a/sdks/python/tests/test_init_step_merge.py +++ b/sdks/python/tests/test_init_step_merge.py @@ -191,6 +191,29 @@ def test_init_logs_agent_updated_when_registration_already_exists( assert agent_name in caplog.text +def test_init_sets_merge_events_session_flag() -> None: + # Given: a normal init path with registration mocked out. + register_agent_mock = AsyncMock(return_value={"created": True, "controls": []}) + health_check_mock = AsyncMock(return_value={"status": "healthy"}) + + with patch( + "agent_control.__init__.AgentControlClient.health_check", + new=health_check_mock, + ), patch( + "agent_control.__init__.agents.register_agent", + new=register_agent_mock, + ): + # When: init() enables merged event creation for the session. + agent_control.init( + agent_name=f"agent-{uuid4().hex[:12]}", + policy_refresh_interval_seconds=0, + merge_events=True, + ) + + # Then: the session state remembers that merged event creation is enabled. + assert agent_control.state.merge_events is True + + @pytest.mark.asyncio async def test_refresh_controls_calls_agent_controls_endpoint() -> None: # Given: an initialized SDK agent session with network-facing calls mocked. 
diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index 8dfe53b8..6e92f9c9 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -14,10 +14,6 @@ clear_trace_context_provider, set_trace_context_provider, ) -from agent_control.telemetry.trace_context import ( - clear_trace_context_provider, - set_trace_context_provider, -) from agent_control_models import ControlDefinition @@ -212,9 +208,6 @@ class TestCheckEvaluationWithLocal: def teardown_method(self) -> None: clear_trace_context_provider() - def teardown_method(self) -> None: - clear_trace_context_provider() - @pytest.mark.asyncio async def test_delivers_local_events_in_oss_mode(self): from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step @@ -253,6 +246,7 @@ async def test_delivers_local_events_in_oss_mode(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( client=client, @@ -311,6 +305,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: await evaluation.check_evaluation_with_local( client=client, @@ -399,7 +394,7 @@ async def test_default_path_keeps_server_only_behavior(self): step = Step(type="llm", name="test-step", input="hello") with 
patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=False), \ - patch("agent_control.evaluation.emit_control_events") as mock_emit: + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation( client=client, agent_name="agent-000000000001", @@ -409,12 +404,12 @@ async def test_default_path_keeps_server_only_behavior(self): call_kwargs = client.http_client.post.call_args.kwargs assert call_kwargs["headers"] is None - mock_emit.assert_not_called() + mock_enqueue.assert_not_called() assert result.is_safe is True assert result.confidence == 0.9 @pytest.mark.asyncio - async def test_merged_event_sink_emits_reconstructed_server_events(self): + async def test_merged_event_mode_enqueues_reconstructed_server_events(self): from agent_control_models import Step controls = [ @@ -457,7 +452,7 @@ async def test_merged_event_sink_emits_reconstructed_server_events(self): with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ patch("agent_control.evaluation.get_trace_and_span_ids", return_value=("a" * 32, "b" * 16)), \ - patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, \ patch.object(evaluation.state, "server_controls", controls): result = await evaluation.check_evaluation( client=client, @@ -471,8 +466,8 @@ async def test_merged_event_sink_emits_reconstructed_server_events(self): assert headers["X-Span-Id"] == "b" * 16 assert headers["X-Agent-Control-Merge-Events"] == "true" - mock_emit.assert_called_once() - emitted_events = mock_emit.call_args.args[0] + mock_enqueue.assert_called_once() + emitted_events = mock_enqueue.call_args.args[0] assert len(emitted_events) == 1 assert emitted_events[0].control_id == 2 assert emitted_events[0].trace_id == "a" * 32 @@ -544,8 +539,8 @@ async def test_check_evaluation_falls_back_when_initialized_agent_does_not_match 
client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation.has_control_event_sink", return_value=True), \ - patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + with patch.object(evaluation.state, "merge_events", True), \ + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, \ patch.object(evaluation.state, "current_agent", MagicMock(agent_name="agent-000000000002")), \ patch.object(evaluation.state, "server_controls", []): result = await evaluation.check_evaluation( @@ -557,21 +552,63 @@ async def test_check_evaluation_falls_back_when_initialized_agent_does_not_match call_kwargs = client.http_client.post.call_args.kwargs assert call_kwargs["headers"] is None - mock_emit.assert_not_called() + mock_enqueue.assert_not_called() + assert result.is_safe is True + assert result.confidence == 0.9 + + @pytest.mark.asyncio + async def test_check_evaluation_falls_back_when_client_targets_different_server(self): + from agent_control_models import Step + + mock_http_response = MagicMock() + mock_http_response.raise_for_status = MagicMock() + mock_http_response.json.return_value = { + "is_safe": True, + "confidence": 0.9, + "matches": None, + "errors": None, + "non_matches": None, + } + + client = MagicMock() + client.base_url = "http://different-server:8000" + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(return_value=mock_http_response) + step = Step(type="llm", name="test-step", input="hello") + + with patch.object(evaluation.state, "merge_events", True), \ + patch.object(evaluation.state, "server_url", "http://localhost:8000"), \ + patch.object( + evaluation.state, + "current_agent", + MagicMock(agent_name="agent-000000000001"), + ), \ + patch.object(evaluation.state, "server_controls", [{"id": 1, "name": "ctrl"}]), \ + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: 
+ result = await evaluation.check_evaluation( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + ) + + call_kwargs = client.http_client.post.call_args.kwargs + assert call_kwargs["headers"] is None + mock_enqueue.assert_not_called() assert result.is_safe is True assert result.confidence == 0.9 # ============================================================================= -# control_decorators non_matches dict conversion +# Merged Event Creation # ============================================================================= -class TestControlDecoratorsNonMatches: - """Tests for non_matches dict conversion in control_decorators._evaluate.""" +class TestMergedEventCreation: + """Tests for SDK-side merged event reconstruction and enqueueing.""" @pytest.mark.asyncio - async def test_merged_event_sink_emits_reconstructed_local_and_server_events_once(self): + async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_once(self): from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step local_response = EvaluationResponse( @@ -643,7 +680,7 @@ async def test_merged_event_sink_emits_reconstructed_local_and_server_events_onc with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ - patch("agent_control.evaluation.emit_control_events") as mock_emit, \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( client=client, @@ -655,10 +692,8 @@ async def test_merged_event_sink_emits_reconstructed_local_and_server_events_onc span_id="def456", event_agent_name="test-agent", ) - - mock_enqueue.assert_not_called() - mock_emit.assert_called_once() - 
merged_events = mock_emit.call_args.args[0] + mock_enqueue.assert_called_once() + merged_events = mock_enqueue.call_args.args[0] assert len(merged_events) == 2 assert {event.control_id for event in merged_events} == {1, 2} headers = client.http_client.post.call_args.kwargs["headers"] diff --git a/sdks/python/tests/test_shutdown.py b/sdks/python/tests/test_shutdown.py index 1ee128d9..4152d8f6 100644 --- a/sdks/python/tests/test_shutdown.py +++ b/sdks/python/tests/test_shutdown.py @@ -15,7 +15,6 @@ import agent_control.observability as obs_mod from agent_control._state import state from agent_control.observability import EventBatcher -from agent_control.telemetry.event_sink import has_control_event_sink, set_control_event_sink def _make_started_batcher() -> EventBatcher: @@ -65,7 +64,7 @@ def test_shutdown_resets_state(self): state.server_controls = [{"name": "test"}] state.server_url = "http://localhost:8000" state.api_key = "key" - set_control_event_sink(lambda events: None) + state.merge_events = True agent_control.shutdown() @@ -74,7 +73,7 @@ def test_shutdown_resets_state(self): assert state.server_controls is None assert state.server_url is None assert state.api_key is None - assert has_control_event_sink() is False + assert state.merge_events is False def test_shutdown_idempotent(self): agent_control.shutdown() diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index 68a315b4..fbf23bd4 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -240,20 +240,21 @@ async def evaluate( total_duration_ms = (time.perf_counter() - start_time) * 1000 merge_events_requested = (x_merge_events or "").lower() == "true" - response_events = _build_observability_events( - response=raw_response, - request=request, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - applies_to=applies_to, - 
control_lookup=control_lookup, - total_duration_ms=total_duration_ms, - ) - # OSS keeps server-side ingestion as the default. Enterprise merged mode - # returns events to the SDK and skips this server-side delivery step. + # Default mode keeps server-side ingestion as-is. Merged event creation + # skips this server-side delivery step so the SDK can reconstruct and + # enqueue the combined batch itself. if observability_settings.enabled and not merge_events_requested: + response_events = _build_observability_events( + response=raw_response, + request=request, + trace_id=trace_id, + span_id=span_id, + agent_name=agent_name, + applies_to=applies_to, + control_lookup=control_lookup, + total_duration_ms=total_duration_ms, + ) # Get ingestor from app.state (None if not initialized) try: ingestor = get_event_ingestor(req) @@ -273,7 +274,7 @@ def _build_observability_events( applies_to: Literal["llm_call", "tool_call"], control_lookup: dict, total_duration_ms: float, - ) -> list[ControlExecutionEvent]: +) -> list[ControlExecutionEvent]: """Build observability events for all evaluated controls. 
This preserves the existing server-side event shape while allowing the diff --git a/server/tests/test_evaluation_error_handling.py b/server/tests/test_evaluation_error_handling.py index dd2035eb..0184295a 100644 --- a/server/tests/test_evaluation_error_handling.py +++ b/server/tests/test_evaluation_error_handling.py @@ -421,3 +421,26 @@ def test_evaluation_skips_ingest_for_merge_mode( body = resp.json() assert "events" not in body ingest_mock.assert_not_awaited() + + +def test_evaluation_skips_build_and_ingest_when_observability_disabled( + client: TestClient, monkeypatch +) -> None: + """Observability-disabled requests should not build or ingest events.""" + agent_name, _ = create_and_assign_policy(client) + + import agent_control_server.endpoints.evaluation as evaluation_module + + build_mock = MagicMock() + ingest_mock = AsyncMock() + monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) + monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) + monkeypatch.setattr(evaluation_module.observability_settings, "enabled", False) + + payload = Step(type="llm", name="test-step", input="x", output=None) + req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") + resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) + + assert resp.status_code == 200 + build_mock.assert_not_called() + ingest_mock.assert_not_awaited() From ca5bf6f75287ae0f3f4da7040bca2aaf8b34ad7a Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 10:55:52 -0700 Subject: [PATCH 11/18] address comments --- sdks/python/src/agent_control/__init__.py | 1 + sdks/python/src/agent_control/agents.py | 8 +- sdks/python/src/agent_control/evaluation.py | 55 +--- .../src/agent_control/evaluation_events.py | 6 +- .../agent_control/telemetry/trace_context.py | 2 + sdks/python/tests/test_init_step_merge.py | 2 + .../tests/test_observability_updates.py | 303 +++++++++++++----- 
.../agent_control_server/endpoints/agents.py | 20 +- .../endpoints/evaluation.py | 26 +- .../merge_event_sessions.py | 86 +++++ .../tests/test_evaluation_error_handling.py | 48 +++ 11 files changed, 436 insertions(+), 121 deletions(-) create mode 100644 server/src/agent_control_server/merge_event_sessions.py diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index b0a009dc..7db187d9 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -552,6 +552,7 @@ async def register() -> list[dict[str, Any]] | None: state.current_agent, steps=registration_steps, conflict_mode=conflict_mode, + merge_events=merge_events, ) created = response.get('created', False) controls: list[dict[str, Any]] = response.get('controls', []) diff --git a/sdks/python/src/agent_control/agents.py b/sdks/python/src/agent_control/agents.py index 3d63cc5e..5a4df05b 100644 --- a/sdks/python/src/agent_control/agents.py +++ b/sdks/python/src/agent_control/agents.py @@ -15,6 +15,7 @@ async def register_agent( agent: Agent, steps: list[dict[str, Any]] | None = None, conflict_mode: Literal["strict", "overwrite"] = "overwrite", + merge_events: bool = False, ) -> dict[str, Any]: """Register an agent with the server via /initAgent endpoint.""" ensure_evaluators_discovered() @@ -27,7 +28,12 @@ async def register_agent( "conflict_mode": conflict_mode, } - response = await client.http_client.post("/api/v1/agents/initAgent", json=payload) + headers = {"X-Agent-Control-Merge-Session": "true"} if merge_events else None + response = await client.http_client.post( + "/api/v1/agents/initAgent", + json=payload, + headers=headers, + ) response.raise_for_status() return cast(dict[str, Any], response.json()) diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index d4fe41b9..65613fd5 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ 
b/sdks/python/src/agent_control/evaluation.py @@ -212,12 +212,9 @@ async def check_evaluation( ) -> EvaluationResult: """Check if agent interaction is safe through the public SDK helper. - This helper preserves the default server-only evaluation path, but it can - also participate in merged event creation when the initialized SDK session - has ``merge_events`` enabled. In that mode, the SDK asks the server to skip - final event ingestion, reconstructs server events from the lightweight - response, and enqueues them through the existing observability pipeline - before returning the parsed result. + This helper always uses the default server-only evaluation path. It does + not participate in merged event creation, so server-side observability + ingestion remains enabled and the SDK simply returns the parsed result. Args: client: Configured AgentControl client. @@ -229,19 +226,6 @@ async def check_evaluation( The parsed evaluation result returned by the server. """ normalized_name = ensure_agent_name(agent_name) - merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name, client) - trace_id = None - span_id = None - headers: dict[str, str] | None = None - - if merged_emission_enabled: - trace_id, span_id = get_trace_and_span_ids() - headers = { - "X-Trace-Id": trace_id, - "X-Span-Id": span_id, - "X-Agent-Control-Merge-Events": "true", - } - request = EvaluationRequest( agent_name=normalized_name, step=step, @@ -252,24 +236,12 @@ async def check_evaluation( response = await client.http_client.post( "/api/v1/evaluation", json=request_payload, - headers=headers, + headers=None, ) response.raise_for_status() evaluation_response = EvaluationResponse.model_validate(response.json()) - if merged_emission_enabled: - server_control_lookup = _build_server_control_lookup(state.server_controls or []) - server_events = build_control_execution_events( - evaluation_response, - request, - server_control_lookup, - trace_id, - span_id, - normalized_name, - ) - 
enqueue_observability_events(server_events) - return cast(EvaluationResult, EvaluationResult.from_dict(evaluation_response.model_dump())) @@ -433,13 +405,18 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if merged_emission_enabled: headers["X-Agent-Control-Merge-Events"] = "true" - response = await client.http_client.post( - "/api/v1/evaluation", - json=request_payload, - headers=headers, - ) - response.raise_for_status() - server_result = EvaluationResponse.model_validate(response.json()) + try: + response = await client.http_client.post( + "/api/v1/evaluation", + json=request_payload, + headers=headers, + ) + response.raise_for_status() + server_result = EvaluationResponse.model_validate(response.json()) + except Exception: + if merged_emission_enabled and local_events: + enqueue_observability_events(local_events) + raise server_events = [] if merged_emission_enabled: server_control_lookup = _build_server_control_lookup(server_control_payloads) diff --git a/sdks/python/src/agent_control/evaluation_events.py b/sdks/python/src/agent_control/evaluation_events.py index 0f25d442..0c3f9438 100644 --- a/sdks/python/src/agent_control/evaluation_events.py +++ b/sdks/python/src/agent_control/evaluation_events.py @@ -70,6 +70,7 @@ def _build_events_for_matches( matches: list[ControlMatch] | None, *, matched: bool, + include_error_message: bool, request: EvaluationRequest, control_lookup: dict[int, ControlDefinition], trace_id: str, @@ -109,7 +110,7 @@ def _build_events_for_matches( timestamp=now, evaluator_name=evaluator_name, selector_path=selector_path, - error_message=match.result.error if not matched else None, + error_message=match.result.error if include_error_message else None, metadata=event_metadata, ) ) @@ -156,6 +157,7 @@ def build_control_execution_events( _build_events_for_matches( response.matches, matched=True, + include_error_message=True, request=request, control_lookup=control_lookup, trace_id=resolved_trace_id, @@ -168,6 +170,7 
@@ def build_control_execution_events( _build_events_for_matches( response.errors, matched=False, + include_error_message=True, request=request, control_lookup=control_lookup, trace_id=resolved_trace_id, @@ -180,6 +183,7 @@ def build_control_execution_events( _build_events_for_matches( response.non_matches, matched=False, + include_error_message=False, request=request, control_lookup=control_lookup, trace_id=resolved_trace_id, diff --git a/sdks/python/src/agent_control/telemetry/trace_context.py b/sdks/python/src/agent_control/telemetry/trace_context.py index be545725..a871fb29 100644 --- a/sdks/python/src/agent_control/telemetry/trace_context.py +++ b/sdks/python/src/agent_control/telemetry/trace_context.py @@ -38,6 +38,8 @@ def get_trace_context_from_provider() -> TraceContext | None: trace_id = trace_context.get("trace_id") span_id = trace_context.get("span_id") + if not isinstance(trace_id, str) or not isinstance(span_id, str): + return None if not trace_id or not span_id: return None diff --git a/sdks/python/tests/test_init_step_merge.py b/sdks/python/tests/test_init_step_merge.py index 4b6b0f5d..ca71aa61 100644 --- a/sdks/python/tests/test_init_step_merge.py +++ b/sdks/python/tests/test_init_step_merge.py @@ -212,6 +212,8 @@ def test_init_sets_merge_events_session_flag() -> None: # Then: the session state remembers that merged event creation is enabled. 
assert agent_control.state.merge_events is True + assert register_agent_mock.await_args is not None + assert register_agent_mock.await_args.kwargs["merge_events"] is True @pytest.mark.asyncio diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index cd3024bc..f91d53a8 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -168,12 +168,6 @@ def test_composite_control_uses_representative_observability_identity(self): "span456", "test-agent", ) - action={"decision": "observe"}, - ), - ) - non_match = self._make_match(1, "composite-ctrl", action="observe", matched=False) - response = self._make_response(non_matches=[non_match]) - request = self._make_request() assert len(events) == 1 event = events[0] @@ -185,6 +179,66 @@ def test_composite_control_uses_representative_observability_identity(self): assert event.metadata["all_evaluators"] == ["regex"] assert event.metadata["all_selector_paths"] == ["input", "output"] + def test_preserves_error_message_parity_by_result_category(self): + from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult + + request = self._make_request() + control_lookup = { + 1: self._make_control( + 1, + "ctrl-1", + { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + ).control + } + response = EvaluationResponse( + is_safe=False, + confidence=0.5, + matches=[ + ControlMatch( + control_id=1, + control_name="ctrl-1", + action="allow", + result=EvaluatorResult( + matched=True, + confidence=0.9, + metadata={"server_error_message": "match-error"}, + ), + ) + ], + errors=[ + ControlMatch( + control_id=1, + control_name="ctrl-1", + action="allow", + result=EvaluatorResult(matched=False, confidence=0.2, error="eval-error"), + ) + ], + non_matches=[ + ControlMatch( + control_id=1, + control_name="ctrl-1", + action="allow", + 
result=EvaluatorResult(matched=False, confidence=0.1, error="ignored-error"), + ) + ], + ) + + events = build_control_execution_events( + response, + request, + control_lookup, + "trace123", + "span456", + "test-agent", + ) + + assert events[0].error_message is None + assert events[1].error_message == "eval-error" + assert events[2].error_message is None + def test_enqueue_observability_events_uses_existing_batcher(self): from agent_control_models import ControlExecutionEvent @@ -264,7 +318,6 @@ async def test_delivers_local_events_in_oss_mode(self): span_id="def456", event_agent_name="test-agent", ) - mock_enqueue.assert_called_once() delivered_events = mock_enqueue.call_args.args[0] assert len(delivered_events) == 1 @@ -428,24 +481,9 @@ async def test_default_path_keeps_server_only_behavior(self): assert result.confidence == 0.9 @pytest.mark.asyncio - async def test_merged_event_mode_enqueues_reconstructed_server_events(self): + async def test_check_evaluation_ignores_merged_mode_and_keeps_server_only_behavior(self): from agent_control_models import Step - controls = [ - { - "id": 2, - "name": "server-ctrl", - "control": { - "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, - "selector": {"path": "input"}, - }, - "action": {"decision": "allow"}, - "execution": "server", - }, - } - ] - mock_http_response = MagicMock() mock_http_response.raise_for_status = MagicMock() mock_http_response.json.return_value = { @@ -470,9 +508,7 @@ async def test_merged_event_mode_enqueues_reconstructed_server_events(self): step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ - patch("agent_control.evaluation.get_trace_and_span_ids", return_value=("a" * 32, "b" * 16)), \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, \ - patch.object(evaluation.state, "server_controls", controls): + 
patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation( client=client, agent_name="agent-000000000001", @@ -480,18 +516,9 @@ async def test_merged_event_mode_enqueues_reconstructed_server_events(self): stage="pre", ) - headers = client.http_client.post.call_args.kwargs["headers"] - assert headers["X-Trace-Id"] == "a" * 32 - assert headers["X-Span-Id"] == "b" * 16 - assert headers["X-Agent-Control-Merge-Events"] == "true" - - mock_enqueue.assert_called_once() - emitted_events = mock_enqueue.call_args.args[0] - assert len(emitted_events) == 1 - assert emitted_events[0].control_id == 2 - assert emitted_events[0].trace_id == "a" * 32 - assert emitted_events[0].span_id == "b" * 16 - assert emitted_events[0].metadata["primary_evaluator"] == "regex" + call_kwargs = client.http_client.post.call_args.kwargs + assert call_kwargs["headers"] is None + mock_enqueue.assert_not_called() assert result.matches is not None assert len(result.matches) == 1 @@ -653,44 +680,6 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ "control_execution_id": "ce-server", "result": {"matched": False, "confidence": 0.4}, } - ctx._update_stats(result) - assert ctx.total_executions == 2 - assert ctx.total_non_matches == 2 - assert ctx.total_matches == 0 - assert ctx.total_errors == 0 - - def test_legacy_advisory_matches_collapse_into_observe_stats(self): - """Legacy advisory action names should not leak into local action counters.""" - from agent_control.control_decorators import ControlContext - - result = { - "is_safe": True, - "confidence": 1.0, - "matches": [ - { - "control_id": 1, - "control_name": "ctrl-allow", - "action": "allow", - "result": {"matched": True, "confidence": 0.3}, - }, - { - "control_id": 2, - "control_name": "ctrl-warn", - "action": "warn", - "result": {"matched": True, "confidence": 0.4}, - }, - { - "control_id": 3, - "control_name": "ctrl-log", - "action": "log", - "result": 
{"matched": True, "confidence": 0.5}, - }, - { - "control_id": 4, - "control_name": "ctrl-observe", - "action": "observe", - "result": {"matched": True, "confidence": 0.6}, - }, ], "errors": None, "non_matches": None, @@ -757,6 +746,168 @@ def test_legacy_advisory_matches_collapse_into_observe_stats(self): assert headers["X-Agent-Control-Merge-Events"] == "true" assert result.matches is not None assert len(result.matches) == 2 + + @pytest.mark.asyncio + async def test_merged_event_mode_enqueues_local_events_before_reraising_server_failure(self): + from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + + local_response = EvaluationResponse( + is_safe=True, + confidence=1.0, + matches=[ + ControlMatch( + control_id=1, + control_name="local-ctrl", + action="allow", + result=EvaluatorResult(matched=False, confidence=0.8), + ) + ], + ) + + controls = [ + { + "id": 1, + "name": "local-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "sdk", + }, + }, + { + "id": 2, + "name": "server-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "server", + }, + }, + ] + + mock_engine = MagicMock() + mock_engine.process = AsyncMock(return_value=local_response) + + client = MagicMock() + client.http_client = AsyncMock() + client.http_client.post = AsyncMock(side_effect=RuntimeError("server unavailable")) + step = Step(type="llm", name="test-step", input="hello") + + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), patch("agent_control.evaluation.is_observability_enabled", 
return_value=True), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + with pytest.raises(RuntimeError, match="server unavailable"): + await evaluation.check_evaluation_with_local( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + controls=controls, + trace_id="abc123", + span_id="def456", + event_agent_name="test-agent", + ) + + mock_enqueue.assert_called_once() + local_events = mock_enqueue.call_args.args[0] + assert len(local_events) == 1 + assert local_events[0].control_id == 1 + assert local_events[0].trace_id == "abc123" + assert local_events[0].span_id == "def456" + + +# ============================================================================= +# control_decorators non_matches dict conversion +# ============================================================================= + + +class TestControlDecoratorsNonMatches: + """Tests for non_matches dict conversion in control_decorators._evaluate.""" + + @pytest.mark.asyncio + async def test_non_matches_populated_in_stats(self): + """non_matches should be properly converted to dicts for stats tracking.""" + from agent_control.control_decorators import ControlContext + + result = { + "is_safe": True, + "confidence": 1.0, + "matches": None, + "errors": None, + "non_matches": [ + { + "control_id": 1, + "control_name": "ctrl-1", + "action": "observe", + "result": {"matched": False, "confidence": 0.1}, + }, + { + "control_id": 2, + "control_name": "ctrl-2", + "action": "deny", + "result": {"matched": False, "confidence": 0.2}, + }, + ], + } + + ctx = ControlContext( + agent_name="test-agent", + server_url="http://localhost:8000", + func=lambda: None, + args=(), + kwargs={}, + trace_id="trace123", + span_id="span456", + start_time=0, + ) + + ctx._update_stats(result) + assert ctx.total_executions == 2 + assert ctx.total_non_matches == 2 + assert ctx.total_matches == 0 + assert ctx.total_errors == 0 + + def 
test_legacy_advisory_matches_collapse_into_observe_stats(self): + """Legacy advisory action names should not leak into local action counters.""" + from agent_control.control_decorators import ControlContext + + result = { + "is_safe": True, + "confidence": 1.0, + "matches": [ + { + "control_id": 1, + "control_name": "ctrl-allow", + "action": "allow", + "result": {"matched": True, "confidence": 0.3}, + }, + { + "control_id": 2, + "control_name": "ctrl-warn", + "action": "warn", + "result": {"matched": True, "confidence": 0.4}, + }, + { + "control_id": 3, + "control_name": "ctrl-log", + "action": "log", + "result": {"matched": True, "confidence": 0.5}, + }, + { + "control_id": 4, + "control_name": "ctrl-observe", + "action": "observe", + "result": {"matched": True, "confidence": 0.6}, + }, + ], + "errors": None, + "non_matches": None, + } + ctx = ControlContext( agent_name="test-agent", server_url="http://localhost:8000", diff --git a/server/src/agent_control_server/endpoints/agents.py b/server/src/agent_control_server/endpoints/agents.py index d88a9f0c..24a91cd4 100644 --- a/server/src/agent_control_server/endpoints/agents.py +++ b/server/src/agent_control_server/endpoints/agents.py @@ -28,14 +28,14 @@ SetPolicyResponse, StepKey, ) -from fastapi import APIRouter, Depends +from fastapi import APIRouter, Depends, Header from jsonschema_rs import ValidationError as JSONSchemaValidationError from pydantic import BaseModel, ValidationError from sqlalchemy import delete, func, or_, select, union_all from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import AsyncSession -from ..auth import require_admin_key +from ..auth import RequireAPIKey, require_admin_key from ..db import get_async_db from ..errors import ( APIValidationError, @@ -45,6 +45,7 @@ NotFoundError, ) from ..logging_utils import get_logger +from ..merge_event_sessions import set_merge_events_enabled from ..models import ( Agent, AgentData, @@ -447,7 +448,10 @@ async def 
list_agents( response_description="Agent registration status with active controls", ) async def init_agent( - request: InitAgentRequest, db: AsyncSession = Depends(get_async_db) + request: InitAgentRequest, + client: RequireAPIKey, + db: AsyncSession = Depends(get_async_db), + x_merge_session: str | None = Header(default=None, alias="X-Agent-Control-Merge-Session"), ) -> InitAgentResponse: """ Register a new agent or update an existing agent's steps and metadata. @@ -547,6 +551,11 @@ async def init_agent( resource="Agent", operation="create", ) + set_merge_events_enabled( + client, + request.agent.agent_name, + enabled=(x_merge_session or "").lower() == "true", + ) return InitAgentResponse(created=created, controls=[]) # Parse existing data via AgentData Pydantic model @@ -797,6 +806,11 @@ async def init_agent( ) controls = await list_controls_for_agent(existing.name, db) + set_merge_events_enabled( + client, + existing.name, + enabled=(x_merge_session or "").lower() == "true", + ) return InitAgentResponse( created=created, diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index fbf23bd4..14833160 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -17,10 +17,12 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from ..auth import AuthenticatedClient, RequireAPIKey from ..config import observability_settings from ..db import get_async_db from ..errors import APIValidationError, NotFoundError from ..logging_utils import get_logger +from ..merge_event_sessions import is_merge_events_enabled from ..models import Agent from ..observability.ingest.base import EventIngestor from ..services.controls import list_controls_for_agent @@ -144,6 +146,23 @@ def _observability_metadata( ) +def _is_trusted_sdk_merge_request( + x_merge_events: str | None, + client: AuthenticatedClient, + 
agent_name: str, +) -> bool: + """Return whether merged delivery is enabled for this init-scoped session. + + A request must explicitly ask for merged delivery, and the same + authenticated client must previously have initialized this agent with + merge-events enabled. + """ + if (x_merge_events or "").lower() != "true": + return False + + return is_merge_events_enabled(client, agent_name) + + @router.post( "", response_model=EvaluationResponse, @@ -153,6 +172,7 @@ def _observability_metadata( async def evaluate( request: EvaluationRequest, req: Request, + client: RequireAPIKey, db: AsyncSession = Depends(get_async_db), x_trace_id: str | None = Header(default=None, alias="X-Trace-Id"), x_span_id: str | None = Header(default=None, alias="X-Span-Id"), @@ -239,7 +259,11 @@ async def evaluate( # Calculate total execution time total_duration_ms = (time.perf_counter() - start_time) * 1000 - merge_events_requested = (x_merge_events or "").lower() == "true" + merge_events_requested = _is_trusted_sdk_merge_request( + x_merge_events=x_merge_events, + client=client, + agent_name=agent_name, + ) # Default mode keeps server-side ingestion as-is. Merged event creation # skips this server-side delivery step so the SDK can reconstruct and diff --git a/server/src/agent_control_server/merge_event_sessions.py b/server/src/agent_control_server/merge_event_sessions.py new file mode 100644 index 00000000..5ce64650 --- /dev/null +++ b/server/src/agent_control_server/merge_event_sessions.py @@ -0,0 +1,86 @@ +"""Server-side trust state for merged event creation. + +Merged event creation is an SDK-owned flow where the SDK reconstructs and +enqueues the final batch of control-execution events after combining local and +server evaluation results. In that mode the server must skip its normal +observability ingestion step, otherwise both the server and SDK would emit +events for the same evaluation. 
+ +The merge request header on ``/evaluation`` is caller-controlled, so the server +cannot safely trust that header on its own. This module stores a small amount +of init-scoped session state so the server can distinguish: + +- callers that previously initialized a given agent with ``merge_events=True`` +- callers that merely send the merge header directly + +Only the first case is allowed to suppress server-side observability +ingestion. All other callers stay on the default server-ingestion path. +""" + +from __future__ import annotations + +from threading import Lock + +from .auth import AuthenticatedClient + +_merge_enabled_sessions: set[tuple[str, str]] = set() +_lock = Lock() + + +def _session_key(client: AuthenticatedClient, agent_name: str) -> tuple[str, str]: + """Return the in-memory lookup key for one merge-enabled SDK session. + + Args: + client: Authenticated caller identity resolved by the server. + agent_name: Normalized agent name for the initialized SDK session. + + Returns: + A stable ``(client, agent)`` key used for merge-session tracking. + """ + return (client.api_key, agent_name) + + +def set_merge_events_enabled( + client: AuthenticatedClient, + agent_name: str, + enabled: bool, +) -> None: + """Record whether merged event creation is enabled for a client/agent pair. + + This is called from the agent-init flow so later evaluation requests can be + checked against the server's trusted SDK session state instead of relying on + request headers alone. + + Args: + client: Authenticated caller identity resolved by the server. + agent_name: Normalized agent name whose session state is being updated. + enabled: Whether merged event creation is enabled for this session. + + Returns: + None. 
+ """ + key = _session_key(client, agent_name) + with _lock: + if enabled: + _merge_enabled_sessions.add(key) + else: + _merge_enabled_sessions.discard(key) + + +def is_merge_events_enabled( + client: AuthenticatedClient, + agent_name: str, +) -> bool: + """Return whether merged event creation is enabled for this SDK session. + + Args: + client: Authenticated caller identity resolved by the server. + agent_name: Normalized agent name for the evaluation request. + + Returns: + ``True`` when the caller previously initialized the same agent with + merged event creation enabled; otherwise ``False``. + """ + key = _session_key(client, agent_name) + with _lock: + return key in _merge_enabled_sessions diff --git a/server/tests/test_evaluation_error_handling.py b/server/tests/test_evaluation_error_handling.py index 0184295a..a1a11e85 100644 --- a/server/tests/test_evaluation_error_handling.py +++ b/server/tests/test_evaluation_error_handling.py @@ -388,6 +388,15 @@ def test_evaluation_skips_ingest_for_merge_mode( ) -> None: """Merged-event mode should skip server-side observability ingestion.""" agent_name, _ = create_and_assign_policy(client) + client.post( + "/api/v1/agents/initAgent", + json={ + "agent": {"agent_name": agent_name}, + "steps": [], + "evaluators": [], + }, + headers={"X-Agent-Control-Merge-Session": "true"}, + ) import agent_control_server.endpoints.evaluation as evaluation_module @@ -423,6 +432,45 @@ def test_evaluation_skips_ingest_for_merge_mode( ingest_mock.assert_not_awaited() +def test_evaluation_merge_header_alone_does_not_skip_ingest( + client: TestClient, monkeypatch +) -> None: + """Untrusted callers should not suppress server-side observability ingestion.""" + agent_name, _ = create_and_assign_policy(client) + + import agent_control_server.endpoints.evaluation as evaluation_module + + event = ControlExecutionEvent( + trace_id="a" * 32, + span_id="b" * 16, + agent_name=agent_name, + control_id=1, + control_name="test-control", + 
check_stage="pre", + applies_to="llm_call", + action="deny", + matched=True, + confidence=0.9, + ) + build_mock = MagicMock(return_value=[event]) + ingest_mock = AsyncMock() + monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) + monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) + monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) + + payload = Step(type="llm", name="test-step", input="x", output=None) + req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") + resp = client.post( + "/api/v1/evaluation", + json=req.model_dump(mode="json"), + headers={"X-Agent-Control-Merge-Events": "true"}, + ) + + assert resp.status_code == 200 + build_mock.assert_called_once() + ingest_mock.assert_awaited_once() + + def test_evaluation_skips_build_and_ingest_when_observability_disabled( client: TestClient, monkeypatch ) -> None: From 001228a8b70c502a26d40a3f67a7862267d3aa5a Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:00:49 -0700 Subject: [PATCH 12/18] fix typescript --- .../src/generated/funcs/agents-init.ts | 20 ++++++++++++++----- .../src/generated/models/operations/index.ts | 1 + sdks/typescript/src/generated/sdk/agents.ts | 2 +- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sdks/typescript/src/generated/funcs/agents-init.ts b/sdks/typescript/src/generated/funcs/agents-init.ts index 508b00ed..715b2470 100644 --- a/sdks/typescript/src/generated/funcs/agents-init.ts +++ b/sdks/typescript/src/generated/funcs/agents-init.ts @@ -4,7 +4,7 @@ import * as z from "zod/v4-mini"; import { AgentControlSDKCore } from "../core.js"; -import { encodeJSON } from "../lib/encodings.js"; +import { encodeJSON, encodeSimple } from "../lib/encodings.js"; import * as M from "../lib/matchers.js"; import { compactMap } from "../lib/primitives.js"; import { safeParse } from "../lib/schemas.js"; @@ -23,6 +23,7 @@ import * as errors from 
"../models/errors/index.js"; import { ResponseValidationError } from "../models/errors/response-validation-error.js"; import { SDKValidationError } from "../models/errors/sdk-validation-error.js"; import * as models from "../models/index.js"; +import * as operations from "../models/operations/index.js"; import { APICall, APIPromise } from "../types/async.js"; import { Result } from "../types/fp.js"; @@ -49,7 +50,7 @@ import { Result } from "../types/fp.js"; */ export function agentsInit( client: AgentControlSDKCore, - request: models.InitAgentRequest, + request: operations.InitAgentApiV1AgentsInitAgentPostRequest, options?: RequestOptions, ): APIPromise< Result< @@ -74,7 +75,7 @@ export function agentsInit( async function $do( client: AgentControlSDKCore, - request: models.InitAgentRequest, + request: operations.InitAgentApiV1AgentsInitAgentPostRequest, options?: RequestOptions, ): Promise< [ @@ -95,20 +96,29 @@ async function $do( > { const parsed = safeParse( request, - (value) => z.parse(models.InitAgentRequest$outboundSchema, value), + (value) => + z.parse( + operations.InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema, + value, + ), "Input validation failed", ); if (!parsed.ok) { return [parsed, { status: "invalid" }]; } const payload = parsed.value; - const body = encodeJSON("body", payload, { explode: true }); + const body = encodeJSON("body", payload.body, { explode: true }); const path = pathToFunc("/api/v1/agents/initAgent")(); const headers = new Headers(compactMap({ "Content-Type": "application/json", Accept: "application/json", + "X-Agent-Control-Merge-Session": encodeSimple( + "X-Agent-Control-Merge-Session", + payload["X-Agent-Control-Merge-Session"], + { explode: false, charEncoding: "none" }, + ), })); const secConfig = await extractSecurity(client._options.apiKeyHeader); diff --git a/sdks/typescript/src/generated/models/operations/index.ts b/sdks/typescript/src/generated/models/operations/index.ts index a8706eef..80d25cc6 100644 --- 
a/sdks/typescript/src/generated/models/operations/index.ts +++ b/sdks/typescript/src/generated/models/operations/index.ts @@ -16,6 +16,7 @@ export * from "./get-control-api-v1-controls-control-id-get.js"; export * from "./get-control-data-api-v1-controls-control-id-data-get.js"; export * from "./get-control-stats-api-v1-observability-stats-controls-control-id-get.js"; export * from "./get-stats-api-v1-observability-stats-get.js"; +export * from "./init-agent-api-v1-agents-init-agent-post.js"; export * from "./list-agent-controls-api-v1-agents-agent-name-controls-get.js"; export * from "./list-agent-evaluators-api-v1-agents-agent-name-evaluators-get.js"; export * from "./list-agents-api-v1-agents-get.js"; diff --git a/sdks/typescript/src/generated/sdk/agents.ts b/sdks/typescript/src/generated/sdk/agents.ts index d606b384..78353323 100644 --- a/sdks/typescript/src/generated/sdk/agents.ts +++ b/sdks/typescript/src/generated/sdk/agents.ts @@ -75,7 +75,7 @@ export class Agents extends ClientSDK { * InitAgentResponse with created flag and active controls (policy-derived + direct) */ async init( - request: models.InitAgentRequest, + request: operations.InitAgentApiV1AgentsInitAgentPostRequest, options?: RequestOptions, ): Promise { return unwrapAsync(agentsInit( From f24e07e8f84ebd498e7df0b6586747428ff3570a Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:03:43 -0700 Subject: [PATCH 13/18] fix TS --- ...nit-agent-api-v1-agents-init-agent-post.ts | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts diff --git a/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts new file mode 100644 index 00000000..0ad7c2b3 --- /dev/null +++ 
b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts @@ -0,0 +1,44 @@ +/* + * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. + */ + +import * as z from "zod/v4-mini"; +import { remap as remap$ } from "../../lib/primitives.js"; +import * as models from "../index.js"; + +export type InitAgentApiV1AgentsInitAgentPostRequest = { + xAgentControlMergeSession?: string | null | undefined; + body: models.InitAgentRequest; +}; + +/** @internal */ +export type InitAgentApiV1AgentsInitAgentPostRequest$Outbound = { + "X-Agent-Control-Merge-Session"?: string | null | undefined; + body: models.InitAgentRequest$Outbound; +}; + +/** @internal */ +export const InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema: z.ZodMiniType< + InitAgentApiV1AgentsInitAgentPostRequest$Outbound, + InitAgentApiV1AgentsInitAgentPostRequest +> = z.pipe( + z.object({ + xAgentControlMergeSession: z.optional(z.nullable(z.string())), + body: models.InitAgentRequest$outboundSchema, + }), + z.transform((v) => { + return remap$(v, { + xAgentControlMergeSession: "X-Agent-Control-Merge-Session", + }); + }), +); + +export function initAgentApiV1AgentsInitAgentPostRequestToJSON( + initAgentApiV1AgentsInitAgentPostRequest: InitAgentApiV1AgentsInitAgentPostRequest, +): string { + return JSON.stringify( + InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema.parse( + initAgentApiV1AgentsInitAgentPostRequest, + ), + ); +} From a714e277cbbada0ef9cbd422545aaf77a7fc6cd4 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:08:23 -0700 Subject: [PATCH 14/18] fix ts --- sdks/typescript/src/generated/sdk/agents.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sdks/typescript/src/generated/sdk/agents.ts b/sdks/typescript/src/generated/sdk/agents.ts index 78353323..31adab5c 100644 --- a/sdks/typescript/src/generated/sdk/agents.ts +++ b/sdks/typescript/src/generated/sdk/agents.ts @@ -75,12 +75,20 @@ export 
class Agents extends ClientSDK { * InitAgentResponse with created flag and active controls (policy-derived + direct) */ async init( - request: operations.InitAgentApiV1AgentsInitAgentPostRequest, + request: + | models.InitAgentRequest + | operations.InitAgentApiV1AgentsInitAgentPostRequest, options?: RequestOptions, ): Promise { + const normalizedRequest = + "body" in request + ? request + : { + body: request, + }; return unwrapAsync(agentsInit( this, - request, + normalizedRequest, options, )); } From 9baf9d1ae01fa28d73cf36b58c51645301890125 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:12:38 -0700 Subject: [PATCH 15/18] add more TS fix --- ...nit-agent-api-v1-agents-init-agent-post.ts | 32 ++++++++++--------- sdks/typescript/src/generated/sdk/agents.ts | 12 ++----- sdks/typescript/tests/client-api.test.ts | 8 +++-- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts index 0ad7c2b3..67def39a 100644 --- a/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts +++ b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts @@ -18,23 +18,25 @@ export type InitAgentApiV1AgentsInitAgentPostRequest$Outbound = { }; /** @internal */ -export const InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema: z.ZodMiniType< - InitAgentApiV1AgentsInitAgentPostRequest$Outbound, - InitAgentApiV1AgentsInitAgentPostRequest -> = z.pipe( - z.object({ - xAgentControlMergeSession: z.optional(z.nullable(z.string())), - body: models.InitAgentRequest$outboundSchema, - }), - z.transform((v) => { - return remap$(v, { - xAgentControlMergeSession: "X-Agent-Control-Merge-Session", - }); - }), -); +export const InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema: + z.ZodMiniType< + 
InitAgentApiV1AgentsInitAgentPostRequest$Outbound, + InitAgentApiV1AgentsInitAgentPostRequest + > = z.pipe( + z.object({ + xAgentControlMergeSession: z.optional(z.nullable(z.string())), + body: models.InitAgentRequest$outboundSchema, + }), + z.transform((v) => { + return remap$(v, { + xAgentControlMergeSession: "X-Agent-Control-Merge-Session", + }); + }), + ); export function initAgentApiV1AgentsInitAgentPostRequestToJSON( - initAgentApiV1AgentsInitAgentPostRequest: InitAgentApiV1AgentsInitAgentPostRequest, + initAgentApiV1AgentsInitAgentPostRequest: + InitAgentApiV1AgentsInitAgentPostRequest, ): string { return JSON.stringify( InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema.parse( diff --git a/sdks/typescript/src/generated/sdk/agents.ts b/sdks/typescript/src/generated/sdk/agents.ts index 31adab5c..78353323 100644 --- a/sdks/typescript/src/generated/sdk/agents.ts +++ b/sdks/typescript/src/generated/sdk/agents.ts @@ -75,20 +75,12 @@ export class Agents extends ClientSDK { * InitAgentResponse with created flag and active controls (policy-derived + direct) */ async init( - request: - | models.InitAgentRequest - | operations.InitAgentApiV1AgentsInitAgentPostRequest, + request: operations.InitAgentApiV1AgentsInitAgentPostRequest, options?: RequestOptions, ): Promise { - const normalizedRequest = - "body" in request - ? 
request - : { - body: request, - }; return unwrapAsync(agentsInit( this, - normalizedRequest, + request, options, )); } diff --git a/sdks/typescript/tests/client-api.test.ts b/sdks/typescript/tests/client-api.test.ts index fe5a98db..ff156fab 100644 --- a/sdks/typescript/tests/client-api.test.ts +++ b/sdks/typescript/tests/client-api.test.ts @@ -144,9 +144,11 @@ describe("AgentControlClient API wiring", () => { }); await client.agents.init({ - agent: { - agentId: "550e8400-e29b-41d4-a716-446655440000", - agentName: "test-agent", + body: { + agent: { + agentId: "550e8400-e29b-41d4-a716-446655440000", + agentName: "test-agent", + }, }, }); From 8731f7eb408dba842e4e3f5d0c3ca050b9d8a959 Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:21:10 -0700 Subject: [PATCH 16/18] code coverage --- sdks/python/tests/test_trace_context.py | 11 ++++++++ server/tests/test_merge_event_sessions.py | 34 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 server/tests/test_merge_event_sessions.py diff --git a/sdks/python/tests/test_trace_context.py b/sdks/python/tests/test_trace_context.py index f08306e0..2c1d727f 100644 --- a/sdks/python/tests/test_trace_context.py +++ b/sdks/python/tests/test_trace_context.py @@ -63,3 +63,14 @@ def test_get_trace_context_from_provider_returns_none_for_empty_ids() -> None: ) assert get_trace_context_from_provider() is None + + +def test_get_trace_context_from_provider_returns_none_for_non_string_ids() -> None: + set_trace_context_provider( # type: ignore[arg-type] + lambda: { + "trace_id": 123, + "span_id": b"abc", + } + ) + + assert get_trace_context_from_provider() is None diff --git a/server/tests/test_merge_event_sessions.py b/server/tests/test_merge_event_sessions.py new file mode 100644 index 00000000..4c81f849 --- /dev/null +++ b/server/tests/test_merge_event_sessions.py @@ -0,0 +1,34 @@ +from agent_control_server.auth import AuthLevel, AuthenticatedClient +from 
agent_control_server.merge_event_sessions import ( + is_merge_events_enabled, + set_merge_events_enabled, +) + + +def _client(api_key: str) -> AuthenticatedClient: + return AuthenticatedClient( + api_key=api_key, + is_admin=False, + auth_level=AuthLevel.API_KEY, + ) + + +def test_merge_event_session_enable_disable_is_scoped_by_client_and_agent() -> None: + client_a = _client("key-a") + client_b = _client("key-b") + + set_merge_events_enabled(client_a, "agent-a", enabled=False) + set_merge_events_enabled(client_b, "agent-a", enabled=False) + + assert is_merge_events_enabled(client_a, "agent-a") is False + assert is_merge_events_enabled(client_b, "agent-a") is False + + set_merge_events_enabled(client_a, "agent-a", enabled=True) + + assert is_merge_events_enabled(client_a, "agent-a") is True + assert is_merge_events_enabled(client_a, "agent-b") is False + assert is_merge_events_enabled(client_b, "agent-a") is False + + set_merge_events_enabled(client_a, "agent-a", enabled=False) + + assert is_merge_events_enabled(client_a, "agent-a") is False From 9af1755592db7a0924dd6c5f232a8e6d7a70086f Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 11:28:26 -0700 Subject: [PATCH 17/18] add more tests for code cov --- sdks/python/tests/test_init_step_merge.py | 21 +++ .../tests/test_observability_updates.py | 162 +++++++++++++++++- 2 files changed, 182 insertions(+), 1 deletion(-) diff --git a/sdks/python/tests/test_init_step_merge.py b/sdks/python/tests/test_init_step_merge.py index ca71aa61..551a3b24 100644 --- a/sdks/python/tests/test_init_step_merge.py +++ b/sdks/python/tests/test_init_step_merge.py @@ -216,6 +216,27 @@ def test_init_sets_merge_events_session_flag() -> None: assert register_agent_mock.await_args.kwargs["merge_events"] is True +def test_init_defaults_merge_events_session_flag_to_false() -> None: + register_agent_mock = AsyncMock(return_value={"created": True, "controls": []}) + health_check_mock = AsyncMock(return_value={"status": 
"healthy"}) + + with patch( + "agent_control.__init__.AgentControlClient.health_check", + new=health_check_mock, + ), patch( + "agent_control.__init__.agents.register_agent", + new=register_agent_mock, + ): + agent_control.init( + agent_name=f"agent-{uuid4().hex[:12]}", + policy_refresh_interval_seconds=0, + ) + + assert agent_control.state.merge_events is False + assert register_agent_mock.await_args is not None + assert register_agent_mock.await_args.kwargs["merge_events"] is False + + @pytest.mark.asyncio async def test_refresh_controls_calls_agent_controls_endpoint() -> None: # Given: an initialized SDK agent session with network-facing calls mocked. diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index f91d53a8..6544d90a 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -4,7 +4,13 @@ import pytest from agent_control import evaluation -from agent_control.evaluation import _ControlAdapter, _merge_results +from agent_control.evaluation import ( + _ControlAdapter, + _build_server_control_lookup, + _has_applicable_prefiltered_server_controls, + _is_merged_event_mode_enabled, + _merge_results, +) from agent_control.evaluation_events import ( build_control_execution_events, enqueue_observability_events, @@ -64,6 +70,99 @@ def test_combines_matches_errors_and_non_matches(self): assert [match.control_id for match in result.non_matches or []] == [3] +class TestEvaluationHelpers: + def test_build_server_control_lookup_skips_unparseable_controls(self): + lookup = _build_server_control_lookup( + [ + { + "id": 1, + "name": "ctrl-1", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "observe"}, + "execution": "server", + }, + }, + { + "id": 2, + "name": "ctrl-2", + "control": { + "condition": {"selector": {"path": "input"}}, + "action": 
{"decision": "observe"}, + "execution": "server", + }, + }, + ] + ) + + assert list(lookup.keys()) == [1] + + def test_has_applicable_prefiltered_server_controls_returns_true_for_malformed_payload(self): + from agent_control_models import EvaluationRequest + + request = EvaluationRequest( + agent_name="agent-000000000001", + step={"type": "llm", "name": "test-step", "input": "hello"}, + stage="pre", + ) + + assert _has_applicable_prefiltered_server_controls( + [ + { + "id": 1, + "name": "bad-server-ctrl", + "control": { + "condition": {"selector": {"path": "input"}}, + "action": {"decision": "observe"}, + "execution": "server", + }, + } + ], + request, + ) is True + + def test_merged_event_mode_enabled_false_when_merge_disabled(self): + with patch.object(evaluation.state, "merge_events", False), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True): + assert _is_merged_event_mode_enabled("agent-000000000001") is False + + def test_merged_event_mode_enabled_false_when_current_agent_missing(self): + with patch.object(evaluation.state, "merge_events", True), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ + patch.object(evaluation.state, "current_agent", None), \ + patch.object(evaluation.state, "server_controls", [{"id": 1}]): + assert _is_merged_event_mode_enabled("agent-000000000001") is False + + def test_merged_event_mode_enabled_false_when_server_controls_missing(self): + with patch.object(evaluation.state, "merge_events", True), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ + patch.object( + evaluation.state, + "current_agent", + MagicMock(agent_name="agent-000000000001"), + ), \ + patch.object(evaluation.state, "server_controls", None): + assert _is_merged_event_mode_enabled("agent-000000000001") is False + + def test_merged_event_mode_enabled_true_for_matching_initialized_session(self): + client = MagicMock() + client.base_url = "http://localhost:8000" + + 
with patch.object(evaluation.state, "merge_events", True), \ + patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ + patch.object(evaluation.state, "server_url", "http://localhost:8000"), \ + patch.object( + evaluation.state, + "current_agent", + MagicMock(agent_name="agent-000000000001"), + ), \ + patch.object(evaluation.state, "server_controls", [{"id": 1}]): + assert _is_merged_event_mode_enabled("agent-000000000001", client) is True + + class TestBuildControlExecutionEvents: def _make_control(self, id, name, condition): return _ControlAdapter( @@ -819,6 +918,67 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f assert local_events[0].trace_id == "abc123" assert local_events[0].span_id == "def456" + @pytest.mark.asyncio + async def test_merged_event_mode_enqueues_only_local_events_when_no_server_controls_apply(self): + from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + + local_response = EvaluationResponse( + is_safe=True, + confidence=1.0, + matches=[ + ControlMatch( + control_id=1, + control_name="local-ctrl", + action="allow", + result=EvaluatorResult(matched=True, confidence=0.8), + ) + ], + ) + controls = [ + { + "id": 1, + "name": "local-ctrl", + "control": { + "condition": { + "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "selector": {"path": "input"}, + }, + "action": {"decision": "allow"}, + "execution": "sdk", + }, + } + ] + + mock_engine = MagicMock() + mock_engine.process = AsyncMock(return_value=local_response) + client = MagicMock() + client.http_client = AsyncMock() + step = Step(type="llm", name="test-step", input="hello") + + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ + patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ + 
patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ + patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + result = await evaluation.check_evaluation_with_local( + client=client, + agent_name="agent-000000000001", + step=step, + stage="pre", + controls=controls, + trace_id="abc123", + span_id="def456", + event_agent_name="test-agent", + ) + + client.http_client.post.assert_not_called() + mock_enqueue.assert_called_once() + merged_events = mock_enqueue.call_args.args[0] + assert len(merged_events) == 1 + assert merged_events[0].control_id == 1 + assert result.matches is not None + assert len(result.matches) == 1 + # ============================================================================= # control_decorators non_matches dict conversion From 45203f088266272462a4bc69cd10281cc0f109be Mon Sep 17 00:00:00 2001 From: "namrata.ghadi" Date: Thu, 2 Apr 2026 17:20:40 -0700 Subject: [PATCH 18/18] address comments --- sdks/python/src/agent_control/__init__.py | 7 - sdks/python/src/agent_control/_state.py | 1 - sdks/python/src/agent_control/agents.py | 7 +- sdks/python/src/agent_control/evaluation.py | 173 +++------- sdks/python/tests/test_init_step_merge.py | 30 +- .../tests/test_observability_updates.py | 160 +--------- sdks/python/tests/test_shutdown.py | 2 - .../src/generated/funcs/agents-init.ts | 7 +- .../generated/funcs/evaluation-evaluate.ts | 5 - .../evaluate-api-v1-evaluation-post.ts | 4 - ...nit-agent-api-v1-agents-init-agent-post.ts | 9 +- .../agent_control_server/endpoints/agents.py | 14 +- .../endpoints/evaluation.py | 300 +----------------- .../merge_event_sessions.py | 86 ----- .../tests/test_evaluation_error_handling.py | 164 +--------- server/tests/test_merge_event_sessions.py | 34 -- server/unit_tests/test_endpoint_helpers.py | 96 ++---- 17 files changed, 122 insertions(+), 977 deletions(-) delete mode 100644 server/src/agent_control_server/merge_event_sessions.py delete mode 100644 
server/tests/test_merge_event_sessions.py diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index 7db187d9..24353411 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -396,7 +396,6 @@ def init( observability_enabled: bool | None = None, log_config: dict[str, Any] | None = None, policy_refresh_interval_seconds: int = 60, - merge_events: bool = False, **kwargs: object ) -> Agent: """ @@ -427,9 +426,6 @@ def init( {"enabled": True, "span_start": True, "span_end": True, "control_eval": True} policy_refresh_interval_seconds: Interval for background policy refresh loop. Defaults to 60 seconds. Set to 0 to disable background refresh. - merge_events: Whether to merge local and server event creation in the - SDK before enqueueing through the built-in observability path. - Defaults to False. **kwargs: Additional metadata to store with the agent Returns: @@ -500,7 +496,6 @@ async def handle(message: str): state.current_agent = next_agent state.server_url = server_url or os.getenv('AGENT_CONTROL_URL') or 'http://localhost:8000' state.api_key = api_key - state.merge_events = merge_events # Merge auto-discovered steps from @control() decorators with explicit steps. # Explicit steps take precedence when (type, name) collides. 
@@ -552,7 +547,6 @@ async def register() -> list[dict[str, Any]] | None: state.current_agent, steps=registration_steps, conflict_mode=conflict_mode, - merge_events=merge_events, ) created = response.get('created', False) controls: list[dict[str, Any]] = response.get('controls', []) @@ -649,7 +643,6 @@ def _reset_state() -> None: state.server_controls = None state.server_url = None state.api_key = None - state.merge_events = False async def ashutdown() -> None: diff --git a/sdks/python/src/agent_control/_state.py b/sdks/python/src/agent_control/_state.py index a0f9eb7b..143bb2d3 100644 --- a/sdks/python/src/agent_control/_state.py +++ b/sdks/python/src/agent_control/_state.py @@ -24,7 +24,6 @@ def __init__(self) -> None: self.server_controls: list[dict[str, Any]] | None = None self.server_url: str | None = None self.api_key: str | None = None - self.merge_events: bool = False # Singleton state instance diff --git a/sdks/python/src/agent_control/agents.py b/sdks/python/src/agent_control/agents.py index 5a4df05b..f6ca8d58 100644 --- a/sdks/python/src/agent_control/agents.py +++ b/sdks/python/src/agent_control/agents.py @@ -15,9 +15,10 @@ async def register_agent( agent: Agent, steps: list[dict[str, Any]] | None = None, conflict_mode: Literal["strict", "overwrite"] = "overwrite", - merge_events: bool = False, ) -> dict[str, Any]: - """Register an agent with the server via /initAgent endpoint.""" + """Register an agent with the server via /initAgent endpoint. 
+ + """ ensure_evaluators_discovered() agent_dict = agent.to_dict() @@ -28,7 +29,7 @@ async def register_agent( "conflict_mode": conflict_mode, } - headers = {"X-Agent-Control-Merge-Session": "true"} if merge_events else None + headers = None response = await client.http_client.post( "/api/v1/agents/initAgent", json=payload, diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 65613fd5..4237baf9 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -49,27 +49,13 @@ def _get_applicable_controls( def _build_server_control_lookup( server_control_payloads: list[dict[str, Any]], ) -> dict[int, ControlDefinition]: - """Build a best-effort lookup of server control definitions. - - The merged-event path reconstructs server-side events in the SDK after the - server returns a lightweight ``EvaluationResponse``. This helper parses the - cached server control payloads so the shared event builder can reconstruct - those events locally. - - Args: - server_control_payloads: Raw cached server control payloads. - - Returns: - A mapping of control ID to parsed ``ControlDefinition`` for every - payload that can be parsed locally. - """ + """Build a best-effort lookup of server control definitions.""" control_lookup: dict[int, ControlDefinition] = {} for control in server_control_payloads: try: control_lookup[control["id"]] = ControlDefinition.model_validate(control["control"]) except Exception: - # The server remains authoritative for malformed/unparseable controls. continue return control_lookup @@ -79,15 +65,7 @@ def _has_applicable_prefiltered_server_controls( server_control_payloads: list[dict[str, Any]], request: EvaluationRequest, ) -> bool: - """Return whether any partitioned server control applies to this request. - - The caller is responsible for partitioning raw control payloads by - ``execution`` before calling this helper. 
This function only inspects the - server-control subset and does not re-check ``execution`` itself. - - If any server control payload cannot be parsed locally, this returns True so - the SDK still defers to the server for authoritative handling. - """ + """Return whether any partitioned server control applies to this request.""" parsed_server_controls: list[_ControlAdapter] = [] for control in server_control_payloads: @@ -101,7 +79,6 @@ def _has_applicable_prefiltered_server_controls( ) ) except Exception: - # Preserve existing fail-open behavior for malformed server controls. return True if not parsed_server_controls: @@ -116,61 +93,11 @@ def _has_applicable_prefiltered_server_controls( ) -def _is_merged_event_mode_enabled( - agent_name: str, - client: AgentControlClient | None = None, -) -> bool: - """Return whether SDK-side merged event creation is safe for this request. - - Merged event creation is a session-scoped option that depends on - initialized SDK state: an initialized agent, cached server controls for the - same agent, and observability enabled so the merged batch will actually be - consumed by the built-in queue/Postgres path. - - Args: - agent_name: Normalized agent name for the current request. - client: Optional client used for the current request. When provided, - merged mode is allowed only if it targets the active initialized - session server. - - Returns: - ``True`` when the current SDK session has enough state to reconstruct - and enqueue merged events safely. 
- """ - if not state.merge_events or not is_observability_enabled(): - return False - - current_agent = state.current_agent - if current_agent is None or current_agent.agent_name != agent_name: - return False - - if client is not None and state.server_url is not None: - normalized_state_server_url = state.server_url.rstrip("/") - normalized_client_server_url = client.base_url.rstrip("/") - if normalized_client_server_url != normalized_state_server_url: - return False - - return state.server_controls is not None - - def _merge_results( local_result: EvaluationResponse, server_result: EvaluationResponse, ) -> EvaluationResult: - """Merge local and server evaluation results into one SDK-facing result. - - This helper merges only evaluation semantics. Event reconstruction happens - later so the response shape can stay lightweight regardless of which event - ingestion path is used. - - Args: - local_result: Evaluation response produced by SDK-local controls. - server_result: Evaluation response produced by server-side controls. - - Returns: - A merged ``EvaluationResult`` with combined matches, errors, - non-matches, and the strictest safety/confidence outcome. 
- """ + """Merge local and server evaluation results into one SDK-facing result.""" is_safe = local_result.is_safe and server_result.is_safe confidence = min(local_result.confidence, server_result.confidence) @@ -204,6 +131,22 @@ def _merge_results( ) +def _cached_server_control_lookup( + agent_name: str, + client: AgentControlClient, +) -> dict[int, ControlDefinition]: + """Return cached server controls for the active session when they are trustworthy.""" + current_agent = state.current_agent + if current_agent is None or current_agent.agent_name != agent_name: + return {} + if state.server_controls is None: + return {} + if state.server_url is not None: + if client.base_url.rstrip("/") != state.server_url.rstrip("/"): + return {} + return _build_server_control_lookup(state.server_controls) + + async def check_evaluation( client: AgentControlClient, agent_name: str, @@ -212,20 +155,12 @@ async def check_evaluation( ) -> EvaluationResult: """Check if agent interaction is safe through the public SDK helper. - This helper always uses the default server-only evaluation path. It does - not participate in merged event creation, so server-side observability - ingestion remains enabled and the SDK simply returns the parsed result. - - Args: - client: Configured AgentControl client. - agent_name: Agent name to evaluate against. - step: Step payload to evaluate. - stage: Evaluation stage, ``pre`` or ``post``. - - Returns: - The parsed evaluation result returned by the server. + The server returns only evaluation semantics. When SDK observability is + enabled, this helper reconstructs server-side control-execution events + from the response and enqueues them through the built-in SDK batcher. 
""" normalized_name = ensure_agent_name(agent_name) + resolved_trace_id, resolved_span_id = get_trace_and_span_ids() request = EvaluationRequest( agent_name=normalized_name, step=step, @@ -242,6 +177,17 @@ async def check_evaluation( evaluation_response = EvaluationResponse.model_validate(response.json()) + if is_observability_enabled(): + server_events = build_control_execution_events( + evaluation_response, + request, + _cached_server_control_lookup(normalized_name, client), + resolved_trace_id, + resolved_span_id, + normalized_name, + ) + enqueue_observability_events(server_events) + return cast(EvaluationResult, EvaluationResult.from_dict(evaluation_response.model_dump())) @@ -255,33 +201,7 @@ async def check_evaluation_with_local( span_id: str | None = None, event_agent_name: str | None = None, ) -> EvaluationResult: - """Evaluate controls with local-first execution and configurable event flow. - - This is the main decision boundary between the two supported event - creation styles: - - default behavior: local events are reconstructed and queued immediately in - the SDK, while server-side events are still emitted by the server - - merged-event behavior: local and server events are reconstructed in the - SDK and enqueued once through the built-in observability path - - In both cases, the evaluation result itself stays lightweight and event - reconstruction happens after evaluation completes. - - Args: - client: Configured AgentControl client. - agent_name: Agent name to evaluate against. - step: Step payload to evaluate. - stage: Evaluation stage, ``pre`` or ``post``. - controls: Cached control payloads used to split local vs server - execution. - trace_id: Optional explicit trace ID. - span_id: Optional explicit span ID. - event_agent_name: Optional override for the agent name stamped on - reconstructed events. - - Returns: - A merged evaluation result across local and server execution. 
- """ + """Evaluate controls with local-first execution and SDK-owned event emission.""" normalized_name = ensure_agent_name(agent_name) resolved_trace_id = trace_id resolved_span_id = span_id @@ -360,8 +280,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: combined_errors = (result.errors or []) + parse_errors return result.model_copy(update={"errors": combined_errors}) - merged_emission_enabled = _is_merged_event_mode_enabled(normalized_name, client) - should_reconstruct_local_events = is_observability_enabled() + should_emit_events = is_observability_enabled() local_result: EvaluationResponse | None = None local_events = [] @@ -373,7 +292,7 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if applicable_local_controls: engine = ControlEngine(applicable_local_controls, context="sdk") local_result = await engine.process(request) - if should_reconstruct_local_events: + if should_emit_events: local_control_lookup = { control.id: control.control for control in applicable_local_controls } @@ -386,12 +305,9 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: event_agent_name, ) - if not merged_emission_enabled: - enqueue_observability_events(local_events) - if not local_result.is_safe: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) - if merged_emission_enabled: + if should_emit_events: enqueue_observability_events(local_events) return result @@ -402,8 +318,6 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: headers["X-Trace-Id"] = resolved_trace_id if resolved_span_id: headers["X-Span-Id"] = resolved_span_id - if merged_emission_enabled: - headers["X-Agent-Control-Merge-Events"] = "true" try: response = await client.http_client.post( @@ -414,11 +328,12 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: response.raise_for_status() server_result = EvaluationResponse.model_validate(response.json()) except Exception: - 
if merged_emission_enabled and local_events: + if should_emit_events and local_events: enqueue_observability_events(local_events) raise + server_events = [] - if merged_emission_enabled: + if should_emit_events: server_control_lookup = _build_server_control_lookup(server_control_payloads) server_events = build_control_execution_events( server_result, @@ -431,18 +346,18 @@ def _with_parse_errors(result: EvaluationResult) -> EvaluationResult: if local_result is not None: result = _with_parse_errors(_merge_results(local_result, server_result)) - if merged_emission_enabled: + if should_emit_events: enqueue_observability_events(local_events + server_events) return result result = _with_parse_errors(EvaluationResult.model_validate(server_result.model_dump())) - if merged_emission_enabled: + if should_emit_events: enqueue_observability_events(server_events) return result if local_result is not None: result = _with_parse_errors(EvaluationResult.model_validate(local_result.model_dump())) - if merged_emission_enabled: + if should_emit_events: enqueue_observability_events(local_events) return result diff --git a/sdks/python/tests/test_init_step_merge.py b/sdks/python/tests/test_init_step_merge.py index 551a3b24..f423593f 100644 --- a/sdks/python/tests/test_init_step_merge.py +++ b/sdks/python/tests/test_init_step_merge.py @@ -191,8 +191,7 @@ def test_init_logs_agent_updated_when_registration_already_exists( assert agent_name in caplog.text -def test_init_sets_merge_events_session_flag() -> None: - # Given: a normal init path with registration mocked out. 
+def test_init_registers_agent_without_merge_events_arg() -> None: register_agent_mock = AsyncMock(return_value={"created": True, "controls": []}) health_check_mock = AsyncMock(return_value={"status": "healthy"}) @@ -203,38 +202,21 @@ def test_init_sets_merge_events_session_flag() -> None: "agent_control.__init__.agents.register_agent", new=register_agent_mock, ): - # When: init() enables merged event creation for the session. agent_control.init( agent_name=f"agent-{uuid4().hex[:12]}", policy_refresh_interval_seconds=0, - merge_events=True, ) - # Then: the session state remembers that merged event creation is enabled. - assert agent_control.state.merge_events is True assert register_agent_mock.await_args is not None - assert register_agent_mock.await_args.kwargs["merge_events"] is True + assert "merge_events" not in register_agent_mock.await_args.kwargs -def test_init_defaults_merge_events_session_flag_to_false() -> None: - register_agent_mock = AsyncMock(return_value={"created": True, "controls": []}) - health_check_mock = AsyncMock(return_value={"status": "healthy"}) +def test_init_omits_merge_events_from_public_signature() -> None: + import inspect - with patch( - "agent_control.__init__.AgentControlClient.health_check", - new=health_check_mock, - ), patch( - "agent_control.__init__.agents.register_agent", - new=register_agent_mock, - ): - agent_control.init( - agent_name=f"agent-{uuid4().hex[:12]}", - policy_refresh_interval_seconds=0, - ) + signature = inspect.signature(agent_control.init) - assert agent_control.state.merge_events is False - assert register_agent_mock.await_args is not None - assert register_agent_mock.await_args.kwargs["merge_events"] is False + assert "merge_events" not in signature.parameters @pytest.mark.asyncio diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index 6544d90a..a90ea785 100644 --- a/sdks/python/tests/test_observability_updates.py +++ 
b/sdks/python/tests/test_observability_updates.py @@ -8,7 +8,6 @@ _ControlAdapter, _build_server_control_lookup, _has_applicable_prefiltered_server_controls, - _is_merged_event_mode_enabled, _merge_results, ) from agent_control.evaluation_events import ( @@ -124,43 +123,9 @@ def test_has_applicable_prefiltered_server_controls_returns_true_for_malformed_p request, ) is True - def test_merged_event_mode_enabled_false_when_merge_disabled(self): - with patch.object(evaluation.state, "merge_events", False), \ - patch("agent_control.evaluation.is_observability_enabled", return_value=True): - assert _is_merged_event_mode_enabled("agent-000000000001") is False - def test_merged_event_mode_enabled_false_when_current_agent_missing(self): - with patch.object(evaluation.state, "merge_events", True), \ - patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch.object(evaluation.state, "current_agent", None), \ - patch.object(evaluation.state, "server_controls", [{"id": 1}]): - assert _is_merged_event_mode_enabled("agent-000000000001") is False - def test_merged_event_mode_enabled_false_when_server_controls_missing(self): - with patch.object(evaluation.state, "merge_events", True), \ - patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch.object( - evaluation.state, - "current_agent", - MagicMock(agent_name="agent-000000000001"), - ), \ - patch.object(evaluation.state, "server_controls", None): - assert _is_merged_event_mode_enabled("agent-000000000001") is False - - def test_merged_event_mode_enabled_true_for_matching_initialized_session(self): - client = MagicMock() - client.base_url = "http://localhost:8000" - with patch.object(evaluation.state, "merge_events", True), \ - patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ - patch.object(evaluation.state, "server_url", "http://localhost:8000"), \ - patch.object( - evaluation.state, - "current_agent", - 
MagicMock(agent_name="agent-000000000001"), - ), \ - patch.object(evaluation.state, "server_controls", [{"id": 1}]): - assert _is_merged_event_mode_enabled("agent-000000000001", client) is True class TestBuildControlExecutionEvents: @@ -532,8 +497,9 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): class TestCheckEvaluation: + @pytest.mark.asyncio - async def test_default_path_keeps_server_only_behavior(self): + async def test_check_evaluation_enqueues_reconstructed_server_events_when_observability_enabled(self): from agent_control_models import Step mock_http_response = MagicMock() @@ -548,24 +514,19 @@ async def test_default_path_keeps_server_only_behavior(self): "control_id": 1, "control_name": "ctrl-1", "action": "observe", + "control_execution_id": "ce-1", "result": {"matched": False, "confidence": 0.1}, - }, - { - "control_id": 2, - "control_name": "ctrl-2", - "action": "deny", - "result": {"matched": False, "confidence": 0.2}, - }, + } ], } client = MagicMock() + client.base_url = "http://localhost:8000" client.http_client = AsyncMock() client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=False), \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + with patch("agent_control.evaluation.is_observability_enabled", return_value=True), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation( client=client, agent_name="agent-000000000001", @@ -575,54 +536,12 @@ async def test_default_path_keeps_server_only_behavior(self): call_kwargs = client.http_client.post.call_args.kwargs assert call_kwargs["headers"] is None - mock_enqueue.assert_not_called() + mock_enqueue.assert_called_once() assert result.is_safe is True assert result.confidence == 0.9 @pytest.mark.asyncio - async def 
test_check_evaluation_ignores_merged_mode_and_keeps_server_only_behavior(self): - from agent_control_models import Step - - mock_http_response = MagicMock() - mock_http_response.raise_for_status = MagicMock() - mock_http_response.json.return_value = { - "is_safe": True, - "confidence": 0.9, - "matches": [ - { - "control_id": 2, - "control_name": "server-ctrl", - "action": "allow", - "control_execution_id": "ce-server", - "result": {"matched": True, "confidence": 0.4}, - } - ], - "errors": None, - "non_matches": None, - } - - client = MagicMock() - client.http_client = AsyncMock() - client.http_client.post = AsyncMock(return_value=mock_http_response) - step = Step(type="llm", name="test-step", input="hello") - - with patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: - result = await evaluation.check_evaluation( - client=client, - agent_name="agent-000000000001", - step=step, - stage="pre", - ) - - call_kwargs = client.http_client.post.call_args.kwargs - assert call_kwargs["headers"] is None - mock_enqueue.assert_not_called() - assert result.matches is not None - assert len(result.matches) == 1 - - @pytest.mark.asyncio - async def test_skips_local_event_reconstruction_when_nothing_consumes_events(self): + async def test_skips_local_event_reconstruction_when_observability_disabled(self): from agent_control_models import EvaluationResponse, Step controls = [{ @@ -646,12 +565,7 @@ async def test_skips_local_event_reconstruction_when_nothing_consumes_events(sel client.http_client = AsyncMock() step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=False), \ - 
patch("agent_control.evaluation.is_observability_enabled", return_value=False), \ - patch("agent_control.evaluation.build_control_execution_events") as mock_build, \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), patch("agent_control.evaluation.is_observability_enabled", return_value=False), patch("agent_control.evaluation.build_control_execution_events") as mock_build, patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -666,7 +580,7 @@ async def test_skips_local_event_reconstruction_when_nothing_consumes_events(sel assert result.confidence == 1.0 @pytest.mark.asyncio - async def test_check_evaluation_falls_back_when_initialized_agent_does_not_match(self): + async def test_check_evaluation_skips_enqueue_when_observability_disabled(self): from agent_control_models import Step mock_http_response = MagicMock() @@ -680,56 +594,12 @@ async def test_check_evaluation_falls_back_when_initialized_agent_does_not_match } client = MagicMock() + client.base_url = "http://localhost:8000" client.http_client = AsyncMock() client.http_client.post = AsyncMock(return_value=mock_http_response) step = Step(type="llm", name="test-step", input="hello") - with patch.object(evaluation.state, "merge_events", True), \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, \ - patch.object(evaluation.state, "current_agent", MagicMock(agent_name="agent-000000000002")), \ - patch.object(evaluation.state, "server_controls", []): - result = await evaluation.check_evaluation( - client=client, - agent_name="agent-000000000001", - step=step, - stage="pre", - ) - - call_kwargs = client.http_client.post.call_args.kwargs - assert call_kwargs["headers"] is 
None - mock_enqueue.assert_not_called() - assert result.is_safe is True - assert result.confidence == 0.9 - - @pytest.mark.asyncio - async def test_check_evaluation_falls_back_when_client_targets_different_server(self): - from agent_control_models import Step - - mock_http_response = MagicMock() - mock_http_response.raise_for_status = MagicMock() - mock_http_response.json.return_value = { - "is_safe": True, - "confidence": 0.9, - "matches": None, - "errors": None, - "non_matches": None, - } - - client = MagicMock() - client.base_url = "http://different-server:8000" - client.http_client = AsyncMock() - client.http_client.post = AsyncMock(return_value=mock_http_response) - step = Step(type="llm", name="test-step", input="hello") - - with patch.object(evaluation.state, "merge_events", True), \ - patch.object(evaluation.state, "server_url", "http://localhost:8000"), \ - patch.object( - evaluation.state, - "current_agent", - MagicMock(agent_name="agent-000000000001"), - ), \ - patch.object(evaluation.state, "server_controls", [{"id": 1, "name": "ctrl"}]), \ - patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + with patch("agent_control.evaluation.is_observability_enabled", return_value=False), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation( client=client, agent_name="agent-000000000001", @@ -824,7 +694,6 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( @@ -841,8 +710,6 @@ async 
def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ merged_events = mock_enqueue.call_args.args[0] assert len(merged_events) == 2 assert {event.control_id for event in merged_events} == {1, 2} - headers = client.http_client.post.call_args.kwargs["headers"] - assert headers["X-Agent-Control-Merge-Events"] == "true" assert result.matches is not None assert len(result.matches) == 2 @@ -898,7 +765,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f client.http_client.post = AsyncMock(side_effect=RuntimeError("server unavailable")) step = Step(type="llm", name="test-step", input="hello") - with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), patch("agent_control.evaluation.is_observability_enabled", return_value=True), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: + with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), patch("agent_control.evaluation.is_observability_enabled", return_value=True), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: with pytest.raises(RuntimeError, match="server unavailable"): await evaluation.check_evaluation_with_local( client=client, @@ -957,7 +824,6 @@ async def test_merged_event_mode_enqueues_only_local_events_when_no_server_contr with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ - patch("agent_control.evaluation._is_merged_event_mode_enabled", return_value=True), \ patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: 
result = await evaluation.check_evaluation_with_local( diff --git a/sdks/python/tests/test_shutdown.py b/sdks/python/tests/test_shutdown.py index 4152d8f6..073745ed 100644 --- a/sdks/python/tests/test_shutdown.py +++ b/sdks/python/tests/test_shutdown.py @@ -64,7 +64,6 @@ def test_shutdown_resets_state(self): state.server_controls = [{"name": "test"}] state.server_url = "http://localhost:8000" state.api_key = "key" - state.merge_events = True agent_control.shutdown() @@ -73,7 +72,6 @@ def test_shutdown_resets_state(self): assert state.server_controls is None assert state.server_url is None assert state.api_key is None - assert state.merge_events is False def test_shutdown_idempotent(self): agent_control.shutdown() diff --git a/sdks/typescript/src/generated/funcs/agents-init.ts b/sdks/typescript/src/generated/funcs/agents-init.ts index 715b2470..9537567d 100644 --- a/sdks/typescript/src/generated/funcs/agents-init.ts +++ b/sdks/typescript/src/generated/funcs/agents-init.ts @@ -4,7 +4,7 @@ import * as z from "zod/v4-mini"; import { AgentControlSDKCore } from "../core.js"; -import { encodeJSON, encodeSimple } from "../lib/encodings.js"; +import { encodeJSON } from "../lib/encodings.js"; import * as M from "../lib/matchers.js"; import { compactMap } from "../lib/primitives.js"; import { safeParse } from "../lib/schemas.js"; @@ -114,11 +114,6 @@ async function $do( const headers = new Headers(compactMap({ "Content-Type": "application/json", Accept: "application/json", - "X-Agent-Control-Merge-Session": encodeSimple( - "X-Agent-Control-Merge-Session", - payload["X-Agent-Control-Merge-Session"], - { explode: false, charEncoding: "none" }, - ), })); const secConfig = await extractSecurity(client._options.apiKeyHeader); diff --git a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts index 81862c54..47ec39e4 100644 --- a/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts +++ 
b/sdks/typescript/src/generated/funcs/evaluation-evaluate.ts @@ -109,11 +109,6 @@ async function $do( const headers = new Headers(compactMap({ "Content-Type": "application/json", Accept: "application/json", - "X-Agent-Control-Merge-Events": encodeSimple( - "X-Agent-Control-Merge-Events", - payload["X-Agent-Control-Merge-Events"], - { explode: false, charEncoding: "none" }, - ), "X-Span-Id": encodeSimple("X-Span-Id", payload["X-Span-Id"], { explode: false, charEncoding: "none", diff --git a/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts b/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts index 204841a6..026e4065 100644 --- a/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts +++ b/sdks/typescript/src/generated/models/operations/evaluate-api-v1-evaluation-post.ts @@ -9,7 +9,6 @@ import * as models from "../index.js"; export type EvaluateApiV1EvaluationPostRequest = { xTraceId?: string | null | undefined; xSpanId?: string | null | undefined; - xAgentControlMergeEvents?: string | null | undefined; body: models.EvaluationRequest; }; @@ -17,7 +16,6 @@ export type EvaluateApiV1EvaluationPostRequest = { export type EvaluateApiV1EvaluationPostRequest$Outbound = { "X-Trace-Id"?: string | null | undefined; "X-Span-Id"?: string | null | undefined; - "X-Agent-Control-Merge-Events"?: string | null | undefined; body: models.EvaluationRequest$Outbound; }; @@ -29,14 +27,12 @@ export const EvaluateApiV1EvaluationPostRequest$outboundSchema: z.ZodMiniType< z.object({ xTraceId: z.optional(z.nullable(z.string())), xSpanId: z.optional(z.nullable(z.string())), - xAgentControlMergeEvents: z.optional(z.nullable(z.string())), body: models.EvaluationRequest$outboundSchema, }), z.transform((v) => { return remap$(v, { xTraceId: "X-Trace-Id", xSpanId: "X-Span-Id", - xAgentControlMergeEvents: "X-Agent-Control-Merge-Events", }); }), ); diff --git 
a/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts index 67def39a..b611fef9 100644 --- a/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts +++ b/sdks/typescript/src/generated/models/operations/init-agent-api-v1-agents-init-agent-post.ts @@ -7,13 +7,11 @@ import { remap as remap$ } from "../../lib/primitives.js"; import * as models from "../index.js"; export type InitAgentApiV1AgentsInitAgentPostRequest = { - xAgentControlMergeSession?: string | null | undefined; body: models.InitAgentRequest; }; /** @internal */ export type InitAgentApiV1AgentsInitAgentPostRequest$Outbound = { - "X-Agent-Control-Merge-Session"?: string | null | undefined; body: models.InitAgentRequest$Outbound; }; @@ -24,14 +22,9 @@ export const InitAgentApiV1AgentsInitAgentPostRequest$outboundSchema: InitAgentApiV1AgentsInitAgentPostRequest > = z.pipe( z.object({ - xAgentControlMergeSession: z.optional(z.nullable(z.string())), body: models.InitAgentRequest$outboundSchema, }), - z.transform((v) => { - return remap$(v, { - xAgentControlMergeSession: "X-Agent-Control-Merge-Session", - }); - }), + z.transform((v) => remap$(v, {})), ); export function initAgentApiV1AgentsInitAgentPostRequestToJSON( diff --git a/server/src/agent_control_server/endpoints/agents.py b/server/src/agent_control_server/endpoints/agents.py index 24a91cd4..53a76a24 100644 --- a/server/src/agent_control_server/endpoints/agents.py +++ b/server/src/agent_control_server/endpoints/agents.py @@ -28,7 +28,7 @@ SetPolicyResponse, StepKey, ) -from fastapi import APIRouter, Depends, Header +from fastapi import APIRouter, Depends from jsonschema_rs import ValidationError as JSONSchemaValidationError from pydantic import BaseModel, ValidationError from sqlalchemy import delete, func, or_, select, union_all @@ -45,7 +45,6 @@ NotFoundError, ) from ..logging_utils 
import get_logger -from ..merge_event_sessions import set_merge_events_enabled from ..models import ( Agent, AgentData, @@ -451,7 +450,6 @@ async def init_agent( request: InitAgentRequest, client: RequireAPIKey, db: AsyncSession = Depends(get_async_db), - x_merge_session: str | None = Header(default=None, alias="X-Agent-Control-Merge-Session"), ) -> InitAgentResponse: """ Register a new agent or update an existing agent's steps and metadata. @@ -551,11 +549,6 @@ async def init_agent( resource="Agent", operation="create", ) - set_merge_events_enabled( - client, - request.agent.agent_name, - enabled=(x_merge_session or "").lower() == "true", - ) return InitAgentResponse(created=created, controls=[]) # Parse existing data via AgentData Pydantic model @@ -806,11 +799,6 @@ async def init_agent( ) controls = await list_controls_for_agent(existing.name, db) - set_merge_events_enabled( - client, - existing.name, - enabled=(x_merge_session or "").lower() == "true", - ) return InitAgentResponse( created=created, diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index 14833160..18b945fc 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -1,41 +1,28 @@ """Evaluation analysis endpoints.""" -import time -from datetime import UTC, datetime -from typing import Literal - from agent_control_engine.core import ControlEngine from agent_control_models import ( ControlDefinition, - ControlExecutionEvent, ControlMatch, EvaluationRequest, EvaluationResponse, ) from agent_control_models.errors import ErrorCode, ValidationErrorItem -from fastapi import APIRouter, Depends, Header, Request +from fastapi import APIRouter, Depends from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from ..auth import AuthenticatedClient, RequireAPIKey -from ..config import observability_settings +from ..auth import 
RequireAPIKey from ..db import get_async_db from ..errors import APIValidationError, NotFoundError from ..logging_utils import get_logger -from ..merge_event_sessions import is_merge_events_enabled from ..models import Agent -from ..observability.ingest.base import EventIngestor from ..services.controls import list_controls_for_agent -from .observability import get_event_ingestor router = APIRouter(prefix="/evaluation", tags=["evaluation"]) _logger = get_logger(__name__) -# OTEL-standard invalid IDs - used when client doesn't provide trace context. -# These are immediately recognizable as "not traced" and can be filtered in queries. -INVALID_TRACE_ID = "0" * 32 # 128-bit, 32 hex chars -INVALID_SPAN_ID = "0" * 16 # 64-bit, 16 hex chars SAFE_EVALUATOR_ERROR = "Evaluation failed due to an internal evaluator error." SAFE_EVALUATOR_TIMEOUT_ERROR = "Evaluation timed out before completion." SAFE_INVALID_STEP_REGEX_ERROR = "Control configuration error: invalid step name regex." @@ -128,41 +115,6 @@ def _sanitize_evaluation_response(response: EvaluationResponse) -> EvaluationRes ) -def _observability_metadata( - control_def: ControlDefinition, -) -> tuple[str | None, str | None, dict[str, object]]: - """Return representative event fields plus full composite context.""" - identity = control_def.observability_identity() - return ( - identity.selector_path, - identity.evaluator_name, - { - "primary_evaluator": identity.evaluator_name, - "primary_selector_path": identity.selector_path, - "leaf_count": identity.leaf_count, - "all_evaluators": identity.all_evaluators, - "all_selector_paths": identity.all_selector_paths, - }, - ) - - -def _is_trusted_sdk_merge_request( - x_merge_events: str | None, - client: AuthenticatedClient, - agent_name: str, -) -> bool: - """Return whether merged delivery is enabled for this init-scoped session. 
- - A request must explicitly ask for merged delivery, and the same - authenticated client must previously have initialized this agent with - merge-events enabled. - """ - if (x_merge_events or "").lower() != "true": - return False - - return is_merge_events_enabled(client, agent_name) - - @router.post( "", response_model=EvaluationResponse, @@ -171,43 +123,18 @@ def _is_trusted_sdk_merge_request( ) async def evaluate( request: EvaluationRequest, - req: Request, client: RequireAPIKey, db: AsyncSession = Depends(get_async_db), - x_trace_id: str | None = Header(default=None, alias="X-Trace-Id"), - x_span_id: str | None = Header(default=None, alias="X-Span-Id"), - x_merge_events: str | None = Header(default=None, alias="X-Agent-Control-Merge-Events"), ) -> EvaluationResponse: """Analyze content for safety and control violations. - Runs all controls assigned to the agent via policy through the - evaluation engine. Controls are evaluated in parallel with - cancel-on-deny for efficiency. - - Custom evaluators must be deployed as Evaluator classes - with the engine. Their schemas are registered via initAgent. - - Optionally accepts X-Trace-Id and X-Span-Id headers for - OpenTelemetry-compatible distributed tracing. + This endpoint is intentionally evaluation-only. It returns the semantic + ``EvaluationResponse`` and does not build or ingest observability events + on the server; SDKs reconstruct and emit those events separately through + the observability ingestion endpoint. """ - start_time = time.perf_counter() - - # Use provided trace/span IDs or fall back to OTEL invalid IDs. - # Invalid IDs make it obvious that trace context wasn't provided by the client. - if not x_trace_id or not x_span_id: - _logger.warning( - "Missing trace context headers (X-Trace-Id, X-Span-Id). " - "Using invalid IDs - observability data will not be traceable." 
- ) - trace_id = x_trace_id or INVALID_TRACE_ID - span_id = x_span_id or INVALID_SPAN_ID - - # Determine payload type for observability based on step type - applies_to: Literal["llm_call", "tool_call"] = ( - "tool_call" if request.step.type == "tool" else "llm_call" - ) + del client # Authentication is still required by dependency injection. - # Fetch agent to get the name agent_result = await db.execute( select(Agent).where(Agent.name == request.agent_name) ) @@ -220,22 +147,14 @@ async def evaluate( resource_id=request.agent_name, hint="Register the agent via initAgent before evaluating.", ) - agent_name = agent.name - # Fetch controls for the agent (already validated as ControlDefinition) api_controls = await list_controls_for_agent( request.agent_name, db, allow_invalid_step_name_regex=True, ) - - # Build control lookup for observability - control_lookup = {c.id: c for c in api_controls} - - # Adapt controls for the engine engine_controls = [ControlAdapter(c.id, c.name, c.control) for c in api_controls] - # Execute Control Engine (parallel with cancel-on-deny) engine = ControlEngine(engine_controls) try: raw_response = await engine.process(request) @@ -256,209 +175,4 @@ async def evaluate( ], ) - # Calculate total execution time - total_duration_ms = (time.perf_counter() - start_time) * 1000 - - merge_events_requested = _is_trusted_sdk_merge_request( - x_merge_events=x_merge_events, - client=client, - agent_name=agent_name, - ) - - # Default mode keeps server-side ingestion as-is. Merged event creation - # skips this server-side delivery step so the SDK can reconstruct and - # enqueue the combined batch itself. 
- if observability_settings.enabled and not merge_events_requested: - response_events = _build_observability_events( - response=raw_response, - request=request, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - applies_to=applies_to, - control_lookup=control_lookup, - total_duration_ms=total_duration_ms, - ) - # Get ingestor from app.state (None if not initialized) - try: - ingestor = get_event_ingestor(req) - except RuntimeError: - ingestor = None - await _ingest_observability_events(response_events, ingestor) - return _sanitize_evaluation_response(raw_response) - - -def _build_observability_events( - response: EvaluationResponse, - request: EvaluationRequest, - trace_id: str, - span_id: str, - agent_name: str, - applies_to: Literal["llm_call", "tool_call"], - control_lookup: dict, - total_duration_ms: float, -) -> list[ControlExecutionEvent]: - """Build observability events for all evaluated controls. - - This preserves the existing server-side event shape while allowing the - merged-event path to skip server-side ingestion and keep the response - lightweight. - - Args: - response: Raw evaluation response from the engine. - request: Original evaluation request. - trace_id: Trace ID to stamp on emitted events. - span_id: Span ID to stamp on emitted events. - agent_name: Agent name to stamp on emitted events. - applies_to: Observability applies_to value derived from the step type. - control_lookup: Controls keyed by control ID. - total_duration_ms: Total request execution duration in milliseconds. - - Returns: - A list of reconstructed server-side control execution events. 
- """ - events: list[ControlExecutionEvent] = [] - now = datetime.now(UTC) - - # Process matches (controls that matched) - if response.matches: - for match in response.matches: - ctrl = control_lookup.get(match.control_id) - event_metadata = dict(match.result.metadata or {}) - selector_path = None - evaluator_name = None - if ctrl: - selector_path, evaluator_name, identity_metadata = _observability_metadata( - ctrl.control - ) - event_metadata.update(identity_metadata) - events.append( - ControlExecutionEvent( - control_execution_id=match.control_execution_id, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - control_id=match.control_id, - control_name=match.control_name, - check_stage=request.stage, - applies_to=applies_to, - action=match.action, - matched=True, - confidence=match.result.confidence, - timestamp=now, - evaluator_name=evaluator_name, - selector_path=selector_path, - error_message=match.result.error, - metadata=event_metadata, - ) - ) - - # Process errors (controls that failed during evaluation) - if response.errors: - for error in response.errors: - ctrl = control_lookup.get(error.control_id) - event_metadata = dict(error.result.metadata or {}) - selector_path = None - evaluator_name = None - if ctrl: - selector_path, evaluator_name, identity_metadata = _observability_metadata( - ctrl.control - ) - event_metadata.update(identity_metadata) - events.append( - ControlExecutionEvent( - control_execution_id=error.control_execution_id, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - control_id=error.control_id, - control_name=error.control_name, - check_stage=request.stage, - applies_to=applies_to, - action=error.action, - matched=False, - confidence=error.result.confidence, - timestamp=now, - evaluator_name=evaluator_name, - selector_path=selector_path, - error_message=error.result.error, - metadata=event_metadata, - ) - ) - - # Process non-matches (controls that were evaluated but did not match) - if 
response.non_matches: - for non_match in response.non_matches: - ctrl = control_lookup.get(non_match.control_id) - event_metadata = dict(non_match.result.metadata or {}) - selector_path = None - evaluator_name = None - if ctrl: - selector_path, evaluator_name, identity_metadata = _observability_metadata( - ctrl.control - ) - event_metadata.update(identity_metadata) - events.append( - ControlExecutionEvent( - control_execution_id=non_match.control_execution_id, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - control_id=non_match.control_id, - control_name=non_match.control_name, - check_stage=request.stage, - applies_to=applies_to, - action=non_match.action, - matched=False, - confidence=non_match.result.confidence, - timestamp=now, - evaluator_name=evaluator_name, - selector_path=selector_path, - error_message=None, - metadata=event_metadata, - ) - ) - - return events - - -async def _ingest_observability_events( - events: list[ControlExecutionEvent], - ingestor: EventIngestor | None, -) -> None: - """Ingest server-side observability events when OSS batching is active.""" - if not events or ingestor is None: - return - - result = await ingestor.ingest(events) - if result.dropped > 0: - _logger.warning( - f"Dropped {result.dropped} observability events, " - f"processed {result.processed}" - ) - - -async def _emit_observability_events( - response: EvaluationResponse, - request: EvaluationRequest, - trace_id: str, - span_id: str, - agent_name: str, - applies_to: Literal["llm_call", "tool_call"], - control_lookup: dict, - total_duration_ms: float, - ingestor: EventIngestor | None, -) -> None: - """Backward-compatible wrapper around build + ingest observability helpers.""" - events = _build_observability_events( - response=response, - request=request, - trace_id=trace_id, - span_id=span_id, - agent_name=agent_name, - applies_to=applies_to, - control_lookup=control_lookup, - total_duration_ms=total_duration_ms, - ) - await 
_ingest_observability_events(events, ingestor) diff --git a/server/src/agent_control_server/merge_event_sessions.py b/server/src/agent_control_server/merge_event_sessions.py deleted file mode 100644 index 5ce64650..00000000 --- a/server/src/agent_control_server/merge_event_sessions.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Server-side trust state for merged event creation. - -Merged event creation is an SDK-owned flow where the SDK reconstructs and -enqueues the final batch of control-execution events after combining local and -server evaluation results. In that mode the server must skip its normal -observability ingestion step, otherwise both the server and SDK would emit -events for the same evaluation. - -The merge request header on ``/evaluation`` is caller-controlled, so the server -cannot safely trust that header on its own. This module stores a small amount -of init-scoped session state so the server can distinguish: - -- callers that previously initialized a given agent with ``merge_events=True`` -- callers that merely send the merge header directly - -Only the first case is allowed to suppress server-side observability -ingestion. All other callers stay on the default server-ingestion path. -""" - -from __future__ import annotations - -from threading import Lock - -from .auth import AuthenticatedClient - -_merge_enabled_sessions: set[tuple[str, str]] = set() -_lock = Lock() - - -def _session_key(client: AuthenticatedClient, agent_name: str) -> tuple[str, str]: - """Return the in-memory lookup key for one merge-enabled SDK session. - - Args: - client: Authenticated caller identity resolved by the server. - agent_name: Normalized agent name for the initialized SDK session. - - Returns: - A stable ``(client, agent)`` key used for merge-session tracking. 
- """ - return (client.api_key, agent_name) - - -def set_merge_events_enabled( - client: AuthenticatedClient, - agent_name: str, - enabled: bool, -) -> None: - """Record whether merged event creation is enabled for a client/agent pair. - - This is called from the agent-init flow so later evaluation requests can be - checked against the server's trusted SDK session state instead of relying on - request headers alone. - - Args: - client: Authenticated caller identity resolved by the server. - agent_name: Normalized agent name whose session state is being updated. - enabled: Whether merged event creation is enabled for this session. - - Returns: - None. - """ - key = _session_key(client, agent_name) - with _lock: - if enabled: - _merge_enabled_sessions.add(key) - else: - _merge_enabled_sessions.discard(key) - - -def is_merge_events_enabled( - client: AuthenticatedClient, - agent_name: str, -) -> bool: - """Return whether merged event creation is enabled for this SDK session. - - Args: - client: Authenticated caller identity resolved by the server. - agent_name: Normalized agent name for the evaluation request. - - Returns: - ``True`` when the caller previously initialized the same agent with - merged event creation enabled; otherwise ``False``. 
- """ - key = _session_key(client, agent_name) - with _lock: - return key in _merge_enabled_sessions diff --git a/server/tests/test_evaluation_error_handling.py b/server/tests/test_evaluation_error_handling.py index a1a11e85..1df795da 100644 --- a/server/tests/test_evaluation_error_handling.py +++ b/server/tests/test_evaluation_error_handling.py @@ -1,10 +1,8 @@ """End-to-end tests for evaluator error handling.""" -import logging import uuid from unittest.mock import AsyncMock, MagicMock from agent_control_models import ( - ControlExecutionEvent, ControlMatch, EvaluationRequest, EvaluatorResult, @@ -17,7 +15,6 @@ SAFE_EVALUATOR_TIMEOUT_ERROR, _sanitize_control_match, ) -from agent_control_server.observability.ingest.base import IngestResult from .utils import create_and_assign_policy @@ -171,12 +168,11 @@ def mock_get_evaluator_instance(config): assert data["matches"] is None or len(data["matches"]) == 0 -def test_evaluation_observability_receives_raw_errors_while_api_response_is_sanitized( +def test_evaluation_response_is_sanitized_without_server_side_observability( client: TestClient, monkeypatch, ) -> None: - """Observability should ingest raw evaluator diagnostics while API clients see safe text.""" - # Given: an agent with a deny control and an evaluator that crashes at runtime + """Evaluation stays pure and returns only sanitized semantics.""" control_data = { "description": "Test control", "enabled": True, @@ -196,7 +192,6 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani mock_evaluator.get_timeout_seconds = MagicMock(return_value=30.0) import agent_control_engine.core as core_module - import agent_control_server.endpoints.evaluation as evaluation_module monkeypatch.setattr( core_module, @@ -204,13 +199,6 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani lambda _config: mock_evaluator, ) - build_mock = MagicMock(return_value=[]) - ingest_mock = AsyncMock() - 
monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) - monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) - monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) - - # When: sending an evaluation request payload = Step(type="llm", name="test-step", input="test content", output=None) req = EvaluationRequest( agent_name=agent_name, @@ -219,7 +207,6 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani ) resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) - # Then: the API response remains sanitized assert resp.status_code == 200 data = resp.json() assert data["errors"] is not None @@ -227,18 +214,6 @@ def test_evaluation_observability_receives_raw_errors_while_api_response_is_sani assert data["errors"][0]["control_name"] == control_name assert data["errors"][0]["result"]["error"] == SAFE_EVALUATOR_ERROR - # And: observability receives the raw engine response with unsanitized diagnostics - build_mock.assert_called_once() - raw_response = build_mock.call_args.kwargs["response"] - assert raw_response.errors is not None - raw_error = raw_response.errors[0] - assert raw_error.control_name == control_name - assert raw_error.result.error == "RuntimeError: Simulated evaluator crash" - raw_trace = raw_error.result.metadata["condition_trace"] - assert raw_trace["error"] == "RuntimeError: Simulated evaluator crash" - assert raw_trace["message"] == "Evaluation failed: RuntimeError: Simulated evaluator crash" - ingest_mock.assert_awaited_once() - def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: # Given: a control match whose nested condition trace contains raw evaluator errors @@ -352,143 +327,24 @@ async def raise_value_error(*_args, **_kwargs): assert body["errors"][0]["message"] == "Invalid evaluation request or control configuration." 
-def test_evaluation_warns_when_observability_drops_events( - client: TestClient, app, caplog -) -> None: - # Given: an agent with a control that will match - agent_name, _ = create_and_assign_policy(client) - - class DroppingIngestor: - async def ingest(self, events): # type: ignore[no-untyped-def] - return IngestResult(received=len(events), processed=0, dropped=len(events)) - - previous_ingestor = getattr(app.state, "event_ingestor", None) - app.state.event_ingestor = DroppingIngestor() - try: - # And: a log capture for the evaluation warning - caplog.set_level(logging.WARNING, logger="agent_control_server.endpoints.evaluation") - # When: sending an evaluation request - payload = Step(type="llm", name="test-step", input="x", output=None) - req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") - resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) - - # Then: the evaluation succeeds but logs a dropped-events warning - assert resp.status_code == 200 - assert any("Dropped" in record.message for record in caplog.records) - finally: - if previous_ingestor is None: - del app.state.event_ingestor - else: - app.state.event_ingestor = previous_ingestor - - -def test_evaluation_skips_ingest_for_merge_mode( - client: TestClient, monkeypatch -) -> None: - """Merged-event mode should skip server-side observability ingestion.""" +def test_evaluation_ignores_merge_headers_and_remains_pure(client: TestClient) -> None: + """/evaluation should return only semantic results regardless of merge headers.""" agent_name, _ = create_and_assign_policy(client) - client.post( - "/api/v1/agents/initAgent", - json={ - "agent": {"agent_name": agent_name}, - "steps": [], - "evaluators": [], - }, - headers={"X-Agent-Control-Merge-Session": "true"}, - ) - - import agent_control_server.endpoints.evaluation as evaluation_module - - event = ControlExecutionEvent( - trace_id="a" * 32, - span_id="b" * 16, - agent_name=agent_name, - control_id=1, - 
control_name="test-control", - check_stage="pre", - applies_to="llm_call", - action="deny", - matched=True, - confidence=0.9, - ) - build_mock = MagicMock(return_value=[event]) - ingest_mock = AsyncMock() - monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) - monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) - monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) payload = Step(type="llm", name="test-step", input="x", output=None) req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") resp = client.post( "/api/v1/evaluation", json=req.model_dump(mode="json"), - headers={"X-Agent-Control-Merge-Events": "true"}, + headers={ + "X-Agent-Control-Merge-Events": "true", + "X-Trace-Id": "a" * 32, + "X-Span-Id": "b" * 16, + }, ) assert resp.status_code == 200 body = resp.json() assert "events" not in body - ingest_mock.assert_not_awaited() - - -def test_evaluation_merge_header_alone_does_not_skip_ingest( - client: TestClient, monkeypatch -) -> None: - """Untrusted callers should not suppress server-side observability ingestion.""" - agent_name, _ = create_and_assign_policy(client) - - import agent_control_server.endpoints.evaluation as evaluation_module - - event = ControlExecutionEvent( - trace_id="a" * 32, - span_id="b" * 16, - agent_name=agent_name, - control_id=1, - control_name="test-control", - check_stage="pre", - applies_to="llm_call", - action="deny", - matched=True, - confidence=0.9, - ) - build_mock = MagicMock(return_value=[event]) - ingest_mock = AsyncMock() - monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) - monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) - monkeypatch.setattr(evaluation_module.observability_settings, "enabled", True) - - payload = Step(type="llm", name="test-step", input="x", output=None) - req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") - 
resp = client.post( - "/api/v1/evaluation", - json=req.model_dump(mode="json"), - headers={"X-Agent-Control-Merge-Events": "true"}, - ) - - assert resp.status_code == 200 - build_mock.assert_called_once() - ingest_mock.assert_awaited_once() - - -def test_evaluation_skips_build_and_ingest_when_observability_disabled( - client: TestClient, monkeypatch -) -> None: - """Observability-disabled requests should not build or ingest events.""" - agent_name, _ = create_and_assign_policy(client) - - import agent_control_server.endpoints.evaluation as evaluation_module - - build_mock = MagicMock() - ingest_mock = AsyncMock() - monkeypatch.setattr(evaluation_module, "_build_observability_events", build_mock) - monkeypatch.setattr(evaluation_module, "_ingest_observability_events", ingest_mock) - monkeypatch.setattr(evaluation_module.observability_settings, "enabled", False) - - payload = Step(type="llm", name="test-step", input="x", output=None) - req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") - resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) - - assert resp.status_code == 200 - build_mock.assert_not_called() - ingest_mock.assert_not_awaited() + assert body["is_safe"] is False diff --git a/server/tests/test_merge_event_sessions.py b/server/tests/test_merge_event_sessions.py deleted file mode 100644 index 4c81f849..00000000 --- a/server/tests/test_merge_event_sessions.py +++ /dev/null @@ -1,34 +0,0 @@ -from agent_control_server.auth import AuthLevel, AuthenticatedClient -from agent_control_server.merge_event_sessions import ( - is_merge_events_enabled, - set_merge_events_enabled, -) - - -def _client(api_key: str) -> AuthenticatedClient: - return AuthenticatedClient( - api_key=api_key, - is_admin=False, - auth_level=AuthLevel.API_KEY, - ) - - -def test_merge_event_session_enable_disable_is_scoped_by_client_and_agent() -> None: - client_a = _client("key-a") - client_b = _client("key-b") - - set_merge_events_enabled(client_a, 
"agent-a", enabled=False) - set_merge_events_enabled(client_b, "agent-a", enabled=False) - - assert is_merge_events_enabled(client_a, "agent-a") is False - assert is_merge_events_enabled(client_b, "agent-a") is False - - set_merge_events_enabled(client_a, "agent-a", enabled=True) - - assert is_merge_events_enabled(client_a, "agent-a") is True - assert is_merge_events_enabled(client_a, "agent-b") is False - assert is_merge_events_enabled(client_b, "agent-a") is False - - set_merge_events_enabled(client_a, "agent-a", enabled=False) - - assert is_merge_events_enabled(client_a, "agent-a") is False diff --git a/server/unit_tests/test_endpoint_helpers.py b/server/unit_tests/test_endpoint_helpers.py index 0b0b214f..4d245206 100644 --- a/server/unit_tests/test_endpoint_helpers.py +++ b/server/unit_tests/test_endpoint_helpers.py @@ -1,22 +1,14 @@ """Unit tests for endpoint helpers that don't require the DB test fixture.""" from types import SimpleNamespace -from unittest.mock import AsyncMock -import pytest -from agent_control_models import ( - ControlDefinition, - ControlMatch, - EvaluationRequest, - EvaluationResponse, - EvaluatorResult, -) +from agent_control_models import ControlDefinition, ControlMatch, EvaluatorResult from agent_control_server.endpoints.agents import ( _find_referencing_controls_for_removed_evaluators, ) from agent_control_server.endpoints.evaluation import ( ControlAdapter, - _emit_observability_events, + _sanitize_control_match, ) @@ -55,10 +47,9 @@ def test_find_referencing_controls_dedupes_composite_matches() -> None: assert referencing_controls == [("composite-ctrl", "custom")] -@pytest.mark.asyncio -async def test_emit_observability_events_uses_representative_leaf_for_composites() -> None: - # Given: a composite control with two leaves and existing condition metadata - control = ControlAdapter( +def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: + # Given: a composite control whose condition trace includes a raw 
evaluator error + _ = ControlAdapter( id=1, name="composite-ctrl", control=ControlDefinition( @@ -78,53 +69,36 @@ async def test_emit_observability_events_uses_representative_leaf_for_composites action={"decision": "observe"}, ), ) - response = EvaluationResponse( - is_safe=True, - confidence=1.0, - non_matches=[ - ControlMatch( - control_id=1, - control_name="composite-ctrl", - action="observe", - result=EvaluatorResult( - matched=False, - confidence=0.9, - metadata={"condition_trace": {"kind": "and"}}, - ), - ) - ], - ) - request = EvaluationRequest( - agent_name="agent-000000000001", - step={"type": "llm", "name": "test-step", "input": "hello"}, - stage="pre", - ) - ingestor = SimpleNamespace( - ingest=AsyncMock(return_value=SimpleNamespace(dropped=0, processed=1)) + match = ControlMatch( + control_id=1, + control_name="composite-ctrl", + action="observe", + result=EvaluatorResult( + matched=False, + confidence=0.9, + error="RuntimeError: secret evaluator failure", + metadata={ + "condition_trace": { + "type": "and", + "children": [ + { + "type": "leaf", + "error": "RuntimeError: secret evaluator failure", + "message": "Evaluation failed: RuntimeError: secret evaluator failure", + } + ], + } + }, + ), ) - # When: emitting observability events - await _emit_observability_events( - response=response, - request=request, - trace_id="trace123", - span_id="span456", - agent_name="agent-000000000001", - applies_to="llm_call", - control_lookup={1: control}, - total_duration_ms=5.0, - ingestor=ingestor, - ) + # When: sanitizing the control match for API output + sanitized = _sanitize_control_match(match) - # Then: the first leaf becomes the event identity and full context is retained - events = ingestor.ingest.await_args.args[0] - assert len(events) == 1 - event = events[0] - assert event.evaluator_name == "regex" - assert event.selector_path == "input" - assert event.metadata["condition_trace"] == {"kind": "and"} - assert event.metadata["primary_evaluator"] == "regex" - 
assert event.metadata["primary_selector_path"] == "input" - assert event.metadata["leaf_count"] == 2 - assert event.metadata["all_evaluators"] == ["regex", "list"] - assert event.metadata["all_selector_paths"] == ["input", "output"] + # Then: top-level and nested errors are redacted to the safe public message + assert sanitized.result.error is not None + assert "secret evaluator failure" not in sanitized.result.error + trace = sanitized.result.metadata["condition_trace"] + child = trace["children"][0] + assert child["error"] == sanitized.result.error + assert child["message"] == sanitized.result.error