diff --git a/.flow/epics/fn-17.json b/.flow/epics/fn-17.json new file mode 100644 index 000000000..24cd673a1 --- /dev/null +++ b/.flow/epics/fn-17.json @@ -0,0 +1,13 @@ +{ + "branch_name": "fn-17", + "created_at": "2026-01-19T01:17:41.162846Z", + "depends_on_epics": [], + "id": "fn-17", + "next_task": 1, + "plan_review_status": "unknown", + "plan_reviewed_at": null, + "spec_path": ".flow/specs/fn-17.md", + "status": "open", + "title": "V7 Golden Master: Rust State Machine with PyO3 Bindings", + "updated_at": "2026-01-19T01:17:47.652945Z" +} diff --git a/.flow/specs/fn-17.md b/.flow/specs/fn-17.md new file mode 100644 index 000000000..946bd32fd --- /dev/null +++ b/.flow/specs/fn-17.md @@ -0,0 +1,354 @@ +# fn-17 V7 Golden Master: Rust State Machine with PyO3 Bindings + +## Overview + +Implement a Rust-based investigation state machine with Python bindings that replaces the **Temporal workflow local variables** (`_current_step`, `_state`, etc. in `InvestigationWorkflow`). The existing `core/state.py` event log is retained for audit/persistence; Rust owns the **in-flight workflow state**. + +**Why Rust:** Total, deterministic core; illegal transitions become explicit errors; state is serializable; side effects stay outside. 
+ +**Target replacement:** Temporal workflow state variables at `python-packages/dataing/src/dataing/temporal/workflows/investigation.py:67-90` + +**What's NOT replaced:** `core/state.py` (event log for persistence), `core/investigation/` (branch/snapshot domain model) + +**Architecture:** +- `core/` - Self-contained Rust workspace + - `crates/dataing_investigator/` - Pure Rust library (domain, protocol, state, machine) + - `bindings/python/` - PyO3 adapter exposing `dataing_investigator` module +- `python-packages/investigator/` - Python runtime (envelope, runtime, security) +- Integration with existing `dataing` Temporal workflows + +## Scope + +### In Scope +- Rust workspace setup with Maturin/PyO3 and wheel distribution +- Event-sourced state machine with strict phase transitions +- **Rust runs in Temporal ACTIVITIES** (not workflow code) for side-effect isolation +- All IDs externally generated (Rust never generates, only accepts) +- Versioned JSON wire protocol (v1) with strict schema +- Python runtime with envelope tracing and defense-in-depth validation +- Temporal workflow integration with signals/queries for HITL +- Build system integration (Justfile, uv workspace, CI wheel builds) + +### Out of Scope +- Migration of existing investigations (separate epic) +- Replacing `core/state.py` event log (retained for audit) +- Performance optimization (benchmark after MVP) +- Multi-tenancy in Rust (handled at Python layer) +- OpenTelemetry integration (future enhancement) + +## Execution Model + +### Where Rust Runs + +**CRITICAL DECISION:** The Rust state machine runs **inside Temporal activities**, NOT inside workflow code. 
+ +| Layer | Determinism | Contains | +|-------|-------------|----------| +| Workflow code | Must be deterministic | Orchestration, signals, queries, `workflow.uuid4()` | +| Activities | No determinism required | Rust state transitions, LLM calls, DB queries | + +**Rationale:** +- Activities can use non-deterministic code safely +- State machine has side effects (generates intents, records metadata) +- Workflow passes IDs and events to activity, receives intent back +- State snapshots persisted to DB via activity (not Temporal history) + +### Durability Mechanism + +**State is NOT stored in Temporal history.** Instead: + +1. Workflow calls `run_brain_step` activity with `(state_json, event_json, workflow_id)` +2. Activity runs Rust state machine, gets new state + intent +3. Activity persists state snapshot to **application DB** with idempotency key `(workflow_id, step)` +4. Activity returns `{intent_json, step}` to workflow +5. Workflow uses `continue_as_new` every N steps to bound history + +**History growth mitigation:** +- Only intent payloads flow through history (small) +- State snapshots in DB, keyed by `(workflow_id, step)` +- `continue_as_new` every 100 steps with compacted checkpoint + +## Key Design Decisions + +### Phase Enum (Mapped to Workflow Steps) + +```rust +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", content = "data")] +pub enum Phase { + // Maps to: workflow start + Init, + // Maps to: gather_context activity + GatheringContext { call_id: Option<String> }, + // Maps to: check_patterns activity (optional) + CheckingPatterns { call_id: Option<String> }, + // Maps to: generate_hypotheses activity + GeneratingHypotheses { call_id: Option<String> }, + // Maps to: generate_query + execute_query activities (parallel per hypothesis) + EvaluatingHypotheses { pending_call_ids: Vec<String>, completed: Vec<String> }, + // Maps to: interpret_evidence activity + InterpretingEvidence { call_id: Option<String> }, + // Maps to: _await_user_input (signal-based) + 
AwaitingUser { question_id: String, prompt: String, timeout_seconds: u64 }, + // Maps to: synthesize activity + Synthesizing { call_id: Option<String> }, + // Maps to: counter_analyze activity (optional) + CounterAnalyzing { call_id: Option<String> }, + // Terminal: success + Finished { insight: String }, + // Terminal: failure (includes cancellation) + Failed { error: String, retryable: bool }, +} +``` + +### HITL Specification + +**Trigger conditions for `AwaitingUser`:** +1. LLM requests clarification (intent type `RequestUser`) +2. Confidence below threshold after hypothesis evaluation +3. Ambiguous evidence requiring human judgment + +**Signal schema:** +```python +@workflow.signal +def submit_user_response(self, response: UserResponsePayload): + """ + UserResponsePayload: + - question_id: str (must match awaiting question) + - content: str (user's answer) + - timestamp: str (ISO8601) + """ +``` + +**Timeout semantics:** +- Default: 60 minutes +- On timeout: transition to `Failed { error: "User response timeout", retryable: true }` +- On cancel signal: transition to `Failed { error: "Cancelled", retryable: false }` + +**Query surface:** +```python +@workflow.query +def get_awaiting_user_state(self) -> AwaitingUserState | None: + """Returns question_id, prompt, timeout_remaining if in AwaitingUser phase.""" +``` + +### ID Generation + +**RULE: Rust NEVER generates IDs.** All IDs come from external sources: + +| ID Type | Source | When | +|---------|--------|------| +| `event_id` | `workflow.uuid4()` | Workflow creates before calling activity | +| `call_id` | `workflow.uuid4()` | Workflow creates before scheduling tool call | +| `step` | Workflow counter | Monotonic, passed to activity | + +**Rust accepts IDs via event payload:** +```rust +pub struct Event { + pub id: String, // External: workflow.uuid4() + pub step: u64, // External: workflow counter + pub payload: EventPayload, +} +``` + +**Idempotency:** +- Activity uses `(workflow_id, event_id)` as dedup key for DB writes 
+- Rust state machine maintains `seen_event_ids: HashSet<String>` to reject duplicates +- On replay, duplicate events are no-ops + +### Versioned Wire Protocol (v1) + +```json +{ + "protocol_version": 1, + "event_id": "uuid", + "step": 42, + "kind": "CallResult", + "payload": { ... } +} +``` + +**Schema rules:** +- `protocol_version`: Required, reject if unknown +- Unknown fields: Ignored (forward compat) +- Missing required fields: Error +- Canonical JSON: Keys sorted, no trailing commas, UTF-8 + +**Backwards compatibility tests:** +- Golden fixtures for each event/intent type +- Round-trip tests across Rust/Python boundary + +### Error Classification for Temporal + +| Error Type | Temporal Behavior | Rust Exception | +|------------|-------------------|----------------| +| `InvalidTransition` | Non-retryable, fail workflow | `InvalidTransitionError` | +| `SerializationError` | Non-retryable, fail workflow | `SerializationError` | +| `InvariantViolation` | Non-retryable, bug → fail workflow | `InvariantError` | +| `ExternalCallFailed` | Retryable (Temporal retry policy) | `RetryableError` | + +### Panic-Free Policy + +**Crate-level enforcement:** +```rust +// lib.rs +#![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)] +``` + +**All FFI entrypoints wrapped:** +```rust +fn safe_ingest(event_json: String) -> PyResult<String> { + std::panic::catch_unwind(|| ingest_inner(&event_json)) + .map_err(|_| PyRuntimeError::new_err("Internal panic - please report"))? 
+} +``` + +**Panic strategy:** `panic = "unwind"` in Cargo.toml to enable `catch_unwind` + +### Security Validation Boundaries + +| Boundary | Validations | +|----------|-------------| +| API → Workflow signal | Schema validation, size limits (< 1MB), user auth | +| Workflow → Rust event | Protocol version, required fields, event_id uniqueness | +| Rust state invariants | Phase transition rules, call_id matching, step monotonicity | +| Activity → SQL | Existing `safety/validator.py`, forbidden statements, PII redaction | + +**Size limits:** +- Event payload: < 100KB +- State snapshot: < 10MB +- Signal payload: < 1MB + +## Naming Convention + +**Unified naming:** `dataing_investigator` + +| Component | Name | +|-----------|------| +| Rust crate | `dataing_investigator` | +| Python wheel | `dataing-investigator` | +| Python import | `from dataing_investigator import Investigator` | +| Module location | `python-packages/dataing-investigator/` (bindings) | +| Runtime package | `python-packages/investigator/` (Python runtime) | + +## Build & Distribution + +### Prerequisites +```bash +# Required tooling +rustup toolchain install stable +cargo install maturin +``` + +### Development +```bash +# Build Rust library +just rust-build + +# Install bindings to venv (dev mode) +just rust-dev + +# Run Rust tests +just rust-test +``` + +### CI/Release +```bash +# Build wheels for distribution (manylinux, macos, windows) +maturin build --release --strip + +# Wheels output to target/wheels/ +``` + +### Justfile additions +```just +# Prerequisites check +rust-check: + @command -v cargo >/dev/null || (echo "Install Rust: rustup.rs" && exit 1) + @command -v maturin >/dev/null || (echo "Install maturin: pip install maturin" && exit 1) + +# Build Rust library +rust-build: rust-check + cd core && cargo build --release + +# Dev install bindings +rust-dev: rust-check + cd core/bindings/python && maturin develop --uv + +# Run Rust tests +rust-test: rust-check + cd core && cargo test + +# 
Note: `just test` does NOT include rust-test until CI is ready +``` + +## Quick commands + +```bash +# Prerequisites +rustup toolchain install stable +cargo install maturin + +# Build Rust crate +just rust-build + +# Install bindings (dev mode) +just rust-dev + +# Run Rust tests +just rust-test + +# Run Python tests (excluding Rust for now) +just test +``` + +## Acceptance + +**Testable gates:** + +1. **Rust unit tests pass:** `cargo test` in `core/` with 0 failures +2. **Transition table coverage:** Tests for every Phase → Phase transition +3. **Python binding smoke test:** `python -c "from dataing_investigator import Investigator; inv = Investigator(); print(inv.snapshot())"` +4. **Golden fixture tests:** Rust/Python round-trip for all event/intent types +5. **Idempotency test:** Duplicate event_id is rejected gracefully +6. **Panic-free test:** Malformed JSON returns PyResult error, not crash +7. **Deterministic replay test:** Same events → same state (with Temporal test env) +8. **Unexpected call_id handling:** Unexpected call_id produces deterministic `Error`/`Failed` (not silent ignore) +9. **Signal dedup documented:** Temporal signal dedup strategy documented (esp. `continue_as_new` boundary) +10. 
**Build tooling pinned:** Maturin version pinned in pyproject.toml and verified with uv integration + +**NOT required for MVP (future epic):** +- E2E test with live Temporal server +- Coverage percentage metrics (add llvm-cov later) + +## Risks & Mitigations + +| Risk | Mitigation | +|------|------------| +| Maturin/uv integration complexity | Task fn-17.9 focused on build validation; CI builds wheels | +| Temporal replay non-determinism | Rust runs in activities (not workflow), all IDs external | +| History growth from state snapshots | State in DB, not history; `continue_as_new` every 100 steps | +| Native extension distribution | CI builds manylinux/macos wheels; pin Rust toolchain | +| Protocol drift between Rust/Python | Versioned protocol v1; golden fixtures; backwards-compat tests | +| Rollout risk | Feature flag to use Python-only path; gradual rollout | +| Panic propagation | `#![deny(clippy::unwrap_used)]`, `catch_unwind` at boundary | + +## References + +### Existing Code +- `python-packages/dataing/src/dataing/temporal/workflows/investigation.py:67-90` - **Target: workflow state to replace** +- `python-packages/dataing/src/dataing/core/state.py:26-203` - Event log (retained) +- `python-packages/dataing/src/dataing/safety/validator.py:24-128` - SQL safety (reused) +- `pyproject.toml:161-162` - uv workspace configuration + +### Documentation +- [PyO3 Error Handling](https://pyo3.rs/v0.23.5/function/error-handling) +- [Maturin + uv Integration](https://quanttype.net/posts/2025-09-12-uv-and-maturin.html) +- [Temporal Python SDK - Message Passing](https://docs.temporal.io/develop/python/message-passing) +- [Serde Attributes](https://serde.rs/attributes.html) + +## Open Questions (Deferred to Implementation) + +1. **Exact `continue_as_new` threshold:** Start with 100 steps, tune based on history size +2. **State snapshot DB schema:** Defer to fn-17.14 (Temporal integration task) +3. 
**Feature flag mechanism:** Use existing entitlements system or env var diff --git a/.flow/tasks/fn-17.1.json b/.flow/tasks/fn-17.1.json new file mode 100644 index 000000000..f74db12ef --- /dev/null +++ b/.flow/tasks/fn-17.1.json @@ -0,0 +1,23 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:46:01.490848Z", + "created_at": "2026-01-19T01:18:50.390127Z", + "depends_on": [], + "epic": "fn-17", + "evidence": { + "commits": [ + "23f1fd4792dc94bd66d6949506e96240ed304ebf" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.1", + "priority": null, + "spec_path": ".flow/tasks/fn-17.1.md", + "status": "done", + "title": "Scaffold Rust workspace structure", + "updated_at": "2026-01-19T01:50:20.724622Z" +} diff --git a/.flow/tasks/fn-17.1.md b/.flow/tasks/fn-17.1.md new file mode 100644 index 000000000..f38ef7fd3 --- /dev/null +++ b/.flow/tasks/fn-17.1.md @@ -0,0 +1,68 @@ +# fn-17.1 Scaffold Rust workspace structure + +## Description + +Create the `core/` Rust workspace directory structure with workspace-level Cargo.toml and empty crate scaffolds. 
+ +**Directory structure:** +``` +core/ +├── Cargo.toml # Workspace root +├── crates/ +│ └── dataing_investigator/ +│ ├── Cargo.toml +│ └── src/ +│ └── lib.rs +└── bindings/ + └── python/ + ├── Cargo.toml + ├── pyproject.toml + └── src/ + └── lib.rs +``` + +**Workspace Cargo.toml:** +```toml +[workspace] +members = ["crates/dataing_investigator", "bindings/python"] +resolver = "2" +``` + +**dataing_investigator Cargo.toml:** +```toml +[package] +name = "dataing_investigator" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +``` + +## Acceptance + +- [ ] `core/Cargo.toml` exists with workspace members +- [ ] `core/crates/dataing_investigator/` scaffolded with empty lib.rs +- [ ] `core/bindings/python/` scaffolded with empty lib.rs +- [ ] `cargo build` succeeds in `core/` directory +- [ ] `cargo check --workspace` passes + +## Done summary +- Created core/ Rust workspace with workspace-level Cargo.toml +- Added dataing_investigator crate with PROTOCOL_VERSION=1 and clippy deny rules +- Added PyO3 bindings crate with maturin config (>=1.7) +- Added .gitignore for target/ + +Why: +- Establishes versioned protocol foundation for backwards-compatible snapshots +- Sets up panic-free clippy rules from the start + +Verification: +- cargo build -p dataing_investigator: PASS +- cargo check --workspace: PASS +- cargo test: PASS (1 test) +## Evidence +- Commits: 23f1fd4792dc94bd66d6949506e96240ed304ebf +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.10.json b/.flow/tasks/fn-17.10.json new file mode 100644 index 000000000..9dd3102a4 --- /dev/null +++ b/.flow/tasks/fn-17.10.json @@ -0,0 +1,27 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:06:26.908780Z", + "created_at": "2026-01-19T01:18:52.040153Z", + "depends_on": [ + "fn-17.9" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "161eedb6" + ], + "prs": [], + 
"tests": [ + "uv sync", + "just rust-dev", + "Python import" + ] + }, + "id": "fn-17.10", + "priority": null, + "spec_path": ".flow/tasks/fn-17.10.md", + "status": "done", + "title": "Create investigator Python package structure", + "updated_at": "2026-01-19T02:07:43.483458Z" +} diff --git a/.flow/tasks/fn-17.10.md b/.flow/tasks/fn-17.10.md new file mode 100644 index 000000000..5ad37be77 --- /dev/null +++ b/.flow/tasks/fn-17.10.md @@ -0,0 +1,71 @@ +# fn-17.10 Create investigator Python package structure + +## Description + +Create the `python-packages/investigator/` Python package that wraps the Rust bindings and provides the Python runtime. + +**Directory structure:** +``` +python-packages/investigator/ +├── pyproject.toml +└── src/ + └── investigator/ + ├── __init__.py + ├── envelope.py # (Task fn-17.11) + ├── runtime.py # (Task fn-17.13) + └── security.py # (Task fn-17.12) +``` + +**pyproject.toml:** +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "investigator" +version = "0.1.0" +requires-python = ">=3.11" +dependencies = [ + "agent-core", # Rust bindings +] + +[tool.hatch.build.targets.wheel] +packages = ["src/investigator"] +``` + +**__init__.py:** +```python +"""Investigator - Rust-powered investigation state machine runtime.""" + +from dataing_investigator import Investigator, StateError, InvalidTransitionError + +__all__ = ["Investigator", "StateError", "InvalidTransitionError"] +``` + +## Acceptance + +- [ ] `python-packages/investigator/` directory created +- [ ] `pyproject.toml` configured with hatchling +- [ ] Package depends on `agent-core` +- [ ] `__init__.py` re-exports Rust bindings +- [ ] Empty module files created for envelope, runtime, security +- [ ] Package added to uv workspace +- [ ] `uv sync` works with new package + +## Done summary +- Created python-packages/investigator/ package +- pyproject.toml with hatchling build backend +- __init__.py re-exports: Investigator, StateError, 
SerializationError, InvalidTransitionError, protocol_version +- Empty module stubs: envelope.py, security.py, runtime.py +- Added to uv workspace sources +- Added as dependency in root pyproject.toml + +Verification: +- uv sync: PASS (installs investigator) +- just rust-dev: PASS (installs dataing-investigator) +- Python import: PASS +## Evidence +- Commits: 161eedb6 +- Tests: uv sync, just rust-dev, Python import +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.11.json b/.flow/tasks/fn-17.11.json new file mode 100644 index 000000000..0b00ad1c8 --- /dev/null +++ b/.flow/tasks/fn-17.11.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:08:11.393852Z", + "created_at": "2026-01-19T01:18:52.235200Z", + "depends_on": [ + "fn-17.10" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "fab51ee4" + ], + "prs": [], + "tests": [ + "Python smoke test" + ] + }, + "id": "fn-17.11", + "priority": null, + "spec_path": ".flow/tasks/fn-17.11.md", + "status": "done", + "title": "Implement envelope module for tracing", + "updated_at": "2026-01-19T02:09:27.618664Z" +} diff --git a/.flow/tasks/fn-17.11.md b/.flow/tasks/fn-17.11.md new file mode 100644 index 000000000..3ba54ecc8 --- /dev/null +++ b/.flow/tasks/fn-17.11.md @@ -0,0 +1,68 @@ +# fn-17.11 Implement envelope module for tracing + +## Description + +Implement `python-packages/investigator/src/investigator/envelope.py` for distributed tracing context propagation. 
+ +**Envelope TypedDict:** +```python +import json +import uuid +from typing import TypedDict + +class Envelope(TypedDict): + id: str + trace_id: str + parent_id: str | None + payload: dict + +def wrap(payload: dict, trace_id: str, parent_id: str | None = None) -> str: + """Wrap a payload in an envelope for tracing.""" + return json.dumps({ + "id": str(uuid.uuid4()), + "trace_id": trace_id, + "parent_id": parent_id, + "payload": payload + }) + +def unwrap(json_str: str) -> Envelope: + """Unwrap an envelope from JSON string.""" + return json.loads(json_str) + +def create_trace() -> str: + """Create a new trace ID.""" + return str(uuid.uuid4()) +``` + +**Purpose:** +- Provides correlation IDs for distributed tracing +- Links parent/child relationships for event chains +- JSON-based for interop with Rust state machine + +**Integration point:** +- Temporal workflows use `workflow.uuid4()` for deterministic trace IDs +- Local runtime uses standard `uuid.uuid4()` + +## Acceptance + +- [ ] `envelope.py` exists with Envelope TypedDict +- [ ] `wrap()` creates envelope with unique ID +- [ ] `unwrap()` parses envelope from JSON +- [ ] `create_trace()` generates new trace ID +- [ ] Unit tests for wrap/unwrap roundtrip +- [ ] Type hints complete (mypy passes) + +## Done summary +- Created envelope.py with Envelope TypedDict +- wrap() creates envelope with unique ID +- unwrap() parses envelope from JSON with validation +- create_trace() generates new trace ID +- create_child_envelope() for linked events +- Exported from investigator package + +Verification: +- Python smoke test: PASS (wrap/unwrap roundtrip) +## Evidence +- Commits: fab51ee4 +- Tests: Python smoke test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.12.json b/.flow/tasks/fn-17.12.json new file mode 100644 index 000000000..dc41a0e20 --- /dev/null +++ b/.flow/tasks/fn-17.12.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": 
"2026-01-19T02:08:11.588133Z", + "created_at": "2026-01-19T01:18:52.438659Z", + "depends_on": [ + "fn-17.10" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "fab51ee4" + ], + "prs": [], + "tests": [ + "Python smoke test" + ] + }, + "id": "fn-17.12", + "priority": null, + "spec_path": ".flow/tasks/fn-17.12.md", + "status": "done", + "title": "Implement security module with validation", + "updated_at": "2026-01-19T02:09:35.372181Z" +} diff --git a/.flow/tasks/fn-17.12.md b/.flow/tasks/fn-17.12.md new file mode 100644 index 000000000..2dc3b87c7 --- /dev/null +++ b/.flow/tasks/fn-17.12.md @@ -0,0 +1,78 @@ +# fn-17.12 Implement security module with validation + +## Description + +Implement `python-packages/investigator/src/investigator/security.py` with deny-by-default tool call validation. + +**Security validation:** +```python +from typing import Any + +class SecurityViolation(Exception): + """Raised when a tool call violates security policy.""" + pass + +def validate_tool_call( + tool_name: str, + args: dict[str, Any], + scope: dict[str, Any] +) -> None: + """ + Validate a tool call against the security policy. + Raises SecurityViolation if the call is not allowed. + + Defense-in-depth: this runs BEFORE hitting any database. + """ + allowed_tables = scope.get("permissions", []) + + # 1. Validate tool is in allowlist (if scope restricts tools) + allowed_tools = scope.get("allowed_tools") + if allowed_tools is not None and tool_name not in allowed_tools: + raise SecurityViolation(f"Tool '{tool_name}' not in allowlist") + + # 2. Validate table access + if "table_name" in args: + table = args["table_name"] + if table not in allowed_tables: + raise SecurityViolation(f"Access denied to table '{table}'") + + # 3. 
Validate no forbidden patterns in SQL (if applicable) + if "query" in args: + _validate_query_safety(args["query"]) + +def _validate_query_safety(query: str) -> None: + """Check for obviously dangerous SQL patterns.""" + forbidden = ["DROP", "DELETE", "TRUNCATE", "ALTER", "INSERT", "UPDATE"] + query_upper = query.upper() + for pattern in forbidden: + if pattern in query_upper: + raise SecurityViolation(f"Forbidden SQL pattern: {pattern}") +``` + +**Reference:** Existing patterns at `python-packages/dataing/src/dataing/safety/validator.py` + +## Acceptance + +- [ ] `security.py` exists with `SecurityViolation` exception +- [ ] `validate_tool_call()` checks tool allowlist +- [ ] `validate_tool_call()` checks table permissions +- [ ] `_validate_query_safety()` blocks dangerous SQL patterns +- [ ] Deny-by-default: unrecognized tools/tables are rejected +- [ ] Unit tests cover all validation paths +- [ ] Integration with existing safety module patterns + +## Done summary +- Created security.py with SecurityViolation exception +- validate_tool_call() implements deny-by-default +- Tool allowlist validation (optional) +- Table permission validation (deny if no permissions) +- _validate_query_safety() blocks forbidden SQL (DROP, DELETE, etc.) 
+- Word boundary matching to avoid false positives +- create_scope() helper for constructing scope objects + +Verification: +- Python smoke test: PASS (valid call, bad table, bad SQL) +## Evidence +- Commits: fab51ee4 +- Tests: Python smoke test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.13.json b/.flow/tasks/fn-17.13.json new file mode 100644 index 000000000..deb6ec880 --- /dev/null +++ b/.flow/tasks/fn-17.13.json @@ -0,0 +1,26 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:09:57.862029Z", + "created_at": "2026-01-19T01:18:52.640563Z", + "depends_on": [ + "fn-17.11", + "fn-17.12" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "2abac50d" + ], + "prs": [], + "tests": [ + "Python smoke test" + ] + }, + "id": "fn-17.13", + "priority": null, + "spec_path": ".flow/tasks/fn-17.13.md", + "status": "done", + "title": "Implement runtime module", + "updated_at": "2026-01-19T02:11:18.089645Z" +} diff --git a/.flow/tasks/fn-17.13.md b/.flow/tasks/fn-17.13.md new file mode 100644 index 000000000..a2b9c1b9d --- /dev/null +++ b/.flow/tasks/fn-17.13.md @@ -0,0 +1,97 @@ +# fn-17.13 Implement runtime module + +## Description + +Implement `python-packages/investigator/src/investigator/runtime.py` with a local execution loop for running investigations outside of Temporal. + +**Local runtime loop:** +```python +import json +from typing import Any, Callable +from dataing_investigator import Investigator +from .envelope import wrap, unwrap, create_trace +from .security import validate_tool_call, SecurityViolation + +ToolExecutor = Callable[[str, dict[str, Any]], Any] + +async def run_local( + objective: str, + scope: dict[str, Any], + tool_executor: ToolExecutor, + user_responder: Callable[[str], str] | None = None, +) -> dict[str, Any]: + """ + Run an investigation locally (not in Temporal). 
+ + Args: + objective: The investigation objective + scope: Security scope with permissions + tool_executor: Function to execute tool calls + user_responder: Optional function to get user responses + + Returns: + Final investigation result + """ + inv = Investigator() + trace_id = create_trace() + parent_id = None + + # Start event + start_event = wrap( + {"type": "Start", "payload": {"objective": objective, "scope": scope}}, + trace_id + ) + + while True: + intent_json = inv.ingest(start_event if parent_id is None else None) + intent = json.loads(intent_json) + + if intent["type"] == "Idle": + # Need to feed next event + pass + elif intent["type"] == "Call": + # Validate and execute tool + validate_tool_call(intent["payload"]["name"], intent["payload"]["args"], scope) + result = await tool_executor(intent["payload"]["name"], intent["payload"]["args"]) + # Create CallResult event... + elif intent["type"] == "RequestUser": + if user_responder is None: + raise RuntimeError("User response required but no responder provided") + response = user_responder(intent["payload"]["question"]) + # Create UserResponse event... 
+ elif intent["type"] == "Finish": + return intent["payload"] + elif intent["type"] == "Error": + raise RuntimeError(intent["payload"]["message"]) +``` + +**Purpose:** +- Enables local testing without Temporal +- Demonstrates integration pattern for tool execution +- Security validation before every tool call + +## Acceptance + +- [ ] `runtime.py` exists with `run_local()` function +- [ ] Integration with Investigator via JSON +- [ ] Security validation before tool execution +- [ ] User response handling for HITL +- [ ] Async execution pattern +- [ ] Error handling for all intent types +- [ ] Unit tests with mock tool executor + +## Done summary +- Created runtime.py with run_local() async function +- LocalInvestigator class for fine-grained investigation control +- Security validation (validate_tool_call) before every tool execution +- Error handling for all intent types (Call, RequestUser, Finish, Error) +- Snapshot/restore support for resumability +- Max steps limit to prevent infinite loops +- Exported from investigator package + +Verification: +- Python smoke test: PASS (start, call result, snapshot/restore) +## Evidence +- Commits: 2abac50d +- Tests: Python smoke test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.14.json b/.flow/tasks/fn-17.14.json new file mode 100644 index 000000000..5f37a4257 --- /dev/null +++ b/.flow/tasks/fn-17.14.json @@ -0,0 +1,24 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:11:43.226180Z", + "created_at": "2026-01-19T01:18:52.826670Z", + "depends_on": [ + "fn-17.13" + ], + "epic": "fn-17", + "evidence": { + "commits": [], + "prs": [], + "tests": [ + "Core imports", + "Temporal imports" + ] + }, + "id": "fn-17.14", + "priority": null, + "spec_path": ".flow/tasks/fn-17.14.md", + "status": "done", + "title": "Integrate Rust state machine with Temporal workflows", + "updated_at": "2026-01-19T02:15:57.216274Z" +} diff --git a/.flow/tasks/fn-17.14.md 
b/.flow/tasks/fn-17.14.md new file mode 100644 index 000000000..f7781ed0f --- /dev/null +++ b/.flow/tasks/fn-17.14.md @@ -0,0 +1,112 @@ +# fn-17.14 Integrate Rust state machine with Temporal workflows + +## Description + +Integrate the Rust state machine with the existing Temporal `InvestigationWorkflow` at `python-packages/dataing/src/dataing/temporal/workflows/investigation.py`. + +**Key changes:** + +1. **Import Rust bindings:** +```python +from dataing_investigator import Investigator +from investigator.envelope import wrap +from investigator.security import validate_tool_call +``` + +2. **Use workflow.uuid4() for deterministic IDs:** +```python +@workflow.run +async def run(self, objective: str, scope: dict): + inv = Investigator() + trace_id = str(workflow.uuid4()) # Deterministic! + + # Start event with deterministic ID + event_id = str(workflow.uuid4()) + start_event = json.dumps({ + "id": event_id, + "trace_id": trace_id, + "payload": {"type": "Start", "payload": {"objective": objective, "scope": scope}} + }) +``` + +3. **Brain step activity:** +```python +@activity.defn +async def run_brain_step(state_json: str | None, event_json: str) -> dict: + """Execute one step of the state machine.""" + if state_json: + inv = Investigator.restore(state_json) + else: + inv = Investigator() + + intent_json = inv.ingest(event_json) + return { + "new_state": inv.snapshot(), + "intent": json.loads(intent_json) + } +``` + +4. **Query/Signal for HITL:** +```python +@workflow.query +def get_status(self) -> dict: + """Expose current question for UI polling.""" + return { + "waiting_for_user": self._current_question is not None, + "question": self._current_question + } + +@workflow.signal +def submit_user_response(self, content: str): + """Signal with user's response.""" + self._user_response_queue.append(content) +``` + +5. 
**Signal dedup and continue_as_new:** +```python +# Signal dedup: use signal ID + seen_signal_ids set +@workflow.signal +def submit_user_response(self, signal_id: str, content: str): + if signal_id in self._seen_signal_ids: + return # Already processed + self._seen_signal_ids.add(signal_id) + self._user_response_queue.append(content) + +# continue_as_new every N steps to bound history +if self._step_count >= 100: + # Pass compacted state to new execution + await workflow.continue_as_new( + args=[objective, scope, self._compacted_checkpoint()] + ) +``` + +**Reference:** Existing workflow at `python-packages/dataing/src/dataing/temporal/workflows/investigation.py:67-183` + +## Acceptance + +- [ ] InvestigationWorkflow uses Rust Investigator (import: `from dataing_investigator import Investigator`) +- [ ] All UUIDs generated via `workflow.uuid4()` +- [ ] Brain step implemented as activity +- [ ] State serialized/restored via JSON snapshots +- [ ] Security validation before tool execution +- [ ] Signal/Query patterns preserved for HITL +- [ ] **Signal dedup strategy documented and implemented (seen_signal_ids)** +- [ ] **continue_as_new at step threshold (100) with checkpoint** +- [ ] Workflow tests pass with deterministic replay + +## Done summary +- Created temporal.py with InvestigatorWorkflow using Rust Investigator +- brain_step activity for pure computation (state machine runs in activity) +- Signal dedup via seen_signal_ids set +- continue_as_new at MAX_STEPS_BEFORE_CONTINUE = 100 +- HITL support via user_response signal and get_status query +- Mock tool implementations for testing +- Conditional temporal exports (requires temporalio optional dependency) + +Verification: +- Core imports: PASS +- Temporal imports: PASS (temporalio 1.20.0 available) +## Evidence +- Commits: +- Tests: Core imports, Temporal imports +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.15.json b/.flow/tasks/fn-17.15.json new file mode 100644 index 000000000..c5db867ce 
--- /dev/null +++ b/.flow/tasks/fn-17.15.json @@ -0,0 +1,26 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:16:20.562555Z", + "created_at": "2026-01-19T01:18:53.007755Z", + "depends_on": [ + "fn-17.9" + ], + "epic": "fn-17", + "evidence": { + "commits": [], + "prs": [], + "tests": [ + "test_investigator.py (17 tests)", + "test_envelope.py (13 tests)", + "test_security.py (25 tests)", + "test_runtime.py (11 tests)" + ] + }, + "id": "fn-17.15", + "priority": null, + "spec_path": ".flow/tasks/fn-17.15.md", + "status": "done", + "title": "Add Python integration tests for bindings", + "updated_at": "2026-01-19T02:21:35.654173Z" +} diff --git a/.flow/tasks/fn-17.15.md b/.flow/tasks/fn-17.15.md new file mode 100644 index 000000000..1443370cb --- /dev/null +++ b/.flow/tasks/fn-17.15.md @@ -0,0 +1,96 @@ +# fn-17.15 Add Python integration tests for bindings + +## Description + +Add integration tests for the Python bindings in `python-packages/investigator/tests/`. + +**Test file structure:** +``` +python-packages/investigator/tests/ +├── __init__.py +├── conftest.py +├── test_investigator.py +├── test_envelope.py +├── test_security.py +└── test_runtime.py +``` + +**test_investigator.py:** +```python +import json +import pytest +from dataing_investigator import Investigator, StateError + +def test_new_investigator(): + inv = Investigator() + state = json.loads(inv.snapshot()) + assert state["phase"]["type"] == "Init" + assert state["step"] == 0 + +def test_start_event(): + inv = Investigator() + event = json.dumps({ + "type": "Start", + "payload": { + "objective": "Test investigation", + "scope": {"user_id": "u1", "tenant_id": "t1", "permissions": [], "extra": {}} + } + }) + intent = json.loads(inv.ingest(event)) + assert intent["type"] == "Call" + assert intent["payload"]["name"] == "get_schema" + +def test_restore_from_snapshot(): + inv1 = Investigator() + # ... advance state ... 
+ snapshot = inv1.snapshot() + inv2 = Investigator.restore(snapshot) + assert inv1.snapshot() == inv2.snapshot() + +def test_invalid_json_raises_error(): + inv = Investigator() + with pytest.raises(StateError): + inv.ingest("not valid json") +``` + +**test_security.py:** +```python +import pytest +from investigator.security import validate_tool_call, SecurityViolation + +def test_forbidden_table_raises(): + scope = {"permissions": ["allowed_table"]} + with pytest.raises(SecurityViolation): + validate_tool_call("query", {"table_name": "forbidden_table"}, scope) + +def test_forbidden_sql_raises(): + scope = {"permissions": []} + with pytest.raises(SecurityViolation): + validate_tool_call("execute", {"query": "DROP TABLE users"}, scope) +``` + +## Acceptance + +- [ ] Test files exist in `python-packages/investigator/tests/` +- [ ] `test_investigator.py` covers new/restore/snapshot/ingest +- [ ] `test_envelope.py` covers wrap/unwrap roundtrip +- [ ] `test_security.py` covers all validation paths +- [ ] `test_runtime.py` covers local runtime with mock executor +- [ ] All tests pass with `just test` +- [ ] No test requires Temporal running + +## Done summary +- Created test directory structure in python-packages/investigator/tests/ +- test_investigator.py: 17 tests covering Investigator basics, events, serialization, errors, full cycle +- test_envelope.py: 13 tests covering wrap/unwrap, trace IDs, child envelopes, serialization +- test_security.py: 25 tests covering tool validation, SQL patterns, create_scope +- test_runtime.py: 11 tests covering LocalInvestigator and run_local +- Added pytest and pytest-asyncio as dev dependencies +- All 74 tests pass + +Verification: +- pytest: 74 passed +## Evidence +- Commits: +- Tests: test_investigator.py (17 tests), test_envelope.py (13 tests), test_security.py (25 tests), test_runtime.py (11 tests) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.16.json b/.flow/tasks/fn-17.16.json new file mode 100644 index 
000000000..8387d5097 --- /dev/null +++ b/.flow/tasks/fn-17.16.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:21:41.831011Z", + "created_at": "2026-01-19T01:18:53.204267Z", + "depends_on": [ + "fn-17.14" + ], + "epic": "fn-17", + "evidence": { + "commits": [], + "prs": [], + "tests": [ + "TestInvestigatorWorkflowE2E (4 tests)", + "TestBrainStepActivity (2 tests)", + "TestSignalDeduplication (1 test)" + ] + }, + "id": "fn-17.16", + "priority": null, + "spec_path": ".flow/tasks/fn-17.16.md", + "status": "done", + "title": "Add E2E workflow tests", + "updated_at": "2026-01-19T02:24:24.972993Z" +} diff --git a/.flow/tasks/fn-17.16.md b/.flow/tasks/fn-17.16.md new file mode 100644 index 000000000..21cb189c5 --- /dev/null +++ b/.flow/tasks/fn-17.16.md @@ -0,0 +1,107 @@ +# fn-17.16 Add E2E workflow tests + +## Description + +Add end-to-end tests for the Temporal workflow with Rust state machine integration. + +**Test location:** `python-packages/dataing/tests/integration/temporal/test_investigation_workflow.py` + +**Test scenarios:** + +1. **Full investigation lifecycle:** +```python +@pytest.mark.asyncio +async def test_full_investigation_lifecycle(temporal_client, worker): + """Test complete investigation from start to finish.""" + handle = await temporal_client.start_workflow( + InvestigationWorkflow.run, + args=["Test objective", {"user_id": "u1", "tenant_id": "t1", "permissions": ["orders"]}], + id=f"test-{uuid.uuid4()}", + task_queue="test-queue", + ) + + result = await handle.result() + assert "insight" in result +``` + +2. **HITL signal/query flow:** +```python +@pytest.mark.asyncio +async def test_user_response_signal(temporal_client, worker): + """Test human-in-the-loop via signals.""" + handle = await temporal_client.start_workflow(...) 
+ + # Wait for workflow to request user input + status = await handle.query(InvestigationWorkflow.get_status) + while not status["waiting_for_user"]: + await asyncio.sleep(0.1) + status = await handle.query(InvestigationWorkflow.get_status) + + # Send user response via signal + await handle.signal(InvestigationWorkflow.submit_user_response, "User's answer") + + # Verify workflow continues + result = await handle.result() + assert result is not None +``` + +3. **Deterministic replay test:** +```python +@pytest.mark.asyncio +async def test_deterministic_replay(temporal_client, worker): + """Verify workflow replays deterministically.""" + # Run workflow, capture history + # Restart worker, replay from history + # Assert same result +``` + +4. **Cancel signal:** +```python +@pytest.mark.asyncio +async def test_cancel_investigation(temporal_client, worker): + """Test cancellation via signal.""" + handle = await temporal_client.start_workflow(...) + await handle.signal(InvestigationWorkflow.cancel_investigation) + + with pytest.raises(WorkflowFailureError): + await handle.result() +``` + +**Fixtures:** +```python +@pytest.fixture +async def temporal_client(): + return await Client.connect("localhost:7233") + +@pytest.fixture +async def worker(temporal_client): + async with Worker(temporal_client, task_queue="test-queue", workflows=[InvestigationWorkflow], activities=[...]): + yield +``` + +## Acceptance + +- [ ] E2E test file exists in integration tests +- [ ] Full lifecycle test passes +- [ ] HITL signal/query test passes +- [ ] Cancel test passes +- [ ] Tests run against local Temporal server +- [ ] Deterministic replay verified +- [ ] Tests integrated with `just test` (requires Temporal) + +## Done summary +- Created E2E test file in python-packages/dataing/tests/integration/temporal/ +- TestInvestigatorWorkflowE2E: full lifecycle, query status, cancel signal, deterministic replay +- TestBrainStepActivity: unit tests for brain_step activity callable directly +- 
TestSignalDeduplication: verifies duplicate signal handling +- Tests marked with @pytest.mark.temporal and skip if SKIP_TEMPORAL_TESTS=1 +- 7 tests total, all properly skipped when Temporal not available + +Verification: +- Syntax check: PASS +- Test collection: 7 tests found +- Skip behavior: All 7 skipped (SKIP_TEMPORAL_TESTS=1 default) +## Evidence +- Commits: +- Tests: TestInvestigatorWorkflowE2E (4 tests), TestBrainStepActivity (2 tests), TestSignalDeduplication (1 test) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.2.json b/.flow/tasks/fn-17.2.json new file mode 100644 index 000000000..cdd47f7a4 --- /dev/null +++ b/.flow/tasks/fn-17.2.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:51:44.596198Z", + "created_at": "2026-01-19T01:18:50.570588Z", + "depends_on": [ + "fn-17.1" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "bbc1bb88" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.2", + "priority": null, + "spec_path": ".flow/tasks/fn-17.2.md", + "status": "done", + "title": "Implement investigator_core domain types", + "updated_at": "2026-01-19T01:53:07.705150Z" +} diff --git a/.flow/tasks/fn-17.2.md b/.flow/tasks/fn-17.2.md new file mode 100644 index 000000000..90d75e02c --- /dev/null +++ b/.flow/tasks/fn-17.2.md @@ -0,0 +1,67 @@ +# fn-17.2 Implement dataing_investigator domain types + +## Description + +Create `core/crates/dataing_investigator/src/domain.rs` with foundational domain types. 
+ +**Types to implement:** + +```rust +use std::collections::BTreeMap; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct Scope { + pub user_id: String, + pub tenant_id: String, + pub permissions: Vec<String>, + pub extra: BTreeMap<String, Value>, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "snake_case")] +pub enum CallKind { + Llm, + Tool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CallMeta { + pub id: String, + pub name: String, + pub kind: CallKind, + pub phase_context: String, + pub created_at_step: u64, +} +``` + +**Reference:** Existing Python types at `python-packages/dataing/src/dataing/core/domain_types.py` + +## Acceptance + +- [ ] `domain.rs` exists with Scope, CallKind, CallMeta structs +- [ ] All types derive Serialize, Deserialize, Debug, Clone, PartialEq +- [ ] BTreeMap used for ordered extra fields +- [ ] `cargo test` passes (basic serialization roundtrip test) +- [ ] Types exported via lib.rs + +## Done summary +- Created `core/crates/dataing_investigator/src/domain.rs` with Scope, CallKind, CallMeta types +- All types derive Serialize, Deserialize, Debug, Clone, PartialEq +- Scope.extra uses BTreeMap for deterministic serialization order +- Added serde(default) on extra field for forward compatibility +- Exported types via lib.rs with pub use +- Added 5 serialization roundtrip tests + +Why: +- Foundational types needed by protocol (fn-17.3) and state (fn-17.4) +- BTreeMap ensures reproducible JSON for protocol stability + +Verification: +- cargo test: PASS (6 tests) +- cargo clippy --workspace: PASS (no warnings) +## Evidence +- Commits: bbc1bb88 +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.3.json b/.flow/tasks/fn-17.3.json new file mode 100644 index 000000000..334e6eb58 --- /dev/null +++ b/.flow/tasks/fn-17.3.json @@ -0,0 +1,25 @@ +{ + "assignee": 
"bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:53:21.451536Z", + "created_at": "2026-01-19T01:18:50.757624Z", + "depends_on": [ + "fn-17.2" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "9994c1df" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.3", + "priority": null, + "spec_path": ".flow/tasks/fn-17.3.md", + "status": "done", + "title": "Implement protocol types (Event, Intent)", + "updated_at": "2026-01-19T01:54:36.888395Z" +} diff --git a/.flow/tasks/fn-17.3.md b/.flow/tasks/fn-17.3.md new file mode 100644 index 000000000..f8427cbdf --- /dev/null +++ b/.flow/tasks/fn-17.3.md @@ -0,0 +1,72 @@ +# fn-17.3 Implement protocol types (Event, Intent) + +## Description + +Create `core/crates/dataing_investigator/src/protocol.rs` with Event and Intent enums for communication between Python runtime and Rust state machine. + +**Event types (input to state machine):** + +```rust +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use crate::domain::{Scope, CallKind}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", content = "payload")] +pub enum Event { + Start { objective: String, scope: Scope }, + CallResult { call_id: String, output: Value }, + UserResponse { content: String }, + Cancel, +} +``` + +**Intent types (output from state machine):** + +```rust +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", content = "payload")] +pub enum Intent { + Idle, + Call { + call_id: String, + kind: CallKind, + name: String, + args: Value, + reasoning: String, + }, + RequestUser { question: String }, + Finish { insight: String }, + Error { message: String }, +} +``` + +**Reference:** Existing Python events at `python-packages/dataing/src/dataing/core/state.py:26-58` + +## Acceptance + +- [ ] `protocol.rs` exists with Event and Intent enums +- [ ] Tagged enums for JSON serialization (`#[serde(tag = "type", content = "payload")]`) +- [ ] All 
variants match blueprint specification +- [ ] JSON roundtrip tests pass +- [ ] Types exported via lib.rs + +## Done summary +- Created `core/crates/dataing_investigator/src/protocol.rs` with Event and Intent enums +- Event variants: Start, CallResult, UserResponse, Cancel +- Intent variants: Idle, Call, RequestUser, Finish, Error +- Tagged enum serialization with `#[serde(tag = "type", content = "payload")]` +- Exported via lib.rs with pub use +- Added 12 serialization roundtrip tests + +Why: +- Forms the wire protocol contract between Python runtime and Rust state machine +- Tagged enums allow explicit type identification in JSON + +Verification: +- cargo test: PASS (17 tests) +- cargo clippy --workspace: PASS +## Evidence +- Commits: 9994c1df +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.4.json b/.flow/tasks/fn-17.4.json new file mode 100644 index 000000000..eb1ccd07f --- /dev/null +++ b/.flow/tasks/fn-17.4.json @@ -0,0 +1,26 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:54:49.849127Z", + "created_at": "2026-01-19T01:18:50.937501Z", + "depends_on": [ + "fn-17.2", + "fn-17.3" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "a8ee797d" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.4", + "priority": null, + "spec_path": ".flow/tasks/fn-17.4.md", + "status": "done", + "title": "Implement state module with Phase enum", + "updated_at": "2026-01-19T01:56:28.411073Z" +} diff --git a/.flow/tasks/fn-17.4.md b/.flow/tasks/fn-17.4.md new file mode 100644 index 000000000..453d092f7 --- /dev/null +++ b/.flow/tasks/fn-17.4.md @@ -0,0 +1,81 @@ +# fn-17.4 Implement state module with Phase enum + +## Description + +Create `core/crates/dataing_investigator/src/state.rs` with State struct and Phase enum. 
+ +**Phase enum (complete list):** + +```rust +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "type", content = "data")] +pub enum Phase { + Init, + GatheringContext { schema_call_id: Option<String> }, + GeneratingHypotheses { llm_call_id: Option<String> }, + EvaluatingHypotheses { pending_call_ids: Vec<String> }, + AwaitingUser { question: String }, + Synthesizing { synthesis_call_id: Option<String> }, + Finished { insight: String }, + Failed { error: String }, +} +``` + +**State struct:** + +```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct State { + pub version: u32, + pub sequence: u64, // For ID generation + pub step: u64, // Logical clock (events ingested) + + pub objective: Option<String>, + pub scope: Option<Scope>, + pub phase: Phase, + + pub evidence: BTreeMap<String, Value>, + pub call_index: BTreeMap<String, CallMeta>, + pub call_order: Vec<String>, +} + +impl State { + pub fn new() -> Self { ... } + pub fn generate_id(&mut self, prefix: &str) -> String { ... } +} +``` + +**Key design:** +- `step` = logical clock (incremented on each event) +- `sequence` = ID generator (incremented on each `generate_id` call) +- BTreeMap for ordered evidence/call storage + +## Acceptance + +- [ ] `state.rs` exists with State struct and Phase enum +- [ ] Phase has all 8 variants (Init through Failed) +- [ ] State has version, sequence, step fields +- [ ] `generate_id()` increments sequence and returns prefixed ID +- [ ] BTreeMap used for evidence and call_index +- [ ] Serialization roundtrip tests pass + +## Done summary +- Created `core/crates/dataing_investigator/src/state.rs` +- Phase enum with 8 variants: Init, GatheringContext, GeneratingHypotheses, EvaluatingHypotheses, AwaitingUser, Synthesizing, Finished, Failed +- State struct with version, sequence, step fields +- generate_id() method increments sequence and returns prefixed ID +- BTreeMap for evidence and call_index (ordered serialization) +- serde(default) on optional fields for forward compatibility +- Exported via lib.rs with pub use + +Why: +- 
Core state container for investigation lifecycle +- Versioned snapshots enable resume-from-checkpoint + +Verification: +- cargo test: PASS (26 tests including doc test) +- cargo clippy --workspace: PASS (no warnings) +## Evidence +- Commits: a8ee797d +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.5.json b/.flow/tasks/fn-17.5.json new file mode 100644 index 000000000..e64b1b667 --- /dev/null +++ b/.flow/tasks/fn-17.5.json @@ -0,0 +1,25 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:56:41.924069Z", + "created_at": "2026-01-19T01:18:51.116691Z", + "depends_on": [ + "fn-17.4" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "4f08b714" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.5", + "priority": null, + "spec_path": ".flow/tasks/fn-17.5.md", + "status": "done", + "title": "Implement state machine logic", + "updated_at": "2026-01-19T01:58:48.522561Z" +} diff --git a/.flow/tasks/fn-17.5.md b/.flow/tasks/fn-17.5.md new file mode 100644 index 000000000..e1d28b890 --- /dev/null +++ b/.flow/tasks/fn-17.5.md @@ -0,0 +1,75 @@ +# fn-17.5 Implement state machine logic + +## Description + +Create `core/crates/dataing_investigator/src/machine.rs` with the Investigator struct and state transition logic. + +**Investigator struct:** + +```rust +pub struct Investigator { + state: State, +} + +impl Investigator { + pub fn new() -> Self { Self { state: State::new() } } + pub fn restore(state: State) -> Self { Self { state } } + pub fn snapshot(&self) -> State { self.state.clone() } + + pub fn ingest(&mut self, event: Option<Event>) -> Intent { + if let Some(e) = event { + self.state.step += 1; // Increment logical clock + self.apply(e); + } + self.decide() + } + + fn apply(&mut self, event: Event) { ... } + fn decide(&mut self) -> Intent { ... } + fn record_meta(&mut self, id: &str, name: &str, kind: CallKind, ctx: &str) { ... } +} +``` + +**Key transition logic:** +1. 
`Event::Start` → transition to `GatheringContext` +2. `Event::CallResult` → validate expected call_id, transition to next phase +3. `Event::UserResponse` → exit `AwaitingUser`, continue workflow +4. `Event::Cancel` → transition to `Failed` + +**Strict phase transition enforcement:** +- Only transition when receiving the EXACT call_id that was expected +- **Unexpected call_id → deterministic `Intent::Error` or transition to `Failed` phase (never silent ignore)** +- Return `Intent::Error` for unexpected events + +## Acceptance + +- [ ] `machine.rs` exists with Investigator struct +- [ ] `new()`, `restore()`, `snapshot()` methods work correctly +- [ ] `ingest()` increments logical clock on event +- [ ] `apply()` handles all Event variants +- [ ] `decide()` returns appropriate Intent for each Phase +- [ ] Strict expected_id checks prevent invalid transitions +- [ ] **Unexpected call_id produces deterministic Error/Failed (not silent ignore)** +- [ ] `cargo test` passes all state machine tests + +## Done summary +- Created `core/crates/dataing_investigator/src/machine.rs` +- Investigator struct with new(), restore(), snapshot() methods +- ingest() processes events, increments logical clock, returns intents +- apply() handles all Event variants (Start, CallResult, UserResponse, Cancel) +- decide() returns appropriate Intent for each Phase +- Strict call_id validation: unexpected IDs produce Failed phase + Error intent +- record_meta() tracks call metadata in call_index +- Full workflow test from Init to Finished + +Why: +- Core state machine logic enabling deterministic investigation workflows +- Strict validation ensures protocol correctness + +Verification: +- cargo test: PASS (43 tests including 2 doc tests) +- cargo clippy --workspace: PASS (no warnings) +## Evidence +- Commits: 4f08b714 +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.6.json b/.flow/tasks/fn-17.6.json new file mode 100644 index 000000000..9255f0600 --- 
/dev/null +++ b/.flow/tasks/fn-17.6.json @@ -0,0 +1,28 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:59:33.846198Z", + "created_at": "2026-01-19T01:18:51.293104Z", + "depends_on": [ + "fn-17.5" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "4f08b714", + "a8ee797d", + "9994c1df", + "bbc1bb88" + ], + "prs": [], + "tests": [ + "cargo test" + ] + }, + "id": "fn-17.6", + "priority": null, + "spec_path": ".flow/tasks/fn-17.6.md", + "status": "done", + "title": "Add Rust unit tests", + "updated_at": "2026-01-19T01:59:48.056567Z" +} diff --git a/.flow/tasks/fn-17.6.md b/.flow/tasks/fn-17.6.md new file mode 100644 index 000000000..ac6fced28 --- /dev/null +++ b/.flow/tasks/fn-17.6.md @@ -0,0 +1,81 @@ +# fn-17.6 Add Rust unit tests + +## Description + +Add comprehensive unit tests for the dataing_investigator crate in `core/crates/dataing_investigator/src/tests/` or as inline `#[cfg(test)]` modules. + +**Test categories:** + +1. **Domain type tests:** + - Scope serialization roundtrip + - CallKind enum values + - CallMeta with BTreeMap ordering + +2. **Protocol tests:** + - Event variants serialize correctly + - Intent variants serialize correctly + - Tagged enum JSON format verification + +3. **State tests:** + - State::new() defaults + - generate_id() sequence incrementing + - Phase enum coverage + +4. **Machine tests:** + - Full investigation lifecycle (Init → Finished) + - Phase transition guards + - Unexpected event handling + - Cancel during various phases + - AwaitingUser flow + - restore() from snapshot + +**Test patterns:** +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_investigation_lifecycle() { + let mut inv = Investigator::new(); + // Start + let intent = inv.ingest(Some(Event::Start { ... })); + assert!(matches!(intent, Intent::Call { name, .. } if name == "get_schema")); + // Continue... 
+ } +} +``` + +## Acceptance + +- [ ] Tests exist for all modules (domain, protocol, state, machine) +- [ ] `cargo test` passes with 0 failures +- [ ] Coverage > 80% on core logic +- [ ] Edge cases tested (cancel, invalid transitions, restore) +- [ ] No panics in any test scenario + +## Done summary +Tests were added incrementally with each module implementation: +- domain.rs: 5 tests (serialization, BTreeMap ordering, defaults) +- protocol.rs: 12 tests (Event/Intent variants, roundtrips) +- state.rs: 8 tests (State lifecycle, Phase enum, generate_id) +- machine.rs: 16 tests (full workflow, cancel, invalid transitions, restore) +- 2 doc tests for code examples + +Total: 43 tests covering all modules with comprehensive edge cases. + +Edge cases covered: +- Cancel during various phases +- Unexpected call_id handling +- Start in non-Init phase +- UserResponse in wrong phase +- Restore from snapshot + +Verification: +- cargo test: PASS (43 tests) +- All phases and transitions tested +- No panics in any scenario +## Evidence +- Commits: 4f08b714, a8ee797d, 9994c1df, bbc1bb88 +- Tests: cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.7.json b/.flow/tasks/fn-17.7.json new file mode 100644 index 000000000..3aceb8bf8 --- /dev/null +++ b/.flow/tasks/fn-17.7.json @@ -0,0 +1,26 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T01:59:55.403560Z", + "created_at": "2026-01-19T01:18:51.482597Z", + "depends_on": [ + "fn-17.1" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "7b9638d3" + ], + "prs": [], + "tests": [ + "maturin develop --uv", + "cargo test" + ] + }, + "id": "fn-17.7", + "priority": null, + "spec_path": ".flow/tasks/fn-17.7.md", + "status": "done", + "title": "Set up PyO3 bindings crate with Maturin", + "updated_at": "2026-01-19T02:02:01.132542Z" +} diff --git a/.flow/tasks/fn-17.7.md b/.flow/tasks/fn-17.7.md new file mode 100644 index 000000000..ad0fcb342 --- /dev/null +++ 
b/.flow/tasks/fn-17.7.md @@ -0,0 +1,78 @@ +# fn-17.7 Set up PyO3 bindings crate with Maturin + +## Description + +Configure the PyO3/Maturin binding crate at `core/bindings/python/`. + +**Cargo.toml:** +```toml +[package] +name = "dataing_investigator_py" # Internal cargo name +version = "0.1.0" +edition = "2021" + +[lib] +name = "dataing_investigator" # Python module name +crate-type = ["cdylib"] + +[profile.release] +panic = "unwind" # Required for catch_unwind + +[dependencies] +pyo3 = { version = "0.23", features = ["extension-module", "abi3-py311"] } +serde = "1.0" +serde_json = "1.0" +dataing_investigator = { path = "../../crates/dataing_investigator" } +``` + +**pyproject.toml:** +```toml +[build-system] +requires = ["maturin>=1.7,<2.0"] # Pinned for uv support +build-backend = "maturin" + +[project] +name = "dataing-investigator" +requires-python = ">=3.11" + +[tool.maturin] +bindings = "pyo3" +``` + +**Minimal lib.rs:** +```rust +use pyo3::prelude::*; + +#[pymodule] +fn dataing_investigator(_py: Python, _m: &Bound<'_, PyModule>) -> PyResult<()> { + Ok(()) +} +``` + +## Acceptance + +- [ ] `core/bindings/python/Cargo.toml` configured with cdylib +- [ ] `core/bindings/python/pyproject.toml` uses maturin backend with pinned version (>=1.7) +- [ ] `maturin develop --uv` succeeds in binding directory +- [ ] `python -c "from dataing_investigator import Investigator"` works after build +- [ ] abi3-py311 feature enabled for compatibility +- [ ] `panic = "unwind"` set in release profile + +## Done summary +- PyO3 bindings crate configured at `core/bindings/python/` +- Cargo.toml with cdylib, workspace dependencies +- pyproject.toml with maturin >=1.7, abi3-py311 +- Investigator Python class with new(), restore(), snapshot(), ingest() +- Helper methods: current_phase(), current_step(), __repr__ +- JSON serialization for event/intent communication + +Verification: +- `maturin develop --uv` succeeds +- `from dataing_investigator import Investigator` works +- 
`Investigator().current_phase()` returns 'init' +- cargo test: PASS (43 tests) +- cargo clippy: PASS +## Evidence +- Commits: 7b9638d3 +- Tests: maturin develop --uv, cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.8.json b/.flow/tasks/fn-17.8.json new file mode 100644 index 000000000..2f4d73939 --- /dev/null +++ b/.flow/tasks/fn-17.8.json @@ -0,0 +1,28 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:02:14.200593Z", + "created_at": "2026-01-19T01:18:51.679901Z", + "depends_on": [ + "fn-17.5", + "fn-17.7" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "50aa9b99" + ], + "prs": [], + "tests": [ + "maturin develop", + "Python smoke test", + "cargo test" + ] + }, + "id": "fn-17.8", + "priority": null, + "spec_path": ".flow/tasks/fn-17.8.md", + "status": "done", + "title": "Implement panic-free Python wrappers", + "updated_at": "2026-01-19T02:03:51.497124Z" +} diff --git a/.flow/tasks/fn-17.8.md b/.flow/tasks/fn-17.8.md new file mode 100644 index 000000000..fa1e54f65 --- /dev/null +++ b/.flow/tasks/fn-17.8.md @@ -0,0 +1,88 @@ +# fn-17.8 Implement panic-free Python wrappers + +## Description + +Implement PyO3 wrappers in `core/bindings/python/src/lib.rs` that expose the Rust state machine to Python without panics. 
+ +**Investigator class:** +```rust +use pyo3::prelude::*; +use pyo3::exceptions::{PyValueError, PyRuntimeError}; +use dataing_investigator::{machine::Investigator as RustInvestigator, state::State}; + +#[pyclass] +struct Investigator { + inner: RustInvestigator, +} + +#[pymethods] +impl Investigator { + #[new] + fn new() -> Self { + Investigator { inner: RustInvestigator::new() } + } + + #[staticmethod] + fn restore(state_json: String) -> PyResult<Self> { + let state: State = serde_json::from_str(&state_json) + .map_err(|e| PyValueError::new_err(format!("Invalid state JSON: {e}")))?; + Ok(Investigator { inner: RustInvestigator::restore(state) }) + } + + fn snapshot(&self) -> PyResult<String> { + serde_json::to_string(&self.inner.snapshot()) + .map_err(|e| PyRuntimeError::new_err(format!("Snapshot failed: {e}"))) + } + + fn ingest(&mut self, event_json: Option<String>) -> PyResult<String> { + // Parse event, call inner.ingest(), serialize intent + // All errors return PyResult, no panics + } +} +``` + +**Custom exceptions:** +```rust +use pyo3::create_exception; + +create_exception!(dataing_investigator, StateError, pyo3::exceptions::PyException); +create_exception!(dataing_investigator, InvalidTransitionError, StateError); +create_exception!(dataing_investigator, SerializationError, StateError); +``` + +**Key requirements:** +- All errors returned via `PyResult`, never panic +- JSON strings for FFI boundary (simple, debuggable) +- Exception hierarchy for Python error handling + +## Acceptance + +- [ ] `Investigator` class exposed with new/restore/snapshot/ingest methods +- [ ] Custom exceptions defined and exported +- [ ] All error paths return `PyResult::Err`, no panics +- [ ] JSON strings used for state/event/intent serialization +- [ ] `maturin develop` builds successfully +- [ ] Basic Python smoke test passes + +## Done summary +- Custom exceptions: StateError, SerializationError, InvalidTransitionError +- catch_unwind at FFI boundary catches any Rust panics +- All error paths return 
PyResult::Err, never panic +- JSON strings for state/event/intent serialization +- Added is_terminal() helper method +- Full docstrings with Args/Returns/Raises sections + +Python smoke test passed: +- Investigator lifecycle (new, ingest, snapshot, restore) +- Exception handling (SerializationError for bad JSON) +- Exception hierarchy (SerializationError extends StateError) + +Verification: +- maturin develop: PASS +- Python smoke test: PASS +- cargo test: PASS (43 tests) +- cargo clippy: PASS +## Evidence +- Commits: 50aa9b99 +- Tests: maturin develop, Python smoke test, cargo test +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-17.9.json b/.flow/tasks/fn-17.9.json new file mode 100644 index 000000000..664e7fd8b --- /dev/null +++ b/.flow/tasks/fn-17.9.json @@ -0,0 +1,27 @@ +{ + "assignee": "bordumbb@gmail.com", + "claim_note": "", + "claimed_at": "2026-01-19T02:04:05.936382Z", + "created_at": "2026-01-19T01:18:51.857586Z", + "depends_on": [ + "fn-17.8" + ], + "epic": "fn-17", + "evidence": { + "commits": [ + "65b0b7fa" + ], + "prs": [], + "tests": [ + "just rust-dev", + "uv sync", + "Python import" + ] + }, + "id": "fn-17.9", + "priority": null, + "spec_path": ".flow/tasks/fn-17.9.md", + "status": "done", + "title": "Integrate Rust bindings with uv workspace", + "updated_at": "2026-01-19T02:06:10.698820Z" +} diff --git a/.flow/tasks/fn-17.9.md b/.flow/tasks/fn-17.9.md new file mode 100644 index 000000000..4370d406f --- /dev/null +++ b/.flow/tasks/fn-17.9.md @@ -0,0 +1,82 @@ +# fn-17.9 Integrate Rust bindings with uv workspace + +## Description + +Integrate the Maturin-built Rust bindings into the existing uv workspace so `dataing-investigator` is available to other Python packages. 
+ +**Update root pyproject.toml:** +```toml +[tool.uv.workspace] +members = ["python-packages/*", "core/bindings/python"] + +[tool.uv.sources] +dataing-investigator = { path = "core/bindings/python", editable = true } +``` + +**Pin maturin version in bindings pyproject.toml:** +```toml +[build-system] +requires = ["maturin>=1.7,<2.0"] # Pin to version with uv support +build-backend = "maturin" +``` + +**Update Justfile:** +```just +# Prerequisites check +rust-check: + @command -v cargo >/dev/null || (echo "Install Rust: rustup.rs" && exit 1) + @command -v maturin >/dev/null || (echo "Install maturin: pip install maturin>=1.7" && exit 1) + +# Build Rust bindings +rust-build: rust-check + cd core && cargo build --release + +# Develop Rust bindings (install to venv) +rust-dev: rust-check + cd core/bindings/python && maturin develop --uv + +# Full setup including Rust +setup: rust-build + uv sync + cd core/bindings/python && maturin develop --uv +``` + +**Cache keys for uv:** +```toml +[tool.uv] +cache-keys = [ + { file = "pyproject.toml" }, + { file = "uv.lock" }, + { file = "core/Cargo.toml" }, + { file = "core/**/*.rs" } +] +``` + +## Acceptance + +- [ ] `core/bindings/python` listed in uv workspace members +- [ ] `dataing-investigator` source configured in `[tool.uv.sources]` +- [ ] **Maturin version pinned (>=1.7) in build-system requires** +- [ ] `just rust-dev` builds and installs bindings +- [ ] `uv sync` works with Rust binding in workspace +- [ ] `python -c "from dataing_investigator import Investigator"` works from project root venv +- [ ] Justfile updated with Rust build commands +- [ ] **Verified: pinned maturin version supports uv integration** + +## Done summary +- Added Rust commands to Justfile: rust-check, rust-build, rust-dev, rust-test, rust-lint, rust-clean +- Updated `just setup` to run `just rust-dev` after uv sync +- Updated `just clean` to include rust-clean +- Maturin version pinned at >=1.7,<2.0 (confirmed uv support) + +Note: 
dataing-investigator requires maturin build, cannot be a uv source directly. +Workflow is: `uv sync` then `just rust-dev` (done via `just setup`). + +Verification: +- just rust-dev: PASS (builds and installs to venv) +- uv sync: PASS +- Python import: PASS +## Evidence +- Commits: 65b0b7fa +- Tests: just rust-dev, uv sync, Python import +- PRs: \ No newline at end of file diff --git a/.gitignore b/.gitignore index 46431fcca..cc80691f7 100644 --- a/.gitignore +++ b/.gitignore @@ -165,6 +165,12 @@ chainlit.md /investigations/ *.investigation.json +# Performance benchmark data +tests/performance/.temporal/ +tests/performance/results.json +tests/performance/results.md +benchmarks/ + ############################ # Helm / Kubernetes ############################ diff --git a/core/.gitignore b/core/.gitignore new file mode 100644 index 000000000..b83d22266 --- /dev/null +++ b/core/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/core/Cargo.lock b/core/Cargo.lock new file mode 100644 index 000000000..8fbb5192d --- /dev/null +++ b/core/Cargo.lock @@ -0,0 +1,275 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "dataing_investigator" +version = "0.1.0" +dependencies = [ + "pretty_assertions", + "serde", + "serde_json", +] + +[[package]] +name = "dataing_investigator_py" +version = "0.1.0" +dependencies = [ + "dataing_investigator", + "pyo3", + "serde", + "serde_json", +] + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = 
"0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + 
"zmij", +] + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zmij" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" diff --git a/core/Cargo.toml b/core/Cargo.toml new file mode 100644 index 000000000..e930db434 --- /dev/null +++ b/core/Cargo.toml @@ -0,0 +1,18 @@ +[workspace] +members = ["crates/dataing_investigator", "bindings/python"] +resolver = "2" + +[workspace.package] +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +repository = "https://github.com/bordumb/dataing" + +[workspace.dependencies] +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +pyo3 = { version = "0.23", features = ["extension-module", "abi3-py311"] } + +# Required for catch_unwind at FFI boundary +[profile.release] +panic = "unwind" diff --git a/core/bindings/python/Cargo.toml b/core/bindings/python/Cargo.toml new file mode 
100644 index 000000000..3348eb08b --- /dev/null +++ b/core/bindings/python/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "dataing_investigator_py" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +description = "Python bindings for dataing_investigator" + +[lib] +name = "dataing_investigator" +crate-type = ["cdylib"] + +[dependencies] +pyo3.workspace = true +serde.workspace = true +serde_json.workspace = true +dataing_investigator = { path = "../../crates/dataing_investigator" } diff --git a/core/bindings/python/pyproject.toml b/core/bindings/python/pyproject.toml new file mode 100644 index 000000000..ed418656c --- /dev/null +++ b/core/bindings/python/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "dataing-investigator" +requires-python = ">=3.11" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dynamic = ["version"] + +[tool.maturin] +bindings = "pyo3" +features = ["pyo3/extension-module", "pyo3/abi3-py311"] diff --git a/core/bindings/python/src/lib.rs b/core/bindings/python/src/lib.rs new file mode 100644 index 000000000..2f06d84c0 --- /dev/null +++ b/core/bindings/python/src/lib.rs @@ -0,0 +1,229 @@ +//! Python bindings for dataing_investigator. +//! +//! This module exposes the Rust state machine to Python via PyO3. +//! All functions use panic-free error handling via `PyResult`. +//! +//! # Error Handling +//! +//! Custom exceptions are provided for fine-grained error handling: +//! - `StateError`: Base exception for all state machine errors +//! - `SerializationError`: JSON serialization/deserialization failures +//! - `InvalidTransitionError`: Invalid state transitions +//! - `ProtocolMismatchError`: Protocol version mismatch +//! 
- `DuplicateEventError`: Duplicate event ID (idempotent, not an error in practice) +//! - `StepViolationError`: Step not monotonically increasing +//! - `UnexpectedCallError`: Unexpected call_id received +//! +//! # Panic Safety +//! +//! The `panic = "unwind"` profile setting and `catch_unwind` ensure +//! that any unexpected Rust panic is caught and converted to a Python +//! exception rather than crashing the interpreter. + +use pyo3::prelude::*; +use std::panic::{catch_unwind, AssertUnwindSafe}; + +// Import the core crate (renamed to avoid conflict with pymodule name) +use ::dataing_investigator as core; + +// Custom exceptions for Python error handling +pyo3::create_exception!(dataing_investigator, StateError, pyo3::exceptions::PyException); +pyo3::create_exception!(dataing_investigator, SerializationError, StateError); +pyo3::create_exception!(dataing_investigator, InvalidTransitionError, StateError); +pyo3::create_exception!(dataing_investigator, ProtocolMismatchError, StateError); +pyo3::create_exception!(dataing_investigator, DuplicateEventError, StateError); +pyo3::create_exception!(dataing_investigator, StepViolationError, StateError); +pyo3::create_exception!(dataing_investigator, UnexpectedCallError, StateError); +pyo3::create_exception!(dataing_investigator, InvariantError, StateError); + +/// Returns the protocol version used by the state machine. +#[pyfunction] +fn protocol_version() -> u32 { + core::PROTOCOL_VERSION +} + +/// Python wrapper for the Rust Investigator state machine. +/// +/// This class provides a panic-safe interface to the Rust state machine. +/// All methods return Python exceptions on error, never panic. +#[pyclass] +pub struct Investigator { + inner: core::Investigator, +} + +#[pymethods] +impl Investigator { + /// Create a new Investigator in initial state. + #[new] + fn new() -> Self { + Investigator { + inner: core::Investigator::new(), + } + } + + /// Restore an Investigator from a JSON state snapshot. 
+    ///
+    /// Args:
+    ///     state_json: JSON string of a previously saved state snapshot
+    ///
+    /// Returns:
+    ///     Investigator restored to the saved state
+    ///
+    /// Raises:
+    ///     SerializationError: If the JSON is invalid or doesn't match schema
+    #[staticmethod]
+    fn restore(state_json: &str) -> PyResult<Self> {
+        let state: core::State = serde_json::from_str(state_json)
+            .map_err(|e| SerializationError::new_err(format!("Invalid state JSON: {}", e)))?;
+        Ok(Investigator {
+            inner: core::Investigator::restore(state),
+        })
+    }
+
+    /// Get a JSON snapshot of the current state.
+    ///
+    /// Returns:
+    ///     JSON string that can be used with `restore()`
+    ///
+    /// Raises:
+    ///     SerializationError: If serialization fails (should never happen)
+    fn snapshot(&self) -> PyResult<String> {
+        let state = self.inner.snapshot();
+        serde_json::to_string(&state)
+            .map_err(|e| SerializationError::new_err(format!("Snapshot serialization failed: {}", e)))
+    }
+
+    /// Process an event envelope and return the next intent.
+    ///
+    /// This is the main entry point for interacting with the state machine.
+    /// The envelope must include protocol_version, event_id, step, and event. 
+    ///
+    /// Args:
+    ///     envelope_json: JSON string of the envelope containing the event
+    ///
+    /// Returns:
+    ///     JSON string of the resulting intent
+    ///
+    /// Raises:
+    ///     SerializationError: If envelope JSON is invalid or intent serialization fails
+    ///     ProtocolMismatchError: If protocol version doesn't match
+    ///     StepViolationError: If step is not monotonically increasing
+    ///     InvalidTransitionError: If the event causes an invalid state transition
+    ///     UnexpectedCallError: If an unexpected call_id is received
+    fn ingest(&mut self, envelope_json: &str) -> PyResult<String> {
+        // Parse envelope
+        let envelope: core::Envelope = serde_json::from_str(envelope_json)
+            .map_err(|e| SerializationError::new_err(format!("Invalid envelope JSON: {}", e)))?;
+
+        // Use catch_unwind for panic safety at FFI boundary
+        let result = catch_unwind(AssertUnwindSafe(|| {
+            self.inner.ingest(envelope)
+        }));
+
+        let intent_result = match result {
+            Ok(r) => r,
+            Err(_) => {
+                return Err(StateError::new_err("Internal error: Rust panic caught at FFI boundary"));
+            }
+        };
+
+        // Convert MachineError to appropriate Python exception
+        let intent = match intent_result {
+            Ok(i) => i,
+            Err(e) => {
+                let msg = e.to_string();
+                return Err(match e.kind {
+                    core::ErrorKind::InvalidTransition => InvalidTransitionError::new_err(msg),
+                    core::ErrorKind::Serialization => SerializationError::new_err(msg),
+                    core::ErrorKind::ProtocolMismatch => ProtocolMismatchError::new_err(msg),
+                    core::ErrorKind::DuplicateEvent => DuplicateEventError::new_err(msg),
+                    core::ErrorKind::StepViolation => StepViolationError::new_err(msg),
+                    core::ErrorKind::UnexpectedCall => UnexpectedCallError::new_err(msg),
+                    core::ErrorKind::Invariant => InvariantError::new_err(msg),
+                });
+            }
+        };
+
+        serde_json::to_string(&intent)
+            .map_err(|e| SerializationError::new_err(format!("Intent serialization failed: {}", e)))
+    }
+
+    /// Query the current intent without providing an event. 
+    ///
+    /// Useful for getting the initial intent or checking state without
+    /// advancing the state machine.
+    ///
+    /// Returns:
+    ///     JSON string of the current intent
+    ///
+    /// Raises:
+    ///     SerializationError: If intent serialization fails
+    fn query(&self) -> PyResult<String> {
+        let intent = self.inner.query();
+        serde_json::to_string(&intent)
+            .map_err(|e| SerializationError::new_err(format!("Intent serialization failed: {}", e)))
+    }
+
+    /// Get the current phase as a string.
+    ///
+    /// Returns one of: 'init', 'gathering_context', 'generating_hypotheses',
+    /// 'evaluating_hypotheses', 'awaiting_user', 'synthesizing', 'finished', 'failed'
+    fn current_phase(&self) -> String {
+        let state = self.inner.snapshot();
+        match &state.phase {
+            core::Phase::Init => "init".to_string(),
+            core::Phase::GatheringContext { .. } => "gathering_context".to_string(),
+            core::Phase::GeneratingHypotheses { .. } => "generating_hypotheses".to_string(),
+            core::Phase::EvaluatingHypotheses { .. } => "evaluating_hypotheses".to_string(),
+            core::Phase::AwaitingUser { .. } => "awaiting_user".to_string(),
+            core::Phase::Synthesizing { .. } => "synthesizing".to_string(),
+            core::Phase::Finished { .. } => "finished".to_string(),
+            core::Phase::Failed { .. } => "failed".to_string(),
+        }
+    }
+
+    /// Get the current step (logical clock value).
+    ///
+    /// The step is owned by the workflow and validated for monotonicity.
+    fn current_step(&self) -> u64 {
+        self.inner.current_step()
+    }
+
+    /// Check if the investigation is in a terminal state.
+    ///
+    /// Returns True if phase is 'finished' or 'failed'.
+    fn is_terminal(&self) -> bool {
+        self.inner.is_terminal()
+    }
+
+    /// Get string representation.
+    fn __repr__(&self) -> String {
+        format!(
+            "Investigator(phase='{}', step={})",
+            self.current_phase(),
+            self.current_step()
+        )
+    }
+}
+
+/// Python module for dataing_investigator. 
+#[pymodule]
+fn dataing_investigator(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    // Add functions
+    m.add_function(wrap_pyfunction!(protocol_version, m)?)?;
+
+    // Add classes
+    m.add_class::<Investigator>()?;
+
+    // Add exceptions
+    m.add("StateError", m.py().get_type::<StateError>())?;
+    m.add("SerializationError", m.py().get_type::<SerializationError>())?;
+    m.add("InvalidTransitionError", m.py().get_type::<InvalidTransitionError>())?;
+    m.add("ProtocolMismatchError", m.py().get_type::<ProtocolMismatchError>())?;
+    m.add("DuplicateEventError", m.py().get_type::<DuplicateEventError>())?;
+    m.add("StepViolationError", m.py().get_type::<StepViolationError>())?;
+    m.add("UnexpectedCallError", m.py().get_type::<UnexpectedCallError>())?;
+    m.add("InvariantError", m.py().get_type::<InvariantError>())?;
+
+    Ok(())
+}
diff --git a/core/crates/dataing_investigator/Cargo.toml b/core/crates/dataing_investigator/Cargo.toml
new file mode 100644
index 000000000..d9af0f4a3
--- /dev/null
+++ b/core/crates/dataing_investigator/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "dataing_investigator"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+description = "Rust state machine for data quality investigations"
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+
+[dev-dependencies]
+pretty_assertions = "1.4"
+
+[lints.clippy]
+unwrap_used = "deny"
+expect_used = "deny"
+panic = "deny"
diff --git a/core/crates/dataing_investigator/src/domain.rs b/core/crates/dataing_investigator/src/domain.rs
new file mode 100644
index 000000000..7ca56e41e
--- /dev/null
+++ b/core/crates/dataing_investigator/src/domain.rs
@@ -0,0 +1,137 @@
+//! Domain types for data quality investigations.
+//!
+//! Foundational types used across the investigation state machine.
+//! All types are serializable with serde for protocol stability.
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::BTreeMap;
+
+/// Security scope for an investigation.
+///
+/// Contains identity and permission information for access control. 
+/// Uses BTreeMap for deterministic serialization order.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct Scope {
+    /// User identifier.
+    pub user_id: String,
+    /// Tenant identifier for multi-tenancy.
+    pub tenant_id: String,
+    /// List of permission strings.
+    pub permissions: Vec<String>,
+    /// Additional fields for forward compatibility.
+    #[serde(default)]
+    pub extra: BTreeMap<String, Value>,
+}
+
+/// Kind of external call being tracked.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum CallKind {
+    /// LLM inference call.
+    Llm,
+    /// Tool invocation (SQL query, API call, etc.).
+    Tool,
+}
+
+/// Metadata about a pending external call.
+///
+/// Tracks calls that have been initiated but not yet completed,
+/// enabling resume-from-snapshot capability.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct CallMeta {
+    /// Unique identifier for this call.
+    pub id: String,
+    /// Human-readable name of the call.
+    pub name: String,
+    /// Kind of call (LLM or Tool).
+    pub kind: CallKind,
+    /// Phase context when call was initiated.
+    pub phase_context: String,
+    /// Step number when call was created. 
+ pub created_at_step: u64, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_scope_serialization_roundtrip() { + let mut extra = BTreeMap::new(); + extra.insert("custom_field".to_string(), Value::Bool(true)); + + let scope = Scope { + user_id: "user123".to_string(), + tenant_id: "tenant456".to_string(), + permissions: vec!["read".to_string(), "write".to_string()], + extra, + }; + + let json = serde_json::to_string(&scope).expect("serialize"); + let deserialized: Scope = serde_json::from_str(&json).expect("deserialize"); + + assert_eq!(scope, deserialized); + } + + #[test] + fn test_scope_extra_defaults_to_empty() { + let json = r#"{"user_id":"u","tenant_id":"t","permissions":[]}"#; + let scope: Scope = serde_json::from_str(json).expect("deserialize"); + + assert!(scope.extra.is_empty()); + } + + #[test] + fn test_call_kind_serialization() { + let llm = CallKind::Llm; + let tool = CallKind::Tool; + + assert_eq!(serde_json::to_string(&llm).expect("ser"), "\"llm\""); + assert_eq!(serde_json::to_string(&tool).expect("ser"), "\"tool\""); + + let llm_deser: CallKind = serde_json::from_str("\"llm\"").expect("deser"); + let tool_deser: CallKind = serde_json::from_str("\"tool\"").expect("deser"); + + assert_eq!(llm_deser, CallKind::Llm); + assert_eq!(tool_deser, CallKind::Tool); + } + + #[test] + fn test_call_meta_serialization_roundtrip() { + let meta = CallMeta { + id: "call_001".to_string(), + name: "generate_hypotheses".to_string(), + kind: CallKind::Llm, + phase_context: "hypothesis_generation".to_string(), + created_at_step: 5, + }; + + let json = serde_json::to_string(&meta).expect("serialize"); + let deserialized: CallMeta = serde_json::from_str(&json).expect("deserialize"); + + assert_eq!(meta, deserialized); + } + + #[test] + fn test_btreemap_ordering() { + // BTreeMap ensures deterministic serialization order + let mut extra = BTreeMap::new(); + extra.insert("zebra".to_string(), Value::String("z".to_string())); + 
extra.insert("alpha".to_string(), Value::String("a".to_string())); + extra.insert("beta".to_string(), Value::String("b".to_string())); + + let scope = Scope { + user_id: "u".to_string(), + tenant_id: "t".to_string(), + permissions: vec![], + extra, + }; + + let json = serde_json::to_string(&scope).expect("serialize"); + // BTreeMap should order keys alphabetically + assert!(json.contains(r#""alpha":"a""#)); + assert!(json.find("alpha").expect("alpha") < json.find("beta").expect("beta")); + assert!(json.find("beta").expect("beta") < json.find("zebra").expect("zebra")); + } +} diff --git a/core/crates/dataing_investigator/src/lib.rs b/core/crates/dataing_investigator/src/lib.rs new file mode 100644 index 000000000..a1cb0e046 --- /dev/null +++ b/core/crates/dataing_investigator/src/lib.rs @@ -0,0 +1,43 @@ +//! Rust state machine for data quality investigations. +//! +//! This crate provides a deterministic, event-sourced state machine +//! for managing investigation workflows. It is designed to be: +//! +//! - **Total**: All state transitions are explicit; illegal transitions become errors +//! - **Deterministic**: Same events always produce the same state +//! - **Serializable**: State snapshots are versioned and backwards-compatible +//! - **Side-effect free**: All side effects happen outside the state machine +//! +//! # Protocol Stability +//! +//! The Event/Intent JSON format is a contract. Changes must be backwards-compatible: +//! - New fields use `#[serde(default)]` for forward compatibility +//! - Existing fields are never renamed without migration +//! - Protocol version is included in all snapshots + +#![deny(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +/// Current protocol version for state snapshots. +/// Increment when making breaking changes to serialization format. 
+pub const PROTOCOL_VERSION: u32 = 1; + +pub mod domain; +pub mod machine; +pub mod protocol; +pub mod state; + +// Re-export types for convenience +pub use domain::{CallKind, CallMeta, Scope}; +pub use machine::Investigator; +pub use protocol::{Envelope, ErrorKind, Event, Intent, MachineError}; +pub use state::{phase_name, PendingCall, Phase, State}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_protocol_version() { + assert_eq!(PROTOCOL_VERSION, 1); + } +} diff --git a/core/crates/dataing_investigator/src/machine.rs b/core/crates/dataing_investigator/src/machine.rs new file mode 100644 index 000000000..4d734ca50 --- /dev/null +++ b/core/crates/dataing_investigator/src/machine.rs @@ -0,0 +1,944 @@ +//! State machine for investigation workflow. +//! +//! The Investigator struct manages state transitions based on events +//! and produces intents for the runtime to execute. +//! +//! # Design Principles +//! +//! - **Total**: All state transitions are explicit; illegal transitions produce errors +//! - **Deterministic**: Same events always produce the same state +//! - **Side-effect free**: All side effects happen outside the state machine +//! - **Workflow owns IDs**: The machine never generates call_ids or question_ids +//! +//! # Call Scheduling Handshake +//! +//! When the machine needs to make an external call: +//! 1. Machine emits `Intent::RequestCall { name, kind, args, reasoning }` +//! 2. Workflow generates a call_id and sends `Event::CallScheduled { call_id, name }` +//! 3. Machine stores the call_id and returns `Intent::Idle` +//! 4. Workflow executes the call and sends `Event::CallResult { call_id, output }` +//! 5. Machine processes the result and advances + +use serde_json::{json, Value}; + +use crate::domain::{CallKind, CallMeta}; +use crate::protocol::{Envelope, ErrorKind, Event, Intent, MachineError}; +use crate::state::{phase_name, PendingCall, Phase, State}; +use crate::PROTOCOL_VERSION; + +/// Investigation state machine. 
+/// +/// Manages the investigation workflow by processing events and +/// producing intents. All state is contained within the struct +/// and can be serialized/restored for checkpointing. +/// +/// # Example +/// +/// ``` +/// use dataing_investigator::machine::Investigator; +/// use dataing_investigator::protocol::{Envelope, Event, Intent}; +/// use dataing_investigator::domain::Scope; +/// use std::collections::BTreeMap; +/// +/// let mut inv = Investigator::new(); +/// +/// // Start investigation with envelope +/// let envelope = Envelope { +/// protocol_version: 1, +/// event_id: "evt_001".to_string(), +/// step: 1, +/// event: Event::Start { +/// objective: "Find null spike".to_string(), +/// scope: Scope { +/// user_id: "u1".to_string(), +/// tenant_id: "t1".to_string(), +/// permissions: vec![], +/// extra: BTreeMap::new(), +/// }, +/// }, +/// }; +/// +/// let result = inv.ingest(envelope); +/// assert!(result.is_ok()); +/// +/// // Returns intent to request a call (no call_id yet) +/// match result.unwrap() { +/// Intent::RequestCall { name, .. } => assert_eq!(name, "get_schema"), +/// _ => panic!("Expected RequestCall intent"), +/// } +/// ``` +#[derive(Debug, Clone)] +pub struct Investigator { + state: State, +} + +impl Default for Investigator { + fn default() -> Self { + Self::new() + } +} + +impl Investigator { + /// Create a new investigator in initial state. + #[must_use] + pub fn new() -> Self { + Self { + state: State::new(), + } + } + + /// Restore an investigator from a saved state snapshot. + #[must_use] + pub fn restore(state: State) -> Self { + Self { state } + } + + /// Get a clone of the current state for persistence. + #[must_use] + pub fn snapshot(&self) -> State { + self.state.clone() + } + + /// Get the current phase name. + #[must_use] + pub fn current_phase(&self) -> &'static str { + phase_name(&self.state.phase) + } + + /// Get the current step. 
+    #[must_use]
+    pub fn current_step(&self) -> u64 {
+        self.state.step
+    }
+
+    /// Check if in a terminal state.
+    #[must_use]
+    pub fn is_terminal(&self) -> bool {
+        self.state.is_terminal()
+    }
+
+    /// Process an event envelope and return the next intent.
+    ///
+    /// Validates:
+    /// - Protocol version matches
+    /// - Event ID is not a duplicate
+    /// - Step is monotonically increasing
+    ///
+    /// On success, applies the event and returns the next intent.
+    /// On error, returns a typed MachineError for retry decisions.
+    pub fn ingest(&mut self, envelope: Envelope) -> Result<Intent, MachineError> {
+        // Validate protocol version
+        if envelope.protocol_version != PROTOCOL_VERSION {
+            return Err(MachineError::new(
+                ErrorKind::ProtocolMismatch,
+                format!(
+                    "Expected protocol version {}, got {}",
+                    PROTOCOL_VERSION, envelope.protocol_version
+                ),
+            )
+            .with_step(envelope.step));
+        }
+
+        // Check for duplicate event
+        if self.state.is_duplicate_event(&envelope.event_id) {
+            // Silently return current intent (idempotency)
+            return Ok(self.decide());
+        }
+
+        // Validate step monotonicity (must be > current step)
+        if envelope.step <= self.state.step {
+            return Err(MachineError::new(
+                ErrorKind::StepViolation,
+                format!(
+                    "Step {} is not greater than current step {}",
+                    envelope.step, self.state.step
+                ),
+            )
+            .with_phase(self.current_phase())
+            .with_step(envelope.step));
+        }
+
+        // Mark event as processed and update step
+        self.state.mark_event_processed(envelope.event_id);
+        self.state.set_step(envelope.step);
+
+        // Apply the event
+        self.apply(envelope.event)?;
+
+        // Return the next intent
+        Ok(self.decide())
+    }
+
+    /// Query the current intent without providing an event.
+    ///
+    /// Useful for getting the initial intent or checking state.
+    #[must_use]
+    pub fn query(&self) -> Intent {
+        // Create a temporary clone to avoid mutating state
+        let mut temp = self.clone();
+        temp.decide()
+    }
+
+    /// Apply an event to update the state. 
+ fn apply(&mut self, event: Event) -> Result<(), MachineError> { + match event { + Event::Start { objective, scope } => self.apply_start(objective, scope), + Event::CallScheduled { call_id, name } => self.apply_call_scheduled(&call_id, &name), + Event::CallResult { call_id, output } => self.apply_call_result(&call_id, output), + Event::UserResponse { + question_id, + content, + } => self.apply_user_response(&question_id, &content), + Event::Cancel => { + self.apply_cancel(); + Ok(()) + } + } + } + + /// Apply Start event. + fn apply_start( + &mut self, + objective: String, + scope: crate::domain::Scope, + ) -> Result<(), MachineError> { + match &self.state.phase { + Phase::Init => { + self.state.objective = Some(objective); + self.state.scope = Some(scope); + self.state.phase = Phase::GatheringContext { + pending: None, + call_id: None, + }; + Ok(()) + } + _ => Err(MachineError::new( + ErrorKind::InvalidTransition, + format!( + "Received Start event in phase {}", + self.current_phase() + ), + ) + .with_phase(self.current_phase()) + .with_step(self.state.step)), + } + } + + /// Apply CallScheduled event (workflow assigned a call_id). 
+ fn apply_call_scheduled(&mut self, call_id: &str, name: &str) -> Result<(), MachineError> { + match &self.state.phase { + Phase::GatheringContext { + pending: Some(pending), + call_id: None, + } if pending.awaiting_schedule && pending.name == name => { + // Record the call metadata + self.record_meta(call_id, name, CallKind::Tool, "gathering_context"); + self.state.phase = Phase::GatheringContext { + pending: None, + call_id: Some(call_id.to_string()), + }; + Ok(()) + } + Phase::GeneratingHypotheses { + pending: Some(pending), + call_id: None, + } if pending.awaiting_schedule && pending.name == name => { + self.record_meta(call_id, name, CallKind::Llm, "generating_hypotheses"); + self.state.phase = Phase::GeneratingHypotheses { + pending: None, + call_id: Some(call_id.to_string()), + }; + Ok(()) + } + Phase::EvaluatingHypotheses { + pending: Some(pending), + awaiting_results, + total_hypotheses, + completed, + } if pending.awaiting_schedule && pending.name == name => { + // Clone values before mutable operations to satisfy borrow checker + let mut new_awaiting = awaiting_results.clone(); + new_awaiting.push(call_id.to_string()); + let total = *total_hypotheses; + let done = *completed; + self.record_meta(call_id, name, CallKind::Tool, "evaluating_hypotheses"); + self.state.phase = Phase::EvaluatingHypotheses { + pending: None, + awaiting_results: new_awaiting, + total_hypotheses: total, + completed: done, + }; + Ok(()) + } + Phase::Synthesizing { + pending: Some(pending), + call_id: None, + } if pending.awaiting_schedule && pending.name == name => { + self.record_meta(call_id, name, CallKind::Llm, "synthesizing"); + self.state.phase = Phase::Synthesizing { + pending: None, + call_id: Some(call_id.to_string()), + }; + Ok(()) + } + _ => Err(MachineError::new( + ErrorKind::UnexpectedCall, + format!( + "Unexpected CallScheduled(call_id={}, name={}) in phase {}", + call_id, + name, + self.current_phase() + ), + ) + .with_phase(self.current_phase()) + 
.with_step(self.state.step)), + } + } + + /// Apply CallResult event. + fn apply_call_result(&mut self, call_id: &str, output: Value) -> Result<(), MachineError> { + match &self.state.phase { + Phase::GatheringContext { + pending: None, + call_id: Some(expected), + } if call_id == expected => { + // Store schema in evidence + self.state + .evidence + .insert("schema".to_string(), output.clone()); + self.state.call_order.push(call_id.to_string()); + // Transition to hypothesis generation + self.state.phase = Phase::GeneratingHypotheses { + pending: None, + call_id: None, + }; + Ok(()) + } + Phase::GeneratingHypotheses { + pending: None, + call_id: Some(expected), + } if call_id == expected => { + // Store hypotheses in evidence + self.state + .evidence + .insert("hypotheses".to_string(), output.clone()); + self.state.call_order.push(call_id.to_string()); + // Count hypotheses for evaluation + let hypothesis_count = output.as_array().map(|a| a.len()).unwrap_or(0); + // Transition to evaluating hypotheses + self.state.phase = Phase::EvaluatingHypotheses { + pending: None, + awaiting_results: vec![], + total_hypotheses: hypothesis_count, + completed: 0, + }; + Ok(()) + } + Phase::EvaluatingHypotheses { + pending: None, + awaiting_results, + total_hypotheses, + completed, + } if awaiting_results.contains(&call_id.to_string()) => { + // Store evidence for this evaluation + self.state + .evidence + .insert(format!("eval_{}", call_id), output.clone()); + self.state.call_order.push(call_id.to_string()); + + // Remove from awaiting + let mut new_awaiting = awaiting_results.clone(); + new_awaiting.retain(|id| id != call_id); + let new_completed = completed + 1; + + if new_completed >= *total_hypotheses && new_awaiting.is_empty() { + // All evaluations complete, move to synthesis + self.state.phase = Phase::Synthesizing { + pending: None, + call_id: None, + }; + } else { + self.state.phase = Phase::EvaluatingHypotheses { + pending: None, + awaiting_results: new_awaiting, + 
total_hypotheses: *total_hypotheses, + completed: new_completed, + }; + } + Ok(()) + } + Phase::Synthesizing { + pending: None, + call_id: Some(expected), + } if call_id == expected => { + self.state.call_order.push(call_id.to_string()); + // Extract insight from output + let insight = output + .get("insight") + .and_then(|v| v.as_str()) + .unwrap_or("Investigation complete") + .to_string(); + self.state.phase = Phase::Finished { insight }; + Ok(()) + } + _ => Err(MachineError::new( + ErrorKind::UnexpectedCall, + format!( + "Unexpected CallResult(call_id={}) in phase {}", + call_id, + self.current_phase() + ), + ) + .with_phase(self.current_phase()) + .with_step(self.state.step)), + } + } + + /// Apply UserResponse event. + fn apply_user_response( + &mut self, + question_id: &str, + content: &str, + ) -> Result<(), MachineError> { + match &self.state.phase { + Phase::AwaitingUser { + question_id: expected, + .. + } if question_id == expected => { + // Store user response + self.state.evidence.insert( + format!("user_response_{}", question_id), + json!(content), + ); + // Continue to synthesis + self.state.phase = Phase::Synthesizing { + pending: None, + call_id: None, + }; + Ok(()) + } + _ => Err(MachineError::new( + ErrorKind::InvalidTransition, + format!( + "Unexpected UserResponse(question_id={}) in phase {}", + question_id, + self.current_phase() + ), + ) + .with_phase(self.current_phase()) + .with_step(self.state.step)), + } + } + + /// Apply Cancel event. + fn apply_cancel(&mut self) { + match &self.state.phase { + Phase::Finished { .. } | Phase::Failed { .. } => { + // Already terminal, ignore cancel + } + _ => { + self.state.phase = Phase::Failed { + error: "Investigation cancelled by user".to_string(), + }; + } + } + } + + /// Record metadata for a call. 
    /// Record metadata for a scheduled call in the call index.
    ///
    /// `created_at_step` captures the workflow step at scheduling time so
    /// the audit trail can be correlated with the event log.
    fn record_meta(&mut self, call_id: &str, name: &str, kind: CallKind, phase_context: &str) {
        self.state.call_index.insert(
            call_id.to_string(),
            CallMeta {
                id: call_id.to_string(),
                name: name.to_string(),
                kind,
                phase_context: phase_context.to_string(),
                created_at_step: self.state.step,
            },
        );
    }

    /// Decide what intent to emit based on current state.
    ///
    /// NOTE: this is intentionally `&mut self` — when a phase needs an
    /// external call, decide() records the pending request in the phase
    /// *before* emitting `RequestCall`, so a second decide() in the same
    /// situation collapses to `Idle` instead of re-requesting.
    fn decide(&mut self) -> Intent {
        match &self.state.phase {
            Phase::Init => Intent::Idle,

            Phase::GatheringContext { pending, call_id } => {
                if pending.is_some() {
                    // Waiting for CallScheduled
                    Intent::Idle
                } else if call_id.is_some() {
                    // Waiting for CallResult
                    Intent::Idle
                } else {
                    // Need to request schema call; mark it pending first.
                    self.state.phase = Phase::GatheringContext {
                        pending: Some(PendingCall {
                            name: "get_schema".to_string(),
                            awaiting_schedule: true,
                        }),
                        call_id: None,
                    };
                    Intent::RequestCall {
                        kind: CallKind::Tool,
                        name: "get_schema".to_string(),
                        args: json!({
                            "objective": self.state.objective.clone().unwrap_or_default()
                        }),
                        reasoning: "Need to gather schema context for the investigation".to_string(),
                    }
                }
            }

            Phase::GeneratingHypotheses { pending, call_id } => {
                if pending.is_some() || call_id.is_some() {
                    Intent::Idle
                } else {
                    self.state.phase = Phase::GeneratingHypotheses {
                        pending: Some(PendingCall {
                            name: "generate_hypotheses".to_string(),
                            awaiting_schedule: true,
                        }),
                        call_id: None,
                    };
                    Intent::RequestCall {
                        kind: CallKind::Llm,
                        name: "generate_hypotheses".to_string(),
                        args: json!({
                            "objective": self.state.objective.clone().unwrap_or_default(),
                            "schema": self.state.evidence.get("schema").cloned().unwrap_or(Value::Null)
                        }),
                        reasoning: "Generate hypotheses to explain the observed anomaly".to_string(),
                    }
                }
            }

            Phase::EvaluatingHypotheses {
                pending,
                awaiting_results,
                total_hypotheses,
                completed,
            } => {
                if pending.is_some() {
                    // Waiting for CallScheduled
                    Intent::Idle
                } else if !awaiting_results.is_empty() {
                    // Waiting for CallResults
                    Intent::Idle
                } else if *completed < *total_hypotheses {
                    // Need to request next evaluation.
                    // Clone values before mutable operations to satisfy borrow checker
                    // (the match arm holds a borrow of self.state.phase).
                    let hypothesis_idx = *completed;
                    let total = *total_hypotheses;
                    self.state.phase = Phase::EvaluatingHypotheses {
                        pending: Some(PendingCall {
                            name: "evaluate_hypothesis".to_string(),
                            awaiting_schedule: true,
                        }),
                        awaiting_results: vec![],
                        total_hypotheses: total,
                        completed: hypothesis_idx,
                    };
                    Intent::RequestCall {
                        kind: CallKind::Tool,
                        name: "evaluate_hypothesis".to_string(),
                        args: json!({
                            "hypothesis_index": hypothesis_idx,
                            "hypotheses": self.state.evidence.get("hypotheses").cloned().unwrap_or(Value::Null)
                        }),
                        reasoning: format!("Evaluate hypothesis {} of {}", hypothesis_idx + 1, total),
                    }
                } else {
                    // Should have transitioned to Synthesizing
                    Intent::Idle
                }
            }

            Phase::AwaitingUser { .. } => {
                // Waiting for user response (signal)
                Intent::Idle
            }

            Phase::Synthesizing { pending, call_id } => {
                if pending.is_some() || call_id.is_some() {
                    Intent::Idle
                } else {
                    self.state.phase = Phase::Synthesizing {
                        pending: Some(PendingCall {
                            name: "synthesize".to_string(),
                            awaiting_schedule: true,
                        }),
                        call_id: None,
                    };
                    Intent::RequestCall {
                        kind: CallKind::Llm,
                        name: "synthesize".to_string(),
                        args: json!({
                            "objective": self.state.objective.clone().unwrap_or_default(),
                            "evidence": self.state.evidence.clone()
                        }),
                        reasoning: "Synthesize all evidence into a final insight".to_string(),
                    }
                }
            }

            // Terminal phases keep re-emitting their terminal intent.
            Phase::Finished { insight } => Intent::Finish {
                insight: insight.clone(),
            },

            Phase::Failed { error } => Intent::Error {
                message: error.clone(),
            },
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::Scope;
    use std::collections::BTreeMap;

    fn test_scope() -> Scope {
        Scope {
            user_id: "u1".to_string(),
            tenant_id: "t1".to_string(),
            permissions: vec![],
            extra: BTreeMap::new(),
        }
    }

    // Build an envelope with the current PROTOCOL_VERSION so tests only
    // fail version checks when they construct one explicitly.
    fn make_envelope(event_id: &str, step: u64, event: Event) -> Envelope {
        Envelope {
            protocol_version: PROTOCOL_VERSION,
            event_id: event_id.to_string(),
            step,
            event,
        }
    }

    #[test]
    fn test_new_investigator() {
        let inv = Investigator::new();
        assert_eq!(inv.current_phase(), "init");
        assert_eq!(inv.current_step(), 0);
        assert!(!inv.is_terminal());
    }

    #[test]
    fn test_start_event() {
        let mut inv = Investigator::new();

        let envelope = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );

        let intent = inv.ingest(envelope).expect("should succeed");

        // Should emit RequestCall (no call_id)
        match intent {
            Intent::RequestCall { name, kind, .. } => {
                assert_eq!(name, "get_schema");
                assert_eq!(kind, CallKind::Tool);
            }
            _ => panic!("Expected RequestCall intent"),
        }

        assert_eq!(inv.current_phase(), "gathering_context");
        assert_eq!(inv.current_step(), 1);
    }

    #[test]
    fn test_protocol_version_mismatch() {
        let mut inv = Investigator::new();

        let envelope = Envelope {
            protocol_version: 999,
            event_id: "evt_1".to_string(),
            step: 1,
            event: Event::Cancel,
        };

        let err = inv.ingest(envelope).expect_err("should fail");
        assert_eq!(err.kind, ErrorKind::ProtocolMismatch);
    }

    #[test]
    fn test_duplicate_event_idempotent() {
        let mut inv = Investigator::new();

        let envelope1 = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );

        // intent1/intent2 are kept only to document the idempotent call
        // shape; the assertions below check state, not the intents.
        let intent1 = inv.ingest(envelope1).expect("first should succeed");

        // Same event_id again (but different step to pass monotonicity)
        let envelope2 = Envelope {
            protocol_version: PROTOCOL_VERSION,
            event_id: "evt_1".to_string(), // duplicate
            step: 2,
            event: Event::Cancel,
        };

        // Should return current intent without applying Cancel
        let intent2 = inv.ingest(envelope2).expect("duplicate should succeed");

        // State should NOT have changed
        assert_eq!(inv.current_phase(), "gathering_context");
        // Step should NOT have advanced
        assert_eq!(inv.current_step(), 1);
    }

    #[test]
    fn test_step_violation() {
        let mut inv = Investigator::new();

        let envelope1 = make_envelope(
            "evt_1",
            5,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );
        inv.ingest(envelope1).expect("first should succeed");

        // Step 3 is less than current step 5
        let envelope2 = make_envelope("evt_2", 3, Event::Cancel);

        let err = inv.ingest(envelope2).expect_err("should fail");
        assert_eq!(err.kind, ErrorKind::StepViolation);
    }

    #[test]
    fn test_call_scheduling_handshake() {
        let mut inv = Investigator::new();

        // Start
        let start = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );
        let intent = inv.ingest(start).expect("start");

        // Should request get_schema (no call_id)
        match intent {
            Intent::RequestCall { name, .. } => assert_eq!(name, "get_schema"),
            _ => panic!("Expected RequestCall"),
        }

        // Now workflow assigns call_id via CallScheduled
        let scheduled = make_envelope(
            "evt_2",
            2,
            Event::CallScheduled {
                call_id: "call_001".to_string(),
                name: "get_schema".to_string(),
            },
        );
        let intent = inv.ingest(scheduled).expect("scheduled");
        assert!(matches!(intent, Intent::Idle));

        // Now send result
        let result = make_envelope(
            "evt_3",
            3,
            Event::CallResult {
                call_id: "call_001".to_string(),
                output: json!({"tables": []}),
            },
        );
        let intent = inv.ingest(result).expect("result");

        // Should advance to next phase and request generate_hypotheses
        match intent {
            Intent::RequestCall { name, .. } => assert_eq!(name, "generate_hypotheses"),
            _ => panic!("Expected RequestCall for generate_hypotheses"),
        }
    }

    #[test]
    fn test_unexpected_call_scheduled() {
        let mut inv = Investigator::new();

        // Start
        let start = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );
        inv.ingest(start).expect("start");

        // Wrong name in CallScheduled
        let scheduled = make_envelope(
            "evt_2",
            2,
            Event::CallScheduled {
                call_id: "call_001".to_string(),
                name: "wrong_name".to_string(),
            },
        );

        let err = inv.ingest(scheduled).expect_err("should fail");
        assert_eq!(err.kind, ErrorKind::UnexpectedCall);
    }

    #[test]
    fn test_cancel_in_progress() {
        let mut inv = Investigator::new();

        let start = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );
        inv.ingest(start).expect("start");

        let cancel = make_envelope("evt_2", 2, Event::Cancel);
        let intent = inv.ingest(cancel).expect("cancel");

        match intent {
            Intent::Error { message } => assert!(message.contains("cancelled")),
            _ => panic!("Expected Error intent"),
        }
        assert!(inv.is_terminal());
    }

    // Drives one complete request/schedule/result handshake per phase:
    // gather -> hypothesize -> evaluate (1 hypothesis) -> synthesize.
    #[test]
    fn test_full_investigation_cycle() {
        let mut inv = Investigator::new();
        let mut step = 0u64;

        // Helper to make envelopes with incrementing steps
        let mut next_envelope = |event: Event| {
            step += 1;
            make_envelope(&format!("evt_{}", step), step, event)
        };

        // Start
        let intent = inv
            .ingest(next_envelope(Event::Start {
                objective: "Find bug".to_string(),
                scope: test_scope(),
            }))
            .expect("start");
        assert!(matches!(intent, Intent::RequestCall { name, .. } if name == "get_schema"));

        // CallScheduled for get_schema
        inv.ingest(next_envelope(Event::CallScheduled {
            call_id: "c1".to_string(),
            name: "get_schema".to_string(),
        }))
        .expect("scheduled");

        // CallResult for get_schema
        let intent = inv
            .ingest(next_envelope(Event::CallResult {
                call_id: "c1".to_string(),
                output: json!({"tables": []}),
            }))
            .expect("result");
        assert!(matches!(intent, Intent::RequestCall { name, .. } if name == "generate_hypotheses"));

        // CallScheduled for generate_hypotheses
        inv.ingest(next_envelope(Event::CallScheduled {
            call_id: "c2".to_string(),
            name: "generate_hypotheses".to_string(),
        }))
        .expect("scheduled");

        // CallResult with 1 hypothesis
        let intent = inv
            .ingest(next_envelope(Event::CallResult {
                call_id: "c2".to_string(),
                output: json!([{"id": "h1", "title": "Bug in ETL"}]),
            }))
            .expect("result");
        assert!(matches!(intent, Intent::RequestCall { name, .. } if name == "evaluate_hypothesis"));

        // CallScheduled for evaluate_hypothesis
        inv.ingest(next_envelope(Event::CallScheduled {
            call_id: "c3".to_string(),
            name: "evaluate_hypothesis".to_string(),
        }))
        .expect("scheduled");

        // CallResult for evaluate
        let intent = inv
            .ingest(next_envelope(Event::CallResult {
                call_id: "c3".to_string(),
                output: json!({"supported": true}),
            }))
            .expect("result");
        assert!(matches!(intent, Intent::RequestCall { name, .. } if name == "synthesize"));

        // CallScheduled for synthesize
        inv.ingest(next_envelope(Event::CallScheduled {
            call_id: "c4".to_string(),
            name: "synthesize".to_string(),
        }))
        .expect("scheduled");

        // CallResult for synthesize
        let intent = inv
            .ingest(next_envelope(Event::CallResult {
                call_id: "c4".to_string(),
                output: json!({"insight": "Root cause found"}),
            }))
            .expect("result");

        assert!(matches!(intent, Intent::Finish { insight } if insight == "Root cause found"));
        assert!(inv.is_terminal());
    }

    #[test]
    fn test_snapshot_restore() {
        let mut inv = Investigator::new();

        let start = make_envelope(
            "evt_1",
            1,
            Event::Start {
                objective: "Test".to_string(),
                scope: test_scope(),
            },
        );
        inv.ingest(start).expect("start");

        let snapshot = inv.snapshot();
        let inv2 = Investigator::restore(snapshot);

        assert_eq!(inv.current_phase(), inv2.current_phase());
        assert_eq!(inv.current_step(), inv2.current_step());
    }

    #[test]
    fn test_query_without_event() {
        let inv = Investigator::new();

        // Query current intent without event
        let intent = inv.query();
        assert!(matches!(intent, Intent::Idle));
    }
}
diff --git a/core/crates/dataing_investigator/src/protocol.rs b/core/crates/dataing_investigator/src/protocol.rs
new file mode 100644
index 000000000..cbf16d2b4
--- /dev/null
+++ b/core/crates/dataing_investigator/src/protocol.rs
@@ -0,0 +1,404 @@
//! Protocol types for state machine communication.
//!
//! Defines the Event, Intent, and Envelope types that form the contract between
//! the Python runtime and Rust state machine.
//!
//! # Wire Format
//!
//! All events are wrapped in an Envelope:
//! ```json
//! {
//!   "protocol_version": 1,
//!   "event_id": "evt_abc123",
//!   "step": 5,
//!   "event": {"type": "CallResult", "payload": {...}}
//! }
//! ```
//!
//! # Stability
//!
//! These types form a versioned protocol contract. Changes must be
//! backwards-compatible (use `#[serde(default)]` for new fields).

use serde::{Deserialize, Serialize};
use serde_json::Value;

use crate::domain::{CallKind, Scope};

/// Envelope wrapping all events with protocol metadata.
///
/// The envelope provides:
/// - Protocol versioning for compatibility checks
/// - Event IDs for idempotency/deduplication
/// - Step numbers for ordering and monotonicity validation
///
/// All three metadata fields are validated by the state machine's
/// `ingest` before the inner event is applied.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Envelope {
    /// Protocol version (must match state machine's expected version).
    pub protocol_version: u32,

    /// Unique ID for this event (for deduplication).
    pub event_id: String,

    /// Workflow-owned step counter (must be monotonically increasing).
    pub step: u64,

    /// The actual event payload.
    pub event: Event,
}

/// Events sent from Python runtime to the Rust state machine.
///
/// Each event represents an external occurrence that may trigger
/// a state transition. Serialized adjacently tagged as
/// `{"type": ..., "payload": ...}` per the wire format above.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", content = "payload")]
pub enum Event {
    /// Start a new investigation.
    Start {
        /// Description of what to investigate.
        objective: String,
        /// Security scope for access control.
        scope: Scope,
    },

    /// Workflow has scheduled a call and assigned it an ID.
    ///
    /// This event is sent by the workflow after it receives a RequestCall
    /// intent and generates a call_id.
    CallScheduled {
        /// Workflow-generated unique ID for this call.
        call_id: String,
        /// Name of the operation (must match the RequestCall).
        name: String,
    },

    /// Result of an external call (LLM or tool).
    CallResult {
        /// ID matching the CallScheduled event.
        call_id: String,
        /// Result payload from the call.
        output: Value,
    },

    /// User response to a RequestUser intent.
    UserResponse {
        /// ID of the question being answered.
        question_id: String,
        /// User's response content.
        content: String,
    },

    /// Cancel the current investigation.
    Cancel,
}

/// Intents emitted by the state machine to request actions.
///
/// Each intent represents something the Python runtime should do.
/// The state machine cannot perform side effects directly.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", content = "payload")]
pub enum Intent {
    /// No action needed; state machine is waiting.
    Idle,

    /// Request an external call (LLM inference or tool invocation).
    ///
    /// The workflow generates the call_id and sends back a CallScheduled event.
    /// Deliberately carries no call_id — all IDs are externally generated.
    RequestCall {
        /// Type of call (LLM or Tool).
        kind: CallKind,
        /// Human-readable name of the operation.
        name: String,
        /// Arguments for the call.
        args: Value,
        /// Explanation of why this call is being made.
        reasoning: String,
    },

    /// Request user input (human-in-the-loop).
    RequestUser {
        /// Workflow-generated unique ID for this question.
        question_id: String,
        /// Question/prompt to present to the user.
        prompt: String,
        /// Timeout in seconds (0 means no timeout).
        #[serde(default)]
        timeout_seconds: u64,
    },

    /// Investigation finished successfully.
    Finish {
        /// Final insight/conclusion.
        insight: String,
    },

    /// Investigation ended with an error (non-retryable).
    Error {
        /// Error message.
        message: String,
    },
}

/// Error kinds for typed error handling.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ErrorKind {
    /// Event received in wrong phase.
    InvalidTransition,
    /// JSON serialization/deserialization error.
    Serialization,
    /// Protocol version mismatch.
    ProtocolMismatch,
    /// Duplicate event ID (already processed).
    /// NOTE(review): the machine currently answers duplicates idempotently
    /// in `ingest` rather than emitting this kind; retained for API users.
    DuplicateEvent,
    /// Step not monotonically increasing.
    StepViolation,
    /// Unexpected call_id received.
    UnexpectedCall,
    /// Internal invariant violated.
+ Invariant, +} + +/// Typed machine error for Result-based API. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct MachineError { + /// Error classification for retry decisions. + pub kind: ErrorKind, + /// Human-readable error message. + pub message: String, + /// Current phase when error occurred. + #[serde(default)] + pub phase: Option, + /// Current step when error occurred. + #[serde(default)] + pub step: Option, +} + +impl MachineError { + /// Create a new machine error. + pub fn new(kind: ErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + phase: None, + step: None, + } + } + + /// Add phase context to the error. + #[must_use] + pub fn with_phase(mut self, phase: impl Into) -> Self { + self.phase = Some(phase.into()); + self + } + + /// Add step context to the error. + #[must_use] + pub fn with_step(mut self, step: u64) -> Self { + self.step = Some(step); + self + } + + /// Check if this error is retryable. + #[must_use] + pub fn is_retryable(&self) -> bool { + // Only serialization errors might be retryable (e.g., transient I/O) + // All logic errors are permanent failures + matches!(self.kind, ErrorKind::Serialization) + } +} + +impl std::fmt::Display for MachineError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}: {}", self.kind, self.message)?; + if let Some(phase) = &self.phase { + write!(f, " (phase: {})", phase)?; + } + if let Some(step) = self.step { + write!(f, " (step: {})", step)?; + } + Ok(()) + } +} + +impl std::error::Error for MachineError {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::Scope; + use std::collections::BTreeMap; + + fn test_scope() -> Scope { + Scope { + user_id: "user1".to_string(), + tenant_id: "tenant1".to_string(), + permissions: vec!["read".to_string()], + extra: BTreeMap::new(), + } + } + + #[test] + fn test_envelope_serialization() { + let envelope = Envelope { + protocol_version: 1, + event_id: 
"evt_001".to_string(), + step: 5, + event: Event::Start { + objective: "Find root cause".to_string(), + scope: test_scope(), + }, + }; + + let json = serde_json::to_string(&envelope).expect("serialize"); + assert!(json.contains(r#""protocol_version":1"#)); + assert!(json.contains(r#""event_id":"evt_001""#)); + assert!(json.contains(r#""step":5"#)); + + let deser: Envelope = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(envelope, deser); + } + + #[test] + fn test_event_call_scheduled_serialization() { + let event = Event::CallScheduled { + call_id: "call_001".to_string(), + name: "get_schema".to_string(), + }; + + let json = serde_json::to_string(&event).expect("serialize"); + assert!(json.contains(r#""type":"CallScheduled""#)); + + let deser: Event = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(event, deser); + } + + #[test] + fn test_event_user_response_with_question_id() { + let event = Event::UserResponse { + question_id: "q_001".to_string(), + content: "Yes, proceed".to_string(), + }; + + let json = serde_json::to_string(&event).expect("serialize"); + assert!(json.contains(r#""question_id":"q_001""#)); + + let deser: Event = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(event, deser); + } + + #[test] + fn test_intent_request_call_no_id() { + let intent = Intent::RequestCall { + kind: CallKind::Tool, + name: "get_schema".to_string(), + args: serde_json::json!({"table": "orders"}), + reasoning: "Need schema context".to_string(), + }; + + let json = serde_json::to_string(&intent).expect("serialize"); + assert!(json.contains(r#""type":"RequestCall""#)); + // Should NOT contain call_id + assert!(!json.contains("call_id")); + + let deser: Intent = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(intent, deser); + } + + #[test] + fn test_intent_request_user_with_fields() { + let intent = Intent::RequestUser { + question_id: "q_001".to_string(), + prompt: "Should we proceed with the risky 
query?".to_string(), + timeout_seconds: 3600, + }; + + let json = serde_json::to_string(&intent).expect("serialize"); + assert!(json.contains(r#""question_id":"q_001""#)); + assert!(json.contains(r#""timeout_seconds":3600"#)); + + let deser: Intent = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(intent, deser); + } + + #[test] + fn test_machine_error_display() { + let err = MachineError::new(ErrorKind::InvalidTransition, "Start in wrong phase") + .with_phase("gathering_context") + .with_step(5); + + let display = err.to_string(); + assert!(display.contains("InvalidTransition")); + assert!(display.contains("Start in wrong phase")); + assert!(display.contains("gathering_context")); + assert!(display.contains("step: 5")); + } + + #[test] + fn test_error_kind_retryable() { + assert!(!MachineError::new(ErrorKind::InvalidTransition, "").is_retryable()); + assert!(!MachineError::new(ErrorKind::ProtocolMismatch, "").is_retryable()); + assert!(!MachineError::new(ErrorKind::DuplicateEvent, "").is_retryable()); + assert!(MachineError::new(ErrorKind::Serialization, "").is_retryable()); + } + + #[test] + fn test_all_events_roundtrip() { + let events = vec![ + Event::Start { + objective: "test".to_string(), + scope: test_scope(), + }, + Event::CallScheduled { + call_id: "c1".to_string(), + name: "get_schema".to_string(), + }, + Event::CallResult { + call_id: "c1".to_string(), + output: Value::Null, + }, + Event::UserResponse { + question_id: "q1".to_string(), + content: "ok".to_string(), + }, + Event::Cancel, + ]; + + for event in events { + let json = serde_json::to_string(&event).expect("serialize"); + let deser: Event = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(event, deser); + } + } + + #[test] + fn test_all_intents_roundtrip() { + let intents = vec![ + Intent::Idle, + Intent::RequestCall { + kind: CallKind::Tool, + name: "n".to_string(), + args: Value::Null, + reasoning: "r".to_string(), + }, + Intent::RequestUser { + question_id: 
"q".to_string(),
                prompt: "p".to_string(),
                timeout_seconds: 0,
            },
            Intent::Finish {
                insight: "i".to_string(),
            },
            Intent::Error {
                message: "e".to_string(),
            },
        ];

        for intent in intents {
            let json = serde_json::to_string(&intent).expect("serialize");
            let deser: Intent = serde_json::from_str(&json).expect("deserialize");
            assert_eq!(intent, deser);
        }
    }
}
diff --git a/core/crates/dataing_investigator/src/state.rs b/core/crates/dataing_investigator/src/state.rs
new file mode 100644
index 000000000..0b21ffb75
--- /dev/null
+++ b/core/crates/dataing_investigator/src/state.rs
@@ -0,0 +1,449 @@
//! Investigation state and phase tracking.
//!
//! Contains the core State struct and Phase enum for tracking
//! investigation progress. The state is versioned and serializable
//! for snapshot persistence.

use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::{BTreeMap, BTreeSet};

use crate::domain::{CallMeta, Scope};
use crate::PROTOCOL_VERSION;

/// Pending call awaiting scheduling by the workflow.
///
/// When the machine emits a RequestCall intent, it transitions to a
/// "pending" sub-state. The workflow generates a call_id and sends
/// a CallScheduled event, which completes the scheduling.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PendingCall {
    /// Name of the requested operation.
    pub name: String,
    /// Whether we're waiting for CallScheduled (true) or CallResult (false).
    pub awaiting_schedule: bool,
}

/// Current phase of an investigation.
///
/// Each phase represents a distinct step in the investigation workflow.
/// Phases with data use tagged serialization for explicit type identification.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", content = "data")]
pub enum Phase {
    /// Initial state before investigation starts.
    #[default]
    Init,

    /// Gathering schema and context from the data source.
    GatheringContext {
        /// Pending call info, if any.
        #[serde(default)]
        pending: Option<PendingCall>,
        /// Assigned call_id after CallScheduled, if scheduled.
        #[serde(default)]
        call_id: Option<String>,
    },

    /// Generating hypotheses using LLM.
    GeneratingHypotheses {
        /// Pending call info, if any.
        #[serde(default)]
        pending: Option<PendingCall>,
        /// Assigned call_id after CallScheduled.
        #[serde(default)]
        call_id: Option<String>,
    },

    /// Evaluating hypotheses by executing queries.
    EvaluatingHypotheses {
        /// Pending call info for next evaluation.
        #[serde(default)]
        pending: Option<PendingCall>,
        /// IDs of calls awaiting results.
        #[serde(default)]
        awaiting_results: Vec<String>,
        /// Total hypotheses to evaluate.
        #[serde(default)]
        total_hypotheses: usize,
        /// Completed evaluations.
        #[serde(default)]
        completed: usize,
    },

    /// Waiting for user input (human-in-the-loop).
    AwaitingUser {
        /// Unique ID for this question (workflow-generated).
        question_id: String,
        /// Prompt presented to the user.
        prompt: String,
        /// Timeout in seconds (0 = no timeout).
        #[serde(default)]
        timeout_seconds: u64,
    },

    /// Synthesizing findings into final insight.
    Synthesizing {
        /// Pending call info, if any.
        #[serde(default)]
        pending: Option<PendingCall>,
        /// Assigned call_id after CallScheduled.
        #[serde(default)]
        call_id: Option<String>,
    },

    /// Investigation completed successfully.
    Finished {
        /// Final insight/conclusion.
        insight: String,
    },

    /// Investigation failed with error.
    Failed {
        /// Error message describing the failure.
        error: String,
    },
}

/// Versioned investigation state.
///
/// Contains all data needed to reconstruct an investigation's progress.
/// The state is designed to be serializable for persistence and
/// resumption from snapshots.
///
/// # Workflow-Owned IDs and Steps
///
/// The workflow (Temporal) owns ID generation and step counting.
/// The state machine validates but does not generate these values.
/// This ensures deterministic replay.
///
/// # Idempotency
///
/// The `seen_event_ids` set enables event deduplication. Duplicate
/// events are silently ignored (returns current intent without
/// state change).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct State {
    /// Protocol version for this state snapshot.
    pub version: u32,

    /// Last processed step (workflow-owned, validated for monotonicity).
    pub step: u64,

    /// Investigation objective/description.
    #[serde(default)]
    pub objective: Option<String>,

    /// Security scope for access control.
    #[serde(default)]
    pub scope: Option<Scope>,

    /// Current phase of the investigation.
    pub phase: Phase,

    /// Collected evidence keyed by identifier.
    #[serde(default)]
    pub evidence: BTreeMap<String, Value>,

    /// Metadata for pending/completed calls.
    #[serde(default)]
    pub call_index: BTreeMap<String, CallMeta>,

    /// Order in which calls were completed.
    #[serde(default)]
    pub call_order: Vec<String>,

    /// Event IDs that have been processed (for deduplication).
    #[serde(default)]
    pub seen_event_ids: BTreeSet<String>,
}

impl Default for State {
    fn default() -> Self {
        Self::new()
    }
}

impl State {
    /// Create a new state with default values.
    ///
    /// Initializes with current protocol version, zero step,
    /// and Init phase.
    #[must_use]
    pub fn new() -> Self {
        State {
            version: PROTOCOL_VERSION,
            step: 0,
            objective: None,
            scope: None,
            phase: Phase::Init,
            evidence: BTreeMap::new(),
            call_index: BTreeMap::new(),
            call_order: Vec::new(),
            seen_event_ids: BTreeSet::new(),
        }
    }

    /// Check if an event ID has already been processed.
    #[must_use]
    pub fn is_duplicate_event(&self, event_id: &str) -> bool {
        self.seen_event_ids.contains(event_id)
    }

    /// Mark an event ID as processed.
    pub fn mark_event_processed(&mut self, event_id: String) {
        self.seen_event_ids.insert(event_id);
    }

    /// Update the step counter (workflow-owned).
    pub fn set_step(&mut self, step: u64) {
        self.step = step;
    }

    /// Check if state is in a terminal phase (Finished or Failed).
    #[must_use]
    pub fn is_terminal(&self) -> bool {
        matches!(self.phase, Phase::Finished { .. } | Phase::Failed { .. })
    }
}

impl PartialEq for State {
    fn eq(&self, other: &Self) -> bool {
        self.version == other.version
            && self.step == other.step
            && self.objective == other.objective
            && self.scope == other.scope
            && self.phase == other.phase
            && self.evidence == other.evidence
            && self.call_index == other.call_index
            && self.call_order == other.call_order
            && self.seen_event_ids == other.seen_event_ids
    }
}

/// Get a human-readable name for a phase.
#[must_use]
pub fn phase_name(phase: &Phase) -> &'static str {
    match phase {
        Phase::Init => "init",
        Phase::GatheringContext { .. } => "gathering_context",
        Phase::GeneratingHypotheses { .. } => "generating_hypotheses",
        Phase::EvaluatingHypotheses { .. } => "evaluating_hypotheses",
        Phase::AwaitingUser { .. } => "awaiting_user",
        Phase::Synthesizing { .. } => "synthesizing",
        Phase::Finished { .. } => "finished",
        Phase::Failed { ..
} => "failed", + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::domain::CallKind; + + #[test] + fn test_state_new() { + let state = State::new(); + + assert_eq!(state.version, PROTOCOL_VERSION); + assert_eq!(state.step, 0); + assert_eq!(state.phase, Phase::Init); + assert!(state.objective.is_none()); + assert!(state.scope.is_none()); + assert!(state.evidence.is_empty()); + assert!(state.call_index.is_empty()); + assert!(state.call_order.is_empty()); + assert!(state.seen_event_ids.is_empty()); + } + + #[test] + fn test_set_step() { + let mut state = State::new(); + + state.set_step(5); + assert_eq!(state.step, 5); + + state.set_step(10); + assert_eq!(state.step, 10); + } + + #[test] + fn test_duplicate_event_detection() { + let mut state = State::new(); + + assert!(!state.is_duplicate_event("evt_001")); + + state.mark_event_processed("evt_001".to_string()); + + assert!(state.is_duplicate_event("evt_001")); + assert!(!state.is_duplicate_event("evt_002")); + } + + #[test] + fn test_is_terminal() { + let mut state = State::new(); + assert!(!state.is_terminal()); + + state.phase = Phase::GatheringContext { + pending: None, + call_id: None, + }; + assert!(!state.is_terminal()); + + state.phase = Phase::Finished { + insight: "done".to_string(), + }; + assert!(state.is_terminal()); + + state.phase = Phase::Failed { + error: "error".to_string(), + }; + assert!(state.is_terminal()); + } + + #[test] + fn test_phase_serialization() { + let phases = vec![ + Phase::Init, + Phase::GatheringContext { + pending: Some(PendingCall { + name: "get_schema".to_string(), + awaiting_schedule: true, + }), + call_id: None, + }, + Phase::GatheringContext { + pending: None, + call_id: Some("call_1".to_string()), + }, + Phase::GeneratingHypotheses { + pending: None, + call_id: Some("call_2".to_string()), + }, + Phase::EvaluatingHypotheses { + pending: None, + awaiting_results: vec!["call_3".to_string(), "call_4".to_string()], + total_hypotheses: 3, + completed: 1, + }, + 
            Phase::AwaitingUser {
                question_id: "q_1".to_string(),
                prompt: "Proceed?".to_string(),
                timeout_seconds: 3600,
            },
            Phase::Synthesizing {
                pending: None,
                call_id: None,
            },
            Phase::Finished {
                insight: "Root cause found".to_string(),
            },
            Phase::Failed {
                error: "Timeout".to_string(),
            },
        ];

        // Every phase variant must survive a JSON round-trip unchanged.
        for phase in phases {
            let json = serde_json::to_string(&phase).expect("serialize");
            let deser: Phase = serde_json::from_str(&json).expect("deserialize");
            assert_eq!(phase, deser);
        }
    }

    #[test]
    fn test_phase_name() {
        assert_eq!(phase_name(&Phase::Init), "init");
        assert_eq!(
            phase_name(&Phase::GatheringContext {
                pending: None,
                call_id: None
            }),
            "gathering_context"
        );
        assert_eq!(
            phase_name(&Phase::AwaitingUser {
                question_id: "q".to_string(),
                prompt: "p".to_string(),
                timeout_seconds: 0,
            }),
            "awaiting_user"
        );
    }

    // Populates every State field, then verifies a full JSON round-trip
    // reproduces an equal State (exercises the manual PartialEq impl).
    #[test]
    fn test_state_serialization_roundtrip() {
        let mut state = State::new();
        state.objective = Some("Find null spike cause".to_string());
        state.scope = Some(Scope {
            user_id: "u1".to_string(),
            tenant_id: "t1".to_string(),
            permissions: vec!["read".to_string()],
            extra: BTreeMap::new(),
        });
        state.phase = Phase::GeneratingHypotheses {
            pending: None,
            call_id: Some("call_1".to_string()),
        };
        state.evidence.insert(
            "hyp_1".to_string(),
            serde_json::json!({"query_result": "5 nulls"}),
        );
        state.call_index.insert(
            "call_1".to_string(),
            CallMeta {
                id: "call_1".to_string(),
                name: "generate_hypotheses".to_string(),
                kind: CallKind::Llm,
                phase_context: "hypothesis_generation".to_string(),
                created_at_step: 2,
            },
        );
        state.call_order.push("call_1".to_string());
        state.step = 3;
        state.seen_event_ids.insert("evt_1".to_string());
        state.seen_event_ids.insert("evt_2".to_string());

        let json = serde_json::to_string(&state).expect("serialize");
        let deser: State = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(state, deser);
    }

    #[test]
fn test_state_defaults_on_missing_fields() { + // Simulate a minimal snapshot (forward compatibility test) + let json = r#"{ + "version": 1, + "step": 0, + "phase": {"type": "Init"} + }"#; + + let state: State = serde_json::from_str(json).expect("deserialize"); + + assert_eq!(state.version, 1); + assert!(state.objective.is_none()); + assert!(state.scope.is_none()); + assert!(state.evidence.is_empty()); + assert!(state.call_index.is_empty()); + assert!(state.call_order.is_empty()); + assert!(state.seen_event_ids.is_empty()); + } + + #[test] + fn test_btreeset_ordering() { + let mut state = State::new(); + state.mark_event_processed("evt_z".to_string()); + state.mark_event_processed("evt_a".to_string()); + state.mark_event_processed("evt_m".to_string()); + + let json = serde_json::to_string(&state).expect("serialize"); + + // BTreeSet ensures alphabetical ordering + let a_pos = json.find("evt_a").expect("evt_a"); + let m_pos = json.find("evt_m").expect("evt_m"); + let z_pos = json.find("evt_z").expect("evt_z"); + + assert!(a_pos < m_pos); + assert!(m_pos < z_pos); + } +} diff --git a/demo/fixtures/baseline/manifest.json b/demo/fixtures/baseline/manifest.json index 47219ed78..1edc3b0ca 100644 --- a/demo/fixtures/baseline/manifest.json +++ b/demo/fixtures/baseline/manifest.json @@ -1,7 +1,7 @@ { "name": "baseline", "description": "Clean e-commerce data with no anomalies", - "created_at": "2026-01-17T03:54:51.209198Z", + "created_at": "2026-01-19T00:42:41.782379Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/duplicates/manifest.json b/demo/fixtures/duplicates/manifest.json index d858abd6a..bf5fc8ee8 100644 --- a/demo/fixtures/duplicates/manifest.json +++ b/demo/fixtures/duplicates/manifest.json @@ -1,7 +1,7 @@ { "name": "duplicates", "description": "Retry logic creates duplicate order_items", - "created_at": "2026-01-17T03:54:53.607816Z", + "created_at": "2026-01-19T00:42:44.215517Z", "simulation_period": { "start": 
"2026-01-08", "end": "2026-01-14" @@ -48,56 +48,56 @@ ], "ground_truth": { "affected_order_ids": [ - "17c119c1-cf46-48a3-891f-db895e53c917", - "a9910358-8a39-40ae-a79b-2172be4acb95", - "223ec4f7-e03a-46bc-b9f5-628f5031146c", - "f6f9f123-f52a-46d8-af16-3aa7dec6402a", - "f10a1d80-5426-4ba7-b669-ad55ab42c22b", - "0f2a8a77-c807-4d37-a3f1-c16766531fcd", - "9fa84749-a1ed-4327-a583-527912cd17d1", - "4bf47a12-23be-4a2f-a7a3-46e467cfd16d", - "150813c3-5b73-4224-8f3a-ce88b78901a5", - "ff1b4fb6-59d2-413f-a55e-b25218902525", - "d6b5cc91-3ce5-44c7-8616-a9bcbe1a53ad", - "92010dad-ec49-40eb-a782-9926332ffd8b", - "144abc80-6bc0-49bc-9af1-18b375fb4403", - "19cb156f-6f09-4f6c-899e-98d37340a456", - "5c94d415-5b4e-4301-8878-aaf88cc31f3c", - "7a4d8667-d4b9-436f-b2b4-05d2d20a3d29", - "02ccd62a-7644-4d48-8714-d71d43e0779d", - "b0f5f2b0-e2aa-4b67-8d30-07fac26fa5d2", - "04ac9c9c-d713-40f7-9536-57d38e89d5be", - "9062bab5-ca1e-48cd-89a0-e13f43eec081", - "3bc49529-0ecb-4778-b79e-691a0dd5ae14", - "543d250f-2ae1-4744-a103-4ec76e9c4801", - "71ace7f3-5aeb-43e9-82d1-627620b4af59", - "d73ce6e1-af3d-40c3-85c1-98fc345efaa8", - "821c3aea-564e-4e40-a7bc-02b8837bed64", - "ea17256f-e409-4b4d-af05-e28fea456167", - "53b18a84-b0e7-4dba-889e-e867ef5b8aa8", - "b46ed012-1529-4d5f-a685-4f8317c3fc7c", - "2366d68e-1ae7-4677-8560-5098015c6164", - "e55027cc-471c-4bd8-b3ba-188d1fcdc2f5", - "c34f4569-310c-4723-979f-d07ae3a9008a", - "d138a321-2eeb-488e-ae7c-9bbbcd3e71e0", - "ffc196bf-9929-476c-be2f-11a4c1167d30", - "786adeff-975a-40a0-a722-940f0b1b0932", - "107a654f-fc65-444b-963a-209a90dc34c1", - "66100e2d-2ed5-40b8-acfc-6ef08614a3e3", - "d467e113-ad37-4539-b70e-744753a71d11", - "af523cec-afa6-4100-b47a-c2cfef17c191", - "1c9882b6-1652-4144-81d2-33cab2024d69", - "afc243d4-3fad-43dd-80a8-d63c613d2cea", - "e3a08733-2115-4856-a64d-468c6f7f568c", - "edd89905-075d-4ffc-b581-058f184747ba", - "25fe66a3-9d97-4281-94d1-8625b9030c45", - "444d78c0-5d4f-4070-b405-2872581d069e", - "ff5ca1be-5c9f-4c00-9335-f16344d47a14", - 
"672a5b74-2a8f-48ad-9a4b-bd8ed09a475d", - "89356a7a-a20e-4fa5-afdc-7f487fab74f5", - "ea7e871c-01d4-44b9-8cb4-ac4c3f04ce03", - "f6a5d868-8b84-45b5-8ae7-c5130ae2201a", - "18f1ccb0-e0dd-4c51-a3f4-47eb55b0612a" + "f6e016bb-4adf-40cd-bb7e-1de4095a243f", + "8f3fc261-39c4-46f6-9cfe-7abcb8776776", + "a41db462-9c69-40d0-87d5-8b1a95d7551a", + "bdcbefbc-71d3-45ad-917d-19071339b7c3", + "9d8c3730-531f-4621-beab-2a564fa9a6cd", + "95e689ad-7fb9-488f-819f-e5c2c0f323e3", + "e25e9a78-c706-425d-ae46-cdb2e6844fcc", + "bc098754-9bde-450c-93a6-5a64cf7eb7e0", + "ce47f6c0-ddbf-4bd6-89eb-fe9533de444f", + "1d5864bb-4f57-4748-9f42-40133004e59b", + "0fd17fc6-9e87-4790-b866-b8dd743e420d", + "66ef2095-b786-46c5-aad4-84383c9d9ea0", + "3aac8367-b738-43c1-b04a-2bc8179582cd", + "269764bd-bd01-481d-baf8-6c92a3985703", + "ee41d9ee-5cf0-4410-8add-24632f0ecbda", + "57d9004b-24b7-437a-8957-6e90b478ea18", + "1a11d061-9ee9-44e4-9994-eec1aae8ce3c", + "1a0c301a-9690-4090-adc3-3e4835fe711e", + "904204dd-5cfa-470b-bd9b-d5d38ca01826", + "eb3d3546-4b12-4a36-a033-485d5b61fd08", + "f418dc2c-489f-4679-8ec6-3ffd35b0e7dc", + "a634ee16-5f72-449d-bd68-bcfe3f99ecbf", + "646ff2ed-9003-4943-a987-052481b395f6", + "e82b19df-c2cb-4b16-893e-0c00f0afb790", + "354e8760-5ece-4a2c-a3f2-2d4cdbe5c82e", + "5a0e4141-2b2a-466e-8f1b-d82d28ed611c", + "a33d8990-e2f6-45f2-a4ab-859e02e6fa4a", + "197e8046-a181-4956-9a0f-8c197856076b", + "461414e4-9547-437a-a2d3-389f577bfcae", + "24efd67e-66eb-4691-aacb-e4c774a8c98c", + "3d22d1b3-8956-4118-bbd9-85fab394d43c", + "cae9de2e-5513-4428-8f09-b24cf5c9d668", + "50284447-6905-4c3a-baf4-3f57a0ec166a", + "158e28f5-cbe3-4032-b6f5-83c4b933aee5", + "1c9081e1-1127-46a0-942c-a8d24216ee15", + "6ce5beea-7374-42b9-9bb7-eb0290c3b5c4", + "f497e16e-ba2c-4202-a837-b2d391a722b0", + "5ce62cd7-9662-4043-8dd4-4874d1aea98d", + "750f903a-2409-4fb8-babe-1c4cd34a36b8", + "75fc2041-3818-4166-9ffd-e4d4932cbf33", + "c6b229d0-715f-40b3-bca8-3c9df2a2e909", + "9c7ca771-26fd-4537-aaef-93792b1a13e8", + 
"b71b5406-e6cd-4bca-9a24-f77e3ec8a54f", + "d66c53e6-ceda-492a-a8a4-767ca2089da6", + "5171b76e-3045-4d24-af75-55085e8a201d", + "e2355cc0-be5c-4e88-bbc0-cc7852ca75ed", + "bda1c508-ae6e-4870-98f9-a8027bb5d404", + "db53159a-e470-4153-83e7-5ba65509b932", + "29e98567-a7a3-4b98-9520-36f0bab6b5c5", + "60461146-d665-49d6-b3f3-574d1071baca" ], "affected_order_count": 81, "duplicate_items": 84 diff --git a/demo/fixtures/late_arriving/manifest.json b/demo/fixtures/late_arriving/manifest.json index 155434f1a..5b2924e19 100644 --- a/demo/fixtures/late_arriving/manifest.json +++ b/demo/fixtures/late_arriving/manifest.json @@ -1,7 +1,7 @@ { "name": "late_arriving", "description": "Mobile app queues events offline, batch uploaded later", - "created_at": "2026-01-17T03:54:54.788551Z", + "created_at": "2026-01-19T00:42:45.413200Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/null_spike/manifest.json b/demo/fixtures/null_spike/manifest.json index 14572a752..96797d29c 100644 --- a/demo/fixtures/null_spike/manifest.json +++ b/demo/fixtures/null_spike/manifest.json @@ -1,7 +1,7 @@ { "name": "null_spike", "description": "Mobile app bug causes NULL user_id in orders", - "created_at": "2026-01-17T03:54:52.006907Z", + "created_at": "2026-01-19T00:42:42.591135Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" @@ -64,106 +64,106 @@ ], "ground_truth": { "affected_order_ids": [ - "eb6dae9c-95b3-4ddd-b597-896c800517bf", - "e0e6cbe9-9a52-4e09-957d-f506e839c33f", - "53c48f6e-bb55-4c14-ac67-945c25c410bc", - "1c584a79-a22c-403b-9a5d-8b7fc1601e28", - "ae284596-18c3-4715-936d-e2249dffc9a2", - "75347dd9-934e-4533-a329-4e4b5b18f6d0", - "f101974e-0d1d-43c2-8bba-dfd81339be88", - "a928dcd5-4060-4fa9-96ae-5901540170bb", - "c264ad98-edbe-474c-a01f-cff69766574d", - "ee86baf5-b294-4ba5-bb0a-8dcb15922b78", - "d70623e8-55fd-44cf-8415-014615b84f7e", - "08c87641-5dee-4322-8387-99bb6135097b", - "b35f24ab-7ccc-47d1-9aa3-aff82515da3d", - 
"6dceb672-9c87-4819-87f3-53167afe6ef7", - "58cd9c5d-57b3-468c-b3f1-1a257c941719", - "35c4603f-25d8-410c-885a-b87c8c07861b", - "3c105ac7-8af3-4d03-ba0a-60990f34a7f3", - "c117923c-e6a8-4ba9-8d18-91b01e848eba", - "28ee3c3e-eaca-48f8-9aa4-4db181b663c9", - "d7642031-e61b-4fd8-8668-f727c4ae9db2", - "fda82c19-3601-4fa7-a845-92db5f3cfb1d", - "bed8976d-30bd-4931-a45e-2f380f72a2a1", - "3be8f6f3-e9ce-4daa-acb1-2e810eb0638f", - "a0fd0d9e-865c-4c3d-bf61-8c3013073648", - "b6ed24a6-4ae6-4bf4-b132-24272c4047ce", - "01392116-d8d3-4442-a3a9-59246a797c67", - "4fdebbd2-b39f-4e2c-a5eb-73ad64ad1cb9", - "6fed44d8-b051-46c1-93d3-cc4cae900e53", - "b8607d3f-b73a-4b33-abe3-70b1a1fd7147", - "8cc7c560-749b-4919-b32f-8d9713ab2912", - "d59c3b0e-331f-41e6-b08b-3a9030edc16c", - "d27cc08e-7890-4219-964d-086186083684", - "876d81cb-93bd-4816-974f-2e32d8414f28", - "65601420-824f-449c-af01-0b7a82f29f30", - "c456a2d0-8ca6-47fd-8d45-90d1d7af2d3e", - "9bb9ffc8-e8d5-4e1d-928b-b42208676c3f", - "cec4c1ff-edef-4ff1-879a-d1e30050ebf3", - "6d6b7959-893d-4d82-a593-7c5b7c384dd5", - "3802c5a8-af7d-485d-876a-45bd6853f99a", - "1c0bd3c3-530f-4f43-9fa0-10cc6fafd2c9", - "ad90fcb0-86c5-4d3e-a079-3a7e311c4ed4", - "81ac007e-8476-4ace-a8a0-e717c14f6082", - "ee256ff1-e21b-4192-a49d-f975608e5f34", - "2321edbb-b031-4356-9545-f16ed139e08a", - "25ea412a-993d-4b0f-8725-2645a1b4b317", - "e5004d4a-384e-4c45-b267-adf77c0e094d", - "c4f59803-1e0d-49bf-94fd-b770dd416dd8", - "5ecd47b5-c9ff-468b-a846-109598eff2e7", - "1d8fcfcc-eb35-46fb-8fcd-2fc46e9fdeb4", - "b406f2f1-c6f6-46ed-a385-c1098a539d00", - "5b5bc74f-9d52-40db-9e77-6f5e6b5cf8a9", - "bec920f3-6bfb-41c9-8e40-d5fc48e2c735", - "c60292ce-e410-4e97-b354-b661a0171417", - "c943fced-e110-4ae7-8cab-5c84019c5bc0", - "4b0cd2b3-a047-4537-9d35-5406b9e89c64", - "2acfe933-e495-4fa7-b402-339b883dae40", - "1954ba08-322c-45f4-99d3-0132e91f3af6", - "c8d7709d-afe9-4ef5-b8f5-3195df2a2f96", - "6fdd61c2-b0fa-45ee-915c-67aeacc6c0fd", - "2c6bde74-565f-403c-b385-a1cff7e615bf", - 
"31e8ecd5-0fde-4898-8ccc-877c77871983", - "87d38dff-ec32-4d13-a947-309b20985700", - "1de09ecc-c8a9-4592-8875-d03eb10fca9b", - "d132c688-408b-4286-bce7-d62c19b1d1e7", - "22c3f17e-3e61-4287-b3a0-3ab81bf5718d", - "089a38e6-e001-44fc-b483-48aa52e07db3", - "5b0514f7-6029-4e39-9aa5-40d05849c908", - "95425076-6661-4214-acc9-c634b631085f", - "811f7dab-587c-4cb8-b17f-34ece2a0ad0a", - "36fcb5d2-b28d-4511-a95d-fb3496d9f725", - "f0d9ff1b-8f17-42d0-8fbb-98dce2e7cc66", - "942364dd-8054-4237-ba46-949211e04449", - "00c61842-9061-4f43-9e05-0210d68db8af", - "d4f3f909-845d-43ff-9eb3-e2cc2df2eeaa", - "6c10d4e5-693e-4a10-b221-cba987af93c6", - "161ecaee-51bd-478a-993d-2a4361725597", - "9fa66a10-f943-4ebb-a975-50b683a6c0fc", - "3b607055-2a90-4010-bf10-b4b71c86dffc", - "fa586ae4-4325-4242-9729-10ba25102004", - "c44a6199-3f9f-4ee9-83d9-a9bd60a51462", - "2d4896d4-99cf-4ade-8ac1-831ea9baf3ba", - "6a1d8224-00f6-427c-ab0f-3560814cb1bf", - "53835a5b-47d1-442d-8d97-1dc8a14fe0d6", - "fcefb23c-4808-4a7a-bdaf-86e5efc81cd3", - "308bd93d-002c-4eb7-ad12-290b17d26a85", - "0d239a31-5338-42ec-8794-2030f91cf487", - "54da446f-24e4-45d1-8b95-683237663ea2", - "b8288a27-9884-4160-bf73-477c81dde48a", - "88b75787-244e-4391-afd5-84e53acd254d", - "7b951c24-b75a-4935-838b-34b25532b084", - "ef75bf9e-6dd8-4e03-93da-238e21e7ee85", - "899b24dc-9035-4c20-a7b0-515379a230af", - "fda1b073-26b2-40ad-b7c9-77ae65e015ec", - "f5b1055f-a535-4762-a792-ac7d1689cf51", - "060d4f8d-63b1-4e88-a94a-2b2e9cbc6fb4", - "7d6c7c39-e968-42ec-af59-7f3ccd7ea3a0", - "5ab44a58-6fa5-4cad-9a34-0085d7f5e759", - "81f4ff5d-6ec9-4afd-a93a-f709b92b7374", - "7514dfd1-3a9a-472a-aff3-6d36caed3aef", - "9a6aee31-40c5-4d07-9a31-b32f2d6b2b55" + "dc9fc17d-bb35-4c63-9b09-2e2ad8d91987", + "abbc26e9-40f1-4064-9a98-be60a35e0944", + "e32570e4-77a4-4827-816e-482bed91421d", + "4a9f9042-ced6-49ad-8a34-30409877623d", + "32928994-7bcd-4192-9173-5d6834924404", + "332fac3a-a436-4fa5-a126-ee728e03f468", + "fa8a27cf-e81d-43cd-b7f3-e58ce8db41aa", + 
"253bfc69-ac26-492f-8b12-fdd99f86c8ba", + "f191b885-9554-4fae-b27d-9e459457a404", + "01f92265-79d7-4f0c-aafc-8ee2057b4b5e", + "bbf7e7d3-5b23-49b7-bbf1-62110e404be1", + "8e950d80-30ce-4f55-8acd-b0cc1883a8aa", + "7247f95e-40a7-49f5-8cef-7228d3f15047", + "b4c73c36-492a-4363-b37c-4f3c601fd75c", + "51b6dc85-bfd1-499b-9bf6-9884ac013c8e", + "1bc973a9-62f7-4c54-93e4-4dc89f124d78", + "145602bc-ce2a-4cab-8d51-113a87c3e544", + "35c93521-151f-4412-ba7b-dbccea000203", + "bd628b18-afe6-49e1-a0c1-025721b35561", + "1b1b3736-f552-4dec-a32b-af94a8853677", + "cfcf9b76-5b79-490a-a19c-67f64a4b7a77", + "93ac5782-15bd-4afc-abf1-bfcf25bb7bac", + "224654a6-2b58-4f68-93f6-d60f0e4bbb4d", + "99a17a2d-d9fb-4bd4-ac06-b30892252aac", + "7a5b459a-5c73-450c-bd92-e1dba2d2da79", + "0620fcc6-dff7-4eb4-893c-cb86fb92eefb", + "3b33b780-e52a-47d8-aec3-25531d77592e", + "332bbd69-01a0-49c9-a8cf-feb2e05b7155", + "ffeb1c65-a24b-4278-be6e-d64ae29b2f7d", + "e8b69c69-aa8a-4034-853a-fb72a935dd47", + "d0a00a69-f780-4d4c-b746-268fd80f23e6", + "ca9e194d-baac-474f-a9a8-ef63a3ad27b4", + "f43b462f-5ecc-4b75-b698-143552e461c5", + "81a676d0-2280-41d7-8347-02b985aca60a", + "bf43e5c8-3b22-4fce-9252-2cadcfd378d6", + "feb096b2-68ae-401e-b631-01eba5e25198", + "49b532f5-5337-43c9-ac6e-bf68aef8b151", + "61a51c7a-0308-4d55-9be6-656676910dcb", + "01249ca1-2845-43c6-9273-fcf3b9176c93", + "16b681bc-a5b3-4a04-a3e0-3b1b0ed98e72", + "1d0c6822-4971-4a7d-a58a-aee54b4c673e", + "6ec8f786-0693-472b-b7ed-508df593b6cc", + "fd138546-7310-4478-a440-6625a6a65e9b", + "8d9d3f7e-e589-4633-8ac4-e0755ce165df", + "f1be8b42-12ac-4b91-8487-35f684c6f9f0", + "dd13c2e6-35bd-4786-a33e-5a894e9f06ea", + "2a5ec755-8c70-4e98-a053-bc0a7a476973", + "b6ba47fc-5b6a-4328-a14e-213bdcbc8ffc", + "fe6e34c4-5cd3-474d-b873-8eaaf21de30d", + "a22c1389-ec7f-4814-bfce-82c3954c4e01", + "965fe7e0-df10-46c3-831d-d9ea666e77b2", + "a190190d-506f-496e-8ef3-7613e4526463", + "18cb6150-f5fd-4ff7-9e04-e41c9d35df6c", + "4d682a4e-9725-46bc-b424-a58ac6af9b5d", + 
"81befa1e-7791-406c-b8db-30be90e801ea", + "26477ec1-eba1-4a14-90f8-3ed4be08be33", + "5544eccf-471d-47b4-b373-20415da96250", + "45955050-570e-4153-b50d-065efa066dbc", + "7e6bda4a-529a-4e05-85dc-485512ca1047", + "e79648af-3a90-4791-ae13-153499b8b546", + "41ec7b29-00fd-43bc-ae63-061ac755ef84", + "b05419d7-5fbe-43b9-9e11-9a8e8eac8a50", + "2011e5e2-b3b4-49ce-92d6-e23bde8ffa66", + "f1b67088-0b35-41f4-8d15-fcf5849a8bdf", + "268d2aad-6d66-4044-b22a-67d1bd9abaf8", + "09c472f9-6f4a-4e9c-988e-c84331b41d58", + "bf138735-4fd0-4fa6-a904-e87bee8ad117", + "3ac6a799-1468-40c6-a5cd-3b67d7302a48", + "a1cead01-c634-4483-96a2-7b9e00ab529f", + "1e46e8c3-a532-4dc2-8fac-dd697edbcaf3", + "870bc22e-5458-46c2-bf4b-7dd727c81726", + "f9d41a31-83df-4360-a9ca-20b8df77a983", + "cee27151-ee3c-4d22-abe2-309947fb7aa1", + "f07e3c51-0a06-44f4-a6d9-d3c2a51e10a0", + "c45138a7-33d6-47b7-99ee-2751180050cc", + "bf9fc641-30eb-4168-8cc9-b6caf473de45", + "d63f5810-770f-4cf9-af43-915f71c79399", + "10653d3f-7394-44c3-88ff-6fed2bda5f87", + "c2b39c78-eb9a-485e-9e6a-a282fe60faae", + "e900429c-b3b1-4043-bcd1-8f6b1766535d", + "06db01fb-6d06-403c-b6e9-75eabd349765", + "7ac6a762-7883-4268-b664-06d05d831c11", + "507388a9-f65a-4e8d-a781-767b6fd12796", + "704d3fde-418a-40c6-901c-f2ec1df25116", + "30b92827-f2b4-43c9-9029-61b90866a002", + "61fa991e-e7a6-491a-b764-43933b8a7265", + "81a266df-d7df-408e-9a94-cba542d18272", + "3fe7fcbd-1deb-4835-bdcf-00a93b02495c", + "616d3b29-597b-49fa-b64e-471eb4fccf84", + "cbd6e838-ec4d-42c5-ab74-5218945d2786", + "9907a06a-6f54-4560-afe9-39054a0dd700", + "3002d1db-8457-4f17-852d-c883cb37b57d", + "36339765-ccbd-4027-bdc3-764071394c4b", + "2d8dbcb1-44e0-4b80-b0b4-3c50f7d41bd5", + "4aba26e6-0998-419e-8451-cfbf400df221", + "42b25c7a-8bda-4d9d-a3f0-950a2eaf20c3", + "803ca8fa-6239-473e-8c10-55996e7bb37b", + "b8a9c932-bbb1-4aa0-b951-37c0143e9cb9", + "524bd778-9322-41fe-b206-20dd5c3dfce5", + "dd5df6f8-3fe5-455d-af3f-e5c98b2d8b56" ], "affected_row_count": 304 } diff --git 
a/demo/fixtures/orphaned_records/manifest.json b/demo/fixtures/orphaned_records/manifest.json index a020a5a84..0784d720b 100644 --- a/demo/fixtures/orphaned_records/manifest.json +++ b/demo/fixtures/orphaned_records/manifest.json @@ -1,7 +1,7 @@ { "name": "orphaned_records", "description": "User deletion job ran before order archival", - "created_at": "2026-01-17T03:54:55.554720Z", + "created_at": "2026-01-19T00:42:46.228449Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" @@ -48,44 +48,44 @@ ], "ground_truth": { "affected_order_ids": [ - "81f4ff5d-6ec9-4afd-a93a-f709b92b7374", - "aaf7c1c8-4721-41dd-a331-787f4918bcf3", - "384ca1e0-fc10-422c-9d99-3957ab637faf", - "92def114-277e-431c-b786-39272716a9a0", - "76ba05ea-11e2-4c88-92e1-7e47d23ca66d", - "aebc14b2-79ef-4ba7-b708-ef90b498f739", - "5d43e6d1-5c26-40c5-aba5-0f43e87385e7", - "034580c3-caf4-4695-b545-ec0c23c172c1", - "4a7ce359-eb77-48bb-aab7-f06c4fbc9d7c", - "eb9790f3-9ae8-435a-a0fc-4dea5f838c89", - "687dba83-0d5d-4fee-867f-850dfb6f7dbd", - "87a2cce3-5f8d-4e8f-80f3-423c6f303bbf", - "480a0756-e73d-47c9-8738-4a98d8911c31", - "f7227fcd-91ca-45fb-a591-bb34727b9f18", - "d36c0e64-9d4b-4531-b091-90be6f653363", - "0f7db5e0-075f-4545-b0aa-77d94fc5d075", - "2a35073a-1f7b-467c-8185-604e7ee949af", - "a92e8aca-bb6b-4bfe-99b7-d4cca6a48f06", - "948308fd-d8f6-44fd-bbc8-f0c62b5204c0", - "fcb1ce77-694f-4111-85c5-5b9825833396", - "ba15a053-159a-4ec6-a96e-fc740fb69f8d", - "46d21f32-6828-4917-9368-0875155ed936", - "bb8b1437-4003-4d0e-936c-64054f67ccbc", - "33407ed5-a828-46ae-a904-a178271ab06e", - "15c17342-0928-4dd5-a565-0a7da545f875", - "55e489bf-83a8-4371-81c9-1bb6e0cc79ee", - "aaa925f7-6450-49cc-9003-96aeb18c62ff", - "5e4f7684-6ae2-4bf5-b365-bdba35e1fe46", - "0df38eef-4567-4277-a98e-cc7181ae2b76", - "c1bfca6c-c06f-4540-9c36-640c599f1c1f", - "5d7ce34a-038a-43fe-8ca0-746d8ffae2e0", - "8b7e72ad-d430-47e4-9a7b-0b411e59e0d8", - "af66b87f-7e6b-48e1-902d-4f01460c2faf", - "137f9a12-d5aa-43cd-b3da-dc713c4618dd", - 
"f0869854-98a1-4cc8-9bdb-5f8b27ac2413", - "c3fbe30d-35c3-4de3-a53e-67908ac94a50", - "0ad55c45-b977-4711-b7bb-9110911405d2", - "c730e550-9d82-4a1e-9e5b-9a53a9ded987" + "b8a9c932-bbb1-4aa0-b951-37c0143e9cb9", + "0336e969-2575-44cc-ab30-d31344dd7f6f", + "f5ce49f7-7eee-4a98-8e5e-b193333a6342", + "f7cbc5b6-f330-483e-a3b3-9fa70f100ec5", + "63ac590e-2be4-47db-8f72-145e77f4ae44", + "a87c1cd1-aef8-4b4a-8670-8f5864a56f97", + "81a222df-2908-42a5-aef0-747b80f2ce57", + "32331874-c4e3-4870-b3f7-0b8aada7730b", + "344a6128-673b-4a9c-bdc1-03d73195b4a6", + "b3c0d9a3-f057-47db-aa93-2a64c9afbd58", + "ce8d61b4-d21e-4581-a31f-efdbfb763f2e", + "24499eda-99c4-470f-8849-d4ef8cdda3ae", + "2e53cea8-66fc-47ba-9c0b-5dbe92cc9ece", + "4dd42145-c49a-4e1d-b01b-2d6ebac669c4", + "5d6cfaaa-74a4-4d81-a641-b5a067387a49", + "33840d1c-4184-4b9c-920e-8e501911686b", + "828bd949-9173-4841-820b-6ccb1868e805", + "0917dbe7-e6dc-4674-aa32-20f74b7c7d30", + "cb6e42a3-03c5-4e4e-b8a6-f0aeaf523393", + "841a99d5-70df-4038-ab8a-966bbe40a3c0", + "e9ffdf1a-a07c-4904-a5ff-1458742b9e39", + "f623529b-a826-4d22-92ec-f09f6dd7d407", + "1b56d858-6b8e-43f1-afbf-b255525c2ec1", + "144704eb-6336-441d-acc3-2b35c7b833af", + "82df0f8b-a9e1-4396-b868-8dafcefc9fe4", + "ab4cb2a6-207e-49f2-8f01-6b126e8f559a", + "2aa6aa33-b204-4775-844e-445aa68521fa", + "f7e697b5-71ab-4fc3-ae3a-4fcba9ecbb30", + "148e390d-0b8e-4443-9f19-46aa44cbd3e5", + "8fd60cea-580a-4c5a-a1e1-1071e88687ab", + "4796467d-e4b3-42bc-b21f-2cc48faaac47", + "deb2207d-4d47-4df8-af53-c673cc1d717f", + "cdfcb7a6-474d-4ce8-a74d-1a32b948c4d8", + "3496e9fc-23f4-4238-b864-9ebd047ad3a8", + "cc610603-9c76-4866-aded-e8b6b3f05a5c", + "3b6aacfa-11e8-40ae-a784-626a654c95d9", + "fd198fcb-8de8-4f32-8ea3-7b09bfd8f23b", + "e9b1c78e-1da6-43e9-a919-246827d23a2a" ], "orphaned_order_count": 38, "deleted_user_count": 38 diff --git a/demo/fixtures/schema_drift/manifest.json b/demo/fixtures/schema_drift/manifest.json index c4802f17d..f4c49733b 100644 --- a/demo/fixtures/schema_drift/manifest.json +++ 
b/demo/fixtures/schema_drift/manifest.json @@ -1,7 +1,7 @@ { "name": "schema_drift", "description": "New product import job inserts price as string with currency", - "created_at": "2026-01-17T03:54:52.844917Z", + "created_at": "2026-01-19T00:42:43.418133Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/demo/fixtures/volume_drop/manifest.json b/demo/fixtures/volume_drop/manifest.json index a7d1797d9..e2f1871ca 100644 --- a/demo/fixtures/volume_drop/manifest.json +++ b/demo/fixtures/volume_drop/manifest.json @@ -1,7 +1,7 @@ { "name": "volume_drop", "description": "CDN misconfiguration blocked tracking pixel for EU users", - "created_at": "2026-01-17T03:54:52.804325Z", + "created_at": "2026-01-19T00:42:43.384917Z", "simulation_period": { "start": "2026-01-08", "end": "2026-01-14" diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponse.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponse.ts new file mode 100644 index 000000000..b38503106 --- /dev/null +++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponse.ts @@ -0,0 +1,19 @@ +/** + * Generated by orval v6.31.0 🍺 + * Do not edit manually. + * dataing + * Autonomous Data Quality Investigation + * OpenAPI spec version: 2.0.0 + */ +import type { DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs } from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs"; +import type { DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion } from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion"; + +/** + * Response for testing a connection. 
+ */ +export interface DataingEntrypointsApiRoutesDatasourcesTestConnectionResponse { + latency_ms?: DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs; + message: string; + server_version?: DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion; + success: boolean; +} diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs.ts new file mode 100644 index 000000000..10ba19e74 --- /dev/null +++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs.ts @@ -0,0 +1,10 @@ +/** + * Generated by orval v6.31.0 🍺 + * Do not edit manually. + * dataing + * Autonomous Data Quality Investigation + * OpenAPI spec version: 2.0.0 + */ + +export type DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs = + number | null; diff --git a/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion.ts b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion.ts new file mode 100644 index 000000000..c0a273ef6 --- /dev/null +++ b/frontend/app/src/lib/api/model/dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion.ts @@ -0,0 +1,10 @@ +/** + * Generated by orval v6.31.0 🍺 + * Do not edit manually. 
+ * dataing + * Autonomous Data Quality Investigation + * OpenAPI spec version: 2.0.0 + */ + +export type DataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion = + string | null; diff --git a/frontend/app/src/lib/api/model/index.ts b/frontend/app/src/lib/api/model/index.ts index 24161546e..5f22cbd94 100644 --- a/frontend/app/src/lib/api/model/index.ts +++ b/frontend/app/src/lib/api/model/index.ts @@ -84,6 +84,9 @@ export * from "./dataSourceResponseLastHealthCheckAt"; export * from "./dataingEntrypointsApiRoutesCredentialsTestConnectionResponse"; export * from "./dataingEntrypointsApiRoutesCredentialsTestConnectionResponseError"; export * from "./dataingEntrypointsApiRoutesCredentialsTestConnectionResponseTablesAccessible"; +export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponse"; +export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseLatencyMs"; +export * from "./dataingEntrypointsApiRoutesDatasourcesTestConnectionResponseServerVersion"; export * from "./datasetDetailResponse"; export * from "./datasetDetailResponseCatalogName"; export * from "./datasetDetailResponseColumnCount"; @@ -405,8 +408,10 @@ export * from "./tenantSettingsSlackChannel"; export * from "./testConnectionRequest"; export * from "./testConnectionRequestConfig"; export * from "./testConnectionResponse"; +export * from "./testConnectionResponseError"; export * from "./testConnectionResponseLatencyMs"; export * from "./testConnectionResponseServerVersion"; +export * from "./testConnectionResponseTablesAccessible"; export * from "./tokenResponse"; export * from "./tokenResponseOrg"; export * from "./tokenResponseOrgAnyOf"; diff --git a/frontend/app/src/lib/api/model/testConnectionResponseError.ts b/frontend/app/src/lib/api/model/testConnectionResponseError.ts new file mode 100644 index 000000000..3b625e785 --- /dev/null +++ b/frontend/app/src/lib/api/model/testConnectionResponseError.ts @@ -0,0 +1,9 @@ +/** + * Generated by orval 
v6.31.0 🍺 + * Do not edit manually. + * dataing + * Autonomous Data Quality Investigation + * OpenAPI spec version: 2.0.0 + */ + +export type TestConnectionResponseError = string | null; diff --git a/frontend/app/src/lib/api/model/testConnectionResponseTablesAccessible.ts b/frontend/app/src/lib/api/model/testConnectionResponseTablesAccessible.ts new file mode 100644 index 000000000..58794ab9e --- /dev/null +++ b/frontend/app/src/lib/api/model/testConnectionResponseTablesAccessible.ts @@ -0,0 +1,9 @@ +/** + * Generated by orval v6.31.0 🍺 + * Do not edit manually. + * dataing + * Autonomous Data Quality Investigation + * OpenAPI spec version: 2.0.0 + */ + +export type TestConnectionResponseTablesAccessible = number | null; diff --git a/justfile b/justfile index 981d74c44..7b34f1b73 100644 --- a/justfile +++ b/justfile @@ -10,6 +10,8 @@ default: setup: @echo "Setting up dataing (CE)..." uv sync + @echo "Building Rust bindings..." + just rust-dev @echo "Setting up frontend app..." cd frontend/app && pnpm install @echo "Setting up landing site..." @@ -19,6 +21,38 @@ setup: pre-commit install @echo "Setup complete!" +# ============================================ +# Rust Commands +# ============================================ + +# Check Rust toolchain is installed +rust-check: + @command -v cargo >/dev/null || (echo "Install Rust: https://rustup.rs" && exit 1) + +# Build Rust crates (release mode) +rust-build: rust-check + cd core && cargo build --release + +# Build and install Rust bindings to Python venv (development) +rust-dev: rust-check + #!/usr/bin/env bash + set -euo pipefail + echo "Building Rust bindings..." + cd core/bindings/python && uvx maturin develop --uv + echo "Rust bindings installed!" 
+ +# Run Rust tests +rust-test: rust-check + cd core && cargo test + +# Run Rust clippy linter +rust-lint: rust-check + cd core && cargo clippy --workspace + +# Clean Rust build artifacts +rust-clean: + rm -rf core/target + # Install/update pre-commit hooks pre-commit-install: pre-commit install @@ -179,7 +213,7 @@ typecheck: cd frontend/app && pnpm typecheck # Clean build artifacts -clean: +clean: rust-clean rm -rf dist .pytest_cache .ruff_cache .mypy_cache rm -rf python-packages/dataing/.pytest_cache python-packages/dataing/.ruff_cache rm -rf python-packages/dataing-ee/.pytest_cache python-packages/dataing-ee/.ruff_cache diff --git a/pyproject.toml b/pyproject.toml index 1ef499c8d..f988c0c54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ license = { text = "MIT" } authors = [{ name = "dataing team" }] dependencies = [ "bond", + "investigator", "fastapi[standard]>=0.109.0", "uvicorn[standard]>=0.27.0", "pydantic[email]>=2.5.0", @@ -160,3 +161,6 @@ dev = [ [tool.uv.sources] bond = { path = "python-packages/bond", editable = true } +investigator = { path = "python-packages/investigator", editable = true } +# Note: dataing-investigator is built separately via `just rust-dev` (maturin) +# It cannot be included as a uv source because it requires native compilation diff --git a/python-packages/dataing/openapi.json b/python-packages/dataing/openapi.json index c003937ac..92ffa9a8e 100644 --- a/python-packages/dataing/openapi.json +++ b/python-packages/dataing/openapi.json @@ -12958,4 +12958,4 @@ } } } -} +} \ No newline at end of file diff --git a/python-packages/dataing/tests/integration/temporal/__init__.py b/python-packages/dataing/tests/integration/temporal/__init__.py new file mode 100644 index 000000000..d111778c3 --- /dev/null +++ b/python-packages/dataing/tests/integration/temporal/__init__.py @@ -0,0 +1 @@ +"""Temporal workflow integration tests.""" diff --git a/python-packages/dataing/tests/integration/temporal/test_investigator_workflow.py 
b/python-packages/dataing/tests/integration/temporal/test_investigator_workflow.py new file mode 100644 index 000000000..d48523f31 --- /dev/null +++ b/python-packages/dataing/tests/integration/temporal/test_investigator_workflow.py @@ -0,0 +1,345 @@ +"""End-to-end tests for InvestigatorWorkflow with Rust state machine. + +These tests verify the full Temporal + Rust state machine integration. +They require a running Temporal server at localhost:7233. + +Run with: pytest -m temporal +Skip if Temporal unavailable: tests will be automatically skipped. +""" + +from __future__ import annotations + +import asyncio +import os +import uuid +from typing import Any + +import pytest + +# Skip all tests if Temporal is not available +pytestmark = [ + pytest.mark.temporal, + pytest.mark.skipif( + os.environ.get("SKIP_TEMPORAL_TESTS", "1") == "1", + reason="SKIP_TEMPORAL_TESTS=1 or Temporal server not available", + ), +] + +try: + from temporalio.client import Client + from temporalio.worker import Worker + + from investigator.temporal import ( + BrainStepInput, + BrainStepOutput, + InvestigatorInput, + InvestigatorResult, + InvestigatorStatus, + InvestigatorWorkflow, + brain_step, + ) + + TEMPORAL_AVAILABLE = True +except ImportError: + TEMPORAL_AVAILABLE = False + Client = None # type: ignore[misc, assignment] + Worker = None # type: ignore[misc, assignment] + + +TASK_QUEUE = "test-investigator-queue" + + +@pytest.fixture +async def temporal_client() -> Client: + """Connect to Temporal server.""" + if not TEMPORAL_AVAILABLE: + pytest.skip("temporalio not installed") + + try: + client = await Client.connect("localhost:7233") + return client + except Exception as e: + pytest.skip(f"Temporal server not available: {e}") + + +@pytest.fixture +async def worker(temporal_client: Client): + """Start a worker for the test queue.""" + async with Worker( + temporal_client, + task_queue=TASK_QUEUE, + workflows=[InvestigatorWorkflow], + activities=[brain_step], + ): + yield + + 
+@pytest.fixture +def test_scope() -> dict[str, Any]: + """Create a test scope.""" + return { + "user_id": "test-user", + "tenant_id": "test-tenant", + "permissions": ["orders", "customers"], + } + + +class TestInvestigatorWorkflowE2E: + """End-to-end tests for the InvestigatorWorkflow.""" + + @pytest.mark.asyncio + async def test_full_investigation_lifecycle( + self, temporal_client: Client, worker: None, test_scope: dict[str, Any] + ) -> None: + """Test complete investigation from start to finish.""" + workflow_id = f"test-investigation-{uuid.uuid4()}" + + handle = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id, + objective="Find the root cause of null spike in orders table", + scope=test_scope, + ), + id=workflow_id, + task_queue=TASK_QUEUE, + ) + + # Wait for result with timeout + result: InvestigatorResult = await asyncio.wait_for( + handle.result(), timeout=60.0 + ) + + # Verify result + assert result.investigation_id == workflow_id + assert result.status == "completed" + assert result.insight is not None + assert result.steps > 0 + assert result.trace_id != "" + + @pytest.mark.asyncio + async def test_query_status( + self, temporal_client: Client, worker: None, test_scope: dict[str, Any] + ) -> None: + """Test querying workflow status.""" + workflow_id = f"test-query-{uuid.uuid4()}" + + handle = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id, + objective="Test status query", + scope=test_scope, + ), + id=workflow_id, + task_queue=TASK_QUEUE, + ) + + # Query status while running + await asyncio.sleep(0.1) # Give workflow time to start + status: InvestigatorStatus = await handle.query( + InvestigatorWorkflow.get_status + ) + + assert status.investigation_id == workflow_id + assert status.step >= 0 + assert not status.is_terminal # Should still be running + + # Wait for completion + await asyncio.wait_for(handle.result(), 
timeout=60.0) + + @pytest.mark.asyncio + async def test_cancel_signal( + self, temporal_client: Client, worker: None, test_scope: dict[str, Any] + ) -> None: + """Test cancelling investigation via signal.""" + workflow_id = f"test-cancel-{uuid.uuid4()}" + + handle = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id, + objective="Test cancellation", + scope=test_scope, + ), + id=workflow_id, + task_queue=TASK_QUEUE, + ) + + # Give workflow time to start + await asyncio.sleep(0.1) + + # Send cancel signal + await handle.signal(InvestigatorWorkflow.cancel) + + # Wait for result + result: InvestigatorResult = await asyncio.wait_for( + handle.result(), timeout=10.0 + ) + + assert result.status == "cancelled" + + @pytest.mark.asyncio + async def test_deterministic_replay( + self, temporal_client: Client, worker: None, test_scope: dict[str, Any] + ) -> None: + """Verify workflow replays deterministically. + + This test runs the same workflow twice and verifies consistent results. + Temporal's replay mechanism ensures deterministic execution. 
+ """ + # First run + workflow_id_1 = f"test-replay-1-{uuid.uuid4()}" + handle_1 = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id_1, + objective="Deterministic test", + scope=test_scope, + ), + id=workflow_id_1, + task_queue=TASK_QUEUE, + ) + result_1: InvestigatorResult = await asyncio.wait_for( + handle_1.result(), timeout=60.0 + ) + + # Second run with same input + workflow_id_2 = f"test-replay-2-{uuid.uuid4()}" + handle_2 = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id_2, + objective="Deterministic test", + scope=test_scope, + ), + id=workflow_id_2, + task_queue=TASK_QUEUE, + ) + result_2: InvestigatorResult = await asyncio.wait_for( + handle_2.result(), timeout=60.0 + ) + + # Both should complete with same status + assert result_1.status == result_2.status == "completed" + # Same number of steps (deterministic) + assert result_1.steps == result_2.steps + # Same insight (deterministic state machine) + assert result_1.insight == result_2.insight + + +class TestBrainStepActivity: + """Unit tests for the brain_step activity.""" + + @pytest.mark.asyncio + async def test_brain_step_new_investigator(self) -> None: + """Test brain_step with new investigator.""" + if not TEMPORAL_AVAILABLE: + pytest.skip("temporalio not installed") + + import json + + start_event = json.dumps({ + "type": "Start", + "payload": { + "objective": "Test", + "scope": { + "user_id": "u1", + "tenant_id": "t1", + "permissions": [], + }, + }, + }) + + input_data = BrainStepInput(state_json=None, event_json=start_event) + + # Call activity directly (not through Temporal) + result = await brain_step(input_data) + + assert result.new_state_json is not None + assert result.intent["type"] == "Call" + assert result.intent["payload"]["name"] == "get_schema" + + @pytest.mark.asyncio + async def test_brain_step_restore_and_continue(self) -> None: + """Test 
brain_step with restored state.""" + if not TEMPORAL_AVAILABLE: + pytest.skip("temporalio not installed") + + import json + + # First step to get initial state + start_event = json.dumps({ + "type": "Start", + "payload": { + "objective": "Test", + "scope": {"user_id": "u1", "tenant_id": "t1", "permissions": []}, + }, + }) + + result1 = await brain_step(BrainStepInput(state_json=None, event_json=start_event)) + call_id = result1.intent["payload"]["call_id"] + + # Second step with CallResult + call_result_event = json.dumps({ + "type": "CallResult", + "payload": { + "call_id": call_id, + "output": {"tables": [{"name": "orders"}]}, + }, + }) + + result2 = await brain_step( + BrainStepInput( + state_json=result1.new_state_json, + event_json=call_result_event, + ) + ) + + # Should progress to next phase + assert result2.intent["type"] == "Call" + assert result2.intent["payload"]["name"] == "generate_hypotheses" + + +class TestSignalDeduplication: + """Test signal deduplication in the workflow.""" + + @pytest.mark.asyncio + async def test_duplicate_signals_ignored( + self, temporal_client: Client, worker: None, test_scope: dict[str, Any] + ) -> None: + """Test that duplicate signals are ignored.""" + workflow_id = f"test-dedup-{uuid.uuid4()}" + + handle = await temporal_client.start_workflow( + InvestigatorWorkflow.run, + InvestigatorInput( + investigation_id=workflow_id, + objective="Test deduplication", + scope=test_scope, + ), + id=workflow_id, + task_queue=TASK_QUEUE, + ) + + # Send the same signal multiple times with same ID + signal_id = f"sig-{uuid.uuid4()}" + await handle.signal( + InvestigatorWorkflow.user_response, signal_id, "response-1" + ) + await handle.signal( + InvestigatorWorkflow.user_response, signal_id, "response-2" + ) + await handle.signal( + InvestigatorWorkflow.user_response, signal_id, "response-3" + ) + + # Cancel to end the workflow + await handle.signal(InvestigatorWorkflow.cancel) + + result: InvestigatorResult = await asyncio.wait_for( + 
handle.result(), timeout=10.0 + ) + + # Workflow should complete (was cancelled) + assert result.status == "cancelled" diff --git a/python-packages/investigator/pyproject.toml b/python-packages/investigator/pyproject.toml new file mode 100644 index 000000000..8e5924de9 --- /dev/null +++ b/python-packages/investigator/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "investigator" +version = "0.1.0" +description = "Rust-powered investigation state machine runtime" +requires-python = ">=3.11" +dependencies = [] +# Note: dataing-investigator (Rust bindings) is installed separately via maturin +# It cannot be listed as a dependency because it requires native compilation + +[project.optional-dependencies] +temporal = ["temporalio>=1.0.0"] +dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0"] + +[tool.hatch.build.targets.wheel] +packages = ["src/investigator"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/python-packages/investigator/src/investigator/__init__.py b/python-packages/investigator/src/investigator/__init__.py new file mode 100644 index 000000000..a955c6cdf --- /dev/null +++ b/python-packages/investigator/src/investigator/__init__.py @@ -0,0 +1,92 @@ +"""Investigator - Rust-powered investigation state machine runtime. + +This package provides a Python interface to the Rust state machine for +data quality investigations. The state machine manages the investigation +lifecycle with deterministic transitions and versioned snapshots. 
+ +Example: + >>> from investigator import Investigator + >>> inv = Investigator() + >>> inv.current_phase() + 'init' + """ + +from dataing_investigator import ( + Investigator, + InvalidTransitionError, + SerializationError, + StateError, + protocol_version, +) + +from investigator.envelope import ( + Envelope, + create_child_envelope, + create_trace, + extract_trace_id, + unwrap, + wrap, +) +from investigator.runtime import ( + InvestigationError, + LocalInvestigator, + run_local, +) +from investigator.security import ( + SecurityViolation, + create_scope, + validate_tool_call, +) +# Temporal integration (requires temporalio) +try: + from investigator.temporal import ( + BrainStepInput, + BrainStepOutput, + InvestigatorInput, + InvestigatorResult, + InvestigatorStatus, + InvestigatorWorkflow, + brain_step, + ) + + _HAS_TEMPORAL = True +except ImportError: + _HAS_TEMPORAL = False + +__all__ = [ + # Rust bindings + "Investigator", + "StateError", + "SerializationError", + "InvalidTransitionError", + "protocol_version", + # Envelope + "Envelope", + "wrap", + "unwrap", + "create_trace", + "extract_trace_id", + "create_child_envelope", + # Security + "SecurityViolation", + "validate_tool_call", + "create_scope", + # Runtime + "run_local", + "LocalInvestigator", + "InvestigationError", +] + +# Add temporal exports if available +if _HAS_TEMPORAL: + __all__ += [ + "InvestigatorWorkflow", + "InvestigatorInput", + "InvestigatorResult", + "InvestigatorStatus", + "brain_step", + "BrainStepInput", + "BrainStepOutput", + ] + +__version__ = "0.1.0" diff --git a/python-packages/investigator/src/investigator/envelope.py b/python-packages/investigator/src/investigator/envelope.py new file mode 100644 index 000000000..6b7712231 --- /dev/null +++ b/python-packages/investigator/src/investigator/envelope.py @@ -0,0 +1,118 @@ +"""Envelope module for distributed tracing context propagation. 
+ +Provides correlation IDs for tracing events through the investigation +state machine and external services. +""" + +from __future__ import annotations + +import json +import uuid +from typing import Any, TypedDict + + +class Envelope(TypedDict): + """Envelope for wrapping payloads with tracing context. + + Attributes: + id: Unique identifier for this envelope. + trace_id: Trace ID linking related events. + parent_id: Optional parent envelope ID for causality tracking. + payload: The wrapped payload data. + """ + + id: str + trace_id: str + parent_id: str | None + payload: dict[str, Any] + + +def wrap( + payload: dict[str, Any], + trace_id: str, + parent_id: str | None = None, +) -> str: + """Wrap a payload in an envelope for tracing. + + Args: + payload: The data to wrap. + trace_id: The trace ID for correlation. + parent_id: Optional parent envelope ID. + + Returns: + JSON string of the envelope. + """ + envelope: Envelope = { + "id": str(uuid.uuid4()), + "trace_id": trace_id, + "parent_id": parent_id, + "payload": payload, + } + return json.dumps(envelope) + + +def unwrap(json_str: str) -> Envelope: + """Unwrap an envelope from a JSON string. + + Args: + json_str: JSON string of an envelope. + + Returns: + The parsed Envelope. + + Raises: + json.JSONDecodeError: If JSON is invalid. + KeyError: If required fields are missing. + """ + data = json.loads(json_str) + # Validate required fields + required = {"id", "trace_id", "parent_id", "payload"} + missing = required - set(data.keys()) + if missing: + raise KeyError(f"Missing envelope fields: {missing}") + return Envelope( + id=data["id"], + trace_id=data["trace_id"], + parent_id=data["parent_id"], + payload=data["payload"], + ) + + +def create_trace() -> str: + """Create a new trace ID. + + For Temporal workflows, use workflow.uuid4() instead for + deterministic replay. + + Returns: + A new UUID string for use as a trace ID. 
+ """ + return str(uuid.uuid4()) + + +def extract_trace_id(envelope: Envelope) -> str: + """Extract the trace ID from an envelope. + + Args: + envelope: The envelope to extract from. + + Returns: + The trace ID. + """ + return envelope["trace_id"] + + +def create_child_envelope( + parent: Envelope, + payload: dict[str, Any], +) -> str: + """Create a child envelope linked to a parent. + + Args: + parent: The parent envelope. + payload: The child payload data. + + Returns: + JSON string of the child envelope. + """ + return wrap(payload, parent["trace_id"], parent["id"]) diff --git a/python-packages/investigator/src/investigator/runtime.py b/python-packages/investigator/src/investigator/runtime.py new file mode 100644 index 000000000..b08496d1f --- /dev/null +++ b/python-packages/investigator/src/investigator/runtime.py @@ -0,0 +1,350 @@ +"""Runtime module for local investigation execution. + +Provides a local execution loop for running investigations outside of Temporal. +Useful for testing and simple deployments. +""" + +from __future__ import annotations + +import json +import uuid +from typing import Any, Awaitable, Callable, TypeVar + +from dataing_investigator import Investigator, protocol_version + +from .envelope import create_trace +from .security import validate_tool_call + +# Type alias for tool executor function (async: result is awaited in run_local) +ToolExecutor = Callable[[str, dict[str, Any]], Awaitable[Any]] +UserResponder = Callable[[str, str], str] # (question_id, prompt) -> response + +T = TypeVar("T") + + +class InvestigationError(Exception): + """Raised when an investigation fails.""" + + pass + + +class EnvelopeBuilder: + """Builds event envelopes with monotonically increasing steps.""" + + def __init__(self) -> None: + """Initialize envelope builder.""" + self._step = 0 + + def build(self, event: dict[str, Any]) -> str: + """Build an envelope for the given event. + + Args: + event: The event payload. + + Returns: + JSON string of the envelope. 
+ """ + self._step += 1 + envelope = { + "protocol_version": protocol_version(), + "event_id": f"evt_{uuid.uuid4().hex[:12]}", + "step": self._step, + "event": event, + } + return json.dumps(envelope) + + +async def run_local( + objective: str, + scope: dict[str, Any], + tool_executor: ToolExecutor, + user_responder: UserResponder | None = None, + max_steps: int = 100, +) -> dict[str, Any]: + """Run an investigation locally (not in Temporal). + + This provides a simple execution loop for running investigations + without the overhead of Temporal. Useful for: + - Local testing and development + - Simple deployments without durability requirements + - Debugging investigation logic + + Args: + objective: The investigation objective/description. + scope: Security scope with user_id, tenant_id, permissions. + tool_executor: Async function to execute tool calls. + Signature: (tool_name: str, args: dict) -> Any + user_responder: Optional function to get user responses for HITL. + Signature: (question_id: str, prompt: str) -> str + If None and user response is needed, raises RuntimeError. + max_steps: Maximum number of steps before aborting (prevents infinite loops). + + Returns: + Final investigation result from the Finish intent. + + Raises: + InvestigationError: If investigation fails or max_steps exceeded. + SecurityViolation: If a tool call violates security policy. + RuntimeError: If user response needed but no responder provided. 
+ """ + inv = Investigator() + trace_id = create_trace() + envelope_builder = EnvelopeBuilder() + + # Build and send Start event + start_event = {"type": "Start", "payload": {"objective": objective, "scope": scope}} + envelope = envelope_builder.build(start_event) + intent = _ingest_and_parse(inv, envelope) + + loop_count = 0 + while loop_count < max_steps: + loop_count += 1 + + if intent["type"] == "Idle": + # State machine waiting - query without event + intent = json.loads(inv.query()) + + elif intent["type"] == "RequestCall": + payload = intent["payload"] + tool_name = payload["name"] + args = payload["args"] + + # Generate a call_id and send CallScheduled + call_id = f"call_{uuid.uuid4().hex[:12]}" + scheduled_event = { + "type": "CallScheduled", + "payload": {"call_id": call_id, "name": tool_name}, + } + envelope = envelope_builder.build(scheduled_event) + intent = _ingest_and_parse(inv, envelope) + + # Should return Idle, now execute the tool + if intent["type"] != "Idle": + raise InvestigationError( + f"Expected Idle after CallScheduled, got {intent['type']}" + ) + + # Security validation before execution + validate_tool_call(tool_name, args, scope) + + # Execute tool + try: + result = await tool_executor(tool_name, args) + except Exception as e: + # Tool execution failed - send error result + result = {"error": str(e)} + + # Send CallResult event + call_result_event = { + "type": "CallResult", + "payload": {"call_id": call_id, "output": result}, + } + envelope = envelope_builder.build(call_result_event) + intent = _ingest_and_parse(inv, envelope) + + elif intent["type"] == "RequestUser": + payload = intent["payload"] + question_id = payload["question_id"] + prompt = payload["prompt"] + + if user_responder is None: + raise RuntimeError( + f"User response required but no responder provided. 
Prompt: {prompt}" + ) + + # Get user response + response = user_responder(question_id, prompt) + + # Send UserResponse event + user_response_event = { + "type": "UserResponse", + "payload": {"question_id": question_id, "content": response}, + } + envelope = envelope_builder.build(user_response_event) + intent = _ingest_and_parse(inv, envelope) + + elif intent["type"] == "Finish": + # Success - return the insight + return { + "status": "completed", + "insight": intent["payload"]["insight"], + "steps": loop_count, + "trace_id": trace_id, + } + + elif intent["type"] == "Error": + # Investigation failed + raise InvestigationError(intent["payload"]["message"]) + + else: + raise InvestigationError(f"Unknown intent type: {intent['type']}") + + raise InvestigationError(f"Investigation exceeded max_steps ({max_steps})") + + +def _ingest_and_parse(inv: Investigator, envelope_json: str) -> dict[str, Any]: + """Ingest an envelope and parse the resulting intent. + + Args: + inv: The Investigator instance. + envelope_json: JSON string of the envelope. + + Returns: + Parsed intent dictionary. + """ + intent_json = inv.ingest(envelope_json) + result: dict[str, Any] = json.loads(intent_json) + return result + + +class LocalInvestigator: + """Wrapper providing stateful investigation control. + + For more fine-grained control over the investigation loop, + use this class instead of run_local(). + + Example: + >>> inv = LocalInvestigator() + >>> intent = inv.start("Find null spike", scope) + >>> while not inv.is_terminal: + ... intent = inv.current_intent() + ... if intent["type"] == "RequestCall": + ... call_id = inv.schedule_call(intent["payload"]["name"]) + ... result = execute_tool(intent["payload"]) + ... 
intent = inv.send_call_result(call_id, result) + """ + + def __init__(self) -> None: + """Initialize a new local investigator.""" + self._inv = Investigator() + self._trace_id = create_trace() + self._envelope_builder = EnvelopeBuilder() + self._started = False + + @property + def is_terminal(self) -> bool: + """Check if investigation is in a terminal state.""" + return self._inv.is_terminal() + + @property + def current_phase(self) -> str: + """Get the current investigation phase.""" + return self._inv.current_phase() + + @property + def trace_id(self) -> str: + """Get the trace ID for this investigation.""" + return self._trace_id + + def start(self, objective: str, scope: dict[str, Any]) -> dict[str, Any]: + """Start the investigation with the given objective. + + Args: + objective: Investigation objective. + scope: Security scope. + + Returns: + The first intent after starting. + """ + if self._started: + raise RuntimeError("Investigation already started") + + event = {"type": "Start", "payload": {"objective": objective, "scope": scope}} + envelope = self._envelope_builder.build(event) + intent = _ingest_and_parse(self._inv, envelope) + self._started = True + return intent + + def current_intent(self) -> dict[str, Any]: + """Get the current intent without sending an event. + + Returns: + The current intent. + """ + intent_json = self._inv.query() + return json.loads(intent_json) + + def schedule_call(self, name: str) -> str: + """Schedule a call by sending CallScheduled event. + + Args: + name: Name of the tool being scheduled. + + Returns: + The generated call_id. + """ + call_id = f"call_{uuid.uuid4().hex[:12]}" + event = { + "type": "CallScheduled", + "payload": {"call_id": call_id, "name": name}, + } + envelope = self._envelope_builder.build(event) + _ingest_and_parse(self._inv, envelope) + return call_id + + def send_call_result(self, call_id: str, output: Any) -> dict[str, Any]: + """Send a CallResult event. + + Args: + call_id: ID of the completed call. 
+ output: Result of the tool execution. + + Returns: + The next intent. + """ + event = { + "type": "CallResult", + "payload": {"call_id": call_id, "output": output}, + } + envelope = self._envelope_builder.build(event) + return _ingest_and_parse(self._inv, envelope) + + def send_user_response(self, question_id: str, content: str) -> dict[str, Any]: + """Send a UserResponse event. + + Args: + question_id: ID of the question being answered. + content: User's response content. + + Returns: + The next intent. + """ + event = { + "type": "UserResponse", + "payload": {"question_id": question_id, "content": content}, + } + envelope = self._envelope_builder.build(event) + return _ingest_and_parse(self._inv, envelope) + + def cancel(self) -> dict[str, Any]: + """Cancel the investigation. + + Returns: + The Error intent after cancellation. + """ + event = {"type": "Cancel"} + envelope = self._envelope_builder.build(event) + return _ingest_and_parse(self._inv, envelope) + + def snapshot(self) -> str: + """Get a JSON snapshot of the current state. + + Returns: + JSON string of the state. + """ + return self._inv.snapshot() + + @classmethod + def restore(cls, state_json: str) -> "LocalInvestigator": + """Restore from a saved snapshot. + + Args: + state_json: JSON string of a saved state. + + Returns: + A LocalInvestigator restored to the saved state. + """ + instance = cls() + instance._inv = Investigator.restore(state_json) + instance._started = True + return instance diff --git a/python-packages/investigator/src/investigator/security.py b/python-packages/investigator/src/investigator/security.py new file mode 100644 index 000000000..426d8bf4d --- /dev/null +++ b/python-packages/investigator/src/investigator/security.py @@ -0,0 +1,166 @@ +"""Security module with deny-by-default tool call validation. + +Provides defense-in-depth validation for tool calls before they +reach any database or external service. 
+""" + +from __future__ import annotations + +from typing import Any + + +class SecurityViolation(Exception): + """Raised when a tool call violates security policy.""" + + pass + + +# Default forbidden SQL patterns (deny-by-default) +FORBIDDEN_SQL_PATTERNS: frozenset[str] = frozenset({ + "DROP", + "DELETE", + "TRUNCATE", + "ALTER", + "INSERT", + "UPDATE", + "CREATE", + "GRANT", + "REVOKE", +}) + + +def validate_tool_call( + tool_name: str, + args: dict[str, Any], + scope: dict[str, Any], +) -> None: + """Validate a tool call against the security policy. + + Defense-in-depth: this runs BEFORE hitting any database. + + Args: + tool_name: Name of the tool being called. + args: Arguments to the tool call. + scope: Security scope with permissions. + + Raises: + SecurityViolation: If the call violates security policy. + """ + # 1. Validate tool is in allowlist (if scope restricts tools) + _validate_tool_allowlist(tool_name, scope) + + # 2. Validate table access (if table_name in args) + _validate_table_access(args, scope) + + # 3. Validate query safety (if query in args) + if "query" in args: + _validate_query_safety(args["query"]) + + +def _validate_tool_allowlist(tool_name: str, scope: dict[str, Any]) -> None: + """Validate that the tool is in the allowlist. + + If scope has no allowlist, all tools are allowed (permissive default). + If scope has an allowlist, the tool must be in it. + + Args: + tool_name: Name of the tool. + scope: Security scope. + + Raises: + SecurityViolation: If tool is not in allowlist. + """ + allowed_tools = scope.get("allowed_tools") + if allowed_tools is not None and tool_name not in allowed_tools: + raise SecurityViolation(f"Tool '{tool_name}' not in allowlist") + + +def _validate_table_access(args: dict[str, Any], scope: dict[str, Any]) -> None: + """Validate table access permissions. + + Args: + args: Tool arguments. + scope: Security scope with permissions list. + + Raises: + SecurityViolation: If access denied to table. 
+ """ + if "table_name" not in args: + return + + table = args["table_name"] + allowed_tables = scope.get("permissions", []) + + # Deny-by-default: if no permissions specified, deny all + if not allowed_tables: + raise SecurityViolation(f"No table permissions granted, access denied to '{table}'") + + if table not in allowed_tables: + raise SecurityViolation(f"Access denied to table '{table}'") + + +def _validate_query_safety(query: str) -> None: + """Check for obviously dangerous SQL patterns. + + This is a defense-in-depth check, not a complete SQL parser. + The underlying database adapter should also enforce read-only access. + + Args: + query: SQL query string. + + Raises: + SecurityViolation: If forbidden pattern detected. + """ + query_upper = query.upper() + for pattern in FORBIDDEN_SQL_PATTERNS: + # Check for pattern as a word (not substring of another word) + # e.g., "DROP" should match " DROP " but not "DROPBOX" + if _word_in_query(pattern, query_upper): + raise SecurityViolation(f"Forbidden SQL pattern: {pattern}") + + +def _word_in_query(word: str, query_upper: str) -> bool: + """Check if a word appears in the query as a keyword. + + Simple check that looks for the word surrounded by non-alphanumeric chars. + + Args: + word: The keyword to check for (uppercase). + query_upper: The query string (uppercase). + + Returns: + True if the word appears as a keyword. + """ + import re + # Match word boundaries + pattern = rf"\b{word}\b" + return bool(re.search(pattern, query_upper)) + + +def create_scope( + user_id: str, + tenant_id: str, + permissions: list[str] | None = None, + allowed_tools: list[str] | None = None, +) -> dict[str, Any]: + """Create a security scope dictionary. + + Helper function for constructing scope objects. + + Args: + user_id: User identifier. + tenant_id: Tenant identifier. + permissions: List of allowed table names. + allowed_tools: Optional list of allowed tool names. + + Returns: + Scope dictionary for use with validate_tool_call. 
+ """ + scope: dict[str, Any] = { + "user_id": user_id, + "tenant_id": tenant_id, + "permissions": permissions or [], + } + if allowed_tools is not None: + scope["allowed_tools"] = allowed_tools + return scope diff --git a/python-packages/investigator/src/investigator/temporal.py b/python-packages/investigator/src/investigator/temporal.py new file mode 100644 index 000000000..288713ffe --- /dev/null +++ b/python-packages/investigator/src/investigator/temporal.py @@ -0,0 +1,421 @@ +"""Temporal workflow integration for the Rust state machine. + +This module provides Temporal workflow and activity definitions that use +the Rust Investigator state machine for durable, deterministic execution. + +Example usage: + ```python + from investigator.temporal import ( + InvestigatorWorkflow, + InvestigatorInput, + brain_step, + ) + + # Register workflow and activity with worker + worker = Worker( + client, + task_queue="investigator", + workflows=[InvestigatorWorkflow], + activities=[brain_step], + ) + ``` +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from datetime import timedelta +from typing import Any + +from temporalio import activity, workflow + +with workflow.unsafe.imports_passed_through(): + from dataing_investigator import Investigator + from investigator.security import SecurityViolation, validate_tool_call + + +# === Activity Definitions === + + +@dataclass +class BrainStepInput: + """Input for the brain_step activity.""" + + state_json: str | None + event_json: str + + +@dataclass +class BrainStepOutput: + """Output from the brain_step activity.""" + + new_state_json: str + intent: dict[str, Any] + + +@activity.defn +async def brain_step(input: BrainStepInput) -> BrainStepOutput: + """Execute one step of the state machine. + + This activity is the core of the investigation loop. It: + 1. Restores state from JSON (or creates new state) + 2. Ingests the event + 3. 
Returns the new state and intent + + The activity is pure computation - no side effects. + Side effects (tool calls) happen in the workflow. + """ + if input.state_json: + inv = Investigator.restore(input.state_json) + else: + inv = Investigator() + + intent_json = inv.ingest(input.event_json) + + return BrainStepOutput( + new_state_json=inv.snapshot(), + intent=json.loads(intent_json), + ) + + +# === Workflow Definitions === + + +@dataclass +class InvestigatorInput: + """Input for starting an investigator workflow.""" + + investigation_id: str + objective: str + scope: dict[str, Any] + # For continue_as_new resumption + checkpoint_state: str | None = None + checkpoint_step: int = 0 + + +@dataclass +class InvestigatorResult: + """Result of a completed investigation.""" + + investigation_id: str + status: str # "completed", "failed", "cancelled" + insight: str | None = None + error: str | None = None + steps: int = 0 + trace_id: str = "" + + +@dataclass +class InvestigatorStatus: + """Status returned by the get_status query.""" + + investigation_id: str + phase: str + step: int + is_terminal: bool + awaiting_user: bool + current_question: str | None + + +@workflow.defn +class InvestigatorWorkflow: + """Temporal workflow using the Rust Investigator state machine. 
@workflow.defn
class InvestigatorWorkflow:
    """Temporal workflow using the Rust Investigator state machine.

    This workflow demonstrates the integration pattern:
    - State machine logic runs in activities (pure computation)
    - Tool execution happens in the workflow (side effects)
    - HITL via signals/queries
    - Signal dedup via seen_signal_ids
    - continue_as_new at step threshold

    Signals:
    - user_response(signal_id, content): Submit user response
    - cancel(): Cancel the investigation

    Queries:
    - get_status(): Get current investigation status
    """

    # Step threshold for continue_as_new
    MAX_STEPS_BEFORE_CONTINUE = 100

    def __init__(self) -> None:
        """Initialize workflow state."""
        self._state_json: str | None = None  # latest Rust machine snapshot
        self._current_phase = "init"
        self._step = 0
        self._is_terminal = False
        self._awaiting_user = False
        self._current_question: str | None = None
        self._user_response_queue: list[str] = []
        self._seen_signal_ids: set[str] = set()  # signal dedup
        self._cancelled = False
        self._investigation_id = ""
        self._trace_id = ""

    @workflow.signal
    def user_response(self, signal_id: str, content: str) -> None:
        """Signal to submit a user response.

        Uses signal_id for deduplication - duplicate signals are ignored.

        Args:
            signal_id: Unique ID for this signal (for dedup).
            content: User's response content.
        """
        if signal_id in self._seen_signal_ids:
            workflow.logger.info(f"Ignoring duplicate signal: {signal_id}")
            return
        self._seen_signal_ids.add(signal_id)
        self._user_response_queue.append(content)

    @workflow.signal
    def cancel(self) -> None:
        """Signal to cancel the investigation."""
        self._cancelled = True

    @workflow.query
    def get_status(self) -> InvestigatorStatus:
        """Query the current status of the investigation."""
        return InvestigatorStatus(
            investigation_id=self._investigation_id,
            phase=self._current_phase,
            step=self._step,
            is_terminal=self._is_terminal,
            awaiting_user=self._awaiting_user,
            current_question=self._current_question,
        )

    @workflow.run
    async def run(self, input: InvestigatorInput) -> InvestigatorResult:
        """Execute the investigation workflow.

        Args:
            input: Investigation input with objective and scope.

        Returns:
            InvestigatorResult with status and findings.
        """
        self._investigation_id = input.investigation_id
        self._trace_id = str(workflow.uuid4())

        # Restore from checkpoint if continuing
        if input.checkpoint_state:
            self._state_json = input.checkpoint_state
            self._step = input.checkpoint_step

        # Build Start event (only if not resuming)
        # NOTE(review): this event (and CallResult/UserResponse below) is a bare
        # {"type", "payload"} object, while the Rust binding tests in
        # test_investigator.py always wrap events in an envelope carrying
        # protocol_version/event_id/step. Confirm which wire format the
        # Investigator.ingest() used by brain_step expects.
        if not input.checkpoint_state:
            start_event = json.dumps({
                "type": "Start",
                "payload": {
                    "objective": input.objective,
                    "scope": input.scope,
                },
            })
        else:
            start_event = None

        # Run the investigation loop
        while not self._is_terminal and not self._cancelled:
            # Check for continue_as_new threshold
            # (self._step already starts at checkpoint_step on resume, so each
            # run processes at most MAX_STEPS_BEFORE_CONTINUE steps)
            if self._step >= self.MAX_STEPS_BEFORE_CONTINUE + input.checkpoint_step:
                workflow.logger.info(
                    f"Step threshold reached ({self._step}), continuing as new"
                )
                workflow.continue_as_new(
                    InvestigatorInput(
                        investigation_id=input.investigation_id,
                        objective=input.objective,
                        scope=input.scope,
                        checkpoint_state=self._state_json,
                        checkpoint_step=self._step,
                    )
                )

            # Execute brain step
            # NOTE(review): on every iteration after the first, this ingests the
            # literal JSON `null` as the event — TODO confirm the Rust side
            # accepts a bare null (the binding tests only ever send full events).
            step_input = BrainStepInput(
                state_json=self._state_json,
                event_json=start_event if start_event else "null",
            )
            step_output = await workflow.execute_activity(
                brain_step,
                step_input,
                start_to_close_timeout=timedelta(seconds=30),
            )

            # Clear start_event after first iteration
            start_event = None

            # Update local state
            self._state_json = step_output.new_state_json
            self._step += 1
            intent = step_output.intent

            # Update phase from state
            state = json.loads(self._state_json)
            self._current_phase = state.get("phase", {}).get("type", "unknown").lower()

            # Handle intent
            if intent["type"] == "Idle":
                # Need to wait for something - this shouldn't happen often
                await workflow.sleep(timedelta(milliseconds=100))

            elif intent["type"] == "Call":
                # NOTE(review): this branch expects a "Call" intent with a
                # pre-assigned call_id, but the Rust binding tests exercise a
                # "RequestCall" intent WITHOUT a call_id plus a CallScheduled
                # handshake where the workflow assigns the id. Confirm which
                # protocol version this workflow targets.
                # Execute tool call
                result = await self._execute_tool_call(intent["payload"], input.scope)

                # Build CallResult event
                call_result_event = json.dumps({
                    "type": "CallResult",
                    "payload": {
                        "call_id": intent["payload"]["call_id"],
                        "output": result,
                    },
                })

                # Feed result back to state machine
                # (note: this follow-up ingest increments self._step a second
                # time for a single tool call)
                step_input = BrainStepInput(
                    state_json=self._state_json,
                    event_json=call_result_event,
                )
                step_output = await workflow.execute_activity(
                    brain_step,
                    step_input,
                    start_to_close_timeout=timedelta(seconds=30),
                )
                self._state_json = step_output.new_state_json
                self._step += 1

            elif intent["type"] == "RequestUser":
                # Enter HITL mode
                self._awaiting_user = True
                self._current_question = intent["payload"]["question"]

                # Wait for user response or cancellation
                # NOTE(review): wait_condition raises on timeout (24h) and there
                # is no handler here — confirm that failing the workflow run on
                # user silence is the intended behavior.
                await workflow.wait_condition(
                    lambda: len(self._user_response_queue) > 0 or self._cancelled,
                    timeout=timedelta(hours=24),
                )

                if self._cancelled:
                    break

                # Get response and build event
                response = self._user_response_queue.pop(0)
                user_response_event = json.dumps({
                    "type": "UserResponse",
                    "payload": {"content": response},
                })

                # Feed response back to state machine
                step_input = BrainStepInput(
                    state_json=self._state_json,
                    event_json=user_response_event,
                )
                step_output = await workflow.execute_activity(
                    brain_step,
                    step_input,
                    start_to_close_timeout=timedelta(seconds=30),
                )
                self._state_json = step_output.new_state_json
                self._step += 1

                self._awaiting_user = False
                self._current_question = None

            elif intent["type"] == "Finish":
                self._is_terminal = True
                return InvestigatorResult(
                    investigation_id=input.investigation_id,
                    status="completed",
                    insight=intent["payload"]["insight"],
                    steps=self._step,
                    trace_id=self._trace_id,
                )

            elif intent["type"] == "Error":
                self._is_terminal = True
                return InvestigatorResult(
                    investigation_id=input.investigation_id,
                    status="failed",
                    error=intent["payload"]["message"],
                    steps=self._step,
                    trace_id=self._trace_id,
                )

        # Cancelled
        return InvestigatorResult(
            investigation_id=input.investigation_id,
            status="cancelled",
            steps=self._step,
            trace_id=self._trace_id,
        )

    async def _execute_tool_call(
        self,
        payload: dict[str, Any],
        scope: dict[str, Any],
    ) -> Any:
        """Execute a tool call with security validation.

        Args:
            payload: The Call intent payload.
            scope: Security scope.

        Returns:
            Tool execution result.

        Raises:
            SecurityViolation: If call violates security policy.
        """
        tool_name = payload["name"]
        args = payload["args"]

        # Security validation before execution
        # (violations are converted to an {"error": ...} result rather than
        # raised, so the state machine sees the failure as tool output)
        try:
            validate_tool_call(tool_name, args, scope)
        except SecurityViolation as e:
            workflow.logger.warning(f"Security violation: {e}")
            return {"error": str(e)}

        # Execute tool based on name
        # In production, this would dispatch to actual tool implementations
        if tool_name == "get_schema":
            # Mock schema gathering
            return await self._mock_get_schema(args)
        elif tool_name == "generate_hypotheses":
            # Mock hypothesis generation
            return await self._mock_generate_hypotheses(args)
        elif tool_name == "evaluate_hypothesis":
            # Mock hypothesis evaluation
            return await self._mock_evaluate_hypothesis(args)
        elif tool_name == "synthesize":
            # Mock synthesis
            return await self._mock_synthesize(args)
        else:
            return {"error": f"Unknown tool: {tool_name}"}

    async def _mock_get_schema(self, args: dict[str, Any]) -> dict[str, Any]:
        """Mock schema gathering tool."""
        return {
            "tables": [
                {"name": "orders", "columns": ["id", "customer_id", "amount", "created_at"]}
            ]
        }

    async def _mock_generate_hypotheses(self, args: dict[str, Any]) -> list[dict[str, Any]]:
        """Mock hypothesis generation tool."""
        return [
            {"id": "h1", "title": "ETL job failure", "reasoning": "Upstream ETL may have failed"},
            {"id": "h2", "title": "Schema change", "reasoning": "A column type may have changed"},
        ]

    async def _mock_evaluate_hypothesis(self, args: dict[str, Any]) -> dict[str, Any]:
        """Mock hypothesis evaluation tool."""
        return {"supported": True, "confidence": 0.85}

    async def _mock_synthesize(self, args: dict[str, Any]) -> dict[str, Any]:
        """Mock synthesis tool."""
        return {"insight": "Root cause: ETL job failed at 3:00 AM due to timeout"}
# --- tests/__init__.py ---
"""Tests for the investigator package."""

# --- tests/conftest.py ---
"""Common test fixtures for investigator tests."""

from __future__ import annotations

from typing import Any

import pytest


@pytest.fixture
def basic_scope() -> dict[str, Any]:
    """Create a basic scope for testing."""
    # Must match the Rust Scope struct exactly
    return {
        "user_id": "test-user",
        "tenant_id": "test-tenant",
        "permissions": ["orders", "customers"],
    }


@pytest.fixture
def mock_tool_executor():
    """Create a mock tool executor for testing."""

    # Canned responses keyed by tool name; unknown tools get an error payload,
    # mirroring the workflow's dispatch behavior.
    async def executor(tool_name: str, args: dict[str, Any]) -> Any:
        if tool_name == "get_schema":
            return {"tables": [{"name": "orders", "columns": ["id", "amount"]}]}
        elif tool_name == "generate_hypotheses":
            return [{"id": "h1", "title": "Test hypothesis"}]
        elif tool_name == "evaluate_hypothesis":
            return {"supported": True, "confidence": 0.9}
        elif tool_name == "synthesize":
            return {"insight": "Test insight"}
        else:
            return {"error": f"Unknown tool: {tool_name}"}

    return executor


# --- tests/test_envelope.py ---
"""Tests for the envelope module."""

from __future__ import annotations

import json
from typing import Any

import pytest

from investigator.envelope import (
    Envelope,
    create_child_envelope,
    create_trace,
    extract_trace_id,
    unwrap,
    wrap,
)


class TestWrapUnwrap:
    """Test wrap/unwrap functionality."""

    def test_wrap_creates_json_string(self) -> None:
        """Test wrap creates a valid JSON string."""
        payload = {"test": "data", "number": 42}
        trace_id = create_trace()

        result = wrap(payload, trace_id)

        # wrap returns a JSON string
        assert isinstance(result, str)
        envelope = json.loads(result)
        assert envelope["trace_id"] == trace_id
        assert envelope["payload"] == payload
        assert "id" in envelope

    def test_unwrap_returns_envelope(self) -> None:
        """Test unwrap returns an Envelope dict."""
        payload = {"test": "data", "nested": {"key": "value"}}
        trace_id = create_trace()

        json_str = wrap(payload, trace_id)
        envelope = unwrap(json_str)

        assert envelope["payload"] == payload
        assert envelope["trace_id"] == trace_id
        assert "id" in envelope

    def test_wrap_unwrap_roundtrip(self) -> None:
        """Test wrap/unwrap roundtrip preserves data."""
        original = {"key": "value", "list": [1, 2, 3], "nested": {"a": "b"}}
        trace_id = create_trace()

        json_str = wrap(original, trace_id)
        envelope = unwrap(json_str)

        assert envelope["payload"] == original

    def test_wrap_with_parent_id(self) -> None:
        """Test wrap with parent_id."""
        payload = {"test": "data"}
        trace_id = create_trace()
        parent_id = "parent-123"

        json_str = wrap(payload, trace_id, parent_id)
        envelope = unwrap(json_str)

        assert envelope["parent_id"] == parent_id

    def test_wrap_without_parent_id(self) -> None:
        """Test wrap without parent_id sets it to None."""
        payload = {"test": "data"}
        trace_id = create_trace()

        json_str = wrap(payload, trace_id)
        envelope = unwrap(json_str)

        assert envelope["parent_id"] is None

    def test_unwrap_missing_fields_raises(self) -> None:
        """Test unwrap raises KeyError for missing fields."""
        bad_json = json.dumps({"only_one_field": "value"})

        with pytest.raises(KeyError):
            unwrap(bad_json)


class TestTraceId:
    """Test trace ID functionality."""

    def test_create_trace_is_string(self) -> None:
        """Test create_trace returns a string."""
        trace_id = create_trace()
        assert isinstance(trace_id, str)
        assert len(trace_id) > 0

    def test_create_trace_unique(self) -> None:
        """Test create_trace returns unique IDs."""
        traces = [create_trace() for _ in range(100)]
        assert len(set(traces)) == 100

    def test_extract_trace_id(self) -> None:
        """Test extract_trace_id from envelope dict."""
        trace_id = create_trace()
        json_str = wrap({"test": "data"}, trace_id)
        envelope = unwrap(json_str)

        extracted = extract_trace_id(envelope)
        assert extracted == trace_id


class TestChildEnvelope:
    """Test child envelope creation."""

    def test_create_child_envelope(self) -> None:
        """Test creating a child envelope."""
        parent_json = wrap({"parent": "data"}, create_trace())
        parent = unwrap(parent_json)
        child_payload = {"child": "data"}

        child_json = create_child_envelope(parent, child_payload)
        child = unwrap(child_json)

        # Child should have same trace_id
        assert child["trace_id"] == parent["trace_id"]
        assert child["payload"] == child_payload
        # Child should reference parent's id
        assert child["parent_id"] == parent["id"]

    def test_child_envelope_preserves_trace(self) -> None:
        """Test child preserves parent trace ID."""
        trace_id = "custom-trace-123"
        parent: Envelope = {
            "id": "parent-id-456",
            "trace_id": trace_id,
            "parent_id": None,
            "payload": {"parent": True},
        }

        child_json = create_child_envelope(parent, {"child": True})
        child = unwrap(child_json)

        assert child["trace_id"] == trace_id
        assert child["parent_id"] == "parent-id-456"


class TestEnvelopeSerialization:
    """Test envelope JSON serialization."""

    def test_envelope_json_roundtrip(self) -> None:
        """Test envelope can be serialized and deserialized."""
        original_json = wrap({"test": "data"}, create_trace())
        original = unwrap(original_json)

        # Re-serialize and parse
        json_str = json.dumps(original)
        restored: Envelope = json.loads(json_str)

        assert restored == original

    def test_envelope_with_complex_payload(self) -> None:
        """Test envelope with complex nested payload."""
        payload = {
            "string": "value",
            "number": 42,
            "float": 3.14,
            "bool": True,
            "null": None,
            "list": [1, 2, 3],
            "nested": {"a": {"b": {"c": "deep"}}},
        }

        json_str = wrap(payload, create_trace())
        envelope = unwrap(json_str)

        assert envelope["payload"] == payload
# --- tests/test_investigator.py ---
"""Tests for the Rust Investigator bindings."""

from __future__ import annotations

import json
import uuid
from typing import Any

import pytest

# NOTE(review): InvalidTransitionError and StateError are imported but never
# used in this module — presumably to assert the binding exports them;
# confirm, or prune the imports.
from dataing_investigator import (
    Investigator,
    InvalidTransitionError,
    ProtocolMismatchError,
    SerializationError,
    StateError,
    StepViolationError,
    UnexpectedCallError,
    protocol_version,
)


class EnvelopeBuilder:
    """Helper to build event envelopes for tests."""

    def __init__(self) -> None:
        """Initialize envelope builder."""
        self._step = 0

    def build(self, event: dict[str, Any]) -> str:
        """Build an envelope for the given event."""
        # Steps are 1-based and strictly increasing, matching the protocol's
        # monotonic-step requirement.
        self._step += 1
        envelope = {
            "protocol_version": protocol_version(),
            "event_id": f"evt_{uuid.uuid4().hex[:12]}",
            "step": self._step,
            "event": event,
        }
        return json.dumps(envelope)


class TestInvestigatorBasics:
    """Test basic Investigator functionality."""

    def test_new_investigator(self) -> None:
        """Test creating a new Investigator."""
        inv = Investigator()
        state = json.loads(inv.snapshot())
        assert state["phase"]["type"] == "Init"
        assert state["step"] == 0
        assert state["version"] == protocol_version()

    def test_current_phase_and_step(self) -> None:
        """Test phase and step accessors."""
        inv = Investigator()
        assert inv.current_phase() == "init"
        assert inv.current_step() == 0
        assert not inv.is_terminal()

    def test_protocol_version(self) -> None:
        """Test protocol version is returned."""
        assert protocol_version() == 1

    def test_query_returns_idle_in_init(self) -> None:
        """Test query() returns Idle in Init phase."""
        inv = Investigator()
        intent = json.loads(inv.query())
        assert intent["type"] == "Idle"


class TestInvestigatorEvents:
    """Test Investigator event handling."""

    def test_start_event(self, basic_scope: dict[str, Any]) -> None:
        """Test Start event transitions to GatheringContext."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        start_event = {
            "type": "Start",
            "payload": {
                "objective": "Test investigation",
                "scope": basic_scope,
            },
        }
        envelope = builder.build(start_event)
        intent_json = inv.ingest(envelope)
        intent = json.loads(intent_json)

        # Should emit RequestCall (no call_id)
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "get_schema"
        assert "call_id" not in intent["payload"]
        assert inv.current_phase() == "gathering_context"

    def test_call_scheduling_handshake(self, basic_scope: dict[str, Any]) -> None:
        """Test the two-step call scheduling handshake."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        # Start
        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        intent = json.loads(inv.ingest(start))
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "get_schema"

        # Workflow assigns call_id via CallScheduled
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "call_001", "name": "get_schema"},
        })
        intent = json.loads(inv.ingest(scheduled))
        assert intent["type"] == "Idle"

        # Now send CallResult
        result = builder.build({
            "type": "CallResult",
            "payload": {"call_id": "call_001", "output": {"tables": []}},
        })
        intent = json.loads(inv.ingest(result))

        # Should move to next phase
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "generate_hypotheses"

    def test_cancel_event(self, basic_scope: dict[str, Any]) -> None:
        """Test Cancel event transitions to Failed."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        inv.ingest(start)

        cancel = builder.build({"type": "Cancel"})
        intent = json.loads(inv.ingest(cancel))

        assert intent["type"] == "Error"
        assert inv.is_terminal()

    def test_unexpected_call_scheduled_fails(self, basic_scope: dict[str, Any]) -> None:
        """Test that wrong name in CallScheduled raises error."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        inv.ingest(start)

        # Send CallScheduled with wrong name
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "call_001", "name": "wrong_name"},
        })

        with pytest.raises(UnexpectedCallError):
            inv.ingest(scheduled)


class TestInvestigatorProtocolValidation:
    """Test protocol validation."""

    def test_protocol_version_mismatch(self, basic_scope: dict[str, Any]) -> None:
        """Test that wrong protocol version raises error."""
        inv = Investigator()

        envelope = json.dumps({
            "protocol_version": 999,
            "event_id": "evt_001",
            "step": 1,
            "event": {"type": "Cancel"},
        })

        with pytest.raises(ProtocolMismatchError):
            inv.ingest(envelope)

    def test_step_violation(self, basic_scope: dict[str, Any]) -> None:
        """Test that non-monotonic step raises error."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        # First event with step 1
        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        inv.ingest(start)

        # Try to send event with step 1 (not > current)
        envelope = json.dumps({
            "protocol_version": protocol_version(),
            "event_id": "evt_002",
            "step": 1,  # Same as first event
            "event": {"type": "Cancel"},
        })

        with pytest.raises(StepViolationError):
            inv.ingest(envelope)

    def test_duplicate_event_idempotent(self, basic_scope: dict[str, Any]) -> None:
        """Test that duplicate event_id is handled idempotently."""
        inv = Investigator()

        # Send start event
        envelope1 = json.dumps({
            "protocol_version": protocol_version(),
            "event_id": "evt_001",
            "step": 1,
            "event": {
                "type": "Start",
                "payload": {"objective": "Test", "scope": basic_scope},
            },
        })
        inv.ingest(envelope1)
        assert inv.current_phase() == "gathering_context"

        # Same event_id with higher step - should be ignored
        envelope2 = json.dumps({
            "protocol_version": protocol_version(),
            "event_id": "evt_001",  # duplicate
            "step": 2,
            "event": {"type": "Cancel"},
        })
        inv.ingest(envelope2)

        # State should NOT have changed
        assert inv.current_phase() == "gathering_context"
        assert inv.current_step() == 1


class TestInvestigatorSerialization:
    """Test Investigator snapshot/restore."""

    def test_restore_from_snapshot(self, basic_scope: dict[str, Any]) -> None:
        """Test restoring from a snapshot."""
        inv1 = Investigator()
        builder = EnvelopeBuilder()

        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        inv1.ingest(start)
        snapshot = inv1.snapshot()

        inv2 = Investigator.restore(snapshot)
        assert inv1.snapshot() == inv2.snapshot()
        assert inv1.current_phase() == inv2.current_phase()
        assert inv1.current_step() == inv2.current_step()

    def test_restore_invalid_json(self) -> None:
        """Test restoring from invalid JSON raises error."""
        with pytest.raises(SerializationError):
            Investigator.restore("not valid json")

    def test_restore_invalid_state(self) -> None:
        """Test restoring from invalid state raises error."""
        with pytest.raises(SerializationError):
            Investigator.restore('{"invalid": "state"}')


class TestInvestigatorErrors:
    """Test Investigator error handling."""

    def test_invalid_envelope_json(self) -> None:
        """Test invalid JSON raises SerializationError."""
        inv = Investigator()
        with pytest.raises(SerializationError):
            inv.ingest("not valid json")

    def test_invalid_envelope_structure(self) -> None:
        """Test invalid envelope structure raises error."""
        inv = Investigator()
        with pytest.raises(SerializationError):
            inv.ingest('{"invalid": "envelope"}')


class TestInvestigatorFullCycle:
    """Test full investigation cycle."""

    def test_full_investigation_cycle(self, basic_scope: dict[str, Any]) -> None:
        """Test a complete investigation from start to finish."""
        inv = Investigator()
        builder = EnvelopeBuilder()

        # Start
        start = builder.build({
            "type": "Start",
            "payload": {"objective": "Test", "scope": basic_scope},
        })
        intent = json.loads(inv.ingest(start))
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "get_schema"

        # CallScheduled for get_schema
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "c1", "name": "get_schema"},
        })
        intent = json.loads(inv.ingest(scheduled))
        assert intent["type"] == "Idle"

        # CallResult for get_schema -> GeneratingHypotheses
        result1 = builder.build({
            "type": "CallResult",
            "payload": {"call_id": "c1", "output": {"tables": []}},
        })
        intent = json.loads(inv.ingest(result1))
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "generate_hypotheses"

        # CallScheduled for generate_hypotheses
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "c2", "name": "generate_hypotheses"},
        })
        intent = json.loads(inv.ingest(scheduled))
        assert intent["type"] == "Idle"

        # CallResult with 1 hypothesis -> EvaluatingHypotheses
        result2 = builder.build({
            "type": "CallResult",
            "payload": {
                "call_id": "c2",
                "output": [{"id": "h1", "title": "Test"}],
            },
        })
        intent = json.loads(inv.ingest(result2))
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "evaluate_hypothesis"

        # CallScheduled for evaluate_hypothesis
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "c3", "name": "evaluate_hypothesis"},
        })
        intent = json.loads(inv.ingest(scheduled))
        assert intent["type"] == "Idle"

        # Evaluation result -> Synthesizing
        result3 = builder.build({
            "type": "CallResult",
            "payload": {"call_id": "c3", "output": {"supported": True}},
        })
        intent = json.loads(inv.ingest(result3))
        assert intent["type"] == "RequestCall"
        assert intent["payload"]["name"] == "synthesize"

        # CallScheduled for synthesize
        scheduled = builder.build({
            "type": "CallScheduled",
            "payload": {"call_id": "c4", "name": "synthesize"},
        })
        intent = json.loads(inv.ingest(scheduled))
        assert intent["type"] == "Idle"

        # Synthesis result -> Finished
        result4 = builder.build({
            "type": "CallResult",
            "payload": {"call_id": "c4", "output": {"insight": "Root cause found"}},
        })
        intent = json.loads(inv.ingest(result4))
        assert intent["type"] == "Finish"
        assert intent["payload"]["insight"] == "Root cause found"
        assert inv.is_terminal()
"""Test envelope includes protocol version.""" + builder = EnvelopeBuilder() + event = {"type": "Cancel"} + + envelope = json.loads(builder.build(event)) + + assert "protocol_version" in envelope + assert envelope["protocol_version"] == 1 + + def test_builds_envelope_with_event_id(self) -> None: + """Test envelope includes unique event IDs.""" + builder = EnvelopeBuilder() + + envelope1 = json.loads(builder.build({"type": "Cancel"})) + envelope2 = json.loads(builder.build({"type": "Cancel"})) + + assert envelope1["event_id"] != envelope2["event_id"] + + def test_builds_envelope_with_monotonic_step(self) -> None: + """Test envelope has monotonically increasing steps.""" + builder = EnvelopeBuilder() + + envelope1 = json.loads(builder.build({"type": "Cancel"})) + envelope2 = json.loads(builder.build({"type": "Cancel"})) + envelope3 = json.loads(builder.build({"type": "Cancel"})) + + assert envelope1["step"] == 1 + assert envelope2["step"] == 2 + assert envelope3["step"] == 3 + + def test_includes_event_in_envelope(self) -> None: + """Test envelope includes the event.""" + builder = EnvelopeBuilder() + event = {"type": "Start", "payload": {"objective": "Test", "scope": {}}} + + envelope = json.loads(builder.build(event)) + + assert envelope["event"] == event + + +class TestLocalInvestigator: + """Test LocalInvestigator class.""" + + def test_new_investigator(self) -> None: + """Test creating a new LocalInvestigator.""" + inv = LocalInvestigator() + assert inv.current_phase == "init" + assert not inv.is_terminal + assert inv.trace_id # Should have a trace ID + + def test_start_investigation(self, basic_scope: dict[str, Any]) -> None: + """Test starting an investigation.""" + inv = LocalInvestigator() + intent = inv.start("Find the bug", basic_scope) + + assert intent["type"] == "RequestCall" + assert intent["payload"]["name"] == "get_schema" + assert "gathering" in inv.current_phase.lower() + + def test_cannot_start_twice(self, basic_scope: dict[str, Any]) -> None: + 
"""Test that investigation cannot be started twice.""" + inv = LocalInvestigator() + inv.start("First start", basic_scope) + + with pytest.raises(RuntimeError) as exc_info: + inv.start("Second start", basic_scope) + assert "already started" in str(exc_info.value) + + def test_schedule_call_and_send_result(self, basic_scope: dict[str, Any]) -> None: + """Test scheduling a call and sending result.""" + inv = LocalInvestigator() + intent = inv.start("Test", basic_scope) + + # Get tool name from RequestCall + assert intent["type"] == "RequestCall" + tool_name = intent["payload"]["name"] + + # Schedule the call + call_id = inv.schedule_call(tool_name) + assert call_id.startswith("call_") + + # Send result + next_intent = inv.send_call_result(call_id, {"tables": []}) + + assert next_intent["type"] == "RequestCall" + assert next_intent["payload"]["name"] == "generate_hypotheses" + + def test_current_intent(self, basic_scope: dict[str, Any]) -> None: + """Test getting current intent without event.""" + inv = LocalInvestigator() + # Before start, current_intent returns Idle + intent = inv.current_intent() + assert intent["type"] == "Idle" + + def test_cancel(self, basic_scope: dict[str, Any]) -> None: + """Test cancelling an investigation.""" + inv = LocalInvestigator() + inv.start("Test", basic_scope) + + intent = inv.cancel() + + assert intent["type"] == "Error" + assert inv.is_terminal + + def test_snapshot_restore(self, basic_scope: dict[str, Any]) -> None: + """Test snapshot and restore.""" + inv1 = LocalInvestigator() + inv1.start("Test", basic_scope) + snapshot = inv1.snapshot() + + inv2 = LocalInvestigator.restore(snapshot) + + assert inv1.current_phase == inv2.current_phase + assert inv2._started # noqa: SLF001 + + +class TestRunLocal: + """Test run_local function.""" + + @pytest.mark.asyncio + async def test_run_local_completes( + self, basic_scope: dict[str, Any], mock_tool_executor: Any + ) -> None: + """Test run_local completes an investigation.""" + result = 
await run_local( + objective="Find the bug", + scope=basic_scope, + tool_executor=mock_tool_executor, + max_steps=50, + ) + + assert result["status"] == "completed" + assert "insight" in result + assert result["steps"] > 0 + assert result["trace_id"] + + @pytest.mark.asyncio + async def test_run_local_max_steps(self, basic_scope: dict[str, Any]) -> None: + """Test run_local respects max_steps.""" + + async def slow_response(tool: str, args: dict[str, Any]) -> dict[str, Any]: + # Return responses that don't complete the investigation quickly + if tool == "get_schema": + return {"tables": [{"name": "t1"}, {"name": "t2"}, {"name": "t3"}]} + elif tool == "generate_hypotheses": + # Return many hypotheses to extend the evaluation phase + return [ + {"id": f"h{i}", "title": f"Hypothesis {i}"} + for i in range(10) + ] + elif tool == "evaluate_hypothesis": + # Each hypothesis needs evaluation + return {"supported": False, "confidence": 0.1} + else: + return {"minimal": "response"} + + # Use a very small max_steps to trigger the limit + with pytest.raises(InvestigationError) as exc_info: + await run_local( + objective="Test", + scope=basic_scope, + tool_executor=slow_response, + max_steps=3, + ) + assert "max_steps" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_run_local_tool_error(self, basic_scope: dict[str, Any]) -> None: + """Test run_local handles tool errors gracefully.""" + call_count = 0 + + async def failing_then_working_executor( + tool: str, args: dict[str, Any] + ) -> dict[str, Any]: + nonlocal call_count + call_count += 1 + # Fail on first call, then work normally + if call_count == 1: + raise RuntimeError("Tool failed") + if tool == "get_schema": + return {"tables": [{"name": "orders"}], "error": "partial failure"} + elif tool == "generate_hypotheses": + return [{"id": "h1", "title": "Test"}] + elif tool == "evaluate_hypothesis": + return {"supported": True} + elif tool == "synthesize": + return {"insight": "Completed despite errors"} + 
return {} + + # The error is captured and sent back to state machine + # Investigation continues with the error as part of the output + result = await run_local( + objective="Test", + scope=basic_scope, + tool_executor=failing_then_working_executor, + max_steps=50, + ) + assert result["status"] == "completed" + + @pytest.mark.asyncio + async def test_run_local_security_violation( + self, basic_scope: dict[str, Any], mock_tool_executor: Any + ) -> None: + """Test run_local works with empty permissions scope.""" + # Create scope with no permissions - should still complete + # since default tools don't require table permissions + empty_scope = {**basic_scope, "permissions": []} + + result = await run_local( + objective="Test", + scope=empty_scope, + tool_executor=mock_tool_executor, + max_steps=50, + ) + # Should complete since default tools don't require table permissions + assert result["status"] == "completed" + + +class TestRunLocalUserResponse: + """Test run_local with user responses.""" + + @pytest.mark.asyncio + async def test_user_response_parameter_accepted( + self, basic_scope: dict[str, Any], mock_tool_executor: Any + ) -> None: + """Test that user_responder parameter is accepted.""" + # Current state machine doesn't emit RequestUser in normal flow + # This test verifies the parameter is accepted + result = await run_local( + objective="Test", + scope=basic_scope, + tool_executor=mock_tool_executor, + user_responder=None, + max_steps=50, + ) + assert result["status"] == "completed" + + +class TestInvestigationError: + """Test InvestigationError exception.""" + + def test_investigation_error_message(self) -> None: + """Test InvestigationError preserves message.""" + try: + raise InvestigationError("Test error") + except InvestigationError as e: + assert str(e) == "Test error" + + def test_investigation_error_is_exception(self) -> None: + """Test InvestigationError is an Exception.""" + assert issubclass(InvestigationError, Exception) diff --git 
a/python-packages/investigator/tests/test_security.py b/python-packages/investigator/tests/test_security.py new file mode 100644 index 000000000..412762333 --- /dev/null +++ b/python-packages/investigator/tests/test_security.py @@ -0,0 +1,152 @@ +"""Tests for the security module.""" + +from __future__ import annotations + +from typing import Any + +import pytest + +from investigator.security import SecurityViolation, create_scope, validate_tool_call + + +class TestValidateToolCall: + """Test validate_tool_call functionality.""" + + def test_all_tools_allowed_by_default(self) -> None: + """Test all tools are allowed when no allowlist specified.""" + scope = create_scope("user1", "tenant1", ["orders"]) + # All tools should pass when no allowed_tools in scope + validate_tool_call("get_schema", {}, scope) + validate_tool_call("generate_hypotheses", {}, scope) + validate_tool_call("any_custom_tool", {}, scope) + + def test_allowlist_restricts_tools(self) -> None: + """Test tools are restricted when allowlist is specified.""" + scope = create_scope( + "user1", "tenant1", ["orders"], allowed_tools=["get_schema"] + ) + # Allowed tool should pass + validate_tool_call("get_schema", {}, scope) + + # Non-allowed tool should fail + with pytest.raises(SecurityViolation) as exc_info: + validate_tool_call("other_tool", {}, scope) + assert "not in allowlist" in str(exc_info.value) + + def test_forbidden_table_raises(self) -> None: + """Test forbidden tables are rejected.""" + scope = create_scope("user1", "tenant1", ["allowed_table"]) + with pytest.raises(SecurityViolation) as exc_info: + validate_tool_call("query", {"table_name": "forbidden_table"}, scope) + assert "forbidden_table" in str(exc_info.value) + + def test_allowed_table_passes(self) -> None: + """Test allowed tables pass validation.""" + scope = create_scope("user1", "tenant1", ["orders", "customers"]) + # Should not raise + validate_tool_call("query", {"table_name": "orders"}, scope) + validate_tool_call("query", 
{"table_name": "customers"}, scope) + + def test_empty_permissions_denies_all_tables(self) -> None: + """Test empty permissions denies all table access.""" + scope = create_scope("user1", "tenant1", []) + with pytest.raises(SecurityViolation) as exc_info: + validate_tool_call("query", {"table_name": "any_table"}, scope) + assert "No table permissions" in str(exc_info.value) + + def test_no_table_in_args_passes(self) -> None: + """Test calls without table_name pass table validation.""" + scope = create_scope("user1", "tenant1", []) + # Should pass - no table_name in args + validate_tool_call("get_schema", {}, scope) + + +class TestForbiddenSqlPatterns: + """Test SQL pattern validation.""" + + @pytest.mark.parametrize( + "sql", + [ + "DROP TABLE users", + "drop table users", + "DROP TABLE users", + "TRUNCATE TABLE orders", + "truncate table orders", + "DELETE FROM users", + "delete from customers", + "ALTER TABLE users ADD COLUMN", + "alter table users drop column", + "CREATE TABLE new_table", + "create table test", + "INSERT INTO users VALUES", + "insert into orders values", + "UPDATE users SET name = 'x'", + "update orders set status = 'done'", + "GRANT SELECT ON users", + "grant all on orders", + "REVOKE SELECT ON users", + "revoke all on orders", + ], + ) + def test_forbidden_sql_patterns_raise(self, sql: str) -> None: + """Test forbidden SQL patterns are rejected.""" + scope = create_scope("user1", "tenant1", ["orders"]) + with pytest.raises(SecurityViolation) as exc_info: + validate_tool_call("execute", {"query": sql}, scope) + assert "Forbidden SQL pattern" in str(exc_info.value) + + def test_select_query_allowed(self) -> None: + """Test SELECT queries are allowed.""" + scope = create_scope("user1", "tenant1", ["users", "orders"]) + # Should not raise + validate_tool_call("execute", {"query": "SELECT * FROM users"}, scope) + validate_tool_call("execute", {"query": "select count(*) from orders"}, scope) + + def test_pattern_word_boundary(self) -> None: + 
"""Test patterns match on word boundaries only.""" + scope = create_scope("user1", "tenant1", ["orders"]) + # DROPBOX should not match DROP + validate_tool_call("execute", {"query": "SELECT * FROM dropbox_files"}, scope) + + +class TestCreateScope: + """Test create_scope helper.""" + + def test_create_scope_basic(self) -> None: + """Test creating a basic scope.""" + scope = create_scope("user1", "tenant1", ["table1", "table2"]) + assert scope["user_id"] == "user1" + assert scope["tenant_id"] == "tenant1" + assert scope["permissions"] == ["table1", "table2"] + + def test_create_scope_with_allowed_tools(self) -> None: + """Test creating a scope with allowed tools.""" + scope = create_scope( + "user1", "tenant1", ["orders"], allowed_tools=["get_schema", "query"] + ) + assert scope["allowed_tools"] == ["get_schema", "query"] + + def test_create_scope_empty_permissions(self) -> None: + """Test scope with empty permissions.""" + scope = create_scope("user1", "tenant1", []) + assert scope["permissions"] == [] + + def test_create_scope_none_permissions(self) -> None: + """Test scope with None permissions defaults to empty list.""" + scope = create_scope("user1", "tenant1", None) + assert scope["permissions"] == [] + + +class TestSecurityViolation: + """Test SecurityViolation exception.""" + + def test_security_violation_message(self) -> None: + """Test SecurityViolation preserves message.""" + try: + raise SecurityViolation("Test violation") + except SecurityViolation as e: + assert str(e) == "Test violation" + + def test_security_violation_is_exception(self) -> None: + """Test SecurityViolation is an Exception.""" + assert issubclass(SecurityViolation, Exception) diff --git a/scripts/concat_files.py b/scripts/concat_files.py index 1c99ee6df..76c3eea88 100755 --- a/scripts/concat_files.py +++ b/scripts/concat_files.py @@ -15,10 +15,11 @@ ROOT_DIR = Path(".") SEARCH_PREFIXES = [ - "dataing", + "python-packages/dataing", + "python-packages/bond", + 
"python-packages/investigator", + "core", # "frontend", - # "bond", - # "maistro", # "docs/feedback", ] @@ -44,6 +45,9 @@ ".jsx", ".css", ".html", + ".rs", + ".toml", + } EXCLUDE = { @@ -72,6 +76,7 @@ "site", "output", "tests", + "target" } ENCODING = "utf-8" diff --git a/tests/performance/README.md b/tests/performance/README.md new file mode 100644 index 000000000..b975a2d2a --- /dev/null +++ b/tests/performance/README.md @@ -0,0 +1,287 @@ +# Performance Benchmark + +Compares investigation runtime between git branches. Runs multiple investigations on each branch and produces statistical comparisons. + +## Prerequisites + +- Docker running (for PostgreSQL, Temporal, Jaeger) +- Both branches exist locally and have been fetched +- Python 3.11+ with `httpx` installed (`pip install httpx`) +- No other services on ports 8000, 7233, 8233, 5432, 16686 + +## Quick Start + +```bash +# Recommended: compare branches with server restart (avoids degradation) and keep infra for analysis +python tests/performance/bench.py --restart-between-runs --keep-infra + +# Compare specific branches +python tests/performance/bench.py --branches feature-x main --restart-between-runs --keep-infra + +# Fewer runs for quick comparison +python tests/performance/bench.py --runs 5 --warmup 1 --restart-between-runs --keep-infra + +# After benchmark, analyze Temporal data +python tests/performance/analyze_temporal.py + +# When done, clean up Docker +docker rm -f dataing-demo-postgres dataing-demo-temporal dataing-demo-jaeger +``` + +## Command Line Options + +| Option | Default | Description | +|--------|---------|-------------| +| `--branches` | `fn-17 main` | Space-separated list of branches to compare | +| `--runs` | `10` | Number of timed investigation runs per branch | +| `--warmup` | `2` | Number of warmup runs (not counted in stats) | +| `--timeout` | `300` | Max seconds per investigation before timeout | +| `--output-dir` | `tests/performance` | Directory for results files | +| `--dry-run` | 
`false` | Setup only, don't run investigations | +| `--restart-between-runs` | `false` | **Recommended.** Restart server between runs to avoid process-level degradation | +| `--keep-infra` | `false` | **Recommended.** Keep Docker running after benchmark for Temporal analysis | +| `--verbose` | `false` | Enable verbose logging | + +## Why Use `--restart-between-runs`? + +Without server restarts, investigations progressively slow down due to: +- **Memory accumulation** in the worker process +- **Cache growth** (adapters, schemas, patterns) +- **Connection pool state** buildup + +Example degradation pattern without restarts: +``` +Run 1: 48s ████ +Run 5: 112s █████████ +Run 10: 210s █████████████████ +``` + +With `--restart-between-runs`, each run starts fresh: +``` +Run 1: 48s ████ +Run 5: 52s ████ +Run 10: 49s ████ +``` + +## What It Measures + +**Wall-clock time** from the moment `POST /api/v1/investigations` returns (investigation created and queued) until the investigation reaches a **terminal status**: +- `completed` - Investigation finished successfully +- `failed` - Workflow failed +- `cancelled` - User cancelled +- `timed_out` - Exceeded timeout +- `terminated` - Forcefully terminated + +Status is polled via `GET /api/v1/investigations/{id}/status` with exponential backoff (2s-10s intervals with jitter). + +## How It Works + +1. **Docker Infrastructure** - Shared PostgreSQL, Temporal, and Jaeger containers run for all branches +2. **Git Worktrees** - Each branch runs in an isolated worktree (no checkout switching) +3. **Server Lifecycle** - For each branch: + - Create worktree + - Start FastAPI backend + Temporal worker + - Wait for `/health` endpoint + - Run warmup investigations (not counted) + - Run timed investigations (with optional server restart between each) + - Stop processes +4. 
**Cleanup** - Remove worktrees; optionally keep Docker for analysis + + ## Output Files + + ### `results.json` + + Machine-readable complete results: + + ```json + { + "timestamp": "2025-01-19T10:30:00Z", + "machine": "hostname", + "config": { + "runs": 10, + "warmup": 2, + "timeout": 300 + }, + "branches": { + "fn-17": { + "git_sha": "abc123", + "runs": [ + {"duration_seconds": 45.2, "status": "completed", "investigation_id": "uuid"}, + ... + ], + "stats": { + "mean": 47.3, + "median": 46.5, + "stdev": 2.1, + "p95": 51.2, + "min": 44.1, + "max": 52.3 + } + }, + "main": {...} + }, + "comparison": { + "delta_mean_seconds": -5.2, + "delta_mean_percent": -9.9, + "faster_branch": "fn-17" + } + } + ``` + + ### `results.md` + + Human-readable summary table: + + ```markdown + | Branch | SHA | Mean | Median | P95 | Stdev | Min | Max | + |--------|-----|------|--------|-----|-------|-----|-----| + | fn-17 | abc123 | 47.3s | 46.5s | 51.2s | 2.1s | 44.1s | 52.3s | + | main | def456 | 52.5s | 51.8s | 56.1s | 2.8s | 49.2s | 58.7s | + + **Delta:** fn-17 is 5.2s (9.9%) faster than main + ``` + + ### Console Output + + ``` + ============================================================ + PERFORMANCE BENCHMARK RESULTS + ============================================================ + + fn-17 (b8153f9e): + Mean: 58.06s + Median: 59.75s + P95: 71.43s + Stdev: 8.12s + Range: 42.77s - 71.43s + + main (f57281ff): + Mean: 63.58s + Median: 63.90s + P95: 72.50s + Stdev: 7.09s + Range: 52.55s - 72.50s + + Delta: fn-17 is 5.52s (8.7%) FASTER + ``` + + ## Temporal Analysis + + After running a benchmark with `--keep-infra`, analyze workflow execution details: + + ```bash + # Aggregate stats across all workflows + python tests/performance/analyze_temporal.py + + # Filter by workflow type + python tests/performance/analyze_temporal.py --workflow-type InvestigationWorkflow + + # Analyze last N workflows + python tests/performance/analyze_temporal.py --last 20 + + # Save detailed JSON + python tests/performance/analyze_temporal.py 
--output temporal_analysis.json +``` + +### Sample Output + +``` +====================================================================== +Temporal Workflow Analysis - 20 workflows +====================================================================== + +WORKFLOW TOTAL DURATION: + Count: 20 + Mean: 47.39s + Median: 46.12s + P95: 52.53s + Range: 44.39s - 54.20s + +====================================================================== +ACTIVITY BREAKDOWN: +====================================================================== + +Activity Count Mean Median P95 Max +-------------------------------------------------------------------------------- +generate_hypotheses 20 18.45s 17.12s 22.67s 24.34s +evaluate_hypothesis 60 12.23s 11.89s 15.45s 18.12s +synthesize_findings 20 8.34s 7.56s 10.90s 12.45s +gather_context 20 5.67s 5.89s 7.34s 8.67s + +====================================================================== +PER-RUN PROGRESSION (to detect degradation): +====================================================================== + Run 1: 46.39s 846cbbc6-15c6-4b4b-bac8-1256cbcc2038 + Run 2: 47.80s 438ce2dc-0eae-40ff-97a8-7b4ca767a78c + ... 
+ Run 10: 48.20s 30d0fc58-a6db-4c62-a4a8-048bd3fd7f97 + + First third avg: 46.61s + Last third avg: 47.89s + ✓ Stable performance (±2.7%) +``` + +### Persistent Data + +Temporal data is persisted in `tests/performance/.temporal/temporal.db` so you can: +- Re-analyze previous benchmark runs +- Compare activity timing across different branches +- Track degradation patterns over time + +## Investigation Payload + +Each investigation uses the null_spike demo fixture: + +```json +{ + "alert": { + "dataset_ids": ["orders"], + "metric_spec": { + "metric_type": "column", + "expression": "null_count(customer_id)", + "display_name": "Null Customer IDs", + "columns_referenced": ["customer_id"] + }, + "anomaly_type": "null_spike", + "expected_value": 5, + "actual_value": 200, + "deviation_pct": 3900, + "anomaly_date": "2026-01-10", + "severity": "high" + } +} +``` + +## Troubleshooting + +### Port conflicts + +The benchmark auto-selects ports if 8000 is in use. If you see port errors: +```bash +# Check what's using the port +lsof -i :8000 +# Kill it if needed +kill -9 $(lsof -ti:8000) +``` + +### Worktree cleanup + +If a benchmark fails mid-run, clean up orphan worktrees: +```bash +git worktree list +git worktree remove benchmarks/worktrees/fn-17 --force +git worktree remove benchmarks/worktrees/main --force +``` + +### Docker cleanup + +```bash +docker rm -f dataing-demo-postgres dataing-demo-temporal dataing-demo-jaeger +``` + +### Investigation hangs + +If investigations hang consistently: +1. Check Temporal UI at http://localhost:8233 +2. Check Jaeger traces at http://localhost:16686 +3. Increase `--timeout` or check backend logs diff --git a/tests/performance/analyze_temporal.py b/tests/performance/analyze_temporal.py new file mode 100644 index 000000000..723ac2f6c --- /dev/null +++ b/tests/performance/analyze_temporal.py @@ -0,0 +1,682 @@ +#!/usr/bin/env python3 +"""Analyze Temporal workflow performance for investigation benchmarks. 
+ +Staff engineer perspective: understand where time is spent across the entire +investigation workflow to identify optimization opportunities. + +Usage: + python tests/performance/analyze_temporal.py + python tests/performance/analyze_temporal.py --last 20 + python tests/performance/analyze_temporal.py --output analysis.json +""" + +from __future__ import annotations + +import argparse +import asyncio +import json +import statistics +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from pathlib import Path +from typing import Any + +from temporalio.client import Client + + +@dataclass +class ActivityExecution: + """Single activity execution with timing.""" + name: str + scheduled_at: datetime + started_at: datetime | None + completed_at: datetime | None + queue_time_ms: float # Time waiting to be picked up + execution_time_ms: float # Time actually running + total_time_ms: float + success: bool + error: str | None = None + + +@dataclass +class WorkflowExecution: + """Single workflow execution with all activities.""" + workflow_id: str + workflow_type: str + started_at: datetime + completed_at: datetime | None + total_duration_ms: float + status: str + activities: list[ActivityExecution] = field(default_factory=list) + + @property + def activity_time_ms(self) -> float: + """Total time spent in activities.""" + return sum(a.total_time_ms for a in self.activities) + + @property + def overhead_time_ms(self) -> float: + """Time not spent in activities (workflow orchestration overhead).""" + return self.total_duration_ms - self.activity_time_ms + + +@dataclass +class PhaseBreakdown: + """Time spent in each investigation phase.""" + context_gathering_ms: float = 0 + hypothesis_generation_ms: float = 0 + hypothesis_evaluation_ms: float = 0 + synthesis_ms: float = 0 + other_ms: float = 0 + + @property + def total_ms(self) -> float: + return (self.context_gathering_ms + self.hypothesis_generation_ms + + 
self.hypothesis_evaluation_ms + self.synthesis_ms + self.other_ms) + + +def classify_activity_phase(activity_name: str) -> str: + """Map activity name to investigation phase.""" + name_lower = activity_name.lower() + + if any(x in name_lower for x in ['schema', 'context', 'gather', 'lineage', 'metadata', 'pattern']): + return 'context_gathering' + elif 'generate_hypothes' in name_lower: # generate_hypotheses specifically + return 'hypothesis_generation' + elif any(x in name_lower for x in ['evaluate', 'eval', 'query', 'sql', 'execute', 'interpret', 'evidence']): + return 'hypothesis_evaluation' + elif any(x in name_lower for x in ['synthesize', 'synthesis', 'conclude', 'summary', 'counter']): + return 'synthesis' + else: + return 'other' + + +def get_event_time(event: Any) -> datetime | None: + """Extract datetime from Temporal event, handling SDK differences.""" + if not hasattr(event, 'event_time'): + return None + event_time = event.event_time + if hasattr(event_time, 'ToDatetime'): + return event_time.ToDatetime() + return event_time + + +def get_event_type_name(event: Any) -> str: + """Get event type name, handling SDK differences.""" + if hasattr(event.event_type, 'name'): + return event.event_type.name + return str(event.event_type) + + +# Temporal event type integer values (from proto definition) +# https://github.com/temporalio/api/blob/master/temporal/api/enums/v1/event_type.proto +# Note: 5-9 are WORKFLOW_TASK events, 10+ are ACTIVITY_TASK events +EVENT_TYPE_ACTIVITY_TASK_SCHEDULED = 10 +EVENT_TYPE_ACTIVITY_TASK_STARTED = 11 +EVENT_TYPE_ACTIVITY_TASK_COMPLETED = 12 +EVENT_TYPE_ACTIVITY_TASK_FAILED = 13 +EVENT_TYPE_ACTIVITY_TASK_TIMED_OUT = 14 + + +def is_activity_scheduled(event_type: str) -> bool: + """Check if event type is ACTIVITY_TASK_SCHEDULED.""" + return ("ACTIVITY_TASK_SCHEDULED" in event_type or + event_type == str(EVENT_TYPE_ACTIVITY_TASK_SCHEDULED)) + + +def is_activity_started(event_type: str) -> bool: + """Check if event type is 
ACTIVITY_TASK_STARTED.""" + return ("ACTIVITY_TASK_STARTED" in event_type or + event_type == str(EVENT_TYPE_ACTIVITY_TASK_STARTED)) + + +def is_activity_completed(event_type: str) -> bool: + """Check if event type is ACTIVITY_TASK_COMPLETED.""" + return ("ACTIVITY_TASK_COMPLETED" in event_type or + event_type == str(EVENT_TYPE_ACTIVITY_TASK_COMPLETED)) + + +def is_activity_failed(event_type: str) -> bool: + """Check if event type is ACTIVITY_TASK_FAILED.""" + return ("ACTIVITY_TASK_FAILED" in event_type or + event_type == str(EVENT_TYPE_ACTIVITY_TASK_FAILED)) + + +async def fetch_workflow_execution( + client: Client, + workflow_id: str, + run_id: str | None, + debug: bool = False, +) -> WorkflowExecution | None: + """Fetch detailed execution data for a single workflow.""" + try: + handle = client.get_workflow_handle(workflow_id, run_id=run_id) + desc = await handle.describe() + + # Basic workflow info + started_at = desc.start_time + completed_at = desc.close_time + + if not started_at: + return None + + duration_ms = 0.0 + if completed_at: + duration_ms = (completed_at - started_at).total_seconds() * 1000 + + status = str(desc.status.name) if hasattr(desc.status, 'name') else str(desc.status) + + execution = WorkflowExecution( + workflow_id=workflow_id, + workflow_type=desc.workflow_type or "unknown", + started_at=started_at, + completed_at=completed_at, + total_duration_ms=duration_ms, + status=status, + ) + + # Parse activity timings from history + scheduled_activities: dict[int, tuple[str, datetime]] = {} # event_id -> (name, scheduled_time) + started_activities: dict[int, datetime] = {} # scheduled_event_id -> started_time + + event_types_seen: set[str] = set() + + async for event in handle.fetch_history_events(): + event_type = get_event_type_name(event) + event_time = get_event_time(event) + event_types_seen.add(event_type) + + if not event_time: + continue + + if is_activity_scheduled(event_type): + attrs = event.activity_task_scheduled_event_attributes + 
if attrs and attrs.activity_type and attrs.activity_type.name: + scheduled_activities[event.event_id] = (attrs.activity_type.name, event_time) + + elif is_activity_started(event_type): + attrs = event.activity_task_started_event_attributes + if attrs: + started_activities[attrs.scheduled_event_id] = event_time + + elif is_activity_completed(event_type): + attrs = event.activity_task_completed_event_attributes + if attrs and attrs.scheduled_event_id in scheduled_activities: + activity_name, scheduled_at = scheduled_activities[attrs.scheduled_event_id] + started_at = started_activities.get(attrs.scheduled_event_id) + completed_at = event_time + + queue_time = 0.0 + exec_time = 0.0 + + if started_at: + queue_time = (started_at - scheduled_at).total_seconds() * 1000 + exec_time = (completed_at - started_at).total_seconds() * 1000 + + total_time = (completed_at - scheduled_at).total_seconds() * 1000 + + execution.activities.append(ActivityExecution( + name=activity_name, + scheduled_at=scheduled_at, + started_at=started_at, + completed_at=completed_at, + queue_time_ms=queue_time, + execution_time_ms=exec_time, + total_time_ms=total_time, + success=True, + )) + + elif is_activity_failed(event_type): + attrs = event.activity_task_failed_event_attributes + if attrs and attrs.scheduled_event_id in scheduled_activities: + activity_name, scheduled_at = scheduled_activities[attrs.scheduled_event_id] + started_at = started_activities.get(attrs.scheduled_event_id) + + execution.activities.append(ActivityExecution( + name=activity_name, + scheduled_at=scheduled_at, + started_at=started_at, + completed_at=event_time, + queue_time_ms=0, + execution_time_ms=0, + total_time_ms=(event_time - scheduled_at).total_seconds() * 1000, + success=False, + error=str(attrs.failure) if hasattr(attrs, 'failure') else "Unknown", + )) + + if debug: + print(f" {workflow_id[:20]}...: {len(execution.activities)} activities, {format_duration(duration_ms)}") + + return execution + + except Exception as 
e: + print(f" Warning: Failed to fetch {workflow_id}: {e}") + return None + + +async def fetch_all_workflows( + client: Client, + limit: int = 50, + workflow_type: str | None = None, + debug: bool = False, +) -> list[WorkflowExecution]: + """Fetch all completed workflow executions.""" + executions: list[WorkflowExecution] = [] + + query = "ExecutionStatus = 'Completed' OR ExecutionStatus = 'Failed'" + if workflow_type: + query = f"WorkflowType = '{workflow_type}' AND ({query})" + + print(f"Fetching up to {limit} workflows...") + + count = 0 + # Track first workflow for debug output + first_debug = debug + + async for workflow in client.list_workflows(query=query): + if count >= limit: + break + + execution = await fetch_workflow_execution( + client, workflow.id, workflow.run_id, debug=first_debug + ) + if execution: + executions.append(execution) + count += 1 + if count % 10 == 0: + print(f" Fetched {count} workflows...") + # Only show debug for first few workflows + if count >= 3: + first_debug = False + + # Sort by start time + executions.sort(key=lambda x: x.started_at) + + return executions + + +def compute_phase_breakdown(execution: WorkflowExecution) -> PhaseBreakdown: + """Compute time spent in each investigation phase.""" + breakdown = PhaseBreakdown() + + for activity in execution.activities: + phase = classify_activity_phase(activity.name) + time_ms = activity.total_time_ms + + if phase == 'context_gathering': + breakdown.context_gathering_ms += time_ms + elif phase == 'hypothesis_generation': + breakdown.hypothesis_generation_ms += time_ms + elif phase == 'hypothesis_evaluation': + breakdown.hypothesis_evaluation_ms += time_ms + elif phase == 'synthesis': + breakdown.synthesis_ms += time_ms + else: + breakdown.other_ms += time_ms + + return breakdown + + +def format_duration(ms: float) -> str: + """Format milliseconds as human-readable duration.""" + if ms < 1000: + return f"{ms:.0f}ms" + elif ms < 60000: + return f"{ms/1000:.1f}s" + else: + minutes = 
int(ms // 60000) + seconds = (ms % 60000) / 1000 + return f"{minutes}m {seconds:.0f}s" + + +def format_percent(part: float, total: float) -> str: + """Format as percentage.""" + if total == 0: + return "0%" + return f"{(part/total)*100:.1f}%" + + +def print_analysis(executions: list[WorkflowExecution]) -> None: + """Print comprehensive performance analysis.""" + if not executions: + print("\nNo workflow executions found!") + print("\nPossible causes:") + print(" 1. No investigations have been run") + print(" 2. Investigations failed before completing") + print(" 3. Wrong Temporal namespace") + print("\nTry running: python tests/performance/bench.py --keep-infra --runs 3") + return + + print("\n" + "=" * 80) + print("INVESTIGATION WORKFLOW PERFORMANCE ANALYSIS") + print("=" * 80) + + # Filter to only InvestigationWorkflow (not child hypothesis workflows) + main_workflows = [e for e in executions if 'hypothesis' not in e.workflow_type.lower()] + child_workflows = [e for e in executions if 'hypothesis' in e.workflow_type.lower()] + + print(f"\nAnalyzed: {len(main_workflows)} investigation workflows, {len(child_workflows)} child workflows") + + # Aggregate activities from ALL workflows (parent + child) for activity analysis + all_workflows_for_activities = executions + + if not main_workflows: + print("\nNo main investigation workflows found. Only child workflows:") + for wf in child_workflows[:5]: + print(f" - {wf.workflow_type}: {format_duration(wf.total_duration_ms)}") + return + + # ========================================================================= + # 1. OVERALL WORKFLOW TIMING + # ========================================================================= + print("\n" + "-" * 80) + print("1. 
OVERALL WORKFLOW TIMING") + print("-" * 80) + + durations = [e.total_duration_ms for e in main_workflows] + + print(f"\n Total Workflows: {len(durations)}") + print(f" Mean Duration: {format_duration(statistics.mean(durations))}") + print(f" Median Duration: {format_duration(statistics.median(durations))}") + if len(durations) > 1: + print(f" Std Dev: {format_duration(statistics.stdev(durations))}") + print(f" Min: {format_duration(min(durations))}") + print(f" Max: {format_duration(max(durations))}") + + # P95/P99 + sorted_durations = sorted(durations) + p95_idx = int(len(sorted_durations) * 0.95) + p99_idx = int(len(sorted_durations) * 0.99) + print(f" P95: {format_duration(sorted_durations[min(p95_idx, len(sorted_durations)-1)])}") + if len(sorted_durations) > 10: + print(f" P99: {format_duration(sorted_durations[min(p99_idx, len(sorted_durations)-1)])}") + + # Child workflow timing + if child_workflows: + print(f"\n Child Workflows ({len(child_workflows)} total):") + child_durations = [e.total_duration_ms for e in child_workflows] + print(f" Mean Duration: {format_duration(statistics.mean(child_durations))}") + print(f" Min/Max: {format_duration(min(child_durations))} - {format_duration(max(child_durations))}") + + # Group by workflow type + child_by_type: dict[str, list[float]] = defaultdict(list) + for wf in child_workflows: + child_by_type[wf.workflow_type].append(wf.total_duration_ms) + + if len(child_by_type) > 1: + print(f"\n Child Workflow Types:") + for wf_type, times in sorted(child_by_type.items(), key=lambda x: -sum(x[1])): + print(f" {wf_type}: {len(times)}x, mean {format_duration(statistics.mean(times))}") + + # ========================================================================= + # 2. PHASE BREAKDOWN (WHERE TIME IS SPENT) + # ========================================================================= + print("\n" + "-" * 80) + print("2. 
PHASE BREAKDOWN (WHERE TIME IS SPENT)") + print("-" * 80) + + # Use ALL workflows (parent + child) for phase breakdown since activities run in children + total_phase = PhaseBreakdown() + for execution in all_workflows_for_activities: + breakdown = compute_phase_breakdown(execution) + total_phase.context_gathering_ms += breakdown.context_gathering_ms + total_phase.hypothesis_generation_ms += breakdown.hypothesis_generation_ms + total_phase.hypothesis_evaluation_ms += breakdown.hypothesis_evaluation_ms + total_phase.synthesis_ms += breakdown.synthesis_ms + total_phase.other_ms += breakdown.other_ms + + total = total_phase.total_ms + if total > 0: + phases = [ + ("Context Gathering", total_phase.context_gathering_ms), + ("Hypothesis Generation", total_phase.hypothesis_generation_ms), + ("Hypothesis Evaluation", total_phase.hypothesis_evaluation_ms), + ("Synthesis", total_phase.synthesis_ms), + ("Other/Orchestration", total_phase.other_ms), + ] + + # Sort by time spent (descending) + phases.sort(key=lambda x: -x[1]) + + print(f"\n {'Phase':<25} {'Time':>12} {'% of Total':>12} Bar") + print(" " + "-" * 70) + + max_bar = 40 + for name, time_ms in phases: + pct = (time_ms / total) * 100 + bar_len = int((time_ms / total) * max_bar) + bar = "█" * bar_len + print(f" {name:<25} {format_duration(time_ms):>12} {pct:>10.1f}% {bar}") + else: + print("\n No activity timing data found!") + print(" Workflows may be completing without running activities.") + + # ========================================================================= + # 3. ACTIVITY-LEVEL BREAKDOWN + # ========================================================================= + print("\n" + "-" * 80) + print("3. 
ACTIVITY-LEVEL BREAKDOWN (TOP 10 BY TIME)") + print("-" * 80) + + # Use ALL workflows (parent + child) for activity breakdown + activity_times: dict[str, list[float]] = defaultdict(list) + activity_queue_times: dict[str, list[float]] = defaultdict(list) + + for execution in all_workflows_for_activities: + for activity in execution.activities: + activity_times[activity.name].append(activity.execution_time_ms) + activity_queue_times[activity.name].append(activity.queue_time_ms) + + if activity_times: + # Calculate totals and sort + activity_totals = [(name, sum(times), len(times), statistics.mean(times)) + for name, times in activity_times.items()] + activity_totals.sort(key=lambda x: -x[1]) # Sort by total time + + print(f"\n {'Activity':<35} {'Count':>6} {'Total':>10} {'Mean':>10} {'% Time':>8}") + print(" " + "-" * 75) + + grand_total = sum(t[1] for t in activity_totals) + + for name, total_time, count, mean_time in activity_totals[:10]: + pct = (total_time / grand_total) * 100 if grand_total > 0 else 0 + # Truncate long names + display_name = name[:33] + ".." if len(name) > 35 else name + print(f" {display_name:<35} {count:>6} {format_duration(total_time):>10} {format_duration(mean_time):>10} {pct:>7.1f}%") + else: + print("\n No activity data found!") + + # ========================================================================= + # 4. QUEUE TIME ANALYSIS (WORKER CAPACITY) + # ========================================================================= + print("\n" + "-" * 80) + print("4. 
QUEUE TIME ANALYSIS (WORKER CAPACITY)") + print("-" * 80) + + all_queue_times = [] + for times in activity_queue_times.values(): + all_queue_times.extend(times) + + if all_queue_times: + mean_queue = statistics.mean(all_queue_times) + max_queue = max(all_queue_times) + + print(f"\n Mean Queue Time: {format_duration(mean_queue)}") + print(f" Max Queue Time: {format_duration(max_queue)}") + + if mean_queue > 1000: # More than 1 second average queue time + print(f"\n ⚠️ HIGH QUEUE TIME - Consider adding more workers") + elif mean_queue > 100: + print(f"\n ℹ️ Moderate queue time - workers are keeping up") + else: + print(f"\n ✓ Low queue time - workers have capacity") + else: + print("\n No queue time data available") + + # ========================================================================= + # 5. RUN-OVER-RUN PROGRESSION (DEGRADATION DETECTION) + # ========================================================================= + print("\n" + "-" * 80) + print("5. RUN-OVER-RUN PROGRESSION") + print("-" * 80) + + print(f"\n {'Run':>4} {'Duration':>10} {'Activities':>10} Workflow ID") + print(" " + "-" * 65) + + for i, execution in enumerate(main_workflows, 1): + print(f" {i:>4} {format_duration(execution.total_duration_ms):>10} {len(execution.activities):>10} {execution.workflow_id[:36]}") + + # Degradation check + if len(main_workflows) >= 3: + third = len(main_workflows) // 3 + first_third = statistics.mean([e.total_duration_ms for e in main_workflows[:third]]) + last_third = statistics.mean([e.total_duration_ms for e in main_workflows[-third:]]) + + if first_third > 0: + change_pct = ((last_third - first_third) / first_third) * 100 + + print(f"\n First third avg: {format_duration(first_third)}") + print(f" Last third avg: {format_duration(last_third)}") + + if change_pct > 20: + print(f"\n ⚠️ DEGRADATION: {change_pct:.1f}% slower over time") + print(" Consider using --restart-between-runs to isolate cause") + elif change_pct < -20: + print(f"\n 📈 IMPROVEMENT: 
{-change_pct:.1f}% faster over time (warmup effect)") + else: + print(f"\n ✓ Stable performance (±{abs(change_pct):.1f}%)") + + # ========================================================================= + # 6. OPTIMIZATION RECOMMENDATIONS + # ========================================================================= + print("\n" + "-" * 80) + print("6. OPTIMIZATION RECOMMENDATIONS") + print("-" * 80) + + recommendations = [] + + # Check if any phase dominates + if total > 0: + for name, time_ms in phases: + if time_ms / total > 0.5: + recommendations.append(f"• {name} takes {format_percent(time_ms, total)} of time - focus optimization here") + + # Check for degradation + if len(main_workflows) >= 3: + if change_pct > 20: + recommendations.append("• Run-over-run degradation detected - investigate memory leaks or cache growth") + + # Check queue times + if all_queue_times and statistics.mean(all_queue_times) > 1000: + recommendations.append("• High queue times - add more Temporal workers") + + # Check if activities are missing (check all workflows including children) + total_activities = sum(len(e.activities) for e in all_workflows_for_activities) + if total_activities == 0: + recommendations.append("• No activities recorded - workflows may be failing before running") + recommendations.append("• Check Temporal UI for workflow errors") + elif len(main_workflows) > 0 and total_activities / len(main_workflows) < 3: + recommendations.append("• Very few activities per workflow - investigations may be short-circuiting") + + if recommendations: + print() + for rec in recommendations: + print(f" {rec}") + else: + print("\n ✓ No major issues detected") + + print("\n" + "=" * 80) + + +def save_json(executions: list[WorkflowExecution], output_path: Path) -> None: + """Save detailed analysis to JSON.""" + + def to_dict(obj: Any) -> Any: + if isinstance(obj, datetime): + return obj.isoformat() + elif hasattr(obj, '__dict__'): + return {k: to_dict(v) for k, v in 
obj.__dict__.items()} + elif isinstance(obj, list): + return [to_dict(v) for v in obj] + elif isinstance(obj, dict): + return {k: to_dict(v) for k, v in obj.items()} + return obj + + data = { + "generated_at": datetime.now().isoformat(), + "workflow_count": len(executions), + "executions": [to_dict(e) for e in executions], + } + + output_path.write_text(json.dumps(data, indent=2)) + print(f"\nDetailed data saved to: {output_path}") + + +async def main() -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Analyze Temporal workflow performance for investigations" + ) + parser.add_argument( + "--temporal-host", + default="localhost:7233", + help="Temporal server address (default: localhost:7233)", + ) + parser.add_argument( + "--namespace", + default="default", + help="Temporal namespace (default: default)", + ) + parser.add_argument( + "--workflow-type", + help="Filter by workflow type", + ) + parser.add_argument( + "--last", + type=int, + default=50, + help="Number of recent workflows to analyze (default: 50)", + ) + parser.add_argument( + "--output", + type=Path, + help="Save detailed JSON to this file", + ) + parser.add_argument( + "--debug", + action="store_true", + help="Show debug output including event types", + ) + + args = parser.parse_args() + + try: + client = await Client.connect(args.temporal_host, namespace=args.namespace) + except Exception as e: + print(f"Error: Could not connect to Temporal at {args.temporal_host}") + print(f" {e}") + print("\nMake sure:") + print(" 1. Run benchmark with --keep-infra flag") + print(" 2. 
Docker containers are running: docker ps | grep temporal") + return 1 + + print(f"Connected to Temporal at {args.temporal_host}") + + executions = await fetch_all_workflows( + client, + limit=args.last, + workflow_type=args.workflow_type, + debug=args.debug, + ) + + print_analysis(executions) + + if args.output: + save_json(executions, args.output) + + return 0 + + +if __name__ == "__main__": + exit(asyncio.run(main())) diff --git a/tests/performance/bench.py b/tests/performance/bench.py new file mode 100755 index 000000000..6ce147df0 --- /dev/null +++ b/tests/performance/bench.py @@ -0,0 +1,1073 @@ +#!/usr/bin/env python3 +"""Performance benchmark comparing investigation runtime between git branches. + +Usage: + python tests/performance/bench.py + python tests/performance/bench.py --branches fn-17 main --runs 10 + python tests/performance/bench.py --dry-run --verbose +""" + +from __future__ import annotations + +import argparse +import json +import logging +import os +import platform +import random +import shutil +import signal +import socket +import statistics +import subprocess +import sys +import time +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +# Optional: use httpx if available, fall back to urllib +try: + import httpx + + HAS_HTTPX = True +except ImportError: + import urllib.error + import urllib.request + + HAS_HTTPX = False + +# Load .env file from repo root +try: + from dotenv import load_dotenv + + # Find repo root and load .env + _repo_root = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + ).stdout.strip() + if _repo_root: + load_dotenv(Path(_repo_root) / ".env") +except ImportError: + pass # dotenv not installed, rely on shell environment + +# ============================================================================ +# Constants +# 
============================================================================ + +DEFAULT_BRANCHES = ["fn-17", "main"] +DEFAULT_NUM_RUNS = 10 +DEFAULT_WARMUP_RUNS = 2 +DEFAULT_TIMEOUT = 300 # 5 minutes per investigation +DEFAULT_POLL_INTERVAL = 2.0 +MAX_POLL_INTERVAL = 10.0 +HEALTH_TIMEOUT = 120 # 2 minutes to wait for server to be ready + +API_KEY = "dd_demo_12345" +BASE_PORT = 8000 + +TERMINAL_STATUSES = {"completed", "failed", "cancelled", "timed_out", "terminated"} + +# Investigation payload (null_spike demo) +INVESTIGATION_PAYLOAD = { + "alert": { + "dataset_ids": ["orders"], + "metric_spec": { + "metric_type": "column", + "expression": "null_count(customer_id)", + "display_name": "Null Customer IDs", + "columns_referenced": ["customer_id"], + }, + "anomaly_type": "null_spike", + "expected_value": 5, + "actual_value": 200, + "deviation_pct": 3900, + "anomaly_date": "2026-01-10", + "severity": "high", + } +} + +# ============================================================================ +# Logging +# ============================================================================ + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%H:%M:%S", +) +logger = logging.getLogger(__name__) + +# ============================================================================ +# Data Classes +# ============================================================================ + + +@dataclass +class RunResult: + """Result of a single investigation run.""" + + duration_seconds: float + status: str + investigation_id: str + error: str | None = None + + +@dataclass +class BranchStats: + """Statistics for a branch's runs.""" + + mean: float + median: float + stdev: float + p95: float + min_val: float + max_val: float + + +@dataclass +class BranchResult: + """Complete result for a branch.""" + + branch: str + git_sha: str + runs: list[RunResult] = field(default_factory=list) + stats: BranchStats | None = None + + +@dataclass +class 
BenchmarkResults: + """Complete benchmark results.""" + + timestamp: str + machine: str + config: dict[str, Any] + branches: dict[str, BranchResult] = field(default_factory=dict) + comparison: dict[str, Any] | None = None + + +# ============================================================================ +# HTTP Client (works with or without httpx) +# ============================================================================ + + +class HTTPClient: + """Simple HTTP client that works with httpx or urllib.""" + + def __init__(self, base_url: str, timeout: float = 30.0): + """Initialize HTTP client.""" + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.headers = { + "Content-Type": "application/json", + "X-API-Key": API_KEY, + } + if HAS_HTTPX: + self._client = httpx.Client(timeout=timeout) + else: + self._client = None + + def close(self) -> None: + """Close the client.""" + if HAS_HTTPX and self._client: + self._client.close() + + def get(self, path: str) -> dict[str, Any]: + """Make a GET request.""" + url = f"{self.base_url}{path}" + if HAS_HTTPX: + resp = self._client.get(url, headers=self.headers) + resp.raise_for_status() + return resp.json() + else: + req = urllib.request.Request(url, headers=self.headers) + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + return json.loads(resp.read().decode()) + + def post(self, path: str, data: dict[str, Any] | None = None) -> dict[str, Any]: + """Make a POST request.""" + url = f"{self.base_url}{path}" + body = json.dumps(data).encode() if data else None + if HAS_HTTPX: + resp = self._client.post(url, headers=self.headers, content=body) + resp.raise_for_status() + return resp.json() + else: + req = urllib.request.Request(url, data=body, headers=self.headers, method="POST") + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + return json.loads(resp.read().decode()) + + def health_check(self) -> bool: + """Check if server is healthy.""" + try: + resp = self.get("/health") 
+ return resp.get("status") == "healthy" + except Exception: + return False + + +# ============================================================================ +# Utility Functions +# ============================================================================ + + +def find_free_port(start: int = BASE_PORT) -> int: + """Find a free port starting from the given port.""" + for port in range(start, start + 100): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("localhost", port)) + return port + except OSError: + continue + raise RuntimeError(f"Could not find a free port starting from {start}") + + +def get_git_sha(repo_path: Path, branch: str) -> str: + """Get the git SHA for a branch.""" + result = subprocess.run( + ["git", "rev-parse", "--short", branch], + cwd=repo_path, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + + +def get_repo_root() -> Path: + """Get the repository root directory.""" + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=True, + ) + return Path(result.stdout.strip()) + + +# ============================================================================ +# Git Worktree Management +# ============================================================================ + + +def get_current_branch(repo_root: Path) -> str: + """Get the currently checked out branch name.""" + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + ) + if result.returncode != 0: + return "" + return result.stdout.strip() + + +def setup_worktree(branch: str, base_dir: Path) -> tuple[Path, bool]: + """Create a git worktree for the given branch. + + Returns: + Tuple of (path, is_worktree) where is_worktree is False if using current dir. 
+ """ + # Check if we're already on this branch + current_branch = get_current_branch(base_dir) + if current_branch == branch: + logger.info(f"Already on branch '{branch}', using current directory") + return base_dir, False + + worktree_path = base_dir / "benchmarks" / "worktrees" / branch.replace("/", "-") + + # Remove existing worktree if it exists + if worktree_path.exists(): + logger.info(f"Removing existing worktree at {worktree_path}") + subprocess.run( + ["git", "worktree", "remove", "--force", str(worktree_path)], + cwd=base_dir, + capture_output=True, + ) + if worktree_path.exists(): + shutil.rmtree(worktree_path) + + # Create worktree directory + worktree_path.parent.mkdir(parents=True, exist_ok=True) + + # Add the worktree + logger.info(f"Creating worktree for branch '{branch}' at {worktree_path}") + result = subprocess.run( + ["git", "worktree", "add", str(worktree_path), branch], + cwd=base_dir, + capture_output=True, + text=True, + ) + if result.returncode != 0: + raise RuntimeError(f"Failed to create worktree: {result.stderr}") + + return worktree_path, True + + +def cleanup_worktree(worktree_path: Path, base_dir: Path, is_worktree: bool) -> None: + """Remove a git worktree.""" + if not is_worktree: + # Not a worktree, nothing to clean up + return + if worktree_path.exists(): + logger.info(f"Cleaning up worktree at {worktree_path}") + subprocess.run( + ["git", "worktree", "remove", "--force", str(worktree_path)], + cwd=base_dir, + capture_output=True, + ) + if worktree_path.exists(): + shutil.rmtree(worktree_path) + + +# ============================================================================ +# Docker Infrastructure +# ============================================================================ + + +def start_docker_infrastructure() -> None: + """Start shared Docker containers (PostgreSQL, Temporal, Jaeger).""" + logger.info("Starting Docker infrastructure...") + + # Start PostgreSQL + logger.info("Starting PostgreSQL...") + 
subprocess.run(["docker", "rm", "-f", "dataing-demo-postgres"], capture_output=True) + subprocess.run( + [ + "docker", + "run", + "-d", + "--name", + "dataing-demo-postgres", + "-e", + "POSTGRES_DB=dataing_demo", + "-e", + "POSTGRES_USER=dataing", + "-e", + "POSTGRES_PASSWORD=dataing", + "-p", + "5432:5432", + "pgvector/pgvector:pg16", + ], + check=True, + ) + + # Wait for PostgreSQL + for _ in range(30): + result = subprocess.run( + [ + "docker", + "exec", + "dataing-demo-postgres", + "pg_isready", + "-U", + "dataing", + ], + capture_output=True, + ) + if result.returncode == 0: + logger.info("PostgreSQL is ready") + break + time.sleep(1) + else: + raise RuntimeError("PostgreSQL did not become ready in time") + + # Start Temporal with persistent storage + logger.info("Starting Temporal...") + subprocess.run(["docker", "rm", "-f", "dataing-demo-temporal"], capture_output=True) + + # Create persistent data directory for Temporal + temporal_data_dir = Path(__file__).parent / ".temporal" + temporal_data_dir.mkdir(parents=True, exist_ok=True) + + subprocess.run( + [ + "docker", + "run", + "-d", + "--name", + "dataing-demo-temporal", + "-p", + "7233:7233", + "-p", + "8233:8233", + "-v", + f"{temporal_data_dir.absolute()}:/data", + "--entrypoint", + "temporal", + "temporalio/admin-tools:latest", + "server", + "start-dev", + "--ip", + "0.0.0.0", + "--db-filename", + "/data/temporal.db", + ], + check=True, + ) + + # Wait for Temporal + for _ in range(30): + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(1) + s.connect(("localhost", 8233)) + logger.info("Temporal is ready") + break + except (OSError, socket.timeout): + time.sleep(1) + else: + raise RuntimeError("Temporal did not become ready in time") + + # Start Jaeger + logger.info("Starting Jaeger...") + subprocess.run(["docker", "rm", "-f", "dataing-demo-jaeger"], capture_output=True) + subprocess.run( + [ + "docker", + "run", + "-d", + "--name", + "dataing-demo-jaeger", + "-e", + 
"COLLECTOR_OTLP_ENABLED=true", + "-p", + "16686:16686", + "-p", + "4317:4317", + "-p", + "4318:4318", + "jaegertracing/all-in-one:1.76.0", + ], + check=True, + ) + + logger.info("Docker infrastructure started successfully") + + +def stop_docker_infrastructure() -> None: + """Stop Docker containers.""" + logger.info("Stopping Docker infrastructure...") + for container in ["dataing-demo-postgres", "dataing-demo-temporal", "dataing-demo-jaeger"]: + subprocess.run(["docker", "rm", "-f", container], capture_output=True) + logger.info("Docker infrastructure stopped") + + +def run_migrations(worktree_path: Path) -> None: + """Run database migrations.""" + logger.info("Running database migrations...") + migrations_dir = worktree_path / "python-packages" / "dataing" / "migrations" + + if not migrations_dir.exists(): + logger.warning(f"Migrations directory not found: {migrations_dir}") + return + + # Get all migration files sorted + migration_files = sorted(migrations_dir.glob("*.sql")) + + for migration_file in migration_files: + result = subprocess.run( + [ + "psql", + "-h", + "localhost", + "-U", + "dataing", + "-d", + "dataing_demo", + "-f", + str(migration_file), + ], + capture_output=True, + env={**os.environ, "PGPASSWORD": "dataing"}, + ) + if result.returncode != 0 and b"already exists" not in result.stderr: + logger.debug(f"Migration {migration_file.name}: {result.stderr.decode()[:200]}") + + logger.info("Migrations complete") + + +# ============================================================================ +# Server Management +# ============================================================================ + + +class DemoServer: + """Manages the demo server processes for a branch.""" + + def __init__(self, worktree_path: Path, port: int, verbose: bool = False): + """Initialize demo server manager.""" + self.worktree_path = worktree_path + self.port = port + self.verbose = verbose + self.backend_process: subprocess.Popen | None = None + self.worker_process: 
subprocess.Popen | None = None + self._env = self._build_env() + + def _build_env(self) -> dict[str, str]: + """Build environment variables for the server.""" + env = os.environ.copy() + env.update( + { + "DATADR_DEMO_MODE": "true", + "DATADR_FIXTURE_PATH": str(self.worktree_path / "demo" / "fixtures" / "null_spike"), + "DATABASE_URL": "postgresql://dataing:dataing@localhost:5432/dataing_demo", + "APP_DATABASE_URL": "postgresql://dataing:dataing@localhost:5432/dataing_demo", + "INVESTIGATION_ENGINE": "temporal", + "TEMPORAL_HOST": "localhost:7233", + "ENCRYPTION_KEY": "ZnxhCyx4-ZjziPWtUguwGOFMMiLNioSwso5-qNPAGZI=", + "OTEL_SERVICE_NAME": "dataing-bench", + "OTEL_TRACES_ENABLED": "true", + "OTEL_METRICS_ENABLED": "false", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://localhost:4318", + } + ) + return env + + def start(self) -> None: + """Start the backend and worker processes.""" + logger.info(f"Starting demo server on port {self.port}...") + + # Ensure uv dependencies are synced + subprocess.run( + ["uv", "sync", "--quiet"], + cwd=self.worktree_path, + env=self._env, + capture_output=True, + ) + + # Output handling based on verbose mode + if self.verbose: + stdout = None # Inherit from parent (show output) + stderr = None + else: + stdout = subprocess.PIPE + stderr = subprocess.PIPE + + # Start backend + self.backend_process = subprocess.Popen( + [ + "uv", + "run", + "fastapi", + "dev", + "python-packages/dataing/src/dataing/entrypoints/api/app.py", + "--host", + "0.0.0.0", + "--port", + str(self.port), + ], + cwd=self.worktree_path, + env=self._env, + stdout=stdout, + stderr=stderr, + ) + + # Start Temporal worker + self.worker_process = subprocess.Popen( + ["uv", "run", "python", "-m", "dataing.entrypoints.temporal_worker"], + cwd=self.worktree_path, + env=self._env, + stdout=stdout, + stderr=stderr, + ) + + logger.info(f"Server processes started (backend PID: {self.backend_process.pid}, worker PID: {self.worker_process.pid})") + + def wait_for_ready(self, timeout: 
int = HEALTH_TIMEOUT) -> bool: + """Wait for the server to be ready.""" + logger.info(f"Waiting for server to be ready on port {self.port}...") + client = HTTPClient(f"http://localhost:{self.port}") + try: + start = time.time() + while time.time() - start < timeout: + # Check if processes have crashed + if self.backend_process and self.backend_process.poll() is not None: + exit_code = self.backend_process.returncode + stderr_output = "" + if self.backend_process.stderr: + stderr_output = self.backend_process.stderr.read().decode()[:500] + logger.error(f"Backend process exited with code {exit_code}") + if stderr_output: + logger.error(f"Backend stderr: {stderr_output}") + return False + + if self.worker_process and self.worker_process.poll() is not None: + exit_code = self.worker_process.returncode + stderr_output = "" + if self.worker_process.stderr: + stderr_output = self.worker_process.stderr.read().decode()[:500] + logger.error(f"Worker process exited with code {exit_code}") + if stderr_output: + logger.error(f"Worker stderr: {stderr_output}") + return False + + if client.health_check(): + logger.info("Server is ready") + return True + time.sleep(1) + logger.error(f"Server did not become ready within {timeout}s") + return False + finally: + client.close() + + def stop(self) -> None: + """Stop the backend and worker processes.""" + logger.info("Stopping demo server...") + + # Stop worker first + if self.worker_process: + self.worker_process.terminate() + try: + self.worker_process.wait(timeout=10) + except subprocess.TimeoutExpired: + self.worker_process.kill() + self.worker_process = None + + # Stop backend + if self.backend_process: + self.backend_process.terminate() + try: + self.backend_process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.backend_process.kill() + self.backend_process = None + + # Clean up any orphan processes on the port + subprocess.run(f"lsof -ti:{self.port} | xargs kill -9 2>/dev/null || true", shell=True) + + 
logger.info("Server stopped") + + +# ============================================================================ +# Investigation Runner +# ============================================================================ + + +def run_investigation(client: HTTPClient, timeout: int) -> RunResult: + """Run a single investigation and measure its duration.""" + start_time = time.time() + + try: + # Start investigation + response = client.post("/api/v1/investigations", INVESTIGATION_PAYLOAD) + investigation_id = response["investigation_id"] + logger.debug(f"Started investigation {investigation_id}") + + # Poll for completion + interval = DEFAULT_POLL_INTERVAL + while True: + elapsed = time.time() - start_time + if elapsed > timeout: + return RunResult( + duration_seconds=elapsed, + status="timeout", + investigation_id=investigation_id, + error=f"Investigation did not complete within {timeout}s", + ) + + try: + status_response = client.get(f"/api/v1/investigations/{investigation_id}/status") + workflow_status = status_response.get("workflow_status", "unknown") + + if workflow_status in TERMINAL_STATUSES: + duration = time.time() - start_time + logger.debug(f"Investigation {investigation_id} completed with status '{workflow_status}' in {duration:.2f}s") + return RunResult( + duration_seconds=duration, + status=workflow_status, + investigation_id=investigation_id, + ) + + except Exception as e: + logger.warning(f"Error polling status: {e}") + + # Exponential backoff with jitter + jitter = random.uniform(0, 0.5 * interval) + sleep_time = min(interval + jitter, MAX_POLL_INTERVAL) + time.sleep(sleep_time) + interval = min(interval * 1.2, MAX_POLL_INTERVAL) + + except Exception as e: + duration = time.time() - start_time + return RunResult( + duration_seconds=duration, + status="error", + investigation_id="", + error=str(e), + ) + + +def calculate_stats(runs: list[RunResult]) -> BranchStats: + """Calculate statistics from run results.""" + durations = [r.duration_seconds for r in 
runs if r.status == "completed"] + + if not durations: + return BranchStats( + mean=0.0, + median=0.0, + stdev=0.0, + p95=0.0, + min_val=0.0, + max_val=0.0, + ) + + durations.sort() + n = len(durations) + p95_idx = int(n * 0.95) + + return BranchStats( + mean=statistics.mean(durations), + median=statistics.median(durations), + stdev=statistics.stdev(durations) if len(durations) > 1 else 0.0, + p95=durations[min(p95_idx, n - 1)], + min_val=min(durations), + max_val=max(durations), + ) + + +# ============================================================================ +# Benchmark Runner +# ============================================================================ + + +def run_benchmark_for_branch( + branch: str, + repo_root: Path, + num_runs: int, + warmup_runs: int, + timeout: int, + dry_run: bool = False, + verbose: bool = False, + restart_between_runs: bool = False, +) -> BranchResult: + """Run benchmark for a single branch.""" + logger.info(f"\n{'='*60}") + logger.info(f"Benchmarking branch: {branch}") + logger.info(f"{'='*60}") + + git_sha = get_git_sha(repo_root, branch) + result = BranchResult(branch=branch, git_sha=git_sha) + + # Create worktree (or use current dir if already on this branch) + worktree_path, is_worktree = setup_worktree(branch, repo_root) + + # Find free port + port = find_free_port() + logger.info(f"Using port {port}") + + # Run migrations (safe to run multiple times) + run_migrations(worktree_path) + + # Start server + server = DemoServer(worktree_path, port, verbose=verbose) + try: + server.start() + if not server.wait_for_ready(): + raise RuntimeError("Server failed to become ready") + + if dry_run: + logger.info("Dry run - skipping investigations") + return result + + client = HTTPClient(f"http://localhost:{port}") + + try: + # Warmup runs + logger.info(f"Running {warmup_runs} warmup investigations...") + for i in range(warmup_runs): + logger.info(f" Warmup {i+1}/{warmup_runs}") + run_investigation(client, timeout) + + # Timed runs + 
logger.info(f"Running {num_runs} timed investigations...") + for i in range(num_runs): + logger.info(f" Run {i+1}/{num_runs}") + run_result = run_investigation(client, timeout) + result.runs.append(run_result) + logger.info(f" Duration: {run_result.duration_seconds:.2f}s, Status: {run_result.status}") + + # Restart server between runs if requested (to isolate process vs DB issues) + if restart_between_runs and i < num_runs - 1: + logger.info(" Restarting server for next run...") + client.close() + server.stop() + time.sleep(2) # Brief pause for cleanup + server.start() + if not server.wait_for_ready(): + raise RuntimeError("Server failed to restart") + client = HTTPClient(f"http://localhost:{port}") + + finally: + client.close() + + # Calculate stats + result.stats = calculate_stats(result.runs) + + finally: + server.stop() + cleanup_worktree(worktree_path, repo_root, is_worktree) + + return result + + +def run_benchmark( + branches: list[str], + num_runs: int, + warmup_runs: int, + timeout: int, + output_dir: Path, + dry_run: bool = False, + verbose: bool = False, + restart_between_runs: bool = False, + keep_infra: bool = False, +) -> BenchmarkResults: + """Run the complete benchmark.""" + repo_root = get_repo_root() + + results = BenchmarkResults( + timestamp=datetime.now(timezone.utc).isoformat(), + machine=platform.node(), + config={ + "runs": num_runs, + "warmup": warmup_runs, + "timeout": timeout, + "branches": branches, + }, + ) + + # Start Docker infrastructure + start_docker_infrastructure() + + try: + for branch in branches: + branch_result = run_benchmark_for_branch( + branch=branch, + repo_root=repo_root, + num_runs=num_runs, + warmup_runs=warmup_runs, + timeout=timeout, + dry_run=dry_run, + verbose=verbose, + restart_between_runs=restart_between_runs, + ) + results.branches[branch] = branch_result + + # Calculate comparison if we have two branches + if len(branches) == 2 and not dry_run: + b1, b2 = branches + stats1 = results.branches[b1].stats + 
stats2 = results.branches[b2].stats + + if stats1 and stats2 and stats1.mean > 0 and stats2.mean > 0: + delta = stats1.mean - stats2.mean + delta_pct = (delta / stats2.mean) * 100 + + results.comparison = { + "delta_mean_seconds": delta, + "delta_mean_percent": delta_pct, + "faster_branch": b1 if delta < 0 else b2, + } + + finally: + if keep_infra: + logger.info("Keeping Docker infrastructure running (--keep-infra)") + logger.info(" Temporal UI: http://localhost:8233") + logger.info(" Jaeger UI: http://localhost:16686") + logger.info(" To stop: docker rm -f dataing-demo-postgres dataing-demo-temporal dataing-demo-jaeger") + else: + stop_docker_infrastructure() + + return results + + +# ============================================================================ +# Output +# ============================================================================ + + +def save_results(results: BenchmarkResults, output_dir: Path) -> None: + """Save results to JSON and Markdown files.""" + output_dir.mkdir(parents=True, exist_ok=True) + + # Convert to dict for JSON serialization + def to_dict(obj: Any) -> Any: + if hasattr(obj, "__dict__"): + return {k: to_dict(v) for k, v in asdict(obj).items()} + elif isinstance(obj, list): + return [to_dict(v) for v in obj] + elif isinstance(obj, dict): + return {k: to_dict(v) for k, v in obj.items()} + return obj + + results_dict = to_dict(results) + + # Save JSON + json_path = output_dir / "results.json" + with open(json_path, "w") as f: + json.dump(results_dict, f, indent=2) + logger.info(f"Results saved to {json_path}") + + # Save Markdown + md_path = output_dir / "results.md" + with open(md_path, "w") as f: + f.write(f"# Performance Benchmark Results\n\n") + f.write(f"**Timestamp:** {results.timestamp}\n") + f.write(f"**Machine:** {results.machine}\n") + f.write(f"**Config:** {results.config['runs']} runs, {results.config['warmup']} warmup, {results.config['timeout']}s timeout\n\n") + + f.write("## Results\n\n") + f.write("| Branch | SHA | 
Mean | Median | P95 | Stdev | Min | Max |\n") + f.write("|--------|-----|------|--------|-----|-------|-----|-----|\n") + + for branch, br in results.branches.items(): + if br.stats: + f.write( + f"| {branch} | {br.git_sha} | {br.stats.mean:.2f}s | {br.stats.median:.2f}s | " + f"{br.stats.p95:.2f}s | {br.stats.stdev:.2f}s | {br.stats.min_val:.2f}s | {br.stats.max_val:.2f}s |\n" + ) + + if results.comparison: + f.write(f"\n**Delta:** {results.comparison['faster_branch']} is ") + f.write(f"{abs(results.comparison['delta_mean_seconds']):.2f}s ") + f.write(f"({abs(results.comparison['delta_mean_percent']):.1f}%) ") + f.write(f"faster\n") + + logger.info(f"Results saved to {md_path}") + + +def print_summary(results: BenchmarkResults) -> None: + """Print summary to console.""" + print("\n" + "=" * 60) + print(" PERFORMANCE BENCHMARK RESULTS") + print("=" * 60 + "\n") + + for branch, br in results.branches.items(): + print(f"{branch} ({br.git_sha}):") + if br.stats: + print(f" Mean: {br.stats.mean:.2f}s") + print(f" Median: {br.stats.median:.2f}s") + print(f" P95: {br.stats.p95:.2f}s") + print(f" Stdev: {br.stats.stdev:.2f}s") + print(f" Range: {br.stats.min_val:.2f}s - {br.stats.max_val:.2f}s") + else: + print(" No successful runs") + print() + + if results.comparison: + faster = results.comparison["faster_branch"] + delta_s = abs(results.comparison["delta_mean_seconds"]) + delta_pct = abs(results.comparison["delta_mean_percent"]) + print(f"Delta: {faster} is {delta_s:.2f}s ({delta_pct:.1f}%) FASTER") + + print("=" * 60 + "\n") + + +# ============================================================================ +# Main +# ============================================================================ + + +def main() -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Performance benchmark comparing investigation runtime between branches" + ) + parser.add_argument( + "--branches", + nargs="+", + default=DEFAULT_BRANCHES, + help=f"Branches to 
compare (default: {DEFAULT_BRANCHES})", + ) + parser.add_argument( + "--runs", + type=int, + default=DEFAULT_NUM_RUNS, + help=f"Number of timed runs per branch (default: {DEFAULT_NUM_RUNS})", + ) + parser.add_argument( + "--warmup", + type=int, + default=DEFAULT_WARMUP_RUNS, + help=f"Number of warmup runs per branch (default: {DEFAULT_WARMUP_RUNS})", + ) + parser.add_argument( + "--timeout", + type=int, + default=DEFAULT_TIMEOUT, + help=f"Timeout per investigation in seconds (default: {DEFAULT_TIMEOUT})", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path("tests/performance"), + help="Output directory for results (default: tests/performance)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Setup only, don't run investigations", + ) + parser.add_argument( + "--restart-between-runs", + action="store_true", + help="Restart server between each investigation (isolates process vs DB issues)", + ) + parser.add_argument( + "--keep-infra", + action="store_true", + help="Keep Docker containers running after benchmark (for Temporal UI analysis)", + ) + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="Enable verbose logging", + ) + + args = parser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + # Handle SIGINT gracefully + def signal_handler(sig: int, frame: Any) -> None: + logger.info("\nInterrupted, cleaning up...") + stop_docker_infrastructure() + sys.exit(1) + + signal.signal(signal.SIGINT, signal_handler) + + logger.info(f"Starting benchmark: {args.branches}") + logger.info(f"Config: {args.runs} runs, {args.warmup} warmup, {args.timeout}s timeout") + + try: + results = run_benchmark( + branches=args.branches, + num_runs=args.runs, + warmup_runs=args.warmup, + timeout=args.timeout, + output_dir=args.output_dir, + dry_run=args.dry_run, + verbose=args.verbose, + restart_between_runs=args.restart_between_runs, + keep_infra=args.keep_infra, + ) + + if not args.dry_run: 
+ save_results(results, args.output_dir) + print_summary(results) + + return 0 + + except Exception as e: + logger.exception(f"Benchmark failed: {e}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/uv.lock b/uv.lock index 68ff3f74a..fcdef5182 100644 --- a/uv.lock +++ b/uv.lock @@ -860,6 +860,7 @@ dependencies = [ { name = "faker" }, { name = "fastapi", extra = ["standard"] }, { name = "httpx" }, + { name = "investigator" }, { name = "jinja2" }, { name = "mcp" }, { name = "opentelemetry-api" }, @@ -923,6 +924,7 @@ requires-dist = [ { name = "faker", marker = "extra == 'demo'", specifier = ">=22.0.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.109.0" }, { name = "httpx", specifier = ">=0.26.0" }, + { name = "investigator", editable = "python-packages/investigator" }, { name = "jinja2", specifier = ">=3.1.3" }, { name = "mcp", specifier = ">=1.0.0" }, { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.5.0" }, @@ -1897,6 +1899,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "investigator" +version = "0.1.0" +source = { editable = "python-packages/investigator" } + +[package.metadata] +requires-dist = [ + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, + { name = "temporalio", marker = "extra == 'temporal'", specifier = ">=1.0.0" }, +] +provides-extras = ["temporal", "dev"] + [[package]] name = "invoke" version = "2.2.1"