Skip to content

Commit 355e72d

Browse files
author
SentienceDEV
committed
verification payment step_end in agent runtime
1 parent fc83a3e commit 355e72d

2 files changed

Lines changed: 177 additions & 17 deletions

File tree

sentience/agent_runtime.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,15 @@
6565

6666
import asyncio
6767
import difflib
68+
import hashlib
6869
import time
6970
from dataclasses import dataclass
7071
from typing import TYPE_CHECKING, Any
7172

7273
from .captcha import CaptchaContext, CaptchaHandlingError, CaptchaOptions, CaptchaResolution
7374
from .failure_artifacts import FailureArtifactBuffer, FailureArtifactsOptions
7475
from .models import Snapshot, SnapshotOptions
76+
from .trace_event_builder import TraceEventBuilder
7577
from .verification import AssertContext, AssertOutcome, Predicate
7678

7779
if TYPE_CHECKING:
@@ -138,6 +140,8 @@ def __init__(
138140

139141
# Snapshot state
140142
self.last_snapshot: Snapshot | None = None
143+
self._step_pre_snapshot: Snapshot | None = None
144+
self._step_pre_url: str | None = None
141145

142146
# Failure artifacts (Phase 1)
143147
self._artifact_buffer: FailureArtifactBuffer | None = None
@@ -148,6 +152,12 @@ def __init__(
148152

149153
# Assertions accumulated during current step
150154
self._assertions_this_step: list[dict[str, Any]] = []
155+
self._step_goal: str | None = None
156+
self._last_action: str | None = None
157+
self._last_action_error: str | None = None
158+
self._last_action_outcome: str | None = None
159+
self._last_action_duration_ms: int | None = None
160+
self._last_action_success: bool | None = None
151161

152162
# Task completion tracking
153163
self._task_done: bool = False
@@ -250,6 +260,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
250260
# Check if using legacy browser (backward compat)
251261
if hasattr(self, "_legacy_browser") and hasattr(self, "_legacy_page"):
252262
self.last_snapshot = await self._legacy_browser.snapshot(self._legacy_page, **kwargs)
263+
if self.last_snapshot is not None:
264+
self._cached_url = self.last_snapshot.url
265+
if self._step_pre_snapshot is None:
266+
self._step_pre_snapshot = self.last_snapshot
267+
self._step_pre_url = self.last_snapshot.url
253268
return self.last_snapshot
254269

255270
# Use backend-agnostic snapshot
@@ -262,6 +277,11 @@ async def snapshot(self, **kwargs: Any) -> Snapshot:
262277
options = SnapshotOptions(**options_dict)
263278

264279
self.last_snapshot = await backend_snapshot(self.backend, options=options)
280+
if self.last_snapshot is not None:
281+
self._cached_url = self.last_snapshot.url
282+
if self._step_pre_snapshot is None:
283+
self._step_pre_snapshot = self.last_snapshot
284+
self._step_pre_url = self.last_snapshot.url
265285
if not skip_captcha_handling:
266286
await self._handle_captcha_if_needed(self.last_snapshot, source="gateway")
267287
return self.last_snapshot
@@ -414,6 +434,7 @@ async def record_action(
414434
"""
415435
Record an action in the artifact timeline and capture a frame if enabled.
416436
"""
437+
self._last_action = action
417438
if not self._artifact_buffer:
418439
return
419440
self._artifact_buffer.record_step(
@@ -425,6 +446,107 @@ async def record_action(
425446
if self._artifact_buffer.options.capture_on_action:
426447
await self._capture_artifact_frame()
427448

449+
def _compute_snapshot_digest(self, snap: Snapshot | None) -> str | None:
    """Return a ``sha256:``-prefixed fingerprint for a snapshot.

    The fingerprint is the SHA-256 hex digest of the snapshot's ``url``
    immediately followed by its ``timestamp`` (no separator), encoded as
    UTF-8.

    Args:
        snap: Snapshot to fingerprint, or ``None``.

    Returns:
        The string ``"sha256:<hexdigest>"``, or ``None`` when ``snap`` is
        ``None`` or when anything raises while reading the fields or hashing.
    """
    if snap is None:
        return None
    try:
        fingerprint_source = f"{snap.url}{snap.timestamp}".encode("utf-8")
        hex_digest = hashlib.sha256(fingerprint_source).hexdigest()
        return "sha256:" + hex_digest
    except Exception:
        # Best-effort: a digest is optional trace metadata, so any failure
        # degrades to "no digest" instead of propagating.
        return None
459+
460+
async def emit_step_end(
    self,
    *,
    action: str | None = None,
    success: bool | None = None,
    error: str | None = None,
    outcome: str | None = None,
    duration_ms: int | None = None,
    attempt: int = 0,
    verify_passed: bool | None = None,
    verify_signals: dict[str, Any] | None = None,
    post_url: str | None = None,
    post_snapshot_digest: str | None = None,
) -> dict[str, Any]:
    """Build a ``step_end`` payload for the current step and emit it.

    The payload is assembled with ``TraceEventBuilder.build_step_end_event``
    and sent through ``self.tracer.emit("step_end", ...)``. Values not
    supplied by the caller are filled in from state stored on the runtime.

    Args:
        action: Action label; falls back to ``self._last_action`` and then
            to ``"unknown"``.
        success: Execution success flag. When ``None``, uses
            ``self._last_action_success`` if that is set, otherwise the
            verification result.
        error: Error text. When truthy it is added to the exec data and,
            unless the caller already supplied an ``"error"`` signal, to
            the verification signals.
        outcome: Outcome label; falls back to ``self._last_action_outcome``
            and then to ``""``.
        duration_ms: Included in the exec data (as ``int``) only when not
            ``None``.
        attempt: Attempt number forwarded to the event builder.
        verify_passed: Verification result. When ``None``,
            ``self.required_assertions_passed()`` decides.
        verify_signals: Extra verification signals; copied, never mutated.
            A ``"url_changed"`` signal is added if the caller did not
            provide one.
        post_url: URL after the step. When ``None`` it is read via
            ``self.get_url()``; if that raises, the last snapshot's URL or
            ``self._cached_url`` is used. A still-empty value falls back to
            the pre-step URL.
        post_snapshot_digest: Digest for the post-step snapshot. When
            falsy, it is computed from ``self.last_snapshot``.

    Returns:
        The value returned by ``TraceEventBuilder.build_step_end_event``,
        i.e. the same object that was passed to the tracer.
    """
    # --- Pre-step state: first snapshot of the step, else the latest one.
    before_snap = self._step_pre_snapshot or self.last_snapshot
    before_url = self._step_pre_url
    if not before_url:
        before_url = before_snap.url if before_snap else None
    if not before_url:
        before_url = self._cached_url
    if not before_url:
        before_url = ""

    # --- Post-step URL: explicit value > live URL > snapshot/cache > pre URL.
    if post_url is None:
        try:
            post_url = await self.get_url()
        except Exception:
            # The live lookup is best-effort; fall back to what we last saw.
            latest_snap = self.last_snapshot
            post_url = (latest_snap.url if latest_snap else None) or self._cached_url
    if not post_url:
        post_url = before_url

    # --- Snapshot digests and navigation signal.
    digest_before = self._compute_snapshot_digest(before_snap)
    digest_after = post_snapshot_digest
    if not digest_after:
        digest_after = self._compute_snapshot_digest(self.last_snapshot)
    navigated = bool(before_url) and bool(post_url) and str(before_url) != str(post_url)

    # --- Assertions accumulated during the step.
    step_assertions = self.get_assertions_for_step_end().get("assertions") or []

    # --- Verification signals (caller-supplied keys always win).
    signal_map: dict[str, Any] = dict(verify_signals) if verify_signals else {}
    if "url_changed" not in signal_map:
        signal_map["url_changed"] = navigated
    if error:
        signal_map.setdefault("error", error)

    # --- Verification verdict.
    if verify_passed is None:
        verified = self.required_assertions_passed()
    else:
        verified = bool(verify_passed)

    # --- Execution verdict: explicit flag > recorded action result > verdict.
    if success is not None:
        executed_ok = bool(success)
    elif self._last_action_success is not None:
        executed_ok = bool(self._last_action_success)
    else:
        executed_ok = bool(verified)

    execution: dict[str, Any] = {
        "success": executed_ok,
        "action": action or self._last_action or "unknown",
        "outcome": outcome or self._last_action_outcome or "",
    }
    if duration_ms is not None:
        execution["duration_ms"] = int(duration_ms)
    if error:
        execution["error"] = error

    verification = {
        "passed": bool(verified),
        "signals": signal_map,
    }

    event_payload = TraceEventBuilder.build_step_end_event(
        step_id=self.step_id or "",
        step_index=int(self.step_index),
        goal=self._step_goal or "",
        attempt=int(attempt),
        pre_url=str(before_url or ""),
        post_url=str(post_url or ""),
        snapshot_digest=digest_before,
        llm_data={},
        exec_data=execution,
        verify_data=verification,
        pre_elements=None,
        assertions=step_assertions,
        post_snapshot_digest=digest_after,
    )
    self.tracer.emit("step_end", event_payload, step_id=self.step_id)
    return event_payload
549+
428550
async def _capture_artifact_frame(self) -> None:
429551
if not self._artifact_buffer:
430552
return
@@ -511,6 +633,14 @@ def begin_step(self, goal: str, step_index: int | None = None) -> str:
511633
"""
512634
# Clear previous step state
513635
self._assertions_this_step = []
636+
self._step_pre_snapshot = None
637+
self._step_pre_url = None
638+
self._step_goal = goal
639+
self._last_action = None
640+
self._last_action_error = None
641+
self._last_action_outcome = None
642+
self._last_action_duration_ms = None
643+
self._last_action_success = None
514644

515645
# Update step index
516646
if step_index is not None:

sentience/runtime_agent.py

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,26 +86,56 @@ async def run_step(
8686
step: RuntimeStep,
8787
) -> bool:
8888
self.runtime.begin_step(step.goal)
89+
emitted = False
90+
ok = False
91+
try:
92+
snap = await self._snapshot_with_ramp(step=step)
8993

90-
snap = await self._snapshot_with_ramp(step=step)
91-
92-
if await self._should_short_circuit_to_vision(step=step, snap=snap):
93-
ok = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
94-
return ok
95-
96-
# 1) Structured executor attempt.
97-
action = self._propose_structured_action(task_goal=task_goal, step=step, snap=snap)
98-
await self._execute_action(action=action, snap=snap)
99-
ok = await self._apply_verifications(step=step)
100-
if ok:
101-
return True
94+
if await self._should_short_circuit_to_vision(step=step, snap=snap):
95+
ok = await self._vision_executor_attempt(
96+
task_goal=task_goal, step=step, snap=snap
97+
)
98+
return ok
10299

103-
# 2) Optional vision executor fallback (bounded).
104-
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
105-
ok2 = await self._vision_executor_attempt(task_goal=task_goal, step=step, snap=snap)
106-
return ok2
100+
# 1) Structured executor attempt.
101+
action = self._propose_structured_action(
102+
task_goal=task_goal, step=step, snap=snap
103+
)
104+
await self._execute_action(action=action, snap=snap)
105+
ok = await self._apply_verifications(step=step)
106+
if ok:
107+
return True
108+
109+
# 2) Optional vision executor fallback (bounded).
110+
if step.vision_executor_enabled and step.max_vision_executor_attempts > 0:
111+
ok = await self._vision_executor_attempt(
112+
task_goal=task_goal, step=step, snap=snap
113+
)
114+
return ok
107115

108-
return False
116+
return False
117+
except Exception as exc:
118+
try:
119+
await self.runtime.emit_step_end(
120+
success=False,
121+
error=str(exc),
122+
outcome="exception",
123+
verify_passed=False,
124+
)
125+
emitted = True
126+
except Exception:
127+
pass
128+
raise
129+
finally:
130+
if not emitted:
131+
try:
132+
await self.runtime.emit_step_end(
133+
success=ok,
134+
outcome=("ok" if ok else "verification_failed"),
135+
verify_passed=ok,
136+
)
137+
except Exception:
138+
pass
109139

110140
async def _snapshot_with_ramp(self, *, step: RuntimeStep) -> Snapshot:
111141
limit = step.snapshot_limit_base

0 commit comments

Comments
 (0)