From c4f7475d140f9863f2b0d1ab121688a3fc8b76fb Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:38:10 +0100 Subject: [PATCH 01/23] feat: add idle detection data types, marker constant, and duration parser Add IdleConfig dataclass, idle state tracking on RunState (consecutive_idle, cumulative_idle_time, mark_idle, reset_idle), IDLE_STATE_MARKER constant, parse_duration() for human-readable durations (30s, 5m, 6h), and frontmatter field constants for the idle configuration block. Co-authored-by: Ralphify --- src/ralphify/_frontmatter.py | 39 ++++++++++++++++++++++++++++ src/ralphify/_run_types.py | 38 ++++++++++++++++++++++++++++ tests/test_frontmatter.py | 29 +++++++++++++++++++++ tests/test_run_types.py | 49 ++++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+) diff --git a/src/ralphify/_frontmatter.py b/src/ralphify/_frontmatter.py index 0d4f6f3..59269b8 100644 --- a/src/ralphify/_frontmatter.py +++ b/src/ralphify/_frontmatter.py @@ -25,6 +25,13 @@ FIELD_COMMANDS = "commands" FIELD_ARGS = "args" FIELD_CREDIT = "credit" +FIELD_IDLE = "idle" + +# Sub-field names within the idle configuration mapping. +IDLE_FIELD_DELAY = "delay" +IDLE_FIELD_BACKOFF = "backoff" +IDLE_FIELD_MAX_DELAY = "max_delay" +IDLE_FIELD_MAX = "max" # Sub-field names within each command mapping. CMD_FIELD_NAME = "name" @@ -41,9 +48,41 @@ # Human-readable description of allowed name characters, paired with CMD_NAME_RE. VALID_NAME_CHARS_MSG = "Names may only contain letters, digits, hyphens, and underscores." +# Marker that agents emit to signal idle state. +IDLE_STATE_MARKER = "" + # Pre-compiled pattern to strip HTML comments from body text. _HTML_COMMENT_RE = re.compile(r"", re.DOTALL) +# Pattern for human-readable duration strings (e.g. "30s", "5m", "6h", "1d"). +_DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([smhd])\s*$") + +_DURATION_MULTIPLIERS: dict[str, float] = { + "s": 1, + "m": 60, + "h": 3600, + "d": 86400, +} + + +def parse_duration(value: str) -> float: + """Parse a human-readable duration string into seconds. + + Supported suffixes: ``s`` (seconds), ``m`` (minutes), ``h`` (hours), + ``d`` (days). Examples: ``"30s"`` → 30.0, ``"5m"`` → 300.0. + + Raises :class:`ValueError` for invalid formats. + """ + match = _DURATION_RE.match(value) + if not match: + raise ValueError( + f"Invalid duration '{value}'. Use a number with a suffix: " + f"s (seconds), m (minutes), h (hours), d (days). Examples: 30s, 5m, 6h." + ) + amount = float(match.group(1)) + unit = match.group(2) + return amount * _DURATION_MULTIPLIERS[unit] + def _extract_frontmatter_block(text: str) -> tuple[str, str]: """Split text into raw YAML frontmatter and body at ``---`` delimiters. diff --git a/src/ralphify/_run_types.py b/src/ralphify/_run_types.py index be74603..8844e05 100644 --- a/src/ralphify/_run_types.py +++ b/src/ralphify/_run_types.py @@ -17,6 +17,15 @@ from ralphify._events import STOP_COMPLETED, STOP_ERROR, STOP_USER_REQUESTED, StopReason +DEFAULT_IDLE_DELAY: float = 30 +"""Default initial delay in seconds when idle state is detected.""" + +DEFAULT_IDLE_BACKOFF: float = 2.0 +"""Default backoff multiplier applied each consecutive idle iteration.""" + +DEFAULT_IDLE_MAX_DELAY: float = 300 +"""Default maximum delay in seconds (5 minutes) for idle backoff.""" + DEFAULT_COMMAND_TIMEOUT: float = 60 """Default timeout in seconds for commands defined in RALPH.md frontmatter.""" @@ -67,6 +76,22 @@ def reason(self) -> StopReason: } +@dataclass +class IdleConfig: + """Configuration for idle detection and backoff behavior. + + When an agent signals idle state, the engine waits ``delay`` seconds + before the next iteration, multiplying by ``backoff`` each consecutive + idle iteration, capped at ``max_delay``. If cumulative idle time + exceeds ``max``, the loop stops. + """ + + delay: float = DEFAULT_IDLE_DELAY + backoff: float = DEFAULT_IDLE_BACKOFF + max_delay: float = DEFAULT_IDLE_MAX_DELAY + max: float | None = None + + @dataclass class Command: """A named command from RALPH.md frontmatter.""" @@ -97,6 +122,7 @@ class RunConfig: log_dir: Path | None = None project_root: Path = field(default=Path(".")) credit: bool = True + idle: IdleConfig | None = None @dataclass @@ -120,6 +146,8 @@ class RunState: failed: int = 0 timed_out: int = 0 started_at: datetime | None = None + consecutive_idle: int = 0 + cumulative_idle_time: float = 0.0 _stop_requested: bool = field(default=False, init=False, repr=False, compare=False) _resume_event: threading.Event = field(default_factory=threading.Event, init=False, repr=False, compare=False) @@ -174,3 +202,13 @@ def mark_timed_out(self) -> None: """Record a timed-out iteration (also counts as failed).""" self.timed_out += 1 self.mark_failed() + + def mark_idle(self) -> None: + """Record an idle iteration (counts as completed, increments idle tracking).""" + self.completed += 1 + self.consecutive_idle += 1 + + def reset_idle(self) -> None: + """Reset idle tracking after a non-idle iteration.""" + self.consecutive_idle = 0 + self.cumulative_idle_time = 0.0 diff --git a/tests/test_frontmatter.py b/tests/test_frontmatter.py index 6c60ed9..0b6a611 100644 --- a/tests/test_frontmatter.py +++ b/tests/test_frontmatter.py @@ -3,8 +3,10 @@ import pytest from ralphify._frontmatter import ( + IDLE_STATE_MARKER, RALPH_MARKER, _extract_frontmatter_block, + parse_duration, parse_frontmatter, serialize_frontmatter, ) @@ -153,6 +155,33 @@ def test_scalar_frontmatter_raises_value_error(self): parse_frontmatter(text) +class TestParseDuration: + @pytest.mark.parametrize( + "value,expected", + [ + ("30s", 30.0), + ("5m", 300.0), + ("6h", 21600.0), + ("1d", 86400.0), + ("1.5h", 5400.0), + ("0.5m", 30.0), + (" 30s ", 30.0), + ], + ) + def test_valid_durations(self, value, expected): + assert parse_duration(value) == expected + + @pytest.mark.parametrize("value", ["", "30", "abc", "30x", "m5", "-5s"]) + def test_invalid_durations_raise(self, value): + with pytest.raises(ValueError, match="Invalid duration"): + parse_duration(value) + + +class TestIdleStateMarker: + def test_marker_value(self): + assert IDLE_STATE_MARKER == "" + + class TestSerializeFrontmatter: def test_roundtrip(self): original_fm = {"agent": "claude"} diff --git a/tests/test_run_types.py b/tests/test_run_types.py index 4e3394f..f0d2cf3 100644 --- a/tests/test_run_types.py +++ b/tests/test_run_types.py @@ -8,8 +8,12 @@ from ralphify._frontmatter import RALPH_MARKER from ralphify._run_types import ( DEFAULT_COMMAND_TIMEOUT, + DEFAULT_IDLE_BACKOFF, + DEFAULT_IDLE_DELAY, + DEFAULT_IDLE_MAX_DELAY, RUN_ID_LENGTH, Command, + IdleConfig, RunConfig, RunState, RunStatus, @@ -41,6 +45,22 @@ def test_custom_timeout(self): assert cmd.timeout == 300 +class TestIdleConfig: + def test_defaults(self): + cfg = IdleConfig() + assert cfg.delay == DEFAULT_IDLE_DELAY + assert cfg.backoff == DEFAULT_IDLE_BACKOFF + assert cfg.max_delay == DEFAULT_IDLE_MAX_DELAY + assert cfg.max is None + + def test_custom_values(self): + cfg = IdleConfig(delay=10, backoff=1.5, max_delay=120, max=3600) + assert cfg.delay == 10 + assert cfg.backoff == 1.5 + assert cfg.max_delay == 120 + assert cfg.max == 3600 + + class TestRunConfig: def test_default_project_root_is_dot(self, tmp_path): config = RunConfig( @@ -64,6 +84,7 @@ def test_defaults(self, tmp_path): assert config.stop_on_error is False assert config.log_dir is None assert config.credit is True + assert config.idle is None class TestRunState: @@ -75,6 +96,8 @@ def test_initial_state(self): assert state.failed == 0 assert state.timed_out == 0 assert state.started_at is None + assert state.consecutive_idle == 0 + assert state.cumulative_idle_time == 0.0 def test_total_is_completed_plus_failed(self): state = RunState(run_id="r1") @@ -147,6 +170,32 @@ def test_wait_for_unpause_times_out(self): result = state.wait_for_unpause(timeout=0.01) assert result is False + def test_mark_idle_increments_completed_and_consecutive(self): + state = RunState(run_id="r1") + state.mark_idle() + assert state.completed == 1 + assert state.consecutive_idle == 1 + state.mark_idle() + assert state.completed == 2 + assert state.consecutive_idle == 2 + + def test_reset_idle_clears_tracking(self): + state = RunState(run_id="r1") + state.mark_idle() + state.mark_idle() + state.cumulative_idle_time = 120.0 + state.reset_idle() + assert state.consecutive_idle == 0 + assert state.cumulative_idle_time == 0.0 + # completed count is preserved + assert state.completed == 2 + + def test_mark_idle_included_in_total(self): + state = RunState(run_id="r1") + state.mark_idle() + state.mark_completed() + assert state.total == 2 + class TestRunStatus: @pytest.mark.parametrize( From 470d0baefbe23420dc88c1bb4a0b26b8e3e58f6e Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:40:14 +0100 Subject: [PATCH 02/23] feat: add ITERATION_IDLE event type, STOP_MAX_IDLE reason, and IDLE_EXCEEDED status Co-authored-by: Ralphify --- src/ralphify/_events.py | 14 ++++++++++++-- src/ralphify/_run_types.py | 10 +++++++++- tests/test_events.py | 39 ++++++++++++++++++++++++++++++++++++++ tests/test_run_types.py | 2 ++ 4 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/ralphify/_events.py b/src/ralphify/_events.py index 6954e51..d75e214 100644 --- a/src/ralphify/_events.py +++ b/src/ralphify/_events.py @@ -19,12 +19,13 @@ LOG_INFO: LogLevel = "info" LOG_ERROR: LogLevel = "error" -StopReason = Literal["completed", "error", "user_requested"] +StopReason = Literal["completed", "error", "user_requested", "max_idle"] """Valid reason strings for :class:`RunStoppedData` events.""" STOP_COMPLETED: StopReason = "completed" STOP_ERROR: StopReason = "error" STOP_USER_REQUESTED: StopReason = "user_requested" +STOP_MAX_IDLE: StopReason = "max_idle" class EventType(Enum): @@ -37,7 +38,7 @@ class EventType(Enum): **Iteration lifecycle** — emitted once per iteration: ``ITERATION_STARTED``, ``ITERATION_COMPLETED``, ``ITERATION_FAILED``, - ``ITERATION_TIMED_OUT``. + ``ITERATION_TIMED_OUT``, ``ITERATION_IDLE``. **Commands** — emitted around command execution: ``COMMANDS_STARTED``, ``COMMANDS_COMPLETED``. @@ -63,6 +64,7 @@ class EventType(Enum): ITERATION_COMPLETED = "iteration_completed" ITERATION_FAILED = "iteration_failed" ITERATION_TIMED_OUT = "iteration_timed_out" + ITERATION_IDLE = "iteration_idle" # ── Commands ──────────────────────────────────────────────── COMMANDS_STARTED = "commands_started" @@ -111,6 +113,13 @@ class IterationEndedData(TypedDict): result_text: str | None +class IterationIdleData(TypedDict): + iteration: int + consecutive_idle: int + next_delay: float + cumulative_idle_time: float + + class CommandsStartedData(TypedDict): iteration: int count: int @@ -142,6 +151,7 @@ class LogMessageData(TypedDict): | RunStoppedData | IterationStartedData | IterationEndedData + | IterationIdleData | CommandsStartedData | CommandsCompletedData | PromptAssembledData diff --git a/src/ralphify/_run_types.py b/src/ralphify/_run_types.py index 8844e05..09af8f4 100644 --- a/src/ralphify/_run_types.py +++ b/src/ralphify/_run_types.py @@ -14,7 +14,13 @@ from enum import Enum from pathlib import Path -from ralphify._events import STOP_COMPLETED, STOP_ERROR, STOP_USER_REQUESTED, StopReason +from ralphify._events import ( + STOP_COMPLETED, + STOP_ERROR, + STOP_MAX_IDLE, + STOP_USER_REQUESTED, + StopReason, +) DEFAULT_IDLE_DELAY: float = 30 @@ -52,6 +58,7 @@ class RunStatus(Enum): STOPPED = "stopped" COMPLETED = "completed" FAILED = "failed" + IDLE_EXCEEDED = "idle_exceeded" @property def reason(self) -> StopReason: @@ -73,6 +80,7 @@ def reason(self) -> StopReason: RunStatus.COMPLETED: STOP_COMPLETED, RunStatus.FAILED: STOP_ERROR, RunStatus.STOPPED: STOP_USER_REQUESTED, + RunStatus.IDLE_EXCEEDED: STOP_MAX_IDLE, } diff --git a/tests/test_events.py b/tests/test_events.py index 0dbeed9..ae68dc7 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -8,10 +8,12 @@ from ralphify._events import ( LOG_ERROR, LOG_INFO, + STOP_MAX_IDLE, BoundEmitter, Event, EventType, FanoutEmitter, + IterationIdleData, NullEmitter, QueueEmitter, ) @@ -184,3 +186,40 @@ def test_fanout_with_null_emitter(self): fanout.emit(event) assert q.queue.get() is event + + +class TestIterationIdleEvent: + def test_iteration_idle_event_type_exists(self): + assert EventType.ITERATION_IDLE.value == "iteration_idle" + + def test_iteration_idle_data_structure(self): + data: IterationIdleData = { + "iteration": 3, + "consecutive_idle": 2, + "next_delay": 60.0, + "cumulative_idle_time": 90.0, + } + assert data["iteration"] == 3 + assert data["consecutive_idle"] == 2 + assert data["next_delay"] == 60.0 + assert data["cumulative_idle_time"] == 90.0 + + def test_emit_iteration_idle_via_bound_emitter(self): + q = QueueEmitter() + emit = BoundEmitter(q, "run-idle") + data: IterationIdleData = { + "iteration": 1, + "consecutive_idle": 1, + "next_delay": 30.0, + "cumulative_idle_time": 30.0, + } + emit(EventType.ITERATION_IDLE, data) + + events = drain_events(q) + assert len(events) == 1 + assert events[0].type == EventType.ITERATION_IDLE + assert events[0].data["consecutive_idle"] == 1 + assert events[0].data["next_delay"] == 30.0 + + def test_stop_max_idle_reason(self): + assert STOP_MAX_IDLE == "max_idle" diff --git a/tests/test_run_types.py b/tests/test_run_types.py index f0d2cf3..3ae11ef 100644 --- a/tests/test_run_types.py +++ b/tests/test_run_types.py @@ -207,6 +207,7 @@ class TestRunStatus: (RunStatus.STOPPED, "stopped"), (RunStatus.COMPLETED, "completed"), (RunStatus.FAILED, "failed"), + (RunStatus.IDLE_EXCEEDED, "idle_exceeded"), ], ) def test_enum_values(self, status, value): @@ -218,6 +219,7 @@ def test_enum_values(self, status, value): (RunStatus.COMPLETED, "completed"), (RunStatus.FAILED, "error"), (RunStatus.STOPPED, "user_requested"), + (RunStatus.IDLE_EXCEEDED, "max_idle"), ], ) def test_reason_for_terminal_statuses(self, status, expected_reason): From 8c61c47e2f40571a2989ea27fc56676f076326a4 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:45:00 +0100 Subject: [PATCH 03/23] feat: implement idle detection in engine with backoff and max idle Detect marker in agent output, emit ITERATION_IDLE events, apply exponential backoff delays between idle iterations, reset idle tracking on non-idle iterations, and stop the loop when cumulative idle time exceeds idle.max. Co-authored-by: Ralphify --- src/ralphify/engine.py | 59 +++++++++++-- tests/test_engine.py | 191 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 243 insertions(+), 7 deletions(-) diff --git a/src/ralphify/engine.py b/src/ralphify/engine.py index 897e6fc..3454dbc 100644 --- a/src/ralphify/engine.py +++ b/src/ralphify/engine.py @@ -30,7 +30,7 @@ RunStartedData, RunStoppedData, ) -from ralphify._frontmatter import FIELD_AGENT, FIELD_COMMANDS, RALPH_MARKER, parse_frontmatter +from ralphify._frontmatter import FIELD_AGENT, FIELD_COMMANDS, IDLE_STATE_MARKER, RALPH_MARKER, parse_frontmatter from ralphify._output import format_duration from ralphify._run_types import ( Command, @@ -172,10 +172,20 @@ def _run_agent_phase( duration = format_duration(agent.elapsed) + is_idle = ( + agent.success + and agent.result_text is not None + and IDLE_STATE_MARKER in agent.result_text + ) + if agent.timed_out: state.mark_timed_out() event_type = EventType.ITERATION_TIMED_OUT state_detail = f"timed out after {duration}" + elif is_idle: + state.mark_idle() + event_type = EventType.ITERATION_IDLE + state_detail = f"idle ({duration})" elif agent.success: state.mark_completed() event_type = EventType.ITERATION_COMPLETED @@ -194,6 +204,7 @@ def _run_agent_phase( log_file=str(agent.log_file) if agent.log_file else None, result_text=agent.result_text, )) + return agent.success @@ -244,17 +255,36 @@ def _run_iteration( return True +def _compute_idle_delay(config: RunConfig, state: RunState) -> float: + """Compute the backoff delay for the current idle streak. + + Formula: ``delay * backoff^(consecutive_idle - 1)`` capped at ``max_delay``. + Returns 0 when no idle config is present or idle count is zero. + """ + if config.idle is None or state.consecutive_idle <= 0: + return 0 + raw = config.idle.delay * (config.idle.backoff ** (state.consecutive_idle - 1)) + return min(raw, config.idle.max_delay) + + def _delay_if_needed(config: RunConfig, state: RunState, emit: BoundEmitter) -> None: """Sleep between iterations when a delay is configured. - The sleep is broken into small chunks so that stop requests are - respected promptly rather than blocking for the full delay. + When idle backoff is active, the idle delay takes precedence over + the base delay. The sleep is broken into small chunks so that stop + requests are respected promptly rather than blocking for the full delay. """ - if config.delay > 0 and ( + # Determine effective delay: idle backoff overrides base delay + if state.consecutive_idle > 0 and config.idle is not None: + delay = _compute_idle_delay(config, state) + else: + delay = config.delay + + if delay > 0 and ( config.max_iterations is None or state.iteration < config.max_iterations ): - emit.log_info(f"Waiting {config.delay}s...") - remaining = config.delay + emit.log_info(f"Waiting {delay}s...") + remaining = delay while remaining > 0 and not state.stop_requested: chunk = min(remaining, _PAUSE_POLL_INTERVAL) time.sleep(chunk) @@ -297,12 +327,29 @@ def run_loop( if config.max_iterations is not None and state.iteration > config.max_iterations: break + idle_before = state.consecutive_idle should_continue = _run_iteration(config, state, emit) if not should_continue: break + # Detect whether this iteration was idle (mark_idle increments + # consecutive_idle; mark_completed/mark_failed do not). + iteration_was_idle = state.consecutive_idle > idle_before + + if not iteration_was_idle and idle_before > 0: + state.reset_idle() + _delay_if_needed(config, state, emit) + # Track cumulative idle time and check max idle limit + if iteration_was_idle and config.idle is not None: + idle_delay = _compute_idle_delay(config, state) + state.cumulative_idle_time += idle_delay + if config.idle.max is not None and state.cumulative_idle_time >= config.idle.max: + state.status = RunStatus.IDLE_EXCEEDED + emit.log_info("Max idle time exceeded, stopping.") + break + except KeyboardInterrupt: state.status = RunStatus.STOPPED except Exception as exc: diff --git a/tests/test_engine.py b/tests/test_engine.py index 29e7902..009baee 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -8,16 +8,35 @@ import pytest from helpers import MOCK_RUN_COMMAND, MOCK_SUBPROCESS, drain_events, event_types, events_of_type, fail_result, make_config, make_state, ok_result, ok_run_result +from ralphify._agent import AgentResult from ralphify._events import BoundEmitter, EventType, NullEmitter, QueueEmitter -from ralphify._run_types import Command, RunStatus +from ralphify._frontmatter import IDLE_STATE_MARKER +from ralphify._run_types import Command, IdleConfig, RunStatus from ralphify.engine import ( _assemble_prompt, + _compute_idle_delay, _delay_if_needed, _handle_control_signals, _run_commands, run_loop, ) +MOCK_EXECUTE_AGENT = "ralphify.engine.execute_agent" + + +def _idle_agent_result(**kwargs): + """Create an AgentResult that signals idle state.""" + defaults = dict(returncode=0, elapsed=1.0, result_text=IDLE_STATE_MARKER) + defaults.update(kwargs) + return AgentResult(**defaults) + + +def _active_agent_result(**kwargs): + """Create an AgentResult for a normal (non-idle) iteration.""" + defaults = dict(returncode=0, elapsed=1.0, result_text="did some work") + defaults.update(kwargs) + return AgentResult(**defaults) + class TestRunLoop: @patch(MOCK_SUBPROCESS, side_effect=ok_result) @@ -894,3 +913,173 @@ def test_credit_false_no_trailer_in_agent_input(self, mock_run, tmp_path): call_input = mock_run.call_args.kwargs["input"] assert "Co-authored-by" not in call_input + + +class TestComputeIdleDelay: + """Unit tests for _compute_idle_delay — backoff math.""" + + def test_returns_zero_without_idle_config(self, tmp_path): + config = make_config(tmp_path, idle=None) + state = make_state() + state.consecutive_idle = 3 + + assert _compute_idle_delay(config, state) == 0 + + def test_returns_zero_when_not_idle(self, tmp_path): + config = make_config(tmp_path, idle=IdleConfig(delay=30)) + state = make_state() + state.consecutive_idle = 0 + + assert _compute_idle_delay(config, state) == 0 + + def test_first_idle_returns_base_delay(self, tmp_path): + config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) + state = make_state() + state.consecutive_idle = 1 + + assert _compute_idle_delay(config, state) == 30 + + def test_second_idle_applies_backoff(self, tmp_path): + config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) + state = make_state() + state.consecutive_idle = 2 + + assert _compute_idle_delay(config, state) == 60 # 30 * 2^1 + + def test_third_idle_applies_backoff_squared(self, tmp_path): + config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) + state = make_state() + state.consecutive_idle = 3 + + assert _compute_idle_delay(config, state) == 120 # 30 * 2^2 + + def test_caps_at_max_delay(self, tmp_path): + config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=100)) + state = make_state() + state.consecutive_idle = 10 # 30 * 2^9 = 15360, way over 100 + + assert _compute_idle_delay(config, state) == 100 + + +class TestIdleDetection: + """Integration tests for idle detection in the run loop.""" + + @patch(MOCK_EXECUTE_AGENT, return_value=_idle_agent_result()) + def test_idle_marker_triggers_idle_event(self, mock_agent, tmp_path): + config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + events = drain_events(q) + types = event_types(events) + assert EventType.ITERATION_IDLE in types + assert EventType.ITERATION_COMPLETED not in types + + @patch(MOCK_EXECUTE_AGENT, return_value=_idle_agent_result()) + def test_idle_increments_consecutive_idle(self, mock_agent, tmp_path): + config = make_config(tmp_path, max_iterations=2, idle=IdleConfig(delay=0)) + state = make_state() + + run_loop(config, state, NullEmitter()) + + assert state.consecutive_idle == 2 + assert state.completed == 2 + + @patch(MOCK_EXECUTE_AGENT) + def test_non_idle_resets_idle_tracking(self, mock_agent, tmp_path): + """After idle iterations, a normal iteration resets the idle counters.""" + mock_agent.side_effect = [ + _idle_agent_result(), + _idle_agent_result(), + _active_agent_result(), + ] + config = make_config(tmp_path, max_iterations=3, idle=IdleConfig(delay=0)) + state = make_state() + + run_loop(config, state, NullEmitter()) + + assert state.consecutive_idle == 0 + assert state.cumulative_idle_time == 0.0 + + @patch(MOCK_EXECUTE_AGENT, return_value=_idle_agent_result()) + def test_idle_without_config_still_marks_completed(self, mock_agent, tmp_path): + """When no idle config is set, idle marker in result_text still + triggers ITERATION_IDLE but no backoff delay is applied.""" + config = make_config(tmp_path, max_iterations=1, idle=None) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + events = drain_events(q) + types = event_types(events) + assert EventType.ITERATION_IDLE in types + assert state.completed == 1 + + @patch(MOCK_EXECUTE_AGENT, return_value=_idle_agent_result()) + def test_max_idle_stops_loop(self, mock_agent, tmp_path): + """Loop stops when cumulative idle time exceeds idle.max.""" + config = make_config( + tmp_path, max_iterations=100, + idle=IdleConfig(delay=10, backoff=1.0, max_delay=10, max=25), + ) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + assert state.status == RunStatus.IDLE_EXCEEDED + events = drain_events(q) + stop = events_of_type(events, EventType.RUN_STOPPED)[0] + assert stop.data["reason"] == "max_idle" + + @patch(MOCK_EXECUTE_AGENT) + def test_idle_backoff_delay_applied(self, mock_agent, tmp_path): + """Idle iterations should apply backoff delay, not the base delay.""" + mock_agent.return_value = _idle_agent_result() + config = make_config( + tmp_path, max_iterations=2, delay=0, + idle=IdleConfig(delay=0.15, backoff=1.0, max_delay=300), + ) + state = make_state() + + start = time.monotonic() + run_loop(config, state, NullEmitter()) + elapsed = time.monotonic() - start + + # First idle delay should be ~0.15s, no delay after last iteration + assert elapsed >= 0.1 + + @patch(MOCK_EXECUTE_AGENT) + def test_idle_result_text_none_not_detected_as_idle(self, mock_agent, tmp_path): + """Agent result with result_text=None should not be detected as idle.""" + mock_agent.return_value = AgentResult(returncode=0, elapsed=1.0, result_text=None) + config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + events = drain_events(q) + types = event_types(events) + assert EventType.ITERATION_COMPLETED in types + assert EventType.ITERATION_IDLE not in types + + @patch(MOCK_EXECUTE_AGENT) + def test_failed_agent_not_detected_as_idle(self, mock_agent, tmp_path): + """Failed agent result should not be detected as idle even if marker is present.""" + mock_agent.return_value = AgentResult( + returncode=1, elapsed=1.0, result_text=IDLE_STATE_MARKER, + ) + config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + events = drain_events(q) + types = event_types(events) + assert EventType.ITERATION_FAILED in types + assert EventType.ITERATION_IDLE not in types From be5f760b10a25b10624f102a33fc13d0014a06a4 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:48:16 +0100 Subject: [PATCH 04/23] feat: parse idle config from RALPH.md frontmatter in CLI Add _validate_idle() to parse the idle frontmatter block with support for duration strings (30s, 5m, 1h) and numeric values. Validates all sub-fields (delay, backoff, max_delay, max) and rejects unknown fields. Co-authored-by: Ralphify --- src/ralphify/cli.py | 69 ++++++++++++++++- tests/test_cli.py | 175 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 242 insertions(+), 2 deletions(-) diff --git a/src/ralphify/cli.py b/src/ralphify/cli.py index 8fe3b9b..c17340c 100644 --- a/src/ralphify/cli.py +++ b/src/ralphify/cli.py @@ -29,11 +29,17 @@ FIELD_ARGS, FIELD_COMMANDS, FIELD_CREDIT, + FIELD_IDLE, + IDLE_FIELD_BACKOFF, + IDLE_FIELD_DELAY, + IDLE_FIELD_MAX, + IDLE_FIELD_MAX_DELAY, RALPH_MARKER, VALID_NAME_CHARS_MSG, + parse_duration, parse_frontmatter, ) -from ralphify._run_types import Command, DEFAULT_COMMAND_TIMEOUT, RunConfig, RunState, generate_run_id +from ralphify._run_types import Command, DEFAULT_COMMAND_TIMEOUT, IdleConfig, RunConfig, RunState, generate_run_id from ralphify.engine import run_loop if sys.platform == "win32": @@ -373,6 +379,65 @@ def _validate_credit(raw_credit: Any) -> bool: return raw_credit +def _parse_idle_duration(value: Any, field_name: str) -> float: + """Parse a duration value from idle config — accepts numbers (seconds) or duration strings.""" + if isinstance(value, bool): + _exit_error(f"'{FIELD_IDLE}.{field_name}' must be a number or duration string, got {value!r}.") + if isinstance(value, (int, float)): + if not math.isfinite(value) or value <= 0: + _exit_error(f"'{FIELD_IDLE}.{field_name}' must be positive, got {value!r}.") + return float(value) + if isinstance(value, str): + try: + result = parse_duration(value) + except ValueError as exc: + _exit_error(f"'{FIELD_IDLE}.{field_name}': {exc}") + if result <= 0: + _exit_error(f"'{FIELD_IDLE}.{field_name}' must be positive.") + return result + _exit_error(f"'{FIELD_IDLE}.{field_name}' must be a number or duration string, got {type(value).__name__}.") + + +def _validate_idle(raw_idle: Any) -> IdleConfig | None: + """Validate the ``idle`` frontmatter block and return an IdleConfig. + + Returns ``None`` when *raw_idle* is ``None`` (field absent). + Exits with an error when the value is malformed. + """ + if raw_idle is None: + return None + if not isinstance(raw_idle, dict): + _exit_error(f"'{FIELD_IDLE}' must be a mapping, got {type(raw_idle).__name__}.") + + kwargs: dict[str, Any] = {} + + if IDLE_FIELD_DELAY in raw_idle: + kwargs["delay"] = _parse_idle_duration(raw_idle[IDLE_FIELD_DELAY], IDLE_FIELD_DELAY) + + if IDLE_FIELD_MAX_DELAY in raw_idle: + kwargs["max_delay"] = _parse_idle_duration(raw_idle[IDLE_FIELD_MAX_DELAY], IDLE_FIELD_MAX_DELAY) + + if IDLE_FIELD_MAX in raw_idle: + kwargs["max"] = _parse_idle_duration(raw_idle[IDLE_FIELD_MAX], IDLE_FIELD_MAX) + + if IDLE_FIELD_BACKOFF in raw_idle: + backoff = raw_idle[IDLE_FIELD_BACKOFF] + if isinstance(backoff, bool): + _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be a positive number, got {backoff!r}.") + if not isinstance(backoff, (int, float)): + _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be a positive number, got {type(backoff).__name__}.") + if not math.isfinite(backoff) or backoff <= 0: + _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be positive, got {backoff!r}.") + kwargs["backoff"] = float(backoff) + + known_fields = {IDLE_FIELD_DELAY, IDLE_FIELD_BACKOFF, IDLE_FIELD_MAX_DELAY, IDLE_FIELD_MAX} + unknown = set(raw_idle.keys()) - known_fields + if unknown: + _exit_error(f"Unknown field(s) in '{FIELD_IDLE}': {', '.join(sorted(unknown))}.") + + return IdleConfig(**kwargs) + + def _validate_run_options( max_iterations: int | None, delay: float, @@ -416,6 +481,7 @@ def _build_run_config( ralph_args = _parse_user_args(extra_args, declared_names) credit = _validate_credit(fm.get(FIELD_CREDIT)) + idle = _validate_idle(fm.get(FIELD_IDLE)) return RunConfig( agent=agent, @@ -430,6 +496,7 @@ def _build_run_config( log_dir=Path(log_dir) if log_dir else None, project_root=Path.cwd(), credit=credit, + idle=idle, ) diff --git a/tests/test_cli.py b/tests/test_cli.py index d4050e0..fb67cb7 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,7 +10,7 @@ from helpers import MOCK_ENGINE_SLEEP, MOCK_SKILLS_WHICH, MOCK_SUBPROCESS, MOCK_WHICH, ok_result, fail_result, make_ralph from ralphify import __version__ from ralphify._frontmatter import RALPH_MARKER -from ralphify.cli import app, _parse_command_items, _parse_user_args +from ralphify.cli import app, _parse_command_items, _parse_user_args, _validate_idle runner = CliRunner() @@ -773,3 +773,176 @@ def test_credit_invalid_value_errors(self, mock_which, tmp_path, monkeypatch): assert result.exit_code == 1 assert "credit" in result.output.lower() assert "true or false" in result.output.lower() + + +@patch(MOCK_WHICH, return_value="/usr/bin/claude") +class TestIdleFrontmatter: + @patch(MOCK_SUBPROCESS, side_effect=ok_result) + def test_no_idle_config_by_default(self, mock_run, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = make_ralph(tmp_path) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 0 + + @patch(MOCK_SUBPROCESS, side_effect=ok_result) + def test_idle_with_all_fields(self, mock_run, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p --dangerously-skip-permissions\n" + "idle:\n delay: 30s\n backoff: 2\n max_delay: 5m\n max: 1h\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 0 + + @patch(MOCK_SUBPROCESS, side_effect=ok_result) + def test_idle_with_numeric_values(self, mock_run, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p --dangerously-skip-permissions\n" + "idle:\n delay: 30\n backoff: 1.5\n max_delay: 300\n max: 3600\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 0 + + @patch(MOCK_SUBPROCESS, side_effect=ok_result) + def test_idle_with_only_delay(self, mock_run, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p --dangerously-skip-permissions\n" + "idle:\n delay: 10s\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 0 + + def test_idle_not_a_mapping_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle: true\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "must be a mapping" in result.output.lower() + + def test_idle_string_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle: 30s\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "must be a mapping" in result.output.lower() + + def test_idle_invalid_delay_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle:\n delay: not-a-duration\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "idle.delay" in result.output.lower() + + def test_idle_negative_delay_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle:\n delay: -5\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "positive" in result.output.lower() + + def test_idle_zero_backoff_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle:\n backoff: 0\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "positive" in result.output.lower() + + def test_idle_boolean_backoff_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle:\n backoff: true\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "positive number" in result.output.lower() + + def test_idle_unknown_field_errors(self, mock_which, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + ralph_dir = tmp_path / "my-ralph" + ralph_dir.mkdir(exist_ok=True) + (ralph_dir / RALPH_MARKER).write_text( + "---\nagent: claude -p\nidle:\n delay: 30s\n unknown: foo\n---\ngo" + ) + result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) + assert result.exit_code == 1 + assert "unknown" in result.output.lower() + + +class TestValidateIdle: + def test_none_returns_none(self): + assert _validate_idle(None) is None + + def test_empty_dict_returns_defaults(self): + from ralphify._run_types import DEFAULT_IDLE_BACKOFF, DEFAULT_IDLE_DELAY, DEFAULT_IDLE_MAX_DELAY + config = _validate_idle({}) + assert config.delay == DEFAULT_IDLE_DELAY + assert config.backoff == DEFAULT_IDLE_BACKOFF + assert config.max_delay == DEFAULT_IDLE_MAX_DELAY + assert config.max is None + + def test_duration_strings_parsed(self): + config = _validate_idle({"delay": "30s", "max_delay": "5m", "max": "1h"}) + assert config.delay == 30.0 + assert config.max_delay == 300.0 + assert config.max == 3600.0 + + def test_numeric_values_accepted(self): + config = _validate_idle({"delay": 10, "backoff": 1.5, "max_delay": 120, "max": 600}) + assert config.delay == 10.0 + assert config.backoff == 1.5 + assert config.max_delay == 120.0 + assert config.max == 600.0 + + def test_not_a_dict_errors(self): + with pytest.raises(typer.Exit): + _validate_idle("30s") + + def test_invalid_duration_string_errors(self): + with pytest.raises(typer.Exit): + _validate_idle({"delay": "not-valid"}) + + def test_negative_delay_errors(self): + with pytest.raises(typer.Exit): + _validate_idle({"delay": -5}) + + def test_zero_delay_errors(self): + with pytest.raises(typer.Exit): + _validate_idle({"delay": 0}) + + def test_boolean_delay_errors(self): + with pytest.raises(typer.Exit): + _validate_idle({"delay": True}) + + def test_unknown_fields_errors(self): + with pytest.raises(typer.Exit): + _validate_idle({"delay": "30s", "extra": "oops"}) From 874dd768218008f994668b33e8fc846dba5ace33 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:50:52 +0100 Subject: [PATCH 05/23] feat: render ITERATION_IDLE event in console emitter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dimmed idle indicator (◇) for idle iterations and show "Stopped (idle):" summary when a run ends due to max idle time. Co-authored-by: Ralphify --- src/ralphify/_console_emitter.py | 11 +++++++-- tests/test_console_emitter.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/ralphify/_console_emitter.py b/src/ralphify/_console_emitter.py index 20ee837..3e5efd5 100644 --- a/src/ralphify/_console_emitter.py +++ b/src/ralphify/_console_emitter.py @@ -20,6 +20,7 @@ from ralphify._events import ( LOG_ERROR, STOP_COMPLETED, + STOP_MAX_IDLE, CommandsCompletedData, Event, EventType, @@ -34,6 +35,7 @@ _ICON_SUCCESS = "✓" _ICON_FAILURE = "✗" _ICON_TIMEOUT = "⏱" +_ICON_IDLE = "◇" _ICON_ARROW = "→" _ICON_DASH = "—" @@ -66,6 +68,7 @@ def __init__(self, console: Console) -> None: EventType.ITERATION_COMPLETED: partial(self._on_iteration_ended, color="green", icon=_ICON_SUCCESS), EventType.ITERATION_FAILED: partial(self._on_iteration_ended, color="red", icon=_ICON_FAILURE), EventType.ITERATION_TIMED_OUT: partial(self._on_iteration_ended, color="yellow", icon=_ICON_TIMEOUT), + EventType.ITERATION_IDLE: partial(self._on_iteration_ended, color="dim", icon=_ICON_IDLE), EventType.COMMANDS_COMPLETED: self._on_commands_completed, EventType.LOG_MESSAGE: self._on_log_message, EventType.RUN_STOPPED: self._on_run_stopped, @@ -143,7 +146,8 @@ def _on_log_message(self, data: LogMessageData) -> None: def _on_run_stopped(self, data: RunStoppedData) -> None: self._stop_live() - if data["reason"] != STOP_COMPLETED: + reason = data["reason"] + if reason not in (STOP_COMPLETED, STOP_MAX_IDLE): return total = data["total"] @@ -161,4 +165,7 @@ def _on_run_stopped(self, data: RunStoppedData) -> None: parts.append(f"{timed_out_count} timed out") detail = ", ".join(parts) self._console.print(f"\n[bold blue]──────────────────────[/bold blue]") - self._console.print(f"[bold green]Done:[/bold green] {total} iteration(s) {_ICON_DASH} {detail}") + if reason == STOP_MAX_IDLE: + self._console.print(f"[bold yellow]Stopped (idle):[/bold yellow] {total} iteration(s) {_ICON_DASH} {detail}") + else: + self._console.print(f"[bold green]Done:[/bold green] {total} iteration(s) {_ICON_DASH} {detail}") diff --git a/tests/test_console_emitter.py b/tests/test_console_emitter.py index 136a0ea..0000dc9 100644 --- a/tests/test_console_emitter.py +++ b/tests/test_console_emitter.py @@ -217,6 +217,37 @@ def test_traceback_with_brackets_not_corrupted(self): assert "[red]missing[/red]" in output +class TestIterationIdle: + def test_idle_shows_dimmed_output(self): + emitter, console = _capture_emitter() + emitter.emit(_make_event( + EventType.ITERATION_IDLE, + iteration=3, detail="idle (2s)", log_file=None, result_text=None, + )) + output = console.export_text() + assert "Iteration 3" in output + assert "idle (2s)" in output + + def test_idle_shows_log_file(self): + emitter, console = _capture_emitter() + emitter.emit(_make_event( + EventType.ITERATION_IDLE, + iteration=1, detail="idle (1s)", log_file="/tmp/idle.log", result_text=None, + )) + output = console.export_text() + assert "/tmp/idle.log" in output + + def test_idle_stops_live_display(self): + emitter, console = _capture_emitter() + emitter.emit(_make_event(EventType.ITERATION_STARTED, iteration=1)) + assert emitter._live is not None + emitter.emit(_make_event( + EventType.ITERATION_IDLE, + iteration=1, detail="idle (1s)", log_file=None, result_text=None, + )) + assert emitter._live is None + + class TestRunStopped: def test_completed_shows_summary(self): emitter, console = _capture_emitter() @@ -281,6 +312,17 @@ def test_run_stopped_stops_active_live_display(self): )) assert emitter._live is None + def test_max_idle_shows_summary(self): + emitter, console = _capture_emitter() + emitter.emit(_make_event( + EventType.RUN_STOPPED, + reason="max_idle", total=4, completed=2, failed=0, timed_out=0, + )) + output = console.export_text() + assert "Stopped (idle):" in output + assert "4 iteration(s)" in output + assert "2 succeeded" in output + def test_completed_all_succeeded(self): emitter, console = _capture_emitter() emitter.emit(_make_event( From adb489e928aa09f3c962a872cf6c450c5c82c307 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 15:54:42 +0100 Subject: [PATCH 06/23] docs: add idle detection to all doc surfaces Document the new idle detection feature across CLI reference, quick reference, writing prompts guide, changelog, codebase map, and the new-ralph skill. Adds frontmatter field reference, usage examples, and contributor guidance for the idle detection system. Co-authored-by: Ralphify --- docs/changelog.md | 8 ++++++ docs/cli.md | 36 ++++++++++++++++++++++++++ docs/contributing/codebase-map.md | 15 ++++++++--- docs/quick-reference.md | 13 ++++++++++ docs/writing-prompts.md | 29 +++++++++++++++++++++ src/ralphify/skills/new-ralph/SKILL.md | 5 ++++ 6 files changed, 103 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 59a9bcd..52e76b1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -8,6 +8,14 @@ keywords: ralphify changelog, release history, new features, version updates, br All notable changes to ralphify are documented here. +## 0.2.5 — 2026-03-22 + +### Added + +- **Idle detection with backoff** — when an agent emits `` in its output, the engine applies configurable backoff delays between iterations and optionally stops the loop after a cumulative idle time limit. Configure via the `idle` frontmatter block with `delay`, `backoff`, `max_delay`, and `max` fields. Prevents wasting tokens on idle iterations. + +--- + ## 0.2.4 — 2026-03-22 ### Fixed diff --git a/docs/cli.md b/docs/cli.md index f03bb8b..27c1130 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -165,6 +165,7 @@ Your instructions here. Reference args with {{ args.dir }}. | `commands` | list | no | Commands to run each iteration (each has `name` and `run`) | | `args` | list of strings | no | Declared argument names for user arguments. Letters, digits, hyphens, and underscores only. | | `credit` | bool | no | Append co-author trailer instruction to prompt (default: `true`) | +| `idle` | mapping | no | Idle detection config — backoff delays when agent signals idle state (see [Idle detection](#idle-detection)) | ### Commands @@ -188,3 +189,38 @@ If a command exceeds its timeout, the process is killed and the captured output | `{{ args. }}` | Value of the named user argument | Unmatched placeholders resolve to an empty string. + +### Idle detection + +When an agent emits `` in its output, the engine applies backoff delays between iterations and optionally stops the loop after a cumulative idle time limit. + +Add an `idle` block to your frontmatter: + +```markdown +--- +agent: claude -p --dangerously-skip-permissions +idle: + delay: 30s + backoff: 2 + max_delay: 5m + max: 30m +--- +``` + +| Field | Type | Default | Description | +|---|---|---|---| +| `delay` | duration or number | `30` (seconds) | Initial delay after the first idle iteration | +| `backoff` | number | `2.0` | Multiplier applied each consecutive idle iteration | +| `max_delay` | duration or number | `300` (5 minutes) | Maximum delay cap | +| `max` | duration or number | none | Stop the loop after this cumulative idle time | + +Duration values accept numbers (seconds) or human-readable strings: `30s`, `5m`, `6h`, `1d`. + +**How it works:** + +1. Agent output contains `` → iteration is marked idle +2. Next delay = `delay × backoff^(consecutive_idle - 1)`, capped at `max_delay` +3. A non-idle iteration resets all idle tracking (consecutive count and cumulative time) +4. If `max` is set, the loop stops when cumulative idle delay time exceeds the limit + +When no `idle` block is present, the loop runs exactly as before. diff --git a/docs/contributing/codebase-map.md b/docs/contributing/codebase-map.md index 3e8b45c..32e7f09 100644 --- a/docs/contributing/codebase-map.md +++ b/docs/contributing/codebase-map.md @@ -81,11 +81,12 @@ The run loop communicates via structured events (`_events.py`). Each event has a Event data uses TypedDict classes — one per event type — rather than free-form dicts. The key types: -- **`RunStartedData`** / **`RunStoppedData`** — run lifecycle (stop reason is a `StopReason` literal: `"completed"`, `"error"`, `"user_requested"`) +- **`RunStartedData`** / **`RunStoppedData`** — run lifecycle (stop reason is a `StopReason` literal: `"completed"`, `"error"`, `"user_requested"`, `"max_idle"`) - **`IterationStartedData`** / **`IterationEndedData`** — per-iteration data (return code, duration, log path) - **`CommandsStartedData`** / **`CommandsCompletedData`** — command execution bookends - **`PromptAssembledData`** — prompt length after placeholder resolution - **`AgentActivityData`** — streaming agent output +- **`IterationIdleData`** — idle detection data (consecutive idle count, next delay) - **`LogMessageData`** — info/error messages with optional traceback All payload types are unioned as `EventData`. @@ -113,7 +114,7 @@ The CLI uses a `ConsoleEmitter` (defined in `_console_emitter.py`) that renders 1. **`engine.py`** — The core run loop. Uses `RunConfig` and `RunState` (from `_run_types.py`) and `EventEmitter`. This is where iteration logic lives. 2. **`_run_types.py`** — `RunConfig`, `RunState`, `RunStatus`, and `Command`. These are the shared data types used by the engine, CLI, and manager. -3. **`cli.py`** — All CLI commands. Validates frontmatter fields via extracted helpers (`_validate_agent`, `_validate_commands`, `_validate_credit`, `_validate_run_options`, `_validate_declared_args`), builds a `RunConfig`, and delegates to `engine.run_loop()` for the actual loop. Terminal event rendering lives in `_console_emitter.py`. +3. **`cli.py`** — All CLI commands. Validates frontmatter fields via extracted helpers (`_validate_agent`, `_validate_commands`, `_validate_credit`, `_validate_idle`, `_validate_run_options`, `_validate_declared_args`), builds a `RunConfig`, and delegates to `engine.run_loop()` for the actual loop. Terminal event rendering lives in `_console_emitter.py`. 4. **`_frontmatter.py`** — YAML frontmatter parsing. Extracts `agent`, `commands`, `args` from the RALPH.md file. 5. **`_resolver.py`** — Template placeholder logic. Small file but critical. 6. **`_skills.py`** + **`skills/`** — The skill system behind `ralph new`. `_skills.py` handles agent detection, reads bundled skill definitions from `skills/`, installs them into the agent's skill directory, and builds the command to launch the agent. @@ -124,7 +125,7 @@ The CLI uses a `ConsoleEmitter` (defined in `_console_emitter.py`) that renders Frontmatter parsing is in `_frontmatter.py:parse_frontmatter()`, which returns a raw dict. Each field is then validated and coerced by a dedicated helper in `cli.py` — e.g. `_validate_agent()`, `_validate_commands()`, `_validate_credit()`. Adding a new frontmatter field means adding a new validator in `cli.py` and wiring it into `_build_run_config()`. -**Field name constants** (`FIELD_AGENT`, `FIELD_COMMANDS`, `FIELD_ARGS`, `FIELD_CREDIT`, `CMD_FIELD_NAME`, `CMD_FIELD_RUN`, `CMD_FIELD_TIMEOUT`) are centralized in `_frontmatter.py`. Always import these constants instead of hardcoding strings like `"agent"` or `"commands"` — this keeps error messages, validation, and placeholder resolution in sync when fields are renamed. +**Field name constants** (`FIELD_AGENT`, `FIELD_COMMANDS`, `FIELD_ARGS`, `FIELD_CREDIT`, `FIELD_IDLE`, `CMD_FIELD_NAME`, `CMD_FIELD_RUN`, `CMD_FIELD_TIMEOUT`, `IDLE_FIELD_DELAY`, `IDLE_FIELD_BACKOFF`, `IDLE_FIELD_MAX_DELAY`, `IDLE_FIELD_MAX`) are centralized in `_frontmatter.py`. Always import these constants instead of hardcoding strings like `"agent"` or `"commands"` — this keeps error messages, validation, and placeholder resolution in sync when fields are renamed. ### If you add a new CLI command... @@ -134,6 +135,14 @@ Add it in `cli.py`. The CLI uses Typer. Update `docs/cli.md` to document the new Events are defined in `_events.py:EventType`, with a corresponding TypedDict payload class for each type. Adding a new event type requires a new `EventType` member, a new TypedDict payload class, adding it to the `EventData` union, and handling it in `ConsoleEmitter` (`_console_emitter.py`). +### Idle detection + +When an agent emits `` (the `IDLE_STATE_MARKER` constant in `_frontmatter.py`) in its output, the engine marks the iteration as idle instead of completed. Idle behavior is configured via the `idle` frontmatter block, parsed by `_validate_idle()` in `cli.py` into an `IdleConfig` dataclass (`_run_types.py`). + +The engine (`engine.py`) tracks idle state on `RunState` (`consecutive_idle`, `cumulative_idle_time`). Backoff delay is computed by `_compute_idle_delay()`: `delay × backoff^(consecutive_idle - 1)`, capped at `max_delay`. A non-idle iteration calls `state.reset_idle()` to clear all idle tracking. When `idle.max` is set and cumulative idle time exceeds it, the loop stops with `RunStatus.IDLE_EXCEEDED`. + +The `ITERATION_IDLE` event type and `STOP_MAX_IDLE` stop reason are defined in `_events.py`. The console emitter renders idle iterations with a dimmed style. + ### Credit trailer When `credit` is `true` (the default), `engine.py:_assemble_prompt()` appends `_CREDIT_INSTRUCTION` to the prompt — a short instruction telling the agent to include a `Co-authored-by: Ralphify` trailer in git commits. Users can opt out with `credit: false` in frontmatter. diff --git a/docs/quick-reference.md b/docs/quick-reference.md index 817ab55..aa71b99 100644 --- a/docs/quick-reference.md +++ b/docs/quick-reference.md @@ -72,6 +72,7 @@ Your instructions here. Use {{ args.dir }} for user arguments. | `commands` | list | no | Commands to run each iteration | | `args` | list | no | User argument names. Letters, digits, hyphens, and underscores only. | | `credit` | bool | no | Append co-author trailer instruction to prompt (default: `true`) | +| `idle` | mapping | no | Idle detection: backoff delays when agent signals idle (see below) | ### Command fields @@ -107,6 +108,18 @@ Your instructions here. Use {{ args.dir }} for user arguments. - `--` ends flag parsing: `ralph run my-ralph -- --verbose ./src` treats `--verbose` as a positional value - Missing args resolve to empty string +### Idle detection + +```yaml +idle: + delay: 30s # Initial delay after first idle iteration (default: 30s) + backoff: 2 # Multiplier per consecutive idle iteration (default: 2) + max_delay: 5m # Delay cap (default: 5m) + max: 30m # Stop loop after this cumulative idle time (optional) +``` + +Agent emits `` → backoff delays kick in. Non-idle iteration resets tracking. Durations: `30s`, `5m`, `6h`, `1d`. + ## The loop Each iteration: diff --git a/docs/writing-prompts.md b/docs/writing-prompts.md index 642b9fd..a8836c5 100644 --- a/docs/writing-prompts.md +++ b/docs/writing-prompts.md @@ -331,6 +331,35 @@ HTML comments in your RALPH.md are automatically stripped before the prompt is a You can freely add and edit comments while the loop runs — they're stripped every iteration, so they never waste the agent's context window. +## Idle detection + +If your agent signals when it has no work to do, you can avoid wasting tokens on idle iterations. Add an `idle` block to your frontmatter and have your prompt instruct the agent to emit the idle marker: + +```markdown +--- +agent: claude -p --dangerously-skip-permissions +idle: + delay: 30s + backoff: 2 + max_delay: 5m + max: 30m +commands: + - name: tasks + run: cat TODO.md +--- + +{{ commands.tasks }} + +Read TODO.md and implement the next uncompleted task. Commit when done. + +If all tasks are complete and there is nothing left to do, output exactly: + +``` + +When the agent emits ``, the engine waits with increasing backoff delays (30s, 60s, 120s, ... up to 5m) before the next iteration. If you add new tasks to TODO.md, the next iteration will detect work and reset the backoff. If cumulative idle time reaches the `max` limit (30m here), the loop stops automatically. + +This is useful for loops that should keep running but may have periods of inactivity — the idle backoff reduces token usage while keeping the loop ready to resume when new work appears. + ## Prompt size and context windows Keep your prompt focused. A long prompt with every possible instruction eats into the agent's context window, leaving less room for the actual codebase. diff --git a/src/ralphify/skills/new-ralph/SKILL.md b/src/ralphify/skills/new-ralph/SKILL.md index 32e4f0c..ebc63c4 100644 --- a/src/ralphify/skills/new-ralph/SKILL.md +++ b/src/ralphify/skills/new-ralph/SKILL.md @@ -83,6 +83,11 @@ If any tests are failing above, fix them before continuing. | `commands[].timeout` | No | Max seconds before the command is killed (default: 60) | | `args` | No | Declared argument names for positional CLI args. Letters, digits, hyphens, and underscores only. Must be unique. | | `credit` | No | Append co-author trailer instruction (default: `true`). Set to `false` to disable. | +| `idle` | No | Idle detection config. Mapping with `delay`, `backoff`, `max_delay`, `max` fields. When the agent emits ``, the engine applies backoff delays between iterations. | +| `idle.delay` | No | Initial delay after first idle iteration (default: `30s`). Accepts numbers (seconds) or duration strings (`30s`, `5m`). | +| `idle.backoff` | No | Multiplier per consecutive idle iteration (default: `2`). | +| `idle.max_delay` | No | Maximum delay cap (default: `5m`). | +| `idle.max` | No | Stop the loop after this cumulative idle time. | #### Body From cd1e33e49b05f477196c8213fb30d8a46885df3e Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:05:55 +0100 Subject: [PATCH 07/23] refactor: remove redundant idle integration tests from test_cli.py TestValidateIdle unit tests already cover all validation error cases. Remove 9 duplicate integration tests from TestIdleFrontmatter, keeping only the 3 happy-path tests that verify CLI wiring. Co-authored-by: Ralphify --- tests/test_cli.py | 88 ----------------------------------------------- 1 file changed, 88 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index fb67cb7..27df42c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -808,94 +808,6 @@ def test_idle_with_numeric_values(self, mock_run, mock_which, tmp_path, monkeypa result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) - def test_idle_with_only_delay(self, mock_run, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p --dangerously-skip-permissions\n" - "idle:\n delay: 10s\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 0 - - def test_idle_not_a_mapping_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle: true\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "must be a mapping" in result.output.lower() - - def test_idle_string_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle: 30s\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "must be a mapping" in result.output.lower() - - def test_idle_invalid_delay_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle:\n delay: not-a-duration\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "idle.delay" in result.output.lower() - - def test_idle_negative_delay_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle:\n delay: -5\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "positive" in result.output.lower() - - def test_idle_zero_backoff_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle:\n backoff: 0\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "positive" in result.output.lower() - - def test_idle_boolean_backoff_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle:\n backoff: true\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "positive number" in result.output.lower() - - def test_idle_unknown_field_errors(self, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p\nidle:\n delay: 30s\n unknown: foo\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 1 - assert "unknown" in result.output.lower() class TestValidateIdle: From 6a6e91e1bd7901645f4c39cf2f9ea84088b0278f Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:08:06 +0100 Subject: [PATCH 08/23] refactor: parametrize idle delay tests and remove fragile timing test Collapse TestComputeIdleDelay from 6 individual tests into 2 parametrized tests. Remove test_idle_backoff_delay_applied which relied on wall-clock timing assertions (covered by the unit-level backoff math tests). Co-authored-by: Ralphify --- tests/test_engine.py | 75 ++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 55 deletions(-) diff --git a/tests/test_engine.py b/tests/test_engine.py index 009baee..e168a2b 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -918,47 +918,29 @@ def test_credit_false_no_trailer_in_agent_input(self, mock_run, tmp_path): class TestComputeIdleDelay: """Unit tests for _compute_idle_delay — backoff math.""" - def test_returns_zero_without_idle_config(self, tmp_path): - config = make_config(tmp_path, idle=None) + @pytest.mark.parametrize("idle_cfg, consecutive, expected", [ + (None, 3, 0), # no idle config → zero + (IdleConfig(delay=30), 0, 0), # not idle → zero + ]) + def test_returns_zero_when_inactive(self, tmp_path, idle_cfg, consecutive, expected): + config = make_config(tmp_path, idle=idle_cfg) state = make_state() - state.consecutive_idle = 3 - - assert _compute_idle_delay(config, state) == 0 - - def test_returns_zero_when_not_idle(self, tmp_path): - config = make_config(tmp_path, idle=IdleConfig(delay=30)) + state.consecutive_idle = consecutive + + assert _compute_idle_delay(config, state) == expected + + @pytest.mark.parametrize("consecutive, max_delay, expected", [ + (1, 300, 30), # base delay + (2, 300, 60), # 30 * 2^1 + (3, 300, 120), # 30 * 2^2 + (10, 100, 100), # capped at max_delay + ]) + def test_backoff_progression(self, tmp_path, consecutive, max_delay, expected): + config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=max_delay)) state = make_state() - state.consecutive_idle = 0 + state.consecutive_idle = consecutive - assert _compute_idle_delay(config, state) == 0 - - def test_first_idle_returns_base_delay(self, tmp_path): - config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) - state = make_state() - state.consecutive_idle = 1 - - assert _compute_idle_delay(config, state) == 30 - - def test_second_idle_applies_backoff(self, tmp_path): - config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) - state = make_state() - state.consecutive_idle = 2 - - assert _compute_idle_delay(config, state) == 60 # 30 * 2^1 - - def test_third_idle_applies_backoff_squared(self, tmp_path): - config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=300)) - state = make_state() - state.consecutive_idle = 3 - - assert _compute_idle_delay(config, state) == 120 # 30 * 2^2 - - def test_caps_at_max_delay(self, tmp_path): - config = make_config(tmp_path, idle=IdleConfig(delay=30, backoff=2.0, max_delay=100)) - state = make_state() - state.consecutive_idle = 10 # 30 * 2^9 = 15360, way over 100 - - assert _compute_idle_delay(config, state) == 100 + assert _compute_idle_delay(config, state) == expected class TestIdleDetection: @@ -1035,23 +1017,6 @@ def test_max_idle_stops_loop(self, mock_agent, tmp_path): stop = events_of_type(events, EventType.RUN_STOPPED)[0] assert stop.data["reason"] == "max_idle" - @patch(MOCK_EXECUTE_AGENT) - def test_idle_backoff_delay_applied(self, mock_agent, tmp_path): - """Idle iterations should apply backoff delay, not the base delay.""" - mock_agent.return_value = _idle_agent_result() - config = make_config( - tmp_path, max_iterations=2, delay=0, - idle=IdleConfig(delay=0.15, backoff=1.0, max_delay=300), - ) - state = make_state() - - start = time.monotonic() - run_loop(config, state, NullEmitter()) - elapsed = time.monotonic() - start - - # First idle delay should be ~0.15s, no delay after last iteration - assert elapsed >= 0.1 - @patch(MOCK_EXECUTE_AGENT) def test_idle_result_text_none_not_detected_as_idle(self, mock_agent, tmp_path): """Agent result with result_text=None should not be detected as idle.""" From dbda556ac8a029d574911334aac6b54265c0318f Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:09:29 +0100 Subject: [PATCH 09/23] refactor: consolidate idle event tests into single emitter test Co-authored-by: Ralphify --- tests/test_events.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/test_events.py b/tests/test_events.py index ae68dc7..c7319fd 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -189,21 +189,6 @@ def test_fanout_with_null_emitter(self): class TestIterationIdleEvent: - def test_iteration_idle_event_type_exists(self): - assert EventType.ITERATION_IDLE.value == "iteration_idle" - - def test_iteration_idle_data_structure(self): - data: IterationIdleData = { - "iteration": 3, - "consecutive_idle": 2, - "next_delay": 60.0, - "cumulative_idle_time": 90.0, - } - assert data["iteration"] == 3 - assert data["consecutive_idle"] == 2 - assert data["next_delay"] == 60.0 - assert data["cumulative_idle_time"] == 90.0 - def test_emit_iteration_idle_via_bound_emitter(self): q = QueueEmitter() emit = BoundEmitter(q, "run-idle") @@ -220,6 +205,4 @@ def test_emit_iteration_idle_via_bound_emitter(self): assert events[0].type == EventType.ITERATION_IDLE assert events[0].data["consecutive_idle"] == 1 assert events[0].data["next_delay"] == 30.0 - - def test_stop_max_idle_reason(self): assert STOP_MAX_IDLE == "max_idle" From 4fbddc1657db07e18bc7ce91a6811578f120044e Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:21:35 +0100 Subject: [PATCH 10/23] refactor: remove dead IterationIdleData, inline idle defaults, simplify duration parser Co-authored-by: Ralphify --- docs/contributing/codebase-map.md | 1 - src/ralphify/_events.py | 8 -------- src/ralphify/_frontmatter.py | 20 +++----------------- src/ralphify/_run_types.py | 23 ++++------------------- tests/test_cli.py | 7 +++---- tests/test_events.py | 14 ++------------ tests/test_frontmatter.py | 6 ------ tests/test_run_types.py | 9 +++------ 8 files changed, 15 insertions(+), 73 deletions(-) diff --git a/docs/contributing/codebase-map.md b/docs/contributing/codebase-map.md index 32e7f09..372170f 100644 --- a/docs/contributing/codebase-map.md +++ b/docs/contributing/codebase-map.md @@ -86,7 +86,6 @@ Event data uses TypedDict classes — one per event type — rather than free-fo - **`CommandsStartedData`** / **`CommandsCompletedData`** — command execution bookends - **`PromptAssembledData`** — prompt length after placeholder resolution - **`AgentActivityData`** — streaming agent output -- **`IterationIdleData`** — idle detection data (consecutive idle count, next delay) - **`LogMessageData`** — info/error messages with optional traceback All payload types are unioned as `EventData`. diff --git a/src/ralphify/_events.py b/src/ralphify/_events.py index d75e214..173e5e1 100644 --- a/src/ralphify/_events.py +++ b/src/ralphify/_events.py @@ -113,13 +113,6 @@ class IterationEndedData(TypedDict): result_text: str | None -class IterationIdleData(TypedDict): - iteration: int - consecutive_idle: int - next_delay: float - cumulative_idle_time: float - - class CommandsStartedData(TypedDict): iteration: int count: int @@ -151,7 +144,6 @@ class LogMessageData(TypedDict): | RunStoppedData | IterationStartedData | IterationEndedData - | IterationIdleData | CommandsStartedData | CommandsCompletedData | PromptAssembledData diff --git a/src/ralphify/_frontmatter.py b/src/ralphify/_frontmatter.py index 59269b8..040c5e8 100644 --- a/src/ralphify/_frontmatter.py +++ b/src/ralphify/_frontmatter.py @@ -56,32 +56,18 @@ # Pattern for human-readable duration strings (e.g. "30s", "5m", "6h", "1d"). _DURATION_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([smhd])\s*$") - -_DURATION_MULTIPLIERS: dict[str, float] = { - "s": 1, - "m": 60, - "h": 3600, - "d": 86400, -} +_DURATION_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "d": 86400} def parse_duration(value: str) -> float: - """Parse a human-readable duration string into seconds. - - Supported suffixes: ``s`` (seconds), ``m`` (minutes), ``h`` (hours), - ``d`` (days). Examples: ``"30s"`` → 30.0, ``"5m"`` → 300.0. - - Raises :class:`ValueError` for invalid formats. - """ + """Parse a duration string (e.g. ``"30s"``, ``"5m"``) into seconds.""" match = _DURATION_RE.match(value) if not match: raise ValueError( f"Invalid duration '{value}'. Use a number with a suffix: " f"s (seconds), m (minutes), h (hours), d (days). Examples: 30s, 5m, 6h." ) - amount = float(match.group(1)) - unit = match.group(2) - return amount * _DURATION_MULTIPLIERS[unit] + return float(match.group(1)) * _DURATION_MULTIPLIERS[match.group(2)] def _extract_frontmatter_block(text: str) -> tuple[str, str]: diff --git a/src/ralphify/_run_types.py b/src/ralphify/_run_types.py index 09af8f4..55109ce 100644 --- a/src/ralphify/_run_types.py +++ b/src/ralphify/_run_types.py @@ -23,15 +23,6 @@ ) -DEFAULT_IDLE_DELAY: float = 30 -"""Default initial delay in seconds when idle state is detected.""" - -DEFAULT_IDLE_BACKOFF: float = 2.0 -"""Default backoff multiplier applied each consecutive idle iteration.""" - -DEFAULT_IDLE_MAX_DELAY: float = 300 -"""Default maximum delay in seconds (5 minutes) for idle backoff.""" - DEFAULT_COMMAND_TIMEOUT: float = 60 """Default timeout in seconds for commands defined in RALPH.md frontmatter.""" @@ -86,17 +77,11 @@ def reason(self) -> StopReason: @dataclass class IdleConfig: - """Configuration for idle detection and backoff behavior. - - When an agent signals idle state, the engine waits ``delay`` seconds - before the next iteration, multiplying by ``backoff`` each consecutive - idle iteration, capped at ``max_delay``. If cumulative idle time - exceeds ``max``, the loop stops. - """ + """Configuration for idle detection and backoff behavior.""" - delay: float = DEFAULT_IDLE_DELAY - backoff: float = DEFAULT_IDLE_BACKOFF - max_delay: float = DEFAULT_IDLE_MAX_DELAY + delay: float = 30 + backoff: float = 2.0 + max_delay: float = 300 max: float | None = None diff --git a/tests/test_cli.py b/tests/test_cli.py index 27df42c..0bfbf65 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -815,11 +815,10 @@ def test_none_returns_none(self): assert _validate_idle(None) is None def test_empty_dict_returns_defaults(self): - from ralphify._run_types import DEFAULT_IDLE_BACKOFF, DEFAULT_IDLE_DELAY, DEFAULT_IDLE_MAX_DELAY config = _validate_idle({}) - assert config.delay == DEFAULT_IDLE_DELAY - assert config.backoff == DEFAULT_IDLE_BACKOFF - assert config.max_delay == DEFAULT_IDLE_MAX_DELAY + assert config.delay == 30 + assert config.backoff == 2.0 + assert config.max_delay == 300 assert config.max is None def test_duration_strings_parsed(self): diff --git a/tests/test_events.py b/tests/test_events.py index c7319fd..4c2f1be 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -8,12 +8,10 @@ from ralphify._events import ( LOG_ERROR, LOG_INFO, - STOP_MAX_IDLE, BoundEmitter, Event, EventType, FanoutEmitter, - IterationIdleData, NullEmitter, QueueEmitter, ) @@ -192,17 +190,9 @@ class TestIterationIdleEvent: def test_emit_iteration_idle_via_bound_emitter(self): q = QueueEmitter() emit = BoundEmitter(q, "run-idle") - data: IterationIdleData = { - "iteration": 1, - "consecutive_idle": 1, - "next_delay": 30.0, - "cumulative_idle_time": 30.0, - } - emit(EventType.ITERATION_IDLE, data) + emit(EventType.ITERATION_IDLE, {"iteration": 1, "detail": "idle (1.0s)"}) events = drain_events(q) assert len(events) == 1 assert events[0].type == EventType.ITERATION_IDLE - assert events[0].data["consecutive_idle"] == 1 - assert events[0].data["next_delay"] == 30.0 - assert STOP_MAX_IDLE == "max_idle" + assert events[0].data["iteration"] == 1 diff --git a/tests/test_frontmatter.py b/tests/test_frontmatter.py index 0b6a611..a561c8f 100644 --- a/tests/test_frontmatter.py +++ b/tests/test_frontmatter.py @@ -3,7 +3,6 @@ import pytest from ralphify._frontmatter import ( - IDLE_STATE_MARKER, RALPH_MARKER, _extract_frontmatter_block, parse_duration, @@ -177,11 +176,6 @@ def test_invalid_durations_raise(self, value): parse_duration(value) -class TestIdleStateMarker: - def test_marker_value(self): - assert IDLE_STATE_MARKER == "" - - class TestSerializeFrontmatter: def test_roundtrip(self): original_fm = {"agent": "claude"} diff --git a/tests/test_run_types.py b/tests/test_run_types.py index 3ae11ef..5943734 100644 --- a/tests/test_run_types.py +++ b/tests/test_run_types.py @@ -8,9 +8,6 @@ from ralphify._frontmatter import RALPH_MARKER from ralphify._run_types import ( DEFAULT_COMMAND_TIMEOUT, - DEFAULT_IDLE_BACKOFF, - DEFAULT_IDLE_DELAY, - DEFAULT_IDLE_MAX_DELAY, RUN_ID_LENGTH, Command, IdleConfig, @@ -48,9 +45,9 @@ def test_custom_timeout(self): class TestIdleConfig: def test_defaults(self): cfg = IdleConfig() - assert cfg.delay == DEFAULT_IDLE_DELAY - assert cfg.backoff == DEFAULT_IDLE_BACKOFF - assert cfg.max_delay == DEFAULT_IDLE_MAX_DELAY + assert cfg.delay == 30 + assert cfg.backoff == 2.0 + assert cfg.max_delay == 300 assert cfg.max is None def test_custom_values(self): From 6a89cb635d3a5ba8d692c1d47f843b52f2677dad Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:23:48 +0100 Subject: [PATCH 11/23] refactor: inline _parse_idle_duration into _validate_idle and loop over duration fields Co-authored-by: Ralphify --- src/ralphify/cli.py | 63 +++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/ralphify/cli.py b/src/ralphify/cli.py index c17340c..d476908 100644 --- a/src/ralphify/cli.py +++ b/src/ralphify/cli.py @@ -379,25 +379,6 @@ def _validate_credit(raw_credit: Any) -> bool: return raw_credit -def _parse_idle_duration(value: Any, field_name: str) -> float: - """Parse a duration value from idle config — accepts numbers (seconds) or duration strings.""" - if isinstance(value, bool): - _exit_error(f"'{FIELD_IDLE}.{field_name}' must be a number or duration string, got {value!r}.") - if isinstance(value, (int, float)): - if not math.isfinite(value) or value <= 0: - _exit_error(f"'{FIELD_IDLE}.{field_name}' must be positive, got {value!r}.") - return float(value) - if isinstance(value, str): - try: - result = parse_duration(value) - except ValueError as exc: - _exit_error(f"'{FIELD_IDLE}.{field_name}': {exc}") - if result <= 0: - _exit_error(f"'{FIELD_IDLE}.{field_name}' must be positive.") - return result - _exit_error(f"'{FIELD_IDLE}.{field_name}' must be a number or duration string, got {type(value).__name__}.") - - def _validate_idle(raw_idle: Any) -> IdleConfig | None: """Validate the ``idle`` frontmatter block and return an IdleConfig. @@ -409,25 +390,41 @@ def _validate_idle(raw_idle: Any) -> IdleConfig | None: if not isinstance(raw_idle, dict): _exit_error(f"'{FIELD_IDLE}' must be a mapping, got {type(raw_idle).__name__}.") - kwargs: dict[str, Any] = {} - - if IDLE_FIELD_DELAY in raw_idle: - kwargs["delay"] = _parse_idle_duration(raw_idle[IDLE_FIELD_DELAY], IDLE_FIELD_DELAY) - - if IDLE_FIELD_MAX_DELAY in raw_idle: - kwargs["max_delay"] = _parse_idle_duration(raw_idle[IDLE_FIELD_MAX_DELAY], IDLE_FIELD_MAX_DELAY) + def _parse_duration_field(value: Any, field_name: str) -> float: + label = f"'{FIELD_IDLE}.{field_name}'" + if isinstance(value, bool): + _exit_error(f"{label} must be a number or duration string, got {value!r}.") + if isinstance(value, (int, float)): + if not math.isfinite(value) or value <= 0: + _exit_error(f"{label} must be positive, got {value!r}.") + return float(value) + if isinstance(value, str): + try: + result = parse_duration(value) + except ValueError as exc: + _exit_error(f"{label}: {exc}") + if result <= 0: + _exit_error(f"{label} must be positive.") + return result + _exit_error(f"{label} must be a number or duration string, got {type(value).__name__}.") - if IDLE_FIELD_MAX in raw_idle: - kwargs["max"] = _parse_idle_duration(raw_idle[IDLE_FIELD_MAX], IDLE_FIELD_MAX) + kwargs: dict[str, Any] = {} + duration_fields = { + IDLE_FIELD_DELAY: "delay", + IDLE_FIELD_MAX_DELAY: "max_delay", + IDLE_FIELD_MAX: "max", + } + for field, kwarg in duration_fields.items(): + if field in raw_idle: + kwargs[kwarg] = _parse_duration_field(raw_idle[field], field) if IDLE_FIELD_BACKOFF in raw_idle: backoff = raw_idle[IDLE_FIELD_BACKOFF] - if isinstance(backoff, bool): - _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be a positive number, got {backoff!r}.") - if not isinstance(backoff, (int, float)): - _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be a positive number, got {type(backoff).__name__}.") + label = f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}'" + if isinstance(backoff, bool) or not isinstance(backoff, (int, float)): + _exit_error(f"{label} must be a positive number, got {backoff!r}.") if not math.isfinite(backoff) or backoff <= 0: - _exit_error(f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}' must be positive, got {backoff!r}.") + _exit_error(f"{label} must be positive, got {backoff!r}.") kwargs["backoff"] = float(backoff) known_fields = {IDLE_FIELD_DELAY, IDLE_FIELD_BACKOFF, IDLE_FIELD_MAX_DELAY, IDLE_FIELD_MAX} From 6b3b3f174f6b87246a4ca5626509fe3a792b22ed Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:26:27 +0100 Subject: [PATCH 12/23] refactor: simplify _compute_idle_delay into _idle_delay and remove guard-condition tests Co-authored-by: Ralphify --- docs/contributing/codebase-map.md | 2 +- src/ralphify/engine.py | 14 ++++---------- tests/test_engine.py | 19 ++++--------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/docs/contributing/codebase-map.md b/docs/contributing/codebase-map.md index 372170f..1052efc 100644 --- a/docs/contributing/codebase-map.md +++ b/docs/contributing/codebase-map.md @@ -138,7 +138,7 @@ Events are defined in `_events.py:EventType`, with a corresponding TypedDict pay When an agent emits `` (the `IDLE_STATE_MARKER` constant in `_frontmatter.py`) in its output, the engine marks the iteration as idle instead of completed. Idle behavior is configured via the `idle` frontmatter block, parsed by `_validate_idle()` in `cli.py` into an `IdleConfig` dataclass (`_run_types.py`). -The engine (`engine.py`) tracks idle state on `RunState` (`consecutive_idle`, `cumulative_idle_time`). Backoff delay is computed by `_compute_idle_delay()`: `delay × backoff^(consecutive_idle - 1)`, capped at `max_delay`. A non-idle iteration calls `state.reset_idle()` to clear all idle tracking. When `idle.max` is set and cumulative idle time exceeds it, the loop stops with `RunStatus.IDLE_EXCEEDED`. +The engine (`engine.py`) tracks idle state on `RunState` (`consecutive_idle`, `cumulative_idle_time`). Backoff delay is computed by `_idle_delay()`: `delay × backoff^(consecutive_idle - 1)`, capped at `max_delay`. A non-idle iteration calls `state.reset_idle()` to clear all idle tracking. When `idle.max` is set and cumulative idle time exceeds it, the loop stops with `RunStatus.IDLE_EXCEEDED`. The `ITERATION_IDLE` event type and `STOP_MAX_IDLE` stop reason are defined in `_events.py`. The console emitter renders idle iterations with a dimmed style. diff --git a/src/ralphify/engine.py b/src/ralphify/engine.py index 3454dbc..47aaab6 100644 --- a/src/ralphify/engine.py +++ b/src/ralphify/engine.py @@ -255,14 +255,8 @@ def _run_iteration( return True -def _compute_idle_delay(config: RunConfig, state: RunState) -> float: - """Compute the backoff delay for the current idle streak. - - Formula: ``delay * backoff^(consecutive_idle - 1)`` capped at ``max_delay``. - Returns 0 when no idle config is present or idle count is zero. - """ - if config.idle is None or state.consecutive_idle <= 0: - return 0 +def _idle_delay(config: RunConfig, state: RunState) -> float: + """Return idle backoff delay: ``delay * backoff^(streak-1)`` capped at ``max_delay``.""" raw = config.idle.delay * (config.idle.backoff ** (state.consecutive_idle - 1)) return min(raw, config.idle.max_delay) @@ -276,7 +270,7 @@ def _delay_if_needed(config: RunConfig, state: RunState, emit: BoundEmitter) -> """ # Determine effective delay: idle backoff overrides base delay if state.consecutive_idle > 0 and config.idle is not None: - delay = _compute_idle_delay(config, state) + delay = _idle_delay(config, state) else: delay = config.delay @@ -343,7 +337,7 @@ def run_loop( # Track cumulative idle time and check max idle limit if iteration_was_idle and config.idle is not None: - idle_delay = _compute_idle_delay(config, state) + idle_delay = _idle_delay(config, state) state.cumulative_idle_time += idle_delay if config.idle.max is not None and state.cumulative_idle_time >= config.idle.max: state.status = RunStatus.IDLE_EXCEEDED diff --git a/tests/test_engine.py b/tests/test_engine.py index e168a2b..0cacfbe 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -14,7 +14,7 @@ from ralphify._run_types import Command, IdleConfig, RunStatus from ralphify.engine import ( _assemble_prompt, - _compute_idle_delay, + _idle_delay, _delay_if_needed, _handle_control_signals, _run_commands, @@ -915,19 +915,8 @@ def test_credit_false_no_trailer_in_agent_input(self, mock_run, tmp_path): assert "Co-authored-by" not in call_input -class TestComputeIdleDelay: - """Unit tests for _compute_idle_delay — backoff math.""" - - @pytest.mark.parametrize("idle_cfg, consecutive, expected", [ - (None, 3, 0), # no idle config → zero - (IdleConfig(delay=30), 0, 0), # not idle → zero - ]) - def test_returns_zero_when_inactive(self, tmp_path, idle_cfg, consecutive, expected): - config = make_config(tmp_path, idle=idle_cfg) - state = make_state() - state.consecutive_idle = consecutive - - assert _compute_idle_delay(config, state) == expected +class TestIdleDelay: + """Unit tests for _idle_delay — backoff math.""" @pytest.mark.parametrize("consecutive, max_delay, expected", [ (1, 300, 30), # base delay @@ -940,7 +929,7 @@ def test_backoff_progression(self, tmp_path, consecutive, max_delay, expected): state = make_state() state.consecutive_idle = consecutive - assert _compute_idle_delay(config, state) == expected + assert _idle_delay(config, state) == expected class TestIdleDetection: From 61986e2c25ff3a7528067145a45ee3d4d91a686d Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:29:57 +0100 Subject: [PATCH 13/23] refactor: parametrize idle tests in cli, console_emitter, and run_types Co-authored-by: Ralphify --- tests/test_cli.py | 53 +++++++++++------------------------ tests/test_console_emitter.py | 21 ++++++-------- tests/test_run_types.py | 20 +++++-------- 3 files changed, 31 insertions(+), 63 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 0bfbf65..eb9b490 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -784,26 +784,17 @@ def test_no_idle_config_by_default(self, mock_run, mock_which, tmp_path, monkeyp result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 + @pytest.mark.parametrize("idle_yaml", [ + "idle:\n delay: 30s\n backoff: 2\n max_delay: 5m\n max: 1h", + "idle:\n delay: 30\n backoff: 1.5\n max_delay: 300\n max: 3600", + ]) @patch(MOCK_SUBPROCESS, side_effect=ok_result) - def test_idle_with_all_fields(self, mock_run, mock_which, tmp_path, monkeypatch): + def test_idle_config_accepted(self, mock_run, mock_which, tmp_path, monkeypatch, idle_yaml): monkeypatch.chdir(tmp_path) ralph_dir = tmp_path / "my-ralph" ralph_dir.mkdir(exist_ok=True) (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p --dangerously-skip-permissions\n" - "idle:\n delay: 30s\n backoff: 2\n max_delay: 5m\n max: 1h\n---\ngo" - ) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 0 - - @patch(MOCK_SUBPROCESS, side_effect=ok_result) - def test_idle_with_numeric_values(self, mock_run, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = tmp_path / "my-ralph" - ralph_dir.mkdir(exist_ok=True) - (ralph_dir / RALPH_MARKER).write_text( - "---\nagent: claude -p --dangerously-skip-permissions\n" - "idle:\n delay: 30\n backoff: 1.5\n max_delay: 300\n max: 3600\n---\ngo" + f"---\nagent: claude -p --dangerously-skip-permissions\n{idle_yaml}\n---\ngo" ) result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 @@ -834,26 +825,14 @@ def test_numeric_values_accepted(self): assert config.max_delay == 120.0 assert config.max == 600.0 - def test_not_a_dict_errors(self): - with pytest.raises(typer.Exit): - _validate_idle("30s") - - def test_invalid_duration_string_errors(self): - with pytest.raises(typer.Exit): - _validate_idle({"delay": "not-valid"}) - - def test_negative_delay_errors(self): - with pytest.raises(typer.Exit): - _validate_idle({"delay": -5}) - - def test_zero_delay_errors(self): - with pytest.raises(typer.Exit): - _validate_idle({"delay": 0}) - - def test_boolean_delay_errors(self): - with pytest.raises(typer.Exit): - _validate_idle({"delay": True}) - - def test_unknown_fields_errors(self): + @pytest.mark.parametrize("raw", [ + "30s", # not a dict + {"delay": "not-valid"}, # invalid duration string + {"delay": -5}, # negative delay + {"delay": 0}, # zero delay + {"delay": True}, # boolean delay + {"delay": "30s", "extra": "oops"}, # unknown field + ]) + def test_invalid_input_errors(self, raw): with pytest.raises(typer.Exit): - _validate_idle({"delay": "30s", "extra": "oops"}) + _validate_idle(raw) diff --git a/tests/test_console_emitter.py b/tests/test_console_emitter.py index 0000dc9..7ac5d0d 100644 --- a/tests/test_console_emitter.py +++ b/tests/test_console_emitter.py @@ -218,24 +218,19 @@ def test_traceback_with_brackets_not_corrupted(self): class TestIterationIdle: - def test_idle_shows_dimmed_output(self): - emitter, console = _capture_emitter() - emitter.emit(_make_event( - EventType.ITERATION_IDLE, - iteration=3, detail="idle (2s)", log_file=None, result_text=None, - )) - output = console.export_text() - assert "Iteration 3" in output - assert "idle (2s)" in output - - def test_idle_shows_log_file(self): + @pytest.mark.parametrize("iteration, detail, log_file, expected", [ + (3, "idle (2s)", None, ["Iteration 3", "idle (2s)"]), + (1, "idle (1s)", "/tmp/idle.log", ["/tmp/idle.log"]), + ]) + def test_idle_renders_details(self, iteration, detail, log_file, expected): emitter, console = _capture_emitter() emitter.emit(_make_event( EventType.ITERATION_IDLE, - iteration=1, detail="idle (1s)", log_file="/tmp/idle.log", result_text=None, + iteration=iteration, detail=detail, log_file=log_file, result_text=None, )) output = console.export_text() - assert "/tmp/idle.log" in output + for text in expected: + assert text in output def test_idle_stops_live_display(self): emitter, console = _capture_emitter() diff --git a/tests/test_run_types.py b/tests/test_run_types.py index 5943734..2354aea 100644 --- a/tests/test_run_types.py +++ b/tests/test_run_types.py @@ -43,19 +43,13 @@ def test_custom_timeout(self): class TestIdleConfig: - def test_defaults(self): - cfg = IdleConfig() - assert cfg.delay == 30 - assert cfg.backoff == 2.0 - assert cfg.max_delay == 300 - assert cfg.max is None - - def test_custom_values(self): - cfg = IdleConfig(delay=10, backoff=1.5, max_delay=120, max=3600) - assert cfg.delay == 10 - assert cfg.backoff == 1.5 - assert cfg.max_delay == 120 - assert cfg.max == 3600 + @pytest.mark.parametrize("kwargs, expected", [ + ({}, (30, 2.0, 300, None)), + ({"delay": 10, "backoff": 1.5, "max_delay": 120, "max": 3600}, (10, 1.5, 120, 3600)), + ]) + def test_values(self, kwargs, expected): + cfg = IdleConfig(**kwargs) + assert (cfg.delay, cfg.backoff, cfg.max_delay, cfg.max) == expected class TestRunConfig: From c3a5f864b17e215ba155a5ed1aa7e13b18d0413c Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:32:00 +0100 Subject: [PATCH 14/23] docs: trim redundant idle detection explanations in cli, writing-prompts, and changelog Co-authored-by: Ralphify --- docs/changelog.md | 2 +- docs/cli.md | 9 +-------- docs/writing-prompts.md | 4 +--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 52e76b1..ae14242 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -12,7 +12,7 @@ All notable changes to ralphify are documented here. ### Added -- **Idle detection with backoff** — when an agent emits `` in its output, the engine applies configurable backoff delays between iterations and optionally stops the loop after a cumulative idle time limit. Configure via the `idle` frontmatter block with `delay`, `backoff`, `max_delay`, and `max` fields. Prevents wasting tokens on idle iterations. +- **Idle detection with backoff** — configure via the `idle` frontmatter block (`delay`, `backoff`, `max_delay`, `max`). When an agent emits ``, the engine applies backoff delays and optionally stops after a cumulative idle time limit. --- diff --git a/docs/cli.md b/docs/cli.md index 27c1130..4c89d46 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -216,11 +216,4 @@ idle: Duration values accept numbers (seconds) or human-readable strings: `30s`, `5m`, `6h`, `1d`. -**How it works:** - -1. Agent output contains `` → iteration is marked idle -2. Next delay = `delay × backoff^(consecutive_idle - 1)`, capped at `max_delay` -3. A non-idle iteration resets all idle tracking (consecutive count and cumulative time) -4. If `max` is set, the loop stops when cumulative idle delay time exceeds the limit - -When no `idle` block is present, the loop runs exactly as before. +A non-idle iteration resets all idle tracking. When no `idle` block is present, the loop runs exactly as before. diff --git a/docs/writing-prompts.md b/docs/writing-prompts.md index a8836c5..3019116 100644 --- a/docs/writing-prompts.md +++ b/docs/writing-prompts.md @@ -356,9 +356,7 @@ If all tasks are complete and there is nothing left to do, output exactly: ``` -When the agent emits ``, the engine waits with increasing backoff delays (30s, 60s, 120s, ... up to 5m) before the next iteration. If you add new tasks to TODO.md, the next iteration will detect work and reset the backoff. If cumulative idle time reaches the `max` limit (30m here), the loop stops automatically. - -This is useful for loops that should keep running but may have periods of inactivity — the idle backoff reduces token usage while keeping the loop ready to resume when new work appears. +When the agent emits ``, the engine applies increasing backoff delays before the next iteration. A non-idle iteration resets the backoff. If cumulative idle time reaches the `max` limit, the loop stops automatically. ## Prompt size and context windows From fe4f4470d102ad7a6d6db7adda08d4bb809728f9 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:49:45 +0100 Subject: [PATCH 15/23] refactor: inline idle sub-field constants used only in _validate_idle The IDLE_FIELD_DELAY/BACKOFF/MAX_DELAY/MAX constants were only referenced in one function. Inline them as string literals to reduce the diff footprint. Co-authored-by: Ralphify --- docs/contributing/codebase-map.md | 2 +- src/ralphify/_frontmatter.py | 6 ------ src/ralphify/cli.py | 22 ++++++---------------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/docs/contributing/codebase-map.md b/docs/contributing/codebase-map.md index 1052efc..2b88897 100644 --- a/docs/contributing/codebase-map.md +++ b/docs/contributing/codebase-map.md @@ -124,7 +124,7 @@ The CLI uses a `ConsoleEmitter` (defined in `_console_emitter.py`) that renders Frontmatter parsing is in `_frontmatter.py:parse_frontmatter()`, which returns a raw dict. Each field is then validated and coerced by a dedicated helper in `cli.py` — e.g. `_validate_agent()`, `_validate_commands()`, `_validate_credit()`. Adding a new frontmatter field means adding a new validator in `cli.py` and wiring it into `_build_run_config()`. -**Field name constants** (`FIELD_AGENT`, `FIELD_COMMANDS`, `FIELD_ARGS`, `FIELD_CREDIT`, `FIELD_IDLE`, `CMD_FIELD_NAME`, `CMD_FIELD_RUN`, `CMD_FIELD_TIMEOUT`, `IDLE_FIELD_DELAY`, `IDLE_FIELD_BACKOFF`, `IDLE_FIELD_MAX_DELAY`, `IDLE_FIELD_MAX`) are centralized in `_frontmatter.py`. Always import these constants instead of hardcoding strings like `"agent"` or `"commands"` — this keeps error messages, validation, and placeholder resolution in sync when fields are renamed. +**Field name constants** (`FIELD_AGENT`, `FIELD_COMMANDS`, `FIELD_ARGS`, `FIELD_CREDIT`, `FIELD_IDLE`, `CMD_FIELD_NAME`, `CMD_FIELD_RUN`, `CMD_FIELD_TIMEOUT`) are centralized in `_frontmatter.py`. Always import these constants instead of hardcoding strings like `"agent"` or `"commands"` — this keeps error messages, validation, and placeholder resolution in sync when fields are renamed. ### If you add a new CLI command... diff --git a/src/ralphify/_frontmatter.py b/src/ralphify/_frontmatter.py index 040c5e8..3522e4a 100644 --- a/src/ralphify/_frontmatter.py +++ b/src/ralphify/_frontmatter.py @@ -27,12 +27,6 @@ FIELD_CREDIT = "credit" FIELD_IDLE = "idle" -# Sub-field names within the idle configuration mapping. -IDLE_FIELD_DELAY = "delay" -IDLE_FIELD_BACKOFF = "backoff" -IDLE_FIELD_MAX_DELAY = "max_delay" -IDLE_FIELD_MAX = "max" - # Sub-field names within each command mapping. CMD_FIELD_NAME = "name" CMD_FIELD_RUN = "run" diff --git a/src/ralphify/cli.py b/src/ralphify/cli.py index d476908..8f4b6ab 100644 --- a/src/ralphify/cli.py +++ b/src/ralphify/cli.py @@ -30,10 +30,6 @@ FIELD_COMMANDS, FIELD_CREDIT, FIELD_IDLE, - IDLE_FIELD_BACKOFF, - IDLE_FIELD_DELAY, - IDLE_FIELD_MAX, - IDLE_FIELD_MAX_DELAY, RALPH_MARKER, VALID_NAME_CHARS_MSG, parse_duration, @@ -409,26 +405,20 @@ def _parse_duration_field(value: Any, field_name: str) -> float: _exit_error(f"{label} must be a number or duration string, got {type(value).__name__}.") kwargs: dict[str, Any] = {} - duration_fields = { - IDLE_FIELD_DELAY: "delay", - IDLE_FIELD_MAX_DELAY: "max_delay", - IDLE_FIELD_MAX: "max", - } - for field, kwarg in duration_fields.items(): + for field in ("delay", "max_delay", "max"): if field in raw_idle: - kwargs[kwarg] = _parse_duration_field(raw_idle[field], field) + kwargs[field] = _parse_duration_field(raw_idle[field], field) - if IDLE_FIELD_BACKOFF in raw_idle: - backoff = raw_idle[IDLE_FIELD_BACKOFF] - label = f"'{FIELD_IDLE}.{IDLE_FIELD_BACKOFF}'" + if "backoff" in raw_idle: + backoff = raw_idle["backoff"] + label = f"'{FIELD_IDLE}.backoff'" if isinstance(backoff, bool) or not isinstance(backoff, (int, float)): _exit_error(f"{label} must be a positive number, got {backoff!r}.") if not math.isfinite(backoff) or backoff <= 0: _exit_error(f"{label} must be positive, got {backoff!r}.") kwargs["backoff"] = float(backoff) - known_fields = {IDLE_FIELD_DELAY, IDLE_FIELD_BACKOFF, IDLE_FIELD_MAX_DELAY, IDLE_FIELD_MAX} - unknown = set(raw_idle.keys()) - known_fields + unknown = set(raw_idle.keys()) - {"delay", "backoff", "max_delay", "max"} if unknown: _exit_error(f"Unknown field(s) in '{FIELD_IDLE}': {', '.join(sorted(unknown))}.") From e1d78dd4478c9af8979ec61b4092ea6fc36911b1 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:51:20 +0100 Subject: [PATCH 16/23] refactor: consolidate idle tests to reduce diff footprint Parametrize non-idle engine tests, merge mark/reset idle tests into one, remove redundant no-idle CLI test and events test already covered by integration tests. Co-authored-by: Ralphify --- tests/test_cli.py | 7 ------- tests/test_engine.py | 29 ++++++++--------------------- tests/test_events.py | 12 ------------ tests/test_run_types.py | 16 ++-------------- 4 files changed, 10 insertions(+), 54 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index eb9b490..26a52e6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -777,13 +777,6 @@ def test_credit_invalid_value_errors(self, mock_which, tmp_path, monkeypatch): @patch(MOCK_WHICH, return_value="/usr/bin/claude") class TestIdleFrontmatter: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) - def test_no_idle_config_by_default(self, mock_run, mock_which, tmp_path, monkeypatch): - monkeypatch.chdir(tmp_path) - ralph_dir = make_ralph(tmp_path) - result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) - assert result.exit_code == 0 - @pytest.mark.parametrize("idle_yaml", [ "idle:\n delay: 30s\n backoff: 2\n max_delay: 5m\n max: 1h", "idle:\n delay: 30\n backoff: 1.5\n max_delay: 300\n max: 3600", diff --git a/tests/test_engine.py b/tests/test_engine.py index 0cacfbe..8d98393 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -1006,27 +1006,14 @@ def test_max_idle_stops_loop(self, mock_agent, tmp_path): stop = events_of_type(events, EventType.RUN_STOPPED)[0] assert stop.data["reason"] == "max_idle" + @pytest.mark.parametrize("agent_result, expected_type", [ + (AgentResult(returncode=0, elapsed=1.0, result_text=None), EventType.ITERATION_COMPLETED), + (AgentResult(returncode=1, elapsed=1.0, result_text=IDLE_STATE_MARKER), EventType.ITERATION_FAILED), + ]) @patch(MOCK_EXECUTE_AGENT) - def test_idle_result_text_none_not_detected_as_idle(self, mock_agent, tmp_path): - """Agent result with result_text=None should not be detected as idle.""" - mock_agent.return_value = AgentResult(returncode=0, elapsed=1.0, result_text=None) - config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) - state = make_state() - q = QueueEmitter() - - run_loop(config, state, q) - - events = drain_events(q) - types = event_types(events) - assert EventType.ITERATION_COMPLETED in types - assert EventType.ITERATION_IDLE not in types - - @patch(MOCK_EXECUTE_AGENT) - def test_failed_agent_not_detected_as_idle(self, mock_agent, tmp_path): - """Failed agent result should not be detected as idle even if marker is present.""" - mock_agent.return_value = AgentResult( - returncode=1, elapsed=1.0, result_text=IDLE_STATE_MARKER, - ) + def test_non_idle_cases(self, mock_agent, tmp_path, agent_result, expected_type): + """result_text=None or failed agent should not be detected as idle.""" + mock_agent.return_value = agent_result config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) state = make_state() q = QueueEmitter() @@ -1035,5 +1022,5 @@ def test_failed_agent_not_detected_as_idle(self, mock_agent, tmp_path): events = drain_events(q) types = event_types(events) - assert EventType.ITERATION_FAILED in types + assert expected_type in types assert EventType.ITERATION_IDLE not in types diff --git a/tests/test_events.py b/tests/test_events.py index 4c2f1be..0dbeed9 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -184,15 +184,3 @@ def test_fanout_with_null_emitter(self): fanout.emit(event) assert q.queue.get() is event - - -class TestIterationIdleEvent: - def test_emit_iteration_idle_via_bound_emitter(self): - q = QueueEmitter() - emit = BoundEmitter(q, "run-idle") - emit(EventType.ITERATION_IDLE, {"iteration": 1, "detail": "idle (1.0s)"}) - - events = drain_events(q) - assert len(events) == 1 - assert events[0].type == EventType.ITERATION_IDLE - assert events[0].data["iteration"] == 1 diff --git a/tests/test_run_types.py b/tests/test_run_types.py index 2354aea..f94dbd1 100644 --- a/tests/test_run_types.py +++ b/tests/test_run_types.py @@ -161,7 +161,7 @@ def test_wait_for_unpause_times_out(self): result = state.wait_for_unpause(timeout=0.01) assert result is False - def test_mark_idle_increments_completed_and_consecutive(self): + def test_mark_and_reset_idle(self): state = RunState(run_id="r1") state.mark_idle() assert state.completed == 1 @@ -169,23 +169,11 @@ def test_mark_idle_increments_completed_and_consecutive(self): state.mark_idle() assert state.completed == 2 assert state.consecutive_idle == 2 - - def test_reset_idle_clears_tracking(self): - state = RunState(run_id="r1") - state.mark_idle() - state.mark_idle() state.cumulative_idle_time = 120.0 state.reset_idle() assert state.consecutive_idle == 0 assert state.cumulative_idle_time == 0.0 - # completed count is preserved - assert state.completed == 2 - - def test_mark_idle_included_in_total(self): - state = RunState(run_id="r1") - state.mark_idle() - state.mark_completed() - assert state.total == 2 + assert state.completed == 2 # preserved class TestRunStatus: From 9b02721dd978cf17f4b325f4c1c7470664cd62bb Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 16:51:55 +0100 Subject: [PATCH 17/23] docs: trim idle detection section in writing-prompts guide Replace duplicate YAML example with cross-reference to cli.md, keeping only the prompt-writing guidance relevant to this page. Co-authored-by: Ralphify --- docs/writing-prompts.md | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/docs/writing-prompts.md b/docs/writing-prompts.md index 3019116..d0be3c5 100644 --- a/docs/writing-prompts.md +++ b/docs/writing-prompts.md @@ -333,26 +333,10 @@ You can freely add and edit comments while the loop runs — they're stripped ev ## Idle detection -If your agent signals when it has no work to do, you can avoid wasting tokens on idle iterations. Add an `idle` block to your frontmatter and have your prompt instruct the agent to emit the idle marker: +If your agent signals when it has no work to do, you can avoid wasting tokens on idle iterations. Add an [`idle` block](cli.md#idle-detection) to your frontmatter and instruct the agent to emit the idle marker when there's nothing left to do: ```markdown ---- -agent: claude -p --dangerously-skip-permissions -idle: - delay: 30s - backoff: 2 - max_delay: 5m - max: 30m -commands: - - name: tasks - run: cat TODO.md ---- - -{{ commands.tasks }} - -Read TODO.md and implement the next uncompleted task. Commit when done. - -If all tasks are complete and there is nothing left to do, output exactly: +If all tasks are complete, output exactly: ``` From 421b55961673d322267ecba7cc5d292620013389 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:08:48 +0100 Subject: [PATCH 18/23] feat: add stdout_text field to AgentResult Populate stdout_text from the full streamed output in streaming mode and from captured stdout in blocking mode, so the engine can check the complete agent output for markers not present in result_text. Co-authored-by: Ralphify --- src/ralphify/_agent.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ralphify/_agent.py b/src/ralphify/_agent.py index 29604c3..70ec800 100644 --- a/src/ralphify/_agent.py +++ b/src/ralphify/_agent.py @@ -51,6 +51,7 @@ class AgentResult(ProcessResult): elapsed: float = 0.0 log_file: Path | None = None result_text: str | None = None + stdout_text: str | None = None @dataclass(frozen=True) @@ -206,6 +207,7 @@ def _run_agent_streaming( log_file=log_file, result_text=stream.result_text, timed_out=stream.timed_out, + stdout_text="".join(stream.stdout_lines), ) @@ -254,6 +256,7 @@ def _run_agent_blocking( elapsed=time.monotonic() - start, log_file=log_file, timed_out=timed_out, + stdout_text=ensure_str(stdout) if stdout else None, ) From cf5f54b99ec0a02122b3aa03d51fe94671934d97 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:10:36 +0100 Subject: [PATCH 19/23] feat: check stdout_text as fallback for idle marker detection The idle detection in _run_agent_phase now checks agent.stdout_text when result_text doesn't contain the idle marker, ensuring idle state is detected even when the marker appears in streamed output but not in the final result field. Co-authored-by: Ralphify --- src/ralphify/engine.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ralphify/engine.py b/src/ralphify/engine.py index 47aaab6..c85c42c 100644 --- a/src/ralphify/engine.py +++ b/src/ralphify/engine.py @@ -172,10 +172,9 @@ def _run_agent_phase( duration = format_duration(agent.elapsed) - is_idle = ( - agent.success - and agent.result_text is not None - and IDLE_STATE_MARKER in agent.result_text + is_idle = agent.success and ( + (agent.result_text is not None and IDLE_STATE_MARKER in agent.result_text) + or (agent.stdout_text is not None and IDLE_STATE_MARKER in agent.stdout_text) ) if agent.timed_out: From a3a84f9fe1c395b927ed952bf0c33135562fbe17 Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:13:17 +0100 Subject: [PATCH 20/23] test: add tests for idle-via-stdout-fallback and stdout_text population Cover the new stdout_text fallback path in idle detection (engine) and verify stdout_text is populated in both streaming and blocking modes. Co-authored-by: Ralphify --- tests/test_agent.py | 36 ++++++++++++++++++++++++++++++++++++ tests/test_engine.py | 17 +++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/tests/test_agent.py b/tests/test_agent.py index a68f9a7..fd657f3 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -277,6 +277,42 @@ def test_not_success_when_timed_out(self): assert result.success is False +class TestStdoutTextPopulation: + """Tests for stdout_text field population in AgentResult.""" + + @patch(MOCK_POPEN) + def test_streaming_populates_stdout_text(self, mock_popen): + mock_popen.return_value = make_mock_popen( + stdout_lines='{"type": "status", "msg": "working"}\nplain line\n', + returncode=0, + ) + result = _run_agent_streaming( + ["claude", "-p"], "prompt", timeout=None, log_path_dir=None, iteration=1, + ) + + assert result.stdout_text is not None + assert "working" in result.stdout_text + assert "plain line" in result.stdout_text + + @patch(MOCK_SUBPROCESS) + def test_blocking_populates_stdout_text_when_logging(self, mock_run, tmp_path): + mock_run.return_value = ok_result(stdout="agent output\n") + result = execute_agent( + ["echo"], "prompt", timeout=None, log_path_dir=tmp_path, iteration=1, + ) + + assert result.stdout_text == "agent output\n" + + @patch(MOCK_SUBPROCESS) + def test_blocking_stdout_text_none_without_logging(self, mock_run): + mock_run.return_value = ok_result() + result = execute_agent( + ["echo"], "prompt", timeout=None, log_path_dir=None, iteration=1, + ) + + assert result.stdout_text is None + + class TestExecuteAgentDispatch: """Tests for execute_agent routing to streaming vs blocking mode.""" diff --git a/tests/test_engine.py b/tests/test_engine.py index 8d98393..d877c0f 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -1006,6 +1006,23 @@ def test_max_idle_stops_loop(self, mock_agent, tmp_path): stop = events_of_type(events, EventType.RUN_STOPPED)[0] assert stop.data["reason"] == "max_idle" + @patch(MOCK_EXECUTE_AGENT, return_value=_idle_agent_result( + result_text="no marker here", stdout_text=IDLE_STATE_MARKER, + )) + def test_idle_detected_via_stdout_fallback(self, mock_agent, tmp_path): + """When result_text lacks the marker but stdout_text contains it, + idle is still detected.""" + config = make_config(tmp_path, max_iterations=1, idle=IdleConfig()) + state = make_state() + q = QueueEmitter() + + run_loop(config, state, q) + + events = drain_events(q) + types = event_types(events) + assert EventType.ITERATION_IDLE in types + assert EventType.ITERATION_COMPLETED not in types + @pytest.mark.parametrize("agent_result, expected_type", [ (AgentResult(returncode=0, elapsed=1.0, result_text=None), EventType.ITERATION_COMPLETED), (AgentResult(returncode=1, elapsed=1.0, result_text=IDLE_STATE_MARKER), EventType.ITERATION_FAILED), From f39da3b6313adae53a72bdd50db881b1e696939a Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:16:40 +0100 Subject: [PATCH 21/23] feat: add DELAY_STARTED/DELAY_ENDED events for live countdown support Replace the static log_info("Waiting...") message with structured DELAY_STARTED and DELAY_ENDED events so the console emitter can render a live countdown timer. Co-authored-by: Ralphify --- src/ralphify/_events.py | 14 ++++++++++++++ src/ralphify/engine.py | 5 ++++- tests/test_engine.py | 7 ++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/ralphify/_events.py b/src/ralphify/_events.py index 173e5e1..95c6baf 100644 --- a/src/ralphify/_events.py +++ b/src/ralphify/_events.py @@ -76,6 +76,10 @@ class EventType(Enum): # ── Agent activity (live streaming) ───────────────────────── AGENT_ACTIVITY = "agent_activity" + # ── Delay ───────────────────────────────────────────────────── + DELAY_STARTED = "delay_started" + DELAY_ENDED = "delay_ended" + # ── Other ─────────────────────────────────────────────────── LOG_MESSAGE = "log_message" @@ -133,6 +137,14 @@ class AgentActivityData(TypedDict): iteration: int +class DelayStartedData(TypedDict): + delay: float + + +class DelayEndedData(TypedDict): + pass + + class LogMessageData(TypedDict): message: str level: LogLevel @@ -148,6 +160,8 @@ class LogMessageData(TypedDict): | CommandsCompletedData | PromptAssembledData | AgentActivityData + | DelayStartedData + | DelayEndedData | LogMessageData ) """Union of all typed event data payloads.""" diff --git a/src/ralphify/engine.py b/src/ralphify/engine.py index c85c42c..b028265 100644 --- a/src/ralphify/engine.py +++ b/src/ralphify/engine.py @@ -21,6 +21,8 @@ BoundEmitter, CommandsCompletedData, CommandsStartedData, + DelayEndedData, + DelayStartedData, EventEmitter, EventType, IterationEndedData, @@ -276,12 +278,13 @@ def _delay_if_needed(config: RunConfig, state: RunState, emit: BoundEmitter) -> if delay > 0 and ( config.max_iterations is None or state.iteration < config.max_iterations ): - emit.log_info(f"Waiting {delay}s...") + emit(EventType.DELAY_STARTED, DelayStartedData(delay=delay)) remaining = delay while remaining > 0 and not state.stop_requested: chunk = min(remaining, _PAUSE_POLL_INTERVAL) time.sleep(chunk) remaining -= chunk + emit(EventType.DELAY_ENDED, DelayEndedData()) def run_loop( diff --git a/tests/test_engine.py b/tests/test_engine.py index d877c0f..1eb6d49 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -535,9 +535,10 @@ def test_delay_sleeps_between_iterations(self, tmp_path): assert elapsed >= 0.1 events = drain_events(q) - assert len(events) == 1 - assert events[0].type == EventType.LOG_MESSAGE - assert "Waiting" in events[0].data["message"] + assert len(events) == 2 + assert events[0].type == EventType.DELAY_STARTED + assert events[0].data["delay"] == 0.15 + assert events[1].type == EventType.DELAY_ENDED def test_no_delay_on_last_iteration(self, tmp_path): config = make_config(tmp_path, delay=0.5, max_iterations=3) From 0de612f0e8a4204061c10085a65ad9b59645f1df Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:19:31 +0100 Subject: [PATCH 22/23] feat: add live delay countdown renderable and handlers to console emitter Add _DelayCountdown renderable that shows a ticking countdown for inter-iteration delays, and wire up DELAY_STARTED/DELAY_ENDED event handlers in ConsoleEmitter. Co-authored-by: Ralphify --- src/ralphify/_console_emitter.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ralphify/_console_emitter.py b/src/ralphify/_console_emitter.py index 3e5efd5..7c81b01 100644 --- a/src/ralphify/_console_emitter.py +++ b/src/ralphify/_console_emitter.py @@ -22,6 +22,8 @@ STOP_COMPLETED, STOP_MAX_IDLE, CommandsCompletedData, + DelayEndedData, + DelayStartedData, Event, EventType, IterationEndedData, @@ -56,6 +58,20 @@ def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderR yield text +class _DelayCountdown: + """Rich renderable that shows a countdown timer for inter-iteration delays.""" + + def __init__(self, total: float) -> None: + self._total = total + self._start = time.monotonic() + + def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult: + elapsed = time.monotonic() - self._start + remaining = max(0.0, self._total - elapsed) + text = Text(f" Waiting {format_duration(remaining)}…", style="dim") + yield text + + class ConsoleEmitter: """Renders engine events to the Rich console.""" @@ -70,6 +86,8 @@ def __init__(self, console: Console) -> None: EventType.ITERATION_TIMED_OUT: partial(self._on_iteration_ended, color="yellow", icon=_ICON_TIMEOUT), EventType.ITERATION_IDLE: partial(self._on_iteration_ended, color="dim", icon=_ICON_IDLE), EventType.COMMANDS_COMPLETED: self._on_commands_completed, + EventType.DELAY_STARTED: self._on_delay_started, + EventType.DELAY_ENDED: self._on_delay_ended, EventType.LOG_MESSAGE: self._on_log_message, EventType.RUN_STOPPED: self._on_run_stopped, } @@ -133,6 +151,19 @@ def _on_commands_completed(self, data: CommandsCompletedData) -> None: if count: self._console.print(f" [bold]Commands:[/bold] {count} ran") + def _on_delay_started(self, data: DelayStartedData) -> None: + countdown = _DelayCountdown(data["delay"]) + self._live = Live( + countdown, + console=self._console, + transient=True, + refresh_per_second=_LIVE_REFRESH_RATE, + ) + self._live.start() + + def _on_delay_ended(self, data: DelayEndedData) -> None: + self._stop_live() + def _on_log_message(self, data: LogMessageData) -> None: msg = escape_markup(data["message"]) level = data["level"] From 6c155c322444b940b3c98eac4567a9f57c5ad29c Mon Sep 17 00:00:00 2001 From: malpou Date: Sun, 22 Mar 2026 17:21:54 +0100 Subject: [PATCH 23/23] test: add tests for delay countdown rendering and event handlers Co-authored-by: Ralphify --- tests/test_console_emitter.py | 39 ++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/test_console_emitter.py b/tests/test_console_emitter.py index 7ac5d0d..b5de251 100644 --- a/tests/test_console_emitter.py +++ b/tests/test_console_emitter.py @@ -3,7 +3,7 @@ import pytest from rich.console import Console -from ralphify._console_emitter import ConsoleEmitter, _IterationSpinner +from ralphify._console_emitter import ConsoleEmitter, _DelayCountdown, _IterationSpinner from ralphify._events import Event, EventType @@ -330,6 +330,43 @@ def test_completed_all_succeeded(self): assert "timed out" not in output +class TestDelayCountdown: + def test_renders_remaining_time(self): + countdown = _DelayCountdown(10.0) + console = Console(record=True, width=80) + console.print(countdown) + output = console.export_text() + assert "Waiting" in output + # Should contain a duration string + assert "s" in output + + def test_delay_started_creates_live(self): + emitter, console = _capture_emitter() + assert emitter._live is None + emitter.emit(_make_event(EventType.DELAY_STARTED, delay=5.0)) + assert emitter._live is not None + emitter._stop_live() # clean up + + def test_delay_ended_stops_live(self): + emitter, console = _capture_emitter() + emitter.emit(_make_event(EventType.DELAY_STARTED, delay=5.0)) + assert emitter._live is not None + emitter.emit(_make_event(EventType.DELAY_ENDED)) + assert emitter._live is None + + def test_delay_lifecycle_full(self): + """DELAY_STARTED followed by DELAY_ENDED cleans up properly.""" + emitter, console = _capture_emitter() + emitter.emit(_make_event(EventType.DELAY_STARTED, delay=1.0)) + assert emitter._live is not None + emitter.emit(_make_event(EventType.DELAY_ENDED)) + assert emitter._live is None + # Should be safe to start another delay + emitter.emit(_make_event(EventType.DELAY_STARTED, delay=2.0)) + assert emitter._live is not None + emitter._stop_live() + + class TestIterationSpinner: def test_renders_elapsed_time(self): spinner = _IterationSpinner()