Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cloudflare-gastown/src/db/tables/beads.table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ export const BeadRecord = z.object({
}
})
.pipe(z.record(z.string(), z.any())), // z.any() needed for Rpc.Serializable compatibility
dispatch_attempts: z.number().int().default(0),
last_dispatch_attempt_at: z.string().nullable().default(null),
created_by: z.string().nullable(),
created_at: z.string(),
updated_at: z.string(),
Expand Down Expand Up @@ -129,10 +131,20 @@ export function createTableBeads(): string {
created_by: `text`,
created_at: `text not null`,
updated_at: `text not null`,
dispatch_attempts: `integer not null default 0`,
last_dispatch_attempt_at: `text`,
closed_at: `text`,
});
}

/**
 * Builds the ALTER TABLE statements that add the dispatch-tracking columns
 * (`dispatch_attempts` and `last_dispatch_attempt_at`) to a beads table that
 * was created before those columns existed.
 *
 * The statements carry no existence guard of their own.
 * NOTE(review): re-running them presumably relies on the caller tolerating
 * the failure raised when a column is already present — confirm at call sites.
 *
 * @returns One `ADD COLUMN` statement per new column, in execution order.
 */
export function migrateBeads(): string[] {
  // Column name paired with its SQL definition; order is preserved in the output.
  const addedColumns: ReadonlyArray<readonly [string, string]> = [
    ['dispatch_attempts', 'integer not null default 0'],
    ['last_dispatch_attempt_at', 'text'],
  ];

  return addedColumns.map(
    ([column, definition]) => `ALTER TABLE beads ADD COLUMN ${column} ${definition}`
  );
}

export function getIndexesBeads(): string[] {
return [
`CREATE INDEX IF NOT EXISTS idx_beads_type_status ON ${beads}(${beads.columns.type}, ${beads.columns.status})`,
Expand Down
19 changes: 19 additions & 0 deletions cloudflare-gastown/src/dos/Town.do.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,25 @@ export class TownDO extends DurableObject<Env> {
switch (action) {
case 'RESTART':
case 'RESTART_WITH_BACKOFF': {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: RESTART_WITH_BACKOFF no longer applies the new backoff policy

The reconciler now gates redispatch off beads.last_dispatch_attempt_at, but this restart path still only changes agent state before handing control back to the scheduler. If the bead's last dispatch timestamp is already stale, the next reconcile can redispatch immediately, so the manual restart bypasses the intended cooldown.

// Fix 4 (#1653): if the hooked bead has exhausted its dispatch
// attempts, fail it instead of restarting — prevents infinite loops.
const restartBeadId = snapshotHookedBeadId ?? targetAgent?.current_hook_bead_id;
if (restartBeadId) {
const restartBead = beadOps.getBead(this.sql, restartBeadId);
if (restartBead && restartBead.dispatch_attempts >= scheduling.MAX_DISPATCH_ATTEMPTS) {
beadOps.updateBeadStatus(this.sql, restartBeadId, 'failed', input.agent_id);
if (targetAgent?.current_hook_bead_id === restartBeadId) {
if (targetAgent.status === 'working' || targetAgent.status === 'stalled') {
dispatch
.stopAgentInContainer(this.env, this.townId, targetAgentId)
.catch(() => {});
}
agents.unhookBead(this.sql, targetAgentId);
}
break;
}
}

// Stop the agent in the container, reset to idle so the
// scheduler picks it up again on the next alarm cycle.
if (targetAgent?.status === 'working' || targetAgent?.status === 'stalled') {
Expand Down
13 changes: 13 additions & 0 deletions cloudflare-gastown/src/dos/town/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,19 @@ export function applyAction(ctx: ApplyActionContext, action: Action): (() => Pro
`,
[agentId]
);
// Track dispatch attempts on the bead itself (not just the agent).
// The bead counter is never reset by hookBead, preventing the
// infinite retry loop (#1653).
query(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Dispatch attempts are counted twice per dispatch

dispatch_agent already hands off to ctx.dispatchAgent(), and Town.do routes that into scheduling.dispatchAgent(), which increments the bead counter again. With this extra update, every dispatch consumes two attempts, so the new 5-attempt cap and the town-level breaker will fire roughly twice as early as intended.

sql,
/* sql */ `
UPDATE ${beads}
SET ${beads.columns.dispatch_attempts} = ${beads.columns.dispatch_attempts} + 1,
${beads.columns.last_dispatch_attempt_at} = ?
WHERE ${beads.bead_id} = ?
`,
[now(), beadId]
);
beadOps.updateBeadStatus(sql, beadId, 'in_progress', agentId);

const capturedAgentId = agentId;
Expand Down
5 changes: 4 additions & 1 deletion cloudflare-gastown/src/dos/town/agents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,16 @@ export function hookBead(sql: SqlStorage, agentId: string, beadId: string): void
unhookBead(sql, stale.bead_id);
}

// Do NOT reset dispatch_attempts here — per-bead dispatch tracking
// lives on the beads table now (beads.dispatch_attempts). Resetting
// the agent counter on every hook was the root cause of the infinite
// retry loop (#1653).
query(
sql,
/* sql */ `
UPDATE ${agent_metadata}
SET ${agent_metadata.columns.current_hook_bead_id} = ?,
${agent_metadata.columns.status} = 'idle',
${agent_metadata.columns.dispatch_attempts} = 0,
${agent_metadata.columns.last_activity_at} = ?,
${agent_metadata.columns.agent_status_message} = NULL,
${agent_metadata.columns.agent_status_updated_at} = NULL
Expand Down
10 changes: 8 additions & 2 deletions cloudflare-gastown/src/dos/town/beads.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
*/

import { z } from 'zod';
import { beads, BeadRecord, createTableBeads, getIndexesBeads } from '../../db/tables/beads.table';
import {
beads,
BeadRecord,
createTableBeads,
getIndexesBeads,
migrateBeads,
} from '../../db/tables/beads.table';
import {
bead_events,
BeadEventRecord,
Expand Down Expand Up @@ -65,7 +71,7 @@ export function initBeadTables(sql: SqlStorage): void {
dropCheckConstraints(sql);

// Migrations: add columns to existing tables (idempotent)
for (const stmt of [...migrateConvoyMetadata(), ...migrateAgentMetadata()]) {
for (const stmt of [...migrateBeads(), ...migrateConvoyMetadata(), ...migrateAgentMetadata()]) {
try {
query(sql, stmt, []);
} catch {
Expand Down
Loading
Loading