From 62749e431549c5c7abf05659a4b6e2881c85f9d0 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 31 Mar 2026 12:52:40 -0500 Subject: [PATCH 01/44] feat(admin): add Cloudflare dashboard links to town inspector (#1790) * feat(admin): add Cloudflare dashboard links to town inspector Add one-click links to Cloudflare dashboard pages (worker logs, container instance, TownDO logs, TownContainerDO logs) in the admin town inspector's Container tab for quick infrastructure debugging. - Add GET /api/towns/:townId/cloudflare-debug endpoint on gastown worker that returns DO IDs computed from idFromName(townId) - Add getCloudflareLinks tRPC procedure in admin gastown router that constructs CF dashboard URLs from DO IDs and env vars - Add CLOUDFLARE_ACCOUNT_ID, CLOUDFLARE_TOWN_DO_NAMESPACE_ID, and CLOUDFLARE_CONTAINER_DO_NAMESPACE_ID env vars to config.server.ts - Add Cloudflare Dashboard card with CFLink component to ContainerTab.tsx with tooltip explanations for disabled links Closes #1516 * fix(admin): gate container link on actual runtime state The cloudflare-debug endpoint was returning containerDoId via idFromName(townId), a deterministic value that always exists regardless of whether the container is running. This meant the UI could never reach the intended 'Container not running' disabled state. Now the endpoint calls getState() on TownContainerDO to check if the container is actually running/healthy before returning the DO ID. When stopped, containerDoId is null, so containerInstanceUrl and containerDoLogsUrl correctly degrade to null in the UI. 
--- cloudflare-gastown/src/gastown.worker.ts | 26 +++++++ .../gastown/towns/[townId]/ContainerTab.tsx | 75 +++++++++++++++++++ src/lib/config.server.ts | 7 ++ src/routers/admin/gastown-router.ts | 56 ++++++++++++++ 4 files changed, 164 insertions(+) diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index 32d676e0a..58239fb71 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -604,6 +604,32 @@ app.patch('/api/towns/:townId/config', c => instrumented(c, 'PATCH /api/towns/:townId/config', () => handleUpdateTownConfig(c, c.req.param())) ); +// ── Cloudflare Debug ──────────────────────────────────────────────── +// Returns the town and container Durable Object IDs used to construct Cloudflare dashboard URLs. +// containerDoId is only returned when the container is actually running, +// so the UI correctly shows a disabled state when the container is stopped. + +app.get('/api/towns/:townId/cloudflare-debug', async c => { + const townId = c.req.param('townId'); + const townDoId = c.env.TOWN.idFromName(townId).toString(); + + // Check actual container runtime state before returning the DO ID. + // idFromName() is deterministic and always returns an ID even when + // no container instance is running — we need to gate on getState(). + const containerStub = getTownContainerStub(c.env, townId); + const containerState = await containerStub.getState(); + const containerRunning = + containerState.status === 'running' || containerState.status === 'healthy'; + const containerDoId = containerRunning + ? 
c.env.TOWN_CONTAINER.idFromName(townId).toString() + : null; + + return c.json({ + success: true, + data: { townDoId, containerDoId }, + }); +}); + // ── Town Events ───────────────────────────────────────────────────────── app.use('/api/users/:userId/towns/:townId/events', async (c: Context, next) => diff --git a/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx b/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx index 6b7532b72..f6c7e6402 100644 --- a/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx +++ b/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx @@ -15,8 +15,43 @@ import { DialogTitle, } from '@/components/ui/dialog'; import { Badge } from '@/components/ui/badge'; +import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'; +import { ExternalLink } from 'lucide-react'; import { formatDistanceToNow } from 'date-fns'; +function CFLink({ + href, + label, + disabledTooltip, +}: { + href: string | null | undefined; + label: string; + disabledTooltip?: string; +}) { + if (!href) { + return ( + + + + + {disabledTooltip && {disabledTooltip}} + + ); + } + + return ( + + ); +} + export function ContainerTab({ townId }: { townId: string }) { const trpc = useTRPC(); @@ -25,6 +60,7 @@ export function ContainerTab({ townId }: { townId: string }) { const healthQuery = useQuery(trpc.admin.gastown.getTownHealth.queryOptions({ townId })); const eventsQuery = useQuery(trpc.admin.gastown.listContainerEvents.queryOptions({ townId })); const configQuery = useQuery(trpc.admin.gastown.getTownConfig.queryOptions({ townId })); + const cfLinksQuery = useQuery(trpc.admin.gastown.getCloudflareLinks.queryOptions({ townId })); const forceRestartMutation = useMutation( trpc.admin.gastown.forceRestartContainer.mutationOptions({ @@ -63,8 +99,47 @@ export function ContainerTab({ townId }: { townId: string }) { ? 
'bg-red-500/10 text-red-400 border-red-500/20' : 'bg-gray-500/10 text-gray-400 border-gray-500/20'; + const cfLinks = cfLinksQuery.data; + return (
+ {/* Cloudflare Dashboard */} + + + Cloudflare Dashboard + + + {cfLinksQuery.isLoading && ( +

Loading links…

+ )} + {cfLinksQuery.isError && ( +

+ Failed to load Cloudflare links: {cfLinksQuery.error.message} +

+ )} + {!cfLinksQuery.isLoading && !cfLinksQuery.isError && ( +
+ + + + +
+ )} +
+
+ {/* Health & Actions */} diff --git a/src/lib/config.server.ts b/src/lib/config.server.ts index 20db96d6a..dfd78d00e 100644 --- a/src/lib/config.server.ts +++ b/src/lib/config.server.ts @@ -176,6 +176,13 @@ if (process.env.NODE_ENV === 'production') { } } +// Cloudflare dashboard link construction (admin town inspector) +export const CLOUDFLARE_ACCOUNT_ID = getEnvVariable('CLOUDFLARE_ACCOUNT_ID'); +export const CLOUDFLARE_TOWN_DO_NAMESPACE_ID = getEnvVariable('CLOUDFLARE_TOWN_DO_NAMESPACE_ID'); +export const CLOUDFLARE_CONTAINER_DO_NAMESPACE_ID = getEnvVariable( + 'CLOUDFLARE_CONTAINER_DO_NAMESPACE_ID' +); + // KiloClaw Worker export const KILOCLAW_API_URL = getEnvVariable('KILOCLAW_API_URL') || ''; export const KILOCLAW_INTERNAL_API_SECRET = getEnvVariable('KILOCLAW_INTERNAL_API_SECRET') || ''; diff --git a/src/routers/admin/gastown-router.ts b/src/routers/admin/gastown-router.ts index c3dc6925d..3e8284ad5 100644 --- a/src/routers/admin/gastown-router.ts +++ b/src/routers/admin/gastown-router.ts @@ -7,6 +7,9 @@ import { GASTOWN_SERVICE_URL, GASTOWN_CF_ACCESS_CLIENT_ID, GASTOWN_CF_ACCESS_CLIENT_SECRET, + CLOUDFLARE_ACCOUNT_ID, + CLOUDFLARE_TOWN_DO_NAMESPACE_ID, + CLOUDFLARE_CONTAINER_DO_NAMESPACE_ID, } from '@/lib/config.server'; import { generateApiToken } from '@/lib/tokens'; import type { User } from '@kilocode/db/schema'; @@ -429,6 +432,59 @@ export const adminGastownRouter = createTRPCRouter({ ); }), + /** + * Get Cloudflare dashboard links for a town. + * Fetches DO IDs from the gastown worker and constructs CF dashboard URLs. + * Gracefully degrades when env vars are not configured. 
+ */ + getCloudflareLinks: adminProcedure + .input(z.object({ townId: z.string().uuid() })) + .output( + z.object({ + workerLogsUrl: z.string(), + containerInstanceUrl: z.string().nullable(), + townDoLogsUrl: z.string().nullable(), + containerDoLogsUrl: z.string().nullable(), + }) + ) + .query(async ({ input, ctx }) => { + const accountId = CLOUDFLARE_ACCOUNT_ID; + if (!accountId) { + return { + workerLogsUrl: + 'https://dash.cloudflare.com/workers/services/view/gastown/production/logs/live', + containerInstanceUrl: null, + townDoLogsUrl: null, + containerDoLogsUrl: null, + }; + } + + const debugInfo = await gastownGet( + ctx.user, + `/api/towns/${input.townId}/cloudflare-debug`, + z.object({ containerDoId: z.string().nullable(), townDoId: z.string() }) + ).catch(() => null); + + const townDoNamespaceId = CLOUDFLARE_TOWN_DO_NAMESPACE_ID; + const containerDoNamespaceId = CLOUDFLARE_CONTAINER_DO_NAMESPACE_ID; + + return { + workerLogsUrl: `https://dash.cloudflare.com/${accountId}/workers/services/view/gastown/production/logs/live`, + // containerDoId is only non-null when the container is actually running + containerInstanceUrl: debugInfo?.containerDoId + ? `https://dash.cloudflare.com/${accountId}/workers/containers/app-gastown/instances/${debugInfo.containerDoId}` + : null, + townDoLogsUrl: + townDoNamespaceId && debugInfo + ? `https://dash.cloudflare.com/${accountId}/workers/durable-objects/view/${townDoNamespaceId}/${debugInfo.townDoId}/logs` + : null, + containerDoLogsUrl: + containerDoNamespaceId && debugInfo?.containerDoId + ? `https://dash.cloudflare.com/${accountId}/workers/durable-objects/view/${containerDoNamespaceId}/${debugInfo.containerDoId}/logs` + : null, + }; + }), + /** * List all beads in a town, with optional filters. * The user-facing tRPC listBeads requires a rigId and verifies ownership. 
From 9c13a62b3e37faca0542ce874235abf22a26a4cd Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Tue, 31 Mar 2026 13:24:41 -0500 Subject: [PATCH 02/44] fix(gastown): add dispatch circuit breaker to prevent infinite retry loops (#1788) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(gastown): dispatch circuit breaker — per-bead attempt cap, exponential backoff, town-level breaker, error logging Fixes #1653 — No circuit breaker on dispatch failures causes infinite retry loops. Fix 1: Track dispatch_attempts on the bead itself (not just the agent). - Add dispatch_attempts + last_dispatch_attempt_at columns to beads table - Remove dispatch_attempts=0 reset from hookBead (root cause of infinite loop) - Increment bead counter in both actions.ts and scheduling.ts dispatch paths - Reconciler Rules 1/2/6 now check bead.dispatch_attempts for the cap - Lower MAX_DISPATCH_ATTEMPTS from 20 to 5 Fix 1b: Exponential backoff using bead.last_dispatch_attempt_at: - attempts 1-2: 2 min, attempt 3: 5 min, attempt 4: 10 min, attempt 5+: 30 min Fix 2: Town-level circuit breaker in reconciler. - Count beads with recent dispatch failures (dispatch_attempts > 0, open/failed, last_dispatch_attempt_at within 30 min window) - If >= 20, suppress all dispatch_agent actions and notify mayor - Applied to reconcileBeads and reconcileReviewQueue Fix 3: Add error reason to agent.dispatch_failed analytics events. - 'container returned false' for started===false path - err.message for catch path Fix 4: Beads at max dispatch attempts stay failed. 
- Rule 3 (stale in_progress reset): fail instead of reopen if at cap - Triage RESTART: fail hooked bead if at dispatch cap instead of re-dispatch - Triage REASSIGN_BEAD: fail bead instead of reopening if at cap * fix(gastown): address PR #1788 review comments - Remove double dispatch_attempts increment in actions.ts applyAction; scheduling.dispatchAgent is the single source of truth for both agent_metadata and bead counters. - Fix circuit breaker query to use NOT IN ('closed') instead of IN ('open', 'failed'), capturing in_progress beads from failed dispatches that never rolled back status. - Stamp beads.last_dispatch_attempt_at in RESTART_WITH_BACKOFF triage so the reconciler's exponential backoff gate fires correctly. - Add reason field to writeEvent() blobs serialization so agent.dispatch_failed events actually write the failure reason to Analytics Engine. * fix(gastown): circuit breaker only counts failed beads, fix MR max-attempts ordering - Circuit breaker: narrow query to only count beads with status='failed' or dispatch_attempts >= MAX_DISPATCH_ATTEMPTS. Previously counted all non-closed beads with recent dispatch attempts, which meant healthy in-progress beads tripped the breaker under normal concurrent load. - MR Rule 6: move max-attempts check before cooldown check so MR beads that exhausted all dispatch attempts are immediately failed rather than waiting up to 30 min for the cooldown to expire. Mirrors the ordering used in the issue-bead paths. * fix(gastown): circuit breaker predicate counts active retry loops and excludes closed beads The previous predicate (status='failed' OR dispatch_attempts>=MAX) had two bugs: 1. Under-counted: beads left in_progress after failed dispatch were missed 2. Over-counted: beads that succeeded on final attempt were still counted New predicate uses dispatch_attempts>0 AND status!='closed' to capture all beads in active retry loops while excluding successfully completed work. 
--- .../src/db/tables/beads.table.ts | 11 + cloudflare-gastown/src/dos/Town.do.ts | 70 ++++- cloudflare-gastown/src/dos/town/actions.ts | 13 +- cloudflare-gastown/src/dos/town/agents.ts | 1 - cloudflare-gastown/src/dos/town/beads.ts | 10 +- cloudflare-gastown/src/dos/town/reconciler.ts | 243 ++++++++++++++---- cloudflare-gastown/src/dos/town/scheduling.ts | 16 +- cloudflare-gastown/src/util/analytics.util.ts | 2 + 8 files changed, 287 insertions(+), 79 deletions(-) diff --git a/cloudflare-gastown/src/db/tables/beads.table.ts b/cloudflare-gastown/src/db/tables/beads.table.ts index e629017a1..2c38f8603 100644 --- a/cloudflare-gastown/src/db/tables/beads.table.ts +++ b/cloudflare-gastown/src/db/tables/beads.table.ts @@ -50,6 +50,8 @@ export const BeadRecord = z.object({ } }) .pipe(z.record(z.string(), z.any())), // z.any() needed for Rpc.Serializable compatibility + dispatch_attempts: z.number(), + last_dispatch_attempt_at: z.string().nullable(), created_by: z.string().nullable(), created_at: z.string(), updated_at: z.string(), @@ -126,6 +128,8 @@ export function createTableBeads(): string { priority: `text default 'medium'`, labels: `text default '[]'`, metadata: `text default '{}'`, + dispatch_attempts: `integer not null default 0`, + last_dispatch_attempt_at: `text`, created_by: `text`, created_at: `text not null`, updated_at: `text not null`, @@ -133,6 +137,13 @@ export function createTableBeads(): string { }); } +export function migrateBeads(): string[] { + return [ + `ALTER TABLE beads ADD COLUMN dispatch_attempts integer not null default 0`, + `ALTER TABLE beads ADD COLUMN last_dispatch_attempt_at text`, + ]; +} + export function getIndexesBeads(): string[] { return [ `CREATE INDEX IF NOT EXISTS idx_beads_type_status ON ${beads}(${beads.columns.type}, ${beads.columns.status})`, diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index b3eeaa158..1933b3af8 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ 
b/cloudflare-gastown/src/dos/Town.do.ts @@ -1561,6 +1561,22 @@ export class TownDO extends DurableObject { dispatch.stopAgentInContainer(this.env, this.townId, targetAgentId).catch(() => {}); } if (targetAgent) { + // Check if the hooked bead has exhausted its dispatch cap. + // If so, fail it immediately instead of letting the reconciler + // re-dispatch indefinitely (#1653). + if (targetAgent.current_hook_bead_id) { + const hookedBead = beadOps.getBead(this.sql, targetAgent.current_hook_bead_id); + if (hookedBead && hookedBead.dispatch_attempts >= scheduling.MAX_DISPATCH_ATTEMPTS) { + beadOps.updateBeadStatus( + this.sql, + targetAgent.current_hook_bead_id, + 'failed', + 'system' + ); + agents.unhookBead(this.sql, targetAgentId); + break; + } + } // RESTART clears last_activity_at so the scheduler picks it // up immediately. RESTART_WITH_BACKOFF sets it to now() so // the dispatch cooldown (DISPATCH_COOLDOWN_MS) delays the @@ -1576,6 +1592,21 @@ export class TownDO extends DurableObject { `, [activityAt, targetAgentId] ); + // Also stamp the bead's last_dispatch_attempt_at so the + // reconciler's exponential backoff gate fires correctly. + // Without this, the backoff variant allows immediate + // redispatch once last_dispatch_attempt_at ages out. + if (action === 'RESTART_WITH_BACKOFF' && targetAgent.current_hook_bead_id) { + query( + this.sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.last_dispatch_attempt_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), targetAgent.current_hook_bead_id] + ); + } } break; } @@ -1643,20 +1674,31 @@ export class TownDO extends DurableObject { } agents.unhookBead(this.sql, targetAgentId); } - // Reset the bead to open so the scheduler can re-assign it - query( - this.sql, - /* sql */ ` - UPDATE ${beads} - SET ${beads.columns.assignee_agent_bead_id} = NULL, - ${beads.columns.status} = 'open', - ${beads.columns.updated_at} = ? - WHERE ${beads.bead_id} = ? 
- AND ${beads.status} != 'closed' - AND ${beads.status} != 'failed' - `, - [now(), beadToReassign] - ); + // Check the bead's dispatch_attempts before resetting to open. + // If the bead exhausted its dispatch cap, fail it instead of + // re-entering the infinite retry loop (#1653). + const reassignBead = beadOps.getBead(this.sql, beadToReassign); + if ( + reassignBead && + reassignBead.dispatch_attempts >= scheduling.MAX_DISPATCH_ATTEMPTS + ) { + beadOps.updateBeadStatus(this.sql, beadToReassign, 'failed', input.agent_id); + } else { + // Reset the bead to open so the scheduler can re-assign it + query( + this.sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = NULL, + ${beads.columns.status} = 'open', + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + AND ${beads.status} != 'closed' + AND ${beads.status} != 'failed' + `, + [now(), beadToReassign] + ); + } } break; } diff --git a/cloudflare-gastown/src/dos/town/actions.ts b/cloudflare-gastown/src/dos/town/actions.ts index 5f799ec90..bc89c8347 100644 --- a/cloudflare-gastown/src/dos/town/actions.ts +++ b/cloudflare-gastown/src/dos/town/actions.ts @@ -506,17 +506,10 @@ export function applyAction(ctx: ApplyActionContext, action: Action): (() => Pro } } - // Set agent to working and bead to in_progress synchronously + // Set agent to working and bead to in_progress synchronously. + // dispatch_attempts are NOT incremented here — scheduling.dispatchAgent() + // is the single source of truth for both agent_metadata and bead counters. agentOps.updateAgentStatus(sql, agentId, 'working'); - query( - sql, - /* sql */ ` - UPDATE ${agent_metadata} - SET ${agent_metadata.columns.dispatch_attempts} = ${agent_metadata.columns.dispatch_attempts} + 1 - WHERE ${agent_metadata.bead_id} = ? 
- `, - [agentId] - ); beadOps.updateBeadStatus(sql, beadId, 'in_progress', agentId); const capturedAgentId = agentId; diff --git a/cloudflare-gastown/src/dos/town/agents.ts b/cloudflare-gastown/src/dos/town/agents.ts index c208a3834..c70d38350 100644 --- a/cloudflare-gastown/src/dos/town/agents.ts +++ b/cloudflare-gastown/src/dos/town/agents.ts @@ -288,7 +288,6 @@ export function hookBead(sql: SqlStorage, agentId: string, beadId: string): void UPDATE ${agent_metadata} SET ${agent_metadata.columns.current_hook_bead_id} = ?, ${agent_metadata.columns.status} = 'idle', - ${agent_metadata.columns.dispatch_attempts} = 0, ${agent_metadata.columns.last_activity_at} = ?, ${agent_metadata.columns.agent_status_message} = NULL, ${agent_metadata.columns.agent_status_updated_at} = NULL diff --git a/cloudflare-gastown/src/dos/town/beads.ts b/cloudflare-gastown/src/dos/town/beads.ts index 5fd9463fa..85a9e8a6b 100644 --- a/cloudflare-gastown/src/dos/town/beads.ts +++ b/cloudflare-gastown/src/dos/town/beads.ts @@ -4,7 +4,13 @@ */ import { z } from 'zod'; -import { beads, BeadRecord, createTableBeads, getIndexesBeads } from '../../db/tables/beads.table'; +import { + beads, + BeadRecord, + createTableBeads, + getIndexesBeads, + migrateBeads, +} from '../../db/tables/beads.table'; import { bead_events, BeadEventRecord, @@ -65,7 +71,7 @@ export function initBeadTables(sql: SqlStorage): void { dropCheckConstraints(sql); // Migrations: add columns to existing tables (idempotent) - for (const stmt of [...migrateConvoyMetadata(), ...migrateAgentMetadata()]) { + for (const stmt of [...migrateBeads(), ...migrateConvoyMetadata(), ...migrateAgentMetadata()]) { try { query(sql, stmt, []); } catch { diff --git a/cloudflare-gastown/src/dos/town/reconciler.ts b/cloudflare-gastown/src/dos/town/reconciler.ts index e9fef8d21..bfcc26f00 100644 --- a/cloudflare-gastown/src/dos/town/reconciler.ts +++ b/cloudflare-gastown/src/dos/town/reconciler.ts @@ -35,6 +35,54 @@ import type { TownEventRecord } from 
'../../db/tables/town-events.table'; const LOG = '[reconciler]'; +// ── Circuit breaker ───────────────────────────────────────────────── + +/** Number of dispatch failures in a 30-min window to trip the town-level breaker. */ +const CIRCUIT_BREAKER_FAILURE_THRESHOLD = 20; +/** Window in minutes for counting dispatch failures. */ +const CIRCUIT_BREAKER_WINDOW_MINUTES = 30; + +/** + * Town-level dispatch circuit breaker. Counts beads with at least one + * dispatch attempt in the recent window that have not yet closed + * successfully. This captures beads in active retry loops (in_progress + * after a failed container start), beads that have been explicitly + * failed, and beads that exhausted all attempts — while excluding + * beads that eventually succeeded (status = 'closed'). + */ +function checkDispatchCircuitBreaker(sql: SqlStorage): Action[] { + const rows = z + .object({ failure_count: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT count(*) as failure_count + FROM ${beads} + WHERE ${beads.last_dispatch_attempt_at} > strftime('%Y-%m-%dT%H:%M:%fZ', 'now', '-${CIRCUIT_BREAKER_WINDOW_MINUTES} minutes') + AND ${beads.dispatch_attempts} > 0 + AND ${beads.status} != 'closed' + `, + [] + ), + ]); + + const failureCount = rows[0]?.failure_count ?? 0; + if (failureCount >= CIRCUIT_BREAKER_FAILURE_THRESHOLD) { + console.warn( + `${LOG} circuit breaker OPEN: ${failureCount} dispatch failures in last ${CIRCUIT_BREAKER_WINDOW_MINUTES}min (threshold=${CIRCUIT_BREAKER_FAILURE_THRESHOLD})` + ); + return [ + { + type: 'notify_mayor', + message: `Dispatch circuit breaker is OPEN: ${failureCount} dispatch failures in the last ${CIRCUIT_BREAKER_WINDOW_MINUTES} minutes. 
All dispatch actions are paused until failures clear.`, + }, + ]; + } + return []; +} + // ── Timeouts (from spec §7) ───────────────────────────────────────── /** Reset non-PR MR beads stuck in_progress with no working agent */ @@ -61,6 +109,21 @@ function staleMs(timestamp: string | null, thresholdMs: number): boolean { return Date.now() - new Date(timestamp).getTime() > thresholdMs; } +/** + * Compute the dispatch cooldown for a bead based on its attempt count. + * Implements exponential backoff: + * attempts 1-2: 2 min (DISPATCH_COOLDOWN_MS) + * attempt 3: 5 min + * attempt 4: 10 min + * attempt 5+: 30 min + */ +function getDispatchCooldownMs(dispatchAttempts: number): number { + if (dispatchAttempts <= 2) return DISPATCH_COOLDOWN_MS; // 2 min + if (dispatchAttempts === 3) return 5 * 60_000; // 5 min + if (dispatchAttempts === 4) return 10 * 60_000; // 10 min + return 30 * 60_000; // 30 min +} + // ── Row schemas for queries ───────────────────────────────────────── // Derived from table record schemas for traceability back to table defs. @@ -89,6 +152,8 @@ const BeadRow = BeadRecord.pick({ updated_at: true, labels: true, created_by: true, + dispatch_attempts: true, + last_dispatch_attempt_at: true, }); type BeadRow = z.infer; @@ -460,6 +525,11 @@ export function reconcileAgents(sql: SqlStorage): Action[] { export function reconcileBeads(sql: SqlStorage): Action[] { const actions: Action[] = []; + // Town-level circuit breaker: if too many dispatch failures in the + // window, skip all dispatch_agent actions and escalate to mayor. 
+ const circuitBreakerActions = checkDispatchCircuitBreaker(sql); + const circuitBreakerOpen = circuitBreakerActions.length > 0; + // Rule 1: Open issue beads with no assignee, no blockers, not staged, not triage const unassigned = BeadRow.array().parse([ ...query( @@ -470,7 +540,9 @@ export function reconcileBeads(sql: SqlStorage): Action[] { b.${beads.columns.assignee_agent_bead_id}, b.${beads.columns.updated_at}, b.${beads.columns.labels}, - b.${beads.columns.created_by} + b.${beads.columns.created_by}, + b.${beads.columns.dispatch_attempts}, + b.${beads.columns.last_dispatch_attempt_at} FROM ${beads} b WHERE b.${beads.columns.type} = 'issue' AND b.${beads.columns.status} = 'open' @@ -498,9 +570,27 @@ export function reconcileBeads(sql: SqlStorage): Action[] { for (const bead of unassigned) { if (!bead.rig_id) continue; - // In shadow mode we can't call getOrCreateAgent, so we just note - // that a hook_agent + dispatch_agent is needed. - // The action includes rig_id so Phase 3's applyAction can resolve the agent. 
+ + // Per-bead dispatch cap: fail the bead if it exhausted all attempts + if (bead.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: bead.bead_id, + from: 'open', + to: 'failed', + reason: `max dispatch attempts exceeded (${bead.dispatch_attempts})`, + actor: 'system', + }); + continue; + } + + // Exponential backoff: skip if last dispatch attempt was too recent + const cooldownMs = getDispatchCooldownMs(bead.dispatch_attempts); + if (!staleMs(bead.last_dispatch_attempt_at, cooldownMs)) continue; + + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + actions.push({ type: 'dispatch_agent', agent_id: '', // resolved at apply time @@ -532,36 +622,22 @@ export function reconcileBeads(sql: SqlStorage): Action[] { for (const agent of idleHooked) { if (!agent.current_hook_bead_id) continue; - // Check dispatch cooldown - if (!staleMs(agent.last_activity_at, DISPATCH_COOLDOWN_MS)) continue; - - // Check max dispatch attempts - if (agent.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { - actions.push({ - type: 'transition_bead', - bead_id: agent.current_hook_bead_id, - from: null, - to: 'failed', - reason: 'max dispatch attempts exceeded', - actor: 'system', - }); - actions.push({ - type: 'unhook_agent', - agent_id: agent.bead_id, - reason: 'max dispatch attempts', - }); - continue; - } - - // Check if the hooked bead is open and unblocked + // Check if the hooked bead is open and unblocked, and read its + // dispatch_attempts for the per-bead circuit breaker. 
const hookedRows = z - .object({ status: z.string(), rig_id: z.string().nullable() }) + .object({ + status: z.string(), + rig_id: z.string().nullable(), + dispatch_attempts: z.number(), + last_dispatch_attempt_at: z.string().nullable(), + }) .array() .parse([ ...query( sql, /* sql */ ` - SELECT ${beads.status}, ${beads.rig_id} + SELECT ${beads.status}, ${beads.rig_id}, + ${beads.dispatch_attempts}, ${beads.last_dispatch_attempt_at} FROM ${beads} WHERE ${beads.bead_id} = ? `, @@ -573,6 +649,28 @@ export function reconcileBeads(sql: SqlStorage): Action[] { const hooked = hookedRows[0]; if (hooked.status !== 'open') continue; + // Per-bead dispatch cap (uses bead counter, not agent counter) + if (hooked.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: agent.current_hook_bead_id, + from: null, + to: 'failed', + reason: `max dispatch attempts exceeded (${hooked.dispatch_attempts})`, + actor: 'system', + }); + actions.push({ + type: 'unhook_agent', + agent_id: agent.bead_id, + reason: 'max dispatch attempts', + }); + continue; + } + + // Exponential backoff using bead's last_dispatch_attempt_at + const cooldownMs = getDispatchCooldownMs(hooked.dispatch_attempts); + if (!staleMs(hooked.last_dispatch_attempt_at, cooldownMs)) continue; + // Check blockers const blockerCount = z .object({ cnt: z.number() }) @@ -594,6 +692,9 @@ export function reconcileBeads(sql: SqlStorage): Action[] { if (blockerCount[0]?.cnt > 0) continue; + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + actions.push({ type: 'dispatch_agent', agent_id: agent.bead_id, @@ -612,7 +713,9 @@ export function reconcileBeads(sql: SqlStorage): Action[] { b.${beads.columns.assignee_agent_bead_id}, b.${beads.columns.updated_at}, b.${beads.columns.labels}, - b.${beads.columns.created_by} + b.${beads.columns.created_by}, + b.${beads.columns.dispatch_attempts}, + b.${beads.columns.last_dispatch_attempt_at} FROM ${beads} b WHERE 
b.${beads.columns.type} = 'issue' AND b.${beads.columns.status} = 'in_progress' @@ -649,6 +752,24 @@ export function reconcileBeads(sql: SqlStorage): Action[] { if (hookedAgent.length > 0) continue; + // If the bead has exhausted its dispatch attempts, fail it instead + // of resetting to open (which would cause an infinite retry loop). + if (bead.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: bead.bead_id, + from: 'in_progress', + to: 'failed', + reason: `agent lost, max dispatch attempts exhausted (${bead.dispatch_attempts})`, + actor: 'system', + }); + actions.push({ + type: 'clear_bead_assignee', + bead_id: bead.bead_id, + }); + continue; + } + actions.push({ type: 'transition_bead', bead_id: bead.bead_id, @@ -735,6 +856,11 @@ export function reconcileBeads(sql: SqlStorage): Action[] { } } + // Emit circuit breaker notification (once per reconcile pass) + if (circuitBreakerOpen) { + actions.push(...circuitBreakerActions); + } + return actions; } @@ -746,6 +872,9 @@ export function reconcileBeads(sql: SqlStorage): Action[] { export function reconcileReviewQueue(sql: SqlStorage): Action[] { const actions: Action[] = []; + // Town-level circuit breaker + const circuitBreakerOpen = checkDispatchCircuitBreaker(sql).length > 0; + // Get all MR beads that need attention const mrBeads = MrBeadRow.array().parse([ ...query( @@ -933,6 +1062,9 @@ export function reconcileReviewQueue(sql: SqlStorage): Action[] { if (oldestMr.length === 0) continue; + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + // If no refinery exists or it's busy, emit a dispatch_agent with empty // agent_id — applyAction will create the refinery via getOrCreateAgent. 
if (refinery.length === 0) { @@ -1000,39 +1132,22 @@ export function reconcileReviewQueue(sql: SqlStorage): Action[] { for (const ref of idleRefineries) { if (!ref.current_hook_bead_id) continue; - // Cooldown: skip if last activity is too recent (#1342) - if (!staleMs(ref.last_activity_at, DISPATCH_COOLDOWN_MS)) continue; - - // Circuit-breaker: fail the MR bead after too many attempts (#1342) - if (ref.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { - actions.push({ - type: 'transition_bead', - bead_id: ref.current_hook_bead_id, - from: null, - to: 'failed', - reason: 'refinery max dispatch attempts exceeded', - actor: 'system', - }); - actions.push({ - type: 'unhook_agent', - agent_id: ref.bead_id, - reason: 'max dispatch attempts', - }); - continue; - } - + // Read the bead's dispatch_attempts for the per-bead circuit breaker const mrRows = z .object({ status: z.string(), type: z.string(), rig_id: z.string().nullable(), + dispatch_attempts: z.number(), + last_dispatch_attempt_at: z.string().nullable(), }) .array() .parse([ ...query( sql, /* sql */ ` - SELECT ${beads.status}, ${beads.type}, ${beads.rig_id} + SELECT ${beads.status}, ${beads.type}, ${beads.rig_id}, + ${beads.dispatch_attempts}, ${beads.last_dispatch_attempt_at} FROM ${beads} WHERE ${beads.bead_id} = ? `, @@ -1044,6 +1159,32 @@ export function reconcileReviewQueue(sql: SqlStorage): Action[] { const mr = mrRows[0]; if (mr.type !== 'merge_request' || mr.status !== 'in_progress') continue; + // Per-bead dispatch cap — check before cooldown so max-attempt MR + // beads are failed immediately rather than waiting for the cooldown. 
+ if (mr.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: ref.current_hook_bead_id, + from: null, + to: 'failed', + reason: `refinery max dispatch attempts exceeded (${mr.dispatch_attempts})`, + actor: 'system', + }); + actions.push({ + type: 'unhook_agent', + agent_id: ref.bead_id, + reason: 'max dispatch attempts', + }); + continue; + } + + // Exponential backoff using bead's last_dispatch_attempt_at + const cooldownMs = getDispatchCooldownMs(mr.dispatch_attempts); + if (!staleMs(mr.last_dispatch_attempt_at, cooldownMs)) continue; + + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + // Container status is checked at apply time (async). In shadow mode, // we just note that a dispatch is needed. actions.push({ diff --git a/cloudflare-gastown/src/dos/town/scheduling.ts b/cloudflare-gastown/src/dos/town/scheduling.ts index 2c48347b5..80dc7a1fb 100644 --- a/cloudflare-gastown/src/dos/town/scheduling.ts +++ b/cloudflare-gastown/src/dos/town/scheduling.ts @@ -23,7 +23,7 @@ const LOG = '[scheduling]'; // ── Constants ────────────────────────────────────────────────────────── export const DISPATCH_COOLDOWN_MS = 2 * 60_000; // 2 min -export const MAX_DISPATCH_ATTEMPTS = 20; +export const MAX_DISPATCH_ATTEMPTS = 5; // ── Context passed by the Town DO ────────────────────────────────────── @@ -109,6 +109,18 @@ export async function dispatchAgent( `, [timestamp, agent.id] ); + // Track dispatch attempts on the bead itself so the counter + // survives agent re-creation and hookBead cycles. + query( + ctx.sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.dispatch_attempts} = ${beads.columns.dispatch_attempts} + 1, + ${beads.columns.last_dispatch_attempt_at} = ? + WHERE ${beads.bead_id} = ? 
+ `, + [timestamp, bead.bead_id] + ); const started = await dispatch.startAgentInContainer(ctx.env, ctx.storage, { townId: ctx.townId, @@ -170,6 +182,7 @@ export async function dispatchAgent( agentId: agent.id, beadId: bead.bead_id, role: agent.role, + reason: 'container returned false', }); } return started; @@ -199,6 +212,7 @@ export async function dispatchAgent( agentId: agent.id, beadId: bead.bead_id, role: agent.role, + reason: err instanceof Error ? err.message : String(err), }); return false; } diff --git a/cloudflare-gastown/src/util/analytics.util.ts b/cloudflare-gastown/src/util/analytics.util.ts index 99d1e54c0..af5676b32 100644 --- a/cloudflare-gastown/src/util/analytics.util.ts +++ b/cloudflare-gastown/src/util/analytics.util.ts @@ -38,6 +38,7 @@ export type GastownEventData = { beadId?: string; convoyId?: string; role?: string; // 'polecat' | 'refinery' | 'mayor' + reason?: string; // dispatch failure reason, triage action, etc. beadType?: string; durationMs?: number; value?: number; @@ -79,6 +80,7 @@ export function writeEvent( data.convoyId ?? '', // blob11 data.role ?? '', // blob12 data.beadType ?? '', // blob13 + data.reason ?? '', // blob14 ], doubles: [ data.durationMs ?? 
0, // double1 From 223731e2f7b0dada717d40f4b41a603c29386e97 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 19 Mar 2026 02:53:04 +0000 Subject: [PATCH 03/44] feat(beads): add FailureReason type and extend updateBeadStatus() to persist failure reasons --- cloudflare-gastown/src/dos/town/beads.ts | 5 ++++- cloudflare-gastown/src/dos/town/types.ts | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 cloudflare-gastown/src/dos/town/types.ts diff --git a/cloudflare-gastown/src/dos/town/beads.ts b/cloudflare-gastown/src/dos/town/beads.ts index 85a9e8a6b..82f1951a2 100644 --- a/cloudflare-gastown/src/dos/town/beads.ts +++ b/cloudflare-gastown/src/dos/town/beads.ts @@ -47,6 +47,7 @@ import type { BeadType, } from '../../types'; import type { BeadEventType } from '../../db/tables/bead-events.table'; +import type { FailureReason } from './types'; function generateId(): string { return crypto.randomUUID(); @@ -256,7 +257,8 @@ export function updateBeadStatus( sql: SqlStorage, beadId: string, status: string, - agentId: string | null + agentId: string | null, + failureReason?: FailureReason ): Bead { const bead = getBead(sql, beadId); if (!bead) throw new Error(`Bead ${beadId} not found`); @@ -297,6 +299,7 @@ export function updateBeadStatus( eventType: 'status_changed', oldValue: oldStatus, newValue: status, + metadata: failureReason && status === 'failed' ? { failure_reason: failureReason } : {}, }); // If the bead reached a terminal status and is tracked by a convoy, diff --git a/cloudflare-gastown/src/dos/town/types.ts b/cloudflare-gastown/src/dos/town/types.ts new file mode 100644 index 000000000..2120be142 --- /dev/null +++ b/cloudflare-gastown/src/dos/town/types.ts @@ -0,0 +1,14 @@ +/** + * Shared types for Town DO operations. + */ + +export type FailureReason = { + /** Machine-readable failure code. */ + code: string; + /** Human-readable summary of what went wrong. 
*/ + message: string; + /** Optional detail: stack trace, error output, container logs, etc. */ + details?: string; + /** What triggered the failure: 'scheduler' | 'patrol' | 'refinery' | 'triage' | 'admin' | 'container' */ + source: string; +}; From babf6f0eed3bdb51ffead5797210422c99919956 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 10:13:11 -0500 Subject: [PATCH 04/44] fix(gastown): re-apply lost empty repo handling and KILOCODE_TOKEN refresh propagation PRs #1758 and #1759 were merged to gastown-staging but their changes were lost during batch PR #1828 assembly. This re-applies both fixes: Empty repo handling (PR #1758): - cloneRepoInner: omit --branch flag, detect empty repo via rev-parse, create and push an initial empty commit on the default branch - setupBrowseWorktreeInner: check origin/<defaultBranch> exists before creating tracking branch, skip gracefully if missing - createWorktreeInner: verify HEAD exists before creating branches, throw clear error if repo has no commits KILOCODE_TOKEN refresh propagation (PR #1759): - control-server: add kilocode_token to CONFIG_ENV_MAP for town config push - process-manager: add KILOCODE_TOKEN to LIVE_ENV_KEYS for model update - Town.do: include KILOCODE_TOKEN in syncConfigToContainer env mapping, add alarm-driven refreshKilocodeTokenIfExpiring (daily check, 7-day window on 30-day tokens) - router: remint and push KILOCODE_TOKEN in refreshContainerToken mutation --- .../container/src/control-server.ts | 1 + .../container/src/git-manager.ts | 68 ++++++++++++++- .../container/src/process-manager.ts | 1 + cloudflare-gastown/src/dos/Town.do.ts | 85 +++++++++++++++++++ cloudflare-gastown/src/trpc/router.ts | 7 ++ 5 files changed, 161 insertions(+), 1 deletion(-) diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 70ba58c62..5834a1b81 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ 
b/cloudflare-gastown/container/src/control-server.ts @@ -220,6 +220,7 @@ app.patch('/agents/:agentId/model', async c => { ['github_cli_pat', 'GITHUB_CLI_PAT'], ['git_author_name', 'GASTOWN_GIT_AUTHOR_NAME'], ['git_author_email', 'GASTOWN_GIT_AUTHOR_EMAIL'], + ['kilocode_token', 'KILOCODE_TOKEN'], ]; for (const [cfgKey, envKey] of CONFIG_ENV_MAP) { const val = cfg[cfgKey]; diff --git a/cloudflare-gastown/container/src/git-manager.ts b/cloudflare-gastown/container/src/git-manager.ts index a65d34370..5be0b134a 100644 --- a/cloudflare-gastown/container/src/git-manager.ts +++ b/cloudflare-gastown/container/src/git-manager.ts @@ -275,9 +275,45 @@ async function cloneRepoInner( `Cloning repo for rig ${options.rigId}: hasAuth=${hasAuth} envKeys=[${Object.keys(options.envVars ?? {}).join(',')}]` ); + // Omit --branch: on empty repos (no commits) the default branch doesn't + // exist yet, so `git clone --branch ` would fail with + // "Remote branch not found in upstream origin". await mkdir(dir, { recursive: true }); - await exec('git', ['clone', '--no-checkout', '--branch', options.defaultBranch, authUrl, dir]); + await exec('git', ['clone', '--no-checkout', authUrl, dir]); await configureRepoCredentials(dir, options.gitUrl, options.envVars); + + // Detect empty repo: git rev-parse HEAD fails when there are no commits. + const isEmpty = await exec('git', ['rev-parse', 'HEAD'], dir) + .then(() => false) + .catch(() => true); + + if (isEmpty) { + console.log(`Detected empty repo for rig ${options.rigId}, creating initial commit`); + // Create an initial empty commit so branches/worktrees can be created. + // Use -c flags for user identity (the repo has no config yet and the + // container may not have GIT_AUTHOR_NAME set). 
+ await exec( + 'git', + [ + '-c', + 'user.name=Gastown', + '-c', + 'user.email=gastown@kilo.ai', + 'commit', + '--allow-empty', + '-m', + 'Initial commit', + ], + dir + ); + await exec('git', ['push', 'origin', `HEAD:${options.defaultBranch}`], dir); + // Best-effort: set remote HEAD so future operations know the default branch + await exec('git', ['remote', 'set-head', 'origin', options.defaultBranch], dir).catch(() => {}); + // Fetch so origin/ ref is available locally + await exec('git', ['fetch', 'origin'], dir); + console.log(`Created initial commit on empty repo for rig ${options.rigId}`); + } + console.log(`Cloned repo for rig ${options.rigId}`); return dir; } @@ -303,6 +339,18 @@ async function createWorktreeInner(options: WorktreeOptions): Promise { return dir; } + // Verify the repo has at least one commit. If cloneRepoInner's initial + // commit push failed, there's no HEAD and we can't create branches. + const hasHead = await exec('git', ['rev-parse', '--verify', 'HEAD'], repo) + .then(() => true) + .catch(() => false); + + if (!hasHead) { + throw new Error( + `Cannot create worktree: repo has no commits. Push an initial commit first or re-connect the rig.` + ); + } + // When a startPoint is provided (e.g. a convoy feature branch), create // the new branch from that ref so the agent begins with the latest // merged work from upstream. Without a startPoint, try to track the @@ -398,6 +446,24 @@ async function setupBrowseWorktreeInner(rigId: string, defaultBranch: string): P return browseDir; } + // Check whether origin/ exists. On a repo that was just + // initialized with an empty commit in cloneRepoInner the ref should + // exist, but if the push failed (network, permissions) it may not. 
+ const hasRemoteBranch = await exec( + 'git', + ['rev-parse', '--verify', `origin/${defaultBranch}`], + repo + ) + .then(() => true) + .catch(() => false); + + if (!hasRemoteBranch) { + console.log( + `Skipping browse worktree for rig ${rigId}: origin/${defaultBranch} not found (repo may be empty), will create on next fetch` + ); + return browseDir; + } + // Create a worktree on the default branch for browsing. // Force-create (or reset) the tracking branch to origin/ // so a recreated browse worktree always starts from the latest remote diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index f0008e320..6b9d390ba 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -843,6 +843,7 @@ export async function updateAgentModel( 'GASTOWN_GIT_AUTHOR_NAME', 'GASTOWN_GIT_AUTHOR_EMAIL', 'GASTOWN_DISABLE_AI_COAUTHOR', + 'KILOCODE_TOKEN', ]); const hotSwapEnv: Record = {}; for (const [key, value] of Object.entries(agent.startupEnv)) { diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 1933b3af8..6b97df37d 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -56,6 +56,8 @@ import { query } from '../util/query.util'; import { getAgentDOStub } from './Agent.do'; import { getTownContainerStub } from './TownContainer.do'; +import { generateKiloApiToken } from '../util/kilo-token.util'; +import { resolveSecret } from '../util/secret.util'; import { writeEvent, type GastownEventData } from '../util/analytics.util'; import { logger, withLogTags } from '../util/log.util'; import { BeadPriority } from '../types'; @@ -616,6 +618,7 @@ export class TownDO extends DurableObject { ['GASTOWN_GIT_AUTHOR_NAME', townConfig.git_author_name], ['GASTOWN_GIT_AUTHOR_EMAIL', townConfig.git_author_email], ['GASTOWN_DISABLE_AI_COAUTHOR', townConfig.disable_ai_coauthor ? 
'1' : undefined], + ['KILOCODE_TOKEN', townConfig.kilocode_token], ]; for (const [key, value] of envMapping) { @@ -3056,6 +3059,16 @@ export class TownDO extends DurableObject { error: err instanceof Error ? err.message : String(err), }); } + + // Proactively remint KILOCODE_TOKEN before it expires (30-day + // expiry, checked daily, refreshed within 7 days of expiry). + try { + await this.refreshKilocodeTokenIfExpiring(); + } catch (err) { + logger.warn('alarm: refreshKilocodeTokenIfExpiring failed', { + error: err instanceof Error ? err.message : String(err), + }); + } } // ── Pre-phase: Observe container status for working agents ──────── @@ -3341,6 +3354,78 @@ export class TownDO extends DurableObject { this.lastContainerTokenRefreshAt = now; } + /** + * Proactively remint KILOCODE_TOKEN when it's approaching expiry. + * Throttled to once per day — the 30-day token is refreshed when + * within 7 days of expiry, providing ample safety margin. + * + * Decodes the existing JWT payload to extract user identity (no + * signature verification needed — we're just reading the claims to + * re-sign with the same data). 
+ */ + private lastKilocodeTokenCheckAt = 0; + private async refreshKilocodeTokenIfExpiring(): Promise { + const CHECK_INTERVAL_MS = 24 * 60 * 60_000; // once per day + const REFRESH_WINDOW_SECONDS = 7 * 24 * 60 * 60; // 7 days + const now = Date.now(); + if (now - this.lastKilocodeTokenCheckAt < CHECK_INTERVAL_MS) return; + this.lastKilocodeTokenCheckAt = now; + + const townConfig = await this.getTownConfig(); + const token = townConfig.kilocode_token; + if (!token) return; + + // Decode JWT payload (base64url, no verification) + const parts = token.split('.'); + const encodedPayload = parts[1]; + if (!encodedPayload) return; + const payloadSchema = z.object({ + exp: z.number().optional(), + kiloUserId: z.string().optional(), + apiTokenPepper: z.string().nullable().optional(), + }); + let rawPayload: unknown; + try { + rawPayload = JSON.parse(atob(encodedPayload.replace(/-/g, '+').replace(/_/g, '/'))); + } catch { + return; + } + const parsed = payloadSchema.safeParse(rawPayload); + if (!parsed.success) return; + const payload = parsed.data; + + const exp = payload.exp; + if (!exp) return; + + const nowSeconds = Math.floor(now / 1000); + if (exp - nowSeconds > REFRESH_WINDOW_SECONDS) return; + + // Token expires within 7 days — remint it + const userId = payload.kiloUserId; + if (!userId) return; + + if (!this.env.NEXTAUTH_SECRET) { + logger.warn('refreshKilocodeTokenIfExpiring: NEXTAUTH_SECRET not configured'); + return; + } + const secret = await resolveSecret(this.env.NEXTAUTH_SECRET); + if (!secret) { + logger.warn('refreshKilocodeTokenIfExpiring: failed to resolve NEXTAUTH_SECRET'); + return; + } + + const newToken = await generateKiloApiToken( + { id: userId, api_token_pepper: payload.apiTokenPepper ?? 
null }, + secret + ); + await this.updateTownConfig({ kilocode_token: newToken }); + await this.syncConfigToContainer(); + logger.info('refreshKilocodeTokenIfExpiring: reminted KILOCODE_TOKEN proactively', { + userId, + oldExp: new Date(exp * 1000).toISOString(), + }); + } + private hasActiveWork(): boolean { return scheduling.hasActiveWork(this.sql); } diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index 8169c0d7c..c1551d2d7 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -1018,6 +1018,13 @@ export const gastownRouter = router({ } const townStub = getTownDOStub(ctx.env, input.townId); await townStub.forceRefreshContainerToken(); + + // Also remint and push KILOCODE_TOKEN — this is what actually + // authenticates GT tool calls and is the main reason users hit 401s. + const user = userFromCtx(ctx); + const newKilocodeToken = await mintKilocodeToken(ctx.env, user); + await townStub.updateTownConfig({ kilocode_token: newKilocodeToken }); + await townStub.syncConfigToContainer(); }), // ── Events ────────────────────────────────────────────────────────── From b5be7a6208519f5e3209bcd2702d9019d324259c Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 11:35:47 -0500 Subject: [PATCH 05/44] fix(gastown): live-push config to running container and add restart button MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The token refresh button only persisted env vars to DO storage for next boot — the running container process never received the updated KILOCODE_TOKEN, so the mayor session kept using stale credentials. 
Container config live-push: - Extract syncTownConfigToProcessEnv() in control-server.ts from the duplicated CONFIG_ENV_MAP logic in the model-update handler - Add POST /sync-config endpoint that applies the extracted function - Update syncConfigToContainer in Town.do.ts to also POST /sync-config with X-Town-Config header after persisting env vars to DO storage, so the running container process picks up changes immediately Force restart container: - Add forceRestartContainer tRPC mutation with ownership check - Add Restart Container button in town settings Container section - Regenerate gastown tRPC type declarations --- .../container/src/control-server.ts | 98 +- cloudflare-gastown/src/dos/Town.do.ts | 27 +- cloudflare-gastown/src/trpc/router.ts | 14 + .../settings/TownSettingsPageClient.tsx | 29 + src/lib/gastown/types/router.d.ts | 3973 ++++++++--------- src/lib/gastown/types/schemas.d.ts | 2174 ++++----- 6 files changed, 2861 insertions(+), 3454 deletions(-) diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 5834a1b81..d569b3fba 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -46,6 +46,53 @@ export function getCurrentTownConfig(): Record | null { return lastKnownTownConfig; } +/** + * Sync config-derived env vars from the last-known town config into + * process.env. Safe to call at any time — no-ops when no config is cached. 
+ */ +function syncTownConfigToProcessEnv(): void { + const cfg = getCurrentTownConfig(); + if (!cfg) return; + + const CONFIG_ENV_MAP: Array<[string, string]> = [ + ['github_cli_pat', 'GITHUB_CLI_PAT'], + ['git_author_name', 'GASTOWN_GIT_AUTHOR_NAME'], + ['git_author_email', 'GASTOWN_GIT_AUTHOR_EMAIL'], + ['kilocode_token', 'KILOCODE_TOKEN'], + ]; + for (const [cfgKey, envKey] of CONFIG_ENV_MAP) { + const val = cfg[cfgKey]; + if (typeof val === 'string' && val) { + process.env[envKey] = val; + } else { + delete process.env[envKey]; + } + } + + const gitAuth = cfg.git_auth; + if (typeof gitAuth === 'object' && gitAuth !== null) { + const auth = gitAuth as Record; + for (const [authKey, envKey] of [ + ['github_token', 'GIT_TOKEN'], + ['gitlab_token', 'GITLAB_TOKEN'], + ['gitlab_instance_url', 'GITLAB_INSTANCE_URL'], + ] as const) { + const val = auth[authKey]; + if (typeof val === 'string' && val) { + process.env[envKey] = val; + } else { + delete process.env[envKey]; + } + } + } + + if (cfg.disable_ai_coauthor) { + process.env.GASTOWN_DISABLE_AI_COAUTHOR = '1'; + } else { + delete process.env.GASTOWN_DISABLE_AI_COAUTHOR; + } +} + export const app = new Hono(); // Parse and validate town config from X-Town-Config header (sent by TownDO on @@ -133,6 +180,16 @@ app.post('/refresh-token', async c => { return c.json({ refreshed: true }); }); +// POST /sync-config +// Push config-derived env vars from X-Town-Config into process.env on +// the running container. Called by TownDO.syncConfigToContainer() after +// persisting env vars to DO storage, so the live process picks up +// changes (e.g. refreshed KILOCODE_TOKEN) without a container restart. 
+app.post('/sync-config', async c => { + syncTownConfigToProcessEnv(); + return c.json({ synced: true }); +}); + // POST /agents/start app.post('/agents/start', async c => { const body: unknown = await c.req.json().catch(() => null); @@ -214,46 +271,7 @@ app.patch('/agents/:agentId/model', async c => { // Sync config-derived env vars from X-Town-Config into process.env so // the SDK server restart picks up fresh tokens and git identity. // The middleware already parsed the header into lastKnownTownConfig. - const cfg = getCurrentTownConfig(); - if (cfg) { - const CONFIG_ENV_MAP: Array<[string, string]> = [ - ['github_cli_pat', 'GITHUB_CLI_PAT'], - ['git_author_name', 'GASTOWN_GIT_AUTHOR_NAME'], - ['git_author_email', 'GASTOWN_GIT_AUTHOR_EMAIL'], - ['kilocode_token', 'KILOCODE_TOKEN'], - ]; - for (const [cfgKey, envKey] of CONFIG_ENV_MAP) { - const val = cfg[cfgKey]; - if (typeof val === 'string' && val) { - process.env[envKey] = val; - } else { - delete process.env[envKey]; - } - } - // git_auth tokens - const gitAuth = cfg.git_auth; - if (typeof gitAuth === 'object' && gitAuth !== null) { - const auth = gitAuth as Record; - for (const [authKey, envKey] of [ - ['github_token', 'GIT_TOKEN'], - ['gitlab_token', 'GITLAB_TOKEN'], - ['gitlab_instance_url', 'GITLAB_INSTANCE_URL'], - ] as const) { - const val = auth[authKey]; - if (typeof val === 'string' && val) { - process.env[envKey] = val; - } else { - delete process.env[envKey]; - } - } - } - // disable_ai_coauthor - if (cfg.disable_ai_coauthor) { - process.env.GASTOWN_DISABLE_AI_COAUTHOR = '1'; - } else { - delete process.env.GASTOWN_DISABLE_AI_COAUTHOR; - } - } + syncTownConfigToProcessEnv(); await updateAgentModel( agentId, diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 6b97df37d..10f237a72 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -601,6 +601,10 @@ export class TownDO extends DurableObject { * Push 
config-derived env vars to the running container. Called after * updateTownConfig so that settings changes take effect without a * container restart. New agent processes inherit the updated values. + * + * Two-phase push: + * 1. setEnvVar — persists to DO storage for next boot + * 2. POST /sync-config — hot-swaps process.env on the running container */ async syncConfigToContainer(): Promise { const townId = this.townId; @@ -608,8 +612,7 @@ export class TownDO extends DurableObject { const townConfig = await this.getTownConfig(); const container = getTownContainerStub(this.env, townId); - // Map config fields to their container env var equivalents. - // When a value is set, push it; when cleared, remove it. + // Phase 1: Persist to DO storage for next boot. const envMapping: Array<[string, string | undefined]> = [ ['GIT_TOKEN', townConfig.git_auth?.github_token], ['GITLAB_TOKEN', townConfig.git_auth?.gitlab_token], @@ -632,6 +635,26 @@ export class TownDO extends DurableObject { console.warn(`[Town.do] syncConfigToContainer: ${key} sync failed:`, err); } } + + // Phase 2: Push to the running container's process.env via the + // /sync-config endpoint. The X-Town-Config header delivers the + // full config; the endpoint applies CONFIG_ENV_MAP to process.env. + try { + const containerConfig = await config.buildContainerConfig(this.ctx.storage, this.env); + await container.fetch('http://container/sync-config', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Town-Config': JSON.stringify(containerConfig), + }, + }); + } catch (err) { + // Best-effort — container may not be running yet. + console.warn( + `[Town.do] syncConfigToContainer: /sync-config push failed:`, + err instanceof Error ? 
err.message : err + ); + } } // ══════════════════════════════════════════════════════════════════ diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index c1551d2d7..771d97dda 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -1027,6 +1027,20 @@ export const gastownRouter = router({ await townStub.syncConfigToContainer(); }), + forceRestartContainer: gastownProcedure + .input(z.object({ townId: z.string().uuid() })) + .mutation(async ({ ctx, input }) => { + const ownership = await resolveTownOwnership(ctx.env, ctx, input.townId); + if (ownership.type === 'admin') { + throw new TRPCError({ + code: 'FORBIDDEN', + message: 'Admins cannot restart containers for towns they do not own', + }); + } + const containerStub = getTownContainerStub(ctx.env, input.townId); + await containerStub.destroy(); + }), + // ── Events ────────────────────────────────────────────────────────── getBeadEvents: gastownProcedure diff --git a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx index 19fe2fade..5da787148 100644 --- a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx +++ b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx @@ -26,6 +26,7 @@ import { Variable, Layers, RefreshCw, + RotateCcw, Container, User, Key, @@ -238,6 +239,13 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI }) ); + const restartContainer = useMutation( + trpc.gastown.forceRestartContainer.mutationOptions({ + onSuccess: () => toast.success('Container restarting — it will be back shortly'), + onError: err => toast.error(`Container restart failed: ${err.message}`), + }) + ); + // Local state for form fields const [envVars, setEnvVars] = useState([]); const [githubToken, setGithubToken] = useState(''); @@ -826,6 +834,27 @@ export function TownSettingsPageClient({ townId, 
readOnly = false, organizationI {refreshToken.isPending ? 'Refreshing...' : 'Refresh Token'}
+
+
+

Restart Container

+

+ Destroys the running container and lets it restart fresh. All running + agents will be interrupted but will resume on the next dispatch cycle. +

+
+ +
diff --git a/src/lib/gastown/types/router.d.ts b/src/lib/gastown/types/router.d.ts index b8160316c..2eb903717 100644 --- a/src/lib/gastown/types/router.d.ts +++ b/src/lib/gastown/types/router.d.ts @@ -1,1352 +1,78 @@ import type { TRPCContext } from './init'; -export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< - { +export declare const gastownRouter: import("@trpc/server").TRPCBuiltRouter<{ ctx: TRPCContext; meta: object; - errorShape: import('@trpc/server').TRPCDefaultErrorShape; + errorShape: import("@trpc/server").TRPCDefaultErrorShape; transformer: false; - }, - import('@trpc/server').TRPCDecorateCreateRouterOptions<{ - createTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { - name: string; - }; - output: { - id: string; - name: string; - owner_user_id: string; - created_at: string; - updated_at: string; - }; - meta: object; - }>; - listTowns: import('@trpc/server').TRPCQueryProcedure<{ - input: void; - output: { - id: string; - name: string; - owner_user_id: string; - created_at: string; - updated_at: string; - }[]; - meta: object; - }>; - getTown: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - id: string; - name: string; - owner_user_id: string; - created_at: string; - updated_at: string; - }; - meta: object; - }>; - /** - * Check whether the current user is an admin viewing a town they don't own. - * Used by the frontend to show an admin banner. 
- */ - checkAdminAccess: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - isAdminViewing: boolean; - ownerUserId: string | null; - ownerOrgId: string | null; - }; - meta: object; - }>; - deleteTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - }; - output: void; - meta: object; - }>; - createRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - name: string; - gitUrl: string; - defaultBranch?: string | undefined; - platformIntegrationId?: string | undefined; - }; - output: { - id: string; - town_id: string; - name: string; - git_url: string; - default_branch: string; - platform_integration_id: string | null; - created_at: string; - updated_at: string; - }; - meta: object; - }>; - listRigs: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - id: string; - town_id: string; - name: string; - git_url: string; - default_branch: string; - platform_integration_id: string | null; - created_at: string; - updated_at: string; - }[]; - meta: object; - }>; - getRig: import('@trpc/server').TRPCQueryProcedure<{ - input: { - rigId: string; - townId?: string | undefined; - }; - output: { - id: string; - town_id: string; - name: string; - git_url: string; - default_branch: string; - platform_integration_id: string | null; - created_at: string; - updated_at: string; - agents: { - id: string; - rig_id: string | null; - role: string; - name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; - }[]; - beads: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; 
- body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }[]; - }; - meta: object; - }>; - deleteRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { - rigId: string; - }; - output: void; - meta: object; - }>; - listBeads: import('@trpc/server').TRPCQueryProcedure<{ - input: { - rigId: string; - townId?: string | undefined; - status?: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open' | undefined; - }; - output: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }[]; - meta: object; - }>; - deleteBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { - rigId: string; - beadId: string; - townId?: string | undefined; - }; - output: void; - meta: object; - }>; - updateBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { - rigId: string; - beadId: string; - townId?: string | undefined; - title?: string | undefined; - body?: string | null | undefined; - status?: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open' | undefined; - priority?: 'critical' | 'high' | 'low' | 'medium' | undefined; - labels?: string[] | undefined; - metadata?: Record | undefined; - rig_id?: string | null | undefined; - parent_bead_id?: string | null | undefined; - }; - output: { - bead_id: string; - type: - | 'agent' - 
| 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }; - meta: object; - }>; - listAgents: import('@trpc/server').TRPCQueryProcedure<{ - input: { - rigId: string; - townId?: string | undefined; - }; - output: { - id: string; - rig_id: string | null; - role: string; - name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; - }[]; - meta: object; - }>; - deleteAgent: import('@trpc/server').TRPCMutationProcedure<{ - input: { - rigId: string; - agentId: string; - townId?: string | undefined; - }; - output: void; - meta: object; - }>; - sling: import('@trpc/server').TRPCMutationProcedure<{ - input: { - rigId: string; - title: string; - body?: string | undefined; - model?: string | undefined; - }; - output: { - bead: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }; - agent: { - id: string; - rig_id: string | null; - role: string; - 
name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; - }; - }; - meta: object; - }>; - sendMessage: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - message: string; - model?: string | undefined; - rigId?: string | undefined; - uiContext?: string | undefined; - }; - output: { - agentId: string; - sessionStatus: 'active' | 'idle' | 'starting'; - }; - meta: object; - }>; - getMayorStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - configured: boolean; - townId: string | null; - session: { - agentId: string; - sessionId: string; - status: 'active' | 'idle' | 'starting'; - lastActivityAt: string; - } | null; - }; - meta: object; - }>; - getAlarmStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - alarm: { - nextFireAt: string | null; - intervalMs: number; - intervalLabel: string; - }; - agents: { - working: number; - idle: number; - stalled: number; - dead: number; - total: number; - }; - beads: { - open: number; - inProgress: number; - inReview: number; - failed: number; - triageRequests: number; - }; - patrol: { - guppWarnings: number; - guppEscalations: number; - stalledAgents: number; - orphanedHooks: number; - }; - recentEvents: { - time: string; - type: string; - message: string; - }[]; - }; - meta: object; - }>; - ensureMayor: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - }; - output: { - agentId: string; - sessionStatus: 'active' | 'idle' | 'starting'; - }; - meta: object; - }>; - getAgentStreamUrl: import('@trpc/server').TRPCQueryProcedure<{ - input: { - agentId: string; - townId: string; - }; - output: { - url: string; - ticket: string; - }; - meta: object; - }>; - 
createPtySession: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - agentId: string; - }; - output: { - pty: { - [x: string]: unknown; - id: string; - }; - wsUrl: string; - }; - meta: object; - }>; - resizePtySession: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - agentId: string; - ptyId: string; - cols: number; - rows: number; - }; - output: void; - meta: object; - }>; - getTownConfig: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - env_vars: Record; - git_auth: { - github_token?: string | undefined; - gitlab_token?: string | undefined; - gitlab_instance_url?: string | undefined; - platform_integration_id?: string | undefined; - }; - owner_user_id?: string | undefined; - owner_type: 'org' | 'user'; - owner_id?: string | undefined; - created_by_user_id?: string | undefined; - organization_id?: string | undefined; - kilocode_token?: string | undefined; - default_model?: string | undefined; - role_models?: - | { - mayor?: string | undefined; - refinery?: string | undefined; - polecat?: string | undefined; - } - | undefined; - small_model?: string | undefined; - max_polecats_per_rig?: number | undefined; - merge_strategy: 'direct' | 'pr'; - refinery?: - | { - gates: string[]; - auto_merge: boolean; - require_clean_merge: boolean; - } - | undefined; - alarm_interval_active?: number | undefined; - alarm_interval_idle?: number | undefined; - container?: - | { - sleep_after_minutes?: number | undefined; - } - | undefined; - staged_convoys_default: boolean; - github_cli_pat?: string | undefined; - git_author_name?: string | undefined; - git_author_email?: string | undefined; - disable_ai_coauthor: boolean; - }; - meta: object; - }>; - updateTownConfig: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - config: { - env_vars?: Record | undefined; - git_auth?: - | { - github_token?: string | undefined; - gitlab_token?: string | undefined; - 
gitlab_instance_url?: string | undefined; - platform_integration_id?: string | undefined; - } - | undefined; - owner_user_id?: string | undefined; - owner_type?: 'org' | 'user' | undefined; - owner_id?: string | undefined; - created_by_user_id?: string | undefined; - organization_id?: string | undefined; - kilocode_token?: string | undefined; - default_model?: string | undefined; - role_models?: - | { - mayor?: string | undefined; - refinery?: string | undefined; - polecat?: string | undefined; - } - | undefined; - small_model?: string | undefined; - max_polecats_per_rig?: number | undefined; - merge_strategy?: 'direct' | 'pr' | undefined; - refinery?: - | { - gates?: string[] | undefined; - auto_merge?: boolean | undefined; - require_clean_merge?: boolean | undefined; - } - | undefined; - alarm_interval_active?: number | undefined; - alarm_interval_idle?: number | undefined; - container?: - | { - sleep_after_minutes?: number | undefined; - } - | undefined; - staged_convoys_default?: boolean | undefined; - github_cli_pat?: string | undefined; - git_author_name?: string | undefined; - git_author_email?: string | undefined; - disable_ai_coauthor?: boolean | undefined; - }; - }; - output: { - env_vars: Record; - git_auth: { - github_token?: string | undefined; - gitlab_token?: string | undefined; - gitlab_instance_url?: string | undefined; - platform_integration_id?: string | undefined; - }; - owner_user_id?: string | undefined; - owner_type: 'org' | 'user'; - owner_id?: string | undefined; - created_by_user_id?: string | undefined; - organization_id?: string | undefined; - kilocode_token?: string | undefined; - default_model?: string | undefined; - role_models?: - | { - mayor?: string | undefined; - refinery?: string | undefined; - polecat?: string | undefined; - } - | undefined; - small_model?: string | undefined; - max_polecats_per_rig?: number | undefined; - merge_strategy: 'direct' | 'pr'; - refinery?: - | { - gates: string[]; - auto_merge: boolean; - 
require_clean_merge: boolean; - } - | undefined; - alarm_interval_active?: number | undefined; - alarm_interval_idle?: number | undefined; - container?: - | { - sleep_after_minutes?: number | undefined; - } - | undefined; - staged_convoys_default: boolean; - github_cli_pat?: string | undefined; - git_author_name?: string | undefined; - git_author_email?: string | undefined; - disable_ai_coauthor: boolean; - }; - meta: object; - }>; - refreshContainerToken: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - }; - output: void; - meta: object; - }>; - getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { - rigId: string; - townId?: string | undefined; - beadId?: string | undefined; - since?: string | undefined; - limit?: number | undefined; - }; - output: { - bead_event_id: string; - bead_id: string; - agent_id: string | null; - event_type: string; - old_value: string | null; - new_value: string | null; - metadata: Record; - created_at: string; - rig_id?: string | undefined; - rig_name?: string | undefined; - }[]; - meta: object; - }>; - getTownEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - since?: string | undefined; - limit?: number | undefined; - }; - output: { - bead_event_id: string; - bead_id: string; - agent_id: string | null; - event_type: string; - old_value: string | null; - new_value: string | null; - metadata: Record; - created_at: string; - rig_id?: string | undefined; - rig_name?: string | undefined; - }[]; - meta: object; - }>; - getMergeQueueData: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - rigId?: string | undefined; - limit?: number | undefined; - since?: string | undefined; - }; - output: { - needsAttention: { - openPRs: { - mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; - reviewMetadata: { - branch: string; - 
target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; - }; - sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - failedReviews: { - mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; - reviewMetadata: { - branch: string; - target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; - }; - sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - stalePRs: { - mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; - reviewMetadata: { - branch: string; - target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; - }; - sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - 
name: string; - role: string; - } | null; - rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - }; - activityLog: { - event: { - bead_event_id: string; - bead_id: string; - agent_id: string | null; - event_type: string; - old_value: string | null; - new_value: string | null; - metadata: Record; - created_at: string; - }; - mrBead: { - bead_id: string; - title: string; - type: string; - status: string; - rig_id: string | null; - metadata: Record; - } | null; - sourceBead: { - bead_id: string; - title: string; - status: string; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - reviewMetadata: { - pr_url: string | null; - branch: string | null; - target_branch: string | null; - merge_commit: string | null; - } | null; - }[]; - }; - meta: object; - }>; - listConvoys: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - id: string; - title: string; - status: 'active' | 'landed'; - staged: boolean; - total_beads: number; - closed_beads: number; - created_by: string | null; - created_at: string; - landed_at: string | null; - feature_branch: string | null; - merge_mode: string | null; - beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; - }[]; - dependency_edges: { - bead_id: string; - depends_on_bead_id: string; - }[]; - }[]; - meta: object; - }>; - getConvoy: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - convoyId: string; - }; - output: { - id: string; - title: string; - status: 'active' | 'landed'; - staged: boolean; - total_beads: number; - closed_beads: number; - created_by: string | null; - created_at: string; - landed_at: string | null; - 
feature_branch: string | null; - merge_mode: string | null; - beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; - }[]; - dependency_edges: { - bead_id: string; - depends_on_bead_id: string; - }[]; - } | null; - meta: object; - }>; - closeConvoy: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - convoyId: string; - }; - output: { - id: string; - title: string; - status: 'active' | 'landed'; - staged: boolean; - total_beads: number; - closed_beads: number; - created_by: string | null; - created_at: string; - landed_at: string | null; - feature_branch: string | null; - merge_mode: string | null; - beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; - }[]; - dependency_edges: { - bead_id: string; - depends_on_bead_id: string; - }[]; - } | null; - meta: object; - }>; - startConvoy: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - convoyId: string; - }; - output: { - id: string; - title: string; - status: 'active' | 'landed'; - staged: boolean; - total_beads: number; - closed_beads: number; - created_by: string | null; - created_at: string; - landed_at: string | null; - feature_branch: string | null; - merge_mode: string | null; - beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; - }[]; - dependency_edges: { - bead_id: string; - depends_on_bead_id: string; - }[]; - } | null; - meta: object; - }>; - listOrgTowns: import('@trpc/server').TRPCQueryProcedure<{ - input: { - organizationId: string; - }; - output: { - id: string; - name: string; - owner_org_id: string; - created_by_user_id: string; - created_at: string; - updated_at: string; - }[]; - meta: object; - }>; - createOrgTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { - organizationId: string; - name: string; - }; - output: { 
- id: string; - name: string; - owner_org_id: string; - created_by_user_id: string; - created_at: string; - updated_at: string; - }; - meta: object; - }>; - deleteOrgTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { - organizationId: string; - townId: string; - }; - output: void; - meta: object; - }>; - listOrgRigs: import('@trpc/server').TRPCQueryProcedure<{ - input: { - organizationId: string; - townId: string; - }; - output: { - id: string; - town_id: string; - name: string; - git_url: string; - default_branch: string; - platform_integration_id: string | null; - created_at: string; - updated_at: string; - }[]; - meta: object; - }>; - createOrgRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { - organizationId: string; - townId: string; - name: string; - gitUrl: string; - defaultBranch?: string | undefined; - platformIntegrationId?: string | undefined; - }; - output: { - id: string; - town_id: string; - name: string; - git_url: string; - default_branch: string; - platform_integration_id: string | null; - created_at: string; - updated_at: string; - }; - meta: object; - }>; - adminListBeads: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - status?: 'closed' | 'failed' | 'in_progress' | 'open' | undefined; - type?: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule' - | undefined; - limit?: number | undefined; - }; - output: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }[]; - meta: 
object; - }>; - adminListAgents: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - id: string; - rig_id: string | null; - role: string; - name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; - }[]; - meta: object; - }>; - adminForceRestartContainer: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - }; - output: void; - meta: object; - }>; - adminForceResetAgent: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - agentId: string; - }; - output: void; - meta: object; - }>; - adminForceCloseBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - beadId: string; - }; - output: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }; - meta: object; - }>; - adminForceFailBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { - townId: string; - beadId: string; - }; - output: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 
'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - }; - meta: object; - }>; - adminGetAlarmStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - alarm: { - nextFireAt: string | null; - intervalMs: number; - intervalLabel: string; - }; - agents: { - working: number; - idle: number; - stalled: number; - dead: number; - total: number; - }; - beads: { - open: number; - inProgress: number; - inReview: number; - failed: number; - triageRequests: number; - }; - patrol: { - guppWarnings: number; - guppEscalations: number; - stalledAgents: number; - orphanedHooks: number; - }; - recentEvents: { - time: string; - type: string; - message: string; - }[]; - }; - meta: object; - }>; - adminGetTownEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - beadId?: string | undefined; - since?: string | undefined; - limit?: number | undefined; - }; - output: { - bead_event_id: string; - bead_id: string; - agent_id: string | null; - event_type: string; - old_value: string | null; - new_value: string | null; - metadata: Record; - created_at: string; - rig_id?: string | undefined; - rig_name?: string | undefined; - }[]; - meta: object; - }>; - adminGetBead: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - beadId: string; - }; - output: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; - } | null; 
- meta: object; - }>; - debugAgentMetadata: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: never; - meta: object; - }>; - }> ->; -export type GastownRouter = typeof gastownRouter; -/** - * Wrapped router that nests gastownRouter under a `gastown` key. - * This preserves the `trpc.gastown.X` call pattern on the frontend, - * matching the existing RootRouter shape so components don't need - * to change their procedure paths. - */ -export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRouter< - { - ctx: TRPCContext; - meta: object; - errorShape: import('@trpc/server').TRPCDefaultErrorShape; - transformer: false; - }, - import('@trpc/server').TRPCDecorateCreateRouterOptions<{ - gastown: import('@trpc/server').TRPCBuiltRouter< - { - ctx: TRPCContext; - meta: object; - errorShape: import('@trpc/server').TRPCDefaultErrorShape; - transformer: false; - }, - import('@trpc/server').TRPCDecorateCreateRouterOptions<{ - createTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { +}, import("@trpc/server").TRPCDecorateCreateRouterOptions<{ + createTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { name: string; - }; - output: { + }; + output: { id: string; name: string; owner_user_id: string; created_at: string; updated_at: string; - }; - meta: object; - }>; - listTowns: import('@trpc/server').TRPCQueryProcedure<{ - input: void; - output: { + }; + meta: object; + }>; + listTowns: import("@trpc/server").TRPCQueryProcedure<{ + input: void; + output: { id: string; name: string; owner_user_id: string; created_at: string; updated_at: string; - }[]; - meta: object; - }>; - getTown: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + getTown: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { id: string; name: string; owner_user_id: string; created_at: string; updated_at: string; - }; - meta: object; - }>; - /** - * 
Check whether the current user is an admin viewing a town they don't own. - * Used by the frontend to show an admin banner. - */ - checkAdminAccess: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + /** + * Check whether the current user is an admin viewing a town they don't own. + * Used by the frontend to show an admin banner. + */ + checkAdminAccess: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { isAdminViewing: boolean; ownerUserId: string | null; ownerOrgId: string | null; - }; - meta: object; - }>; - deleteTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + deleteTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; - }; - output: void; - meta: object; - }>; - createRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + createRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; name: string; gitUrl: string; defaultBranch?: string | undefined; platformIntegrationId?: string | undefined; - }; - output: { + }; + output: { id: string; town_id: string; name: string; @@ -1355,14 +81,14 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute platform_integration_id: string | null; created_at: string; updated_at: string; - }; - meta: object; - }>; - listRigs: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + listRigs: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { id: string; town_id: string; name: string; @@ -1371,15 +97,15 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute platform_integration_id: string | null; created_at: string; updated_at: string; - }[]; - meta: object; - }>; - getRig: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + getRig: 
import("@trpc/server").TRPCQueryProcedure<{ + input: { rigId: string; townId?: string | undefined; - }; - output: { + }; + output: { id: string; town_id: string; name: string; @@ -1389,141 +115,120 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; updated_at: string; agents: { - id: string; - rig_id: string | null; - role: string; - name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; + id: string; + rig_id: string | null; + role: string; + name: string; + identity: string; + status: string; + current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; }[]; beads: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | 
null; + created_at: string; + updated_at: string; + closed_at: string | null; }[]; - }; - meta: object; - }>; - deleteRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + deleteRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { rigId: string; - }; - output: void; - meta: object; - }>; - listBeads: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + listBeads: import("@trpc/server").TRPCQueryProcedure<{ + input: { rigId: string; townId?: string | undefined; - status?: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open' | undefined; - }; - output: { + status?: "closed" | "failed" | "in_progress" | "in_review" | "open" | undefined; + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - }[]; - meta: object; - }>; - deleteBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }[]; + meta: object; + }>; + deleteBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { rigId: string; beadId: string; townId?: string | undefined; - }; - output: void; - meta: object; - }>; - updateBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + updateBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { rigId: string; 
beadId: string; townId?: string | undefined; title?: string | undefined; body?: string | null | undefined; - status?: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open' | undefined; - priority?: 'critical' | 'high' | 'low' | 'medium' | undefined; + status?: "closed" | "failed" | "in_progress" | "in_review" | "open" | undefined; + priority?: "critical" | "high" | "low" | "medium" | undefined; labels?: string[] | undefined; metadata?: Record | undefined; rig_id?: string | null | undefined; parent_bead_id?: string | null | undefined; - }; - output: { + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - }; - meta: object; - }>; - listAgents: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + listAgents: import("@trpc/server").TRPCQueryProcedure<{ + input: { rigId: string; townId?: string | undefined; - }; - output: { + }; + output: { id: string; rig_id: string | null; role: string; @@ -1537,347 +242,327 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; agent_status_message: string | null; agent_status_updated_at: string | null; - }[]; - meta: object; - }>; - deleteAgent: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }[]; + meta: object; + }>; + deleteAgent: 
import("@trpc/server").TRPCMutationProcedure<{ + input: { rigId: string; agentId: string; townId?: string | undefined; - }; - output: void; - meta: object; - }>; - sling: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + sling: import("@trpc/server").TRPCMutationProcedure<{ + input: { rigId: string; title: string; body?: string | undefined; model?: string | undefined; - }; - output: { + }; + output: { bead: { - bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; - title: string; - body: string | null; - rig_id: string | null; - parent_bead_id: string | null; - assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; - labels: string[]; - metadata: Record; - created_by: string | null; - created_at: string; - updated_at: string; - closed_at: string | null; + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; }; agent: { - id: string; - rig_id: string | null; - role: string; - name: string; - identity: string; - status: string; - current_hook_bead_id: string | null; - dispatch_attempts: number; - last_activity_at: string | null; - checkpoint?: unknown; - created_at: string; - agent_status_message: string | null; - agent_status_updated_at: string | null; - }; - }; - meta: object; - }>; - sendMessage: import('@trpc/server').TRPCMutationProcedure<{ - input: { + id: string; + rig_id: 
string | null; + role: string; + name: string; + identity: string; + status: string; + current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; + }; + }; + meta: object; + }>; + sendMessage: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; message: string; model?: string | undefined; rigId?: string | undefined; uiContext?: string | undefined; - }; - output: { + }; + output: { agentId: string; - sessionStatus: 'active' | 'idle' | 'starting'; - }; - meta: object; - }>; - getMayorStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { + sessionStatus: "active" | "idle" | "starting"; + }; + meta: object; + }>; + getMayorStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { configured: boolean; townId: string | null; session: { - agentId: string; - sessionId: string; - status: 'active' | 'idle' | 'starting'; - lastActivityAt: string; + agentId: string; + sessionId: string; + status: "active" | "idle" | "starting"; + lastActivityAt: string; } | null; - }; - meta: object; - }>; - getAlarmStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + getAlarmStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { alarm: { - nextFireAt: string | null; - intervalMs: number; - intervalLabel: string; + nextFireAt: string | null; + intervalMs: number; + intervalLabel: string; }; agents: { - working: number; - idle: number; - stalled: number; - dead: number; - total: number; + working: number; + idle: number; + stalled: number; + dead: number; + total: number; }; beads: { - open: number; - inProgress: number; - inReview: number; - failed: number; - triageRequests: number; + open: number; + inProgress: number; + inReview: number; + 
failed: number; + triageRequests: number; }; patrol: { - guppWarnings: number; - guppEscalations: number; - stalledAgents: number; - orphanedHooks: number; + guppWarnings: number; + guppEscalations: number; + stalledAgents: number; + orphanedHooks: number; }; recentEvents: { - time: string; - type: string; - message: string; + time: string; + type: string; + message: string; }[]; - }; - meta: object; - }>; - ensureMayor: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + ensureMayor: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { agentId: string; - sessionStatus: 'active' | 'idle' | 'starting'; - }; - meta: object; - }>; - getAgentStreamUrl: import('@trpc/server').TRPCQueryProcedure<{ - input: { + sessionStatus: "active" | "idle" | "starting"; + }; + meta: object; + }>; + getAgentStreamUrl: import("@trpc/server").TRPCQueryProcedure<{ + input: { agentId: string; townId: string; - }; - output: { + }; + output: { url: string; ticket: string; - }; - meta: object; - }>; - createPtySession: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + createPtySession: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; agentId: string; - }; - output: { + }; + output: { pty: { - [x: string]: unknown; - id: string; + [x: string]: unknown; + id: string; }; wsUrl: string; - }; - meta: object; - }>; - resizePtySession: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + resizePtySession: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; agentId: string; ptyId: string; cols: number; rows: number; - }; - output: void; - meta: object; - }>; - getTownConfig: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + getTownConfig: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { 
env_vars: Record; git_auth: { - github_token?: string | undefined; - gitlab_token?: string | undefined; - gitlab_instance_url?: string | undefined; - platform_integration_id?: string | undefined; + github_token?: string | undefined; + gitlab_token?: string | undefined; + gitlab_instance_url?: string | undefined; + platform_integration_id?: string | undefined; }; owner_user_id?: string | undefined; - owner_type: 'org' | 'user'; + owner_type: "org" | "user"; owner_id?: string | undefined; created_by_user_id?: string | undefined; organization_id?: string | undefined; kilocode_token?: string | undefined; default_model?: string | undefined; - role_models?: - | { - mayor?: string | undefined; - refinery?: string | undefined; - polecat?: string | undefined; - } - | undefined; + role_models?: { + mayor?: string | undefined; + refinery?: string | undefined; + polecat?: string | undefined; + } | undefined; small_model?: string | undefined; max_polecats_per_rig?: number | undefined; - merge_strategy: 'direct' | 'pr'; - refinery?: - | { - gates: string[]; - auto_merge: boolean; - require_clean_merge: boolean; - } - | undefined; + merge_strategy: "direct" | "pr"; + refinery?: { + gates: string[]; + auto_merge: boolean; + require_clean_merge: boolean; + } | undefined; alarm_interval_active?: number | undefined; alarm_interval_idle?: number | undefined; - container?: - | { - sleep_after_minutes?: number | undefined; - } - | undefined; + container?: { + sleep_after_minutes?: number | undefined; + } | undefined; staged_convoys_default: boolean; github_cli_pat?: string | undefined; git_author_name?: string | undefined; git_author_email?: string | undefined; disable_ai_coauthor: boolean; - }; - meta: object; - }>; - updateTownConfig: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + updateTownConfig: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; config: { - env_vars?: Record | undefined; - git_auth?: - | { + env_vars?: 
Record | undefined; + git_auth?: { github_token?: string | undefined; gitlab_token?: string | undefined; gitlab_instance_url?: string | undefined; platform_integration_id?: string | undefined; - } - | undefined; - owner_user_id?: string | undefined; - owner_type?: 'org' | 'user' | undefined; - owner_id?: string | undefined; - created_by_user_id?: string | undefined; - organization_id?: string | undefined; - kilocode_token?: string | undefined; - default_model?: string | undefined; - role_models?: - | { + } | undefined; + owner_user_id?: string | undefined; + owner_type?: "org" | "user" | undefined; + owner_id?: string | undefined; + created_by_user_id?: string | undefined; + organization_id?: string | undefined; + kilocode_token?: string | undefined; + default_model?: string | undefined; + role_models?: { mayor?: string | undefined; refinery?: string | undefined; polecat?: string | undefined; - } - | undefined; - small_model?: string | undefined; - max_polecats_per_rig?: number | undefined; - merge_strategy?: 'direct' | 'pr' | undefined; - refinery?: - | { + } | undefined; + small_model?: string | undefined; + max_polecats_per_rig?: number | undefined; + merge_strategy?: "direct" | "pr" | undefined; + refinery?: { gates?: string[] | undefined; auto_merge?: boolean | undefined; require_clean_merge?: boolean | undefined; - } - | undefined; - alarm_interval_active?: number | undefined; - alarm_interval_idle?: number | undefined; - container?: - | { + } | undefined; + alarm_interval_active?: number | undefined; + alarm_interval_idle?: number | undefined; + container?: { sleep_after_minutes?: number | undefined; - } - | undefined; - staged_convoys_default?: boolean | undefined; - github_cli_pat?: string | undefined; - git_author_name?: string | undefined; - git_author_email?: string | undefined; - disable_ai_coauthor?: boolean | undefined; - }; - }; - output: { + } | undefined; + staged_convoys_default?: boolean | undefined; + github_cli_pat?: string | undefined; + 
git_author_name?: string | undefined; + git_author_email?: string | undefined; + disable_ai_coauthor?: boolean | undefined; + }; + }; + output: { env_vars: Record; git_auth: { - github_token?: string | undefined; - gitlab_token?: string | undefined; - gitlab_instance_url?: string | undefined; - platform_integration_id?: string | undefined; + github_token?: string | undefined; + gitlab_token?: string | undefined; + gitlab_instance_url?: string | undefined; + platform_integration_id?: string | undefined; }; owner_user_id?: string | undefined; - owner_type: 'org' | 'user'; + owner_type: "org" | "user"; owner_id?: string | undefined; created_by_user_id?: string | undefined; organization_id?: string | undefined; kilocode_token?: string | undefined; default_model?: string | undefined; - role_models?: - | { - mayor?: string | undefined; - refinery?: string | undefined; - polecat?: string | undefined; - } - | undefined; + role_models?: { + mayor?: string | undefined; + refinery?: string | undefined; + polecat?: string | undefined; + } | undefined; small_model?: string | undefined; max_polecats_per_rig?: number | undefined; - merge_strategy: 'direct' | 'pr'; - refinery?: - | { - gates: string[]; - auto_merge: boolean; - require_clean_merge: boolean; - } - | undefined; + merge_strategy: "direct" | "pr"; + refinery?: { + gates: string[]; + auto_merge: boolean; + require_clean_merge: boolean; + } | undefined; alarm_interval_active?: number | undefined; alarm_interval_idle?: number | undefined; - container?: - | { - sleep_after_minutes?: number | undefined; - } - | undefined; + container?: { + sleep_after_minutes?: number | undefined; + } | undefined; staged_convoys_default: boolean; github_cli_pat?: string | undefined; git_author_name?: string | undefined; git_author_email?: string | undefined; disable_ai_coauthor: boolean; - }; - meta: object; - }>; - refreshContainerToken: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + 
refreshContainerToken: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; - }; - output: void; - meta: object; - }>; - getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + forceRestartContainer: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; + getBeadEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { rigId: string; townId?: string | undefined; beadId?: string | undefined; since?: string | undefined; limit?: number | undefined; - }; - output: { + }; + output: { bead_event_id: string; bead_id: string; agent_id: string | null; @@ -1888,16 +573,16 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; rig_id?: string | undefined; rig_name?: string | undefined; - }[]; - meta: object; - }>; - getTownEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + getTownEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; since?: string | undefined; limit?: number | undefined; - }; - output: { + }; + output: { bead_event_id: string; bead_id: string; agent_id: string | null; @@ -1908,198 +593,198 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; rig_id?: string | undefined; rig_name?: string | undefined; - }[]; - meta: object; - }>; - getMergeQueueData: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + getMergeQueueData: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; rigId?: string | undefined; limit?: number | undefined; since?: string | undefined; - }; - output: { + }; + output: { needsAttention: { - openPRs: { - mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; 
- reviewMetadata: { - branch: string; - target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; + openPRs: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + target_branch: string; + merge_commit: string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + failedReviews: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + target_branch: string; + merge_commit: string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + stalePRs: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + target_branch: string; + merge_commit: 
string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + }; + activityLog: { + event: { + bead_event_id: string; + bead_id: string; + agent_id: string | null; + event_type: string; + old_value: string | null; + new_value: string | null; + metadata: Record; + created_at: string; }; - sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - failedReviews: { mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; - reviewMetadata: { - branch: string; - target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; - }; + bead_id: string; + title: string; + type: string; + status: string; + rig_id: string | null; + metadata: Record; + } | null; sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; + bead_id: string; + title: string; + status: string; } | null; convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; + convoy_id: string; + 
title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; } | null; agent: { - agent_id: string; - name: string; - role: string; + agent_id: string; + name: string; + role: string; } | null; rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - stalePRs: { - mrBead: { - bead_id: string; - status: string; - title: string; - body: string | null; - rig_id: string | null; - created_at: string; - updated_at: string; - metadata: Record; - }; reviewMetadata: { - branch: string; - target_branch: string; - merge_commit: string | null; - pr_url: string | null; - retry_count: number; - }; - sourceBead: { - bead_id: string; - title: string; - status: string; - body: string | null; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; + pr_url: string | null; + branch: string | null; + target_branch: string | null; + merge_commit: string | null; } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - staleSince: string | null; - failureReason: string | null; - }[]; - }; - activityLog: { - event: { - bead_event_id: string; - bead_id: string; - agent_id: string | null; - event_type: string; - old_value: string | null; - new_value: string | null; - metadata: Record; - created_at: string; - }; - mrBead: { - bead_id: string; - title: string; - type: string; - status: string; - rig_id: string | null; - metadata: Record; - } | null; - sourceBead: { - bead_id: string; - title: string; - status: string; - } | null; - convoy: { - convoy_id: string; - title: string; - total_beads: number; - closed_beads: number; - feature_branch: string | null; - merge_mode: string | null; - } | null; - agent: { - agent_id: string; - name: string; - role: string; - } | null; - rigName: string | null; - reviewMetadata: { - pr_url: 
string | null; - branch: string | null; - target_branch: string | null; - merge_commit: string | null; - } | null; }[]; - }; - meta: object; - }>; - listConvoys: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + listConvoys: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { id: string; title: string; - status: 'active' | 'landed'; + status: "active" | "landed"; staged: boolean; total_beads: number; closed_beads: number; @@ -2109,28 +794,28 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute feature_branch: string | null; merge_mode: string | null; beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; }[]; dependency_edges: { - bead_id: string; - depends_on_bead_id: string; + bead_id: string; + depends_on_bead_id: string; }[]; - }[]; - meta: object; - }>; - getConvoy: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + getConvoy: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; convoyId: string; - }; - output: { + }; + output: { id: string; title: string; - status: 'active' | 'landed'; + status: "active" | "landed"; staged: boolean; total_beads: number; closed_beads: number; @@ -2140,28 +825,28 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute feature_branch: string | null; merge_mode: string | null; beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; }[]; dependency_edges: { - bead_id: string; - depends_on_bead_id: string; + bead_id: string; + depends_on_bead_id: string; }[]; - } | 
null; - meta: object; - }>; - closeConvoy: import('@trpc/server').TRPCMutationProcedure<{ - input: { + } | null; + meta: object; + }>; + closeConvoy: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; convoyId: string; - }; - output: { + }; + output: { id: string; title: string; - status: 'active' | 'landed'; + status: "active" | "landed"; staged: boolean; total_beads: number; closed_beads: number; @@ -2171,28 +856,28 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute feature_branch: string | null; merge_mode: string | null; beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; }[]; dependency_edges: { - bead_id: string; - depends_on_bead_id: string; + bead_id: string; + depends_on_bead_id: string; }[]; - } | null; - meta: object; - }>; - startConvoy: import('@trpc/server').TRPCMutationProcedure<{ - input: { + } | null; + meta: object; + }>; + startConvoy: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; convoyId: string; - }; - output: { + }; + output: { id: string; title: string; - status: 'active' | 'landed'; + status: "active" | "landed"; staged: boolean; total_beads: number; closed_beads: number; @@ -2202,62 +887,62 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute feature_branch: string | null; merge_mode: string | null; beads: { - bead_id: string; - title: string; - status: string; - rig_id: string | null; - assignee_agent_name: string | null; + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; }[]; dependency_edges: { - bead_id: string; - depends_on_bead_id: string; + bead_id: string; + depends_on_bead_id: string; }[]; - } | null; - meta: object; - }>; - listOrgTowns: 
import('@trpc/server').TRPCQueryProcedure<{ - input: { + } | null; + meta: object; + }>; + listOrgTowns: import("@trpc/server").TRPCQueryProcedure<{ + input: { organizationId: string; - }; - output: { + }; + output: { id: string; name: string; owner_org_id: string; created_by_user_id: string; created_at: string; updated_at: string; - }[]; - meta: object; - }>; - createOrgTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }[]; + meta: object; + }>; + createOrgTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { organizationId: string; name: string; - }; - output: { + }; + output: { id: string; name: string; owner_org_id: string; created_by_user_id: string; created_at: string; updated_at: string; - }; - meta: object; - }>; - deleteOrgTown: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + deleteOrgTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { organizationId: string; townId: string; - }; - output: void; - meta: object; - }>; - listOrgRigs: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + listOrgRigs: import("@trpc/server").TRPCQueryProcedure<{ + input: { organizationId: string; townId: string; - }; - output: { + }; + output: { id: string; town_id: string; name: string; @@ -2266,19 +951,19 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute platform_integration_id: string | null; created_at: string; updated_at: string; - }[]; - meta: object; - }>; - createOrgRig: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }[]; + meta: object; + }>; + createOrgRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { organizationId: string; townId: string; name: string; gitUrl: string; defaultBranch?: string | undefined; platformIntegrationId?: string | undefined; - }; - output: { + }; + output: { id: string; town_id: string; name: string; @@ -2287,55 +972,40 @@ export declare const wrappedGastownRouter: 
import('@trpc/server').TRPCBuiltRoute platform_integration_id: string | null; created_at: string; updated_at: string; - }; - meta: object; - }>; - adminListBeads: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + adminListBeads: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - status?: 'closed' | 'failed' | 'in_progress' | 'open' | undefined; - type?: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule' - | undefined; + status?: "closed" | "failed" | "in_progress" | "open" | undefined; + type?: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule" | undefined; limit?: number | undefined; - }; - output: { + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - }[]; - meta: object; - }>; - adminListAgents: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + adminListAgents: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { id: string; rig_id: string | null; role: string; @@ -2349,132 +1019,118 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; agent_status_message: string | null; agent_status_updated_at: string | 
null; - }[]; - meta: object; - }>; - adminForceRestartContainer: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }[]; + meta: object; + }>; + adminForceRestartContainer: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; - }; - output: void; - meta: object; - }>; - adminForceResetAgent: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + adminForceResetAgent: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; agentId: string; - }; - output: void; - meta: object; - }>; - adminForceCloseBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + output: void; + meta: object; + }>; + adminForceCloseBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; beadId: string; - }; - output: { + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - }; - meta: object; - }>; - adminForceFailBead: import('@trpc/server').TRPCMutationProcedure<{ - input: { + }; + meta: object; + }>; + adminForceFailBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { townId: string; beadId: string; - }; - output: { + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 
'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - }; - meta: object; - }>; - adminGetAlarmStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + adminGetAlarmStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: { + }; + output: { alarm: { - nextFireAt: string | null; - intervalMs: number; - intervalLabel: string; + nextFireAt: string | null; + intervalMs: number; + intervalLabel: string; }; agents: { - working: number; - idle: number; - stalled: number; - dead: number; - total: number; + working: number; + idle: number; + stalled: number; + dead: number; + total: number; }; beads: { - open: number; - inProgress: number; - inReview: number; - failed: number; - triageRequests: number; + open: number; + inProgress: number; + inReview: number; + failed: number; + triageRequests: number; }; patrol: { - guppWarnings: number; - guppEscalations: number; - stalledAgents: number; - orphanedHooks: number; + guppWarnings: number; + guppEscalations: number; + stalledAgents: number; + orphanedHooks: number; }; recentEvents: { - time: string; - type: string; - message: string; + time: string; + type: string; + message: string; }[]; - }; - meta: object; - }>; - adminGetTownEvents: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }; + meta: object; + }>; + adminGetTownEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; beadId?: string | 
undefined; since?: string | undefined; limit?: number | undefined; - }; - output: { + }; + output: { bead_event_id: string; bead_id: string; agent_id: string | null; @@ -2485,49 +1141,1230 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute created_at: string; rig_id?: string | undefined; rig_name?: string | undefined; - }[]; - meta: object; - }>; - adminGetBead: import('@trpc/server').TRPCQueryProcedure<{ - input: { + }[]; + meta: object; + }>; + adminGetBead: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; beadId: string; - }; - output: { + }; + output: { bead_id: string; - type: - | 'agent' - | 'convoy' - | 'escalation' - | 'issue' - | 'merge_request' - | 'message' - | 'molecule'; - status: 'closed' | 'failed' | 'in_progress' | 'in_review' | 'open'; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; title: string; body: string | null; rig_id: string | null; parent_bead_id: string | null; assignee_agent_bead_id: string | null; - priority: 'critical' | 'high' | 'low' | 'medium'; + priority: "critical" | "high" | "low" | "medium"; labels: string[]; metadata: Record; created_by: string | null; created_at: string; updated_at: string; closed_at: string | null; - } | null; - meta: object; - }>; - debugAgentMetadata: import('@trpc/server').TRPCQueryProcedure<{ - input: { + } | null; + meta: object; + }>; + debugAgentMetadata: import("@trpc/server").TRPCQueryProcedure<{ + input: { townId: string; - }; - output: never; - meta: object; + }; + output: never; + meta: object; + }>; +}>>; +export type GastownRouter = typeof gastownRouter; +/** + * Wrapped router that nests gastownRouter under a `gastown` key. + * This preserves the `trpc.gastown.X` call pattern on the frontend, + * matching the existing RootRouter shape so components don't need + * to change their procedure paths. 
+ */ +export declare const wrappedGastownRouter: import("@trpc/server").TRPCBuiltRouter<{ + ctx: TRPCContext; + meta: object; + errorShape: import("@trpc/server").TRPCDefaultErrorShape; + transformer: false; +}, import("@trpc/server").TRPCDecorateCreateRouterOptions<{ + gastown: import("@trpc/server").TRPCBuiltRouter<{ + ctx: TRPCContext; + meta: object; + errorShape: import("@trpc/server").TRPCDefaultErrorShape; + transformer: false; + }, import("@trpc/server").TRPCDecorateCreateRouterOptions<{ + createTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { + name: string; + }; + output: { + id: string; + name: string; + owner_user_id: string; + created_at: string; + updated_at: string; + }; + meta: object; + }>; + listTowns: import("@trpc/server").TRPCQueryProcedure<{ + input: void; + output: { + id: string; + name: string; + owner_user_id: string; + created_at: string; + updated_at: string; + }[]; + meta: object; + }>; + getTown: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + id: string; + name: string; + owner_user_id: string; + created_at: string; + updated_at: string; + }; + meta: object; + }>; + /** + * Check whether the current user is an admin viewing a town they don't own. + * Used by the frontend to show an admin banner. 
+ */ + checkAdminAccess: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + isAdminViewing: boolean; + ownerUserId: string | null; + ownerOrgId: string | null; + }; + meta: object; + }>; + deleteTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; + createRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + name: string; + gitUrl: string; + defaultBranch?: string | undefined; + platformIntegrationId?: string | undefined; + }; + output: { + id: string; + town_id: string; + name: string; + git_url: string; + default_branch: string; + platform_integration_id: string | null; + created_at: string; + updated_at: string; + }; + meta: object; + }>; + listRigs: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + id: string; + town_id: string; + name: string; + git_url: string; + default_branch: string; + platform_integration_id: string | null; + created_at: string; + updated_at: string; + }[]; + meta: object; + }>; + getRig: import("@trpc/server").TRPCQueryProcedure<{ + input: { + rigId: string; + townId?: string | undefined; + }; + output: { + id: string; + town_id: string; + name: string; + git_url: string; + default_branch: string; + platform_integration_id: string | null; + created_at: string; + updated_at: string; + agents: { + id: string; + rig_id: string | null; + role: string; + name: string; + identity: string; + status: string; + current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; + }[]; + beads: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string 
| null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }[]; + }; + meta: object; + }>; + deleteRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { + rigId: string; + }; + output: void; + meta: object; + }>; + listBeads: import("@trpc/server").TRPCQueryProcedure<{ + input: { + rigId: string; + townId?: string | undefined; + status?: "closed" | "failed" | "in_progress" | "in_review" | "open" | undefined; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }[]; + meta: object; + }>; + deleteBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { + rigId: string; + beadId: string; + townId?: string | undefined; + }; + output: void; + meta: object; + }>; + updateBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { + rigId: string; + beadId: string; + townId?: string | undefined; + title?: string | undefined; + body?: string | null | undefined; + status?: "closed" | "failed" | "in_progress" | "in_review" | "open" | undefined; + priority?: "critical" | "high" | "low" | "medium" | undefined; + labels?: string[] | undefined; + metadata?: Record | undefined; + rig_id?: string | null | undefined; + parent_bead_id?: string | null | undefined; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | 
"merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }; + meta: object; + }>; + listAgents: import("@trpc/server").TRPCQueryProcedure<{ + input: { + rigId: string; + townId?: string | undefined; + }; + output: { + id: string; + rig_id: string | null; + role: string; + name: string; + identity: string; + status: string; + current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; + }[]; + meta: object; + }>; + deleteAgent: import("@trpc/server").TRPCMutationProcedure<{ + input: { + rigId: string; + agentId: string; + townId?: string | undefined; + }; + output: void; + meta: object; + }>; + sling: import("@trpc/server").TRPCMutationProcedure<{ + input: { + rigId: string; + title: string; + body?: string | undefined; + model?: string | undefined; + }; + output: { + bead: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }; + agent: { + id: string; + rig_id: string | null; + role: string; + name: string; + identity: string; + status: string; + 
current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; + }; + }; + meta: object; + }>; + sendMessage: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + message: string; + model?: string | undefined; + rigId?: string | undefined; + uiContext?: string | undefined; + }; + output: { + agentId: string; + sessionStatus: "active" | "idle" | "starting"; + }; + meta: object; + }>; + getMayorStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + configured: boolean; + townId: string | null; + session: { + agentId: string; + sessionId: string; + status: "active" | "idle" | "starting"; + lastActivityAt: string; + } | null; + }; + meta: object; + }>; + getAlarmStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + alarm: { + nextFireAt: string | null; + intervalMs: number; + intervalLabel: string; + }; + agents: { + working: number; + idle: number; + stalled: number; + dead: number; + total: number; + }; + beads: { + open: number; + inProgress: number; + inReview: number; + failed: number; + triageRequests: number; + }; + patrol: { + guppWarnings: number; + guppEscalations: number; + stalledAgents: number; + orphanedHooks: number; + }; + recentEvents: { + time: string; + type: string; + message: string; + }[]; + }; + meta: object; + }>; + ensureMayor: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: { + agentId: string; + sessionStatus: "active" | "idle" | "starting"; + }; + meta: object; + }>; + getAgentStreamUrl: import("@trpc/server").TRPCQueryProcedure<{ + input: { + agentId: string; + townId: string; + }; + output: { + url: string; + ticket: string; + }; + meta: object; + }>; + createPtySession: import("@trpc/server").TRPCMutationProcedure<{ + 
input: { + townId: string; + agentId: string; + }; + output: { + pty: { + [x: string]: unknown; + id: string; + }; + wsUrl: string; + }; + meta: object; + }>; + resizePtySession: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + agentId: string; + ptyId: string; + cols: number; + rows: number; + }; + output: void; + meta: object; + }>; + getTownConfig: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + env_vars: Record; + git_auth: { + github_token?: string | undefined; + gitlab_token?: string | undefined; + gitlab_instance_url?: string | undefined; + platform_integration_id?: string | undefined; + }; + owner_user_id?: string | undefined; + owner_type: "org" | "user"; + owner_id?: string | undefined; + created_by_user_id?: string | undefined; + organization_id?: string | undefined; + kilocode_token?: string | undefined; + default_model?: string | undefined; + role_models?: { + mayor?: string | undefined; + refinery?: string | undefined; + polecat?: string | undefined; + } | undefined; + small_model?: string | undefined; + max_polecats_per_rig?: number | undefined; + merge_strategy: "direct" | "pr"; + refinery?: { + gates: string[]; + auto_merge: boolean; + require_clean_merge: boolean; + } | undefined; + alarm_interval_active?: number | undefined; + alarm_interval_idle?: number | undefined; + container?: { + sleep_after_minutes?: number | undefined; + } | undefined; + staged_convoys_default: boolean; + github_cli_pat?: string | undefined; + git_author_name?: string | undefined; + git_author_email?: string | undefined; + disable_ai_coauthor: boolean; + }; + meta: object; + }>; + updateTownConfig: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + config: { + env_vars?: Record | undefined; + git_auth?: { + github_token?: string | undefined; + gitlab_token?: string | undefined; + gitlab_instance_url?: string | undefined; + platform_integration_id?: string | undefined; + } | 
undefined; + owner_user_id?: string | undefined; + owner_type?: "org" | "user" | undefined; + owner_id?: string | undefined; + created_by_user_id?: string | undefined; + organization_id?: string | undefined; + kilocode_token?: string | undefined; + default_model?: string | undefined; + role_models?: { + mayor?: string | undefined; + refinery?: string | undefined; + polecat?: string | undefined; + } | undefined; + small_model?: string | undefined; + max_polecats_per_rig?: number | undefined; + merge_strategy?: "direct" | "pr" | undefined; + refinery?: { + gates?: string[] | undefined; + auto_merge?: boolean | undefined; + require_clean_merge?: boolean | undefined; + } | undefined; + alarm_interval_active?: number | undefined; + alarm_interval_idle?: number | undefined; + container?: { + sleep_after_minutes?: number | undefined; + } | undefined; + staged_convoys_default?: boolean | undefined; + github_cli_pat?: string | undefined; + git_author_name?: string | undefined; + git_author_email?: string | undefined; + disable_ai_coauthor?: boolean | undefined; + }; + }; + output: { + env_vars: Record; + git_auth: { + github_token?: string | undefined; + gitlab_token?: string | undefined; + gitlab_instance_url?: string | undefined; + platform_integration_id?: string | undefined; + }; + owner_user_id?: string | undefined; + owner_type: "org" | "user"; + owner_id?: string | undefined; + created_by_user_id?: string | undefined; + organization_id?: string | undefined; + kilocode_token?: string | undefined; + default_model?: string | undefined; + role_models?: { + mayor?: string | undefined; + refinery?: string | undefined; + polecat?: string | undefined; + } | undefined; + small_model?: string | undefined; + max_polecats_per_rig?: number | undefined; + merge_strategy: "direct" | "pr"; + refinery?: { + gates: string[]; + auto_merge: boolean; + require_clean_merge: boolean; + } | undefined; + alarm_interval_active?: number | undefined; + alarm_interval_idle?: number | undefined; 
+ container?: { + sleep_after_minutes?: number | undefined; + } | undefined; + staged_convoys_default: boolean; + github_cli_pat?: string | undefined; + git_author_name?: string | undefined; + git_author_email?: string | undefined; + disable_ai_coauthor: boolean; + }; + meta: object; + }>; + refreshContainerToken: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; + forceRestartContainer: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; + getBeadEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { + rigId: string; + townId?: string | undefined; + beadId?: string | undefined; + since?: string | undefined; + limit?: number | undefined; + }; + output: { + bead_event_id: string; + bead_id: string; + agent_id: string | null; + event_type: string; + old_value: string | null; + new_value: string | null; + metadata: Record; + created_at: string; + rig_id?: string | undefined; + rig_name?: string | undefined; + }[]; + meta: object; + }>; + getTownEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + since?: string | undefined; + limit?: number | undefined; + }; + output: { + bead_event_id: string; + bead_id: string; + agent_id: string | null; + event_type: string; + old_value: string | null; + new_value: string | null; + metadata: Record; + created_at: string; + rig_id?: string | undefined; + rig_name?: string | undefined; + }[]; + meta: object; + }>; + getMergeQueueData: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + rigId?: string | undefined; + limit?: number | undefined; + since?: string | undefined; + }; + output: { + needsAttention: { + openPRs: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + 
target_branch: string; + merge_commit: string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + failedReviews: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + target_branch: string; + merge_commit: string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + stalePRs: { + mrBead: { + bead_id: string; + status: string; + title: string; + body: string | null; + rig_id: string | null; + created_at: string; + updated_at: string; + metadata: Record; + }; + reviewMetadata: { + branch: string; + target_branch: string; + merge_commit: string | null; + pr_url: string | null; + retry_count: number; + }; + sourceBead: { + bead_id: string; + title: string; + status: string; + body: string | null; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + 
name: string; + role: string; + } | null; + rigName: string | null; + staleSince: string | null; + failureReason: string | null; + }[]; + }; + activityLog: { + event: { + bead_event_id: string; + bead_id: string; + agent_id: string | null; + event_type: string; + old_value: string | null; + new_value: string | null; + metadata: Record; + created_at: string; + }; + mrBead: { + bead_id: string; + title: string; + type: string; + status: string; + rig_id: string | null; + metadata: Record; + } | null; + sourceBead: { + bead_id: string; + title: string; + status: string; + } | null; + convoy: { + convoy_id: string; + title: string; + total_beads: number; + closed_beads: number; + feature_branch: string | null; + merge_mode: string | null; + } | null; + agent: { + agent_id: string; + name: string; + role: string; + } | null; + rigName: string | null; + reviewMetadata: { + pr_url: string | null; + branch: string | null; + target_branch: string | null; + merge_commit: string | null; + } | null; + }[]; + }; + meta: object; + }>; + listConvoys: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + id: string; + title: string; + status: "active" | "landed"; + staged: boolean; + total_beads: number; + closed_beads: number; + created_by: string | null; + created_at: string; + landed_at: string | null; + feature_branch: string | null; + merge_mode: string | null; + beads: { + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; + }[]; + dependency_edges: { + bead_id: string; + depends_on_bead_id: string; + }[]; + }[]; + meta: object; + }>; + getConvoy: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + convoyId: string; + }; + output: { + id: string; + title: string; + status: "active" | "landed"; + staged: boolean; + total_beads: number; + closed_beads: number; + created_by: string | null; + created_at: string; + landed_at: string | null; + 
feature_branch: string | null; + merge_mode: string | null; + beads: { + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; + }[]; + dependency_edges: { + bead_id: string; + depends_on_bead_id: string; + }[]; + } | null; + meta: object; + }>; + closeConvoy: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + convoyId: string; + }; + output: { + id: string; + title: string; + status: "active" | "landed"; + staged: boolean; + total_beads: number; + closed_beads: number; + created_by: string | null; + created_at: string; + landed_at: string | null; + feature_branch: string | null; + merge_mode: string | null; + beads: { + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; + }[]; + dependency_edges: { + bead_id: string; + depends_on_bead_id: string; + }[]; + } | null; + meta: object; + }>; + startConvoy: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + convoyId: string; + }; + output: { + id: string; + title: string; + status: "active" | "landed"; + staged: boolean; + total_beads: number; + closed_beads: number; + created_by: string | null; + created_at: string; + landed_at: string | null; + feature_branch: string | null; + merge_mode: string | null; + beads: { + bead_id: string; + title: string; + status: string; + rig_id: string | null; + assignee_agent_name: string | null; + }[]; + dependency_edges: { + bead_id: string; + depends_on_bead_id: string; + }[]; + } | null; + meta: object; + }>; + listOrgTowns: import("@trpc/server").TRPCQueryProcedure<{ + input: { + organizationId: string; + }; + output: { + id: string; + name: string; + owner_org_id: string; + created_by_user_id: string; + created_at: string; + updated_at: string; + }[]; + meta: object; + }>; + createOrgTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { + organizationId: string; + name: string; + }; + output: { 
+ id: string; + name: string; + owner_org_id: string; + created_by_user_id: string; + created_at: string; + updated_at: string; + }; + meta: object; + }>; + deleteOrgTown: import("@trpc/server").TRPCMutationProcedure<{ + input: { + organizationId: string; + townId: string; + }; + output: void; + meta: object; + }>; + listOrgRigs: import("@trpc/server").TRPCQueryProcedure<{ + input: { + organizationId: string; + townId: string; + }; + output: { + id: string; + town_id: string; + name: string; + git_url: string; + default_branch: string; + platform_integration_id: string | null; + created_at: string; + updated_at: string; + }[]; + meta: object; + }>; + createOrgRig: import("@trpc/server").TRPCMutationProcedure<{ + input: { + organizationId: string; + townId: string; + name: string; + gitUrl: string; + defaultBranch?: string | undefined; + platformIntegrationId?: string | undefined; + }; + output: { + id: string; + town_id: string; + name: string; + git_url: string; + default_branch: string; + platform_integration_id: string | null; + created_at: string; + updated_at: string; + }; + meta: object; + }>; + adminListBeads: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + status?: "closed" | "failed" | "in_progress" | "open" | undefined; + type?: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule" | undefined; + limit?: number | undefined; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }[]; + meta: object; + }>; + adminListAgents: 
import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + id: string; + rig_id: string | null; + role: string; + name: string; + identity: string; + status: string; + current_hook_bead_id: string | null; + dispatch_attempts: number; + last_activity_at: string | null; + checkpoint?: unknown; + created_at: string; + agent_status_message: string | null; + agent_status_updated_at: string | null; + }[]; + meta: object; + }>; + adminForceRestartContainer: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; + adminForceResetAgent: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + agentId: string; + }; + output: void; + meta: object; + }>; + adminForceCloseBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + beadId: string; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }; + meta: object; + }>; + adminForceFailBead: import("@trpc/server").TRPCMutationProcedure<{ + input: { + townId: string; + beadId: string; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + 
created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + }; + meta: object; + }>; + adminGetAlarmStatus: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + alarm: { + nextFireAt: string | null; + intervalMs: number; + intervalLabel: string; + }; + agents: { + working: number; + idle: number; + stalled: number; + dead: number; + total: number; + }; + beads: { + open: number; + inProgress: number; + inReview: number; + failed: number; + triageRequests: number; + }; + patrol: { + guppWarnings: number; + guppEscalations: number; + stalledAgents: number; + orphanedHooks: number; + }; + recentEvents: { + time: string; + type: string; + message: string; + }[]; + }; + meta: object; + }>; + adminGetTownEvents: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + beadId?: string | undefined; + since?: string | undefined; + limit?: number | undefined; + }; + output: { + bead_event_id: string; + bead_id: string; + agent_id: string | null; + event_type: string; + old_value: string | null; + new_value: string | null; + metadata: Record; + created_at: string; + rig_id?: string | undefined; + rig_name?: string | undefined; + }[]; + meta: object; + }>; + adminGetBead: import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + beadId: string; + }; + output: { + bead_id: string; + type: "agent" | "convoy" | "escalation" | "issue" | "merge_request" | "message" | "molecule"; + status: "closed" | "failed" | "in_progress" | "in_review" | "open"; + title: string; + body: string | null; + rig_id: string | null; + parent_bead_id: string | null; + assignee_agent_bead_id: string | null; + priority: "critical" | "high" | "low" | "medium"; + labels: string[]; + metadata: Record; + created_by: string | null; + created_at: string; + updated_at: string; + closed_at: string | null; + } | null; + meta: object; + }>; + debugAgentMetadata: 
import("@trpc/server").TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: never; + meta: object; }>; - }> - >; - }> ->; + }>>; +}>>; export type WrappedGastownRouter = typeof wrappedGastownRouter; diff --git a/src/lib/gastown/types/schemas.d.ts b/src/lib/gastown/types/schemas.d.ts index d56dbe856..02530a5c3 100644 --- a/src/lib/gastown/types/schemas.d.ts +++ b/src/lib/gastown/types/schemas.d.ts @@ -1,16 +1,12 @@ import type { z } from 'zod'; -export declare const TownOutput: z.ZodObject< - { +export declare const TownOutput: z.ZodObject<{ id: z.ZodString; name: z.ZodString; owner_user_id: z.ZodString; created_at: z.ZodString; updated_at: z.ZodString; - }, - z.core.$strip ->; -export declare const RigOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const RigOutput: z.ZodObject<{ id: z.ZodString; town_id: z.ZodString; name: z.ZodString; @@ -19,27 +15,24 @@ export declare const RigOutput: z.ZodObject< platform_integration_id: z.ZodDefault>>; created_at: z.ZodString; updated_at: z.ZodString; - }, - z.core.$strip ->; -export declare const BeadOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const BeadOutput: z.ZodObject<{ bead_id: z.ZodString; type: z.ZodEnum<{ - agent: 'agent'; - convoy: 'convoy'; - escalation: 'escalation'; - issue: 'issue'; - merge_request: 'merge_request'; - message: 'message'; - molecule: 'molecule'; + agent: "agent"; + convoy: "convoy"; + escalation: "escalation"; + issue: "issue"; + merge_request: "merge_request"; + message: "message"; + molecule: "molecule"; }>; status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; }>; title: z.ZodString; body: z.ZodNullable; @@ -47,10 +40,10 @@ export declare const BeadOutput: z.ZodObject< parent_bead_id: z.ZodNullable; assignee_agent_bead_id: z.ZodNullable; priority: z.ZodEnum<{ - 
critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; }>; labels: z.ZodArray; metadata: z.ZodRecord; @@ -58,36 +51,23 @@ export declare const BeadOutput: z.ZodObject< created_at: z.ZodString; updated_at: z.ZodString; closed_at: z.ZodNullable; - }, - z.core.$strip ->; -export declare const AgentOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const AgentOutput: z.ZodObject<{ id: z.ZodString; rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; name: z.ZodString; identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; current_hook_bead_id: z.ZodNullable; dispatch_attempts: z.ZodDefault; last_activity_at: z.ZodNullable; @@ -95,11 +75,8 @@ export declare const AgentOutput: z.ZodObject< created_at: z.ZodString; agent_status_message: z.ZodDefault>>; agent_status_updated_at: z.ZodDefault>>; - }, - z.core.$strip ->; -export declare const BeadEventOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const BeadEventOutput: z.ZodObject<{ bead_event_id: z.ZodString; bead_id: z.ZodString; agent_id: z.ZodNullable; @@ -110,68 +87,45 @@ export declare const BeadEventOutput: z.ZodObject< created_at: z.ZodString; rig_id: z.ZodOptional; rig_name: z.ZodOptional; - }, - z.core.$strip ->; -export declare const MayorSendResultOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const MayorSendResultOutput: z.ZodObject<{ agentId: z.ZodString; sessionStatus: z.ZodEnum<{ - active: 'active'; - idle: 'idle'; - starting: 
'starting'; + active: "active"; + idle: "idle"; + starting: "starting"; }>; - }, - z.core.$strip ->; -export declare const MayorStatusOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const MayorStatusOutput: z.ZodObject<{ configured: z.ZodBoolean; townId: z.ZodNullable; - session: z.ZodNullable< - z.ZodObject< - { - agentId: z.ZodString; - sessionId: z.ZodString; - status: z.ZodEnum<{ - active: 'active'; - idle: 'idle'; - starting: 'starting'; - }>; - lastActivityAt: z.ZodString; - }, - z.core.$strip - > - >; - }, - z.core.$strip ->; -export declare const StreamTicketOutput: z.ZodObject< - { + session: z.ZodNullable; + lastActivityAt: z.ZodString; + }, z.core.$strip>>; +}, z.core.$strip>; +export declare const StreamTicketOutput: z.ZodObject<{ url: z.ZodString; ticket: z.ZodString; - }, - z.core.$strip ->; -export declare const PtySessionOutput: z.ZodObject< - { - pty: z.ZodObject< - { +}, z.core.$strip>; +export declare const PtySessionOutput: z.ZodObject<{ + pty: z.ZodObject<{ id: z.ZodString; - }, - z.core.$loose - >; + }, z.core.$loose>; wsUrl: z.ZodString; - }, - z.core.$strip ->; -export declare const ConvoyOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const ConvoyOutput: z.ZodObject<{ id: z.ZodString; title: z.ZodString; status: z.ZodEnum<{ - active: 'active'; - landed: 'landed'; + active: "active"; + landed: "landed"; }>; staged: z.ZodBoolean; total_beads: z.ZodNumber; @@ -181,16 +135,13 @@ export declare const ConvoyOutput: z.ZodObject< landed_at: z.ZodNullable; feature_branch: z.ZodNullable; merge_mode: z.ZodNullable; - }, - z.core.$strip ->; -export declare const ConvoyDetailOutput: z.ZodObject< - { +}, z.core.$strip>; +export declare const ConvoyDetailOutput: z.ZodObject<{ id: z.ZodString; title: z.ZodString; status: z.ZodEnum<{ - active: 'active'; - landed: 'landed'; + active: "active"; + landed: "landed"; }>; staged: z.ZodBoolean; total_beads: z.ZodNumber; @@ -200,50 +151,36 @@ export declare const ConvoyDetailOutput: 
z.ZodObject< landed_at: z.ZodNullable; feature_branch: z.ZodNullable; merge_mode: z.ZodNullable; - beads: z.ZodArray< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - rig_id: z.ZodNullable; - assignee_agent_name: z.ZodNullable; - }, - z.core.$strip - > - >; - dependency_edges: z.ZodArray< - z.ZodObject< - { - bead_id: z.ZodString; - depends_on_bead_id: z.ZodString; - }, - z.core.$strip - > - >; - }, - z.core.$strip ->; -export declare const SlingResultOutput: z.ZodObject< - { - bead: z.ZodObject< - { + beads: z.ZodArray; + assignee_agent_name: z.ZodNullable; + }, z.core.$strip>>; + dependency_edges: z.ZodArray>; +}, z.core.$strip>; +export declare const SlingResultOutput: z.ZodObject<{ + bead: z.ZodObject<{ bead_id: z.ZodString; type: z.ZodEnum<{ - agent: 'agent'; - convoy: 'convoy'; - escalation: 'escalation'; - issue: 'issue'; - merge_request: 'merge_request'; - message: 'message'; - molecule: 'molecule'; + agent: "agent"; + convoy: "convoy"; + escalation: "escalation"; + issue: "issue"; + merge_request: "merge_request"; + message: "message"; + molecule: "molecule"; }>; status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; }>; title: z.ZodString; body: z.ZodNullable; @@ -251,10 +188,10 @@ export declare const SlingResultOutput: z.ZodObject< parent_bead_id: z.ZodNullable; assignee_agent_bead_id: z.ZodNullable; priority: z.ZodEnum<{ - critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; }>; labels: z.ZodArray; metadata: z.ZodRecord; @@ -262,36 +199,23 @@ export declare const SlingResultOutput: z.ZodObject< created_at: z.ZodString; updated_at: z.ZodString; closed_at: z.ZodNullable; - }, - z.core.$strip - >; - agent: z.ZodObject< - { + }, 
z.core.$strip>; + agent: z.ZodObject<{ id: z.ZodString; rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; name: z.ZodString; identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; current_hook_bead_id: z.ZodNullable; dispatch_attempts: z.ZodDefault; last_activity_at: z.ZodNullable; @@ -299,14 +223,9 @@ export declare const SlingResultOutput: z.ZodObject< created_at: z.ZodString; agent_status_message: z.ZodDefault>>; agent_status_updated_at: z.ZodDefault>>; - }, - z.core.$strip - >; - }, - z.core.$strip ->; -export declare const RigDetailOutput: z.ZodObject< - { + }, z.core.$strip>; +}, z.core.$strip>; +export declare const RigDetailOutput: z.ZodObject<{ id: z.ZodString; town_id: z.ZodString; name: z.ZodString; @@ -315,1185 +234,752 @@ export declare const RigDetailOutput: z.ZodObject< platform_integration_id: z.ZodDefault>>; created_at: z.ZodString; updated_at: z.ZodString; - agents: z.ZodArray< - z.ZodObject< - { - id: z.ZodString; - rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; - name: z.ZodString; - identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; - current_hook_bead_id: z.ZodNullable; - dispatch_attempts: z.ZodDefault; - last_activity_at: z.ZodNullable; - checkpoint: z.ZodOptional; - created_at: z.ZodString; - agent_status_message: z.ZodDefault>>; - agent_status_updated_at: 
z.ZodDefault>>; - }, - z.core.$strip - > - >; - beads: z.ZodArray< - z.ZodObject< - { - bead_id: z.ZodString; - type: z.ZodEnum<{ - agent: 'agent'; - convoy: 'convoy'; - escalation: 'escalation'; - issue: 'issue'; - merge_request: 'merge_request'; - message: 'message'; - molecule: 'molecule'; - }>; - status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; - }>; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - parent_bead_id: z.ZodNullable; - assignee_agent_bead_id: z.ZodNullable; - priority: z.ZodEnum<{ - critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; - }>; - labels: z.ZodArray; - metadata: z.ZodRecord; - created_by: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - closed_at: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip ->; -export declare const RpcTownOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - name: z.ZodString; - owner_user_id: z.ZodString; - created_at: z.ZodString; - updated_at: z.ZodString; - }, - z.core.$strip - > ->; -export declare const RpcRigOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - town_id: z.ZodString; - name: z.ZodString; - git_url: z.ZodString; - default_branch: z.ZodString; - platform_integration_id: z.ZodDefault>>; - created_at: z.ZodString; - updated_at: z.ZodString; - }, - z.core.$strip - > ->; -export declare const RpcBeadOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - bead_id: z.ZodString; - type: z.ZodEnum<{ - agent: 'agent'; - convoy: 'convoy'; - escalation: 'escalation'; - issue: 'issue'; - merge_request: 'merge_request'; - message: 'message'; - molecule: 'molecule'; - }>; - status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; - }>; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - parent_bead_id: z.ZodNullable; - 
assignee_agent_bead_id: z.ZodNullable; - priority: z.ZodEnum<{ - critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; - }>; - labels: z.ZodArray; - metadata: z.ZodRecord; - created_by: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - closed_at: z.ZodNullable; - }, - z.core.$strip - > ->; -export declare const RpcAgentOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; - name: z.ZodString; - identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; - current_hook_bead_id: z.ZodNullable; - dispatch_attempts: z.ZodDefault; - last_activity_at: z.ZodNullable; - checkpoint: z.ZodOptional; - created_at: z.ZodString; - agent_status_message: z.ZodDefault>>; - agent_status_updated_at: z.ZodDefault>>; - }, - z.core.$strip - > ->; -export declare const RpcBeadEventOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - bead_event_id: z.ZodString; - bead_id: z.ZodString; - agent_id: z.ZodNullable; - event_type: z.ZodString; - old_value: z.ZodNullable; - new_value: z.ZodNullable; - metadata: z.ZodRecord; - created_at: z.ZodString; - rig_id: z.ZodOptional; - rig_name: z.ZodOptional; - }, - z.core.$strip - > ->; -export declare const RpcMayorSendResultOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - agentId: z.ZodString; - sessionStatus: z.ZodEnum<{ - active: 'active'; - idle: 'idle'; - starting: 'starting'; - }>; - }, - z.core.$strip - > ->; -export declare const RpcMayorStatusOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - configured: z.ZodBoolean; - townId: z.ZodNullable; - session: z.ZodNullable< - z.ZodObject< - { - agentId: z.ZodString; - sessionId: z.ZodString; - status: z.ZodEnum<{ - active: 'active'; - idle: 'idle'; - starting: 'starting'; - }>; - 
lastActivityAt: z.ZodString; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > ->; -export declare const RpcStreamTicketOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - url: z.ZodString; - ticket: z.ZodString; - }, - z.core.$strip - > ->; -export declare const RpcPtySessionOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - pty: z.ZodObject< - { - id: z.ZodString; - }, - z.core.$loose - >; - wsUrl: z.ZodString; - }, - z.core.$strip - > ->; -export declare const RpcConvoyOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - title: z.ZodString; - status: z.ZodEnum<{ - active: 'active'; - landed: 'landed'; - }>; - staged: z.ZodBoolean; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - created_by: z.ZodNullable; - created_at: z.ZodString; - landed_at: z.ZodNullable; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > ->; -export declare const RpcConvoyDetailOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - title: z.ZodString; - status: z.ZodEnum<{ - active: 'active'; - landed: 'landed'; - }>; - staged: z.ZodBoolean; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - created_by: z.ZodNullable; - created_at: z.ZodString; - landed_at: z.ZodNullable; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - beads: z.ZodArray< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - rig_id: z.ZodNullable; - assignee_agent_name: z.ZodNullable; - }, - z.core.$strip - > - >; - dependency_edges: z.ZodArray< - z.ZodObject< - { + agents: z.ZodArray; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; + name: z.ZodString; + identity: z.ZodString; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; + current_hook_bead_id: z.ZodNullable; + dispatch_attempts: z.ZodDefault; + last_activity_at: z.ZodNullable; 
+ checkpoint: z.ZodOptional; + created_at: z.ZodString; + agent_status_message: z.ZodDefault>>; + agent_status_updated_at: z.ZodDefault>>; + }, z.core.$strip>>; + beads: z.ZodArray; + status: z.ZodEnum<{ + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; + }>; + title: z.ZodString; + body: z.ZodNullable; + rig_id: z.ZodNullable; + parent_bead_id: z.ZodNullable; + assignee_agent_bead_id: z.ZodNullable; + priority: z.ZodEnum<{ + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; + }>; + labels: z.ZodArray; + metadata: z.ZodRecord; + created_by: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + closed_at: z.ZodNullable; + }, z.core.$strip>>; +}, z.core.$strip>; +export declare const RpcTownOutput: z.ZodPipe>; +export declare const RpcRigOutput: z.ZodPipe>>; + created_at: z.ZodString; + updated_at: z.ZodString; +}, z.core.$strip>>; +export declare const RpcBeadOutput: z.ZodPipe; + status: z.ZodEnum<{ + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; + }>; + title: z.ZodString; + body: z.ZodNullable; + rig_id: z.ZodNullable; + parent_bead_id: z.ZodNullable; + assignee_agent_bead_id: z.ZodNullable; + priority: z.ZodEnum<{ + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; + }>; + labels: z.ZodArray; + metadata: z.ZodRecord; + created_by: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + closed_at: z.ZodNullable; +}, z.core.$strip>>; +export declare const RpcAgentOutput: z.ZodPipe; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; + name: z.ZodString; + identity: z.ZodString; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; + current_hook_bead_id: z.ZodNullable; + dispatch_attempts: z.ZodDefault; + last_activity_at: 
z.ZodNullable; + checkpoint: z.ZodOptional; + created_at: z.ZodString; + agent_status_message: z.ZodDefault>>; + agent_status_updated_at: z.ZodDefault>>; +}, z.core.$strip>>; +export declare const RpcBeadEventOutput: z.ZodPipe; + event_type: z.ZodString; + old_value: z.ZodNullable; + new_value: z.ZodNullable; + metadata: z.ZodRecord; + created_at: z.ZodString; + rig_id: z.ZodOptional; + rig_name: z.ZodOptional; +}, z.core.$strip>>; +export declare const RpcMayorSendResultOutput: z.ZodPipe; +}, z.core.$strip>>; +export declare const RpcMayorStatusOutput: z.ZodPipe; + session: z.ZodNullable; + lastActivityAt: z.ZodString; + }, z.core.$strip>>; +}, z.core.$strip>>; +export declare const RpcStreamTicketOutput: z.ZodPipe>; +export declare const RpcPtySessionOutput: z.ZodPipe; + wsUrl: z.ZodString; +}, z.core.$strip>>; +export declare const RpcConvoyOutput: z.ZodPipe; + staged: z.ZodBoolean; + total_beads: z.ZodNumber; + closed_beads: z.ZodNumber; + created_by: z.ZodNullable; + created_at: z.ZodString; + landed_at: z.ZodNullable; + feature_branch: z.ZodNullable; + merge_mode: z.ZodNullable; +}, z.core.$strip>>; +export declare const RpcConvoyDetailOutput: z.ZodPipe; + staged: z.ZodBoolean; + total_beads: z.ZodNumber; + closed_beads: z.ZodNumber; + created_by: z.ZodNullable; + created_at: z.ZodString; + landed_at: z.ZodNullable; + feature_branch: z.ZodNullable; + merge_mode: z.ZodNullable; + beads: z.ZodArray; + assignee_agent_name: z.ZodNullable; + }, z.core.$strip>>; + dependency_edges: z.ZodArray>; +}, z.core.$strip>>; +export declare const RpcSlingResultOutput: z.ZodPipe; + status: z.ZodEnum<{ + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; + }>; + title: z.ZodString; + body: z.ZodNullable; + rig_id: z.ZodNullable; + parent_bead_id: z.ZodNullable; + assignee_agent_bead_id: z.ZodNullable; + priority: z.ZodEnum<{ + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; + }>; + labels: 
z.ZodArray; + metadata: z.ZodRecord; + created_by: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + closed_at: z.ZodNullable; + }, z.core.$strip>; + agent: z.ZodObject<{ + id: z.ZodString; + rig_id: z.ZodNullable; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; + name: z.ZodString; + identity: z.ZodString; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; + current_hook_bead_id: z.ZodNullable; + dispatch_attempts: z.ZodDefault; + last_activity_at: z.ZodNullable; + checkpoint: z.ZodOptional; + created_at: z.ZodString; + agent_status_message: z.ZodDefault>>; + agent_status_updated_at: z.ZodDefault>>; + }, z.core.$strip>; +}, z.core.$strip>>; +export declare const RpcAlarmStatusOutput: z.ZodPipe; + intervalMs: z.ZodNumber; + intervalLabel: z.ZodString; + }, z.core.$strip>; + agents: z.ZodObject<{ + working: z.ZodNumber; + idle: z.ZodNumber; + stalled: z.ZodNumber; + dead: z.ZodNumber; + total: z.ZodNumber; + }, z.core.$strip>; + beads: z.ZodObject<{ + open: z.ZodNumber; + inProgress: z.ZodNumber; + inReview: z.ZodNumber; + failed: z.ZodNumber; + triageRequests: z.ZodNumber; + }, z.core.$strip>; + patrol: z.ZodObject<{ + guppWarnings: z.ZodNumber; + guppEscalations: z.ZodNumber; + stalledAgents: z.ZodNumber; + orphanedHooks: z.ZodNumber; + }, z.core.$strip>; + recentEvents: z.ZodArray>; +}, z.core.$strip>>; +export declare const RpcRigDetailOutput: z.ZodPipe>>; + created_at: z.ZodString; + updated_at: z.ZodString; + agents: z.ZodArray; + role: z.ZodUnion<[z.ZodEnum<{ + mayor: "mayor"; + polecat: "polecat"; + refinery: "refinery"; + }>, z.ZodString]>; + name: z.ZodString; + identity: z.ZodString; + status: z.ZodUnion<[z.ZodEnum<{ + dead: "dead"; + idle: "idle"; + stalled: "stalled"; + working: "working"; + }>, z.ZodString]>; + current_hook_bead_id: z.ZodNullable; + dispatch_attempts: z.ZodDefault; + 
last_activity_at: z.ZodNullable; + checkpoint: z.ZodOptional; + created_at: z.ZodString; + agent_status_message: z.ZodDefault>>; + agent_status_updated_at: z.ZodDefault>>; + }, z.core.$strip>>; + beads: z.ZodArray; + status: z.ZodEnum<{ + closed: "closed"; + failed: "failed"; + in_progress: "in_progress"; + in_review: "in_review"; + open: "open"; + }>; + title: z.ZodString; + body: z.ZodNullable; + rig_id: z.ZodNullable; + parent_bead_id: z.ZodNullable; + assignee_agent_bead_id: z.ZodNullable; + priority: z.ZodEnum<{ + critical: "critical"; + high: "high"; + low: "low"; + medium: "medium"; + }>; + labels: z.ZodArray; + metadata: z.ZodRecord; + created_by: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + closed_at: z.ZodNullable; + }, z.core.$strip>>; +}, z.core.$strip>>; +export declare const MergeQueueDataOutput: z.ZodObject<{ + needsAttention: z.ZodObject<{ + openPRs: z.ZodArray; + rig_id: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + metadata: z.ZodRecord; + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: z.ZodNullable; + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + failedReviews: z.ZodArray; + rig_id: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + metadata: z.ZodRecord; + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: 
z.ZodNullable; + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + stalePRs: z.ZodArray; + rig_id: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; + metadata: z.ZodRecord; + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: z.ZodNullable; + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + }, z.core.$strip>; + activityLog: z.ZodArray - >; - }, - z.core.$strip - > ->; -export declare const RpcSlingResultOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - bead: z.ZodObject< - { - bead_id: z.ZodString; - type: z.ZodEnum<{ - agent: 'agent'; - convoy: 'convoy'; - escalation: 'escalation'; - issue: 'issue'; - merge_request: 'merge_request'; - message: 'message'; - molecule: 'molecule'; - }>; - status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; - }>; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - parent_bead_id: z.ZodNullable; - assignee_agent_bead_id: z.ZodNullable; - priority: z.ZodEnum<{ - critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; - }>; - labels: z.ZodArray; - metadata: z.ZodRecord; - created_by: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - closed_at: z.ZodNullable; - }, - z.core.$strip - >; - agent: z.ZodObject< - { - id: z.ZodString; - rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; - name: z.ZodString; - identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - 
stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; - current_hook_bead_id: z.ZodNullable; - dispatch_attempts: z.ZodDefault; - last_activity_at: z.ZodNullable; - checkpoint: z.ZodOptional; - created_at: z.ZodString; - agent_status_message: z.ZodDefault>>; - agent_status_updated_at: z.ZodDefault>>; - }, - z.core.$strip - >; - }, - z.core.$strip - > ->; -export declare const RpcAlarmStatusOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - alarm: z.ZodObject< - { - nextFireAt: z.ZodNullable; - intervalMs: z.ZodNumber; - intervalLabel: z.ZodString; - }, - z.core.$strip - >; - agents: z.ZodObject< - { - working: z.ZodNumber; - idle: z.ZodNumber; - stalled: z.ZodNumber; - dead: z.ZodNumber; - total: z.ZodNumber; - }, - z.core.$strip - >; - beads: z.ZodObject< - { - open: z.ZodNumber; - inProgress: z.ZodNumber; - inReview: z.ZodNumber; - failed: z.ZodNumber; - triageRequests: z.ZodNumber; - }, - z.core.$strip - >; - patrol: z.ZodObject< - { - guppWarnings: z.ZodNumber; - guppEscalations: z.ZodNumber; - stalledAgents: z.ZodNumber; - orphanedHooks: z.ZodNumber; - }, - z.core.$strip - >; - recentEvents: z.ZodArray< - z.ZodObject< - { - time: z.ZodString; - type: z.ZodString; - message: z.ZodString; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > ->; -export declare const RpcRigDetailOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - town_id: z.ZodString; - name: z.ZodString; - git_url: z.ZodString; - default_branch: z.ZodString; - platform_integration_id: z.ZodDefault>>; - created_at: z.ZodString; - updated_at: z.ZodString; - agents: z.ZodArray< - z.ZodObject< - { - id: z.ZodString; - rig_id: z.ZodNullable; - role: z.ZodUnion< - [ - z.ZodEnum<{ - mayor: 'mayor'; - polecat: 'polecat'; - refinery: 'refinery'; - }>, - z.ZodString, - ] - >; - name: z.ZodString; - identity: z.ZodString; - status: z.ZodUnion< - [ - z.ZodEnum<{ - dead: 'dead'; - idle: 'idle'; - stalled: 'stalled'; - working: 'working'; - }>, - z.ZodString, - ] - >; - 
current_hook_bead_id: z.ZodNullable; - dispatch_attempts: z.ZodDefault; - last_activity_at: z.ZodNullable; - checkpoint: z.ZodOptional; + agent_id: z.ZodNullable; + event_type: z.ZodString; + old_value: z.ZodNullable; + new_value: z.ZodNullable; + metadata: z.ZodRecord; created_at: z.ZodString; - agent_status_message: z.ZodDefault>>; - agent_status_updated_at: z.ZodDefault>>; - }, - z.core.$strip - > - >; - beads: z.ZodArray< - z.ZodObject< - { + }, z.core.$strip>; + mrBead: z.ZodNullable; - status: z.ZodEnum<{ - closed: 'closed'; - failed: 'failed'; - in_progress: 'in_progress'; - in_review: 'in_review'; - open: 'open'; - }>; title: z.ZodString; - body: z.ZodNullable; + type: z.ZodString; + status: z.ZodString; rig_id: z.ZodNullable; - parent_bead_id: z.ZodNullable; - assignee_agent_bead_id: z.ZodNullable; - priority: z.ZodEnum<{ - critical: 'critical'; - high: 'high'; - low: 'low'; - medium: 'medium'; - }>; - labels: z.ZodArray; metadata: z.ZodRecord; - created_by: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - closed_at: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > ->; -export declare const MergeQueueDataOutput: z.ZodObject< - { - needsAttention: z.ZodObject< - { - openPRs: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: 
z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - failedReviews: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - stalePRs: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - 
retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip - >; - activityLog: z.ZodArray< - z.ZodObject< - { - event: z.ZodObject< - { - bead_event_id: z.ZodString; - bead_id: z.ZodString; - agent_id: z.ZodNullable; - event_type: z.ZodString; - old_value: z.ZodNullable; - new_value: z.ZodNullable; - metadata: z.ZodRecord; - created_at: z.ZodString; - }, - z.core.$strip - >; - mrBead: z.ZodNullable< - z.ZodObject< - { + }, z.core.$strip>>; + sourceBead: z.ZodNullable>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: z.ZodNullable; + reviewMetadata: z.ZodNullable; + branch: z.ZodNullable; + target_branch: z.ZodNullable; + merge_commit: z.ZodNullable; + }, z.core.$strip>>; + }, z.core.$strip>>; +}, z.core.$strip>; +export declare const RpcMergeQueueDataOutput: z.ZodPipe; rig_id: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; metadata: z.ZodRecord; - }, - z.core.$strip - > - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable - >; - convoy: z.ZodNullable< - z.ZodObject< - { + body: 
z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { + }, z.core.$strip>>; + agent: z.ZodNullable - >; - rigName: z.ZodNullable; - reviewMetadata: z.ZodNullable< - z.ZodObject< - { - pr_url: z.ZodNullable; - branch: z.ZodNullable; - target_branch: z.ZodNullable; - merge_commit: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > - >; - }, - z.core.$strip ->; -export declare const RpcMergeQueueDataOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - needsAttention: z.ZodObject< - { - openPRs: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - failedReviews: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - 
updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - stalePRs: z.ZodArray< - z.ZodObject< - { - mrBead: z.ZodObject< - { - bead_id: z.ZodString; - status: z.ZodString; - title: z.ZodString; - body: z.ZodNullable; - rig_id: z.ZodNullable; - created_at: z.ZodString; - updated_at: z.ZodString; - metadata: z.ZodRecord; - }, - z.core.$strip - >; - reviewMetadata: z.ZodObject< - { - branch: z.ZodString; - target_branch: z.ZodString; - merge_commit: z.ZodNullable; - pr_url: z.ZodNullable; - retry_count: z.ZodNumber; - }, - z.core.$strip - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - body: z.ZodNullable; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; - rigName: 
z.ZodNullable; - staleSince: z.ZodNullable; - failureReason: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip - >; - activityLog: z.ZodArray< - z.ZodObject< - { - event: z.ZodObject< - { - bead_event_id: z.ZodString; + }, z.core.$strip>>; + rigName: z.ZodNullable; + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + failedReviews: z.ZodArray; - event_type: z.ZodString; - old_value: z.ZodNullable; - new_value: z.ZodNullable; + status: z.ZodString; + title: z.ZodString; + body: z.ZodNullable; + rig_id: z.ZodNullable; + created_at: z.ZodString; + updated_at: z.ZodString; metadata: z.ZodRecord; + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: z.ZodNullable; + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + stalePRs: z.ZodArray; + rig_id: z.ZodNullable; created_at: z.ZodString; - }, - z.core.$strip - >; - mrBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - type: z.ZodString; - status: z.ZodString; - rig_id: z.ZodNullable; - metadata: z.ZodRecord; - }, - z.core.$strip - > - >; - sourceBead: z.ZodNullable< - z.ZodObject< - { - bead_id: z.ZodString; - title: z.ZodString; - status: z.ZodString; - }, - z.core.$strip - > - >; - convoy: z.ZodNullable< - z.ZodObject< - { - convoy_id: z.ZodString; - title: z.ZodString; - total_beads: z.ZodNumber; - closed_beads: z.ZodNumber; - feature_branch: z.ZodNullable; - merge_mode: z.ZodNullable; - }, - z.core.$strip - > - >; - agent: z.ZodNullable< - z.ZodObject< - { - agent_id: z.ZodString; - name: z.ZodString; - role: z.ZodString; - }, - z.core.$strip - > - >; + updated_at: z.ZodString; + metadata: 
z.ZodRecord; + }, z.core.$strip>; + reviewMetadata: z.ZodObject<{ + branch: z.ZodString; + target_branch: z.ZodString; + merge_commit: z.ZodNullable; + pr_url: z.ZodNullable; + retry_count: z.ZodNumber; + }, z.core.$strip>; + sourceBead: z.ZodNullable; + }, z.core.$strip>>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; rigName: z.ZodNullable; - reviewMetadata: z.ZodNullable< - z.ZodObject< - { - pr_url: z.ZodNullable; - branch: z.ZodNullable; - target_branch: z.ZodNullable; - merge_commit: z.ZodNullable; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > - >; - }, - z.core.$strip - > ->; -export declare const OrgTownOutput: z.ZodObject< - { + staleSince: z.ZodNullable; + failureReason: z.ZodNullable; + }, z.core.$strip>>; + }, z.core.$strip>; + activityLog: z.ZodArray; + event_type: z.ZodString; + old_value: z.ZodNullable; + new_value: z.ZodNullable; + metadata: z.ZodRecord; + created_at: z.ZodString; + }, z.core.$strip>; + mrBead: z.ZodNullable; + metadata: z.ZodRecord; + }, z.core.$strip>>; + sourceBead: z.ZodNullable>; + convoy: z.ZodNullable; + merge_mode: z.ZodNullable; + }, z.core.$strip>>; + agent: z.ZodNullable>; + rigName: z.ZodNullable; + reviewMetadata: z.ZodNullable; + branch: z.ZodNullable; + target_branch: z.ZodNullable; + merge_commit: z.ZodNullable; + }, z.core.$strip>>; + }, z.core.$strip>>; +}, z.core.$strip>>; +export declare const OrgTownOutput: z.ZodObject<{ + id: z.ZodString; + name: z.ZodString; + owner_org_id: z.ZodString; + created_by_user_id: z.ZodString; + created_at: z.ZodString; + updated_at: z.ZodString; +}, z.core.$strip>; +export declare const RpcOrgTownOutput: z.ZodPipe; -export declare const RpcOrgTownOutput: z.ZodPipe< - z.ZodAny, - z.ZodObject< - { - id: z.ZodString; - name: z.ZodString; - owner_org_id: z.ZodString; - created_by_user_id: z.ZodString; - created_at: z.ZodString; - updated_at: z.ZodString; - }, - z.core.$strip - > ->; +}, z.core.$strip>>; From 
b61e356b125e38b051b5ba9f284aa371129bfafc Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 11:56:50 -0500 Subject: [PATCH 06/44] =?UTF-8?q?fix(gastown):=20address=20PR=20review=20c?= =?UTF-8?q?omments=20=E2=80=94=20security,=20correctness,=20and=20UX?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comment 3 (CRITICAL): refreshKilocodeTokenIfExpiring now verifies the existing JWT signature via verifyKiloToken() before trusting its claims. Previously decoded claims without verification, allowing a forged near-expiry token to be re-signed with real credentials. Comment 4 (WARNING): refreshContainerToken tRPC mutation now resolves the town owner's identity for org towns instead of using the caller's. For personal towns the caller IS the owner; for org towns it looks up the owner via findUserById from the DB to get the correct api_token_pepper. Comment 1 (WARNING): RESTART/RESTART_WITH_BACKOFF triage now uses snapshotHookedBeadId (captured when the triage request was created) instead of targetAgent.current_hook_bead_id (which may have changed). Consistent with CLOSE_BEAD and REASSIGN_BEAD which already use the snapshot. Comment 2 (SUGGESTION): Wrap disabled CFLink button in a span so the tooltip trigger receives hover events and operators can see why the link is unavailable. 
--- cloudflare-gastown/src/dos/Town.do.ts | 68 +++++++++---------- cloudflare-gastown/src/trpc/router.ts | 32 ++++++++- .../gastown/towns/[townId]/ContainerTab.tsx | 10 +-- 3 files changed, 67 insertions(+), 43 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 10f237a72..d140b1d47 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -56,6 +56,7 @@ import { query } from '../util/query.util'; import { getAgentDOStub } from './Agent.do'; import { getTownContainerStub } from './TownContainer.do'; +import { verifyKiloToken } from '@kilocode/worker-utils'; import { generateKiloApiToken } from '../util/kilo-token.util'; import { resolveSecret } from '../util/secret.util'; import { writeEvent, type GastownEventData } from '../util/analytics.util'; @@ -1587,18 +1588,19 @@ export class TownDO extends DurableObject { dispatch.stopAgentInContainer(this.env, this.townId, targetAgentId).catch(() => {}); } if (targetAgent) { + // Use the bead captured in the triage snapshot (not the agent's + // current hook, which may have changed since the triage request + // was created). Fall back to current hook for backward compat. + const restartBeadId = + snapshotHookedBeadId ?? targetAgent.current_hook_bead_id; + // Check if the hooked bead has exhausted its dispatch cap. // If so, fail it immediately instead of letting the reconciler // re-dispatch indefinitely (#1653). 
- if (targetAgent.current_hook_bead_id) { - const hookedBead = beadOps.getBead(this.sql, targetAgent.current_hook_bead_id); + if (restartBeadId) { + const hookedBead = beadOps.getBead(this.sql, restartBeadId); if (hookedBead && hookedBead.dispatch_attempts >= scheduling.MAX_DISPATCH_ATTEMPTS) { - beadOps.updateBeadStatus( - this.sql, - targetAgent.current_hook_bead_id, - 'failed', - 'system' - ); + beadOps.updateBeadStatus(this.sql, restartBeadId, 'failed', 'system'); agents.unhookBead(this.sql, targetAgentId); break; } @@ -1622,7 +1624,7 @@ export class TownDO extends DurableObject { // reconciler's exponential backoff gate fires correctly. // Without this, the backoff variant allows immediate // redispatch once last_dispatch_attempt_at ages out. - if (action === 'RESTART_WITH_BACKOFF' && targetAgent.current_hook_bead_id) { + if (action === 'RESTART_WITH_BACKOFF' && restartBeadId) { query( this.sql, /* sql */ ` @@ -1630,7 +1632,7 @@ export class TownDO extends DurableObject { SET ${beads.columns.last_dispatch_attempt_at} = ? WHERE ${beads.bead_id} = ? `, - [now(), targetAgent.current_hook_bead_id] + [now(), restartBeadId] ); } } @@ -3382,9 +3384,9 @@ export class TownDO extends DurableObject { * Throttled to once per day — the 30-day token is refreshed when * within 7 days of expiry, providing ample safety margin. * - * Decodes the existing JWT payload to extract user identity (no - * signature verification needed — we're just reading the claims to - * re-sign with the same data). + * Verifies the existing token's signature before trusting its claims, + * preventing a forged near-expiry token from being re-signed with + * real credentials. 
*/ private lastKilocodeTokenCheckAt = 0; private async refreshKilocodeTokenIfExpiring(): Promise { @@ -3398,24 +3400,26 @@ export class TownDO extends DurableObject { const token = townConfig.kilocode_token; if (!token) return; - // Decode JWT payload (base64url, no verification) - const parts = token.split('.'); - const encodedPayload = parts[1]; - if (!encodedPayload) return; - const payloadSchema = z.object({ - exp: z.number().optional(), - kiloUserId: z.string().optional(), - apiTokenPepper: z.string().nullable().optional(), - }); - let rawPayload: unknown; + if (!this.env.NEXTAUTH_SECRET) { + logger.warn('refreshKilocodeTokenIfExpiring: NEXTAUTH_SECRET not configured'); + return; + } + const secret = await resolveSecret(this.env.NEXTAUTH_SECRET); + if (!secret) { + logger.warn('refreshKilocodeTokenIfExpiring: failed to resolve NEXTAUTH_SECRET'); + return; + } + + // Verify the existing token's signature before trusting its claims. + // This prevents a forged token from being re-signed with real credentials. + let payload: { kiloUserId: string; apiTokenPepper?: string | null; exp?: number }; try { - rawPayload = JSON.parse(atob(encodedPayload.replace(/-/g, '+').replace(/_/g, '/'))); + payload = await verifyKiloToken(token, secret); } catch { + // Signature invalid or token malformed — don't remint from untrusted claims. 
+ logger.warn('refreshKilocodeTokenIfExpiring: existing token failed signature verification'); return; } - const parsed = payloadSchema.safeParse(rawPayload); - if (!parsed.success) return; - const payload = parsed.data; const exp = payload.exp; if (!exp) return; @@ -3427,16 +3431,6 @@ export class TownDO extends DurableObject { const userId = payload.kiloUserId; if (!userId) return; - if (!this.env.NEXTAUTH_SECRET) { - logger.warn('refreshKilocodeTokenIfExpiring: NEXTAUTH_SECRET not configured'); - return; - } - const secret = await resolveSecret(this.env.NEXTAUTH_SECRET); - if (!secret) { - logger.warn('refreshKilocodeTokenIfExpiring: failed to resolve NEXTAUTH_SECRET'); - return; - } - const newToken = await generateKiloApiToken( { id: userId, api_token_pepper: payload.apiTokenPepper ?? null }, secret diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index 771d97dda..f4f8d5ef4 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -1021,8 +1021,36 @@ export const gastownRouter = router({ // Also remint and push KILOCODE_TOKEN — this is what actually // authenticates GT tool calls and is the main reason users hit 401s. - const user = userFromCtx(ctx); - const newKilocodeToken = await mintKilocodeToken(ctx.env, user); + // For personal towns the caller IS the owner; for org towns we must + // use the town owner's identity (not the caller's) so that + // git-credentials and other owner-scoped APIs continue to work. + let tokenUser: { id: string; api_token_pepper: string | null }; + if (ownership.type === 'user') { + tokenUser = userFromCtx(ctx); + } else { + // Org town: resolve the owner from the town config + const config = await townStub.getTownConfig(); + const ownerId = config.owner_user_id ?? 
config.created_by_user_id; + if (ownerId && ownerId === ctx.userId) { + // Caller happens to be the owner — use their live context + tokenUser = userFromCtx(ctx); + } else if (ownerId) { + // Different org member — look up the owner's pepper from the DB + const { findUserById } = await import('../util/user-db.util'); + const ownerUser = await findUserById(ctx.env.HYPERDRIVE.connectionString, ownerId); + if (!ownerUser) { + throw new TRPCError({ + code: 'INTERNAL_SERVER_ERROR', + message: 'Town owner not found — cannot refresh KILOCODE_TOKEN', + }); + } + tokenUser = { id: ownerUser.id, api_token_pepper: ownerUser.api_token_pepper }; + } else { + // No owner recorded — fall back to caller + tokenUser = userFromCtx(ctx); + } + } + const newKilocodeToken = await mintKilocodeToken(ctx.env, tokenUser); await townStub.updateTownConfig({ kilocode_token: newKilocodeToken }); await townStub.syncConfigToContainer(); }), diff --git a/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx b/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx index f6c7e6402..4f3eaeb53 100644 --- a/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx +++ b/src/app/admin/gastown/towns/[townId]/ContainerTab.tsx @@ -32,10 +32,12 @@ function CFLink({ return ( - + + + {disabledTooltip && {disabledTooltip}} From 007a5bd3b4c7ab65211a86e413e1963bbf171953 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 12:00:33 -0500 Subject: [PATCH 07/44] fix(gastown): gate container actions behind effectiveReadOnly Both Refresh Token and Restart Container buttons in town settings now respect effectiveReadOnly, preventing non-owner org members from triggering container actions on a read-only settings page. 
--- .../gastown/[townId]/settings/TownSettingsPageClient.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx index 5da787148..4ff0880c1 100644 --- a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx +++ b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx @@ -823,7 +823,7 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI + +
+
+

Destroy Container

+

+ Sends SIGKILL — the container dies immediately with no graceful drain. Use + when the container is stuck or unresponsive. +

+
+
@@ -882,7 +911,7 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI disabled={ deleteTown.isPending || deleteOrgTown.isPending || effectiveReadOnly } - variant="destructive" + variant="secondary" size="sm" className="ml-4 shrink-0 gap-1.5" > diff --git a/src/lib/gastown/types/router.d.ts b/src/lib/gastown/types/router.d.ts index 36b0d1d10..12de217df 100644 --- a/src/lib/gastown/types/router.d.ts +++ b/src/lib/gastown/types/router.d.ts @@ -604,6 +604,13 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< output: void; meta: object; }>; + destroyContainer: import('@trpc/server').TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{ input: { rigId: string; @@ -1883,6 +1890,13 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute output: void; meta: object; }>; + destroyContainer: import('@trpc/server').TRPCMutationProcedure<{ + input: { + townId: string; + }; + output: void; + meta: object; + }>; getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{ input: { rigId: string; diff --git a/src/lib/gastown/types/schemas.d.ts b/src/lib/gastown/types/schemas.d.ts index d56dbe856..5ccdf157d 100644 --- a/src/lib/gastown/types/schemas.d.ts +++ b/src/lib/gastown/types/schemas.d.ts @@ -1,4 +1,4 @@ -import type { z } from 'zod'; +import { z } from 'zod'; export declare const TownOutput: z.ZodObject< { id: z.ZodString; From 71ad75f065006bdf879ec37539d372c0328aa789 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 16:46:45 -0500 Subject: [PATCH 24/44] fix(gastown): improve drain Phase 2 logging for nudge debugging Log all agent statuses and roles at drain start, whether an SDK instance exists for the agent, and explicit skip reasons for non-nudgeable roles. This helps diagnose why running agents may not receive the drain nudge. 
--- .../container/src/process-manager.ts | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index af65d7617..0854bb433 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -1019,8 +1019,12 @@ export async function drainAll(): Promise { } // ── Phase 2: Nudge running agents to save ─────────────────────────── - const runningAgents = [...agents.values()].filter(a => a.status === 'running'); - console.log(`${DRAIN_LOG} Phase 2: nudging ${runningAgents.length} running agents`); + const allAgents = [...agents.values()]; + const runningAgents = allAgents.filter(a => a.status === 'running'); + console.log( + `${DRAIN_LOG} Phase 2: ${runningAgents.length} running of ${allAgents.length} total agents. ` + + `All statuses: ${allAgents.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ')}` + ); for (const agent of runningAgents) { try { @@ -1040,13 +1044,22 @@ export async function drainAll(): Promise { // already idle, the timer could fire mid-nudge and exit the // agent before it processes the eviction message. clearIdleTimer(agent.agentId); - console.log(`${DRAIN_LOG} Phase 2: nudging ${agent.role} agent ${agent.agentId}`); + const hasInstance = sdkInstances.has(agent.workdir); + console.log( + `${DRAIN_LOG} Phase 2: nudging ${agent.role} agent ${agent.agentId} ` + + `(session=${agent.sessionId}, sdkInstance=${hasInstance})` + ); await sendMessage(agent.agentId, nudgeMessage); + console.log(`${DRAIN_LOG} Phase 2: nudge delivered to ${agent.agentId}`); + } else { + console.log( + `${DRAIN_LOG} Phase 2: skipping ${agent.role} agent ${agent.agentId} (no nudge for this role)` + ); } } catch (err) { console.warn( `${DRAIN_LOG} Phase 2: failed to nudge agent ${agent.agentId} (${agent.role}):`, - err + err instanceof Error ? 
err.message : err ); } } From dfa39656842b0405a890f227f0175e93304603f8 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Wed, 1 Apr 2026 16:52:22 -0500 Subject: [PATCH 25/44] perf(gastown): deduplicate and guard wasteful queries in TownDO alarm tick (#1864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * perf(gastown): deduplicate and guard wasteful queries in TownDO alarm tick - Guard getAlarmStatus() behind WebSocket client count check (saves 13 ops/tick when no dashboard clients are connected) - Deduplicate hasActiveWork() — called once at alarm start, threaded to ensureContainerReady(), getAlarmStatus(), and re-arm decision (saves 8 COUNTs) - Deduplicate countPendingTriageRequests() — called once, threaded to maybeDispatchTriageAgent() and getAlarmStatus() (saves 1 COUNT) - Deduplicate rigs.listRigs() — called once at alarm start, threaded to ensureContainerReady() and maybeDispatchTriageAgent() (saves 3 queries) - Consolidate GUPP warning + escalation COUNT queries into single query with CASE expression (saves 1 query when status IS broadcast) - Circuit breaker checks (Fix 6) are purely inline per-agent comparisons — no shared query to deduplicate, skipped per spec All function signatures use optional cached-value parameters with fallback, keeping them backward-compatible for non-alarm callers (WebSocket initial snapshot, RPC calls). 
Closes #1855 * fix(gastown): compute hasActiveWork() after reconciliation to avoid stale re-arm interval --- cloudflare-gastown/src/dos/Town.do.ts | 122 ++++++++++++++------------ 1 file changed, 68 insertions(+), 54 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index e595b3230..4f455c175 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3179,10 +3179,13 @@ export class TownDO extends DurableObject { logger.setTags({ townId }); logger.info('alarm: fired'); - const hasRigs = rigs.listRigs(this.sql).length > 0; + // Call once per tick — threaded to ensureContainerReady, maybeDispatchTriageAgent, and getAlarmStatus + const rigList = rigs.listRigs(this.sql); + const hasRigs = rigList.length > 0; + if (hasRigs) { try { - await this.ensureContainerReady(); + await this.ensureContainerReady(rigList); } catch (err) { logger.warn('alarm: container health check failed', { error: err instanceof Error ? err.message : String(err), @@ -3445,7 +3448,16 @@ export class TownDO extends DurableObject { label: JSON.stringify(metrics.actionsByType), }); + // ── Post-reconciliation: cache activity snapshot ──────────────── + // Computed after Phases 0-2 so re-arm and getAlarmStatus reflect + // any work created during reconciliation (hooks, dispatches, triage). + const activeWork = this.hasActiveWork(); + // ── Phase 3: Housekeeping (independent, all parallelizable) ──── + + // Call once per tick — threaded to maybeDispatchTriageAgent and getAlarmStatus + const cachedTriageCount = patrol.countPendingTriageRequests(this.sql); + await Promise.allSettled([ this.deliverPendingMail().catch(err => logger.warn('alarm: deliverPendingMail failed', { @@ -3462,7 +3474,7 @@ export class TownDO extends DurableObject { error: err instanceof Error ? 
err.message : String(err), }) ), - this.maybeDispatchTriageAgent().catch(err => + this.maybeDispatchTriageAgent(cachedTriageCount, rigList).catch(err => logger.warn('alarm: maybeDispatchTriageAgent failed', { error: err instanceof Error ? err.message : String(err), }) @@ -3478,19 +3490,25 @@ export class TownDO extends DurableObject { } }), ]); + // Re-arm: fast when active, slow when idle - const active = this.hasActiveWork(); - const interval = active ? ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; + const interval = activeWork ? ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; await this.ctx.storage.setAlarm(Date.now() + interval); - // Broadcast status snapshot to connected WebSocket clients - try { - const snapshot = await this.getAlarmStatus(); - this.broadcastAlarmStatus(snapshot); - } catch (err) { - logger.warn('alarm: status broadcast failed', { - error: err instanceof Error ? err.message : String(err), - }); + // Broadcast status snapshot to connected WebSocket clients (skip if nobody is listening) + const statusClients = this.ctx.getWebSockets('status'); + if (statusClients.length > 0) { + try { + const snapshot = await this.getAlarmStatus({ + activeWork, + triageCount: cachedTriageCount, + }); + this.broadcastAlarmStatus(snapshot); + } catch (err) { + logger.warn('alarm: status broadcast failed', { + error: err instanceof Error ? err.message : String(err), + }); + } } } @@ -3619,8 +3637,11 @@ export class TownDO extends DurableObject { * * Skips dispatch if a triage agent is already working. */ - private async maybeDispatchTriageAgent(): Promise { - const pendingCount = patrol.countPendingTriageRequests(this.sql); + private async maybeDispatchTriageAgent( + cachedTriageCount?: number, + cachedRigList?: rigs.RigRecord[] + ): Promise { + const pendingCount = cachedTriageCount ?? 
patrol.countPendingTriageRequests(this.sql); if (pendingCount === 0) return; // Check if a triage batch bead is already in progress (meaning a @@ -3657,7 +3678,7 @@ export class TownDO extends DurableObject { // Validate preconditions before creating any beads to avoid // leaked phantom issue beads on early-return paths. - const rigList = rigs.listRigs(this.sql); + const rigList = cachedRigList ?? rigs.listRigs(this.sql); if (rigList.length === 0) { console.warn(`${TOWN_LOG} maybeDispatchTriageAgent: no rigs available, skipping`); return; @@ -3942,13 +3963,15 @@ export class TownDO extends DurableObject { } } - private async ensureContainerReady(): Promise { - const hasRigs = rigs.listRigs(this.sql).length > 0; - if (!hasRigs) return; + private async ensureContainerReady( + cachedRigList?: rigs.RigRecord[], + cachedActiveWork?: boolean + ): Promise { + const rigList = cachedRigList ?? rigs.listRigs(this.sql); + if (rigList.length === 0) return; - const hasWork = this.hasActiveWork(); + const hasWork = cachedActiveWork ?? this.hasActiveWork(); if (!hasWork && !this._draining) { - const rigList = rigs.listRigs(this.sql); const newestRigAge = rigList.reduce((min, r) => { const age = Date.now() - new Date(r.created_at).getTime(); return Math.min(min, age); @@ -4057,7 +4080,10 @@ export class TownDO extends DurableObject { * Return a structured snapshot of the alarm loop and patrol state * for the dashboard Status tab. */ - async getAlarmStatus(): Promise<{ + async getAlarmStatus(cached?: { + activeWork?: boolean; + triageCount?: number; + }): Promise<{ alarm: { nextFireAt: string | null; intervalMs: number; @@ -4091,7 +4117,7 @@ export class TownDO extends DurableObject { }>; }> { const currentAlarm = await this.ctx.storage.getAlarm(); - const active = this.hasActiveWork(); + const active = cached?.activeWork ?? this.hasActiveWork(); const intervalMs = active ? 
ACTIVE_ALARM_INTERVAL_MS : IDLE_ALARM_INTERVAL_MS; // Agent counts by status @@ -4144,38 +4170,26 @@ export class TownDO extends DurableObject { } // Triage request count (issue beads with gt:triage-request label) - beadCounts.triageRequests = patrol.countPendingTriageRequests(this.sql); - - // Patrol indicators — count active warnings/issues - const guppWarnings = Number( - [ - ...query( - this.sql, - /* sql */ ` - SELECT COUNT(*) AS cnt FROM ${beads} - WHERE ${beads.type} = 'message' - AND ${beads.title} = 'GUPP_CHECK' - AND ${beads.status} = 'open' - `, - [] - ), - ][0]?.cnt ?? 0 - ); + beadCounts.triageRequests = cached?.triageCount ?? patrol.countPendingTriageRequests(this.sql); - const guppEscalations = Number( - [ - ...query( - this.sql, - /* sql */ ` - SELECT COUNT(*) AS cnt FROM ${beads} - WHERE ${beads.type} = 'message' - AND ${beads.title} = 'GUPP_ESCALATION' - AND ${beads.status} = 'open' - `, - [] - ), - ][0]?.cnt ?? 0 - ); + // Patrol indicators — count active GUPP warnings + escalations in one query + const guppRows = [ + ...query( + this.sql, + /* sql */ ` + SELECT + SUM(CASE WHEN ${beads.title} = 'GUPP_CHECK' THEN 1 ELSE 0 END) AS warnings, + SUM(CASE WHEN ${beads.title} = 'GUPP_ESCALATION' THEN 1 ELSE 0 END) AS escalations + FROM ${beads} + WHERE ${beads.type} = 'message' + AND ${beads.title} IN ('GUPP_CHECK', 'GUPP_ESCALATION') + AND ${beads.status} = 'open' + `, + [] + ), + ]; + const guppWarnings = Number(guppRows[0]?.warnings ?? 0); + const guppEscalations = Number(guppRows[0]?.escalations ?? 
0); const stalledAgents = agentCounts.stalled; From 9f1ca1ef2c17c03c89aa015e36ac2cc743cee34a Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 09:41:14 -0500 Subject: [PATCH 26/44] =?UTF-8?q?fix(gastown):=20fix=20graceful=20drain=20?= =?UTF-8?q?=E2=80=94=20idle=20timer,=20starting=20agents,=20eviction=20che?= =?UTF-8?q?ckpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Exclude mayors from Phase 3 running count (they never exit on their own) - Wait for 'starting' agents in Phase 3; late-nudge when they become running - Reject new agent starts during drain (503 from control server) - Don't let server.heartbeat events clear the idle timer (agents could never time out because heartbeats kept resetting the timer) - Use 120s idle timeout on first drain idle (time for nudge to arrive), 10s on subsequent idles (agent has processed the nudge) - Phase 4 writes eviction context on bead body after force-pushing WIP so the next agent knows there's code on the branch - Clear checkpoint in unhookBead so stale checkpoints don't leak across bead reassignments - Add /debug endpoints for drain status, pending nudges, and eviction testing (dev-only send-message and graceful-stop) --- .../container/src/control-server.ts | 7 + .../container/src/process-manager.ts | 236 ++++++++++++++---- cloudflare-gastown/container/src/types.ts | 1 + cloudflare-gastown/src/dos/Town.do.ts | 50 +++- cloudflare-gastown/src/dos/town/agents.ts | 5 +- cloudflare-gastown/src/gastown.worker.ts | 45 ++++ .../src/handlers/rig-agents.handler.ts | 23 ++ 7 files changed, 307 insertions(+), 60 deletions(-) diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 1c71ed069..72e9dcbb7 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -11,6 +11,7 @@ import { getUptime, stopAll, drainAll, + isDraining, 
getAgentEvents, registerEventSink, } from './process-manager'; @@ -154,6 +155,7 @@ app.get('/health', c => { agents: activeAgentCount(), servers: activeServerCount(), uptime: getUptime(), + draining: isDraining() || undefined, }; return c.json(response); }); @@ -202,6 +204,11 @@ app.post('/sync-config', async c => { // POST /agents/start app.post('/agents/start', async c => { + if (isDraining()) { + console.warn('[control-server] /agents/start: rejected — container is draining'); + return c.json({ error: 'Container is draining, cannot start new agents' }, 503); + } + const body: unknown = await c.req.json().catch(() => null); const parsed = StartAgentRequest.safeParse(body); if (!parsed.success) { diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 0854bb433..d5a6b5423 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -35,9 +35,32 @@ const eventSinks = new Set<(agentId: string, event: string, data: unknown) => vo // Per-agent idle timers — fires exit when no nudges arrive const idleTimers = new Map>(); +// Server-level lifecycle events that should NOT cancel an agent's idle +// timer. These fire periodically (heartbeat) or on connect and don't +// represent actual agent work. Includes runtime-only types that aren't +// in the SDK's TS union (e.g. 'server.heartbeat'). +const IDLE_TIMER_IGNORE_EVENTS = new Set([ + 'server.heartbeat', + 'server.connected', + 'server.instance.disposed', +]); + let nextPort = 4096; const startTime = Date.now(); +// Set to true when drainAll() starts — prevents new agent starts and +// lets the drain loop nudge agents that transition to running mid-drain. +let _draining = false; + +// Tracks how many times each agent has gone idle since drain started. +// First idle = use normal timeout (nudge may be queued, needs time to arrive). 
+// Second+ idle = 10s (agent processed the nudge and is done). +const drainIdleCounts = new Map(); + +export function isDraining(): boolean { + return _draining; +} + // Mutex for ensureSDKServer — createKilo() reads process.cwd() and // process.env during startup, so concurrent calls with different workdirs // would corrupt each other's globals. This serializes server creation only; @@ -315,6 +338,48 @@ async function markNudgeDelivered(agent: ManagedAgent, nudgeId: string): Promise } } +/** + * Write eviction context on the agent's bead so the next agent dispatched + * to it knows there is WIP code pushed to a branch. Appends a note to the + * bead's body via the Gastown API. + * Best-effort: errors are logged but never propagated. + */ +async function writeEvictionCheckpoint( + agent: ManagedAgent, + context: { branch: string; agent_name: string; saved_at: string } +): Promise { + const authToken = + process.env.GASTOWN_CONTAINER_TOKEN ?? agent.gastownContainerToken ?? agent.gastownSessionToken; + if (!agent.gastownApiUrl || !authToken || !agent.townId || !agent.rigId) { + console.warn( + `${MANAGER_LOG} writeEvictionCheckpoint: missing API credentials for ${agent.agentId}` + ); + return; + } + + try { + const resp = await fetch( + `${agent.gastownApiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/eviction-context`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${authToken}`, + 'X-Gastown-Agent-Id': agent.agentId, + 'X-Gastown-Rig-Id': agent.rigId, + }, + body: JSON.stringify(context), + signal: AbortSignal.timeout(5_000), + } + ); + if (!resp.ok) { + console.warn(`${MANAGER_LOG} writeEvictionCheckpoint: ${resp.status} for ${agent.agentId}`); + } + } catch (err) { + console.warn(`${MANAGER_LOG} writeEvictionCheckpoint: error for ${agent.agentId}:`, err); + } +} + /** * Clear the idle timer for an agent (if any). 
*/ @@ -370,19 +435,29 @@ async function handleIdleEvent(agent: ManagedAgent, onExit: () => void): Promise } // No nudges (or fetch error) — (re)start the idle timeout. - // Refineries get a longer timeout because their workflow is multi-step - // (diff → analyze → decide → merge/rework). The 2-min default kills the - // session between LLM turns when the refinery responds with text before - // issuing a tool call. See #1342. + // During drain: first idle uses normal timeout (the mayor's nudge via + // gt_nudge queues a message that hasn't been processed yet — the agent + // needs to go idle, then the SDK delivers the queued nudge, then the + // agent processes it). Second+ idle uses 10s (agent saw the nudge and + // is done). clearIdleTimer(agentId); - const timeoutMs = - agent.role === 'refinery' - ? process.env.REFINERY_IDLE_TIMEOUT_MS !== undefined - ? Number(process.env.REFINERY_IDLE_TIMEOUT_MS) - : 600_000 - : process.env.AGENT_IDLE_TIMEOUT_MS !== undefined - ? Number(process.env.AGENT_IDLE_TIMEOUT_MS) - : 120_000; + let timeoutMs: number; + if (_draining) { + const idleCount = (drainIdleCounts.get(agentId) ?? 0) + 1; + drainIdleCounts.set(agentId, idleCount); + // First idle: give the agent time to receive and process the nudge. + // Second+: agent has had its chance, use aggressive timeout. + timeoutMs = idleCount <= 1 ? 120_000 : 10_000; + } else { + timeoutMs = + agent.role === 'refinery' + ? process.env.REFINERY_IDLE_TIMEOUT_MS !== undefined + ? Number(process.env.REFINERY_IDLE_TIMEOUT_MS) + : 600_000 + : process.env.AGENT_IDLE_TIMEOUT_MS !== undefined + ? Number(process.env.AGENT_IDLE_TIMEOUT_MS) + : 120_000; + } console.log( `${MANAGER_LOG} handleIdleEvent: no nudges for ${agentId}, idle timeout in ${timeoutMs}ms` @@ -491,8 +566,10 @@ async function subscribeToEvents( // handleIdleEvent is async; we run it in the background so the event // loop continues. The exitAgent callback will abort the stream if needed. 
void handleIdleEvent(agent, exitAgent); - } else { - // Non-idle event means the agent resumed work — cancel any pending idle timer. + } else if (!IDLE_TIMER_IGNORE_EVENTS.has(event.type ?? '')) { + // Non-idle event means the agent resumed work — cancel any pending + // idle timer. But skip server-level lifecycle events (heartbeats, + // connections) that don't represent actual agent activity. clearIdleTimer(agent.agentId); } @@ -988,6 +1065,7 @@ export function activeServerCount(): number { */ export async function drainAll(): Promise { const DRAIN_LOG = '[drain]'; + _draining = true; // ── Phase 1: Notify TownDO ────────────────────────────────────────── try { @@ -1018,62 +1096,88 @@ export async function drainAll(): Promise { console.warn(`${DRAIN_LOG} Phase 1: TownDO notification failed, continuing:`, err); } - // ── Phase 2: Nudge running agents to save ─────────────────────────── + // ── Phase 2: Directly nudge running non-mayor agents ────────────────── + // We use sendMessage() to inject an eviction notice directly into each + // agent's SDK session. This is a local call (no round-trip through the + // TownDO) so it works even after SIGTERM — the container's HTTP server + // is still up but the Cloudflare runtime blocks inbound requests from + // other DOs, which breaks the gt_nudge → TownDO → container.fetch() path. + const nudgedAgents = new Set(); + + const nudgeAgent = async (agent: ManagedAgent): Promise => { + if (nudgedAgents.has(agent.agentId)) return false; + nudgedAgents.add(agent.agentId); + + if (agent.role === 'mayor' || agent.role === 'triage') return false; + + const nudgeMessage = + 'URGENT: The container is being evicted. Commit and push all your current changes RIGHT NOW. ' + + 'Do NOT call gt_done — the system will handle the bead state. 
' + + 'Just git add -A, git commit, and git push your work-in-progress, then stop working.'; + + clearIdleTimer(agent.agentId); + console.log( + `${DRAIN_LOG} Phase 2: nudging ${agent.role} agent ${agent.agentId} (session=${agent.sessionId})` + ); + await sendMessage(agent.agentId, nudgeMessage); + console.log(`${DRAIN_LOG} Phase 2: nudge delivered to ${agent.agentId}`); + return true; + }; + const allAgents = [...agents.values()]; - const runningAgents = allAgents.filter(a => a.status === 'running'); + const runningAgents = allAgents.filter(a => a.status === 'running' && a.role !== 'mayor'); console.log( - `${DRAIN_LOG} Phase 2: ${runningAgents.length} running of ${allAgents.length} total agents. ` + - `All statuses: ${allAgents.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ')}` + `${DRAIN_LOG} Phase 2: ${runningAgents.length} nudgeable of ${allAgents.length} total agents. ` + + `Statuses: ${allAgents.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ')}` ); for (const agent of runningAgents) { try { - let nudgeMessage: string | null = null; - - if (agent.role === 'polecat') { - nudgeMessage = - 'URGENT: The container is shutting down in ~15 minutes. Please commit and push your current changes immediately, then call gt_done. You have 2 minutes before a forced save.'; - } else if (agent.role === 'refinery') { - nudgeMessage = - 'URGENT: The container is shutting down. If your review is complete, call gt_done now. Otherwise your work will be pushed as a WIP commit.'; - } - // Mayor and other roles: no nudge needed - - if (nudgeMessage) { - // Cancel the idle timer before nudging — if the agent was - // already idle, the timer could fire mid-nudge and exit the - // agent before it processes the eviction message. 
- clearIdleTimer(agent.agentId); - const hasInstance = sdkInstances.has(agent.workdir); - console.log( - `${DRAIN_LOG} Phase 2: nudging ${agent.role} agent ${agent.agentId} ` + - `(session=${agent.sessionId}, sdkInstance=${hasInstance})` - ); - await sendMessage(agent.agentId, nudgeMessage); - console.log(`${DRAIN_LOG} Phase 2: nudge delivered to ${agent.agentId}`); - } else { - console.log( - `${DRAIN_LOG} Phase 2: skipping ${agent.role} agent ${agent.agentId} (no nudge for this role)` - ); - } + await nudgeAgent(agent); } catch (err) { console.warn( - `${DRAIN_LOG} Phase 2: failed to nudge agent ${agent.agentId} (${agent.role}):`, + `${DRAIN_LOG} Phase 2: failed to nudge ${agent.agentId} (${agent.role}):`, err instanceof Error ? err.message : err ); } } // ── Phase 3: Wait up to 10 minutes ────────────────────────────────── + // Exclude mayors from the running count — they are persistent and will + // never exit on their own. They are frozen in Phase 4 if still running. + // Also late-nudge agents that transition from `starting` to `running` + // during the wait. const DRAIN_WAIT_MS = 10 * 60 * 1000; const pollInterval = 5000; const start = Date.now(); console.log(`${DRAIN_LOG} Phase 3: waiting up to ${DRAIN_WAIT_MS / 1000}s for agents to finish`); while (Date.now() - start < DRAIN_WAIT_MS) { - const running = [...agents.values()].filter(a => a.status === 'running'); - if (running.length === 0) break; - console.log(`${DRAIN_LOG} Waiting for ${running.length} agents...`); + // Include both `running` and `starting` agents in the wait. + // Starting agents may finish their clone/setup and transition to + // running during the wait — they'll be late-nudged below. + const active = [...agents.values()].filter( + a => (a.status === 'running' || a.status === 'starting') && a.role !== 'mayor' + ); + if (active.length === 0) break; + + // Late-nudge agents that became running since Phase 2. 
+ for (const agent of active) { + if (agent.status !== 'running' || nudgedAgents.has(agent.agentId)) continue; + try { + await nudgeAgent(agent); + } catch (err) { + console.warn( + `${DRAIN_LOG} Phase 3: late nudge failed for ${agent.agentId}:`, + err instanceof Error ? err.message : err + ); + } + } + + console.log( + `${DRAIN_LOG} Waiting for ${active.length} non-mayor agents: ` + + active.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ') + ); await new Promise(r => setTimeout(r, pollInterval)); } @@ -1083,7 +1187,9 @@ export async function drainAll(): Promise { // worktree. Freezing first prevents the normal completion path // (idle timer → onExit → bead completion) from racing with the WIP // git save, and avoids .git/index.lock collisions with agent git ops. - const stragglers = [...agents.values()].filter(a => a.status === 'running'); + const stragglers = [...agents.values()].filter( + a => a.status === 'running' || a.status === 'starting' + ); if (stragglers.length > 0) { console.log(`${DRAIN_LOG} Phase 4: freezing ${stragglers.length} straggler(s)`); } else { @@ -1153,15 +1259,13 @@ export async function drainAll(): Promise { ? "git add -A && git commit --allow-empty -m 'WIP: container eviction save' && git push --set-upstream origin HEAD" : "git add -A && git commit --allow-empty -m 'WIP: container eviction save'"; - if (!hasOrigin) { + if (!hasOrigin && agent.role !== 'mayor' && agent.role !== 'triage') { console.warn( - `${DRAIN_LOG} Phase 4: no origin remote for agent ${agent.agentId}, committing locally only (push skipped)` + `${DRAIN_LOG} Phase 4: no origin remote for ${agent.role} agent ${agent.agentId}, committing locally only (push skipped)` ); } // Use the agent's startup env for git author/committer identity. - // The control-server's process.env may not have GIT_AUTHOR_NAME set, - // but the agent's startupEnv (captured at spawn time) does. 
const gitEnv: Record = { ...process.env }; const authorName = agent.startupEnv?.GIT_AUTHOR_NAME ?? process.env.GASTOWN_GIT_AUTHOR_NAME ?? 'Gastown'; @@ -1188,6 +1292,28 @@ export async function drainAll(): Promise { (stdout ? ` stdout=${stdout.trim()}` : '') + (stderr ? ` stderr=${stderr.trim()}` : '') ); + + // 4c: Write eviction context on the bead so the next agent + // dispatched to it knows there is WIP code on the branch. + // Must happen BEFORE reportAgentCompleted (which unhooks the agent). + if (hasOrigin && exitCode === 0 && agent.role === 'polecat') { + const branchProc = Bun.spawn(['git', 'rev-parse', '--abbrev-ref', 'HEAD'], { + cwd: agent.workdir, + stdout: 'pipe', + stderr: 'pipe', + }); + const branchName = (await new Response(branchProc.stdout).text()).trim(); + await branchProc.exited; + + console.log( + `${DRAIN_LOG} Phase 4: writing eviction context for agent ${agent.agentId}: branch=${branchName}` + ); + await writeEvictionCheckpoint(agent, { + branch: branchName, + agent_name: agent.name, + saved_at: new Date().toISOString(), + }); + } } catch (err) { console.warn(`${DRAIN_LOG} Phase 4: force-save failed for agent ${agent.agentId}:`, err); } diff --git a/cloudflare-gastown/container/src/types.ts b/cloudflare-gastown/container/src/types.ts index da458e349..d2edabd3f 100644 --- a/cloudflare-gastown/container/src/types.ts +++ b/cloudflare-gastown/container/src/types.ts @@ -152,6 +152,7 @@ export type HealthResponse = { agents: number; servers: number; uptime: number; + draining?: boolean; }; // ── Kilo serve instance ───────────────────────────────────────────────── diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 4f455c175..b2bfd66bf 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -1220,6 +1220,27 @@ export class TownDO extends DurableObject { return agents.readCheckpoint(this.sql, agentId); } + /** + * Append eviction context to a bead's body so 
the next agent dispatched + * to it knows there is WIP code on a branch. Called by the container's + * Phase 4 force-save after pushing the WIP commit. + */ + async writeBeadEvictionContext( + agentId: string, + context: { branch: string; agent_name: string; saved_at: string } + ): Promise { + const agent = agents.getAgent(this.sql, agentId); + if (!agent?.current_hook_bead_id) return; + const bead = beadOps.getBead(this.sql, agent.current_hook_bead_id); + if (!bead) return; + const evictionNote = + `\n\n---\n**Container eviction note:** ${context.agent_name} pushed WIP progress ` + + `to branch \`${context.branch}\` before container eviction at ${context.saved_at}. ` + + `Pick up from where they left off — pull the branch and continue the work.`; + const updatedBody = (bead.body ?? '') + evictionNote; + beadOps.updateBeadFields(this.sql, bead.bead_id, { body: updatedBody }, 'system'); + } + // ── Heartbeat ───────────────────────────────────────────────────── /** @@ -4080,10 +4101,7 @@ export class TownDO extends DurableObject { * Return a structured snapshot of the alarm loop and patrol state * for the dashboard Status tab. 
*/ - async getAlarmStatus(cached?: { - activeWork?: boolean; - triageCount?: number; - }): Promise<{ + async getAlarmStatus(cached?: { activeWork?: boolean; triageCount?: number }): Promise<{ alarm: { nextFireAt: string | null; intervalMs: number; @@ -4428,6 +4446,30 @@ export class TownDO extends DurableObject { } // DEBUG: raw agent_metadata dump — remove after debugging + async debugPendingNudges(): Promise { + return [ + ...query( + this.sql, + /* sql */ ` + SELECT ${agent_nudges.nudge_id}, + ${agent_nudges.agent_bead_id}, + ${agent_nudges.message}, + ${agent_nudges.mode}, + ${agent_nudges.priority}, + ${agent_nudges.source}, + ${agent_nudges.created_at}, + ${agent_nudges.delivered_at}, + ${agent_nudges.expires_at} + FROM ${agent_nudges} + WHERE ${agent_nudges.delivered_at} IS NULL + ORDER BY ${agent_nudges.created_at} DESC + LIMIT 20 + `, + [] + ), + ]; + } + async debugAgentMetadata(): Promise { return [ ...query( diff --git a/cloudflare-gastown/src/dos/town/agents.ts b/cloudflare-gastown/src/dos/town/agents.ts index c70d38350..a723ce6d9 100644 --- a/cloudflare-gastown/src/dos/town/agents.ts +++ b/cloudflare-gastown/src/dos/town/agents.ts @@ -324,12 +324,15 @@ export function unhookBead(sql: SqlStorage, agentId: string): void { const beadId = agent.current_hook_bead_id; + // Clear checkpoint when unhooking — the agent is done with this bead + // and the checkpoint (if any) should not leak into the next dispatch. query( sql, /* sql */ ` UPDATE ${agent_metadata} SET ${agent_metadata.columns.current_hook_bead_id} = NULL, - ${agent_metadata.columns.status} = 'idle' + ${agent_metadata.columns.status} = 'idle', + ${agent_metadata.columns.checkpoint} = NULL WHERE ${agent_metadata.bead_id} = ? 
`, [agentId] diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index e6353b20f..be9390b7d 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -37,6 +37,7 @@ import { handleRequestChanges, handleAgentCompleted, handleWriteCheckpoint, + handleWriteEvictionContext, handleCheckMail, handleHeartbeat, handleGetOrCreateAgent, @@ -261,6 +262,45 @@ app.post('/debug/towns/:townId/replay-events', async c => { return c.json(result); }); +app.get('/debug/towns/:townId/drain-status', async c => { + const townId = c.req.param('townId'); + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable -- DO RPC returns promise at runtime + const draining = await town.isDraining(); + // eslint-disable-next-line @typescript-eslint/await-thenable + const drainNonce = await town.getDrainNonce(); + return c.json({ draining, drainNonce }); +}); + +app.get('/debug/towns/:townId/nudges', async c => { + const townId = c.req.param('townId'); + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable -- DO RPC returns promise at runtime + const nudges = await town.debugPendingNudges(); + return c.json({ nudges }); +}); + +app.post('/debug/towns/:townId/send-message', async c => { + if (c.env.ENVIRONMENT !== 'development') return c.json({ error: 'dev only' }, 403); + const townId = c.req.param('townId'); + const body: { message: string; model?: string } = await c.req.json(); + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable + const result = await town.sendMayorMessage( + body.message, + body.model ?? 
'anthropic/claude-sonnet-4.6' + ); + return c.json(result); +}); + +app.post('/debug/towns/:townId/graceful-stop', async c => { + if (c.env.ENVIRONMENT !== 'development') return c.json({ error: 'dev only' }, 403); + const townId = c.req.param('townId'); + const containerStub = getTownContainerStub(c.env, townId); + await containerStub.stop(); + return c.json({ stopped: true }); +}); + // ── Town ID + Auth ────────────────────────────────────────────────────── // All rig routes live under /api/towns/:townId/rigs/:rigId so the townId // is always available from the URL path. @@ -387,6 +427,11 @@ app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/checkpoint', c => handleWriteCheckpoint(c, c.req.param()) ) ); +app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/eviction-context', c => + instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/agents/:agentId/eviction-context', () => + handleWriteEvictionContext(c, c.req.param()) + ) +); app.get('/api/towns/:townId/rigs/:rigId/agents/:agentId/mail', c => instrumented(c, 'GET /api/towns/:townId/rigs/:rigId/agents/:agentId/mail', () => handleCheckMail(c, c.req.param()) diff --git a/cloudflare-gastown/src/handlers/rig-agents.handler.ts b/cloudflare-gastown/src/handlers/rig-agents.handler.ts index f1dd8062a..a9f3e2dc2 100644 --- a/cloudflare-gastown/src/handlers/rig-agents.handler.ts +++ b/cloudflare-gastown/src/handlers/rig-agents.handler.ts @@ -179,6 +179,29 @@ export async function handleWriteCheckpoint( return c.json(resSuccess({ written: true })); } +const EvictionContextBody = z.object({ + branch: z.string(), + agent_name: z.string(), + saved_at: z.string(), +}); + +export async function handleWriteEvictionContext( + c: Context, + params: { rigId: string; agentId: string } +) { + const parsed = EvictionContextBody.safeParse(await parseJsonBody(c)); + if (!parsed.success) { + return c.json( + { success: false, error: 'Invalid request body', issues: parsed.error.issues }, + 400 + ); + } + const townId = 
c.get('townId'); + const town = getTownDOStub(c.env, townId); + await town.writeBeadEvictionContext(params.agentId, parsed.data); + return c.json(resSuccess({ written: true })); +} + export async function handleCheckMail( c: Context, params: { rigId: string; agentId: string } From 5f577f2bb92a7ceaa55cb130f0b77548acb2c007 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 09:47:40 -0500 Subject: [PATCH 27/44] fix(gastown): restrict container restart/destroy to org owners, skip triage-request beads in Rule 1b - forceRestartContainer and destroyContainer now check org owner/creator role before allowing the operation, matching updateTownConfig's auth - reconciler Rule 1b skips gt:triage-request beads whose assignee is intentionally set without a hook (patrol routes them to specific agents) --- cloudflare-gastown/src/dos/town/reconciler.ts | 7 +++++ cloudflare-gastown/src/trpc/router.ts | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/cloudflare-gastown/src/dos/town/reconciler.ts b/cloudflare-gastown/src/dos/town/reconciler.ts index 111f4c421..13b799cc7 100644 --- a/cloudflare-gastown/src/dos/town/reconciler.ts +++ b/cloudflare-gastown/src/dos/town/reconciler.ts @@ -648,6 +648,13 @@ export function reconcileBeads(sql: SqlStorage, opts?: { draining?: boolean }): // are handled by other subsystems and don't need dispatch. if (bead.assignee_agent_bead_id === 'system') continue; + // Skip triage-request beads — patrol.createTriageRequest() sets + // assignee_agent_bead_id to route the request to a specific agent, + // but hookBead() intentionally refuses to hook triage-request beads. + // Without this skip, the reconciler would clear the assignee on + // every tick because the hook will never exist. 
+ if (bead.labels.includes('gt:triage-request')) continue; + actions.push({ type: 'clear_bead_assignee', bead_id: bead.bead_id, diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index 750dc7174..6dc169164 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -1071,6 +1071,19 @@ export const gastownRouter = router({ message: 'Admins cannot restart containers for towns they do not own', }); } + if (ownership.type === 'org') { + const townStub = getTownDOStub(ctx.env, input.townId); + const config = await townStub.getTownConfig(); + const membership = getOrgMembership(ctx.orgMemberships, ownership.orgId); + const isOrgOwner = membership?.role === 'owner'; + const isTownCreator = ctx.userId === config.created_by_user_id; + if (!isOrgOwner && !isTownCreator) { + throw new TRPCError({ + code: 'FORBIDDEN', + message: 'Only town creators and org owners can restart containers', + }); + } + } // stop() sends SIGTERM so the container's drain handler can run // drainAll() — nudging agents to commit/push WIP before exiting. const containerStub = getTownContainerStub(ctx.env, input.townId); @@ -1087,6 +1100,19 @@ export const gastownRouter = router({ message: 'Admins cannot destroy containers for towns they do not own', }); } + if (ownership.type === 'org') { + const townStub = getTownDOStub(ctx.env, input.townId); + const config = await townStub.getTownConfig(); + const membership = getOrgMembership(ctx.orgMemberships, ownership.orgId); + const isOrgOwner = membership?.role === 'owner'; + const isTownCreator = ctx.userId === config.created_by_user_id; + if (!isOrgOwner && !isTownCreator) { + throw new TRPCError({ + code: 'FORBIDDEN', + message: 'Only town creators and org owners can destroy containers', + }); + } + } // destroy() sends SIGKILL — the container dies immediately with // no graceful drain. Use when the container is stuck or unresponsive. 
const containerStub = getTownContainerStub(ctx.env, input.townId); From f218ed87061c2f84b27b75baf194409fde7174a2 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 10:11:30 -0500 Subject: [PATCH 28/44] fix(gastown): report evicted agents as completed to TownDO in Phase 4 Without this, the bead stays in_progress and the agent stays working until stale-bead recovery timeouts fire (~5 min). Now Phase 4 calls reportAgentCompleted after the eviction context write and force-save, so the TownDO can unhook the agent and let the reconciler reset the bead to open immediately. --- cloudflare-gastown/container/src/process-manager.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index d5a6b5423..da6d8f371 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -1314,6 +1314,13 @@ export async function drainAll(): Promise { saved_at: new Date().toISOString(), }); } + + // 4d: Report the agent as completed so the TownDO can unhook it + // and transition the bead. Without this, the bead stays in_progress + // and the agent stays working until stale-bead recovery kicks in. + if (agent.role !== 'mayor' && agent.role !== 'triage') { + await reportAgentCompleted(agent, 'completed', 'container eviction'); + } } catch (err) { console.warn(`${DRAIN_LOG} Phase 4: force-save failed for agent ${agent.agentId}:`, err); } From 36e32610e2778edce08d0f516ec2abfa9c0b5f09 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 10:39:41 -0500 Subject: [PATCH 29/44] fix(gastown): reset evicted beads to open immediately instead of waiting for Rule 3 When agentCompleted fires with reason='container eviction', the bead is reset to open right away so the reconciler can re-dispatch on the next tick. Previously the bead stayed in_progress for ~5 min until Rule 3's stale timeout detected it. 
--- .../src/dos/town/review-queue.ts | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cloudflare-gastown/src/dos/town/review-queue.ts b/cloudflare-gastown/src/dos/town/review-queue.ts index 50a32c354..c4d4629d4 100644 --- a/cloudflare-gastown/src/dos/town/review-queue.ts +++ b/cloudflare-gastown/src/dos/town/review-queue.ts @@ -716,12 +716,24 @@ export function agentCompleted( source: 'container', }); } else if (hookedBead && hookedBead.status === 'in_progress') { - // Agent exited 'completed' but bead is still in_progress — gt_done was never called. - // Don't close the bead. Rule 3 will handle rework. - console.log( - `[review-queue] agentCompleted: polecat ${agentId} exited without gt_done — ` + - `bead ${agent.current_hook_bead_id} stays in_progress (Rule 3 will recover)` - ); + if (input.reason === 'container eviction') { + // Container eviction: WIP was force-pushed and eviction context + // was written on the bead body. Reset to open immediately so the + // reconciler can re-dispatch on the next tick instead of waiting + // for Rule 3's stale timeout. + console.log( + `[review-queue] agentCompleted: polecat ${agentId} evicted — ` + + `resetting bead ${agent.current_hook_bead_id} to open` + ); + updateBeadStatus(sql, agent.current_hook_bead_id, 'open', agentId); + } else { + // Agent exited 'completed' but bead is still in_progress — gt_done was never called. + // Don't close the bead. Rule 3 will handle rework. + console.log( + `[review-queue] agentCompleted: polecat ${agentId} exited without gt_done — ` + + `bead ${agent.current_hook_bead_id} stays in_progress (Rule 3 will recover)` + ); + } } else if (hookedBead && hookedBead.status === 'open') { // Bead is open (wasn't dispatched yet or was already reset). No-op. 
} else { From 706994e3d7a5810aa0f3f4fad9fbbfb02d54c371 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 11:31:04 -0500 Subject: [PATCH 30/44] fix(gastown): clear stale assignee on evicted beads, bump max_instances to 700 - Eviction reopen path now clears assignee_agent_bead_id to NULL so the reconciler can re-dispatch the bead immediately via Rule 1 - Bump container max_instances from 600 to 700 --- cloudflare-gastown/src/dos/town/review-queue.ts | 14 +++++++++++--- cloudflare-gastown/wrangler.jsonc | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cloudflare-gastown/src/dos/town/review-queue.ts b/cloudflare-gastown/src/dos/town/review-queue.ts index c4d4629d4..44c7e26bf 100644 --- a/cloudflare-gastown/src/dos/town/review-queue.ts +++ b/cloudflare-gastown/src/dos/town/review-queue.ts @@ -718,14 +718,22 @@ export function agentCompleted( } else if (hookedBead && hookedBead.status === 'in_progress') { if (input.reason === 'container eviction') { // Container eviction: WIP was force-pushed and eviction context - // was written on the bead body. Reset to open immediately so the - // reconciler can re-dispatch on the next tick instead of waiting - // for Rule 3's stale timeout. + // was written on the bead body. Reset to open and clear the + // stale assignee so the reconciler can re-dispatch immediately. console.log( `[review-queue] agentCompleted: polecat ${agentId} evicted — ` + `resetting bead ${agent.current_hook_bead_id} to open` ); updateBeadStatus(sql, agent.current_hook_bead_id, 'open', agentId); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.assignee_agent_bead_id} = NULL + WHERE ${beads.bead_id} = ? + `, + [agent.current_hook_bead_id] + ); } else { // Agent exited 'completed' but bead is still in_progress — gt_done was never called. // Don't close the bead. Rule 3 will handle rework. 
diff --git a/cloudflare-gastown/wrangler.jsonc b/cloudflare-gastown/wrangler.jsonc index e4814659b..1aafcd35b 100644 --- a/cloudflare-gastown/wrangler.jsonc +++ b/cloudflare-gastown/wrangler.jsonc @@ -36,7 +36,7 @@ "class_name": "TownContainerDO", "image": "./container/Dockerfile", "instance_type": "standard-4", - "max_instances": 600, + "max_instances": 700, }, ], From f051d46392c9f1334eee8d0b553baadcb346fca3 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 11:42:30 -0500 Subject: [PATCH 31/44] fix(gastown): update dev GASTOWN_API_URL to port 8803 --- cloudflare-gastown/wrangler.jsonc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudflare-gastown/wrangler.jsonc b/cloudflare-gastown/wrangler.jsonc index 1aafcd35b..f045ffd72 100644 --- a/cloudflare-gastown/wrangler.jsonc +++ b/cloudflare-gastown/wrangler.jsonc @@ -117,7 +117,7 @@ // Desktop VM host gateway IP directly so containers can reach // the host's dev servers. "KILO_API_URL": "http://192.168.65.254:3000", - "GASTOWN_API_URL": "http://192.168.65.254:8787", + "GASTOWN_API_URL": "http://192.168.65.254:8803", }, "containers": [ { From 27fabe1556a3a1ffbe2f524db143e7c1b15680e8 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 11:55:17 -0500 Subject: [PATCH 32/44] fix(gastown): skip stale-heartbeat and GUPP checks during container drain When drain starts, stopHeartbeat() freezes last_activity_at and last_event_at. Without this fix: - reconcileAgents sets working agents to idle after 90s (stale heartbeat) causing the dashboard to show agents as idle while still running - reconcileGUPP sends false 'idle for 15 minutes' nudges to agents that are actively working in the draining container Both reconcileAgents and reconcileGUPP now receive the draining flag and skip their checks when the container is being evicted. 
--- cloudflare-gastown/src/dos/town/reconciler.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cloudflare-gastown/src/dos/town/reconciler.ts b/cloudflare-gastown/src/dos/town/reconciler.ts index 13b799cc7..e3129896e 100644 --- a/cloudflare-gastown/src/dos/town/reconciler.ts +++ b/cloudflare-gastown/src/dos/town/reconciler.ts @@ -381,11 +381,11 @@ export function applyEvent(sql: SqlStorage, event: TownEventRecord): void { export function reconcile(sql: SqlStorage, opts?: { draining?: boolean }): Action[] { const draining = opts?.draining ?? false; const actions: Action[] = []; - actions.push(...reconcileAgents(sql)); + actions.push(...reconcileAgents(sql, { draining })); actions.push(...reconcileBeads(sql, { draining })); actions.push(...reconcileReviewQueue(sql, { draining })); actions.push(...reconcileConvoys(sql)); - actions.push(...reconcileGUPP(sql)); + actions.push(...reconcileGUPP(sql, { draining })); actions.push(...reconcileGC(sql)); return actions; } @@ -395,7 +395,7 @@ export function reconcile(sql: SqlStorage, opts?: { draining?: boolean }): Actio // idle agents with stale hooks to terminal beads // ════════════════════════════════════════════════════════════════════ -export function reconcileAgents(sql: SqlStorage): Action[] { +export function reconcileAgents(sql: SqlStorage, opts?: { draining?: boolean }): Action[] { const actions: Action[] = []; // Working agents with stale or missing heartbeat — container probably dead. @@ -426,6 +426,11 @@ export function reconcileAgents(sql: SqlStorage): Action[] { // Mayors are always working with no hook — skip them if (agent.role === 'mayor') continue; + // During container drain the heartbeat reporter is stopped, so + // last_activity_at freezes. Skip stale-heartbeat checks to avoid + // false-positive idle transitions while agents are still working. 
+ if (opts?.draining) continue; + if (!agent.last_activity_at) { // No heartbeat ever received — container may have failed to start actions.push({ @@ -1472,7 +1477,12 @@ export function reconcileConvoys(sql: SqlStorage): Action[] { // reconcileGUPP — detect agents exceeding activity thresholds // ════════════════════════════════════════════════════════════════════ -export function reconcileGUPP(sql: SqlStorage): Action[] { +export function reconcileGUPP(sql: SqlStorage, opts?: { draining?: boolean }): Action[] { + // During container drain the heartbeat reporter is stopped, so + // last_event_at freezes. Skip GUPP checks entirely to avoid + // false-positive "idle for 15 minutes" nudges while agents are + // still actively working in the draining container. + if (opts?.draining) return []; const actions: Action[] = []; const workingAgents = AgentRow.array().parse([ From 267828b77664d43419f7f678afa3317ae9cfdbf1 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 12:07:08 -0500 Subject: [PATCH 33/44] fix(gastown): use nudge-aware drain idle timeout instead of idle count Nudged agents get 10s timeout (they've been told to wrap up). Un-nudged agents get 120s (SDK may still be queueing the message). Previously the idle-count approach gave ALL agents 120s on first idle, causing the refinery to sit for 2 min after processing the nudge before the timer fired. --- .../container/src/process-manager.ts | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index da6d8f371..1136adb99 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -52,10 +52,11 @@ const startTime = Date.now(); // lets the drain loop nudge agents that transition to running mid-drain. let _draining = false; -// Tracks how many times each agent has gone idle since drain started. 
-// First idle = use normal timeout (nudge may be queued, needs time to arrive). -// Second+ idle = 10s (agent processed the nudge and is done). -const drainIdleCounts = new Map(); +// Agents that were successfully nudged about eviction in Phase 2/3. +// Used by handleIdleEvent to decide the drain idle timeout: +// nudged agents → 10s (they've been told to wrap up) +// un-nudged agents → 120s (nudge may still be in SDK queue) +const drainNudgedAgents = new Set(); export function isDraining(): boolean { return _draining; @@ -435,19 +436,16 @@ async function handleIdleEvent(agent: ManagedAgent, onExit: () => void): Promise } // No nudges (or fetch error) — (re)start the idle timeout. - // During drain: first idle uses normal timeout (the mayor's nudge via - // gt_nudge queues a message that hasn't been processed yet — the agent - // needs to go idle, then the SDK delivers the queued nudge, then the - // agent processes it). Second+ idle uses 10s (agent saw the nudge and - // is done). + // During drain: + // - Nudged agents get 10s: they've been told to wrap up, this idle + // means they finished processing the nudge. + // - Un-nudged agents get 120s: the nudge may still be queued in the + // SDK (session.prompt enqueues behind the active turn). The agent + // needs time to finish its turn, then process the nudge. clearIdleTimer(agentId); let timeoutMs: number; if (_draining) { - const idleCount = (drainIdleCounts.get(agentId) ?? 0) + 1; - drainIdleCounts.set(agentId, idleCount); - // First idle: give the agent time to receive and process the nudge. - // Second+: agent has had its chance, use aggressive timeout. - timeoutMs = idleCount <= 1 ? 120_000 : 10_000; + timeoutMs = drainNudgedAgents.has(agentId) ? 
10_000 : 120_000; } else { timeoutMs = agent.role === 'refinery' @@ -1107,6 +1105,7 @@ export async function drainAll(): Promise { const nudgeAgent = async (agent: ManagedAgent): Promise => { if (nudgedAgents.has(agent.agentId)) return false; nudgedAgents.add(agent.agentId); + drainNudgedAgents.add(agent.agentId); if (agent.role === 'mayor' || agent.role === 'triage') return false; From d39ef8954d053d78416e495f8bea6507e4c2de91 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 12:18:45 -0500 Subject: [PATCH 34/44] fix(gastown): set agent status to running before initial prompt, nudge in parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Set agent.status = 'running' before session.prompt() instead of after. The event subscription is already active and events flow (the agent is functionally running). session.prompt() can block if the SDK server is busy, leaving the agent stuck in 'starting' despite being active — causing drain to wait indefinitely. - Send Phase 2 nudges via Promise.allSettled instead of sequentially. sendMessage blocks until the SDK accepts the prompt, so nudging agents one-by-one delays later agents until earlier ones unblock. --- .../container/src/process-manager.ts | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 1136adb99..03812bbf9 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -688,6 +688,15 @@ export async function startAgent( // 3. Subscribe to events (async, runs in background) void subscribeToEvents(client, agent, request); + // Mark as running BEFORE the initial prompt. The event subscription + // is already active and events may be flowing (the agent is + // functionally running). 
session.prompt() can block if the SDK + // server is busy, which would leave the agent stuck in 'starting' + // despite being active — causing the drain to wait indefinitely. + if (agent.status === 'starting') { + agent.status = 'running'; + } + // 4. Send the initial prompt // The model string is an OpenRouter-style ID like "anthropic/claude-sonnet-4.6". // The kilo provider (which wraps OpenRouter) takes the FULL model string as modelID. @@ -705,10 +714,6 @@ export async function startAgent( ...(request.systemPrompt ? { system: request.systemPrompt } : {}), }, }); - - if (agent.status === 'starting') { - agent.status = 'running'; - } agent.messageCount = 1; log.info('agent.start', { @@ -1130,16 +1135,19 @@ export async function drainAll(): Promise { `Statuses: ${allAgents.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ')}` ); - for (const agent of runningAgents) { - try { - await nudgeAgent(agent); - } catch (err) { - console.warn( - `${DRAIN_LOG} Phase 2: failed to nudge ${agent.agentId} (${agent.role}):`, - err instanceof Error ? err.message : err - ); - } - } + // Fire all nudges in parallel — sendMessage can block if the SDK server + // is busy with the agent's current turn. Sequential nudging would delay + // later agents until earlier ones unblock. + await Promise.allSettled( + runningAgents.map(agent => + nudgeAgent(agent).catch(err => { + console.warn( + `${DRAIN_LOG} Phase 2: failed to nudge ${agent.agentId} (${agent.role}):`, + err instanceof Error ? 
err.message : err + ); + }) + ) + ); // ── Phase 3: Wait up to 10 minutes ────────────────────────────────── // Exclude mayors from the running count — they are persistent and will From 6d9cea8c101d4dde70f8b1094a6d9d132a1306d8 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 12:40:26 -0500 Subject: [PATCH 35/44] feat(gastown): simplify drain to wait-only (no nudge), add drain status banner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Container drain: - Remove Phase 2 nudging entirely — agents complete their current work naturally through gt_done and the normal idle timeout - Reduce to 3 phases: notify TownDO, wait 5 min, force-save stragglers - 10s idle timeout during drain for agents that finish their work UI: - Add DrainStatusBanner component to town overview page - Shows amber banner when container drain is active with elapsed time - Includes Force Shutdown button (calls destroyContainer) - Polls getDrainStatus every 5s Backend: - Add getDrainStatus tRPC query - Add getDrainStartedAt() to TownDO - Add draining/drainStartedAt to getAlarmStatus broadcast --- .../container/src/process-manager.ts | 128 ++++-------------- cloudflare-gastown/src/dos/Town.do.ts | 11 ++ cloudflare-gastown/src/trpc/router.ts | 16 +++ .../[townId]/TownOverviewPageClient.tsx | 2 + src/components/gastown/DrainStatusBanner.tsx | 78 +++++++++++ src/components/gastown/TerminalBar.tsx | 2 + src/lib/gastown/types/router.d.ts | 20 +++ 7 files changed, 159 insertions(+), 98 deletions(-) create mode 100644 src/components/gastown/DrainStatusBanner.tsx diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 03812bbf9..4f4e118d3 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -52,12 +52,6 @@ const startTime = Date.now(); // lets the drain loop nudge agents that transition to running mid-drain. 
let _draining = false; -// Agents that were successfully nudged about eviction in Phase 2/3. -// Used by handleIdleEvent to decide the drain idle timeout: -// nudged agents → 10s (they've been told to wrap up) -// un-nudged agents → 120s (nudge may still be in SDK queue) -const drainNudgedAgents = new Set(); - export function isDraining(): boolean { return _draining; } @@ -436,16 +430,13 @@ async function handleIdleEvent(agent: ManagedAgent, onExit: () => void): Promise } // No nudges (or fetch error) — (re)start the idle timeout. - // During drain: - // - Nudged agents get 10s: they've been told to wrap up, this idle - // means they finished processing the nudge. - // - Un-nudged agents get 120s: the nudge may still be queued in the - // SDK (session.prompt enqueues behind the active turn). The agent - // needs time to finish its turn, then process the nudge. + // During drain, use a short idle timeout. Agents aren't nudged — they + // complete naturally — so this idle means the agent is done with its + // current work and can exit promptly. clearIdleTimer(agentId); let timeoutMs: number; if (_draining) { - timeoutMs = drainNudgedAgents.has(agentId) ? 10_000 : 120_000; + timeoutMs = 10_000; } else { timeoutMs = agent.role === 'refinery' @@ -1057,11 +1048,14 @@ export function activeServerCount(): number { /** * Gracefully drain all running agents before container eviction. * - * 4-phase sequence: - * 1. Notify TownDO of the eviction (fire-and-forget) - * 2. Nudge running polecats/refineries to commit & push - * 3. Poll up to 10 min waiting for agents to finish - * 4. Force-save any stragglers via WIP git commit + push + * 3-phase sequence: + * 1. Notify TownDO of the eviction (blocks new dispatch) + * 2. Wait up to 5 min for non-mayor agents to finish naturally + * 3. Force-save any stragglers via WIP git commit + push + * + * No nudging — agents complete their current work via gt_done and + * exit through the normal idle timeout path. 
The TownDO's draining + * flag prevents new work from being dispatched. * * Never throws — all errors are logged and swallowed so the caller * can always proceed to stopAll() + process.exit(). @@ -1099,88 +1093,26 @@ export async function drainAll(): Promise { console.warn(`${DRAIN_LOG} Phase 1: TownDO notification failed, continuing:`, err); } - // ── Phase 2: Directly nudge running non-mayor agents ────────────────── - // We use sendMessage() to inject an eviction notice directly into each - // agent's SDK session. This is a local call (no round-trip through the - // TownDO) so it works even after SIGTERM — the container's HTTP server - // is still up but the Cloudflare runtime blocks inbound requests from - // other DOs, which breaks the gt_nudge → TownDO → container.fetch() path. - const nudgedAgents = new Set(); - - const nudgeAgent = async (agent: ManagedAgent): Promise => { - if (nudgedAgents.has(agent.agentId)) return false; - nudgedAgents.add(agent.agentId); - drainNudgedAgents.add(agent.agentId); - - if (agent.role === 'mayor' || agent.role === 'triage') return false; - - const nudgeMessage = - 'URGENT: The container is being evicted. Commit and push all your current changes RIGHT NOW. ' + - 'Do NOT call gt_done — the system will handle the bead state. ' + - 'Just git add -A, git commit, and git push your work-in-progress, then stop working.'; - - clearIdleTimer(agent.agentId); - console.log( - `${DRAIN_LOG} Phase 2: nudging ${agent.role} agent ${agent.agentId} (session=${agent.sessionId})` - ); - await sendMessage(agent.agentId, nudgeMessage); - console.log(`${DRAIN_LOG} Phase 2: nudge delivered to ${agent.agentId}`); - return true; - }; + // ── Phase 2: Wait for agents to finish their current work ───────────── + // No nudging — agents complete naturally (call gt_done, go idle, etc.). + // The TownDO's draining flag blocks new dispatch so no new work starts. + // We just give them time to wrap up, then Phase 3 force-saves stragglers. 
+ const DRAIN_WAIT_MS = 5 * 60 * 1000; + const pollInterval = 5000; + const start = Date.now(); const allAgents = [...agents.values()]; - const runningAgents = allAgents.filter(a => a.status === 'running' && a.role !== 'mayor'); console.log( - `${DRAIN_LOG} Phase 2: ${runningAgents.length} nudgeable of ${allAgents.length} total agents. ` + + `${DRAIN_LOG} Phase 2: waiting up to ${DRAIN_WAIT_MS / 1000}s for non-mayor agents to finish. ` + `Statuses: ${allAgents.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ')}` ); - // Fire all nudges in parallel — sendMessage can block if the SDK server - // is busy with the agent's current turn. Sequential nudging would delay - // later agents until earlier ones unblock. - await Promise.allSettled( - runningAgents.map(agent => - nudgeAgent(agent).catch(err => { - console.warn( - `${DRAIN_LOG} Phase 2: failed to nudge ${agent.agentId} (${agent.role}):`, - err instanceof Error ? err.message : err - ); - }) - ) - ); - - // ── Phase 3: Wait up to 10 minutes ────────────────────────────────── - // Exclude mayors from the running count — they are persistent and will - // never exit on their own. They are frozen in Phase 4 if still running. - // Also late-nudge agents that transition from `starting` to `running` - // during the wait. - const DRAIN_WAIT_MS = 10 * 60 * 1000; - const pollInterval = 5000; - const start = Date.now(); - console.log(`${DRAIN_LOG} Phase 3: waiting up to ${DRAIN_WAIT_MS / 1000}s for agents to finish`); - while (Date.now() - start < DRAIN_WAIT_MS) { - // Include both `running` and `starting` agents in the wait. - // Starting agents may finish their clone/setup and transition to - // running during the wait — they'll be late-nudged below. const active = [...agents.values()].filter( a => (a.status === 'running' || a.status === 'starting') && a.role !== 'mayor' ); if (active.length === 0) break; - // Late-nudge agents that became running since Phase 2. 
- for (const agent of active) { - if (agent.status !== 'running' || nudgedAgents.has(agent.agentId)) continue; - try { - await nudgeAgent(agent); - } catch (err) { - console.warn( - `${DRAIN_LOG} Phase 3: late nudge failed for ${agent.agentId}:`, - err instanceof Error ? err.message : err - ); - } - } - console.log( `${DRAIN_LOG} Waiting for ${active.length} non-mayor agents: ` + active.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ') @@ -1188,7 +1120,7 @@ export async function drainAll(): Promise { await new Promise(r => setTimeout(r, pollInterval)); } - // ── Phase 4: Force-save remaining agents ──────────────────────────── + // ── Phase 3: Force-save remaining agents ──────────────────────────── // Two sub-steps: first freeze all stragglers (cancel idle timers, // abort event subscriptions and SDK sessions), then snapshot each // worktree. Freezing first prevents the normal completion path @@ -1198,9 +1130,9 @@ export async function drainAll(): Promise { a => a.status === 'running' || a.status === 'starting' ); if (stragglers.length > 0) { - console.log(`${DRAIN_LOG} Phase 4: freezing ${stragglers.length} straggler(s)`); + console.log(`${DRAIN_LOG} Phase 3: freezing ${stragglers.length} straggler(s)`); } else { - console.log(`${DRAIN_LOG} Phase 4: all agents finished, no force-save needed`); + console.log(`${DRAIN_LOG} Phase 3: all agents finished, no force-save needed`); } // 4a: Freeze — cancel idle timers and abort sessions so no @@ -1231,13 +1163,13 @@ export async function drainAll(): Promise { agent.status = 'exited'; agent.exitReason = 'container eviction'; frozen.push(agent); - console.log(`${DRAIN_LOG} Phase 4: froze agent ${agent.agentId}`); + console.log(`${DRAIN_LOG} Phase 3: froze agent ${agent.agentId}`); } catch (err) { // Freeze failed — the session may still be writing to the // worktree. Skip this agent in 4b to avoid .git/index.lock // races and partial snapshots. 
console.warn( - `${DRAIN_LOG} Phase 4: failed to freeze agent ${agent.agentId}, skipping snapshot:`, + `${DRAIN_LOG} Phase 3: failed to freeze agent ${agent.agentId}, skipping snapshot:`, err ); } @@ -1249,7 +1181,7 @@ export async function drainAll(): Promise { // with a still-active SDK session. for (const agent of frozen) { try { - console.log(`${DRAIN_LOG} Phase 4: force-saving agent ${agent.agentId} in ${agent.workdir}`); + console.log(`${DRAIN_LOG} Phase 3: force-saving agent ${agent.agentId} in ${agent.workdir}`); // Check whether a remote named "origin" exists. Lightweight // workspaces (mayor/triage) are created with `git init` and @@ -1268,7 +1200,7 @@ export async function drainAll(): Promise { if (!hasOrigin && agent.role !== 'mayor' && agent.role !== 'triage') { console.warn( - `${DRAIN_LOG} Phase 4: no origin remote for ${agent.role} agent ${agent.agentId}, committing locally only (push skipped)` + `${DRAIN_LOG} Phase 3: no origin remote for ${agent.role} agent ${agent.agentId}, committing locally only (push skipped)` ); } @@ -1295,7 +1227,7 @@ export async function drainAll(): Promise { const stdout = await new Response(proc.stdout).text(); const stderr = await new Response(proc.stderr).text(); console.log( - `${DRAIN_LOG} Phase 4: agent ${agent.agentId} git save exited ${exitCode}` + + `${DRAIN_LOG} Phase 3: agent ${agent.agentId} git save exited ${exitCode}` + (stdout ? ` stdout=${stdout.trim()}` : '') + (stderr ? 
` stderr=${stderr.trim()}` : '') ); @@ -1313,7 +1245,7 @@ export async function drainAll(): Promise { await branchProc.exited; console.log( - `${DRAIN_LOG} Phase 4: writing eviction context for agent ${agent.agentId}: branch=${branchName}` + `${DRAIN_LOG} Phase 3: writing eviction context for agent ${agent.agentId}: branch=${branchName}` ); await writeEvictionCheckpoint(agent, { branch: branchName, @@ -1329,7 +1261,7 @@ export async function drainAll(): Promise { await reportAgentCompleted(agent, 'completed', 'container eviction'); } } catch (err) { - console.warn(`${DRAIN_LOG} Phase 4: force-save failed for agent ${agent.agentId}:`, err); + console.warn(`${DRAIN_LOG} Phase 3: force-save failed for agent ${agent.agentId}:`, err); } } diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index b2bfd66bf..579b9e25a 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -642,6 +642,11 @@ export class TownDO extends DurableObject { return this._drainNonce; } + /** When the drain started (epoch ms), or null when not draining. */ + async getDrainStartedAt(): Promise { + return this._drainStartedAt; + } + // ══════════════════════════════════════════════════════════════════ // Town Configuration // ══════════════════════════════════════════════════════════════════ @@ -4133,6 +4138,8 @@ export class TownDO extends DurableObject { type: string; message: string; }>; + draining?: boolean; + drainStartedAt?: string; }> { const currentAlarm = await this.ctx.storage.getAlarm(); const active = cached?.activeWork ?? this.hasActiveWork(); @@ -4271,6 +4278,10 @@ export class TownDO extends DurableObject { }, reconciler: this._lastReconcilerMetrics, recentEvents, + draining: this._draining || undefined, + drainStartedAt: this._drainStartedAt + ? 
new Date(this._drainStartedAt).toISOString() + : undefined, }; } diff --git a/cloudflare-gastown/src/trpc/router.ts b/cloudflare-gastown/src/trpc/router.ts index 6dc169164..be1d5feaf 100644 --- a/cloudflare-gastown/src/trpc/router.ts +++ b/cloudflare-gastown/src/trpc/router.ts @@ -363,6 +363,22 @@ export const gastownRouter = router({ return verifyTownOwnership(ctx.env, ctx, input.townId); }), + getDrainStatus: gastownProcedure + .input(z.object({ townId: z.string().uuid() })) + .output(z.object({ draining: z.boolean(), drainStartedAt: z.string().nullable() })) + .query(async ({ ctx, input }) => { + await verifyTownOwnership(ctx.env, ctx, input.townId); + const town = getTownDOStub(ctx.env, input.townId); + const [draining, startedAt] = await Promise.all([ + town.isDraining() as Promise, + town.getDrainStartedAt() as Promise, + ]); + return { + draining, + drainStartedAt: startedAt ? new Date(startedAt).toISOString() : null, + }; + }), + /** * Check whether the current user is an admin viewing a town they don't own. * Used by the frontend to show an admin banner. diff --git a/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx b/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx index a75006bfb..601b0ec7e 100644 --- a/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx +++ b/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx @@ -36,6 +36,7 @@ import { AreaChart, Area, ResponsiveContainer, Tooltip, XAxis, YAxis } from 'rec import { motion, AnimatePresence } from 'motion/react'; import type { GastownOutputs } from '@/lib/gastown/trpc'; import { AdminViewingBanner } from '@/components/gastown/AdminViewingBanner'; +import { DrainStatusBanner } from '@/components/gastown/DrainStatusBanner'; type Agent = GastownOutputs['gastown']['listAgents'][number]; @@ -208,6 +209,7 @@ export function TownOverviewPageClient({ return (
+ {/* Top bar — sticky */}
diff --git a/src/components/gastown/DrainStatusBanner.tsx b/src/components/gastown/DrainStatusBanner.tsx new file mode 100644 index 000000000..8954ac6db --- /dev/null +++ b/src/components/gastown/DrainStatusBanner.tsx @@ -0,0 +1,78 @@ +'use client'; + +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; +import { useGastownTRPC } from '@/lib/gastown/trpc'; +import { Banner } from '@/components/shared/Banner'; +import { Button } from '@/components/ui/button'; +import { AlertTriangle, Loader2 } from 'lucide-react'; +import { toast } from 'sonner'; + +/** + * Banner displayed when the town's container is draining (graceful restart + * in progress). Shows how long the drain has been running and provides a + * force-shutdown button to expedite the process. + * + * Polls every 5s so it appears/disappears promptly. + */ +export function DrainStatusBanner({ townId }: { townId: string }) { + const trpc = useGastownTRPC(); + const queryClient = useQueryClient(); + + const { data } = useQuery({ + ...trpc.gastown.getDrainStatus.queryOptions({ townId }), + refetchInterval: 5_000, + }); + + const destroyContainer = useMutation( + trpc.gastown.destroyContainer.mutationOptions({ + onSuccess: () => { + toast.success('Container destroyed — it will restart on next dispatch'); + void queryClient.invalidateQueries({ + queryKey: trpc.gastown.getDrainStatus.queryKey({ townId }), + }); + }, + onError: err => toast.error(`Force shutdown failed: ${err.message}`), + }) + ); + + if (!data?.draining) return null; + + const elapsed = data.drainStartedAt + ? Math.round((Date.now() - new Date(data.drainStartedAt).getTime()) / 1000) + : null; + const elapsedLabel = + elapsed !== null + ? elapsed < 60 + ? `${elapsed}s ago` + : `${Math.floor(elapsed / 60)}m ${elapsed % 60}s ago` + : null; + + return ( + + + + + + Container restart in progress + + A graceful shutdown was initiated{elapsedLabel ? ` ${elapsedLabel}` : ''}. 
Agents are + finishing their current work — no new tasks will be dispatched until the restart + completes. + + + + + + + ); +} diff --git a/src/components/gastown/TerminalBar.tsx b/src/components/gastown/TerminalBar.tsx index 615474351..ad2af05ca 100644 --- a/src/components/gastown/TerminalBar.tsx +++ b/src/components/gastown/TerminalBar.tsx @@ -888,6 +888,8 @@ type AlarmStatus = { orphanedHooks: number; }; recentEvents: Array<{ time: string; type: string; message: string }>; + draining?: boolean; + drainStartedAt?: string; }; type AgentStatusEvent = { diff --git a/src/lib/gastown/types/router.d.ts b/src/lib/gastown/types/router.d.ts index 12de217df..ce9ece484 100644 --- a/src/lib/gastown/types/router.d.ts +++ b/src/lib/gastown/types/router.d.ts @@ -59,6 +59,16 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< }; meta: object; }>; + getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + draining: boolean; + drainStartedAt: string | null; + }; + meta: object; + }>; deleteTown: import('@trpc/server').TRPCMutationProcedure<{ input: { townId: string; @@ -1345,6 +1355,16 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute }; meta: object; }>; + getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + draining: boolean; + drainStartedAt: string | null; + }; + meta: object; + }>; deleteTown: import('@trpc/server').TRPCMutationProcedure<{ input: { townId: string; From 808f79ac70b4df94c6d0820049e0faff9432d249 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 12:45:06 -0500 Subject: [PATCH 36/44] =?UTF-8?q?fix(gastown):=20town=20containers=20never?= =?UTF-8?q?=20idle=20=E2=80=94=20mayor=20holds=20alarm,=20health=20checks?= =?UTF-8?q?=20reset=20sleep=20timer=20(#1452)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(gastown): town containers never 
idle — mayor holds alarm at 5s, constant health checks reset sleep timer Add 'waiting' agent status for mayors that finished processing a prompt and are waiting for user input. This lets hasActiveWork() return false, dropping the alarm from 5s to 5m and stopping health-check pings that reset the container's sleepAfter timer. Changes: - Add 'waiting' to AgentStatus enum (types.ts, agent-metadata.table.ts) - Container reports mayor idle via POST /agents/:id/waiting (completion-reporter.ts) - TownDO.mayorWaiting() transitions working → waiting - sendMayorMessage transitions waiting → working on new user input - Persist token refresh throttle in ctx.storage (survives DO eviction) - Increase idle alarm interval from 60s to 300s Closes #1450 * fix: reschedule alarm to active cadence when mayor transitions waiting → working When sendMayorMessage transitions the mayor from waiting to working, the idle alarm may be up to 5 min away. Explicitly reschedule to ACTIVE_ALARM_INTERVAL_MS (5s) so the reconciler/health-check loop resumes promptly. * style: fix formatting in Town.do.ts and completion-reporter.ts * fix: update idle intervalLabel and guard against stale session.idle callbacks - Fix intervalLabel in getAlarmStatus() to show 'idle (5m)' instead of 'idle (60s)' to match the new IDLE_ALARM_INTERVAL_MS. - Add _mayorWorkingSince timestamp guard in mayorWaiting() to reject stale session.idle callbacks that arrive after sendMayorMessage has already re-activated the mayor. The 5s grace window covers the round-trip time for fire-and-forget reportMayorWaiting() calls. * fix: replace time-based stale guard with firedAt timestamp comparison The 5s time-based guard in mayorWaiting() rejected legitimate fast turns where the mayor finishes a prompt in under 5s. Replace with a firedAt timestamp that the container includes in the reportMayorWaiting callback. 
mayorWaiting() now only rejects callbacks whose firedAt predates the last working transition (_mayorWorkingSince), correctly distinguishing stale callbacks from current ones regardless of turn duration. --- .../container/src/completion-reporter.ts | 41 +++++ .../container/src/process-manager.ts | 7 +- .../src/db/tables/agent-metadata.table.ts | 2 +- cloudflare-gastown/src/dos/Town.do.ts | 85 ++++++++-- cloudflare-gastown/src/gastown.worker.ts | 6 + .../src/handlers/rig-agents.handler.ts | 18 +++ cloudflare-gastown/src/types.ts | 2 +- .../test/integration/mayor-idle.test.ts | 153 ++++++++++++++++++ 8 files changed, 300 insertions(+), 14 deletions(-) create mode 100644 cloudflare-gastown/test/integration/mayor-idle.test.ts diff --git a/cloudflare-gastown/container/src/completion-reporter.ts b/cloudflare-gastown/container/src/completion-reporter.ts index 8ad4be23b..7a7766e61 100644 --- a/cloudflare-gastown/container/src/completion-reporter.ts +++ b/cloudflare-gastown/container/src/completion-reporter.ts @@ -7,6 +7,47 @@ import type { ManagedAgent } from './types'; +/** + * Notify the TownDO that the mayor has finished processing a prompt and + * is now waiting for user input. This lets the TownDO transition the + * mayor from "working" to "waiting", which drops the alarm to the idle + * cadence and stops health-check pings that reset the container's + * sleepAfter timer. + * + * Best-effort: errors are logged but do not propagate. + */ +export async function reportMayorWaiting(agent: ManagedAgent): Promise { + const apiUrl = agent.gastownApiUrl; + const authToken = + process.env.GASTOWN_CONTAINER_TOKEN ?? agent.gastownContainerToken ?? 
agent.gastownSessionToken; + if (!apiUrl || !authToken) { + console.warn( + `Cannot report mayor ${agent.agentId} waiting: no API credentials on agent record` + ); + return; + } + + const url = `${apiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/waiting`; + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${authToken}`, + }, + body: JSON.stringify({ agentId: agent.agentId, firedAt: Date.now() }), + }); + + if (!response.ok) { + console.warn( + `Failed to report mayor ${agent.agentId} waiting: ${response.status} ${response.statusText}` + ); + } + } catch (err) { + console.warn(`Error reporting mayor ${agent.agentId} waiting:`, err); + } +} + /** * Notify the Rig DO that an agent session has completed or failed. * Best-effort: errors are logged but do not propagate. diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 4f4e118d3..255057d49 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -9,7 +9,7 @@ import { createKilo, type KiloClient } from '@kilocode/sdk'; import { z } from 'zod'; import type { ManagedAgent, StartAgentRequest } from './types'; -import { reportAgentCompleted } from './completion-reporter'; +import { reportAgentCompleted, reportMayorWaiting } from './completion-reporter'; import { buildKiloConfigContent } from './agent-runner'; import { log } from './logger'; @@ -549,6 +549,11 @@ async function subscribeToEvents( if (event.type === 'session.idle') { if (request.role === 'mayor') { // Mayor agents are persistent — session.idle means "turn done", not exit. + // Notify the TownDO so it can transition the mayor to "waiting" + // (alive in container, not doing LLM work). 
This lets the alarm + // drop to the idle cadence and stops health-check pings that + // would reset the container's sleepAfter timer. + void reportMayorWaiting(agent); continue; } // Non-mayor: check for pending nudges before deciding to exit. diff --git a/cloudflare-gastown/src/db/tables/agent-metadata.table.ts b/cloudflare-gastown/src/db/tables/agent-metadata.table.ts index 6409e4c1a..f9b9228dc 100644 --- a/cloudflare-gastown/src/db/tables/agent-metadata.table.ts +++ b/cloudflare-gastown/src/db/tables/agent-metadata.table.ts @@ -5,7 +5,7 @@ import { getTableFromZodSchema, getCreateTableQueryFromTable } from '../../util/ // queries parsing through AgentMetadataRecord don't throw on old rows. // Application code should only create the known roles below. const AgentRole = z.enum(['polecat', 'refinery', 'mayor']).or(z.string()); -const AgentProcessStatus = z.enum(['idle', 'working', 'stalled', 'dead']).or(z.string()); +const AgentProcessStatus = z.enum(['idle', 'working', 'waiting', 'stalled', 'dead']).or(z.string()); export const AgentMetadataRecord = z.object({ bead_id: z.string(), diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 579b9e25a..e771c5106 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -125,7 +125,7 @@ function formatEventMessage(row: Record): string { // Alarm intervals const ACTIVE_ALARM_INTERVAL_MS = 5_000; // 5s when agents are active -const IDLE_ALARM_INTERVAL_MS = 1 * 60_000; // 1m when idle +const IDLE_ALARM_INTERVAL_MS = 5 * 60_000; // 5m when idle (no working agents) // Escalation constants const STALE_ESCALATION_THRESHOLD_MS = 4 * 60 * 60 * 1000; @@ -547,6 +547,10 @@ export class TownDO extends DurableObject { private _townId: string | null = null; private _lastReconcilerMetrics: reconciler.ReconcilerMetrics | null = null; private _dashboardContext: string | null = null; + /** Monotonic timestamp of the last working → transition for the mayor. 
+ * Used to reject stale session.idle callbacks that arrive after a new + * prompt has already re-activated the mayor. */ + private _mayorWorkingSince = 0; private _draining = false; private _drainNonce: string | null = null; private _drainStartedAt: number | null = null; @@ -676,7 +680,7 @@ export class TownDO extends DurableObject { const townConfig = await this.getTownConfig(); const userId = townConfig.owner_user_id ?? townId; await dispatch.forceRefreshContainerToken(this.env, townId, userId); - this.lastContainerTokenRefreshAt = Date.now(); + await this.ctx.storage.put('container:lastTokenRefreshAt', Date.now()); } /** @@ -1558,6 +1562,43 @@ export class TownDO extends DurableObject { await this.armAlarmIfNeeded(); } + /** + * Transition the mayor from "working" to "waiting". Called by the + * container when the mayor's session goes idle (turn done, waiting for + * user input). The "waiting" status means the mayor is alive in the + * container but not doing LLM work — hasActiveWork() returns false, + * so the alarm drops to the idle cadence and health-check pings stop + * resetting the container's sleepAfter timer. + * + * @param firedAt - Timestamp (ms) when the container fired this + * callback. Used to reject stale session.idle callbacks from a + * previous turn that arrive after the mayor has already been + * re-activated by a new prompt. + */ + async mayorWaiting(agentId?: string, firedAt?: number): Promise { + let resolvedAgentId = agentId; + if (!resolvedAgentId) { + const mayor = agents.listAgents(this.sql, { role: 'mayor' })[0]; + if (mayor) resolvedAgentId = mayor.id; + } + if (!resolvedAgentId) return; + + const agent = agents.getAgent(this.sql, resolvedAgentId); + if (!agent || agent.role !== 'mayor') return; + + // Only transition from working → waiting. If the agent has already + // been set to idle/stalled/dead by another path, don't overwrite. 
+ // Guard against stale session.idle callbacks: reportMayorWaiting is + // fire-and-forget, so a callback from a previous turn can arrive + // after sendMayorMessage has already re-activated the mayor. If the + // callback carries a firedAt timestamp that predates the last + // working transition, it belongs to an older turn — reject it. + if (agent.status === 'working') { + if (firedAt && firedAt < this._mayorWorkingSince) return; + agents.updateAgentStatus(this.sql, resolvedAgentId, 'waiting'); + } + } + async agentCompleted( agentId: string, input: { status: 'completed' | 'failed'; reason?: string } @@ -2106,7 +2147,20 @@ export class TownDO extends DurableObject { if (isAlive) { const sent = await dispatch.sendMessageToAgent(this.env, townId, mayor.id, combinedMessage); - sessionStatus = sent ? 'active' : 'idle'; + if (sent) { + // Transition waiting → working so the alarm runs at the active cadence + // while the mayor processes this prompt. Also reschedule the alarm + // immediately — the idle alarm may be up to 5 min away, and we need + // the reconciler/health-check loop to resume promptly. + if (mayor.status === 'waiting') { + agents.updateAgentStatus(this.sql, mayor.id, 'working'); + this._mayorWorkingSince = Date.now(); + await this.ctx.storage.setAlarm(Date.now() + ACTIVE_ALARM_INTERVAL_MS); + } + sessionStatus = 'active'; + } else { + sessionStatus = 'idle'; + } } else { const townConfig = await this.getTownConfig(); const rigConfig = await this.getMayorRigConfig(); @@ -2152,6 +2206,7 @@ export class TownDO extends DurableObject { if (started) { agents.updateAgentStatus(this.sql, mayor.id, 'working'); + this._mayorWorkingSince = Date.now(); sessionStatus = 'starting'; } else { sessionStatus = 'idle'; @@ -2200,8 +2255,9 @@ export class TownDO extends DurableObject { const isAlive = containerStatus.status === 'running' || containerStatus.status === 'starting'; if (isAlive) { - const status = mayor.status === 'working' || mayor.status === 'stalled' ? 
'active' : 'idle'; - return { agentId: mayor.id, sessionStatus: status }; + const isActive = + mayor.status === 'working' || mayor.status === 'stalled' || mayor.status === 'waiting'; + return { agentId: mayor.id, sessionStatus: isActive ? 'active' : 'idle' }; } // Start the container with an idle mayor (no initial prompt) @@ -2253,6 +2309,7 @@ export class TownDO extends DurableObject { if (started) { agents.updateAgentStatus(this.sql, mayor.id, 'working'); + this._mayorWorkingSince = Date.now(); return { agentId: mayor.id, sessionStatus: 'starting' }; } @@ -2317,7 +2374,7 @@ export class TownDO extends DurableObject { const mapStatus = (agentStatus: string): 'idle' | 'active' | 'starting' => { switch (agentStatus) { case 'working': - return 'active'; + case 'waiting': case 'stalled': return 'active'; default: @@ -3543,12 +3600,17 @@ export class TownDO extends DurableObject { * from the alarm handler, throttled to once per hour (tokens have * 8h expiry). The TownContainerDO stores it as an env var so it's * available to all agents in the container. + * + * The throttle timestamp is persisted in ctx.storage so it survives + * DO eviction. Without persistence, eviction resets the throttle to 0 + * and the refresh fires immediately on the next alarm tick, sending + * requests that reset the container's sleepAfter timer (#1409). */ - private lastContainerTokenRefreshAt = 0; private async refreshContainerToken(): Promise { const TOKEN_REFRESH_INTERVAL_MS = 60 * 60_000; // 1 hour const now = Date.now(); - if (now - this.lastContainerTokenRefreshAt < TOKEN_REFRESH_INTERVAL_MS) return; + const lastRefresh = (await this.ctx.storage.get('container:lastTokenRefreshAt')) ?? 
0; + if (now - lastRefresh < TOKEN_REFRESH_INTERVAL_MS) return; const townId = this.townId; if (!townId) return; @@ -3557,7 +3619,7 @@ export class TownDO extends DurableObject { await dispatch.refreshContainerToken(this.env, townId, userId); // Only mark as refreshed after success — failed refreshes should // be retried on the next alarm tick, not throttled for an hour. - this.lastContainerTokenRefreshAt = now; + await this.ctx.storage.put('container:lastTokenRefreshAt', now); } /** @@ -4114,6 +4176,7 @@ export class TownDO extends DurableObject { }; agents: { working: number; + waiting: number; idle: number; stalled: number; dead: number; @@ -4157,7 +4220,7 @@ export class TownDO extends DurableObject { [] ), ]; - const agentCounts = { working: 0, idle: 0, stalled: 0, dead: 0, total: 0 }; + const agentCounts = { working: 0, waiting: 0, idle: 0, stalled: 0, dead: 0, total: 0 }; for (const row of agentRows) { const s = `${row.status as string}`; const c = Number(row.cnt); @@ -4266,7 +4329,7 @@ export class TownDO extends DurableObject { alarm: { nextFireAt: currentAlarm ? new Date(Number(currentAlarm)).toISOString() : null, intervalMs, - intervalLabel: active ? 'active (5s)' : 'idle (60s)', + intervalLabel: active ? 
'active (5s)' : 'idle (5m)', }, agents: agentCounts, beads: beadCounts, diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index be9390b7d..13e88dc2a 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -36,6 +36,7 @@ import { handleAgentDone, handleRequestChanges, handleAgentCompleted, + handleAgentWaiting, handleWriteCheckpoint, handleWriteEvictionContext, handleCheckMail, @@ -422,6 +423,11 @@ app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/completed', c => handleAgentCompleted(c, c.req.param()) ) ); +app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/waiting', c => + instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/agents/:agentId/waiting', () => + handleAgentWaiting(c, c.req.param()) + ) +); app.post('/api/towns/:townId/rigs/:rigId/agents/:agentId/checkpoint', c => instrumented(c, 'POST /api/towns/:townId/rigs/:rigId/agents/:agentId/checkpoint', () => handleWriteCheckpoint(c, c.req.param()) diff --git a/cloudflare-gastown/src/handlers/rig-agents.handler.ts b/cloudflare-gastown/src/handlers/rig-agents.handler.ts index a9f3e2dc2..c277eab46 100644 --- a/cloudflare-gastown/src/handlers/rig-agents.handler.ts +++ b/cloudflare-gastown/src/handlers/rig-agents.handler.ts @@ -162,6 +162,24 @@ export async function handleAgentCompleted( return c.json(resSuccess({ completed: true })); } +/** + * Called by the container when the mayor's session goes idle (turn done, + * waiting for user input). Transitions the mayor from "working" to + * "waiting" so the alarm drops to the idle cadence and health-check + * pings stop resetting the container's sleepAfter timer. + */ +export async function handleAgentWaiting( + c: Context, + params: { rigId: string; agentId: string } +) { + const body = (await parseJsonBody(c)) as Record; + const firedAt = typeof body?.firedAt === 'number' ? 
body.firedAt : undefined; + const townId = c.get('townId'); + const town = getTownDOStub(c.env, townId); + await town.mayorWaiting(params.agentId, firedAt); + return c.json(resSuccess({ acknowledged: true })); +} + export async function handleWriteCheckpoint( c: Context, params: { rigId: string; agentId: string } diff --git a/cloudflare-gastown/src/types.ts b/cloudflare-gastown/src/types.ts index f59b3d216..245097035 100644 --- a/cloudflare-gastown/src/types.ts +++ b/cloudflare-gastown/src/types.ts @@ -51,7 +51,7 @@ export type BeadFilter = { export const AgentRole = z.enum(['polecat', 'refinery', 'mayor']); export type AgentRole = z.infer; -export const AgentStatus = z.enum(['idle', 'working', 'stalled', 'dead']); +export const AgentStatus = z.enum(['idle', 'working', 'waiting', 'stalled', 'dead']); export type AgentStatus = z.infer; /** diff --git a/cloudflare-gastown/test/integration/mayor-idle.test.ts b/cloudflare-gastown/test/integration/mayor-idle.test.ts new file mode 100644 index 000000000..c12527a4a --- /dev/null +++ b/cloudflare-gastown/test/integration/mayor-idle.test.ts @@ -0,0 +1,153 @@ +/** + * Integration tests for the mayor idle (waiting) lifecycle. + * + * Verifies that: + * 1. The "waiting" agent status exists and can be set + * 2. hasActiveWork() returns false when the only agent is a waiting mayor + * 3. The alarm interval drops to idle cadence when the mayor is waiting + * 4. mayorWaiting() transitions a working mayor to waiting + * 5. sendMayorMessage transitions a waiting mayor back to working (when container is alive) + * 6. 
Token refresh throttle persists across DO eviction (ctx.storage) + */ + +import { env, runDurableObjectAlarm } from 'cloudflare:test'; +import { describe, it, expect, beforeEach } from 'vitest'; + +function getTownStub(name = 'test-town') { + const id = env.TOWN.idFromName(name); + return env.TOWN.get(id); +} + +describe('Mayor idle lifecycle', () => { + let town: ReturnType; + let townName: string; + + beforeEach(async () => { + townName = `mayor-idle-${crypto.randomUUID()}`; + town = getTownStub(townName); + await town.setTownId(townName); + await town.addRig({ + rigId: 'rig-1', + name: 'main-rig', + gitUrl: 'https://github.com/test/repo.git', + defaultBranch: 'main', + }); + }); + + // ── waiting status ────────────────────────────────────────────────── + + describe('waiting status', () => { + it('should allow setting an agent to waiting', async () => { + // Register a mayor agent directly + const agentsBefore = await town.listAgents({ role: 'mayor' }); + expect(agentsBefore.length).toBe(0); + + // ensureMayor creates the agent (won't start container in test env) + const result = await town.ensureMayor(); + expect(result.agentId).toBeTruthy(); + + // Set the agent to working first, then waiting + await town.updateAgentStatus(result.agentId, 'working'); + const workingAgent = await town.getAgentAsync(result.agentId); + expect(workingAgent?.status).toBe('working'); + + // mayorWaiting should transition working → waiting + await town.mayorWaiting(result.agentId); + const waitingAgent = await town.getAgentAsync(result.agentId); + expect(waitingAgent?.status).toBe('waiting'); + }); + + it('should not transition non-working agents to waiting', async () => { + const result = await town.ensureMayor(); + + // Agent starts as idle (container not running in test env) + const agent = await town.getAgentAsync(result.agentId); + expect(agent?.status).toBe('idle'); + + // mayorWaiting should NOT change idle to waiting + await town.mayorWaiting(result.agentId); + const 
afterAgent = await town.getAgentAsync(result.agentId); + expect(afterAgent?.status).toBe('idle'); + }); + + it('should resolve empty agentId to the mayor', async () => { + const result = await town.ensureMayor(); + await town.updateAgentStatus(result.agentId, 'working'); + + // Call with undefined agentId — should resolve to mayor + await town.mayorWaiting(); + const agent = await town.getAgentAsync(result.agentId); + expect(agent?.status).toBe('waiting'); + }); + }); + + // ── hasActiveWork / alarm interval ────────────────────────────────── + + describe('alarm interval with waiting mayor', () => { + it('should use idle alarm interval when mayor is waiting', async () => { + const result = await town.ensureMayor(); + + // Set mayor to working → alarm should be active (5s) + await town.updateAgentStatus(result.agentId, 'working'); + const activeStatus = await town.getAlarmStatus(); + expect(activeStatus.alarm.intervalMs).toBe(5_000); + + // Set mayor to waiting → alarm should drop to idle (5 min) + await town.updateAgentStatus(result.agentId, 'waiting'); + const idleStatus = await town.getAlarmStatus(); + expect(idleStatus.alarm.intervalMs).toBe(5 * 60_000); + }); + + it('should use active alarm interval when a polecat is working alongside a waiting mayor', async () => { + const result = await town.ensureMayor(); + await town.updateAgentStatus(result.agentId, 'waiting'); + + // Create a convoy to get a working polecat + const convoy = await town.slingConvoy({ + rigId: 'rig-1', + convoyTitle: 'Test', + tasks: [{ title: 'Task 1' }], + }); + + // Run alarm to assign and dispatch the polecat + await runDurableObjectAlarm(town); + + const bead = await town.getBeadAsync(convoy.beads[0].bead.bead_id); + expect(bead?.assignee_agent_bead_id).toBeTruthy(); + + // Set the polecat to working + if (bead?.assignee_agent_bead_id) { + await town.updateAgentStatus(bead.assignee_agent_bead_id, 'working'); + } + + // Now alarm should be active (polecat is working) + const status = 
await town.getAlarmStatus(); + expect(status.alarm.intervalMs).toBe(5_000); + }); + }); + + // ── getMayorStatus mapping ───────────────────────────────────────── + + describe('getMayorStatus', () => { + it('should report waiting mayor as active', async () => { + const result = await town.ensureMayor(); + await town.updateAgentStatus(result.agentId, 'waiting'); + + const status = await town.getMayorStatus(); + expect(status.session?.status).toBe('active'); + }); + }); + + // ── getAlarmStatus agent counts ──────────────────────────────────── + + describe('getAlarmStatus agent counts', () => { + it('should include waiting in agent counts', async () => { + const result = await town.ensureMayor(); + await town.updateAgentStatus(result.agentId, 'waiting'); + + const status = await town.getAlarmStatus(); + expect(status.agents.waiting).toBe(1); + expect(status.agents.working).toBe(0); + }); + }); +}); From 5fcfc6e302886c3bbf6ad2087be13539dd940a86 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:18:12 -0500 Subject: [PATCH 37/44] fix(gastown): skip nudge fetch during drain, add 10s timeout to fetchPendingNudges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After SIGTERM, fetchPendingNudges can hang if the container runtime's outbound networking is degraded. This blocks handleIdleEvent from ever setting the idle timer, so agents stay 'running' indefinitely even after calling gt_done. During drain, skip the fetch entirely — agents finish naturally and just need the 10s idle timer to exit. For the non-drain case, add a 10s AbortSignal timeout to prevent hangs. 
--- cloudflare-gastown/container/src/process-manager.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 255057d49..0b5d6dc1d 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -281,7 +281,7 @@ async function fetchPendingNudges( }; const resp = await fetch( `${agent.gastownApiUrl}/api/towns/${agent.townId}/rigs/${agent.rigId}/agents/${agent.agentId}/pending-nudges`, - { headers } + { headers, signal: AbortSignal.timeout(10_000) } ); if (!resp.ok) { console.warn( @@ -400,7 +400,10 @@ async function handleIdleEvent(agent: ManagedAgent, onExit: () => void): Promise const agentId = agent.agentId; console.log(`${MANAGER_LOG} handleIdleEvent: checking nudges for agent ${agentId}`); - const nudges = await fetchPendingNudges(agent); + // During drain, skip the nudge fetch — it can hang if the container + // runtime's outbound networking is degraded after SIGTERM. The agent + // finished its work; just start the idle timer so it exits promptly. + const nudges = _draining ? 
null : await fetchPendingNudges(agent); if (nudges === null) { // Error fetching — treat as no nudges, start idle timer From 7eea0d3873ede388b15900331c392edc3baefbd8 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:21:47 -0500 Subject: [PATCH 38/44] fix(gastown): add spacing around drain status banner --- src/components/gastown/DrainStatusBanner.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/gastown/DrainStatusBanner.tsx b/src/components/gastown/DrainStatusBanner.tsx index 8954ac6db..423bdf0a2 100644 --- a/src/components/gastown/DrainStatusBanner.tsx +++ b/src/components/gastown/DrainStatusBanner.tsx @@ -48,7 +48,7 @@ export function DrainStatusBanner({ townId }: { townId: string }) { : null; return ( - + From bf4aee08962b217a03535116dec674559783b724 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:27:33 -0500 Subject: [PATCH 39/44] fix(gastown): use padding wrapper instead of margin on drain banner to prevent overflow-x --- src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx | 4 +++- src/components/gastown/DrainStatusBanner.tsx | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx b/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx index 601b0ec7e..c27c34e89 100644 --- a/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx +++ b/src/app/(app)/gastown/[townId]/TownOverviewPageClient.tsx @@ -209,7 +209,9 @@ export function TownOverviewPageClient({ return (
- +
+ +
{/* Top bar — sticky */}
diff --git a/src/components/gastown/DrainStatusBanner.tsx b/src/components/gastown/DrainStatusBanner.tsx index 423bdf0a2..c027161a0 100644 --- a/src/components/gastown/DrainStatusBanner.tsx +++ b/src/components/gastown/DrainStatusBanner.tsx @@ -48,7 +48,7 @@ export function DrainStatusBanner({ townId }: { townId: string }) { : null; return ( - + From a9db6d1d6003e2efa1cb44f61827924f36104d52 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:31:10 -0500 Subject: [PATCH 40/44] fix(gastown): clear drain flag as soon as all non-mayor agents are idle Previously the drain flag persisted for up to 15 minutes after the container exited, causing the UI banner to linger. Now the alarm loop clears it as soon as no working/stalled non-mayor agents remain (~5s after container exit). Hard timeout reduced from 15 to 7 min. --- cloudflare-gastown/src/dos/Town.do.ts | 52 ++++++++++++++++++--------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index e771c5106..77da1b7ce 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -3397,23 +3397,41 @@ export class TownDO extends DurableObject { Sentry.captureException(err); } - // Auto-clear drain flag if it has been active for too long. - // The drain sequence (drainAll) waits up to 10 minutes, so 15 - // minutes is a generous upper bound. After this timeout the old - // container is certainly dead and it is safe to resume dispatch. 
- const DRAIN_TIMEOUT_MS = 15 * 60 * 1000; - if ( - this._draining && - this._drainStartedAt && - Date.now() - this._drainStartedAt > DRAIN_TIMEOUT_MS - ) { - this._draining = false; - this._drainNonce = null; - this._drainStartedAt = null; - await this.ctx.storage.put('town:draining', false); - await this.ctx.storage.delete('town:drainNonce'); - await this.ctx.storage.delete('town:drainStartedAt'); - logger.info('reconciler: drain timeout exceeded, auto-clearing draining flag'); + // Auto-clear drain flag. Two triggers: + // 1. No working/stalled non-mayor agents remain — the container is + // gone and all agents have been recovered. Clear immediately so + // the UI banner disappears within one alarm tick (~5s). + // 2. Hard timeout (7 min) as a safety net if agent state gets stuck. + if (this._draining) { + const DRAIN_TIMEOUT_MS = 7 * 60 * 1000; + const timedOut = this._drainStartedAt && Date.now() - this._drainStartedAt > DRAIN_TIMEOUT_MS; + + const hasActiveNonMayor = + !timedOut && + query( + this.sql, + /* sql */ ` + SELECT 1 FROM ${agent_metadata} + WHERE ${agent_metadata.status} IN ('working', 'stalled') + AND ${agent_metadata.role} != 'mayor' + LIMIT 1 + `, + [] + ).length > 0; + + if (timedOut || !hasActiveNonMayor) { + this._draining = false; + this._drainNonce = null; + this._drainStartedAt = null; + await this.ctx.storage.put('town:draining', false); + await this.ctx.storage.delete('town:drainNonce'); + await this.ctx.storage.delete('town:drainStartedAt'); + logger.info( + timedOut + ? 
'reconciler: drain timeout exceeded, auto-clearing draining flag' + : 'reconciler: no active non-mayor agents, clearing draining flag' + ); + } } // Phase 1: Reconcile — compute desired state vs actual state From 7f82093176d3045d017438a09e1fe4ea9d8de70a Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:37:18 -0500 Subject: [PATCH 41/44] fix(gastown): detect container restart via heartbeat instance ID, clear drain flag instantly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each container generates a UUID at startup and sends it with every heartbeat. When the TownDO sees a different instance ID than the previous one, it clears the drain flag immediately — the UI banner disappears within one heartbeat cycle (~30s) instead of waiting for the 7-min safety timeout. Also reduces the hard drain timeout from 15 min to 7 min. --- cloudflare-gastown/container/src/heartbeat.ts | 8 +++ cloudflare-gastown/container/src/types.ts | 2 + cloudflare-gastown/src/dos/Town.do.ts | 59 ++++++++++--------- .../src/handlers/rig-agents.handler.ts | 2 + 4 files changed, 44 insertions(+), 27 deletions(-) diff --git a/cloudflare-gastown/container/src/heartbeat.ts b/cloudflare-gastown/container/src/heartbeat.ts index defbe7897..ea7fd4e3e 100644 --- a/cloudflare-gastown/container/src/heartbeat.ts +++ b/cloudflare-gastown/container/src/heartbeat.ts @@ -9,6 +9,13 @@ let sessionToken: string | null = null; /** Set once we've successfully acknowledged container-ready. */ let containerReadyAcknowledged = false; +/** + * Unique ID for this container instance. Generated once at import time. + * Sent with every heartbeat so the TownDO can detect container restarts + * (new instance ID ≠ old one → clear drain flag). + */ +const CONTAINER_INSTANCE_ID = crypto.randomUUID(); + /** * Configure and start the heartbeat reporter. 
* Periodically sends agent status updates to the Gastown worker API, @@ -108,6 +115,7 @@ async function sendHeartbeats(): Promise { lastEventAt: agent.lastEventAt ?? null, activeTools: agent.activeTools ?? [], messageCount: agent.messageCount ?? 0, + containerInstanceId: CONTAINER_INSTANCE_ID, }; try { diff --git a/cloudflare-gastown/container/src/types.ts b/cloudflare-gastown/container/src/types.ts index d2edabd3f..923e82ac4 100644 --- a/cloudflare-gastown/container/src/types.ts +++ b/cloudflare-gastown/container/src/types.ts @@ -319,6 +319,8 @@ export type HeartbeatPayload = { lastEventAt: string | null; activeTools: string[]; messageCount: number; + /** Unique ID for this container instance, used to detect restarts. */ + containerInstanceId?: string; }; // ── Stream ticket (for WebSocket streaming) ───────────────────────────── diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 77da1b7ce..4b7269215 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -554,6 +554,8 @@ export class TownDO extends DurableObject { private _draining = false; private _drainNonce: string | null = null; private _drainStartedAt: number | null = null; + /** Instance UUID of the current container, set by the first heartbeat. */ + private _containerInstanceId: string | null = null; private get townId(): string { return this._townId ?? this.ctx.id.name ?? this.ctx.id.toString(); @@ -1265,10 +1267,33 @@ export class TownDO extends DurableObject { lastEventType?: string | null; lastEventAt?: string | null; activeTools?: string[]; + containerInstanceId?: string; } ): Promise<{ drainNonce: string | null }> { agents.touchAgent(this.sql, agentId, watermark); await this.armAlarmIfNeeded(); + + // If the container instance ID changed, a new container has started. + // Clear the drain flag so the UI banner disappears and dispatch resumes. 
+ if (watermark?.containerInstanceId) { + if ( + this._draining && + this._containerInstanceId && + watermark.containerInstanceId !== this._containerInstanceId + ) { + this._draining = false; + this._drainNonce = null; + this._drainStartedAt = null; + await this.ctx.storage.put('town:draining', false); + await this.ctx.storage.delete('town:drainNonce'); + await this.ctx.storage.delete('town:drainStartedAt'); + console.log( + `${TOWN_LOG} heartbeat: new container instance ${watermark.containerInstanceId} (was ${this._containerInstanceId}), clearing drain flag` + ); + } + this._containerInstanceId = watermark.containerInstanceId; + } + return { drainNonce: this._drainNonce }; } @@ -3397,40 +3422,20 @@ export class TownDO extends DurableObject { Sentry.captureException(err); } - // Auto-clear drain flag. Two triggers: - // 1. No working/stalled non-mayor agents remain — the container is - // gone and all agents have been recovered. Clear immediately so - // the UI banner disappears within one alarm tick (~5s). - // 2. Hard timeout (7 min) as a safety net if agent state gets stuck. - if (this._draining) { + // Safety-net: auto-clear drain flag if it has been active too long. + // The primary clear mechanism is the heartbeat instance ID check + // (see recordHeartbeat), but this catches edge cases where no + // heartbeat arrives (e.g. container failed to start). 
+ if (this._draining && this._drainStartedAt) { const DRAIN_TIMEOUT_MS = 7 * 60 * 1000; - const timedOut = this._drainStartedAt && Date.now() - this._drainStartedAt > DRAIN_TIMEOUT_MS; - - const hasActiveNonMayor = - !timedOut && - query( - this.sql, - /* sql */ ` - SELECT 1 FROM ${agent_metadata} - WHERE ${agent_metadata.status} IN ('working', 'stalled') - AND ${agent_metadata.role} != 'mayor' - LIMIT 1 - `, - [] - ).length > 0; - - if (timedOut || !hasActiveNonMayor) { + if (Date.now() - this._drainStartedAt > DRAIN_TIMEOUT_MS) { this._draining = false; this._drainNonce = null; this._drainStartedAt = null; await this.ctx.storage.put('town:draining', false); await this.ctx.storage.delete('town:drainNonce'); await this.ctx.storage.delete('town:drainStartedAt'); - logger.info( - timedOut - ? 'reconciler: drain timeout exceeded, auto-clearing draining flag' - : 'reconciler: no active non-mayor agents, clearing draining flag' - ); + logger.info('reconciler: drain timeout exceeded, auto-clearing draining flag'); } } diff --git a/cloudflare-gastown/src/handlers/rig-agents.handler.ts b/cloudflare-gastown/src/handlers/rig-agents.handler.ts index c277eab46..c8d5a7367 100644 --- a/cloudflare-gastown/src/handlers/rig-agents.handler.ts +++ b/cloudflare-gastown/src/handlers/rig-agents.handler.ts @@ -235,6 +235,7 @@ const HeartbeatWatermark = z lastEventType: z.string().nullable().optional(), lastEventAt: z.string().nullable().optional(), activeTools: z.array(z.string()).optional(), + containerInstanceId: z.string().optional(), }) .passthrough(); @@ -273,6 +274,7 @@ export async function handleHeartbeat( lastEventType: watermark.lastEventType ?? null, lastEventAt: watermark.lastEventAt ?? 
null, activeTools: watermark.activeTools, + containerInstanceId: watermark.containerInstanceId, } : undefined ); From de7ae34a9294607da385e4f6aeef809b47aaf3a0 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:51:18 -0500 Subject: [PATCH 42/44] fix(gastown): drain Phase 2 exits early when all agents have idle timers pending Instead of waiting the full 10s idle timeout + 5s poll cycle for each agent, the drain loop now checks if every active non-mayor agent already has an idle timer running. If so, they've finished their work (session.idle fired, handleIdleEvent set the timer) and Phase 3 can proceed immediately to freeze and force-save them. --- cloudflare-gastown/container/src/process-manager.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 0b5d6dc1d..13d62ab77 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -1121,6 +1121,16 @@ export async function drainAll(): Promise { ); if (active.length === 0) break; + // If every active agent already has an idle timer running, they've + // finished their work and are just waiting for the timer to fire. + // No need to keep polling — break and let Phase 3 handle them. 
+ if (active.every(a => idleTimers.has(a.agentId))) { + console.log( + `${DRAIN_LOG} All ${active.length} non-mayor agents are idle (timer pending), proceeding to Phase 3` + ); + break; + } + console.log( `${DRAIN_LOG} Waiting for ${active.length} non-mayor agents: ` + active.map(a => `${a.role}:${a.agentId.slice(0, 8)}=${a.status}`).join(', ') From 63ff892d1adda00c7cf3208c3044b5a1463e574a Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 13:57:41 -0500 Subject: [PATCH 43/44] =?UTF-8?q?fix(gastown):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20stream=20race,=20mayor=20watermark,=20admin=20guard?= =?UTF-8?q?,=20instance=20ID=20persistence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check agent.status after initial prompt to catch stream errors that fired while session.prompt() was in-flight - Refresh _mayorWorkingSince on every accepted prompt, not just when transitioning from waiting (prevents stale callback race) - Hide Force Shutdown button for admins in read-only mode - Persist containerInstanceId to storage so it survives DO restarts - Hydrate from storage on first heartbeat after DO restart --- .../container/src/process-manager.ts | 8 +++++ cloudflare-gastown/src/dos/Town.do.ts | 25 ++++++++++++--- src/components/gastown/DrainStatusBanner.tsx | 31 +++++++++++-------- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 13d62ab77..d88542453 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -713,6 +713,14 @@ export async function startAgent( ...(request.systemPrompt ? { system: request.systemPrompt } : {}), }, }); + + // If the event stream errored while we were awaiting the prompt, + // the stream-error handler already set the agent to 'failed' and + // reported completion. 
Don't continue with a success log. + if (agent.status === 'failed') { + throw new Error('Event stream failed during initial prompt'); + } + agent.messageCount = 1; log.info('agent.start', { diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index 4b7269215..d1172b5e2 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -1273,14 +1273,24 @@ export class TownDO extends DurableObject { agents.touchAgent(this.sql, agentId, watermark); await this.armAlarmIfNeeded(); - // If the container instance ID changed, a new container has started. - // Clear the drain flag so the UI banner disappears and dispatch resumes. + // Detect container restarts via instance ID change. The instance ID + // is persisted so it survives DO restarts (unlike in-memory only). if (watermark?.containerInstanceId) { + // Hydrate from storage on first access after DO restart + if (this._containerInstanceId === null) { + this._containerInstanceId = + (await this.ctx.storage.get('town:containerInstanceId')) ?? null; + } + if ( this._draining && this._containerInstanceId && watermark.containerInstanceId !== this._containerInstanceId ) { + // New container started — clear drain flag. This supplements the + // nonce handshake (acknowledgeContainerReady) as a faster path: + // the heartbeat fires every 30s vs the nonce which requires the + // container to explicitly call /container-ready. 
this._draining = false; this._drainNonce = null; this._drainStartedAt = null; @@ -1291,7 +1301,11 @@ export class TownDO extends DurableObject { `${TOWN_LOG} heartbeat: new container instance ${watermark.containerInstanceId} (was ${this._containerInstanceId}), clearing drain flag` ); } - this._containerInstanceId = watermark.containerInstanceId; + + if (watermark.containerInstanceId !== this._containerInstanceId) { + this._containerInstanceId = watermark.containerInstanceId; + await this.ctx.storage.put('town:containerInstanceId', watermark.containerInstanceId); + } } return { drainNonce: this._drainNonce }; @@ -2177,9 +2191,12 @@ export class TownDO extends DurableObject { // while the mayor processes this prompt. Also reschedule the alarm // immediately — the idle alarm may be up to 5 min away, and we need // the reconciler/health-check loop to resume promptly. + // Always refresh the watermark so a stale mayorWaiting callback + // from a previous turn can't flip the mayor back to waiting + // while a queued prompt is being processed. 
+ this._mayorWorkingSince = Date.now(); if (mayor.status === 'waiting') { agents.updateAgentStatus(this.sql, mayor.id, 'working'); - this._mayorWorkingSince = Date.now(); await this.ctx.storage.setAlarm(Date.now() + ACTIVE_ALARM_INTERVAL_MS); } sessionStatus = 'active'; diff --git a/src/components/gastown/DrainStatusBanner.tsx b/src/components/gastown/DrainStatusBanner.tsx index c027161a0..ccc5cce60 100644 --- a/src/components/gastown/DrainStatusBanner.tsx +++ b/src/components/gastown/DrainStatusBanner.tsx @@ -23,6 +23,9 @@ export function DrainStatusBanner({ townId }: { townId: string }) { refetchInterval: 5_000, }); + const { data: adminAccess } = useQuery(trpc.gastown.checkAdminAccess.queryOptions({ townId })); + const isReadOnly = adminAccess?.isAdminViewing === true; + const destroyContainer = useMutation( trpc.gastown.destroyContainer.mutationOptions({ onSuccess: () => { @@ -60,19 +63,21 @@ export function DrainStatusBanner({ townId }: { townId: string }) { completes. - - - + {!isReadOnly && ( + + + + )} ); } From c81a5896d665c41f36eec328c5d860d87a403686 Mon Sep 17 00:00:00 2001 From: John Fawcett Date: Thu, 2 Apr 2026 14:24:49 -0500 Subject: [PATCH 44/44] fix(gastown): prevent double sessionCount decrement, let idle agents exit cleanly during drain - When stream error fires during initial prompt, mark sessionCounted false so the catch block doesn't double-decrement the shared SDK server's session count (could shut it down while other agents use it) - When all drain agents have idle timers, poll at 1s instead of breaking to Phase 3. This lets the normal exitAgent path run (reportAgentCompleted with 'completed') instead of force-saving WIP commits on agents that already called gt_done. 
--- .../container/src/process-manager.ts | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index d88542453..26f49b550 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -715,9 +715,11 @@ export async function startAgent( }); // If the event stream errored while we were awaiting the prompt, - // the stream-error handler already set the agent to 'failed' and - // reported completion. Don't continue with a success log. + // the stream-error handler already set the agent to 'failed', + // reported completion, and decremented sessionCount. Mark + // sessionCounted false so the catch block doesn't double-decrement. if (agent.status === 'failed') { + sessionCounted = false; throw new Error('Event stream failed during initial prompt'); } @@ -1130,13 +1132,17 @@ export async function drainAll(): Promise { if (active.length === 0) break; // If every active agent already has an idle timer running, they've - // finished their work and are just waiting for the timer to fire. - // No need to keep polling — break and let Phase 3 handle them. + // finished their work and are just waiting for the 10s timer to + // fire via the normal completion path (exitAgent → reportAgentCompleted). + // Poll more frequently so we notice the exit promptly, but don't + // break to Phase 3 — that would force-save WIP commits on agents + // that already called gt_done and are about to exit cleanly. if (active.every(a => idleTimers.has(a.agentId))) { console.log( - `${DRAIN_LOG} All ${active.length} non-mayor agents are idle (timer pending), proceeding to Phase 3` + `${DRAIN_LOG} All ${active.length} non-mayor agents are idle (timers pending), waiting for clean exit` ); - break; + await new Promise(r => setTimeout(r, 1000)); + continue; } console.log(