diff --git a/.gitignore b/.gitignore index 71f605ef7..e08af3592 100644 --- a/.gitignore +++ b/.gitignore @@ -102,6 +102,9 @@ supabase/.temp .kilo/plans +# ESLint cache +.eslintcache + # @kilocode/trpc build output (rebuilt by: pnpm --filter @kilocode/trpc run build) packages/trpc/dist/ .plan/ diff --git a/cloudflare-gastown/container/Dockerfile b/cloudflare-gastown/container/Dockerfile index 39b2235c5..b835a3332 100644 --- a/cloudflare-gastown/container/Dockerfile +++ b/cloudflare-gastown/container/Dockerfile @@ -1,18 +1,62 @@ FROM oven/bun:1-slim -# Install git, gh CLI, and Node.js (required by @kilocode/cli which uses #!/usr/bin/env node) +# Install dev toolchain, search tools, build deps, gh CLI, and Node.js +# Package categories: +# version control: git, git-lfs +# network/download: curl, wget, ca-certificates, gnupg, unzip +# build toolchain: build-essential, autoconf +# search tools: ripgrep, jq +# compression: bzip2, zstd +# SSL/crypto: libssl-dev, libffi-dev +# database client libs: libdb-dev, libgdbm-dev, libgdbm6 +# Python build deps: libbz2-dev, liblzma-dev, libncurses5-dev, libreadline-dev, zlib1g-dev +# Ruby build deps: libyaml-dev +# image processing: libvips-dev +# browser/rendering: libgbm1 +# C++ stdlib: libc++1 +# math: libgmp-dev +# timezone data: tzdata RUN apt-get update && \ - apt-get install -y --no-install-recommends git git-lfs curl ca-certificates && \ - curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \ - apt-get install -y --no-install-recommends nodejs && \ - curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ - -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ - > /etc/apt/sources.list.d/github-cli.list && \ - apt-get update && \ - apt-get install -y --no-install-recommends gh && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + apt-get 
install -y --no-install-recommends \ + git \ + git-lfs \ + curl \ + wget \ + ca-certificates \ + gnupg \ + unzip \ + build-essential \ + autoconf \ + ripgrep \ + jq \ + bzip2 \ + zstd \ + libssl-dev \ + libffi-dev \ + libdb-dev \ + libgdbm-dev \ + libgdbm6 \ + libbz2-dev \ + liblzma-dev \ + libncurses5-dev \ + libreadline-dev \ + zlib1g-dev \ + libyaml-dev \ + libvips-dev \ + libgbm1 \ + libc++1 \ + libgmp-dev \ + tzdata \ + && curl -fsSL https://deb.nodesource.com/setup_24.x | bash - \ + && apt-get install -y --no-install-recommends nodejs \ + && curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends gh \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* RUN git lfs install --system diff --git a/cloudflare-gastown/container/Dockerfile.dev b/cloudflare-gastown/container/Dockerfile.dev index 474ef2315..a4bebc5db 100644 --- a/cloudflare-gastown/container/Dockerfile.dev +++ b/cloudflare-gastown/container/Dockerfile.dev @@ -1,18 +1,62 @@ FROM --platform=linux/arm64 oven/bun:1-slim -# Install git, gh CLI, and Node.js (required by @kilocode/cli which uses #!/usr/bin/env node) +# Install dev toolchain, search tools, build deps, gh CLI, and Node.js +# Package categories: +# version control: git, git-lfs +# network/download: curl, wget, ca-certificates, gnupg, unzip +# build toolchain: build-essential, autoconf +# search tools: ripgrep, jq +# compression: bzip2, zstd +# SSL/crypto: libssl-dev, libffi-dev +# database client libs: libdb-dev, libgdbm-dev, libgdbm6 +# Python build deps: libbz2-dev, liblzma-dev, libncurses5-dev, libreadline-dev, zlib1g-dev +# Ruby build deps: libyaml-dev +# image processing: libvips-dev +# 
browser/rendering: libgbm1 +# C++ stdlib: libc++1 +# math: libgmp-dev +# timezone data: tzdata RUN apt-get update && \ - apt-get install -y --no-install-recommends git git-lfs curl ca-certificates && \ - curl -fsSL https://deb.nodesource.com/setup_24.x | bash - && \ - apt-get install -y --no-install-recommends nodejs && \ - curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ - -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ - > /etc/apt/sources.list.d/github-cli.list && \ - apt-get update && \ - apt-get install -y --no-install-recommends gh && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* + apt-get install -y --no-install-recommends \ + git \ + git-lfs \ + curl \ + wget \ + ca-certificates \ + gnupg \ + unzip \ + build-essential \ + autoconf \ + ripgrep \ + jq \ + bzip2 \ + zstd \ + libssl-dev \ + libffi-dev \ + libdb-dev \ + libgdbm-dev \ + libgdbm6 \ + libbz2-dev \ + liblzma-dev \ + libncurses5-dev \ + libreadline-dev \ + zlib1g-dev \ + libyaml-dev \ + libvips-dev \ + libgbm1 \ + libc++1 \ + libgmp-dev \ + tzdata \ + && curl -fsSL https://deb.nodesource.com/setup_24.x | bash - \ + && apt-get install -y --no-install-recommends nodejs \ + && curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends gh \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* RUN git lfs install --system diff --git a/cloudflare-gastown/container/plugin/client.ts b/cloudflare-gastown/container/plugin/client.ts index 93b972854..6a222b7b4 100644 --- 
a/cloudflare-gastown/container/plugin/client.ts +++ b/cloudflare-gastown/container/plugin/client.ts @@ -310,6 +310,7 @@ export class MayorGastownClient { title: string; body?: string; metadata?: Record; + labels?: string[]; }): Promise { return this.request(this.mayorPath('/sling'), { method: 'POST', diff --git a/cloudflare-gastown/container/plugin/mayor-tools.ts b/cloudflare-gastown/container/plugin/mayor-tools.ts index e8c9929de..09b28ded4 100644 --- a/cloudflare-gastown/container/plugin/mayor-tools.ts +++ b/cloudflare-gastown/container/plugin/mayor-tools.ts @@ -27,21 +27,6 @@ function parseUiAction(value: unknown): UiActionInput { return value as UiActionInput; } -function parseJsonObject(value: string, label: string): Record { - let parsed: unknown; - try { - parsed = JSON.parse(value); - } catch { - throw new Error(`Invalid JSON in "${label}"`); - } - if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { - throw new Error( - `"${label}" must be a JSON object, got ${Array.isArray(parsed) ? 'array' : typeof parsed}` - ); - } - return parsed as Record; -} - /** * Mayor-specific tools for cross-rig delegation. * These are only registered when `GASTOWN_AGENT_ROLE=mayor`. @@ -64,17 +49,23 @@ export function createMayorTools(client: MayorGastownClient) { ) .optional(), metadata: tool.schema - .string() - .describe('JSON-encoded metadata object for additional context') + .record(tool.schema.string(), tool.schema.unknown()) + .describe( + 'Metadata object for additional context (e.g. { pr_url, branch, target_branch })' + ) + .optional(), + labels: tool.schema + .array(tool.schema.string()) + .describe('Labels to attach to the bead (e.g. ["gt:pr-fixup"])') .optional(), }, async execute(args) { - const metadata = args.metadata ? 
parseJsonObject(args.metadata, 'metadata') : undefined; const result = await client.sling({ rig_id: args.rig_id, title: args.title, body: args.body, - metadata, + metadata: args.metadata, + labels: args.labels, }); return [ `Task slung successfully.`, diff --git a/cloudflare-gastown/container/plugin/tools.test.ts b/cloudflare-gastown/container/plugin/tools.test.ts index 4e8780013..2c55de041 100644 --- a/cloudflare-gastown/container/plugin/tools.test.ts +++ b/cloudflare-gastown/container/plugin/tools.test.ts @@ -232,33 +232,18 @@ describe('tools', () => { expect(result).toContain('priority: high'); }); - it('parses metadata JSON string', async () => { - await tools.gt_escalate.execute({ title: 'Test', metadata: '{"key": "value"}' }, CTX); + it('passes metadata object through to createEscalation', async () => { + await tools.gt_escalate.execute( + { title: 'Test', metadata: { key: 'value', nested: 123 } }, + CTX + ); expect(client.createEscalation).toHaveBeenCalledWith({ title: 'Test', body: undefined, priority: undefined, - metadata: { key: 'value' }, + metadata: { key: 'value', nested: 123 }, }); }); - - it('throws on invalid metadata JSON', async () => { - await expect( - tools.gt_escalate.execute({ title: 'Test', metadata: 'not json' }, CTX) - ).rejects.toThrow('Invalid JSON in "metadata"'); - }); - - it('throws when metadata is a JSON array instead of object', async () => { - await expect( - tools.gt_escalate.execute({ title: 'Test', metadata: '[1, 2]' }, CTX) - ).rejects.toThrow('"metadata" must be a JSON object, got array'); - }); - - it('throws when metadata is a JSON string instead of object', async () => { - await expect( - tools.gt_escalate.execute({ title: 'Test', metadata: '"hello"' }, CTX) - ).rejects.toThrow('"metadata" must be a JSON object, got string'); - }); }); describe('gt_checkpoint', () => { diff --git a/cloudflare-gastown/container/plugin/tools.ts b/cloudflare-gastown/container/plugin/tools.ts index 1c360c0f2..770115a6d 100644 --- 
a/cloudflare-gastown/container/plugin/tools.ts +++ b/cloudflare-gastown/container/plugin/tools.ts @@ -9,16 +9,6 @@ function parseJsonArg(value: string, label: string): unknown { } } -function parseJsonObject(value: string, label: string): Record { - const parsed = parseJsonArg(value, label); - if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { - throw new Error( - `"${label}" must be a JSON object, got ${Array.isArray(parsed) ? 'array' : typeof parsed}` - ); - } - return Object.fromEntries(Object.entries(parsed as object)); -} - export function createTools(client: GastownClient) { return { gt_prime: tool({ @@ -155,17 +145,16 @@ export function createTools(client: GastownClient) { .describe('Severity level (defaults to medium)') .optional(), metadata: tool.schema - .string() - .describe('JSON-encoded metadata object for additional context') + .record(tool.schema.string(), tool.schema.unknown()) + .describe('Metadata object for additional context') .optional(), }, async execute(args) { - const metadata = args.metadata ? parseJsonObject(args.metadata, 'metadata') : undefined; const bead = await client.createEscalation({ title: args.title, body: args.body, priority: args.priority, - metadata, + metadata: args.metadata, }); return `Escalation created: ${bead.bead_id} (priority: ${bead.priority})`; }, diff --git a/cloudflare-gastown/container/src/control-server.ts b/cloudflare-gastown/container/src/control-server.ts index 72e9dcbb7..012e736f8 100644 --- a/cloudflare-gastown/container/src/control-server.ts +++ b/cloudflare-gastown/container/src/control-server.ts @@ -93,6 +93,15 @@ function syncTownConfigToProcessEnv(): void { } else { delete process.env.GASTOWN_DISABLE_AI_COAUTHOR; } + + // Keep the standalone env var in sync with the town config so org + // billing context is never lost across model changes. 
+ const orgId = cfg.organization_id; + if (typeof orgId === 'string' && orgId) { + process.env.GASTOWN_ORGANIZATION_ID = orgId; + } else { + delete process.env.GASTOWN_ORGANIZATION_ID; + } } export const app = new Hono(); @@ -216,6 +225,11 @@ app.post('/agents/start', async c => { return c.json({ error: 'Invalid request body', issues: parsed.error.issues }, 400); } + // Persist the organization ID as a standalone env var so it survives + // config rebuilds (e.g. model hot-swap). The env var is the primary + // source of truth; KILO_CONFIG_CONTENT extraction is the fallback. + process.env.GASTOWN_ORGANIZATION_ID = parsed.data.organizationId ?? ''; + console.log( `[control-server] /agents/start: role=${parsed.data.role} name=${parsed.data.name} rigId=${parsed.data.rigId} agentId=${parsed.data.agentId}` ); @@ -285,6 +299,11 @@ app.patch('/agents/:agentId/model', async c => { return c.json({ error: 'Invalid request body', issues: parsed.error.issues }, 400); } + // Update org billing context from the request body if provided. + if (parsed.data.organizationId) { + process.env.GASTOWN_ORGANIZATION_ID = parsed.data.organizationId; + } + // Sync config-derived env vars from X-Town-Config into process.env so // the SDK server restart picks up fresh tokens and git identity. // The middleware already parsed the header into lastKnownTownConfig. diff --git a/cloudflare-gastown/container/src/process-manager.ts b/cloudflare-gastown/container/src/process-manager.ts index 26f49b550..d83c45e69 100644 --- a/cloudflare-gastown/container/src/process-manager.ts +++ b/cloudflare-gastown/container/src/process-manager.ts @@ -620,6 +620,14 @@ export async function startAgent( console.log( `${MANAGER_LOG} startAgent: stopping existing session for ${request.agentId} (status=${existing.status})` ); + + // If the agent is still starting, abort the in-flight startup to prevent + // an orphaned session from being created after stopAgent returns. 
+ if (existing.status === 'starting' && existing.startupAbortController) { + console.log(`${MANAGER_LOG} startAgent: aborting in-flight startup for ${request.agentId}`); + existing.startupAbortController.abort(); + } + await stopAgent(request.agentId).catch(err => { console.warn( `${MANAGER_LOG} startAgent: failed to stop existing session for ${request.agentId}`, @@ -629,6 +637,7 @@ export async function startAgent( } const now = new Date().toISOString(); + const startupAbortController = new AbortController(); const agent: ManagedAgent = { agentId: request.agentId, rigId: request.rigId, @@ -653,15 +662,22 @@ export async function startAgent( completionCallbackUrl: request.envVars?.GASTOWN_COMPLETION_CALLBACK_URL ?? null, model: request.model ?? null, startupEnv: env, + startupAbortController, }; agents.set(request.agentId, agent); + const { signal } = startupAbortController; let sessionCounted = false; try { // 1. Ensure SDK server is running for this workdir const { client, port } = await ensureSDKServer(workdir, env); agent.serverPort = port; + // Check if startup was cancelled while waiting for the SDK server + if (signal.aborted) { + throw new StartupAbortedError(request.agentId); + } + // Track session count on the SDK instance const instance = sdkInstances.get(workdir); if (instance) { @@ -671,6 +687,10 @@ export async function startAgent( // 2. Create a session const sessionResult = await client.session.create({ body: {} }); + + // Parse and store the session ID immediately so the catch block can + // abort an orphaned session if startupAbortController fires during + // the await above. const rawSession: unknown = sessionResult.data ?? sessionResult; const parsed = SessionResponse.safeParse(rawSession); if (!parsed.success) { @@ -684,6 +704,12 @@ export async function startAgent( const sessionId = parsed.data.id; agent.sessionId = sessionId; + // Now check if startup was cancelled while creating the session. 
+ // agent.sessionId is already set, so the catch block will abort it. + if (signal.aborted) { + throw new StartupAbortedError(request.agentId); + } + // 3. Subscribe to events (async, runs in background) void subscribeToEvents(client, agent, request); @@ -705,6 +731,11 @@ export async function startAgent( modelParam = { providerID: 'kilo', modelID: request.model }; } + // Final abort check before sending the prompt + if (signal.aborted) { + throw new StartupAbortedError(request.agentId); + } + await client.session.prompt({ path: { id: sessionId }, body: { @@ -722,6 +753,7 @@ export async function startAgent( sessionCounted = false; throw new Error('Event stream failed during initial prompt'); } + agent.startupAbortController = null; agent.messageCount = 1; @@ -735,7 +767,39 @@ export async function startAgent( return agent; } catch (err) { + // On abort, clean up silently — the new startAgent invocation will + // proceed with a fresh entry. + if (err instanceof StartupAbortedError) { + console.log(`${MANAGER_LOG} startAgent: startup aborted for ${request.agentId}, cleaning up`); + if (sessionCounted) { + const instance = sdkInstances.get(workdir); + if (instance) { + // Abort the orphaned session if one was created before the abort + if (agent.sessionId) { + try { + await instance.client.session.abort({ path: { id: agent.sessionId } }); + } catch (abortErr) { + console.error( + `${MANAGER_LOG} startAgent: failed to abort orphaned session ${agent.sessionId}:`, + abortErr + ); + } + } + instance.sessionCount--; + if (instance.sessionCount <= 0) { + instance.server.close(); + sdkInstances.delete(workdir); + } + } + } + if (agents.get(request.agentId) === agent) { + agents.delete(request.agentId); + } + throw err; + } + agent.status = 'failed'; + agent.startupAbortController = null; agent.exitReason = err instanceof Error ? 
err.message : String(err); if (sessionCounted) { const instance = sdkInstances.get(workdir); @@ -745,6 +809,18 @@ export async function startAgent( } } +/** + * Thrown when a startup sequence is cancelled via AbortController. + * Distinct from other errors so the catch block can clean up without + * marking the agent as failed (a new startup is taking over). + */ +class StartupAbortedError extends Error { + constructor(agentId: string) { + super(`Startup aborted for agent ${agentId}`); + this.name = 'StartupAbortedError'; + } +} + /** * Stop an agent by aborting its session. */ @@ -753,6 +829,13 @@ export async function stopAgent(agentId: string): Promise { if (!agent) throw new Error(`Agent ${agentId} not found`); if (agent.status !== 'running' && agent.status !== 'starting') return; + // If still starting, abort the in-flight startup so session.create() + // doesn't produce an orphaned session after we return. + if (agent.startupAbortController) { + agent.startupAbortController.abort(); + agent.startupAbortController = null; + } + agent.status = 'stopping'; // Cancel any pending idle timer @@ -839,6 +922,12 @@ export async function sendMessage(agentId: string, prompt: string): Promise; @@ -133,6 +135,10 @@ export type ManagedAgent = { model: string | null; /** Full env dict from buildAgentEnv, stored so model hot-swap can replay it. */ startupEnv: Record; + /** AbortController for the in-flight startup sequence. Aborted when a + * restart is requested while the agent is still in 'starting' status, + * preventing orphaned sessions from leaking. 
*/ + startupAbortController: AbortController | null; }; export type AgentStatusResponse = { diff --git a/cloudflare-gastown/docs/e2e-pr-feedback-testing.md b/cloudflare-gastown/docs/e2e-pr-feedback-testing.md new file mode 100644 index 000000000..443e4e342 --- /dev/null +++ b/cloudflare-gastown/docs/e2e-pr-feedback-testing.md @@ -0,0 +1,495 @@ +# E2E Local Testing: PR Feedback Auto-Resolve & Auto-Merge + +Guide for an AI agent to test the PR feedback auto-resolve and auto-merge feature locally. Covers the full lifecycle for both single beads and convoys. + +## Architecture Overview + +When `merge_strategy: 'pr'` is configured: + +1. **Polecat creates the PR** — pushes branch, runs `gh pr create`, passes `pr_url` to `gt_done` +2. **Refinery reviews the existing PR** — runs quality gates, reviews diff, adds GitHub review comments (approve or request changes) +3. **Auto-resolve detects comments** — `poll_pr` checks for unresolved review threads, dispatches polecat to fix +4. **Auto-merge** — once all comments resolved and CI passes, grace period timer starts, then PR is merged via API + +## Prerequisites + +- Wrangler dev server running for gastown (`pnpm dev` in `cloudflare-gastown/`, port 8803) +- Docker running (containers are managed by wrangler's container runtime) +- A town with an active container and at least one rig configured with a GitHub repo +- `gh` CLI authenticated (for adding PR comments and verifying merges) + +## Quick Reference + +```bash +BASE=http://localhost:8803 +TOWN_ID="${TOWN_ID:-a093a551-ff4d-4c36-9274-252df66128fd}" +RIG_ID="${RIG_ID:-mega-todo-app5}" +REPO="${REPO:-jrf0110/mega-todo-app5}" +``` + +## 1. 
Verify Town Settings + +Check these settings in the town settings UI: + +| Setting | Required Value | +| ------------------------ | ------------------------------ | +| Merge strategy | `pr` (Pull Request) | +| Auto-merge | enabled | +| Auto-resolve PR feedback | enabled | +| Auto-merge delay | 2 minutes (or preferred delay) | + +### Verify Clean State + +```bash +curl -s $BASE/debug/towns/$TOWN_ID/status | python3 -c " +import sys, json +d = json.load(sys.stdin) +alarm = d.get('alarmStatus', {}) +print('Agents:', json.dumps(alarm.get('agents', {}))) +print('Beads:', json.dumps(alarm.get('beads', {}))) +summary = d.get('beadSummary', []) +if summary: + print(f'WARNING: {len(summary)} non-terminal bead(s)') + for b in summary: + print(f' {b.get(\"type\",\"?\"):16s} {b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:60]}') +else: + print('Clean state.') +" +``` + +--- + +## Test A: Single Bead Flow + +### A.1. Send Work to the Mayor + +```bash +curl -s -m 120 -X POST $BASE/debug/towns/$TOWN_ID/send-message \ + -H "Content-Type: application/json" \ + -d "{\"message\": \"Create a bead for this task on the $RIG_ID rig: Add a new utility file src/utils/string-helpers.ts with 5 string utility functions (capitalize, truncate, slugify, camelToKebab, kebabToCamel). Each function should have JSDoc comments. Commit and push when done.\"}" +``` + +### A.2. Wait for Polecat to Create PR + +The polecat now creates the PR itself. 
Poll until the MR bead appears with a PR URL: + +```bash +for i in $(seq 1 60); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + echo "$(date +%H:%M:%S)" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + btype = b.get('type', '?') + if btype in ('issue', 'merge_request'): + print(f' {btype:16s} {b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:55]}') +for am in d.get('agentMeta', []): + if am.get('status') != 'idle': + hook = str(am.get('current_hook_bead_id', 'NULL') or 'NULL')[:12] + print(f' {am.get(\"role\",\"?\"):12s} status={am.get(\"status\",\"?\"):10s} hook={hook}') +for e in d.get('alarmStatus', {}).get('recentEvents', [])[:3]: + t = e.get('type', '') + if 'pr_' in t or 'created' in t or 'review' in t: + print(f' EVT: {t:20s} {e.get(\"message\",\"\")[:60]}') +" 2>/dev/null + MR_READY=$(echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + if b.get('type') == 'merge_request': + print('MR_EXISTS') + break +" 2>/dev/null) + if [ "$MR_READY" = "MR_EXISTS" ]; then echo "=== MR BEAD CREATED (polecat created PR) ==="; break; fi + sleep 15 +done +``` + +**Expected:** The polecat creates the PR and calls `gt_done(branch, pr_url)`. The MR bead appears as `open`. + +### A.3. Verify PR Exists on GitHub + +```bash +gh pr list --repo $REPO --state open --limit 5 --json number,title,headRefName,createdAt +``` + +Record the PR number: + +```bash +PR_NUMBER= +``` + +### A.4. Wait for Refinery Review + +The refinery is dispatched to review the existing PR. It runs quality gates, reviews the diff, and adds review comments. 
Watch for the refinery to complete: + +```bash +for i in $(seq 1 60); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + echo "$(date +%H:%M:%S)" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + if b.get('type') == 'merge_request': + print(f' MR: status={b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:50]}') +for am in d.get('agentMeta', []): + if am.get('role') == 'refinery' and am.get('status') != 'idle': + print(f' refinery: status={am.get(\"status\",\"?\"):10s}') +for e in d.get('alarmStatus', {}).get('recentEvents', [])[:3]: + t = e.get('type', '') + if 'pr_' in t or 'review' in t: + print(f' EVT: {t:20s} {e.get(\"message\",\"\")[:60]}') +" 2>/dev/null + # MR in_progress means refinery called gt_done with pr_url + IN_PROGRESS=$(echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + if b.get('type') == 'merge_request' and b.get('status') == 'in_progress': + print('yes') +" 2>/dev/null) + if [ "$IN_PROGRESS" = "yes" ]; then echo "=== REFINERY DONE — MR in_progress, poll_pr active ==="; break; fi + sleep 15 +done +``` + +### A.5. Check for Refinery Comments (Optional) + +If the refinery requested changes, an auto-resolve cycle will begin automatically. Check: + +```bash +gh api graphql -f query='query { + repository(owner: "'$(echo $REPO | cut -d/ -f1)'", name: "'$(echo $REPO | cut -d/ -f2)'") { + pullRequest(number: '$PR_NUMBER') { + reviewThreads(first: 100) { + nodes { isResolved, comments(first: 1) { nodes { body, author { login } } } } + } + } + } +}' +``` + +### A.6. 
Add a Human Review Comment + +To test the human feedback loop, add a review with inline comments: + +```bash +gh api repos/$REPO/pulls/$PR_NUMBER/reviews \ + --method POST \ + --input - <<'EOF' +{ + "event": "REQUEST_CHANGES", + "body": "The capitalize function needs input validation.", + "comments": [ + { + "path": "src/utils/string-helpers.ts", + "position": 5, + "body": "Please add input validation - handle empty strings and non-string inputs gracefully." + } + ] +} +EOF +``` + +**Note:** You must use inline comments (with `path` and `position`) to create review threads. The `checkPRFeedback` function detects **unresolved review threads** via GitHub GraphQL, not review state. + +### A.7. Observe Feedback Detection and Resolution + +```bash +for i in $(seq 1 60); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + echo "$(date +%H:%M:%S)" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + title = str(b.get('title', '')) + if b.get('type') in ('issue', 'merge_request'): + marker = ' <-- FEEDBACK' if ('Address' in title or 'PR #' in title) else '' + print(f' {b.get(\"type\",\"?\"):16s} {b.get(\"status\",\"?\"):12s} {title[:50]}{marker}') +for am in d.get('agentMeta', []): + if am.get('status') != 'idle': + print(f' {am.get(\"role\",\"?\"):12s} status={am.get(\"status\",\"?\"):10s}') +" 2>/dev/null + sleep 10 +done +``` + +### A.8. Wait for Auto-Merge + +After all review threads are resolved and CI passes, the auto-merge timer starts (configured delay, e.g. 2 minutes). Monitor until all beads close: + +```bash +echo "Waiting for auto-merge..." 
+MERGE_START=$(date +%s) +for i in $(seq 1 60); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + ELAPSED=$(( $(date +%s) - MERGE_START )) + echo "$(date +%H:%M:%S) [${ELAPSED}s]" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +beads = d.get('beadSummary', []) +relevant = [b for b in beads if b.get('type') in ('merge_request',) or 'string' in str(b.get('title','')).lower() or 'Address' in str(b.get('title',''))] +if not relevant: + print(' ALL DONE') +else: + for b in relevant: + print(f' {b.get(\"type\",\"?\"):16s} {b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:50]}') +" 2>/dev/null + ALL_DONE=$(echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +beads = d.get('beadSummary', []) +relevant = [b for b in beads if b.get('type') in ('merge_request',) or 'string' in str(b.get('title','')).lower() or 'Address' in str(b.get('title',''))] +if not relevant: print('DONE') +" 2>/dev/null) + if [ "$ALL_DONE" = "DONE" ]; then echo "=== AUTO-MERGE COMPLETE ==="; break; fi + sleep 10 +done +``` + +### A.9. Verify Merge + +```bash +gh pr view $PR_NUMBER --repo $REPO --json state,mergedAt +``` + +--- + +## Test B: 3-Bead Convoy Flow + +This tests the review-and-merge convoy mode where each bead gets its own PR, review, and auto-merge. + +### B.1. Send Convoy Work to the Mayor + +```bash +curl -s -m 120 -X POST $BASE/debug/towns/$TOWN_ID/send-message \ + -H "Content-Type: application/json" \ + -d "{\"message\": \"Create a convoy of 3 beads on the $RIG_ID rig with merge mode review-and-merge. The beads should be: (1) Add src/utils/array-helpers.ts with functions: unique, flatten, chunk, zip, groupBy. (2) Add src/utils/object-helpers.ts with functions: pick, omit, deepClone, merge, hasKey. (3) Add src/utils/math-helpers.ts with functions: clamp, lerp, roundTo, sum, average. Each file should have JSDoc comments and a simple test file alongside it. 
Use review-and-merge mode so each bead gets its own PR.\"}" +``` + +### B.2. Monitor All 3 Beads + +Poll the status showing all beads and their progress: + +```bash +for i in $(seq 1 120); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + echo "$(date +%H:%M:%S)" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +beads = d.get('beadSummary', []) +for b in beads: + btype = b.get('type', '?') + if btype in ('issue', 'merge_request', 'convoy'): + print(f' {btype:16s} {b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:55]}') +agents = d.get('agentMeta', []) +active = [a for a in agents if a.get('status') != 'idle'] +for am in active: + hook = str(am.get('current_hook_bead_id', 'NULL') or 'NULL')[:8] + print(f' {am.get(\"role\",\"?\"):12s} status={am.get(\"status\",\"?\"):10s} hook={hook}') +alarm = d.get('alarmStatus', {}) +recon = alarm.get('reconciler', {}) +actions = recon.get('actionsByType', {}) +if actions: + print(f' reconciler: {json.dumps(actions)}') +" 2>/dev/null + # Check if all relevant beads are closed + ALL_DONE=$(echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +beads = d.get('beadSummary', []) +relevant = [b for b in beads if b.get('type') in ('issue', 'merge_request', 'convoy') and ('helper' in str(b.get('title','')).lower() or 'Review' in str(b.get('title','')) or 'convoy' in b.get('type',''))] +if not relevant: print('DONE') +" 2>/dev/null) + if [ "$ALL_DONE" = "DONE" ]; then echo "=== ALL CONVOY BEADS COMPLETE ==="; break; fi + sleep 15 +done +``` + +### B.3. 
Verify All PRs Merged + +```bash +gh pr list --repo $REPO --state merged --limit 10 --json number,title,mergedAt | python3 -c " +import sys, json +prs = json.load(sys.stdin) +today = '$(date -u +%Y-%m-%d)' +for pr in prs: + if today in pr.get('mergedAt', ''): + print(f' PR #{pr[\"number\"]}: {pr[\"title\"]} (merged: {pr[\"mergedAt\"][:19]})') +" +``` + +--- + +## Expected Timeline + +### Single Bead + +| Step | Duration | +| ------------------------------------ | --------------------- | +| Mayor slings bead | ~30s | +| Polecat works + creates PR | 2-5 min | +| Refinery reviews PR, adds comments | 2-5 min | +| Feedback detected + polecat resolves | 2-5 min (if comments) | +| Auto-merge grace period | 2 min (configured) | +| **Total** | **8-17 min** | + +### 3-Bead Convoy (review-and-merge) + +| Step | Duration | +| ---------------------------------------- | ------------------------ | +| Mayor creates convoy + 3 beads | ~1 min | +| 3 polecats work in parallel + create PRs | 2-5 min | +| 3 refinery reviews (sequential per rig) | 5-15 min | +| Feedback resolution cycles | 2-5 min each (if needed) | +| Auto-merge per PR | 2 min grace each | +| **Total** | **15-30 min** | + +--- + +## Troubleshooting + +### Polecat Doesn't Create PR + +If the polecat pushes but doesn't create a PR: + +- Check the polecat's system prompt includes "Pull Request Creation" section +- Verify `merge_strategy` is `pr` in town settings +- Check wrangler logs for the polecat's agent output + +### Refinery Tries to Create a New PR + +If the refinery creates a duplicate PR instead of reviewing the existing one: + +- Check that `review_metadata.pr_url` is set on the MR bead (polecat should have passed it) +- The refinery prompt switches to "PR review mode" only when `existingPrUrl` is set + +### Feedback Not Detected + +`checkPRFeedback` checks for **unresolved review threads** via GitHub GraphQL, not review state. 
A `REQUEST_CHANGES` review without inline/line comments does NOT create review threads. Use reviews with `comments[].path` and `comments[].position` to create detectable threads. + +### Auto-Merge Stuck + +- `allChecksPass` requires either (a) 0 check-runs (no CI = pass) or (b) all check-runs completed successfully. If the repo has CI, all checks must pass. +- The GitHub token in town config must be valid. Check wrangler logs for `401` errors from `checkPRStatus` or `checkPRFeedback`. +- Check `auto_merge_delay_minutes` is set (not null) in town config. + +### Convoy Beads Not Dispatching + +- In `review-and-merge` mode, each bead is independent — no sequencing dependencies. +- In `review-then-land` mode, beads with `blocks` dependencies wait for their predecessors. Intermediate beads do NOT create PRs (the refinery merges directly to the feature branch). + +### Container Networking (Local Dev) + +The wrangler container runtime occasionally fails to route DO `container.fetch()` to the container's port 8080 — `send-message` returns `sessionStatus: "idle"` even though Docker shows the container as healthy. Workarounds: + +1. **Have a human start wrangler** via the terminal (not `nohup`). The TTY seems to help with container proxy setup. +2. **Kill all containers and restart wrangler cleanly** — stale proxy state can prevent new connections. +3. **Wait 30-60s after wrangler starts** before sending messages — the container needs time to fully initialize. +4. The `GET /health` endpoint returning 200 does NOT mean the DO-to-container path works. The DO's `container.fetch()` uses a different routing mechanism. 
+ +--- + +## Debug Endpoints + +### Inspect a Bead + +Get full bead details including review_metadata and dependencies: + +```bash +curl -s $BASE/debug/towns/$TOWN_ID/beads/ | python3 -c " +import sys, json +d = json.load(sys.stdin) +bead = d.get('bead', {}) +print(f'Type: {bead.get(\"type\")} Status: {bead.get(\"status\")}') +print(f'Title: {bead.get(\"title\")}') +print(f'Parent: {bead.get(\"parent_bead_id\", \"NULL\")}') +rm = d.get('reviewMetadata') +if rm: + print(f'PR URL: {rm.get(\"pr_url\")}') + print(f'Branch: {rm.get(\"branch\")} -> {rm.get(\"target_branch\")}') + print(f'Auto-merge ready since: {rm.get(\"auto_merge_ready_since\", \"NULL\")}') + print(f'Last feedback check: {rm.get(\"last_feedback_check_at\", \"NULL\")}') +deps = d.get('dependencies', []) +if deps: + print(f'Dependencies ({len(deps)}):') + for dep in deps: + print(f' {dep[\"bead_id\"][:8]} -> {dep[\"depends_on_bead_id\"][:8]} ({dep[\"dependency_type\"]})') +" +``` + +### Verify Bead Chain (parent_bead_id linkage) + +After a rework or feedback cycle, verify the chain: + +```bash +# Get the MR bead +MR_ID= +curl -s $BASE/debug/towns/$TOWN_ID/beads/$MR_ID | python3 -c " +import sys, json +d = json.load(sys.stdin) +deps = d.get('dependencies', []) +print('MR bead dependencies:') +for dep in deps: + print(f' {dep[\"dependency_type\"]}: {dep[\"depends_on_bead_id\"][:12]}') +" +# Then check a rework/feedback bead's parent +REWORK_ID= +curl -s $BASE/debug/towns/$TOWN_ID/beads/$REWORK_ID | python3 -c " +import sys, json +bead = json.load(sys.stdin).get('bead', {}) +print(f'parent_bead_id: {bead.get(\"parent_bead_id\", \"NULL\")}') +# Should match the MR bead ID +" +``` + +--- + +## Test C: Code Review Toggle (refinery disabled) + +Tests the `refinery.code_review` setting — when disabled, MR beads skip the refinery and go directly to `poll_pr`. + +### C.1. Disable Code Review + +In the town settings UI, set **Refinery code review** to disabled (unchecked). + +### C.2. 
Send Work + +```bash +curl -s -m 120 -X POST $BASE/debug/towns/$TOWN_ID/send-message \ + -H "Content-Type: application/json" \ + -d "{\"message\": \"Create a bead for this task on the $RIG_ID rig: Add src/utils/type-guards.ts with functions: isString, isNumber, isArray, isObject, isNonNullable. Each with JSDoc. Commit and push.\"}" +``` + +### C.3. Verify MR Bead Skips Refinery + +Watch for the MR bead to go directly from `open` to `in_progress` without a refinery being dispatched: + +```bash +for i in $(seq 1 60); do + STATUS=$(curl -s $BASE/debug/towns/$TOWN_ID/status) + echo "$(date +%H:%M:%S)" + echo "$STATUS" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for b in d.get('beadSummary', []): + if b.get('type') == 'merge_request': + print(f' MR: {b.get(\"status\",\"?\"):12s} {str(b.get(\"title\",\"\"))[:50]}') +for am in d.get('agentMeta', []): + if am.get('role') == 'refinery' and am.get('status') != 'idle': + print(f' WARNING: refinery is {am.get(\"status\")} — should be idle!') +" 2>/dev/null + sleep 15 +done +``` + +**Expected:** The MR bead transitions from `open` → `in_progress` by the reconciler (not the refinery). No refinery agents should become `working`. The `poll_pr` action should start immediately. + +### C.4. Re-enable Code Review + +After testing, re-enable **Refinery code review** in town settings. diff --git a/cloudflare-gastown/docs/local-debug-testing.md b/cloudflare-gastown/docs/local-debug-testing.md new file mode 100644 index 000000000..7d795592f --- /dev/null +++ b/cloudflare-gastown/docs/local-debug-testing.md @@ -0,0 +1,311 @@ +# Local Debug Testing Guide + +Guide for an AI agent to test gastown features locally — dispatching work, monitoring agents, triggering container eviction, and verifying the drain flow. 
+ +## Prerequisites + +- Wrangler dev server running for gastown (`pnpm dev` in `cloudflare-gastown/`) +- Read the dev server port from the wrangler config (default: 8803) +- A town ID (ask the user or check the UI) +- Docker running (containers are managed by wrangler's container runtime) + +## Quick Reference + +```bash +BASE=http://localhost:8803 +TOWN_ID="" +``` + +## 1. Debug Endpoints + +All `/debug/` endpoints are unauthenticated in local dev. + +### Town Status + +The primary status endpoint — shows agents, beads, alarm, patrol, and drain state: + +```bash +curl -s $BASE/debug/towns/$TOWN_ID/status | python3 -c " +import sys, json +d = json.load(sys.stdin) +a = d.get('alarmStatus', {}) +print('Agents:', json.dumps(a.get('agents', {}))) +print('Draining:', a.get('draining', False)) +print() +for am in d.get('agentMeta', []): + print(f\" {am.get('role', '?'):12s} bead={am.get('bead_id', '?')[:8]} status={am.get('status', '?'):10s} hook={str(am.get('current_hook_bead_id', 'NULL'))[:8]}\") +print() +for b in d.get('beadSummary', []): + if b.get('type') == 'issue' and b.get('status') in ('open', 'in_progress'): + print(f\" bead={b.get('bead_id', '?')[:8]} status={b.get('status', '?'):15s} {b.get('title', '')[:60]}\") +" +``` + +### Drain Status + +```bash +curl -s $BASE/debug/towns/$TOWN_ID/drain-status | python3 -m json.tool +``` + +### Pending Nudges + +```bash +curl -s $BASE/debug/towns/$TOWN_ID/nudges | python3 -m json.tool +``` + +### Send Message to Mayor (dev only) + +Creates beads by telling the mayor what to do: + +```bash +curl -s -m 120 -X POST $BASE/debug/towns/$TOWN_ID/send-message \ + -H "Content-Type: application/json" \ + -d '{"message": "Create a bead for this task: Write src/example.ts with 10 utility functions. Commit and push when done."}' +``` + +### Trigger Graceful Stop (dev only) + +Sends SIGTERM to the container, initiating the drain flow: + +```bash +curl -s -X POST $BASE/debug/towns/$TOWN_ID/graceful-stop +``` + +## 2. 
Container Monitoring + +### Find the Active Container + +```bash +CONTAINER_ID=$(docker ps --format "{{.ID}}\t{{.Image}}" | grep towncontainerdo | head -1 | awk '{print $1}') +echo "Container: $CONTAINER_ID" +``` + +### Container Health + +```bash +docker exec $CONTAINER_ID curl -s http://localhost:8080/health | python3 -m json.tool +``` + +### Live Container Logs + +Stream to a file for analysis: + +```bash +docker logs -f $CONTAINER_ID > /tmp/container-logs.log 2>&1 & +``` + +### Filter Drain Logs + +```bash +grep -E "\[drain\]|handleIdleEvent|idle timeout|agent\.(exit|start)|Drain complete" /tmp/container-logs.log +``` + +### Check Agent Events + +```bash +docker logs $CONTAINER_ID 2>&1 | grep "Event #" | tail -10 +``` + +## 3. Testing Graceful Container Eviction + +### Full Test Loop + +The recommended sequence for verifying the drain flow end-to-end: + +#### Step 1: Verify clean state + +```bash +# Not draining, agents are available +curl -s $BASE/debug/towns/$TOWN_ID/drain-status # draining: false +curl -s $BASE/debug/towns/$TOWN_ID/status # check agent statuses +``` + +#### Step 2: Send work to the mayor + +```bash +curl -s -m 120 -X POST $BASE/debug/towns/$TOWN_ID/send-message \ + -H "Content-Type: application/json" \ + -d '{"message": "Create a bead for this task: ."}' +``` + +#### Step 3: Wait for a polecat to be dispatched and working + +Poll until a polecat shows `status=working`: + +```bash +for i in $(seq 1 40); do + WORKING=$(curl -s $BASE/debug/towns/$TOWN_ID/status | python3 -c " +import sys, json +d = json.load(sys.stdin) +for am in d.get('agentMeta', []): + if am.get('role') == 'polecat' and am.get('status') == 'working': + print(f\"polecat:{am.get('bead_id', '?')[:8]}\") +" 2>/dev/null) + echo "$(date +%H:%M:%S) $WORKING" + if [ -n "$WORKING" ]; then echo "READY"; break; fi + sleep 15 +done +``` + +#### Step 4: Start monitoring container logs + +```bash +CONTAINER_ID=$(docker ps --format "{{.ID}}\t{{.Image}}" | grep towncontainerdo | head -1 | 
awk '{print $1}') +docker logs -f $CONTAINER_ID > /tmp/drain-test.log 2>&1 & +``` + +#### Step 5: Trigger graceful stop + +```bash +curl -s -X POST $BASE/debug/towns/$TOWN_ID/graceful-stop +``` + +#### Step 6: Monitor the drain + +```bash +# Watch for container exit +for i in $(seq 1 60); do + sleep 10 + RUNNING=$(docker ps -q --filter "id=$CONTAINER_ID") + if [ -z "$RUNNING" ]; then + echo "$(date +%H:%M:%S) CONTAINER EXITED" + break + fi + DRAIN_LINE=$(grep "\[drain\]" /tmp/drain-test.log | tail -1) + echo "$(date +%H:%M:%S) ${DRAIN_LINE:0:100}" +done +``` + +#### Step 7: Verify the drain log + +```bash +grep -E "\[drain\]|handleIdleEvent.*(idle timeout fired)|agent\.(exit|start)|Drain complete" /tmp/drain-test.log +``` + +**Expected drain flow:** + +1. `Phase 1: TownDO responded 200` — TownDO notified, dispatch blocked +2. `Phase 2: waiting up to 300s` — waiting for non-mayor agents +3. `Waiting for N non-mayor agents` — polling until agents finish or idle +4. `All N non-mayor agents are idle` — agents finished, timers pending +5. `idle timeout fired` / `agent.exit` — agents exit via normal path +6. `Phase 3: freezing N straggler(s)` — only the mayor (or truly stuck agents) +7. `Phase 3: force-saving agent ...` — WIP git commit for stragglers +8. `Drain complete` — container exits + +#### Step 8: Verify post-drain state + +```bash +# Drain flag should clear within ~30s (heartbeat instance ID detection) +curl -s $BASE/debug/towns/$TOWN_ID/drain-status + +# Check bead states — evicted beads should be 'open' with eviction context +curl -s $BASE/debug/towns/$TOWN_ID/status | python3 -c " +import sys, json +d = json.load(sys.stdin) +for am in d.get('agentMeta', []): + print(f\" {am.get('role', '?'):12s} status={am.get('status', '?'):10s} hook={str(am.get('current_hook_bead_id', 'NULL'))[:8]}\") +" +``` + +## 4. 
Wrangler Management + +### Restart Wrangler + +When you need to pick up code changes: + +```bash +# Kill existing +ps aux | grep -i wrangler | grep gastown | grep -v grep | awk '{print $2}' | xargs kill -9 +ps aux | grep workerd | grep -v grep | awk '{print $2}' | xargs kill -9 +sleep 3 + +# Start fresh +cd cloudflare-gastown && nohup pnpm dev > /tmp/gastown-wrangler.log 2>&1 & +sleep 25 +curl -s http://localhost:8803/health +``` + +### Check Wrangler Logs + +```bash +# Dispatch errors +grep -i "error\|failed\|dispatch" /tmp/gastown-wrangler.log | tail -20 + +# Container lifecycle +grep "container\|startAgent\|eviction" /tmp/gastown-wrangler.log | tail -20 +``` + +## 5. Drain Architecture + +### 3-Phase Drain (SIGTERM handler) + +1. **Phase 1: Notify TownDO** — POST `/api/towns/:id/container-eviction`. Sets `_draining = true` on the TownDO, blocking new agent dispatch. Records the drain nonce and start time. + +2. **Phase 2: Wait for agents** — Poll the container's local `agents` Map every 5s for up to 5 minutes. Excludes mayors (they never exit on their own). When all non-mayor agents have idle timers pending (meaning they called `gt_done` and went idle), polls at 1s for fast exit. Agents exit through the normal `exitAgent` → `reportAgentCompleted` path. + +3. **Phase 3: Force-save stragglers** — Any agents still running after 5 minutes are frozen (SDK session aborted), WIP git committed and pushed, eviction context written on the bead body, and `reportAgentCompleted(agent, 'completed', 'container eviction')` called. The TownDO resets the bead to `open` and clears the assignee so the reconciler re-dispatches on the next tick. + +### Drain Flag Clearing + +The TownDO's `_draining` flag is cleared by whichever happens first: + +- **Heartbeat instance ID change** (~30s): each container has a UUID. When a new container's heartbeat arrives with a different ID, the drain clears. +- **Nonce handshake**: the new container calls `/container-ready` with the drain nonce. 
+- **Hard timeout** (7 min): safety net if no heartbeat or handshake arrives. + +### Key Behaviors During Drain + +- `isDraining()` returns true → `/agents/start` returns 503 +- `handleIdleEvent` skips `fetchPendingNudges` (avoids hanging outbound HTTP) +- Idle timeout is 10s (vs normal 120s/600s) so agents exit promptly +- `reconcileAgents` skips stale-heartbeat checks (heartbeat reporter is stopped) +- `reconcileGUPP` skips entirely (no false "idle for 15 minutes" nudges) +- Evicted `in_progress` beads are reset to `open` with assignee cleared + +## 6. Common Issues + +### Container Keeps Cycling + +Containers start and immediately get killed (exit code 137). This is usually the wrangler container runtime recreating them. Kill all containers and restart wrangler cleanly: + +```bash +docker kill $(docker ps -q) 2>/dev/null +# Then restart wrangler (see section 4) +``` + +### Drain Stuck "Waiting for N agents" + +Agents show as `running` but aren't doing work. Common causes: + +- **`fetchPendingNudges` hanging**: should be skipped during drain (check `_draining` flag) +- **`server.heartbeat` clearing idle timer**: these events should be in `IDLE_TIMER_IGNORE_EVENTS` +- **Agent in `starting` status**: `session.prompt()` blocking. Status is set to `running` before the prompt now. + +### Drain Flag Persists After Restart + +The drain banner stays visible after the container restarted. Causes: + +- **No heartbeats arriving**: container failed to start, no agents registered +- **`_containerInstanceId` not persisted**: should be in `ctx.storage` +- Fallback: wait for the 7-minute hard timeout + +### Git Credential Errors + +Container starts but agents fail at `git clone`: + +``` +Error checking if container is ready: Invalid username +``` + +This means the git token is stale/expired. Refresh credentials in the town settings. + +### Accumulating Escalation Beads + +Triage/escalation beads pile up with `rig_id=NULL`. 
These are by design: + +- `type=escalation` beads surface for human attention (merge conflicts, rework) +- `type=issue` triage beads are handled by `maybeDispatchTriageAgent` +- GUPP force-stop beads are created by the patrol system for stuck agents + +During testing, container restarts generate many of these. Bulk-close via admin panel if needed. diff --git a/cloudflare-gastown/scripts/test-drain.sh b/cloudflare-gastown/scripts/test-drain.sh new file mode 100755 index 000000000..b013cc54e --- /dev/null +++ b/cloudflare-gastown/scripts/test-drain.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash +# +# test-drain.sh — End-to-end graceful container eviction test. +# +# Usage: +# ./scripts/test-drain.sh [port] +# +# Sends a task to the mayor, waits for a polecat to start working, +# triggers a graceful stop, and monitors the drain to completion. +# Prints a PASS/FAIL summary at the end. +# +# Requires: docker, curl, python3 + +set -euo pipefail + +TOWN_ID="${1:?Usage: $0 [port]}" +PORT="${2:-8803}" +BASE="http://localhost:$PORT" +LOG_FILE="/tmp/drain-test-$(date +%s).log" + +info() { echo "[test-drain] $*"; } +fail() { echo "[test-drain] FAIL: $*" >&2; exit 1; } + +# ── Preflight ──────────────────────────────────────────────────────────── +info "Town: $TOWN_ID Port: $PORT Log: $LOG_FILE" + +curl -sf "$BASE/health" > /dev/null || fail "Wrangler not running on port $PORT" + +DRAIN=$(curl -sf "$BASE/debug/towns/$TOWN_ID/drain-status" | python3 -c "import sys,json; print(json.load(sys.stdin).get('draining', True))") +[ "$DRAIN" = "False" ] || fail "Town is still draining from a previous run. Wait for it to clear." + +info "Preflight OK" + +# ── Step 1: Send task ──────────────────────────────────────────────────── +info "Sending task to mayor..." 
+curl -sf -m 120 -X POST "$BASE/debug/towns/$TOWN_ID/send-message" \ + -H "Content-Type: application/json" \ + -d '{"message": "Create a bead for this task: Write src/drainTestUtils.ts with 8 utility functions including capitalize, slugify, truncate, camelCase, kebabCase, reverse, countWords, and isPalindrome. Each function needs JSDoc comments. Commit and push when done."}' > /dev/null + +info "Task sent" + +# ── Step 2: Wait for working polecat ───────────────────────────────────── +info "Waiting for a polecat to start working..." +POLECAT_READY=false +for i in $(seq 1 40); do + WORKING=$(curl -sf "$BASE/debug/towns/$TOWN_ID/status" | python3 -c " +import sys, json +d = json.load(sys.stdin) +for am in d.get('agentMeta', []): + if am.get('role') == 'polecat' and am.get('status') == 'working': + print(am.get('bead_id', '?')[:8]) +" 2>/dev/null || true) + if [ -n "$WORKING" ]; then + info "Polecat working: $WORKING" + POLECAT_READY=true + break + fi + sleep 15 +done +$POLECAT_READY || fail "No polecat started working within 10 minutes" + +# ── Step 3: Find container and start log capture ───────────────────────── +CONTAINER_ID=$(docker ps --format "{{.ID}}\t{{.Image}}" | grep towncontainerdo | head -1 | awk '{print $1}') +[ -n "$CONTAINER_ID" ] || fail "No town container found" +info "Container: $CONTAINER_ID" + +docker logs -f "$CONTAINER_ID" > "$LOG_FILE" 2>&1 & +LOG_PID=$! +trap "kill $LOG_PID 2>/dev/null" EXIT + +# Wait 30s for the polecat to make progress +info "Waiting 30s for polecat progress..." +sleep 30 + +# ── Step 4: Trigger graceful stop ──────────────────────────────────────── +info "=== TRIGGERING GRACEFUL STOP ===" +curl -sf -X POST "$BASE/debug/towns/$TOWN_ID/graceful-stop" > /dev/null +DRAIN_START=$(date +%s) + +# ── Step 5: Monitor drain ──────────────────────────────────────────────── +info "Monitoring drain..." 
+CONTAINER_EXITED=false +for i in $(seq 1 60); do + sleep 10 + RUNNING=$(docker ps -q --filter "id=$CONTAINER_ID" 2>/dev/null) + if [ -z "$RUNNING" ]; then + DRAIN_END=$(date +%s) + DRAIN_SECS=$((DRAIN_END - DRAIN_START)) + info "Container exited after ${DRAIN_SECS}s" + CONTAINER_EXITED=true + break + fi + DRAIN_LINE=$(grep "\[drain\]" "$LOG_FILE" 2>/dev/null | tail -1) + echo " $(date +%H:%M:%S) ${DRAIN_LINE:0:100}" +done + +# ── Step 6: Verify ─────────────────────────────────────────────────────── +echo "" +info "=== DRAIN LOG ===" +grep -E "\[drain\]|handleIdleEvent.*(idle timeout fired)|agent\.(exit|start)|Drain complete" "$LOG_FILE" 2>/dev/null || true + +echo "" + +if ! $CONTAINER_EXITED; then + fail "Container did not exit within 10 minutes" +fi + +# Check for drain completion +if grep -q "Drain complete" "$LOG_FILE" 2>/dev/null; then + info "Drain completed successfully" +else + fail "Drain did not complete (no 'Drain complete' in logs)" +fi + +# Check Phase 1 succeeded +if grep -q "Phase 1: TownDO responded 200" "$LOG_FILE" 2>/dev/null; then + info "Phase 1: TownDO notified OK" +else + info "WARN: Phase 1 TownDO notification may have failed" +fi + +# Check for force-save (Phase 3 stragglers) +STRAGGLERS=$(grep -c "Phase 3: force-saving" "$LOG_FILE" 2>/dev/null || echo "0") +MAYOR_ONLY=$(grep -c "Phase 3: froze agent.*mayor\|Phase 3: force-saving.*mayor" "$LOG_FILE" 2>/dev/null || echo "0") +NON_MAYOR_STRAGGLERS=$((STRAGGLERS - MAYOR_ONLY)) +if [ "$NON_MAYOR_STRAGGLERS" -gt 0 ]; then + info "WARN: $NON_MAYOR_STRAGGLERS non-mayor agent(s) were force-saved (did not exit cleanly)" +else + info "All non-mayor agents exited cleanly (no force-save needed)" +fi + +# Wait for drain flag to clear +info "Waiting for drain flag to clear..." 
+sleep 15 +STILL_DRAINING=$(curl -sf "$BASE/debug/towns/$TOWN_ID/drain-status" 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('draining', True))" 2>/dev/null || echo "unknown") +if [ "$STILL_DRAINING" = "False" ]; then + info "Drain flag cleared" +elif [ "$STILL_DRAINING" = "unknown" ]; then + info "WARN: Could not check drain status (wrangler may have restarted)" +else + info "WARN: Drain flag still set (will clear on next heartbeat from new container)" +fi + +echo "" +info "=== RESULT: PASS ===" +info "Drain completed in ${DRAIN_SECS:-?}s, container exited, log at $LOG_FILE" diff --git a/cloudflare-gastown/src/db/tables/review-metadata.table.ts b/cloudflare-gastown/src/db/tables/review-metadata.table.ts index 8dab287a2..80846c41f 100644 --- a/cloudflare-gastown/src/db/tables/review-metadata.table.ts +++ b/cloudflare-gastown/src/db/tables/review-metadata.table.ts @@ -8,6 +8,11 @@ export const ReviewMetadataRecord = z.object({ merge_commit: z.string().nullable(), pr_url: z.string().nullable(), retry_count: z.number(), + /** Timestamp when all CI checks passed and all review threads were resolved. + * Used by the auto-merge timer to track grace period start. */ + auto_merge_ready_since: z.string().nullable(), + /** Timestamp of the last feedback detection check to prevent duplicate dispatches. */ + last_feedback_check_at: z.string().nullable(), }); export type ReviewMetadataRecord = z.output; @@ -22,5 +27,15 @@ export function createTableReviewMetadata(): string { merge_commit: `text`, pr_url: `text`, retry_count: `integer default 0`, + auto_merge_ready_since: `text`, + last_feedback_check_at: `text`, }); } + +/** Idempotent ALTER statements for existing databases. 
*/ +export function migrateReviewMetadata(): string[] { + return [ + `ALTER TABLE review_metadata ADD COLUMN auto_merge_ready_since text`, + `ALTER TABLE review_metadata ADD COLUMN last_feedback_check_at text`, + ]; +} diff --git a/cloudflare-gastown/src/db/tables/town-events.table.ts b/cloudflare-gastown/src/db/tables/town-events.table.ts index 30be09c65..436aaceca 100644 --- a/cloudflare-gastown/src/db/tables/town-events.table.ts +++ b/cloudflare-gastown/src/db/tables/town-events.table.ts @@ -11,6 +11,8 @@ export const TownEventType = z.enum([ 'bead_cancelled', 'convoy_started', 'nudge_timeout', + 'pr_feedback_detected', + 'pr_auto_merge', ]); export type TownEventType = z.output; diff --git a/cloudflare-gastown/src/dos/Town.do.ts b/cloudflare-gastown/src/dos/Town.do.ts index d1172b5e2..382749b84 100644 --- a/cloudflare-gastown/src/dos/Town.do.ts +++ b/cloudflare-gastown/src/dos/Town.do.ts @@ -31,7 +31,8 @@ import * as scheduling from './town/scheduling'; import * as events from './town/events'; import * as reconciler from './town/reconciler'; import { applyAction } from './town/actions'; -import type { Action, ApplyActionContext } from './town/actions'; +import type { Action, ApplyActionContext, PRFeedbackCheckResult } from './town/actions'; +import { buildPolecatSystemPrompt } from '../prompts/polecat-system.prompt'; import { buildRefinerySystemPrompt } from '../prompts/refinery-system.prompt'; import { GitHubPRStatusSchema, GitLabMRStatusSchema } from '../util/platform-pr.util'; @@ -278,11 +279,17 @@ export class TownDO extends DurableObject { const bead = beadOps.getBead(this.sql, beadId); if (!agent || !bead) return false; - // Build refinery-specific system prompt with branch/target info let systemPromptOverride: string | undefined; + const townConfig = await this.getTownConfig(); + + // Build refinery-specific system prompt with branch/target info. 
+ // When the MR bead already has a pr_url (polecat created the PR), + // the refinery reviews the existing PR and adds GitHub comments + // instead of creating a new PR. if (agent.role === 'refinery' && bead.type === 'merge_request') { const reviewMeta = reviewQueue.getReviewMetadata(this.sql, beadId); - const townConfig = await this.getTownConfig(); + const existingPrUrl = + typeof reviewMeta?.pr_url === 'string' ? reviewMeta.pr_url : undefined; systemPromptOverride = buildRefinerySystemPrompt({ identity: agent.identity, rigId, @@ -295,9 +302,33 @@ export class TownDO extends DurableObject { ? bead.metadata.source_agent_id : 'unknown', mergeStrategy: townConfig.merge_strategy ?? 'direct', + existingPrUrl, }); } + // When merge_strategy is 'pr', polecats create the PR themselves. + // Exception: review-then-land convoy intermediate beads merge directly + // into the convoy feature branch (the refinery handles that). + if (agent.role === 'polecat' && townConfig.merge_strategy === 'pr') { + const convoyId = beadOps.getConvoyForBead(this.sql, beadId); + const convoyMergeMode = convoyId ? beadOps.getConvoyMergeMode(this.sql, convoyId) : null; + const isReviewThenLandIntermediate = + convoyMergeMode === 'review-then-land' && convoyId !== beadId; + + if (!isReviewThenLandIntermediate) { + const rig = rigs.getRig(this.sql, rigId); + systemPromptOverride = buildPolecatSystemPrompt({ + agentName: agent.name, + rigId, + townId: this.townId, + identity: agent.identity, + gates: townConfig.refinery?.gates ?? [], + mergeStrategy: 'pr', + targetBranch: rig?.default_branch ?? 
'main', + }); + } + } + return scheduling.dispatchAgent(schedulingCtx, agent, bead, { systemPromptOverride, }); @@ -309,6 +340,17 @@ export class TownDO extends DurableObject { const townConfig = await this.getTownConfig(); return this.checkPRStatus(prUrl, townConfig); }, + checkPRFeedback: async prUrl => { + const townConfig = await this.getTownConfig(); + return this.checkPRFeedback(prUrl, townConfig); + }, + mergePR: async prUrl => { + const townConfig = await this.getTownConfig(); + return this.mergePR(prUrl, townConfig); + }, + getTownConfig: async () => { + return this.getTownConfig(); + }, queueNudge: async (agentId, message, _tier) => { await this.queueNudge(agentId, message, { mode: 'immediate', @@ -1709,6 +1751,7 @@ export class TownDO extends DurableObject { body: input.feedback, priority: sourceBead?.priority ?? 'medium', rig_id: mrBead.rig_id ?? undefined, + parent_bead_id: mrBead.bead_id, labels: ['gt:rework'], metadata: { rework_for: sourceBeadId, @@ -2074,6 +2117,7 @@ export class TownDO extends DurableObject { body?: string; priority?: string; metadata?: Record; + labels?: string[]; }): Promise<{ bead: Bead; agent: Agent }> { const createdBead = beadOps.createBead(this.sql, { type: 'issue', @@ -2082,6 +2126,7 @@ export class TownDO extends DurableObject { priority: BeadPriority.catch('medium').parse(input.priority ?? 'medium'), rig_id: input.rigId, metadata: input.metadata, + labels: input.labels, }); events.insertEvent(this.sql, 'bead_created', { @@ -2380,6 +2425,11 @@ export class TownDO extends DurableObject { // before restarting the SDK server (tokens, git identity, etc.). const containerConfig = await config.buildContainerConfig(this.ctx.storage, this.env); + // Resolve townConfig to thread the organization_id into the request body + // (belt-and-suspenders: ensures org billing survives even if X-Town-Config + // header parsing fails on the container side). 
+ const townConfig = await config.getTownConfig(this.ctx.storage); + const updated = await dispatch.updateAgentModelInContainer( this.env, townId, @@ -2387,7 +2437,8 @@ export class TownDO extends DurableObject { model, smallModel, conversationHistory || undefined, - containerConfig + containerConfig, + townConfig.organization_id ); if (updated) { console.log( @@ -3317,12 +3368,11 @@ export class TownDO extends DurableObject { }); } - // Refresh the container-scoped JWT before any work that might - // trigger API calls. Throttled to once per hour (tokens have 8h - // expiry, so hourly refresh provides ample safety margin). - // Gated on hasRigs (not hasActiveWork) because the container may - // still be running with an idle mayor accepting user messages, - // even when there are no active beads or agents. + // Refresh the container-scoped JWT. Throttled to once per hour (tokens + // have 8h expiry). Skips when no active work AND no alive mayor — the + // container is sleeping and the token will be refreshed at dispatch time. + // Keeps refreshing for waiting mayors since sendMayorMessage reuses the + // container without calling ensureContainerToken. try { await this.refreshContainerToken(); } catch (err) { @@ -3459,7 +3509,11 @@ export class TownDO extends DurableObject { // Phase 1: Reconcile — compute desired state vs actual state const sideEffects: Array<() => Promise> = []; try { - const actions = reconciler.reconcile(this.sql, { draining: this._draining }); + const townConfig = await this.getTownConfig(); + const actions = reconciler.reconcile(this.sql, { + draining: this._draining, + refineryCodeReview: townConfig.refinery?.code_review ?? true, + }); metrics.actionsEmitted = actions.length; for (const a of actions) { metrics.actionsByType[a.type] = (metrics.actionsByType[a.type] ?? 0) + 1; @@ -3647,6 +3701,19 @@ export class TownDO extends DurableObject { * requests that reset the container's sleepAfter timer (#1409). 
*/ private async refreshContainerToken(): Promise { + // Skip if no active work AND no alive mayor — the container is sleeping + // and doesn't need a fresh token. The token will be refreshed when work + // is next dispatched (ensureContainerToken is called in + // startAgentInContainer at container-dispatch.ts:329). + // However, a waiting mayor IS alive in the container and needs a valid + // token for GT tool calls when the user sends the next message + // (sendMayorMessage → sendMessageToAgent does NOT call ensureContainerToken). + const mayor = agents.listAgents(this.sql, { role: 'mayor' })[0] ?? null; + const mayorAlive = + mayor && + (mayor.status === 'working' || mayor.status === 'stalled' || mayor.status === 'waiting'); + if (!this.hasActiveWork() && !mayorAlive) return; + const TOKEN_REFRESH_INTERVAL_MS = 60 * 60_000; // 1 hour const now = Date.now(); const lastRefresh = (await this.ctx.storage.get('container:lastTokenRefreshAt')) ?? 0; @@ -3934,6 +4001,28 @@ export class TownDO extends DurableObject { await Promise.allSettled(deliveries); } + /** + * Resolve a GitHub API token from the town config. + * Fallback chain: github_token → github_cli_pat → platform integration (GitHub App). + */ + private async resolveGitHubToken(townConfig: TownConfig): Promise { + let token = townConfig.git_auth?.github_token ?? townConfig.github_cli_pat; + if (!token) { + const integrationId = townConfig.git_auth?.platform_integration_id; + if (integrationId && this.env.GIT_TOKEN_SERVICE) { + try { + token = await this.env.GIT_TOKEN_SERVICE.getToken(integrationId); + } catch (err) { + console.warn( + `${TOWN_LOG} resolveGitHubToken: platform integration token lookup failed for ${integrationId}`, + err + ); + } + } + } + return token ?? null; + } + /** * Check the status of a PR/MR via its URL. * Returns 'open', 'merged', or 'closed' (null if cannot determine). 
@@ -3946,9 +4035,9 @@ export class TownDO extends DurableObject { const ghMatch = prUrl.match(/^https:\/\/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/); if (ghMatch) { const [, owner, repo, numberStr] = ghMatch; - const token = townConfig.git_auth.github_token; + const token = await this.resolveGitHubToken(townConfig); if (!token) { - console.warn(`${TOWN_LOG} checkPRStatus: no github_token configured, cannot poll ${prUrl}`); + console.warn(`${TOWN_LOG} checkPRStatus: no GitHub token available, cannot poll ${prUrl}`); return null; } @@ -4030,6 +4119,230 @@ export class TownDO extends DurableObject { return null; } + /** + * Check a PR for unresolved review comments and failing CI checks. + * Used by the auto-resolve PR feedback feature. + */ + private async checkPRFeedback( + prUrl: string, + townConfig: TownConfig + ): Promise { + const ghMatch = prUrl.match(/^https:\/\/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/); + if (!ghMatch) { + // GitLab feedback detection not yet supported + return null; + } + + const [, owner, repo, numberStr] = ghMatch; + const token = await this.resolveGitHubToken(townConfig); + if (!token) return null; + + const headers = { + Authorization: `token ${token}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'Gastown-Refinery/1.0', + }; + + // Check for unresolved review threads via GraphQL. + // Fetches the first 100 threads; if there are more (hasNextPage), + // conservatively treat the PR as having unresolved comments to avoid + // auto-merging with un-checked reviewer feedback. + let hasUnresolvedComments = false; + try { + const graphqlRes = await fetch('https://api.github.com/graphql', { + method: 'POST', + headers: { ...headers, 'Content-Type': 'application/json' }, + body: JSON.stringify({ + query: `query($owner: String!, $repo: String!, $number: Int!) 
{ + repository(owner: $owner, name: $repo) { + pullRequest(number: $number) { + reviewThreads(first: 100) { + pageInfo { hasNextPage } + nodes { isResolved } + } + } + } + }`, + variables: { owner, repo, number: parseInt(numberStr, 10) }, + }), + }); + if (graphqlRes.ok) { + const gqlRaw: unknown = await graphqlRes.json(); + const gql = z + .object({ + data: z + .object({ + repository: z + .object({ + pullRequest: z + .object({ + reviewThreads: z + .object({ + pageInfo: z.object({ hasNextPage: z.boolean() }).optional(), + nodes: z.array(z.object({ isResolved: z.boolean() })), + }) + .optional(), + }) + .optional(), + }) + .optional(), + }) + .optional(), + }) + .safeParse(gqlRaw); + const reviewThreads = gql.success + ? gql.data.data?.repository?.pullRequest?.reviewThreads + : undefined; + const threads = reviewThreads?.nodes ?? []; + const hasMorePages = reviewThreads?.pageInfo?.hasNextPage === true; + hasUnresolvedComments = threads.some(t => !t.isResolved) || hasMorePages; + } + } catch (err) { + console.warn(`${TOWN_LOG} checkPRFeedback: GraphQL failed for ${prUrl}`, err); + } + + // Check CI status via check-runs API. + // Uses per_page=100 (GitHub max) and compares total_count to detect + // unpaginated runs — if there are more runs than returned, conservatively + // marks allChecksPass as false to prevent premature auto-merge. + let hasFailingChecks = false; + let allChecksPass = false; + let hasUncheckedRuns = false; + try { + // Get the PR's head SHA + const prRes = await fetch( + `https://api.github.com/repos/${owner}/${repo}/pulls/${numberStr}`, + { headers } + ); + if (prRes.ok) { + const prRaw: unknown = await prRes.json(); + const prData = z + .object({ head: z.object({ sha: z.string() }).optional() }) + .safeParse(prRaw); + const sha = prData.success ? 
prData.data.head?.sha : undefined; + if (sha) { + const checksRes = await fetch( + `https://api.github.com/repos/${owner}/${repo}/commits/${sha}/check-runs?per_page=100`, + { headers } + ); + if (checksRes.ok) { + const checksRaw: unknown = await checksRes.json(); + const checksData = z + .object({ + total_count: z.number().optional(), + check_runs: z + .array( + z.object({ + status: z.string(), + conclusion: z.string().nullable(), + }) + ) + .optional(), + }) + .safeParse(checksRaw); + const runs = checksData.success ? (checksData.data.check_runs ?? []) : []; + const totalCount = checksData.success + ? (checksData.data.total_count ?? runs.length) + : runs.length; + const hasMorePages = totalCount > runs.length; + hasUncheckedRuns = hasMorePages; + + hasFailingChecks = runs.some( + r => + r.status === 'completed' && r.conclusion !== 'success' && r.conclusion !== 'skipped' + ); + // All checks pass when: + // - No check-runs exist (repo has no CI — nothing to fail), OR + // - All returned runs completed successfully and no unpaginated runs exist + allChecksPass = + runs.length === 0 || + (!hasMorePages && + runs.every( + r => + r.status === 'completed' && + (r.conclusion === 'success' || r.conclusion === 'skipped') + )); + } + + // Also check combined commit statuses (legacy status API). + // Some repos gate merges with commit statuses that don't appear + // in check-runs. If any status is pending/failure/error, block. 
+ if (allChecksPass) { + const statusRes = await fetch( + `https://api.github.com/repos/${owner}/${repo}/commits/${sha}/status`, + { headers } + ); + if (statusRes.ok) { + const statusRaw: unknown = await statusRes.json(); + const statusData = z + .object({ + state: z.string(), + total_count: z.number(), + }) + .safeParse(statusRaw); + if (statusData.success && statusData.data.total_count > 0) { + const combinedState = statusData.data.state; + if (combinedState !== 'success') { + allChecksPass = false; + if (combinedState === 'failure' || combinedState === 'error') { + hasFailingChecks = true; + } + } + } + } + } + } + } + } catch (err) { + console.warn(`${TOWN_LOG} checkPRFeedback: check-runs failed for ${prUrl}`, err); + } + + return { hasUnresolvedComments, hasFailingChecks, allChecksPass, hasUncheckedRuns }; + } + + /** + * Merge a PR via GitHub API. Used by the auto-merge feature. + * Returns true if the merge succeeded. + */ + private async mergePR(prUrl: string, townConfig: TownConfig): Promise { + const ghMatch = prUrl.match(/^https:\/\/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/); + if (!ghMatch) { + console.warn(`${TOWN_LOG} mergePR: unsupported PR URL format: ${prUrl}`); + return false; + } + + const [, owner, repo, numberStr] = ghMatch; + const token = await this.resolveGitHubToken(townConfig); + if (!token) { + console.warn(`${TOWN_LOG} mergePR: no GitHub token available`); + return false; + } + + const response = await fetch( + `https://api.github.com/repos/${owner}/${repo}/pulls/${numberStr}/merge`, + { + method: 'PUT', + headers: { + Authorization: `token ${token}`, + Accept: 'application/vnd.github.v3+json', + 'Content-Type': 'application/json', + 'User-Agent': 'Gastown-Refinery/1.0', + }, + body: JSON.stringify({ merge_method: 'merge' }), + } + ); + + if (!response.ok) { + const text = await response.text().catch(() => '(unreadable)'); + console.warn( + `${TOWN_LOG} mergePR: GitHub API returned ${response.status} for ${prUrl}: ${text.slice(0, 
500)}` + ); + return false; + } + + return true; + } + /** * Bump severity of stale unacknowledged escalations. */ @@ -4435,7 +4748,10 @@ export class TownDO extends DurableObject { } // Run reconciler against the resulting state - const actions = reconciler.reconcile(this.sql); + const tc = await this.getTownConfig(); + const actions = reconciler.reconcile(this.sql, { + refineryCodeReview: tc.refinery?.code_review ?? true, + }); // Capture a state snapshot before rollback const agentSnapshot = [ @@ -4514,7 +4830,10 @@ export class TownDO extends DurableObject { } // Phase 1: Reconcile against now-current state - const actions = reconciler.reconcile(this.sql); + const tc2 = await this.getTownConfig(); + const actions = reconciler.reconcile(this.sql, { + refineryCodeReview: tc2.refinery?.code_review ?? true, + }); const pendingEventCount = events.pendingEventCount(this.sql); const actionsByType: Record = {}; for (const a of actions) { @@ -4584,6 +4903,29 @@ export class TownDO extends DurableObject { ]; } + async debugGetBead(beadId: string): Promise { + const bead = beadOps.getBead(this.sql, beadId); + if (!bead) return { error: 'bead not found' }; + + const reviewMeta = reviewQueue.getReviewMetadata(this.sql, beadId); + const deps = [ + ...query( + this.sql, + /* sql */ ` + SELECT ${bead_dependencies.bead_id}, + ${bead_dependencies.depends_on_bead_id}, + ${bead_dependencies.dependency_type} + FROM ${bead_dependencies} + WHERE ${bead_dependencies.bead_id} = ? + OR ${bead_dependencies.depends_on_bead_id} = ? + `, + [beadId, beadId] + ), + ]; + + return { bead, reviewMetadata: reviewMeta ?? 
null, dependencies: deps }; + } + async debugAgentMetadata(): Promise { return [ ...query( diff --git a/cloudflare-gastown/src/dos/town/actions.ts b/cloudflare-gastown/src/dos/town/actions.ts index d9bbac5c3..323de6755 100644 --- a/cloudflare-gastown/src/dos/town/actions.ts +++ b/cloudflare-gastown/src/dos/town/actions.ts @@ -14,11 +14,14 @@ import { agent_metadata } from '../../db/tables/agent-metadata.table'; import { convoy_metadata } from '../../db/tables/convoy-metadata.table'; import { bead_dependencies } from '../../db/tables/bead-dependencies.table'; import { agent_nudges } from '../../db/tables/agent-nudges.table'; +import { review_metadata } from '../../db/tables/review-metadata.table'; import { query } from '../../util/query.util'; import * as beadOps from './beads'; import * as agentOps from './agents'; import * as reviewQueue from './review-queue'; import * as patrol from './patrol'; +import { getRig } from './rigs'; +import { parseGitUrl } from '../../util/platform-pr.util'; // ── Bead mutations ────────────────────────────────────────────────── @@ -164,6 +167,12 @@ const NotifyMayor = z.object({ message: z.string(), }); +const MergePr = z.object({ + type: z.literal('merge_pr'), + bead_id: z.string(), + pr_url: z.string(), +}); + const EmitEvent = z.object({ type: z.literal('emit_event'), event_name: z.string(), @@ -195,6 +204,7 @@ export const Action = z.discriminatedUnion('type', [ DispatchAgent, StopAgent, PollPr, + MergePr, SendNudge, CreateTriageRequest, NotifyMayor, @@ -225,6 +235,7 @@ export type CloseConvoy = z.infer; export type DispatchAgent = z.infer; export type StopAgent = z.infer; export type PollPr = z.infer; +export type MergePr = z.infer; export type SendNudge = z.infer; export type CreateTriageRequest = z.infer; export type NotifyMayor = z.infer; @@ -235,6 +246,17 @@ export type EmitEvent = z.infer; // The SQL handle is for synchronous mutations; the rest are for async // side effects (dispatch, stop, poll, nudge). 
+/** Result of checking PR feedback (unresolved comments + failing CI checks). */ +export type PRFeedbackCheckResult = { + hasUnresolvedComments: boolean; + hasFailingChecks: boolean; + allChecksPass: boolean; + /** True when the check-runs response was paginated and not all runs were + * inspected. allChecksPass is already false in this case, but + * hasFailingChecks only reflects the runs we actually saw. */ + hasUncheckedRuns: boolean; +}; + export type ApplyActionContext = { sql: SqlStorage; townId: string; @@ -244,6 +266,10 @@ export type ApplyActionContext = { stopAgent: (agentId: string) => Promise; /** Check a PR's status via GitHub/GitLab API. Returns 'open'|'merged'|'closed'|null. */ checkPRStatus: (prUrl: string) => Promise<'open' | 'merged' | 'closed' | null>; + /** Check PR for unresolved review comments and failing CI checks. */ + checkPRFeedback: (prUrl: string) => Promise; + /** Merge a PR via GitHub/GitLab API. */ + mergePR: (prUrl: string) => Promise; /** Queue a nudge message for an agent. */ queueNudge: (agentId: string, message: string, tier: string) => Promise; /** Insert a town_event for deferred processing (e.g. pr_status_changed). */ @@ -253,10 +279,24 @@ export type ApplyActionContext = { ) => void; /** Emit an analytics/WebSocket event. */ emitEvent: (data: Record) => void; + /** Get the current town config (read lazily). */ + getTownConfig: () => Promise<{ + refinery?: { + auto_merge?: boolean; + auto_resolve_pr_feedback?: boolean; + auto_merge_delay_minutes?: number | null; + }; + }>; }; const LOG = '[actions]'; +/** Fail MR bead after this many consecutive null poll results (#1632). */ +const PR_POLL_NULL_THRESHOLD = 10; + +/** Minimum interval between PR polls per MR bead (ms) (#1632). 
*/ +export const PR_POLL_INTERVAL_MS = 60_000; // 1 minute + function now(): string { return new Date().toISOString(); } @@ -542,32 +582,363 @@ export function applyAction(ctx: ApplyActionContext, action: Action): (() => Pro } case 'poll_pr': { - // Touch updated_at synchronously so the bead doesn't look stale - // to Rule 4 (orphaned PR review, 30 min timeout). Without this, - // active polling keeps the PR alive but updated_at was set once - // at PR creation and never refreshed, causing a false "orphaned" - // failure after 30 minutes. + // Touch updated_at and record last_poll_at synchronously so the bead + // doesn't look stale to Rule 4 (orphaned PR review, 30 min timeout). + // Without this, active polling keeps the PR alive but updated_at was + // set once at PR creation and never refreshed, causing a false + // "orphaned" failure after 30 minutes. + const timestamp = now(); query( sql, /* sql */ ` UPDATE ${beads} - SET ${beads.columns.updated_at} = ? + SET ${beads.columns.updated_at} = ?, + ${beads.columns.metadata} = json_set( + COALESCE(${beads.columns.metadata}, '{}'), + '$.last_poll_at', ? + ) WHERE ${beads.bead_id} = ? `, - [now(), action.bead_id] + [timestamp, timestamp, action.bead_id] ); return async () => { try { const status = await ctx.checkPRStatus(action.pr_url); - if (status && status !== 'open') { + if (status !== null) { + // Any non-null result resets the consecutive null counter + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_set( + COALESCE(${beads.columns.metadata}, '{}'), + '$.poll_null_count', 0 + ) + WHERE ${beads.bead_id} = ? 
+ `, + [action.bead_id] + ); + if (status !== 'open') { + ctx.insertEvent('pr_status_changed', { + bead_id: action.bead_id, + payload: { pr_url: action.pr_url, pr_state: status }, + }); + return; + } + + // PR is open — check for feedback and auto-merge if configured + const townConfig = await ctx.getTownConfig(); + const refineryConfig = townConfig.refinery; + if (!refineryConfig) return; + + // Auto-resolve PR feedback: detect unresolved comments and failing CI + if (refineryConfig.auto_resolve_pr_feedback) { + const feedback = await ctx.checkPRFeedback(action.pr_url); + if ( + feedback && + (feedback.hasUnresolvedComments || + feedback.hasFailingChecks || + feedback.hasUncheckedRuns) + ) { + const existingFeedback = hasExistingFeedbackBead(sql, action.bead_id); + if (!existingFeedback) { + const prMeta = parsePrUrl(action.pr_url); + const rmRows = z + .object({ branch: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${review_metadata.columns.branch} + FROM ${review_metadata} + WHERE ${review_metadata.bead_id} = ? + `, + [action.bead_id] + ), + ]); + const branch = rmRows[0]?.branch ?? ''; + + ctx.insertEvent('pr_feedback_detected', { + bead_id: action.bead_id, + payload: { + mr_bead_id: action.bead_id, + pr_url: action.pr_url, + pr_number: prMeta?.prNumber ?? 0, + repo: prMeta?.repo ?? '', + branch, + has_unresolved_comments: feedback.hasUnresolvedComments, + has_failing_checks: feedback.hasFailingChecks, + has_unchecked_runs: feedback.hasUncheckedRuns, + }, + }); + } + + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.last_feedback_check_at} = ? + WHERE ${review_metadata.bead_id} = ? + `, + [now(), action.bead_id] + ); + } + } + + // Auto-merge timer: track grace period when everything is green. + // Requires both auto_merge enabled AND a delay configured. 
+ if ( + refineryConfig.auto_merge !== false && + refineryConfig.auto_merge_delay_minutes !== null && + refineryConfig.auto_merge_delay_minutes !== undefined + ) { + const feedback = await ctx.checkPRFeedback(action.pr_url); + if (!feedback) return; + + const allGreen = + !feedback.hasUnresolvedComments && + !feedback.hasFailingChecks && + feedback.allChecksPass; + + if (allGreen) { + const readySinceRows = z + .object({ auto_merge_ready_since: z.string().nullable() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT ${review_metadata.columns.auto_merge_ready_since} + FROM ${review_metadata} + WHERE ${review_metadata.bead_id} = ? + `, + [action.bead_id] + ), + ]); + + const readySince = readySinceRows[0]?.auto_merge_ready_since; + + if (!readySince) { + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.auto_merge_ready_since} = ? + WHERE ${review_metadata.bead_id} = ? + `, + [now(), action.bead_id] + ); + } else { + const elapsed = Date.now() - new Date(readySince).getTime(); + if (elapsed >= refineryConfig.auto_merge_delay_minutes * 60_000) { + ctx.insertEvent('pr_auto_merge', { + bead_id: action.bead_id, + payload: { + mr_bead_id: action.bead_id, + pr_url: action.pr_url, + }, + }); + } + } + } else { + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.auto_merge_ready_since} = NULL + WHERE ${review_metadata.bead_id} = ? + `, + [action.bead_id] + ); + } + } + } else { + // Null result — GitHub API unreachable (token missing, expired, rate-limited, or 5xx). + // Increment consecutive null counter; fail the bead after PR_POLL_NULL_THRESHOLD. + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_set( + COALESCE(${beads.columns.metadata}, '{}'), + '$.poll_null_count', + COALESCE( + json_extract(${beads.columns.metadata}, '$.poll_null_count'), + 0 + ) + 1 + ) + WHERE ${beads.bead_id} = ? 
+ `, + [action.bead_id] + ); + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT json_extract(${beads.columns.metadata}, '$.poll_null_count') AS null_count + FROM ${beads} + WHERE ${beads.bead_id} = ? + `, + [action.bead_id] + ), + ]; + const nullCount = Number(rows[0]?.null_count ?? 0); + if (nullCount >= PR_POLL_NULL_THRESHOLD) { + console.warn( + `${LOG} poll_pr: ${nullCount} consecutive null results for bead=${action.bead_id}, failing` + ); + beadOps.updateBeadStatus(sql, action.bead_id, 'failed', 'system'); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_set( + COALESCE(${beads.columns.metadata}, '{}'), + '$.failureReason', 'pr_poll_failed', + '$.failureMessage', ? + ) + WHERE ${beads.bead_id} = ? + `, + [ + `Cannot poll PR status — GitHub API returned null ${nullCount} consecutive times. Check that a valid GitHub token is configured in town settings and that the GitHub API is reachable.`, + action.bead_id, + ] + ); + } + } + // status === 'open' — no action needed, poll again next tick + } catch (err) { + console.warn(`${LOG} poll_pr failed: bead=${action.bead_id} url=${action.pr_url}`, err); + } + }; + } + + case 'merge_pr': { + // Validate the PR URL matches the rig's repository before merging. + // Prevents merging an unrelated repo if a buggy refinery stores a wrong URL. 
+ const mrBead = beadOps.getBead(sql, action.bead_id); + if (mrBead?.rig_id) { + const rig = getRig(sql, mrBead.rig_id); + if (rig?.git_url) { + const rigCoords = parseGitUrl(rig.git_url); + const prMeta = parsePrUrl(action.pr_url); + if (rigCoords && prMeta) { + const rigRepo = `${rigCoords.owner}/${rigCoords.repo}`; + if (rigRepo !== prMeta.repo) { + console.warn( + `${LOG} merge_pr: PR repo "${prMeta.repo}" does not match rig repo "${rigRepo}" — refusing to merge` + ); + // Clear the pending flag to avoid retry loops + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.auto_merge_pending'), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), action.bead_id] + ); + return null; + } + } + } + } + + return async () => { + try { + // Re-check feedback immediately before merging to avoid acting on + // stale state. If a reviewer posted new comments or CI regressed + // since the last poll, abort and reset the timer. + const freshFeedback = await ctx.checkPRFeedback(action.pr_url); + if ( + freshFeedback && + (freshFeedback.hasUnresolvedComments || + freshFeedback.hasFailingChecks || + !freshFeedback.allChecksPass) + ) { + console.log( + `${LOG} merge_pr: fresh feedback check found issues, aborting merge for bead=${action.bead_id}` + ); + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.auto_merge_pending'), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), action.bead_id] + ); + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.auto_merge_ready_since} = NULL + WHERE ${review_metadata.bead_id} = ? 
+ `, + [action.bead_id] + ); + return; + } + + const merged = await ctx.mergePR(action.pr_url); + if (merged) { ctx.insertEvent('pr_status_changed', { bead_id: action.bead_id, - payload: { pr_url: action.pr_url, pr_state: status }, + payload: { pr_url: action.pr_url, pr_state: 'merged' }, }); + } else { + // Merge failed (405/409: branch protection, merge conflict, stale head, etc.) + // Clear auto_merge_pending so we resume normal polling on the next tick. + // Also reset the auto_merge_ready_since timer so it re-evaluates freshness. + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.auto_merge_pending'), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), action.bead_id] + ); + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.auto_merge_ready_since} = NULL + WHERE ${review_metadata.bead_id} = ? + `, + [action.bead_id] + ); + console.warn( + `${LOG} merge_pr: merge failed, cleared auto_merge_pending for bead=${action.bead_id}` + ); } } catch (err) { - console.warn(`${LOG} poll_pr failed: bead=${action.bead_id} url=${action.pr_url}`, err); + console.warn(`${LOG} merge_pr failed: bead=${action.bead_id} url=${action.pr_url}`, err); + // Clear pending flag on unexpected errors too + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_remove(COALESCE(${beads.metadata}, '{}'), '$.auto_merge_pending'), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? + `, + [now(), action.bead_id] + ); + query( + sql, + /* sql */ ` + UPDATE ${review_metadata} + SET ${review_metadata.columns.auto_merge_ready_since} = NULL + WHERE ${review_metadata.bead_id} = ? 
+ `, + [action.bead_id] + ); } }; } @@ -646,3 +1017,43 @@ export function applyAction(ctx: ApplyActionContext, action: Action): (() => Pro } } } + +// ── Helpers ───────────────────────────────────────────────────────── + +/** Check if an MR bead already has a non-terminal feedback bead blocking it. */ +function hasExistingFeedbackBead(sql: SqlStorage, mrBeadId: string): boolean { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${bead_dependencies} bd + INNER JOIN ${beads} fb ON fb.${beads.columns.bead_id} = bd.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd.${bead_dependencies.columns.bead_id} = ? + AND bd.${bead_dependencies.columns.dependency_type} = 'blocks' + AND fb.${beads.columns.labels} LIKE '%gt:pr-feedback%' + AND fb.${beads.columns.status} NOT IN ('closed', 'failed') + LIMIT 1 + `, + [mrBeadId] + ), + ]; + return rows.length > 0; +} + +/** Parse a GitHub/GitLab PR URL to extract repo and PR number. */ +function parsePrUrl(prUrl: string): { repo: string; prNumber: number } | null { + // GitHub: https://github.com/{owner}/{repo}/pull/{number} + const ghMatch = prUrl.match(/^https:\/\/github\.com\/([^/]+\/[^/]+)\/pull\/(\d+)/); + if (ghMatch) { + return { repo: ghMatch[1], prNumber: parseInt(ghMatch[2], 10) }; + } + // GitLab: https://{host}/{path}/-/merge_requests/{iid} + const glMatch = prUrl.match(/^https:\/\/[^/]+\/(.+)\/-\/merge_requests\/(\d+)/); + if (glMatch) { + return { repo: glMatch[1], prNumber: parseInt(glMatch[2], 10) }; + } + return null; +} + +// Exported for testing +export { hasExistingFeedbackBead as _hasExistingFeedbackBead, parsePrUrl as _parsePrUrl }; diff --git a/cloudflare-gastown/src/dos/town/agents.ts b/cloudflare-gastown/src/dos/town/agents.ts index a723ce6d9..21c12673e 100644 --- a/cloudflare-gastown/src/dos/town/agents.ts +++ b/cloudflare-gastown/src/dos/town/agents.ts @@ -493,12 +493,24 @@ export function prime(sql: SqlStorage, agentId: string): PrimeContext { }; } + // Build PR fixup context if the 
hooked bead is a PR fixup request + let pr_fixup_context: PrimeContext['pr_fixup_context'] = null; + if (hookedBead?.labels.includes('gt:pr-fixup') && hookedBead.metadata) { + const meta = hookedBead.metadata as Record; + pr_fixup_context = { + pr_url: typeof meta.pr_url === 'string' ? meta.pr_url : null, + branch: typeof meta.branch === 'string' ? meta.branch : null, + target_branch: typeof meta.target_branch === 'string' ? meta.target_branch : null, + }; + } + return { agent, hooked_bead: hookedBead, undelivered_mail: undeliveredMail, open_beads: openBeads, rework_context, + pr_fixup_context, }; } diff --git a/cloudflare-gastown/src/dos/town/beads.ts b/cloudflare-gastown/src/dos/town/beads.ts index c2e865d55..25d76d8a3 100644 --- a/cloudflare-gastown/src/dos/town/beads.ts +++ b/cloudflare-gastown/src/dos/town/beads.ts @@ -27,7 +27,11 @@ import { createTableAgentMetadata, migrateAgentMetadata, } from '../../db/tables/agent-metadata.table'; -import { review_metadata, createTableReviewMetadata } from '../../db/tables/review-metadata.table'; +import { + review_metadata, + createTableReviewMetadata, + migrateReviewMetadata, +} from '../../db/tables/review-metadata.table'; import { escalation_metadata, createTableEscalationMetadata, @@ -72,7 +76,12 @@ export function initBeadTables(sql: SqlStorage): void { dropCheckConstraints(sql); // Migrations: add columns to existing tables (idempotent) - for (const stmt of [...migrateBeads(), ...migrateConvoyMetadata(), ...migrateAgentMetadata()]) { + for (const stmt of [ + ...migrateBeads(), + ...migrateConvoyMetadata(), + ...migrateAgentMetadata(), + ...migrateReviewMetadata(), + ]) { try { query(sql, stmt, []); } catch { diff --git a/cloudflare-gastown/src/dos/town/config.ts b/cloudflare-gastown/src/dos/town/config.ts index 0d064ed6b..afc705a40 100644 --- a/cloudflare-gastown/src/dos/town/config.ts +++ b/cloudflare-gastown/src/dos/town/config.ts @@ -82,6 +82,15 @@ export async function updateTownConfig( auto_merge: 
update.refinery.auto_merge ?? current.refinery?.auto_merge ?? true, require_clean_merge: update.refinery.require_clean_merge ?? current.refinery?.require_clean_merge ?? true, + code_review: update.refinery.code_review ?? current.refinery?.code_review ?? true, + auto_resolve_pr_feedback: + update.refinery.auto_resolve_pr_feedback ?? + current.refinery?.auto_resolve_pr_feedback ?? + false, + auto_merge_delay_minutes: + update.refinery.auto_merge_delay_minutes !== undefined + ? update.refinery.auto_merge_delay_minutes + : (current.refinery?.auto_merge_delay_minutes ?? null), } : current.refinery, container: @@ -150,5 +159,6 @@ export async function buildContainerConfig( disable_ai_coauthor: config.disable_ai_coauthor, kilo_api_url: env.KILO_API_URL ?? '', gastown_api_url: env.GASTOWN_API_URL ?? '', + organization_id: config.organization_id, }; } diff --git a/cloudflare-gastown/src/dos/town/container-dispatch.ts b/cloudflare-gastown/src/dos/town/container-dispatch.ts index 4932e92a5..65d6266aa 100644 --- a/cloudflare-gastown/src/dos/town/container-dispatch.ts +++ b/cloudflare-gastown/src/dos/town/container-dispatch.ts @@ -676,7 +676,8 @@ export async function updateAgentModelInContainer( model: string, smallModel?: string, conversationHistory?: string, - containerConfig?: Record + containerConfig?: Record, + organizationId?: string ): Promise { try { const container = getTownContainerStub(env, townId); @@ -691,6 +692,7 @@ export async function updateAgentModelInContainer( model, ...(smallModel ? { smallModel } : {}), ...(conversationHistory ? { conversationHistory } : {}), + ...(organizationId ? 
{ organizationId } : {}), }), }); return response.ok; diff --git a/cloudflare-gastown/src/dos/town/pr-feedback.test.ts b/cloudflare-gastown/src/dos/town/pr-feedback.test.ts new file mode 100644 index 000000000..1af8cb60a --- /dev/null +++ b/cloudflare-gastown/src/dos/town/pr-feedback.test.ts @@ -0,0 +1,219 @@ +import { describe, it, expect } from 'vitest'; +import { TownConfigSchema } from '../../types'; +import { _parsePrUrl as parsePrUrl } from './actions'; +import { TownEventType } from '../../db/tables/town-events.table'; +import { ReviewMetadataRecord } from '../../db/tables/review-metadata.table'; +import { buildRefinerySystemPrompt } from '../../prompts/refinery-system.prompt'; + +describe('TownConfigSchema refinery extensions', () => { + it('defaults code_review to true', () => { + const config = TownConfigSchema.parse({ refinery: {} }); + expect(config.refinery?.code_review).toBe(true); + }); + + it('accepts code_review = false', () => { + const config = TownConfigSchema.parse({ refinery: { code_review: false } }); + expect(config.refinery?.code_review).toBe(false); + }); + + it('defaults auto_resolve_pr_feedback to false', () => { + const config = TownConfigSchema.parse({}); + expect(config.refinery).toBeUndefined(); + + const configWithRefinery = TownConfigSchema.parse({ refinery: {} }); + expect(configWithRefinery.refinery?.auto_resolve_pr_feedback).toBe(false); + }); + + it('defaults auto_merge_delay_minutes to null', () => { + const config = TownConfigSchema.parse({ refinery: {} }); + expect(config.refinery?.auto_merge_delay_minutes).toBeNull(); + }); + + it('accepts auto_resolve_pr_feedback = true', () => { + const config = TownConfigSchema.parse({ + refinery: { auto_resolve_pr_feedback: true }, + }); + expect(config.refinery?.auto_resolve_pr_feedback).toBe(true); + }); + + it('accepts auto_merge_delay_minutes = 0 (immediate merge)', () => { + const config = TownConfigSchema.parse({ + refinery: { auto_merge_delay_minutes: 0 }, + }); + 
expect(config.refinery?.auto_merge_delay_minutes).toBe(0); + }); + + it('accepts auto_merge_delay_minutes = 15', () => { + const config = TownConfigSchema.parse({ + refinery: { auto_merge_delay_minutes: 15 }, + }); + expect(config.refinery?.auto_merge_delay_minutes).toBe(15); + }); + + it('rejects negative auto_merge_delay_minutes', () => { + expect(() => TownConfigSchema.parse({ refinery: { auto_merge_delay_minutes: -1 } })).toThrow(); + }); + + it('preserves existing refinery fields alongside new ones', () => { + const config = TownConfigSchema.parse({ + refinery: { + gates: ['npm test'], + auto_merge: false, + require_clean_merge: true, + auto_resolve_pr_feedback: true, + auto_merge_delay_minutes: 60, + }, + }); + expect(config.refinery?.gates).toEqual(['npm test']); + expect(config.refinery?.auto_merge).toBe(false); + expect(config.refinery?.require_clean_merge).toBe(true); + expect(config.refinery?.auto_resolve_pr_feedback).toBe(true); + expect(config.refinery?.auto_merge_delay_minutes).toBe(60); + }); +}); + +describe('parsePrUrl', () => { + it('parses GitHub PR URLs', () => { + const result = parsePrUrl('https://github.com/Kilo-Org/cloud/pull/42'); + expect(result).toEqual({ repo: 'Kilo-Org/cloud', prNumber: 42 }); + }); + + it('parses GitHub PR URLs with long paths', () => { + const result = parsePrUrl('https://github.com/org/repo/pull/123'); + expect(result).toEqual({ repo: 'org/repo', prNumber: 123 }); + }); + + it('parses GitLab MR URLs', () => { + const result = parsePrUrl('https://gitlab.com/group/project/-/merge_requests/7'); + expect(result).toEqual({ repo: 'group/project', prNumber: 7 }); + }); + + it('parses GitLab MR URLs with subgroups', () => { + const result = parsePrUrl('https://gitlab.example.com/org/team/project/-/merge_requests/99'); + expect(result).toEqual({ repo: 'org/team/project', prNumber: 99 }); + }); + + it('returns null for unrecognized URLs', () => { + expect(parsePrUrl('https://example.com/pr/1')).toBeNull(); + 
expect(parsePrUrl('not a url')).toBeNull(); + }); +}); + +describe('TownEventType enum', () => { + it('includes pr_feedback_detected and pr_auto_merge', () => { + expect(TownEventType.options).toContain('pr_feedback_detected'); + expect(TownEventType.options).toContain('pr_auto_merge'); + }); +}); + +describe('ReviewMetadataRecord', () => { + it('includes auto_merge_ready_since and last_feedback_check_at fields', () => { + const result = ReviewMetadataRecord.parse({ + bead_id: 'test-id', + branch: 'feature/test', + target_branch: 'main', + merge_commit: null, + pr_url: 'https://github.com/org/repo/pull/1', + retry_count: 0, + auto_merge_ready_since: '2025-01-01T00:00:00.000Z', + last_feedback_check_at: '2025-01-01T00:00:00.000Z', + }); + expect(result.auto_merge_ready_since).toBe('2025-01-01T00:00:00.000Z'); + expect(result.last_feedback_check_at).toBe('2025-01-01T00:00:00.000Z'); + }); + + it('accepts null for new fields', () => { + const result = ReviewMetadataRecord.parse({ + bead_id: 'test-id', + branch: 'feature/test', + target_branch: 'main', + merge_commit: null, + pr_url: null, + retry_count: 0, + auto_merge_ready_since: null, + last_feedback_check_at: null, + }); + expect(result.auto_merge_ready_since).toBeNull(); + expect(result.last_feedback_check_at).toBeNull(); + }); +}); + +describe('config deep merge for refinery extensions', () => { + it('preserves auto_resolve_pr_feedback when updating other refinery fields', () => { + // Simulate the merge logic from config.ts updateTownConfig: + // When a partial update provides only gates, the new refinery fields + // should fall through to the current value. 
+ const current = TownConfigSchema.parse({ + refinery: { + gates: ['npm test'], + auto_resolve_pr_feedback: true, + auto_merge_delay_minutes: 15, + }, + }); + + // Partial update only touches gates — other fields come from current + const updateGates: string[] | undefined = ['npm run test:all']; + const updateAutoResolve: boolean | undefined = undefined; + const updateDelayMinutes: number | null | undefined = undefined; + + const merged = { + gates: updateGates ?? current.refinery?.gates ?? [], + auto_merge: current.refinery?.auto_merge ?? true, + require_clean_merge: current.refinery?.require_clean_merge ?? true, + auto_resolve_pr_feedback: + updateAutoResolve ?? current.refinery?.auto_resolve_pr_feedback ?? false, + auto_merge_delay_minutes: + updateDelayMinutes !== undefined + ? updateDelayMinutes + : (current.refinery?.auto_merge_delay_minutes ?? null), + }; + + expect(merged.auto_resolve_pr_feedback).toBe(true); + expect(merged.auto_merge_delay_minutes).toBe(15); + expect(merged.gates).toEqual(['npm run test:all']); + }); +}); + +describe('buildRefinerySystemPrompt with existingPrUrl', () => { + const baseParams = { + identity: 'refinery-alpha', + rigId: 'rig-1', + townId: 'town-1', + gates: ['pnpm test'], + branch: 'gt/toast/abc123', + targetBranch: 'main', + polecatAgentId: 'polecat-1', + mergeStrategy: 'pr' as const, + }; + + it('produces standard PR-creation prompt when no existingPrUrl', () => { + const prompt = buildRefinerySystemPrompt(baseParams); + expect(prompt).toContain('gh pr create'); + expect(prompt).toContain('create a pull request'); + expect(prompt).not.toContain('Pull Request:'); + }); + + it('produces PR-review prompt when existingPrUrl is set', () => { + const prompt = buildRefinerySystemPrompt({ + ...baseParams, + existingPrUrl: 'https://github.com/org/repo/pull/42', + }); + expect(prompt).toContain('https://github.com/org/repo/pull/42'); + expect(prompt).toContain('gh pr review'); + expect(prompt).toContain('gh pr diff'); + 
expect(prompt).toContain('gh pr comment'); + expect(prompt).toContain('Do NOT merge the PR'); + expect(prompt).not.toContain('gh pr create'); + expect(prompt).toContain('Do NOT use `gh pr review --approve`'); + }); + + it('includes gates in PR-review prompt', () => { + const prompt = buildRefinerySystemPrompt({ + ...baseParams, + gates: ['pnpm test', 'pnpm lint'], + existingPrUrl: 'https://github.com/org/repo/pull/42', + }); + expect(prompt).toContain('pnpm test'); + expect(prompt).toContain('pnpm lint'); + }); +}); diff --git a/cloudflare-gastown/src/dos/town/reconciler.ts b/cloudflare-gastown/src/dos/town/reconciler.ts index e3129896e..27b1e9f84 100644 --- a/cloudflare-gastown/src/dos/town/reconciler.ts +++ b/cloudflare-gastown/src/dos/town/reconciler.ts @@ -30,6 +30,7 @@ import * as reviewQueue from './review-queue'; import * as agents from './agents'; import * as beadOps from './beads'; import { getRig } from './rigs'; +import { PR_POLL_INTERVAL_MS } from './actions'; import type { Action } from './actions'; import type { TownEventRecord } from '../../db/tables/town-events.table'; @@ -163,6 +164,7 @@ const MrBeadRow = BeadRecord.pick({ rig_id: true, updated_at: true, assignee_agent_bead_id: true, + metadata: true, }).extend({ // Joined from review_metadata pr_url: ReviewMetadataRecord.shape.pr_url, @@ -368,6 +370,85 @@ export function applyEvent(sql: SqlStorage, event: TownEventRecord): void { return; } + case 'pr_feedback_detected': { + const mrBeadId = typeof payload.mr_bead_id === 'string' ? payload.mr_bead_id : null; + if (!mrBeadId) { + console.warn(`${LOG} applyEvent: pr_feedback_detected missing mr_bead_id`); + return; + } + + const mrBead = beadOps.getBead(sql, mrBeadId); + if (!mrBead || mrBead.status === 'closed' || mrBead.status === 'failed') return; + + // Check for existing non-terminal feedback bead to prevent duplicates + if (hasExistingPrFeedbackBead(sql, mrBeadId)) return; + + const prUrl = typeof payload.pr_url === 'string' ? 
payload.pr_url : ''; + const prNumber = typeof payload.pr_number === 'number' ? payload.pr_number : 0; + const repo = typeof payload.repo === 'string' ? payload.repo : ''; + const branch = typeof payload.branch === 'string' ? payload.branch : ''; + const hasUnresolvedComments = payload.has_unresolved_comments === true; + const hasFailingChecks = payload.has_failing_checks === true; + const hasUncheckedRuns = payload.has_unchecked_runs === true; + + const feedbackBead = beadOps.createBead(sql, { + type: 'issue', + title: buildFeedbackBeadTitle( + prNumber, + repo, + hasUnresolvedComments, + hasFailingChecks, + hasUncheckedRuns + ), + body: buildFeedbackPrompt( + prNumber, + repo, + branch, + hasUnresolvedComments, + hasFailingChecks, + hasUncheckedRuns + ), + rig_id: mrBead.rig_id ?? undefined, + parent_bead_id: mrBeadId, + labels: ['gt:pr-feedback'], + metadata: { + pr_feedback_for: mrBeadId, + pr_url: prUrl, + branch, + }, + }); + + // Feedback bead blocks the MR bead (same pattern as rework beads) + beadOps.insertDependency(sql, mrBeadId, feedbackBead.bead_id, 'blocks'); + return; + } + + case 'pr_auto_merge': { + const mrBeadId = typeof payload.mr_bead_id === 'string' ? payload.mr_bead_id : null; + if (!mrBeadId) { + console.warn(`${LOG} applyEvent: pr_auto_merge missing mr_bead_id`); + return; + } + + const mrBead = beadOps.getBead(sql, mrBeadId); + if (!mrBead || mrBead.status === 'closed' || mrBead.status === 'failed') return; + + // The actual merge is handled by the merge_pr side effect generated by + // the reconciler on the next tick when it sees this event has been processed. + // We just mark the intent here via metadata. + query( + sql, + /* sql */ ` + UPDATE ${beads} + SET ${beads.columns.metadata} = json_set(COALESCE(${beads.metadata}, '{}'), '$.auto_merge_pending', 1), + ${beads.columns.updated_at} = ? + WHERE ${beads.bead_id} = ? 
+ `, + [new Date().toISOString(), mrBeadId] + ); + return; + } + default: { console.warn(`${LOG} applyEvent: unknown event type: ${event.event_type}`); } @@ -378,12 +459,17 @@ export function applyEvent(sql: SqlStorage, event: TownEventRecord): void { // Top-level reconcile // ════════════════════════════════════════════════════════════════════ -export function reconcile(sql: SqlStorage, opts?: { draining?: boolean }): Action[] { +export function reconcile( + sql: SqlStorage, + opts?: { draining?: boolean; refineryCodeReview?: boolean } +): Action[] { const draining = opts?.draining ?? false; const actions: Action[] = []; actions.push(...reconcileAgents(sql, { draining })); actions.push(...reconcileBeads(sql, { draining })); - actions.push(...reconcileReviewQueue(sql, { draining })); + actions.push( + ...reconcileReviewQueue(sql, { draining, refineryCodeReview: opts?.refineryCodeReview }) + ); actions.push(...reconcileConvoys(sql)); actions.push(...reconcileGUPP(sql, { draining })); actions.push(...reconcileGC(sql)); @@ -943,8 +1029,12 @@ export function reconcileBeads(sql: SqlStorage, opts?: { draining?: boolean }): // refinery dispatch // ════════════════════════════════════════════════════════════════════ -export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolean }): Action[] { +export function reconcileReviewQueue( + sql: SqlStorage, + opts?: { draining?: boolean; refineryCodeReview?: boolean } +): Action[] { const draining = opts?.draining ?? false; + const refineryCodeReview = opts?.refineryCodeReview ?? 
true; const actions: Action[] = []; // Town-level circuit breaker @@ -957,8 +1047,10 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea /* sql */ ` SELECT b.${beads.columns.bead_id}, b.${beads.columns.status}, b.${beads.columns.rig_id}, b.${beads.columns.updated_at}, + b.${beads.columns.metadata}, rm.${review_metadata.columns.pr_url}, - b.${beads.columns.assignee_agent_bead_id} + b.${beads.columns.assignee_agent_bead_id}, + b.${beads.columns.metadata} FROM ${beads} b INNER JOIN ${review_metadata} rm ON rm.${review_metadata.columns.bead_id} = b.${beads.columns.bead_id} WHERE b.${beads.columns.type} = 'merge_request' @@ -969,13 +1061,28 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea ]); for (const mr of mrBeads) { - // Rule 1: PR-strategy MR beads in_progress need polling + // Rule 1: PR-strategy MR beads in_progress need polling. + // Rate-limit: skip if polled less than PR_POLL_INTERVAL_MS ago (#1632). if (mr.status === 'in_progress' && mr.pr_url) { - actions.push({ - type: 'poll_pr', - bead_id: mr.bead_id, - pr_url: mr.pr_url, - }); + const lastPollAt: unknown = mr.metadata?.last_poll_at; + const msSinceLastPoll = + typeof lastPollAt === 'string' ? Date.now() - new Date(lastPollAt).getTime() : Infinity; + + if (msSinceLastPoll >= PR_POLL_INTERVAL_MS) { + actions.push({ + type: 'poll_pr', + bead_id: mr.bead_id, + pr_url: mr.pr_url, + }); + } + // If auto-merge is pending, also attempt the merge + if (mr.metadata?.auto_merge_pending) { + actions.push({ + type: 'merge_pr', + bead_id: mr.bead_id, + pr_url: mr.pr_url, + }); + } } // Rule 2: Stuck MR beads in_progress with no PR, no working agent, stale >30min @@ -1032,7 +1139,10 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea // Rule 4: PR-strategy MR beads orphaned (refinery dispatched then died, stale >30min) // Only in_progress — open beads are just waiting for the refinery to pop them. 
+ // Skip when refinery code review is disabled: poll_pr keeps the bead alive via + // updated_at touches, and no refinery is expected to be working on it. if ( + refineryCodeReview && mr.status === 'in_progress' && mr.pr_url && staleMs(mr.updated_at, ORPHANED_PR_REVIEW_TIMEOUT_MS) @@ -1051,37 +1161,77 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea } } - // Rule 5: Pop open MR bead for idle refinery - // Get all rigs that have open MR beads - const rigsWithOpenMrs = z - .object({ rig_id: z.string() }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` + // When refinery code review is disabled, skip refinery dispatch (Rules 5–6) + // for MR beads that already have a pr_url (polecat created the PR). + // Transition them straight to in_progress so poll_pr can handle auto-merge. + // MR beads without a pr_url (direct merge strategy) still need the refinery. + if (!refineryCodeReview) { + const openMrsWithPr = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` + SELECT b.${beads.columns.bead_id} + FROM ${beads} b + INNER JOIN ${review_metadata} rm + ON rm.${review_metadata.columns.bead_id} = b.${beads.columns.bead_id} + WHERE b.${beads.columns.type} = 'merge_request' + AND b.${beads.columns.status} = 'open' + AND rm.${review_metadata.columns.pr_url} IS NOT NULL + `, + [] + ), + ]); + for (const { bead_id } of openMrsWithPr) { + actions.push({ + type: 'transition_bead', + bead_id, + from: 'open', + to: 'in_progress', + reason: 'refinery code review disabled — skip to poll_pr', + actor: 'system', + }); + } + } + + // Rules 5–6 only apply when refinery code review is enabled. + // When disabled, open MR beads with pr_url are fast-tracked above. 
+ if (!refineryCodeReview) { + // Skip refinery dispatch — jump to Rule 7 + } else { + // Rule 5: Pop open MR bead for idle refinery + // Get all rigs that have open MR beads + const rigsWithOpenMrs = z + .object({ rig_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` SELECT DISTINCT b.${beads.columns.rig_id} FROM ${beads} b WHERE b.${beads.columns.type} = 'merge_request' AND b.${beads.columns.status} = 'open' AND b.${beads.columns.rig_id} IS NOT NULL `, - [] - ), - ]); + [] + ), + ]); - for (const { rig_id } of rigsWithOpenMrs) { - // Check if rig already has an in_progress MR that needs the refinery. - // PR-strategy MR beads (pr_url IS NOT NULL) don't need the refinery — - // the merge is handled by the user/CI via the PR. Only direct-strategy - // MRs (no pr_url, refinery merges to main itself) block the queue. - const inProgressCount = z - .object({ cnt: z.number() }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` + for (const { rig_id } of rigsWithOpenMrs) { + // Check if rig already has an in_progress MR that needs the refinery. + // PR-strategy MR beads (pr_url IS NOT NULL) don't need the refinery — + // the merge is handled by the user/CI via the PR. Only direct-strategy + // MRs (no pr_url, refinery merges to main itself) block the queue. + const inProgressCount = z + .object({ cnt: z.number() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` SELECT count(*) as cnt FROM ${beads} b INNER JOIN ${review_metadata} rm ON rm.${review_metadata.columns.bead_id} = b.${beads.columns.bead_id} @@ -1090,16 +1240,16 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea AND b.${beads.columns.rig_id} = ? AND rm.${review_metadata.columns.pr_url} IS NULL `, - [rig_id] - ), - ]); - if ((inProgressCount[0]?.cnt ?? 0) > 0) continue; + [rig_id] + ), + ]); + if ((inProgressCount[0]?.cnt ?? 
0) > 0) continue; - // Check if the refinery for this rig is idle and unhooked - const refinery = AgentRow.array().parse([ - ...query( - sql, - /* sql */ ` + // Check if the refinery for this rig is idle and unhooked + const refinery = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, ${agent_metadata.dispatch_attempts}, @@ -1111,18 +1261,18 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea AND b.${beads.columns.rig_id} = ? LIMIT 1 `, - [rig_id] - ), - ]); + [rig_id] + ), + ]); - // Get oldest open MR for this rig - const oldestMr = z - .object({ bead_id: z.string() }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` + // Get oldest open MR for this rig + const oldestMr = z + .object({ bead_id: z.string() }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` SELECT ${beads.bead_id} FROM ${beads} WHERE ${beads.type} = 'merge_request' @@ -1131,70 +1281,70 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea ORDER BY ${beads.columns.created_at} ASC LIMIT 1 `, - [rig_id] - ), - ]); + [rig_id] + ), + ]); - if (oldestMr.length === 0) continue; + if (oldestMr.length === 0) continue; - // Skip dispatch if the town is draining (container eviction in progress) - if (draining) { - console.log(`${LOG} Town is draining, skipping dispatch for bead ${oldestMr[0].bead_id}`); - continue; - } + // Skip dispatch if the town is draining (container eviction in progress) + if (draining) { + console.log(`${LOG} Town is draining, skipping dispatch for bead ${oldestMr[0].bead_id}`); + continue; + } - // Town-level circuit breaker suppresses dispatch - if (circuitBreakerOpen) continue; + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + + // If no refinery exists or it's busy, emit a dispatch_agent with empty + // agent_id — applyAction will create 
the refinery via getOrCreateAgent. + if (refinery.length === 0) { + actions.push({ + type: 'transition_bead', + bead_id: oldestMr[0].bead_id, + from: 'open', + to: 'in_progress', + reason: 'popped for review (creating refinery)', + actor: 'system', + }); + actions.push({ + type: 'dispatch_agent', + agent_id: '', + bead_id: oldestMr[0].bead_id, + rig_id, + }); + continue; + } + + const ref = refinery[0]; + if (ref.status !== 'idle' || ref.current_hook_bead_id) continue; - // If no refinery exists or it's busy, emit a dispatch_agent with empty - // agent_id — applyAction will create the refinery via getOrCreateAgent. - if (refinery.length === 0) { actions.push({ type: 'transition_bead', bead_id: oldestMr[0].bead_id, from: 'open', to: 'in_progress', - reason: 'popped for review (creating refinery)', + reason: 'popped for review', actor: 'system', }); + actions.push({ + type: 'hook_agent', + agent_id: ref.bead_id, + bead_id: oldestMr[0].bead_id, + }); actions.push({ type: 'dispatch_agent', - agent_id: '', + agent_id: ref.bead_id, bead_id: oldestMr[0].bead_id, rig_id, }); - continue; } - const ref = refinery[0]; - if (ref.status !== 'idle' || ref.current_hook_bead_id) continue; - - actions.push({ - type: 'transition_bead', - bead_id: oldestMr[0].bead_id, - from: 'open', - to: 'in_progress', - reason: 'popped for review', - actor: 'system', - }); - actions.push({ - type: 'hook_agent', - agent_id: ref.bead_id, - bead_id: oldestMr[0].bead_id, - }); - actions.push({ - type: 'dispatch_agent', - agent_id: ref.bead_id, - bead_id: oldestMr[0].bead_id, - rig_id, - }); - } - - // Rule 6: Idle refinery hooked to in_progress MR — needs re-dispatch - const idleRefineries = AgentRow.array().parse([ - ...query( - sql, - /* sql */ ` + // Rule 6: Idle refinery hooked to in_progress MR — needs re-dispatch + const idleRefineries = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, ${agent_metadata.status}, 
${agent_metadata.current_hook_bead_id}, ${agent_metadata.dispatch_attempts}, @@ -1206,80 +1356,121 @@ export function reconcileReviewQueue(sql: SqlStorage, opts?: { draining?: boolea AND ${agent_metadata.status} = 'idle' AND ${agent_metadata.current_hook_bead_id} IS NOT NULL `, - [] - ), - ]); - - for (const ref of idleRefineries) { - if (!ref.current_hook_bead_id) continue; + [] + ), + ]); - // Read the bead's dispatch_attempts for the per-bead circuit breaker - const mrRows = z - .object({ - status: z.string(), - type: z.string(), - rig_id: z.string().nullable(), - dispatch_attempts: z.number(), - last_dispatch_attempt_at: z.string().nullable(), - }) - .array() - .parse([ - ...query( - sql, - /* sql */ ` + for (const ref of idleRefineries) { + if (!ref.current_hook_bead_id) continue; + + // Read the bead's dispatch_attempts for the per-bead circuit breaker + const mrRows = z + .object({ + status: z.string(), + type: z.string(), + rig_id: z.string().nullable(), + dispatch_attempts: z.number(), + last_dispatch_attempt_at: z.string().nullable(), + }) + .array() + .parse([ + ...query( + sql, + /* sql */ ` SELECT ${beads.status}, ${beads.type}, ${beads.rig_id}, ${beads.dispatch_attempts}, ${beads.last_dispatch_attempt_at} FROM ${beads} WHERE ${beads.bead_id} = ? 
`, - [ref.current_hook_bead_id] - ), - ]); + [ref.current_hook_bead_id] + ), + ]); - if (mrRows.length === 0) continue; - const mr = mrRows[0]; - if (mr.type !== 'merge_request' || mr.status !== 'in_progress') continue; + if (mrRows.length === 0) continue; + const mr = mrRows[0]; + if (mr.type !== 'merge_request' || mr.status !== 'in_progress') continue; - if (draining) { - console.log( - `${LOG} Town is draining, skipping dispatch for bead ${ref.current_hook_bead_id}` - ); - continue; - } + if (draining) { + console.log( + `${LOG} Town is draining, skipping dispatch for bead ${ref.current_hook_bead_id}` + ); + continue; + } - // Per-bead dispatch cap — check before cooldown so max-attempt MR - // beads are failed immediately rather than waiting for the cooldown. - if (mr.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { - actions.push({ - type: 'transition_bead', - bead_id: ref.current_hook_bead_id, - from: null, - to: 'failed', - reason: `refinery max dispatch attempts exceeded (${mr.dispatch_attempts})`, - actor: 'system', - }); + // Per-bead dispatch cap — check before cooldown so max-attempt MR + // beads are failed immediately rather than waiting for the cooldown. + if (mr.dispatch_attempts >= MAX_DISPATCH_ATTEMPTS) { + actions.push({ + type: 'transition_bead', + bead_id: ref.current_hook_bead_id, + from: null, + to: 'failed', + reason: `refinery max dispatch attempts exceeded (${mr.dispatch_attempts})`, + actor: 'system', + }); + actions.push({ + type: 'unhook_agent', + agent_id: ref.bead_id, + reason: 'max dispatch attempts', + }); + continue; + } + + // Exponential backoff using bead's last_dispatch_attempt_at + const cooldownMs = getDispatchCooldownMs(mr.dispatch_attempts); + if (!staleMs(mr.last_dispatch_attempt_at, cooldownMs)) continue; + + // Town-level circuit breaker suppresses dispatch + if (circuitBreakerOpen) continue; + + // Container status is checked at apply time (async). In shadow mode, + // we just note that a dispatch is needed. 
actions.push({ - type: 'unhook_agent', + type: 'dispatch_agent', agent_id: ref.bead_id, - reason: 'max dispatch attempts', + bead_id: ref.current_hook_bead_id, + rig_id: mr.rig_id ?? ref.rig_id ?? '', }); - continue; } + } // end refineryCodeReview gate (Rules 5–6) - // Exponential backoff using bead's last_dispatch_attempt_at - const cooldownMs = getDispatchCooldownMs(mr.dispatch_attempts); - if (!staleMs(mr.last_dispatch_attempt_at, cooldownMs)) continue; + // Rule 7: Working refinery hooked to a terminal MR bead — stop it. + // This catches the race where auto-merge closes the MR bead while the + // refinery is still running in the container. Without this, the refinery + // can post review comments on an already-merged PR. + const workingRefineries = AgentRow.array().parse([ + ...query( + sql, + /* sql */ ` + SELECT ${agent_metadata.bead_id}, ${agent_metadata.role}, + ${agent_metadata.status}, ${agent_metadata.current_hook_bead_id}, + ${agent_metadata.dispatch_attempts}, + ${agent_metadata.last_activity_at}, + b.${beads.columns.rig_id} + FROM ${agent_metadata} + LEFT JOIN ${beads} b ON b.${beads.columns.bead_id} = ${agent_metadata.bead_id} + WHERE ${agent_metadata.columns.role} = 'refinery' + AND ${agent_metadata.status} IN ('working', 'stalled') + AND ${agent_metadata.current_hook_bead_id} IS NOT NULL + `, + [] + ), + ]); - // Town-level circuit breaker suppresses dispatch - if (circuitBreakerOpen) continue; + for (const ref of workingRefineries) { + if (!ref.current_hook_bead_id) continue; + const mr = beadOps.getBead(sql, ref.current_hook_bead_id); + if (!mr || (mr.status !== 'closed' && mr.status !== 'failed')) continue; - // Container status is checked at apply time (async). In shadow mode, - // we just note that a dispatch is needed. actions.push({ - type: 'dispatch_agent', + type: 'stop_agent', agent_id: ref.bead_id, - bead_id: ref.current_hook_bead_id, - rig_id: mr.rig_id ?? ref.rig_id ?? 
'', + reason: `MR bead ${ref.current_hook_bead_id} is ${mr.status}`, + }); + actions.push({ + type: 'unhook_agent', + agent_id: ref.bead_id, + reason: `MR bead ${mr.status} — cleanup`, }); } @@ -1709,6 +1900,104 @@ function hasRecentNudge(sql: SqlStorage, agentId: string, tier: string): boolean return rows.length > 0; } +/** Check if an MR bead has a non-terminal feedback bead (gt:pr-feedback) blocking it. */ +function hasExistingPrFeedbackBead(sql: SqlStorage, mrBeadId: string): boolean { + const rows = [ + ...query( + sql, + /* sql */ ` + SELECT 1 FROM ${bead_dependencies} bd + INNER JOIN ${beads} fb ON fb.${beads.columns.bead_id} = bd.${bead_dependencies.columns.depends_on_bead_id} + WHERE bd.${bead_dependencies.columns.bead_id} = ? + AND bd.${bead_dependencies.columns.dependency_type} = 'blocks' + AND fb.${beads.columns.labels} LIKE '%gt:pr-feedback%' + AND fb.${beads.columns.status} NOT IN ('closed', 'failed') + LIMIT 1 + `, + [mrBeadId] + ), + ]; + return rows.length > 0; +} + +/** Build a human-readable title for the feedback bead. */ +function buildFeedbackBeadTitle( + prNumber: number, + repo: string, + hasComments: boolean, + hasFailingChecks: boolean, + hasUncheckedRuns = false +): string { + const parts: string[] = []; + if (hasComments) parts.push('review comments'); + if (hasFailingChecks) parts.push('failing CI'); + if (hasUncheckedRuns && !hasFailingChecks) parts.push('unchecked CI runs'); + const shortRepo = repo.includes('/') ? repo.split('/').pop() : repo; + return `Address ${parts.join(' & ')} on PR #${prNumber}${shortRepo ? ` (${shortRepo})` : ''}`; +} + +/** Build the polecat prompt body for addressing PR feedback. 
*/ +function buildFeedbackPrompt( + prNumber: number, + repo: string, + branch: string, + hasComments: boolean, + hasFailingChecks: boolean, + hasUncheckedRuns = false +): string { + const lines: string[] = []; + lines.push(`You are addressing feedback on PR #${prNumber} on ${repo}, branch ${branch}.`); + lines.push(''); + + if (hasComments && hasFailingChecks) { + lines.push('This PR has both unresolved review comments and failing CI checks.'); + lines.push( + 'Address the review comments first, then fix the CI failures, as comment fixes may also resolve some CI issues.' + ); + } else if (hasComments && hasUncheckedRuns) { + lines.push( + 'This PR has unresolved review comments and more than 100 CI check-runs (not all could be inspected). Address the review comments and verify CI status.' + ); + } else if (hasComments) { + lines.push('This PR has unresolved review comments.'); + } else if (hasFailingChecks) { + lines.push('This PR has failing CI checks.'); + } else if (hasUncheckedRuns) { + lines.push( + 'This PR has more than 100 CI check-runs. Not all could be inspected by the system. Check `gh pr checks` for the full status and fix any failures.' 
+ ); + } + + lines.push(''); + lines.push('## Review Comments'); + lines.push(''); + lines.push(`Run \`gh pr view ${prNumber} --comments\` to see all review comments.`); + lines.push(''); + lines.push('For each unresolved comment thread:'); + lines.push( + "- If it's a relevant code fix: make the change, push, reply explaining what you did, and resolve the thread" + ); + lines.push("- If it's not relevant: reply explaining why, and resolve the thread"); + lines.push(''); + lines.push("It's important to resolve the full thread rather than just the base comment."); + lines.push(''); + lines.push('## CI Checks'); + lines.push(''); + lines.push(`Run \`gh pr checks ${prNumber}\` to see the status of all CI checks.`); + lines.push(''); + lines.push('For each failing check:'); + lines.push('- Read the failure logs via `gh run view --log-failed`'); + lines.push('- Fix the underlying issue (test failure, lint error, type error, etc.)'); + lines.push('- Push the fix'); + lines.push('- Verify the check passes by reviewing the new run'); + lines.push(''); + lines.push( + 'After addressing everything, push all changes in a single commit (or minimal commits) and call gt_done.' + ); + + return lines.join('\n'); +} + // ════════════════════════════════════════════════════════════════════ // Invariant checker — runs after action application to detect // violations of the system invariants from spec §6. @@ -1728,6 +2017,7 @@ export function checkInvariants(sql: SqlStorage): Violation[] { const violations: Violation[] = []; // Invariant 7: Working agents must have hooks + // Mayors are always 'working' and intentionally have no hook — exclude them. 
const unhookedWorkers = z .object({ bead_id: z.string() }) .array() @@ -1739,6 +2029,7 @@ export function checkInvariants(sql: SqlStorage): Violation[] { FROM ${agent_metadata} WHERE ${agent_metadata.status} = 'working' AND ${agent_metadata.current_hook_bead_id} IS NULL + AND ${agent_metadata.role} != 'mayor' `, [] ), @@ -1750,26 +2041,27 @@ export function checkInvariants(sql: SqlStorage): Violation[] { }); } - // Invariant 5: Convoy beads should not be in_progress - const inProgressConvoys = z - .object({ bead_id: z.string() }) + // Invariant 5: Convoy beads should not be in unexpected states. + // Valid transient states: open, in_progress, in_review, closed. + const badStateConvoys = z + .object({ bead_id: z.string(), status: z.string() }) .array() .parse([ ...query( sql, /* sql */ ` - SELECT ${beads.bead_id} + SELECT ${beads.bead_id}, ${beads.status} FROM ${beads} WHERE ${beads.type} = 'convoy' - AND ${beads.status} = 'in_progress' + AND ${beads.status} NOT IN ('open', 'in_progress', 'in_review', 'closed') `, [] ), ]); - for (const c of inProgressConvoys) { + for (const c of badStateConvoys) { violations.push({ invariant: 5, - message: `Convoy bead ${c.bead_id} is in_progress (should only be open or closed)`, + message: `Convoy bead ${c.bead_id} is in unexpected state '${c.status}'`, }); } diff --git a/cloudflare-gastown/src/dos/town/review-queue.ts b/cloudflare-gastown/src/dos/town/review-queue.ts index 44c7e26bf..4cfbca903 100644 --- a/cloudflare-gastown/src/dos/town/review-queue.ts +++ b/cloudflare-gastown/src/dos/town/review-queue.ts @@ -71,7 +71,9 @@ const REVIEW_JOIN = /* sql */ ` SELECT ${beads}.*, ${review_metadata.branch}, ${review_metadata.target_branch}, ${review_metadata.merge_commit}, ${review_metadata.pr_url}, - ${review_metadata.retry_count} + ${review_metadata.retry_count}, + ${review_metadata.auto_merge_ready_since}, + ${review_metadata.last_feedback_check_at} FROM ${beads} INNER JOIN ${review_metadata} ON ${beads.bead_id} = 
${review_metadata.bead_id} `; @@ -577,6 +579,17 @@ export function agentDone(sql: SqlStorage, agentId: string, input: AgentDoneInpu return; } + // PR-fixup beads skip the review queue. The polecat pushed fixup commits + // to an existing PR branch — no separate review is needed. + if (hookedBead?.labels.includes('gt:pr-fixup')) { + console.log( + `[review-queue] agentDone: pr-fixup bead ${agent.current_hook_bead_id} — closing directly (skip review)` + ); + closeBead(sql, agent.current_hook_bead_id, agentId); + unhookBead(sql, agentId); + return; + } + if (agent.role === 'refinery') { // The refinery handles merging (direct strategy) or PR creation (pr strategy) // itself. When it calls gt_done: diff --git a/cloudflare-gastown/src/gastown.worker.ts b/cloudflare-gastown/src/gastown.worker.ts index 13e88dc2a..6357a7278 100644 --- a/cloudflare-gastown/src/gastown.worker.ts +++ b/cloudflare-gastown/src/gastown.worker.ts @@ -294,6 +294,16 @@ app.post('/debug/towns/:townId/send-message', async c => { return c.json(result); }); +app.get('/debug/towns/:townId/beads/:beadId', async c => { + if (c.env.ENVIRONMENT !== 'development') return c.json({ error: 'dev only' }, 403); + const townId = c.req.param('townId'); + const beadId = c.req.param('beadId'); + const town = getTownDOStub(c.env, townId); + // eslint-disable-next-line @typescript-eslint/await-thenable + const result = await town.debugGetBead(beadId); + return c.json(result); +}); + app.post('/debug/towns/:townId/graceful-stop', async c => { if (c.env.ENVIRONMENT !== 'development') return c.json({ error: 'dev only' }, 403); const townId = c.req.param('townId'); diff --git a/cloudflare-gastown/src/handlers/mayor-tools.handler.ts b/cloudflare-gastown/src/handlers/mayor-tools.handler.ts index 6f778d45d..fcfe207ce 100644 --- a/cloudflare-gastown/src/handlers/mayor-tools.handler.ts +++ b/cloudflare-gastown/src/handlers/mayor-tools.handler.ts @@ -24,6 +24,7 @@ const MayorSlingBody = z.object({ title: z.string().min(1), body: 
z.string().optional(), metadata: z.record(z.string(), z.unknown()).optional(), + labels: z.array(z.string()).optional(), }); const MayorSlingBatchBody = z diff --git a/cloudflare-gastown/src/prompts/mayor-system.prompt.ts b/cloudflare-gastown/src/prompts/mayor-system.prompt.ts index 10ce483c4..78c3b6d34 100644 --- a/cloudflare-gastown/src/prompts/mayor-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/mayor-system.prompt.ts @@ -284,6 +284,26 @@ For large convoys (>5 beads) where the decomposition is non-obvious, consider using staged=true by default to give the user a chance to review before agents start spending compute. +## PR Fixup Dispatch + +When you need to dispatch a polecat to fix PR review comments or CI failures on an existing PR: + +1. Use \`gt_sling\` with the \`labels\` parameter set to \`["gt:pr-fixup"]\` +2. Include the PR URL, branch name, and target branch in the bead metadata: + \`\`\` + metadata: { + pr_url: "https://github.com/org/repo/pull/123", + branch: "gt/toast/abc123", + target_branch: "main" + } + \`\`\` +3. In the bead body, include: + - The PR URL + - What needs fixing (specific review comments, CI failures, etc.) + - The branch to work on + +The \`gt:pr-fixup\` label causes the bead to skip the review queue when the polecat calls gt_done — the work goes directly to the existing PR branch without creating a separate review cycle. 
+ ## Bug Reporting If a user reports a bug or you encounter a repeating error, you can file a bug report diff --git a/cloudflare-gastown/src/prompts/polecat-system.prompt.test.ts b/cloudflare-gastown/src/prompts/polecat-system.prompt.test.ts index c62a4cd9c..2002b9fa9 100644 --- a/cloudflare-gastown/src/prompts/polecat-system.prompt.test.ts +++ b/cloudflare-gastown/src/prompts/polecat-system.prompt.test.ts @@ -68,4 +68,40 @@ describe('buildPolecatSystemPrompt', () => { const prompt = buildPolecatSystemPrompt({ ...params, gates: [] }); expect(prompt).not.toContain('## Pre-Submission Gates'); }); + + it('should forbid PR creation when mergeStrategy is not set', () => { + const prompt = buildPolecatSystemPrompt(params); + expect(prompt).toContain('Do NOT create pull requests'); + expect(prompt).toContain('Do NOT pass a `pr_url` to `gt_done`'); + expect(prompt).not.toContain('## Pull Request Creation'); + }); + + it('should forbid PR creation when mergeStrategy is direct', () => { + const prompt = buildPolecatSystemPrompt({ ...params, mergeStrategy: 'direct' }); + expect(prompt).toContain('Do NOT create pull requests'); + expect(prompt).not.toContain('## Pull Request Creation'); + }); + + it('should include PR creation instructions when mergeStrategy is pr', () => { + const prompt = buildPolecatSystemPrompt({ + ...params, + mergeStrategy: 'pr', + targetBranch: 'main', + }); + expect(prompt).toContain('## Pull Request Creation'); + expect(prompt).toContain('gh pr create'); + expect(prompt).toContain('--base main'); + expect(prompt).toContain('pr_url'); + expect(prompt).not.toContain('Do NOT create pull requests'); + expect(prompt).not.toContain('Do NOT pass a `pr_url` to `gt_done`'); + }); + + it('should use the provided targetBranch in PR creation instructions', () => { + const prompt = buildPolecatSystemPrompt({ + ...params, + mergeStrategy: 'pr', + targetBranch: 'develop', + }); + expect(prompt).toContain('--base develop'); + }); }); diff --git 
a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts index ea57e1077..8815dea0e 100644 --- a/cloudflare-gastown/src/prompts/polecat-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/polecat-system.prompt.ts @@ -3,6 +3,9 @@ * * The prompt establishes identity, available tools, the GUPP principle, * the done flow, escalation protocol, and commit hygiene. + * + * When `mergeStrategy` is `'pr'`, the polecat creates a PR before calling + * gt_done — the refinery then reviews the PR and adds comments. */ export function buildPolecatSystemPrompt(params: { agentName: string; @@ -10,6 +13,10 @@ export function buildPolecatSystemPrompt(params: { townId: string; identity: string; gates: string[]; + /** When set to 'pr', the polecat creates the PR itself and passes pr_url to gt_done. */ + mergeStrategy?: 'direct' | 'pr'; + /** Target branch for the PR (e.g. 'main'). Only used when mergeStrategy is 'pr'. */ + targetBranch?: string; }): string { const gatesSection = params.gates.length > 0 @@ -43,7 +50,7 @@ You have these tools available. Use them to coordinate with the Gastown orchestr - **gt_prime** — Call at the start of your session to get full context: your agent record, hooked bead, undelivered mail, and open beads. Your context is injected automatically on first message, but call this if you need to refresh. - **gt_bead_status** — Inspect the current state of any bead by ID. - **gt_bead_close** — Close a bead when its work is fully complete and merged. -- **gt_done** — Signal that you are done with your current hooked bead. This pushes your branch, submits it to the review queue, transitions the bead to \`in_review\`, and unhooks you. Always push your branch before calling gt_done. +- **gt_done** — Signal that you are done with your current hooked bead. Always push your branch before calling gt_done.${params.mergeStrategy === 'pr' ? ' Pass the PR URL you created as the `pr_url` parameter.' 
: ''} - **gt_mail_send** — Send a message to another agent in the rig. Use this for coordination, questions, or status sharing. - **gt_mail_check** — Check for new mail from other agents. Call this periodically or when you suspect coordination messages. - **gt_escalate** — Escalate a problem you cannot solve. Creates an escalation bead. Use this when you are stuck, blocked, or need human intervention. @@ -56,14 +63,45 @@ You have these tools available. Use them to coordinate with the Gastown orchestr 2. **Work**: Implement the bead's requirements. Write code, tests, and documentation as needed. 3. **Commit frequently**: Make small, focused commits. Push often. The container's disk is ephemeral — if it restarts, unpushed work is lost. 4. **Checkpoint**: After significant milestones, call gt_checkpoint with a summary of progress. -5. **Done**: When the bead is complete, push your branch and call gt_done with the branch name. The bead transitions to \`in_review\` and the refinery picks it up for merge. If the review fails (rework), you will be re-dispatched with the bead back in \`in_progress\`. -${gatesSection} +5. **Done**: When the bead is complete, push your branch${params.mergeStrategy === 'pr' ? ', create a pull request, and call gt_done with the branch name and the PR URL' : ' and call gt_done with the branch name'}. The bead transitions to \`in_review\` and the refinery reviews it. If the review fails (rework), you will be re-dispatched with the bead back in \`in_progress\`. +${gatesSection}${ + params.mergeStrategy === 'pr' + ? ` +## Pull Request Creation + +After all gates pass and your work is complete, create a pull request before calling gt_done: + +1. Push your branch: \`git push origin <branch>\` +2. Create a pull request: + - **GitHub:** \`gh pr create --base ${params.targetBranch ?? 'main'} --head <branch> --title "<title>" --body "<body>"\` + - **GitLab:** \`glab mr create --source-branch <branch> --target-branch ${params.targetBranch ?? 'main'} --title "<title>" --description "<description>"\` +3.
Capture the PR/MR URL from the command output. +4. Call \`gt_done\` with branch="<branch>" and pr_url="<pr-url>". + - The pr_url MUST be the URL of the created pull request (e.g. \`https://github.com/owner/repo/pull/123\`). + - Do NOT use the URL that \`git push\` prints — that is a "create new PR" link, not an existing PR. +` + : '' + } +## PR Fixup Workflow + +When your hooked bead has the \`gt:pr-fixup\` label, you are fixing an existing PR rather than creating new work. **This is the ONE exception to the "do not switch branches" rule.** You MUST check out the PR branch from your bead metadata instead of using the default worktree branch. + +1. Check out the PR branch specified in your bead metadata (e.g. \`git fetch origin && git checkout <branch>\`). This overrides the default worktree branch for this bead. +2. Look at ALL comments on the PR using \`gh pr view <pr-number> --comments\` and the GitHub API. +3. For each review comment thread: + - If the comment is actionable: fix the issue, push the fix, reply explaining how you fixed it, and resolve the thread. + - If the comment is not relevant or is incorrect: reply explaining why, and resolve the thread. +4. **Important**: Resolve the entire thread, not just the individual comment. Use \`gh api\` to resolve review threads. +5. After addressing all comments, push your changes and call gt_done. + +Do NOT create a new PR. Push to the existing branch. + ## Commit & Push Hygiene - Commit after every meaningful unit of work (new function, passing test, config change). - Push after every commit. Do not batch pushes. - Use descriptive commit messages referencing the bead if applicable. -- Branch naming: your branch is pre-configured in your worktree. Do not switch branches. +- Branch naming: your branch is pre-configured in your worktree. Do not switch branches — **unless** your bead has the \`gt:pr-fixup\` label (see PR Fixup Workflow above).
## Escalation @@ -84,11 +122,18 @@ Periodically call gt_status with a brief, plain-language description of what you Call gt_status when you START a new meaningful phase of work: beginning a new file, running tests, installing packages, pushing a branch. Do NOT call it on every tool use. ## Important - +${ + params.mergeStrategy === 'pr' + ? ` +- Create a pull request after your work is complete and all gates pass. See the "Pull Request Creation" section above. +- Do NOT merge your branch into the default branch yourself. +- Do NOT use \`git merge\` to merge into the target branch. Only use \`gh pr create\` or \`glab mr create\`.` + : ` - Do NOT create pull requests or merge requests. Your job is to write code on your branch. The Refinery handles merging and PR creation. - Do NOT merge your branch into the default branch yourself. - Do NOT use \`gh pr create\`, \`git merge\`, or any equivalent. Just push your branch and call gt_done. -- Do NOT pass a \`pr_url\` to \`gt_done\`. The URL that \`git push\` prints (e.g. \`https://github.com/.../pull/new/...\`) is NOT a pull request — it is a convenience link for humans. Ignore it. +- Do NOT pass a \`pr_url\` to \`gt_done\`. The URL that \`git push\` prints (e.g. \`https://github.com/.../pull/new/...\`) is NOT a pull request — it is a convenience link for humans. Ignore it.` +} - Do NOT modify files outside your worktree. - Do NOT run destructive git operations (force push, hard reset to remote). - Do NOT install global packages or modify the container environment. 
diff --git a/cloudflare-gastown/src/prompts/refinery-system.prompt.ts b/cloudflare-gastown/src/prompts/refinery-system.prompt.ts index 2f54dde46..b72007dd0 100644 --- a/cloudflare-gastown/src/prompts/refinery-system.prompt.ts +++ b/cloudflare-gastown/src/prompts/refinery-system.prompt.ts @@ -15,6 +15,8 @@ export function buildRefinerySystemPrompt(params: { targetBranch: string; polecatAgentId: string; mergeStrategy: MergeStrategy; + /** When set, the polecat already created a PR — the refinery reviews it via GitHub. */ + existingPrUrl?: string; /** Present when this review is for a bead inside a convoy. */ convoyContext?: { mergeMode: 'review-then-land' | 'review-and-merge'; @@ -27,13 +29,19 @@ export function buildRefinerySystemPrompt(params: { ? params.gates.map((g, i) => `${i + 1}. \`${g}\``).join('\n') : '(No quality gates configured — skip to code review)'; + const convoySection = params.convoyContext ? buildConvoySection(params.convoyContext) : ''; + + // When the polecat already created a PR, the refinery reviews it via + // GitHub and adds review comments instead of creating a new PR. + if (params.existingPrUrl) { + return buildPRReviewPrompt({ ...params, prUrl: params.existingPrUrl, gateList, convoySection }); + } + const mergeInstructions = params.mergeStrategy === 'direct' ? buildDirectMergeInstructions(params) : buildPRMergeInstructions(params); - const convoySection = params.convoyContext ? buildConvoySection(params.convoyContext) : ''; - return `You are the Refinery agent for rig "${params.rigId}" (town "${params.townId}"). Your identity: ${params.identity} @@ -94,6 +102,98 @@ ${mergeInstructions} `; } +/** + * Build the refinery prompt for reviewing an existing PR. + * + * In this mode, the polecat already created the PR. The refinery reviews + * the changes and adds GitHub review comments (approve or request changes). 
+ * The auto-resolve system detects unresolved comments and dispatches polecats + * to address them, creating a unified feedback loop for both AI and human reviews. + */ +function buildPRReviewPrompt(params: { + identity: string; + rigId: string; + townId: string; + gates: string[]; + branch: string; + targetBranch: string; + polecatAgentId: string; + prUrl: string; + gateList: string; + convoySection: string; +}): string { + return `You are the Refinery agent for rig "${params.rigId}" (town "${params.townId}"). +Your identity: ${params.identity} + +## Your Role +You review pull requests created by polecat agents. You add review comments on the PR via the GitHub/GitLab CLI. You do NOT create PRs — the polecat already created one. + +## Current Review +- **Pull Request:** ${params.prUrl} +- **Branch:** \`${params.branch}\` +- **Target branch:** \`${params.targetBranch}\` +- **Polecat agent ID:** ${params.polecatAgentId} +${params.convoySection} + +## Review Process + +### Step 1: Run Quality Gates +Run these commands in order. If any fail, note the failures for your review. + +${params.gateList} + +### Step 2: Code Review +Review the diff on the PR: +1. Run \`gh pr diff ${params.prUrl}\` or \`git diff ${params.targetBranch}...HEAD\` to see all changes +2. Check for: + - Correctness — does the code do what the bead title/description asked? + - Style — consistent with the existing codebase? + - Test coverage — are new features tested? + - Security — no secrets, no injection vulnerabilities, no unsafe patterns? + - Build artifacts — no compiled files, node_modules, or other generated content? + +### Step 3: Submit Your Review + +**If everything passes (gates + code review):** +1. Leave a comment on the PR noting that the review passed: + \`gh pr comment ${params.prUrl} --body "Refinery code review passed. All quality gates pass."\` + Do NOT use \`gh pr review --approve\` — the bot account cannot approve its own PRs. +2. 
Call \`gt_done\` with branch="${params.branch}" and pr_url="${params.prUrl}". + +**If quality gates fail or code review finds issues:** +1. Submit a review requesting changes with **specific, actionable inline comments** using the GitHub API: + \`\`\` + gh api repos/{owner}/{repo}/pulls/{number}/reviews --method POST --input - <<'EOF' + { + "event": "REQUEST_CHANGES", + "body": "", + "comments": [ + {"path": "src/file.ts", "position": 10, "body": ""} + ] + } + EOF + \`\`\` + Each entry in \`comments\` creates an inline review thread at the specified file and diff position. Prefer inline comments over general body text — they create review threads the system can track and auto-resolve. +2. Call \`gt_done\` with branch="${params.branch}" and pr_url="${params.prUrl}". + The system will detect your unresolved review comments and automatically dispatch a polecat to address them. + +## Available Gastown Tools +- \`gt_prime\` — Get your role context and current assignment +- \`gt_done\` — Signal your review is complete (pass pr_url="${params.prUrl}") +- \`gt_escalate\` — Record issues for visibility +- \`gt_checkpoint\` — Save progress for crash recovery + +## Important +- ALWAYS call \`gt_done\` with pr_url="${params.prUrl}" when you finish — whether you approved or requested changes. +- Before any git operation, run \`git status\` first to understand the working tree state. +- Be specific in review comments. "Fix the tests" is not actionable. "Test \`calculateTotal\` in \`tests/cart.test.ts\` fails because the discount logic in \`src/cart.ts:47\` doesn't handle the zero-quantity case" is actionable. +- Prefer inline PR comments (with file path and line number) over general review body comments. Inline comments create review threads that the system can track and auto-resolve. +- Do NOT modify the code yourself. Your job is to review and comment — not to fix code. +- Do NOT merge the PR. The auto-merge system handles merging after all review comments are resolved. 
+- If you cannot determine whether the code is correct, escalate with severity "medium" instead of guessing. +`; +} + function buildConvoySection(ctx: { mergeMode: 'review-then-land' | 'review-and-merge'; isIntermediateStep: boolean; diff --git a/cloudflare-gastown/src/types.ts b/cloudflare-gastown/src/types.ts index 245097035..0c11e4a04 100644 --- a/cloudflare-gastown/src/types.ts +++ b/cloudflare-gastown/src/types.ts @@ -171,6 +171,12 @@ export type PrimeContext = { original_bead_title: string | null; mr_bead_id: string | null; } | null; + /** Present when the hooked bead is a PR fixup (gt:pr-fixup label). */ + pr_fixup_context: { + pr_url: string | null; + branch: string | null; + target_branch: string | null; + } | null; }; // -- Agent done -- @@ -258,6 +264,16 @@ export const TownConfigSchema = z.object({ gates: z.array(z.string()).default([]), auto_merge: z.boolean().default(true), require_clean_merge: z.boolean().default(true), + /** When enabled, the refinery agent reviews PRs and adds GitHub review + * comments. Disable if you use an external code-review bot. */ + code_review: z.boolean().default(true), + /** When enabled, a polecat is automatically dispatched to address + * unresolved review comments and failing CI checks on open PRs. */ + auto_resolve_pr_feedback: z.boolean().default(false), + /** After all CI checks pass and all review threads are resolved, + * automatically merge the PR after this many minutes. + * 0 = immediate, null = disabled (require manual merge). 
*/ + auto_merge_delay_minutes: z.number().int().min(0).nullable().default(null), }) .optional(), @@ -333,6 +349,9 @@ export const TownConfigUpdateSchema = z.object({ gates: z.array(z.string()).optional(), auto_merge: z.boolean().optional(), require_clean_merge: z.boolean().optional(), + code_review: z.boolean().optional(), + auto_resolve_pr_feedback: z.boolean().optional(), + auto_merge_delay_minutes: z.number().int().min(0).nullable().optional(), }) .optional(), alarm_interval_active: z.number().int().min(5).max(600).optional(), diff --git a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx index 07bd79ae6..8df483dcc 100644 --- a/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx +++ b/src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx @@ -267,6 +267,9 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI const [maxPolecats, setMaxPolecats] = useState(undefined); const [refineryGates, setRefineryGates] = useState([]); const [autoMerge, setAutoMerge] = useState(true); + const [refineryCodeReview, setRefineryCodeReview] = useState(true); + const [autoResolvePrFeedback, setAutoResolvePrFeedback] = useState(false); + const [autoMergeDelayMinutes, setAutoMergeDelayMinutes] = useState(null); const [mergeStrategy, setMergeStrategy] = useState<'direct' | 'pr'>('direct'); const [stagedConvoysDefault, setStagedConvoysDefault] = useState(false); const [githubCliPat, setGithubCliPat] = useState(''); @@ -292,6 +295,9 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI setMaxPolecats(cfg.max_polecats_per_rig); setRefineryGates(cfg.refinery?.gates ?? []); setAutoMerge(cfg.refinery?.auto_merge ?? true); + setRefineryCodeReview(cfg.refinery?.code_review ?? true); + setAutoResolvePrFeedback(cfg.refinery?.auto_resolve_pr_feedback ?? 
false); + setAutoMergeDelayMinutes(cfg.refinery?.auto_merge_delay_minutes ?? null); setMergeStrategy(cfg.merge_strategy === 'pr' ? 'pr' : 'direct'); setStagedConvoysDefault(cfg.staged_convoys_default ?? false); setGithubCliPat(cfg.github_cli_pat ?? ''); @@ -345,7 +351,10 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI refinery: { gates: refineryGates.filter(g => g.trim()), auto_merge: autoMerge, + code_review: refineryCodeReview, require_clean_merge: true, + auto_resolve_pr_feedback: autoResolvePrFeedback, + auto_merge_delay_minutes: autoMergeDelayMinutes, }, }, }); @@ -539,18 +548,18 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI
- -
+

When enabled, the AI agent's Co-authored-by trailer is omitted from commits. Only takes effect when Author Name is set.

+
@@ -718,11 +727,7 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI index={5} >
- -
+

When enabled, new convoys are created in staged mode — agents are not @@ -730,6 +735,10 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI chance to review and adjust the plan before execution begins.

+
@@ -803,14 +812,87 @@ export function TownSettingsPageClient({ townId, readOnly = false, organizationI )}
- -
+

Automatically merge when all gates pass.

+ +
+ +
+
+ +

+ The refinery agent reviews PRs and adds GitHub review comments. Disable if you + already use an external code-review bot. +

+
+
+ +
+
+ +

+ When enabled, a polecat is automatically dispatched to address unresolved + review comments and failing CI checks on open PRs. +

+
+ +
+ + {autoResolvePrFeedback && ( +
+ +

+ After all CI checks pass and all review threads are resolved, automatically + merge the PR after this delay. Leave empty to require manual merge. +

+
+
+ {[ + { label: 'Off', value: null }, + { label: '0', value: 0 }, + { label: '15m', value: 15 }, + { label: '1h', value: 60 }, + { label: '4h', value: 240 }, + ].map(preset => ( + + ))} +
+ {autoMergeDelayMinutes !== null && ( +
+ { + const v = parseInt(e.target.value, 10); + setAutoMergeDelayMinutes(Number.isNaN(v) ? null : Math.max(0, v)); + }} + className="w-20 border-white/[0.08] bg-white/[0.03] text-center font-mono text-xs text-white/85" + /> + minutes +
+ )} +
+
+ )} {/* ── Container ──────────────────────────────────────── */} diff --git a/src/lib/gastown/types/router.d.ts b/src/lib/gastown/types/router.d.ts index ce9ece484..9a30187ce 100644 --- a/src/lib/gastown/types/router.d.ts +++ b/src/lib/gastown/types/router.d.ts @@ -44,6 +44,16 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< }; meta: object; }>; + getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + draining: boolean; + drainStartedAt: string | null; + }; + meta: object; + }>; /** * Check whether the current user is an admin viewing a town they don't own. * Used by the frontend to show an admin banner. @@ -59,16 +69,6 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< }; meta: object; }>; - getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - draining: boolean; - drainStartedAt: string | null; - }; - meta: object; - }>; deleteTown: import('@trpc/server').TRPCMutationProcedure<{ input: { townId: string; @@ -485,6 +485,9 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< gates: string[]; auto_merge: boolean; require_clean_merge: boolean; + code_review: boolean; + auto_resolve_pr_feedback: boolean; + auto_merge_delay_minutes: number | null; } | undefined; alarm_interval_active?: number | undefined; @@ -537,6 +540,9 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< gates?: string[] | undefined; auto_merge?: boolean | undefined; require_clean_merge?: boolean | undefined; + code_review?: boolean | undefined; + auto_resolve_pr_feedback?: boolean | undefined; + auto_merge_delay_minutes?: number | null | undefined; } | undefined; alarm_interval_active?: number | undefined; @@ -583,6 +589,9 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter< gates: string[]; auto_merge: boolean; require_clean_merge: boolean; + code_review: 
boolean; + auto_resolve_pr_feedback: boolean; + auto_merge_delay_minutes: number | null; } | undefined; alarm_interval_active?: number | undefined; @@ -1340,6 +1349,16 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute }; meta: object; }>; + getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ + input: { + townId: string; + }; + output: { + draining: boolean; + drainStartedAt: string | null; + }; + meta: object; + }>; /** * Check whether the current user is an admin viewing a town they don't own. * Used by the frontend to show an admin banner. @@ -1355,16 +1374,6 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute }; meta: object; }>; - getDrainStatus: import('@trpc/server').TRPCQueryProcedure<{ - input: { - townId: string; - }; - output: { - draining: boolean; - drainStartedAt: string | null; - }; - meta: object; - }>; deleteTown: import('@trpc/server').TRPCMutationProcedure<{ input: { townId: string; @@ -1781,6 +1790,9 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute gates: string[]; auto_merge: boolean; require_clean_merge: boolean; + code_review: boolean; + auto_resolve_pr_feedback: boolean; + auto_merge_delay_minutes: number | null; } | undefined; alarm_interval_active?: number | undefined; @@ -1833,6 +1845,9 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute gates?: string[] | undefined; auto_merge?: boolean | undefined; require_clean_merge?: boolean | undefined; + code_review?: boolean | undefined; + auto_resolve_pr_feedback?: boolean | undefined; + auto_merge_delay_minutes?: number | null | undefined; } | undefined; alarm_interval_active?: number | undefined; @@ -1879,6 +1894,9 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute gates: string[]; auto_merge: boolean; require_clean_merge: boolean; + code_review: boolean; + auto_resolve_pr_feedback: boolean; + auto_merge_delay_minutes: 
number | null; } | undefined; alarm_interval_active?: number | undefined; diff --git a/src/lib/gastown/types/schemas.d.ts b/src/lib/gastown/types/schemas.d.ts index 5ccdf157d..d56dbe856 100644 --- a/src/lib/gastown/types/schemas.d.ts +++ b/src/lib/gastown/types/schemas.d.ts @@ -1,4 +1,4 @@ -import { z } from 'zod'; +import type { z } from 'zod'; export declare const TownOutput: z.ZodObject< { id: z.ZodString; diff --git a/src/routers/admin/gastown-router.ts b/src/routers/admin/gastown-router.ts index 3e8284ad5..34a135119 100644 --- a/src/routers/admin/gastown-router.ts +++ b/src/routers/admin/gastown-router.ts @@ -141,6 +141,9 @@ const TownConfigRecord = z.object({ gates: z.array(z.string()), auto_merge: z.boolean(), require_clean_merge: z.boolean(), + code_review: z.boolean(), + auto_resolve_pr_feedback: z.boolean(), + auto_merge_delay_minutes: z.number().nullable(), }) .optional(), alarm_interval_active: z.number().optional(),