From 40f10bbcc7a3a3404309a8489eb7f9966421b906 Mon Sep 17 00:00:00 2001
From: "Fred K. Schott" <622227+FredKSchott@users.noreply.github.com>
Date: Sat, 14 Feb 2026 21:59:51 -0800
Subject: [PATCH 1/2] auto-triage bot improvements (#15513)

- **New:** Docker sandbox for issue triage: Run the LLM (OpenCode server) inside an isolated Docker container during triage workflows so untrusted reproduction code never has access to secrets. Adds a Dockerfile.sandbox, a GHCR build workflow, and updates the triage workflow to use --sandbox. Moves the compiler clone into .compiler/ (gitignored) so it's accessible inside the container's bind mount.
- **New:** Add a `verify` step to the triage pipeline that checks whether reported behavior is intentional before attempting a fix. Fixes issues where the bot just trusted the submitting user's expected behavior as truth vs. potentially confused/incorrect on expected behavior.
- **New:** Make diagnose and fix skills aware of the withastro/compiler repo (cloned as a sibling in CI). Fixes issues tracked back to the compiler, where the bot was trying to work around the issue in our astro codebase instead of pointing responsibility to the compiler.
- **New:** Add a feasibility check to the fix skill for browser/runtime compatibility. Hopefully fixes issues where the bot suggests code that wouldn't run on modern browsers.
- Fix: For some reason the reproduction instructions were gone (or never there?) so we hadn't been downloading repos/stackblitz, and probably spending quite a lot of time trying to figure out the bug without a reproduction. Kind of surprised by the success rate at reproductions, given this, but I guess everyone is including enough detail without it for the LLM to go off of.
- Chore: Ensure all skills explicitly read report.md before appending to it
- Chore: Simplify the diagnose skill's review step
- Chore: Refactor issue-triage.ts into composable helper functions.
- Chore: Ignore triage folder from eslint
- Chore: Tidy up AGENTS.md, simplified the project layout section
- Chore: Bump @flue/cli to 0.0.20 and @flue/client to 0.0.12
---
 .agents/skills/triage/SKILL.md            |  15 +-
 .agents/skills/triage/comment.md          |  34 ++-
 .agents/skills/triage/diagnose.md         |  10 +-
 .agents/skills/triage/fix.md              |  39 ++-
 .agents/skills/triage/reproduce.md        |  82 +++--
 .agents/skills/triage/verify.md           | 149 +++++++++
 .flue/Dockerfile.sandbox                  |  58 ++++
 .flue/workflows/issue-triage.ts           | 355 ++++++++++++++--------
 .github/workflows/build-sandbox-image.yml |  40 +++
 .github/workflows/issue-triage.yml        |  31 +-
 .gitignore                                |   1 +
 AGENTS.md                                 |  32 +-
 eslint.config.js                          |   1 +
 package.json                              |   4 +-
 pnpm-lock.yaml                            |  30 +-
 15 files changed, 652 insertions(+), 229 deletions(-)
 create mode 100644 .agents/skills/triage/verify.md
 create mode 100644 .flue/Dockerfile.sandbox
 create mode 100644 .github/workflows/build-sandbox-image.yml

diff --git a/.agents/skills/triage/SKILL.md b/.agents/skills/triage/SKILL.md
index d012ce6877d9..d1969af23c82 100644
--- a/.agents/skills/triage/SKILL.md
+++ b/.agents/skills/triage/SKILL.md
@@ -1,11 +1,11 @@
 ---
 name: triage
-description: Triage a bug report. Reproduces the bug, diagnoses the root cause, and attempts a fix. Use when asked to "triage issue #1234", "triage this bug", or similar.
+description: Triage a bug report. Reproduces the bug, diagnoses the root cause, verifies whether the behavior is intentional, and attempts a fix. Use when asked to "triage issue #1234", "triage this bug", or similar.
 ---
 
 # Triage
 
-Triage a bug report end-to-end: reproduce the bug, diagnose the root cause, and attempt a fix.
+Triage a bug report end-to-end: reproduce the bug, diagnose the root cause, verify whether the behavior is intentional, and attempt a fix.
 
 ## Input
 
@@ -35,7 +35,16 @@ After completing diagnosis, check your confidence:
 - If confidence is **low** — skip to Output.
 - If confidence is **medium** or **high** — continue to Step 3.
 
-## Step 3: Fix
+## Step 3: Verify
+
+Read and follow [verify.md](verify.md). Use a subagent for this step to isolate context.
+
+After completing verification, check the verdict:
+
+- If the verdict is **intended-behavior** — skip to Output. The issue is not a bug; do not attempt a fix.
+- If the verdict is **bug** or **unclear** — continue to Step 4.
+
+## Step 4: Fix
 
 Read and follow [fix.md](fix.md). Use a subagent for this step to isolate context.
 
diff --git a/.agents/skills/triage/comment.md b/.agents/skills/triage/comment.md
index ecbf304e7a20..ff4c2e326e22 100644
--- a/.agents/skills/triage/comment.md
+++ b/.agents/skills/triage/comment.md
@@ -2,7 +2,7 @@
 
 Generate a GitHub issue comment from triage findings.
 
-**CRITICAL: You MUST always produce a GitHub comment as your final output, regardless of what input files are available. Even if `report.md` is missing or empty, you must still produce a comment. In that case, produce a minimal comment stating that automated triage could not be completed.**
+**CRITICAL: You MUST always attempt to read `report.md` and produce a GitHub comment as your final output, regardless of what input files are available. Even if `report.md` is missing or empty, you must still produce a comment. In that case, produce a minimal comment stating that automated triage could not be completed.**
 
 ## Prerequisites
 
@@ -11,6 +11,7 @@ These variables are referenced throughout this skill. They may be passed as args
 - **`triageDir`** — Directory containing the reproduction project (e.g. `triage/issue-123`). If not passed as an arg, infer from previous conversation.
 - **`report.md`** — File in `triageDir` that MAY exist. Contains the full context from all previous skills (reproduction, diagnosis, fix).
 - **`branchName`** — The branch name where a fix was pushed. If not passed as an arg, infer from previous conversation.
+- **`priorityLabels`** — An array of `{ name, description }` objects representing the available priority labels for the repository. Used to select a priority in the comment.
 
 ## Overview
 
@@ -27,22 +28,28 @@ If `report.md` is missing or empty, generate a minimal comment (see "Fallback" s
 
 Generate and return a GitHub comment following the template below.
 
-### Special Cases
+### "Fix" Instructions
 
-- **If the user is on a different major version than the current monorepo, and the issue could not be reproduced in the current monorepo:** In the "Fix" section of your comment, the best guidance you can provide is to suggest that the user upgrade to that newer major version to fix their issue, even if that newer major version is a beta release. Link to the relevant upgrade guide:
-  - v6: https://v6.docs.astro.build/en/guides/upgrade-to/v6/
-  - v5: https://docs.astro.build/en/guides/upgrade-to/v5/
+The **Fix** line in the template has three possible forms. Choose the one that matches the triage outcome:
+
+1. **You created a fix:** Use `I was able to fix this issue.` and include the suggested fix link.
+2. **The issue is already fixed on main** (e.g. the user is on an older major version and the bug doesn't reproduce on current main): Use `This issue has already been fixed.` and tell the user how to get the fix (e.g. upgrade). Link the relevant upgrade guide if applicable: [v6](https://v6.docs.astro.build/en/guides/upgrade-to/v6/), [v5](https://docs.astro.build/en/guides/upgrade-to/v5/).
+3. **You could not find or create a fix:** Use `I was unable to find a fix for this issue.` and give guidance or a best guess at where the fix might be.
+
+### "Priority" Instructions
+
+The **Priority** line communicates the severity of this issue to maintainers. Its goal is to answer the question: **"How bad is it?"**
+
+Select exactly ONE priority label from the `priorityLabels` arg. Use the label descriptions to guide your decision, combined with the triage report's root cause and impact analysis. Render the chosen label name in bold (without the `- ` prefix), then follow it with 1-2 sentences explaining **why** you chose that priority. Your explanation should answer: "Who is likely to be affected, and under what conditions?" If you are unsure, use your best judgment based on the label descriptions and the triage findings.
 
 ### Template
 
 ```markdown
-**[I was able to reproduce this issue. / I was unable to reproduce this issue.]** [1-2 sentences describing the result and key observations.]
-
-**Fix:** **[I was able to fix this issue. / I was unable to fix this issue]** [1-2 sentences describing the solution and key observations. Even if no fix was created, you can still use this space to give guidance or "a best guess" at where the fix might be.] [If `branchName` arg is non-null, include this link: [View Fix](https://github.com/withastro/astro/compare/{branchName}?expand=1)]
+**[I was able to reproduce this issue. / I was unable to reproduce this issue.]** [2-3 sentences describing the root cause, result, and key observations.]
 
-**Cause:** [Single sentence explaining the root cause - or just the word "Unknown" if not determined.]
+**Fix:** **[See "Fix" Instructions above.]** [1-2 sentences describing the solution, where/when it was already fixed, or guidance on where a fix might be.] [If `branchName` is non-null: [View Suggested Fix](https://github.com/withastro/astro/compare/{branchName}?expand=1)]
 
-**Impact:** [Single sentence describing who is affected and how - or just the word "Unknown" if not determined.]
+**Priority:** **[See "Priority" Instructions above.]** [1-2 sentences explaining why this priority was chosen, who is likely to be affected, and under what conditions (this section should answer the question: "how bad is it?")]
 
 <details>
 <summary><em>Full Triage Report</em></summary>
@@ -54,11 +61,14 @@ Generate and return a GitHub comment following the template below.
 _This report was made by an LLM. Mistakes happen, check important info._
 ```
 
-## Optional Follow-up Task
+## Result
+
+You MUST RETURN the generated comment text so that the user can review and post it themselves.
 
-You MAY SUGGEST to the user, as a potential follow-up step, to post the issue to GitHub directly. However you CANNOT DO THIS STEP unless the user explicitly asks.
+You MAY SUGGEST to the user that you (or they) could post the comment to the GitHub issue. **Do not post the comment yourself** — this should only be a suggestion. It would be a horrifying abuse of the user's trust if you posted to GitHub on their behalf without their explicit permission.
 
 ```bash
+# Example Only:
 gh issue comment <issue_number> --body <comment>
 gh issue comment <issue_number> --body-file <path-to-file>
 ```
diff --git a/.agents/skills/triage/diagnose.md b/.agents/skills/triage/diagnose.md
index 73c6de2bb75e..0e6cfaef393d 100644
--- a/.agents/skills/triage/diagnose.md
+++ b/.agents/skills/triage/diagnose.md
@@ -2,14 +2,16 @@
 
 Find the root cause of a reproduced bug in the Astro source code.
 
-**CRITICAL: You MUST always append to `report.md` before finishing, regardless of outcome. Even if you cannot identify the root cause, hit errors, or the investigation is inconclusive — always update `report.md` with your findings. The orchestrator and downstream skills depend on this file to determine what happened.**
+**CRITICAL: You MUST always read `report.md` and append to `report.md` before finishing, regardless of outcome. Even if you cannot identify the root cause, hit errors, or the investigation is inconclusive — always update `report.md` with your findings. The orchestrator and downstream skills depend on this file to determine what happened.**
 
 ## Prerequisites
 
 These variables are referenced throughout this skill. They may be passed as args by an orchestrator, or inferred from the conversation when run standalone.
 
 - **`triageDir`** — Directory containing the reproduction project (e.g. `triage/issue-123`). If not passed as an arg, infer from previous conversation.
+- **`issueDetails`** — The GitHub API issue details payload. This must be provided explicitly by the user or available from prior conversation context / tool calls. If this data isn't available, you may run `gh issue view ${issue_number}` to load the missing issue details directly from GitHub.
 - **`report.md`** — File in `triageDir` that MAY exist. Contains the full context from all previous skills.
+- **Astro Compiler source** — The `withastro/compiler` repo MAY be cloned at `.compiler/` (inside the repo root, gitignored). If it exists, treat it as in-scope for diagnosis. Some bugs originate in the compiler rather than in `packages/` — if stack traces or investigation point to compiler behavior (e.g. HTML parsing, `.astro` file transformation), check `.compiler/` for relevant source code.
 
 ## Overview
 
@@ -21,11 +23,7 @@ These variables are referenced throughout this skill. They may be passed as args
 
 ## Step 1: Review the Reproduction
 
-Read `report.md` from the `triageDir` directory to understand:
-
-- The exact error message and stack trace
-- Which command triggers the issue (build/dev/preview)
-- What user code is involved
+Start by reading `report.md` from the `triageDir` directory.
 
 **Skip if not reproduced:** If `report.md` shows the bug was NOT reproduced or was skipped (look for "could not reproduce", "SKIP REASON", "skipped: true"), append "DIAGNOSIS SKIPPED: No reproduction" to `report.md` and return `confidence: null`.
 
diff --git a/.agents/skills/triage/fix.md b/.agents/skills/triage/fix.md
index 5f4f16fdd0f9..2ecee1bcc282 100644
--- a/.agents/skills/triage/fix.md
+++ b/.agents/skills/triage/fix.md
@@ -2,24 +2,28 @@
 
 Develop and verify a fix for a diagnosed Astro bug.
 
-**CRITICAL: You MUST always append to `report.md` before finishing, regardless of outcome. Even if the fix attempt fails, you encounter errors, or you cannot resolve the bug — always update `report.md` with your findings. The orchestrator and downstream skills depend on this file to determine what happened.**
+**CRITICAL: You MUST always read `report.md` and append to `report.md` before finishing, regardless of outcome. Even if the fix attempt fails, you encounter errors, or you cannot resolve the bug — always update `report.md` with your findings. The orchestrator and downstream skills depend on this file to determine what happened.**
 
 ## Prerequisites
 
 These variables are referenced throughout this skill. They may be passed as args by an orchestrator, or inferred from the conversation when run standalone.
 
 - **`triageDir`** — Directory containing the reproduction project (e.g. `triage/issue-123`). If not passed as an arg, infer from previous conversation.
+- **`issueDetails`** — The GitHub API issue details payload. This must be provided explicitly by the user or available from prior conversation context / tool calls. If this data isn't available, you may run `gh issue view ${issue_number}` to load the missing issue details directly from GitHub.
 - **`report.md`** — File in `triageDir` that MAY exist. Contains the full context from all previous skills.
+- **Astro Compiler source** — The `withastro/compiler` repo MAY be cloned at `.compiler/` (inside the repo root, gitignored). If it exists and the root cause is in the compiler, investigate and propose fixes there. This clone is **reference only** — it is not wired into the monorepo's dependencies, so compiler changes cannot be tested end-to-end here. Document the proposed compiler changes and their diff in `report.md` instead.
 
 ## Overview
 
 1. Review the diagnosis from `report.md`
-2. Implement a minimal fix in `packages/`
-3. Rebuild the affected package(s)
-4. Verify the fix resolves the reproduction
-5. Ensure no regressions
-6. Generate git diff
-7. Append fix details to `report.md`
+2. Verify fix feasibility (browser/runtime compatibility)
+3. Implement a minimal fix in `packages/`
+4. Rebuild the affected package(s)
+5. Verify the fix resolves the reproduction
+6. Ensure no regressions
+7. Generate git diff
+8. Append fix details to `report.md`
+9. Clean up the working directory
 
 ## Step 1: Review the Diagnosis
 
@@ -33,7 +37,14 @@ Read `report.md` from the `triageDir` directory to understand:
 
 **Note:** The repo may be messy from previous steps. Check `git status` and either work from the current state or `git reset --hard` to start clean.
 
-## Step 2: Implement the Fix
+## Step 2: Verify Fix Feasibility
+
+Consider your potential fixes and verify that any modern features you plan to use are supported:
+
+- **Node.js:** When writing code for the runtime (server, build logic, integrations, etc.), target Node.js version `>=22.12.0`.
+- **Browsers:** If your fix relies on browser support for any web platform feature, check the browser compatibility table on MDN to confirm it is supported across our browser targets. Do not treat specification compliance as proof of browser support. If the feature lacks sufficient support, choose a different approach.
+
+## Step 3: Implement the Fix
 
 Make changes in `packages/` source files. Follow these principles:
 
@@ -69,7 +80,7 @@ export function renderComponent(component: AstroComponent, props: Props) {
 }
 ```
 
-## Step 3: Rebuild the Package
+## Step 4: Rebuild the Package
 
 After making changes, rebuild the affected package:
 
@@ -79,15 +90,15 @@ pnpm -C packages/astro build    # or packages/integrations/<name>
 
 Watch for build errors — fix any TypeScript issues before proceeding.
 
-## Step 4: Verify the Fix
+## Step 5: Verify the Fix
 
 Re-run the reproduction, often using `pnpm run build`/`astro build` or `pnpm run dev`/`astro dev`.
 
-## Step 5: Check for Regressions
+## Step 6: Check for Regressions
 
 Test that you didn't break anything new, and that normal cases still work. If you find regressions, refine the fix to handle all cases.
 
-## Step 6: Generate Git Diff
+## Step 7: Generate Git Diff
 
 From the repository root, generate the diff:
 
@@ -97,7 +108,7 @@ git diff packages/
 
 This captures all your changes for the report.
 
-## Step 7: Write Output
+## Step 8: Write Output
 
 Append your fix details to the existing `report.md` (written by reproduce and diagnose skills).
 
@@ -112,7 +123,7 @@ The report must include all information needed for a final GitHub comment to be
 - Any alternative approaches considered and their tradeoffs
 - If the fix failed: what was tried and why it didn't work
 
-## Step 8: Clean Up the Working Directory
+## Step 9: Clean Up the Working Directory
 
 1. Run `git status` and review all changed files
 2. Revert any changes that are NOT part of the fix:
diff --git a/.agents/skills/triage/reproduce.md b/.agents/skills/triage/reproduce.md
index 22773fc5aa90..2b32966ceafc 100644
--- a/.agents/skills/triage/reproduce.md
+++ b/.agents/skills/triage/reproduce.md
@@ -2,14 +2,14 @@
 
 Reproduce a GitHub issue to determine if a bug is valid and reproducible.
 
-**CRITICAL: You MUST always write `report.md` to the triage directory before finishing, regardless of outcome. Even if you encounter errors, cannot reproduce the bug, hit unexpected problems, or need to skip — always write `report.md`. The orchestrator and downstream skills depend on this file to determine what happened. If you finish without writing it, the entire pipeline fails silently.**
+**CRITICAL: You MUST always read `report.md` (if it already exists) and write `report.md` to the triage directory before finishing, regardless of outcome. Even if you encounter errors, cannot reproduce the bug, hit unexpected problems, or need to skip — always write `report.md`. The orchestrator and downstream skills depend on this file to determine what happened. If you finish without writing it, the entire pipeline fails silently.**
 
 ## Prerequisites
 
 These variables are referenced throughout this skill. They may be passed as args by an orchestrator, or inferred from the conversation when run standalone.
 
 - **`triageDir`** — Directory containing the reproduction project (e.g. `triage/issue-123`). If not passed as an arg, infer from previous conversation.
-- **`issueDetails`** - The issue details, often a string of JSON containing the GitHub issue title, body, and comments. If not passed as an arg, infer the issue from previous conversation and run `gh issue view ${issue_number} --json title,body,comments` to load the issue details directly from GitHub.
+- **`issueDetails`** — The GitHub API issue details payload. This must be provided explicitly by the user or available from prior conversation context / tool calls. If this data isn't available, you may run `gh issue view ${issue_number}` to load the missing issue details directly from GitHub.
 
 ## Overview
 
@@ -21,11 +21,9 @@ These variables are referenced throughout this skill. They may be passed as args
 
 ## Step 1: Confirm Bug Details
 
-Confirm that you have access to `bugDetails` (load directly from GitHub if you do not, following the instructions above).
+Confirm that you have `issueDetails` as defined/instructed above. **Otherwise**, fail — we cannot triage a bug that we have no details on.
 
-**Otherwise**, fail — we cannot triage a bug that we have no details on.
-
-Once you have `bugDetails`, read carefully:
+Once you have `issueDetails`, read carefully:
 
 - The bug description and expected vs actual behavior
 - Any reproduction steps provided
@@ -36,7 +34,7 @@ Once you have `bugDetails`, read carefully:
 
 Before attempting reproduction, check if this issue should be skipped due to a limitation of our sandbox reproduction environment.
 
-If any early exit condition is met, skip to Step 6 and write `report.md` with the skip details.
+If any early exit condition is met, skip to Step 5 and write `report.md` with the skip details.
 
 **Comment Handling for Early Exits:** Sometimes future comments will provide additional reproductions. An early exit is only valid if not future comments in that issue "invalidate" it. For example, if the original poster of a bug was on Astro 3.0, we would exit initially (`unsupported-version`). However, on a future run, if a commenter had later posted a similar reproduction but on the latest version of Astro, we would no longer consider that a valid early exit, and would instead continue on with the workflow.
 
@@ -46,6 +44,12 @@ The following are the documented early exit conditions that we support:
 
 Skip if the issue is not a bug report. This workflow can only triage bugs — feature requests, suggestions, and discussions are not actionable here.
 
+### Missing Details (`missing-details`)
+
+Skip if the issue is missing a valid reproduction (see Step 3 for the list of supported reproduction types).
+Skip if the issue is missing a description of the user's expected result (e.g. the "What's the expected result?" section of our issue template was left blank).
+We need both of these to successfully reproduce the bug, and later to verify the expected results.
+
 ### Unsupported Astro Version (`unsupported-version`)
 
 Skip if the bug targets Astro 4.x or earlier. Look for version in `astro info` output or package.json mentions.
@@ -63,37 +67,67 @@ Skip if the bug is specific to Bun or Deno. Our sandbox only supports Node.js.
 
 ### Maintainer Override (`maintainer-override`)
 
-Skip if a repository maintainer has commented that this issue should not be reproduced here. Check collaborator status with: `gh api "repos/<owner>/<repo>/collaborators/<user>" --silent && echo "user is collaborator"`
+Skip if a repository maintainer has commented that this issue should not be reproduced here. To determine if a commenter is a maintainer, check the `author_association` field on their comment in `issueDetails` — values of `MEMBER`, `COLLABORATOR`, or `OWNER` indicate a maintainer.
 
 ## Step 3: Set Up Reproduction Project
 
-The reproduction project goes in the `triageDir` directory (e.g. `triage/gh-123`). If no `triageDir` is provided, default to `triage/gh-<issue_number>`.
+Every bug report should include some sort of reproduction. The reproduction project goes in the `triageDir` directory (e.g. `triage/gh-123`). If no `triageDir` is provided, default to `triage/gh-<issue_number>`.
+
+Set up the reproduction project based on what the issue provides you. Once the reproduction project directory has been completed, run `pnpm install` in the workspace to connect it to the rest of the monorepo.
+
+### StackBlitz Project URL (`https://stackblitz.com/edit/...`)
+
+If the reproduction was provided as a StackBlitz project URL, download it into the `triageDir` directory using `stackblitz-clone`:
+
+```bash
+npx stackblitz-clone@latest <stackblitz-url> <triageDir>
+```
+
+### StackBlitz GitHub URL (`https://stackblitz.com/github/...`)
+
+StackBlitz has a special, commonly-used URL to open a GitHub repo in StackBlitz. If we have received one of these as the reproduction, parse out the GitHub org & repo names and then treat it as a GitHub URL, following the "GitHub URL" section below.
+
+### GitHub URL (`https://github.com/...`)
+
+If the reproduction was provided as a GitHub repo URL, clone the repo into the triage directory and remove the `.git` directory to avoid conflicts with the host repo:
+
+```bash
+git clone https://github.com/<owner>/<repo>.git <triageDir>
+rm -rf <triageDir>/.git
+```
+
+If a specific branch or subdirectory is referenced, check out that branch before removing `.git`, or copy only the relevant subdirectory.
+
+### Gist URL (`https://gist.github.com/`)
+
+Fetch the gist contents using the GitHub API to help understand the reproduction:
 
-**If a StackBlitz URL is provided in the issue:**
-The triage workspace has already been downloaded. Inspect what's there and proceed to configuration.
+```bash
+curl -s "https://api.github.com/gists/<gist-id>"
+```
 
-**If no StackBlitz URL (fallback to example template):**
-The workspace has been set up from `examples/minimal`. You may need to add dependencies.
+You may still need to set up a project from scratch (see fallback below) and apply the gist files into it.
 
-Sometimes, a user will provide a Gist URL instead of a StackBlitz URL to help show how to reproduce the issue. Use `gh gist view <gist-id>` to fetch any included gists, to help get a better understanding of what the problem is.
+### Manual Steps Reproduction
 
-Check the issue to determine what's needed:
+If no reproduction URL is provided, you will need to follow the manual steps that the user provided instead.
 
-- React components → `pnpm astro add react` (in the triage dir)
-- MDX content → `pnpm astro add mdx`
-- Specific adapter → `pnpm astro add node` (or vercel, netlify, etc.)
+Scaffold a fresh Astro project into the triage directory using `create-astro`. Use `--no-install` to skip dependency installation (we will run `pnpm install` later) and `--no-git` to avoid creating a nested git repo. Use `--template` to pick a starting template — if the user didn't mention a specific one, use `minimal` as the default.
 
-## Step 4: Configure the Triage Project
+```bash
+npx create-astro@latest <triageDir> --template minimal --no-install --no-git -y
+```
 
-Based on the issue, modify the triage project:
+Then, modify the triage project as needed:
 
 1. Update `astro.config.mjs` with required configuration
-2. Create pages, components, or middleware that trigger the bug
-3. Add any additional files mentioned in the issue
+2. Add any required dependencies or Astro integrations (`@astrojs/react`, etc.)
+3. Create pages, components, or middleware that trigger the bug
+4. Add/modify any additional files mentioned in the issue
 
 Keep the reproduction as minimal as possible — only add what's needed to trigger the bug.
 
-## Step 5: Attempt Reproduction in the Triage Project
+## Step 4: Attempt Reproduction in the Triage Project
 
 Use all of the tools at your disposal — `pnpm run dev|build|preview|test`, `curl`, `agent-browser`, etc.
 
@@ -101,7 +135,7 @@ Use all of the tools at your disposal — `pnpm run dev|build|preview|test`, `cu
 2. **Verify the baseline.** Remove or reverse the triggering code and confirm the project works without the bug. This guards against false positives — if the project is still broken without the triggering code, the issue may be in your setup, not the reported bug.
 3. **Document what you observe.** Record exact error messages and stack traces, which command triggers the issue, and whether it's consistent or intermittent.
 
-## Step 6: Write Output
+## Step 5: Write Output
 
 Write `report.md` to the triage directory:
 
diff --git a/.agents/skills/triage/verify.md b/.agents/skills/triage/verify.md
new file mode 100644
index 000000000000..e0bdebb5c24d
--- /dev/null
+++ b/.agents/skills/triage/verify.md
@@ -0,0 +1,149 @@
+# Verify
+
+Verify whether a GitHub issue describes an actual bug or a misunderstanding of intended behavior.
+
+**CRITICAL: You MUST always read `report.md` and append to `report.md` before finishing, regardless of outcome. Even if you cannot reach a conclusion — always update `report.md` with your findings. The orchestrator and downstream skills depend on this file to determine what happened.**
+
+## Prerequisites
+
+These variables are referenced throughout this skill. They may be passed as args by an orchestrator, or inferred from the conversation when run standalone.
+
+- **`triageDir`** — Directory containing the reproduction project (e.g. `triage/issue-123`). If not passed as an arg, infer from previous conversation.
+- **`issueDetails`** — The GitHub API issue details payload. This must be provided explicitly by the user or available from prior conversation context / tool calls. If this data isn't available, you may load the missing issue details directly from GitHub with `gh issue view ${issue_number}` or, where `gh` is not installed (e.g. inside the triage sandbox), with `curl -s "https://api.github.com/repos/withastro/astro/issues/<number>"`.
+- **`report.md`** — File in `triageDir` that MAY exist. Contains the full context from all previous skills.
+- **Astro Compiler source** — The `withastro/compiler` repo MAY be cloned at `.compiler/` (inside the repo root, gitignored). If it exists, treat it as in-scope when researching intent. Some behaviors originate in the compiler — check `.compiler/` for comments, explicit handling, and git blame when the issue involves HTML parsing, `.astro` file transformation, or compiler output.
+
+## Overview
+
+1. Review the issue and any existing reproduction findings
+2. Identify the claim: what does the reporter say _should_ happen?
+3. Research whether the current behavior is intentional (docs, source code, git blame, GitHub issues/PRs)
+4. Assess the verdict: bug, intended behavior, or unclear
+5. Assign confidence
+6. Append verification findings to `report.md`
+
+## Step 1: Identify the Claim
+
+Read the issue (from `report.md` or directly from GitHub) and extract two things:
+
+- **Current behavior**: What the reporter observes happening.
+- **Expected behavior**: What the reporter says _should_ happen instead.
+
+The expected behavior is the claim you are verifying. Your job is to determine whether it is correct (a real bug) or a misunderstanding of how Astro is designed to work.
+
+## Step 2: Research Intended Behavior
+
+Investigate whether the current behavior is intentional. Use multiple sources, and **do not assume the reporter is correct**. The reporter may be wrong about what Astro should do.
+
+### 2a: Check the documentation
+
+Search the Astro docs for relevant pages. Does the documentation describe or imply the current behavior? Does it promise the behavior the reporter expects?
+
+### 2b: Check the source code for intent signals
+
+Look at the relevant source code in `packages/`. Pay close attention to:
+
+- **Comments explaining "why"** — If a developer left a comment explaining why the code works a certain way, that is strong evidence of intentional design. Treat these comments as authoritative unless they are clearly outdated.
+- **Explicit conditionals and early returns** — Code that explicitly checks for the reported scenario and handles it differently than the reporter expects is likely intentional.
+- **Named constants and configuration** — Behavior controlled by a named config option or constant was probably a deliberate choice.
+- **Git blame on key lines** — If `report.md` identifies specific files and line numbers, run `git blame` on the relevant lines to find the commit that introduced the behavior. Then read the full commit message with `git show --no-patch <commit>` and review the associated PR if referenced. You can fetch PR details with `curl -s "https://api.github.com/repos/withastro/astro/pulls/<number>"`. A commit message or PR description that explains the rationale is strong evidence of intentional design.
+
+### 2c: Search prior GitHub issues and PRs
+
+Search for prior issues and PRs that discuss the same behavior using the GitHub API. This can reveal whether the behavior was previously discussed, intentionally introduced, or already reported and closed as "not a bug."
+
+```bash
+# Search issues for keywords related to the reported behavior
+curl -s "https://api.github.com/search/issues?q=<url-encoded-keywords>+repo:withastro/astro+is:issue&per_page=10"
+# Search PRs that may have introduced or discussed the behavior
+curl -s "https://api.github.com/search/issues?q=<url-encoded-keywords>+repo:withastro/astro+is:pr&per_page=10"
+# Read a specific issue for context
+curl -s "https://api.github.com/repos/withastro/astro/issues/<number>"
+# Read issue comments
+curl -s "https://api.github.com/repos/withastro/astro/issues/<number>/comments"
+# Read a specific PR for context
+curl -s "https://api.github.com/repos/withastro/astro/pulls/<number>"
+```
+
+If you find a closed issue where a maintainer explained why the behavior is intentional, or a PR that deliberately introduced it, that is strong evidence of intended behavior.
+
+### 2d: Distinguish bugs from non-bugs
+
+This is the most important and most error-prone step. For triage purposes, the definitions are:
+
+- A **bug** is when the code does something the developer **did not know about or did not choose**. The behavior is accidental — a regression, an oversight, an unhandled edge case that was never considered.
+- A **non-bug** (intended behavior / enhancement request) is when the developer **was aware of the behavior and consciously chose to ship it that way** — even if the behavior is imperfect, even if the developer wishes it were better, and even if the reporter's complaint is legitimate.
+
+The key question is not "does the developer _like_ this behavior?" but rather "did the developer _know about_ and _choose_ this behavior?" If the answer is yes, it is not a bug — it is a known limitation, a trade-off, or a deliberate design choice. The reporter may have a valid request to improve it, but that is an enhancement, not a bug fix.
+
+Ask yourself:
+
+- **Is there a comment explaining this behavior?** If a developer wrote a comment like "we can't do X because Y" or "in SSR we skip this because...", that means the developer was aware of the limitation and chose to ship it. That is not a bug — it is a known limitation. This is true even if the comment frames it as something they _couldn't_ solve rather than something they _chose_ not to solve. Shipping with awareness of a gap is a conscious decision.
+- **Does the code have an explicit check for this case?** If the code specifically handles the reported scenario (e.g., an `if` branch, a special case, a guard clause), the behavior is likely intentional.
+- **Would "fixing" this introduce correctness risks?** If the current behavior is the conservative/safe option and the reporter's expected behavior would risk breaking other cases, the current behavior is likely a deliberate trade-off.
+- **Is the reporter's expectation documented anywhere?** If neither the docs nor the code promise the behavior the reporter expects, the expectation may simply be wrong.
+
+**Common mistakes to avoid:**
+
+- Do not treat a known limitation as a bug. If a developer wrote "we can't do X here because Y" and skipped that case, the resulting behavior is a **known limitation**, not a bug — even though the developer would prefer to support that case. The reporter's request to close the gap is an enhancement request.
+- Do not treat a design trade-off as a bug just because the reporter frames it as one. If the code intentionally does X (with a comment explaining why), and the reporter wants Y, the correct verdict is "intended behavior / feature request" — even if Y seems like a reasonable thing to want.
+- Do not conflate "imperfect" with "broken." A feature that works for some cases but not others (with the gap documented in code) is incomplete, not buggy. Incomplete features are enhanced, not fixed.
+
+## Step 3: Assess the Verdict
+
+Based on your research, assign one of three verdicts:
+
+### Verdict: Bug
+
+The developer was **not aware** of this behavior, or did **not choose** it. Evidence:
+
+- The code lacks any comment or rationale for the behavior
+- The behavior contradicts documentation
+- The behavior is clearly a regression (worked before, broke after a change)
+- No explicit handling exists for this case — it falls through by accident
+- The scenario was never considered (no guard, no comment, no test)
+
+### Verdict: Intended Behavior / Enhancement Request
+
+The developer **was aware** of this behavior and **chose to ship it**. Evidence:
+
+- A code comment explains the limitation or trade-off (e.g., "we can't do X because Y", "in SSR we skip this because...")
+- A known limitation was explicitly left as a gap, with awareness documented in code or a commit message
+- An explicit conditional handles this case by design
+- A commit message or PR description explains the rationale
+- A prior GitHub issue was closed as "not a bug" or "by design" for this same behavior
+- "Fixing" it would introduce correctness or safety risks
+
+Note: This verdict does not mean the reporter's concern is invalid. It may still be worth improving the behavior — but that is a **feature request or enhancement**, not a bug fix. A known limitation is an enhancement opportunity, not a defect.
+
+### Verdict: Unclear
+
+You cannot confidently determine intent. This might happen when:
+
+- The code has no comments and the intent is ambiguous
+- The behavior could be either intentional or accidental
+- Documentation is silent on this specific case
+
+When in doubt, choose "unclear" rather than guessing. It is better to flag uncertainty than to misclassify.
+
+## Step 4: Assign Confidence
+
+Rate your confidence as:
+
+- **high** — Strong evidence supports the verdict (explicit comments, clear docs, unambiguous code, prior maintainer statements in GitHub issues/PRs)
+- **medium** — Reasonable evidence but some ambiguity remains
+- **low** — Mostly inference; could go either way
+
+## Step 5: Write Output
+
+Append your verification findings to `report.md`.
+
+Include a new section with:
+
+- The reporter's claim (expected behavior)
+- Your verdict: `bug`, `intended-behavior`, or `unclear`
+- Your confidence in your verdict: `high`, `medium`, or `low`
+- Evidence supporting your verdict (specific code comments, doc references, commit messages, prior issues/PRs, etc.)
+- If the verdict is `intended-behavior`: explain the design rationale and note that the reporter's concern could be reframed as a feature request or enhancement
+- If the verdict is `bug`: explain why the developer was not aware of or did not choose this behavior
+- If the verdict is `unclear`: explain what evidence is missing and what would resolve the ambiguity
diff --git a/.flue/Dockerfile.sandbox b/.flue/Dockerfile.sandbox
new file mode 100644
index 000000000000..d62a1c081394
--- /dev/null
+++ b/.flue/Dockerfile.sandbox
@@ -0,0 +1,58 @@
+FROM node:24-bookworm-slim
+
+# Avoid interactive prompts during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+
+# --- System packages ---
+# The slim image includes Node.js and npm but not git, curl, or wget.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+       ca-certificates curl wget git \
+    && rm -rf /var/lib/apt/lists/*
+
+# --- pnpm (global install via npm; version is not pinned) ---
+RUN npm install -g pnpm
+
+# --- OpenCode CLI ---
+# The installer puts the binary in $HOME/.opencode/bin (not configurable).
+# Copy it to /usr/local/bin so it's accessible when running as a non-root
+# user (the container uses --user to match the host workspace owner).
+RUN curl -fsSL https://opencode.ai/install -o /tmp/install-opencode.sh \
+    && bash /tmp/install-opencode.sh \
+    && rm /tmp/install-opencode.sh \
+    && cp /root/.opencode/bin/opencode /usr/local/bin/opencode \
+    && opencode --version
+
+# --- agent-browser + Chromium for headless browser testing ---
+# 1. Install system deps required by Chromium
+# 2. Install agent-browser + playwright (for the `playwright install` CLI)
+# 3. Download Chromium to a shared location accessible to any UID
+#    (the container runs as a non-root user via --user)
+# 4. Remove the playwright package (agent-browser bundles its own)
+ENV PLAYWRIGHT_BROWSERS_PATH=/opt/pw-browsers
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+       libxcb-shm0 libx11-xcb1 libx11-6 libxcb1 libxext6 libxrandr2 \
+       libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libgtk-3-0 \
+       libpangocairo-1.0-0 libpango-1.0-0 libatk1.0-0 libcairo-gobject2 \
+       libcairo2 libgdk-pixbuf-2.0-0 libxrender1 libasound2 libfreetype6 \
+       libfontconfig1 libdbus-1-3 libnss3 libnspr4 libatk-bridge2.0-0 \
+       libdrm2 libxkbcommon0 libatspi2.0-0 libcups2 libxshmfence1 libgbm1 \
+    && rm -rf /var/lib/apt/lists/* \
+    && npm install -g agent-browser playwright \
+    && playwright install chromium \
+    && chmod -R o+rx /opt/pw-browsers \
+    && npm uninstall -g playwright
+
+# NOTE: gh CLI is intentionally NOT installed: the sandbox has no GitHub tokens for it to use.
+
+# --- Compatibility fixes ---
+# Allow any directory as a git safe.directory. The host workspace is bind-mounted
+# at its original host path (e.g. /home/runner/work/astro/astro) and the container
+# runs as a non-root UID via --user, so git would otherwise refuse to operate.
+RUN git config --system --add safe.directory '*'
+
+EXPOSE 48765
+
+# Default: start the OpenCode server on the EXPOSEd port (48765), listening on all interfaces (0.0.0.0)
+CMD ["opencode", "serve", "--port", "48765", "--hostname", "0.0.0.0"]
diff --git a/.flue/workflows/issue-triage.ts b/.flue/workflows/issue-triage.ts
index e8f529b09fb8..caf0d8d629e9 100644
--- a/.flue/workflows/issue-triage.ts
+++ b/.flue/workflows/issue-triage.ts
@@ -1,65 +1,33 @@
 import type { Flue } from '@flue/client';
 import * as v from 'valibot';
 
-const reproductionResultSchema = v.object({
-	reproducible: v.pipe(
-		v.boolean(),
-		v.description('true if the bug was successfully reproduced, false otherwise'),
-	),
-	skipped: v.pipe(
-		v.boolean(),
-		v.description(
-			'true if reproduction was intentionally skipped (host-specific, unsupported version, etc.)',
-		),
-	),
-});
-
-const diagnoseResultSchema = v.object({
-	confidence: v.pipe(
-		v.nullable(v.picklist(['high', 'medium', 'low'])),
-		v.description('Diagnosis confidence level, null if not attempted'),
-	),
-});
+function assert(condition: unknown, message: string): asserts condition {
+	if (!condition) throw new Error(message); // falsy `condition` throws; otherwise TS narrows it to truthy for the caller
+}
 
-const fixResultSchema = v.object({
-	fixed: v.pipe(v.boolean(), v.description('true if the bug was successfully fixed and verified')),
-	commitMessage: v.pipe(
-		v.nullable(v.string()),
-		v.description(
-			'A short commit message describing the fix, e.g. "fix(auto-triage): prevent crash when rendering client:only components". null if not fixed.',
-		),
+const issueDetailsSchema = v.object({ // validates the JSON parsed from `gh issue view --json …` in triage()
+	title: v.string(),
+	body: v.string(),
+	author: v.object({ login: v.string() }),
+	labels: v.array(v.looseObject({ name: v.string() })), // looseObject: only `name` is checked, other keys pass through
+	createdAt: v.string(),
+	state: v.string(),
+	number: v.number(),
+	url: v.string(),
+	comments: v.array(
+		v.looseObject({
+			author: v.object({ login: v.string() }),
+			authorAssociation: v.string(),
+			body: v.string(),
+			createdAt: v.string(),
+		}),
 	),
 });
+type IssueDetails = v.InferOutput<typeof issueDetailsSchema>; // static type derived from the schema above
 
-const labelResultSchema = v.object({
-	labels: v.pipe(
-		v.array(v.string()),
-		v.description(
-			'The labels to apply to the issue (e.g. ["- P1: chore", "pkg: react"]). Array must contain one "priority" label.',
-		),
-	),
-});
-
-export default async function triage(flue: Flue) {
-	const { issueNumber } = flue.args as {
-		issueNumber: number;
-	};
-
-	const issueJson = await flue.shell(`gh issue view ${issueNumber} --json title,body,comments`, {
-		env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN },
-	});
-	const issue = JSON.parse(issueJson.stdout) as {
-		title: string;
-		body: string;
-		comments: Array<{ author: { login: string }; body: string }>;
-	};
-
-	// If there are prior comments, this is a re-triage. Check whether new
-	// actionable information has been provided before running the full pipeline.
-	const hasExistingConversation = issue.comments.length > 0;
-	if (hasExistingConversation) {
-		const shouldRetriage = await flue.prompt(
-			`You are reviewing a GitHub issue conversation to decide whether a triage re-run is warranted.
+async function shouldRetriage(flue: Flue, issue: IssueDetails): Promise<'yes' | 'no'> {
+	return flue.prompt(
+		`You are reviewing a GitHub issue conversation to decide whether a triage re-run is warranted.
 
 ## Issue
 **${issue.title}**
@@ -85,21 +53,204 @@ than what was already attempted, respond with "yes".
 meaningful reproduction information, respond with "no".
 
 Return only "yes" or "no" inside the ---RESULT_START--- / ---RESULT_END--- block.`,
-			{ result: v.picklist(['yes', 'no']) },
-		);
+		{ result: v.picklist(['yes', 'no']) },
+	);
+}
+
+const repoLabelSchema = v.object({
+	name: v.string(),
+	description: v.nullable(v.string()),
+});
+type RepoLabel = v.InferOutput<typeof repoLabelSchema>;
+
+/**
+ * Loads every label defined on withastro/astro via `gh api` and buckets the
+ * ones triage cares about: priority labels ("- P<digit>…") and "pkg:" labels.
+ * Throws if a line is not valid JSON or fails `repoLabelSchema` validation.
+ */
+async function fetchRepoLabels(
+	flue: Flue,
+): Promise<{ priorityLabels: RepoLabel[]; packageLabels: RepoLabel[] }> {
+	const ghOutput = await flue.shell(
+		"gh api repos/withastro/astro/labels --paginate --jq '.[] | {name, description}'",
+		{ env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN } },
+	);
+	// stdout is treated as newline-delimited JSON: one label object per non-empty line.
+	const rawLabels: unknown[] = [];
+	for (const line of ghOutput.stdout.trim().split('\n')) {
+		if (line) rawLabels.push(JSON.parse(line));
+	}
+	const labels = v.parse(v.array(repoLabelSchema), rawLabels);
+	const priorityLabels = labels.filter(({ name }) => /^- P\d/.test(name));
+	const packageLabels = labels.filter(({ name }) => name.startsWith('pkg:'));
+	return { priorityLabels, packageLabels };
+}
+
+// Asks the model to extract the priority label from the posted triage report and to pick
+// 0-3 package labels, then returns them as `--add-label` flags for `gh issue edit`.
+async function selectTriageLabels(
+	flue: Flue,
+	{ comment, packageLabels }: { comment: string; packageLabels: RepoLabel[] },
+): Promise<string> {
+	const labelResult = await flue.prompt(
+		`Label the following GitHub issue based on the triage report that was already posted.
+
+The report already contains a **Priority** judgment with a specific priority label. Your job is to:
+1. Extract the priority label that was already chosen in the report's Priority section.
+2. Select 0-3 package labels based on where the issue lives (or most likely lives) in the monorepo.
+
+### Rules
+- The priority label has already been decided in the report. Extract it exactly as written.
+- Select 0-3 package labels based on the triage report's findings. If you cannot confidently determine the affected package(s), return an empty array for packages.
+- Return the exact label names as they appear in the lists below — do not modify them.
+
+### Package Labels (select zero or more)
+${packageLabels.map((l) => `- "${l.name}": ${l.description || '(no description)'}`).join('\n')}
+
+--- 
+
+<triage-report format="md">
+${comment}
+</triage-report>
+`,
+		{
+			result: v.object({
+				labels: v.pipe(
+					v.array(v.string()),
+					v.nonEmpty('Labels array must contain at least the priority label.'),
+					v.description('The labels to apply to the issue. Must include the priority label from the comment\'s Priority section, plus any selected package labels (e.g. ["- P2: important", "pkg: react"]).'),
+				),
+			}),
+		},
+	);
+	// Labels are model output: single-quote them so the shell cannot expand $(...), backticks or $VAR (JSON.stringify's double quotes would allow it).
+	return labelResult.labels.map((l) => `--add-label '${l.replace(/'/g, `'\\''`)}'`).join(' ');
+}
+
+async function runTriagePipeline( // runs reproduce → diagnose → verify → fix, returning early when a stage rules out further work
+	flue: Flue,
+	issueNumber: number,
+	issueDetails: IssueDetails,
+): Promise<{
+	/** The last pipeline stage that ran before returning ('diagnose' is never reported on its own). */
+	completedStage: 'reproduce' | 'verify' | 'fix';
+	reproducible: boolean;
+	skipped: boolean;
+	verdict: 'bug' | 'intended-behavior' | 'unclear' | null; // null when the verify stage never ran
+	diagnosisConfidence: 'high' | 'medium' | 'low' | null; // null when diagnose never ran or reported no confidence
+	fixed: boolean;
+	commitMessage: string | null;
+}> {
+	const reproduceResult = await flue.skill('triage/reproduce.md', {
+		args: { issueNumber, issueDetails },
+		result: v.object({
+			reproducible: v.pipe(
+				v.boolean(),
+				v.description('true if the bug was successfully reproduced, false otherwise'),
+			),
+			skipped: v.pipe(
+				v.boolean(),
+				v.description(
+					'true if reproduction was intentionally skipped (host-specific, unsupported version, etc.)',
+				),
+			),
+		}),
+	});
+
+	if (reproduceResult.skipped || !reproduceResult.reproducible) { // no reproduction: stop before diagnose/verify/fix
+		return {
+			completedStage: 'reproduce',
+			reproducible: reproduceResult.reproducible,
+			skipped: reproduceResult.skipped,
+			verdict: null,
+			diagnosisConfidence: null,
+			fixed: false,
+			commitMessage: null,
+		};
+	}
+
+	const diagnoseResult = await flue.skill('triage/diagnose.md', {
+		args: { issueDetails },
+		result: v.object({
+			confidence: v.pipe(
+				v.nullable(v.picklist(['high', 'medium', 'low'])),
+				v.description('Diagnosis confidence level, null if not attempted'),
+			),
+		}),
+	});
+	const verifyResult = await flue.skill('triage/verify.md', {
+		args: { issueDetails },
+		result: v.object({
+			verdict: v.pipe(
+				v.picklist(['bug', 'intended-behavior', 'unclear']),
+				v.description('Whether the reported behavior is a bug, intended behavior, or unclear'),
+			),
+			confidence: v.pipe( // NOTE(review): parsed here but not included in any returned summary
+				v.picklist(['high', 'medium', 'low']),
+				v.description('Confidence level in the verdict'),
+			),
+		}),
+	});
+
+	if (verifyResult.verdict === 'intended-behavior') { // only this verdict skips the fix; 'bug' and 'unclear' both proceed
+		return {
+			completedStage: 'verify',
+			reproducible: true,
+			skipped: false,
+			verdict: verifyResult.verdict,
+			diagnosisConfidence: diagnoseResult.confidence,
+			fixed: false,
+			commitMessage: null,
+		};
+	}
+
+	const fixResult = await flue.skill('triage/fix.md', {
+		args: { issueDetails },
+		result: v.object({
+			fixed: v.pipe(
+				v.boolean(),
+				v.description('true if the bug was successfully fixed and verified'),
+			),
+			commitMessage: v.pipe(
+				v.nullable(v.string()),
+				v.description(
+					'A short commit message describing the fix, e.g. "fix(auto-triage): prevent crash when rendering client:only components". null if not fixed.',
+				),
+			),
+		}),
+	});
+	return {
+		completedStage: 'fix',
+		reproducible: true,
+		skipped: false,
+		verdict: verifyResult.verdict,
+		diagnosisConfidence: diagnoseResult.confidence,
+		fixed: fixResult.fixed,
+		commitMessage: fixResult.commitMessage,
+	};
+}
+
+export default async function triage(flue: Flue) {
+	const { issueNumber } = v.parse(v.object({ issueNumber: v.number() }), flue.args);
+	const issueResult = await flue.shell(
+		`gh issue view ${issueNumber} --json title,body,author,labels,createdAt,state,number,url,comments`,
+		{ env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN } },
+	);
+	const issueDetails = v.parse(issueDetailsSchema, JSON.parse(issueResult.stdout));
+
+	// If there are prior comments, this is a re-triage. Check whether new
+	// actionable information has been provided before running the full pipeline.
+	const hasExistingConversation = issueDetails.comments.length > 0;
+	if (hasExistingConversation) {
+		const shouldRetriageResult = await shouldRetriage(flue, issueDetails);
 
-		if (shouldRetriage === 'no') {
+		if (shouldRetriageResult === 'no') {
 			return { skipped: true, reason: 'No new actionable information' };
 		}
 	}
 
-	// Run the triage pipeline: reproduce → diagnose → fix
-	const reproduceResult = await flue.skill('triage/reproduce.md', {
-		args: { issueNumber },
-		result: reproductionResultSchema,
-	});
-	const diagnoseResult = await flue.skill('triage/diagnose.md', { result: diagnoseResultSchema });
-	const fixResult = await flue.skill('triage/fix.md', { result: fixResultSchema });
+	// Run the triage pipeline: reproduce → diagnose → verify → fix
+	const triageResult = await runTriagePipeline(flue, issueNumber, issueDetails);
 	let isPushed = false;
 
 	// If a successful fix was created, push the fix up to a new branch on GitHub.
@@ -107,16 +258,14 @@ Return only "yes" or "no" inside the ---RESULT_START--- / ---RESULT_END--- block
 	// - checkout that branch locally, using the fix as a starting point
 	// - create a PR from that branch entirely in the GH UI
 	// - ignore it completely
-	if (fixResult.fixed) {
+	if (triageResult.fixed) {
 		// Check if the fix skill left uncommitted changes in packages/
 		const status = await flue.shell('git status --porcelain');
-		// TODO: Assert flue.branch
 		if (status.stdout.trim()) {
 			await flue.shell(`git checkout -B ${flue.branch}`);
 			await flue.shell('git add -A');
-			// TODO: we should add comments to flue.shell internally, to find out why nothing happened.
 			await flue.shell(
-				`git commit -m ${JSON.stringify(fixResult.commitMessage ?? 'fix(auto-triage): automated fix')}`,
+				`git commit -m '${(triageResult.commitMessage ?? 'fix(auto-triage): automated fix').replace(/'/g, `'\\''`)}'`, // single-quoted: the model-written message must not be shell-expanded (backticks, $(...))
 			);
 			const pushResult = await flue.shell(`git push -f origin ${flue.branch}`);
 			console.info('push result:', pushResult);
@@ -124,9 +273,15 @@ Return only "yes" or "no" inside the ---RESULT_START--- / ---RESULT_END--- block
 		}
 	}
 
+	// Fetch repo labels upfront so we can pass priority labels to the comment
+	// skill (which selects the priority) and package labels to the label selector.
+	const { priorityLabels, packageLabels } = await fetchRepoLabels(flue);
+	assert(priorityLabels.length > 0, 'no priority labels found');
+	assert(packageLabels.length > 0, 'no package labels found');
+
 	const branchName = isPushed ? flue.branch : null;
 	const comment = await flue.skill('triage/comment.md', {
-		args: { branchName },
+		args: { branchName, priorityLabels },
 		result: v.pipe(
 			v.string(),
 			v.description(
@@ -140,65 +295,21 @@ Return only "yes" or "no" inside the ---RESULT_START--- / ---RESULT_END--- block
 		env: { GH_TOKEN: flue.secrets.FREDKBOT_GITHUB_TOKEN },
 	});
 
-	if (reproduceResult.reproducible) {
+	if (triageResult.reproducible) {
 		await flue.shell(`gh issue edit ${issueNumber} --remove-label "needs triage"`, {
 			env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN },
 		});
 
-		// Fetch all repo labels and select appropriate priority + package labels.
-		const labelsJson = await flue.shell(
-			"gh api repos/withastro/astro/labels --paginate --jq '.[] | {name, description}'",
-			{ env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN } },
-		);
-		const allLabels = labelsJson.stdout
-			.trim()
-			.split('\n')
-			.filter(Boolean)
-			.map((line) => JSON.parse(line) as { name: string; description: string });
-
-		// Filter to priority labels (P followed by a digit) and package labels (pkg: prefix)
-		const priorityLabels = allLabels.filter((l) => /^- P\d/.test(l.name));
-		const packageLabels = allLabels.filter((l) => l.name.startsWith('pkg:'));
-
-		const labelResult = await flue.prompt(
-			`Label the following GitHub issue based on our Triage Report which summarizes what we learned in our attempt to reproduce, diagnose, and fix the issue.
-
-Select the most appropriate labels from the list below. Use the label descriptions to guide your decision, combined with the triage report's cause and impact analysis.
-
-### Rules
-- Select exactly ONE priority label based on the label description and the severity and impact of the bug. Pay close attention to the "Cause" and "Impact" sections of the triage report.
-- You must select ONE priority label! If you are not sure, just use your best judgement based on the label descriptions and the findings of the triage report.
-- Select 0-3 package labels based on where where the issue lives (or most likely lives) in the monorepo. The triage report's diagnosis should make it clear. If you cannot confidently determine the affected package(s), return an empty array for packages.
-- Return the exact label names as they appear above — do not modify them.
-
-### Priority Labels (select exactly one)
-${priorityLabels.map((l) => `- "${l.name}": ${l.description || '(no description)'}`).join('\n')}
-
-### Package Labels (select zero or more)
-${packageLabels.map((l) => `- "${l.name}": ${l.description || '(no description)'}`).join('\n')}
-
---- 
-
-<github-issue format="json">
-${issueJson}
-</github-issue>
-
-<triage-report format="md">
-${comment}
-</triage-report>
-`,
-			{ result: labelResultSchema },
-		);
-
-		if (labelResult.labels.length > 0) {
-			const labelFlags = labelResult.labels
-				.map((l) => `--add-label ${JSON.stringify(l)}`)
-				.join(' ');
+		const labelFlags = await selectTriageLabels(flue, {
+			comment,
+			packageLabels,
+		});
+		if (labelFlags) {
 			await flue.shell(`gh issue edit ${issueNumber} ${labelFlags}`, {
 				env: { GH_TOKEN: flue.secrets.GITHUB_TOKEN },
 			});
 		}
-	} else if (reproduceResult.skipped) {
+	} else if (triageResult.skipped) {
 		// Triage was skipped due to a runner limitation. Keep "needs triage" so a
 		// maintainer can still pick it up, and add "auto triage skipped" to prevent
 		// the workflow from re-running on every new comment.
@@ -209,5 +320,5 @@ ${comment}
 		// Not reproducible: do nothing. The "needs triage" label stays on the issue
 		// so that it can continue to be worked on and triaged by the humans.
 	}
-	return { reproduceResult, diagnoseResult, fixResult, isPushed };
+	return { ...triageResult, isPushed };
 }
diff --git a/.github/workflows/build-sandbox-image.yml b/.github/workflows/build-sandbox-image.yml
new file mode 100644
index 000000000000..6f88292c8a00
--- /dev/null
+++ b/.github/workflows/build-sandbox-image.yml
@@ -0,0 +1,40 @@
+name: Build Sandbox Image
+
+on:
+  push:  # NOTE(review): no branch filter — a push to any branch touching these paths republishes :latest
+    paths: ['.flue/Dockerfile.sandbox', '.github/workflows/build-sandbox-image.yml']
+  workflow_dispatch:  # also allows manual rebuilds
+
+env:
+  IMAGE: ghcr.io/${{ github.repository }}/flue-sandbox
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write  # required to push the image to ghcr.io
+    steps:
+      - name: Lowercase image name  # github.repository may contain uppercase; image references must be lowercase
+        run: echo "IMAGE=${IMAGE,,}" >> "$GITHUB_ENV"
+
+      - uses: actions/checkout@v4
+
+      - uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - uses: docker/setup-buildx-action@v3
+
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: .flue/Dockerfile.sandbox
+          push: true
+          tags: |  # the moving :latest tag plus a tag derived from the Dockerfile's content hash
+            ${{ env.IMAGE }}:latest
+            ${{ env.IMAGE }}:${{ hashFiles('.flue/Dockerfile.sandbox') }}
+          cache-from: type=gha  # reuse layers from the GitHub Actions cache
+          cache-to: type=gha,mode=max
diff --git a/.github/workflows/issue-triage.yml b/.github/workflows/issue-triage.yml
index a2c45e4b0440..6fada3312383 100644
--- a/.github/workflows/issue-triage.yml
+++ b/.github/workflows/issue-triage.yml
@@ -6,6 +6,9 @@ on:
   issue_comment:
     types: [created]
 
+env:
+  IMAGE: ghcr.io/${{ github.repository }}/flue-sandbox
+
 concurrency:
   # Only one triage run per issue at a time. New runs queue (not cancel)
   # to avoid killing in-flight runs when the bot posts its own comment.
@@ -29,7 +32,11 @@ jobs:
     permissions:
       contents: write
       issues: write
+      packages: read
     steps:
+      - name: Lowercase image name
+        run: echo "IMAGE=${IMAGE,,}" >> "$GITHUB_ENV"
+
       - name: Checkout
         uses: actions/checkout@v4
 
@@ -47,13 +54,8 @@ jobs:
           node-version: 24
           cache: pnpm
 
-      - name: Install OpenCode
-        run: |
-          curl -fsSL https://opencode.ai/install | bash
-          echo "/home/runner/.opencode/bin" >> $GITHUB_PATH
-
-      - name: Install agent-browser
-        run: npm install -g agent-browser && agent-browser install
+      - name: Clone Astro Compiler (for debugging)
+        run: git clone --depth 1 https://github.com/withastro/compiler.git .compiler
 
       - name: Install deps
         run: pnpm install --frozen-lockfile
@@ -61,6 +63,20 @@ jobs:
       - name: Build
         run: pnpm build
 
+      - name: Pull sandbox image  # log in to GHCR with the job token (needs packages: read), then fetch the image
+        run: |
+          echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "$GITHUB_ACTOR" --password-stdin
+          docker pull "$IMAGE:latest"
+
+      - name: Verify sandbox image  # diagnostic only: prints tool versions/paths; a missing tool is reported, not fatal
+        run: |
+          docker run --rm $IMAGE:latest sh -c '
+            echo "node=$(node -v) pnpm=$(pnpm -v) gh=$(gh --version | head -1)"
+            echo "opencode=$(which opencode) version=$(opencode --version 2>/dev/null || echo missing)"
+            echo "chromium=$(ls /opt/pw-browsers 2>/dev/null && echo found || echo MISSING)"
+            echo "git-safe-dir=$(git config --system --get-all safe.directory 2>/dev/null || echo MISSING)"
+          '
+
       - name: Start Cloudflare Tunnel
         run: |
           curl -fsSL https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o /usr/local/bin/cloudflared
@@ -89,6 +105,7 @@ jobs:
           ISSUE_NUMBER: ${{ github.event.issue.number }}
         run: |
           pnpm flue run .flue/workflows/issue-triage.ts \
+            --sandbox $IMAGE:latest \
             --args "{\"issueNumber\": $ISSUE_NUMBER, \"triageDir\": \"triage/issue-$ISSUE_NUMBER\"}" \
             --branch "flue/fix-$ISSUE_NUMBER" \
             --model anthropic/claude-opus-4-6
diff --git a/.gitignore b/.gitignore
index 2a55bd13e0b5..c5dffbb05f07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 node_modules/
 /triage/
+/.compiler/
 dist/
 *.tsbuildinfo
 .DS_Store
diff --git a/AGENTS.md b/AGENTS.md
index 6fa7e6a74b66..4586f57dc1bf 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,34 +1,18 @@
-## Style Guide
+# Style Guide
 
-- Not defined here. For now, follow the same conventions and patterns that you detect in the surrounding code. Keep
+- Not defined here. For now, follow the same conventions and patterns that you detect in the surrounding code.
 - Keep formatting consistent. Our rules are defined in our [biome.jsonc](./biome.jsonc) file, enforced by Biome.
 - Run `pnpm format` to auto-format the entire repo.
 - Run `pnpm lint` to lint the entire repo.
 
-# Source Structure
+# Monorepo Structure
 
-This is a pnpm workspace monorepo with the following directory structure:
+- This directory is a Git monorepo containing a `pnpm` workspace. The codebase is primarily TypeScript.
+- All packages live in `packages/`.
+- Integration packages live in `packages/integrations/`.
+- The core Astro package is `packages/astro`.
 
-```
-packages/
-├── astro/                    # astro -- The core framework package
-│   └── src/
-│       ├── core/             # Build pipeline, rendering, routing
-│       ├── vite-plugin-astro/ # Vite integration
-│       ├── content/          # Content collections
-│       └── ...
-├── integrations/
-│   ├── react/               # @astrojs/react
-│   ├── node/                # @astrojs/node
-│   ├── cloudflare/          # @astrojs/cloudflare
-│   └── ...
-└── markdown/
-    └── remark/              # @astrojs/markdown-remark
-```
-
-When you run `pnpm install`, source packages in `packages/` are symlinked into `node_modules/` of their dependants via `workspace:*` dependencies.
-
-In error stack traces, built files in `node_modules/` will often map to TypeScript source files in the `packages/` directory.
+In error stack traces, built files from workspace packages in `node_modules/` map to TypeScript source in `packages/`:
 
 - `node_modules/astro/dist/...` → `packages/astro/src/...`
 - `node_modules/@astrojs/react/...` → `packages/integrations/react/src/...`
diff --git a/eslint.config.js b/eslint.config.js
index b15b79c9c5b8..86c5c448dee2 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -28,6 +28,7 @@ export default [
 			'benchmark/static-projects/**',
 			'examples/',
 			'scripts/',
+			'triage/',
 			'.github/',
 			'.changeset/',
 		],
diff --git a/package.json b/package.json
index 6647e3c1d94c..72304524c62b 100644
--- a/package.json
+++ b/package.json
@@ -64,8 +64,8 @@
     "@biomejs/biome": "2.3.6",
     "@changesets/changelog-github": "^0.5.2",
     "@changesets/cli": "^2.29.8",
-    "@flue/cli": "^0.0.18",
-    "@flue/client": "^0.0.10",
+    "@flue/cli": "^0.0.30",
+    "@flue/client": "^0.0.17",
     "@types/node": "^18.19.115",
     "esbuild": "0.25.5",
     "eslint": "^9.39.2",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b6857084e73f..cf0f821114b8 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -25,11 +25,11 @@ importers:
         specifier: ^2.29.8
         version: 2.29.8(@types/node@18.19.130)
       '@flue/cli':
-        specifier: ^0.0.18
-        version: 0.0.18(typescript@5.9.3)
+        specifier: ^0.0.30
+        version: 0.0.30(typescript@5.9.3)
       '@flue/client':
-        specifier: ^0.0.10
-        version: 0.0.10(typescript@5.9.3)
+        specifier: ^0.0.17
+        version: 0.0.17(typescript@5.9.3)
       '@types/node':
         specifier: ^18.19.115
         version: 18.19.130
@@ -8821,12 +8821,12 @@ packages:
   '@fastify/static@9.0.0':
     resolution: {integrity: sha512-r64H8Woe/vfilg5RTy7lwWlE8ZZcTrc3kebYFMEUBrMqlydhQyoiExQXdYAy2REVpST/G35+stAM8WYp1WGmMA==}
 
-  '@flue/cli@0.0.18':
-    resolution: {integrity: sha512-qulyJWYQ/idUs6bDSGZxP4jnQ9zrNwD/kKK0z2sElEUVx5OcwWmnlPJS7DbbQSBt25WelWu4rPZ11Ad4CXVuhQ==}
+  '@flue/cli@0.0.30':
+    resolution: {integrity: sha512-wpVTFO1IF+mRI60Zixy9JK1RFbMVBat8JhyZwOAEDVbuKKKSp63SnPn1N2p6mmI2XWpn7mTPtESjfQV2e1E9VA==}
     hasBin: true
 
-  '@flue/client@0.0.10':
-    resolution: {integrity: sha512-wtSd0h9SZyw3xkcyxrfp3wVeXMjmm/vD2oDQkT0SSYEunnSaq9G5zBTOzJWtTPgqpoKQumuF3Qc+C+rRZqb5vA==}
+  '@flue/client@0.0.17':
+    resolution: {integrity: sha512-y74mA7gJpxFYWz8o4tBOi3tIzaTZTgkikG8GPGA/4/8Xm4FMI7bCYebI5xYLAKH/ZWVjws5v3py7vCYDrfDpOA==}
 
   '@fontsource/monofett@5.2.8':
     resolution: {integrity: sha512-cUtT8ScH3HHsMBkRrXFCrhGpKqRrKVNOhnYVSusECfB7g13YZjOrrLlhlc3o+R2IYpRrQQg/T/febSVD6k2Dhw==}
@@ -9394,8 +9394,8 @@ packages:
     resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
     engines: {node: '>= 8'}
 
-  '@opencode-ai/sdk@1.1.56':
-    resolution: {integrity: sha512-VFIk4wS7chc1fbodMXstBHV6uj9z25g3c72SlwBYjzPZZLgtr9IUK+FyGMUYefXYeeXJ/l91CdmfjCjDZTmD1g==}
+  '@opencode-ai/sdk@1.2.2':
+    resolution: {integrity: sha512-vdCSRoqZdh2ruof8w5nIRuCXeVdNP0ffOHm9p74cAAKkupptQ5khRwBB9U0mcWEL7ey7cZ4gttvGnHMRGn+TPg==}
 
   '@opentelemetry/api-logs@0.203.0':
     resolution: {integrity: sha512-9B9RU0H7Ya1Dx/Rkyc4stuBZSGVQF27WigitInx2QQoj6KUpEFYPKoWjdFTunJYxmXmh17HeBvbMa1EhGyPmqQ==}
@@ -18078,15 +18078,15 @@ snapshots:
       fastq: 1.19.1
       glob: 13.0.1
 
-  '@flue/cli@0.0.18(typescript@5.9.3)':
+  '@flue/cli@0.0.30(typescript@5.9.3)':
     dependencies:
-      '@flue/client': 0.0.10(typescript@5.9.3)
+      '@flue/client': 0.0.17(typescript@5.9.3)
     transitivePeerDependencies:
       - typescript
 
-  '@flue/client@0.0.10(typescript@5.9.3)':
+  '@flue/client@0.0.17(typescript@5.9.3)':
     dependencies:
-      '@opencode-ai/sdk': 1.1.56
+      '@opencode-ai/sdk': 1.2.2
       '@valibot/to-json-schema': 1.5.0(valibot@1.2.0(typescript@5.9.3))
       valibot: 1.2.0(typescript@5.9.3)
     transitivePeerDependencies:
@@ -18827,7 +18827,7 @@ snapshots:
       '@nodelib/fs.scandir': 2.1.5
       fastq: 1.19.1
 
-  '@opencode-ai/sdk@1.1.56': {}
+  '@opencode-ai/sdk@1.2.2': {}
 
   '@opentelemetry/api-logs@0.203.0':
     dependencies:

From dc58ce04ed5877f62c2d7728206458b2431bbf32 Mon Sep 17 00:00:00 2001
From: "Fred K. Schott" <622227+FredKSchott@users.noreply.github.com>
Date: Sun, 15 Feb 2026 01:06:38 -0800
Subject: [PATCH 2/2] auto-triage improvements: fix tests, subagents, triage
 directory (#15522)

* docs: fix test command syntax in AGENTS.md to use astro-scripts directly

Replace package.json script references (test:unit, test:match, etc.) with
direct astro-scripts test invocations. Add 'exec' to pnpm commands so
astro-scripts is resolved as a bin, not a script name. Document key flags
(--match, --parallel, --watch, --timeout, --only).

* docs: improve triage skill reproduce step with path validation

Clarify that triageDir must be passed as a positional argument to
create-astro, and add a verification step to confirm the project was
created at the expected path before proceeding.

* fix: use cp from examples/ instead of create-astro for triage scaffolding

create-astro pulls the latest published Astro from npm, which doesn't
match the monorepo version during beta. Copy from examples/ instead to
ensure the workspace-linked version is always used.

* update @flue/cli to 0.0.31

---------

Co-authored-by: Fred K. Schott <fschott@cloudflare.com>
---
 .agents/skills/triage/reproduce.md | 27 +++++++++++++++++----------
 AGENTS.md                          | 20 +++++++++++++-------
 package.json                       |  2 +-
 pnpm-lock.yaml                     | 10 +++++-----
 4 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/.agents/skills/triage/reproduce.md b/.agents/skills/triage/reproduce.md
index 2b32966ceafc..1ea0868b4ad7 100644
--- a/.agents/skills/triage/reproduce.md
+++ b/.agents/skills/triage/reproduce.md
@@ -73,7 +73,7 @@ Skip if a repository maintainer has commented that this issue should not be repr
 
 Every bug report should include some sort of reproduction. The reproduction project goes in the `triageDir` directory (e.g. `triage/gh-123`). If no `triageDir` is provided, default to `triage/gh-<issue_number>`.
 
-Set up the reproduction project based on what the issue provides you. Once the reproduction project directory has been completed, run `pnpm install` in the workspace to connect it to the rest of the monorepo.
+Set up the reproduction project based on what the issue provides you. Once the reproduction project directory has been completed, run `pnpm install` at the top-level workspace root to connect it to the rest of the monorepo.
 
 ### StackBlitz Project URL (`https://stackblitz.com/edit/...`)
 
@@ -110,22 +110,29 @@ You may still need to set up a project from scratch (see fallback below) and app
 
 ### Manual Steps Reproduction
 
-If no reproduction URL is provided, you will need to follow the manual steps that the user provided instead.
-
-Scaffold a fresh Astro project into the triage directory using `create-astro`. Use `--no-install` to skip dependency installation (we will run `pnpm install` later) and `--no-git` to avoid creating a nested git repo. Use `--template` to pick a starting template — if the user didn't mention a specific one, use `minimal` as the default.
+If no reproduction URL is provided, you will need to follow the manual steps that the user provided instead. If the user didn't mention a specific template, use `minimal` as the default.
 
 ```bash
-npx create-astro@latest <triageDir> --template minimal --no-install --no-git -y
+# 1. List available example templates
+ls examples/
+# 2. Remove the selected template's node_modules directory to avoid problems with `cp -r`
+rm -rf examples/<template>/node_modules
+# 3. Copy over the selected template into the triage directory
+cp -r examples/<template> <triageDir>
+# 4. Re-run install (at the workspace root) to add back missing node_modules dependencies
+pnpm install
 ```
 
-Then, modify the triage project as needed:
+Verify that the project was created in the correct place (`cat <triageDir>/package.json`).
+
+Then, modify the triage project as needed to attempt your reproduction:
 
-1. Update `astro.config.mjs` with required configuration
-2. Add any required dependencies or Astro integrations (`@astrojs/react`, etc.)
-3. Create pages, components, or middleware that trigger the bug
+1. Update `astro.config.mjs` with required configuration changes
+2. Add/modify any dependencies or Astro integrations (`@astrojs/react`, etc.)
+3. Add/modify any pages, components, middleware, etc. that trigger the bug
 4. Add/modify any additional files mentioned in the issue
 
-Keep the reproduction as minimal as possible — only add what's needed to trigger the bug.
+Keep the reproduction as minimal as possible — only add what the issue reporter has documented as needed to trigger the bug.
 
 ## Step 4: Attempt Reproduction in the Triage Project
 
diff --git a/AGENTS.md b/AGENTS.md
index 4586f57dc1bf..0e8a706fc96b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -21,12 +21,18 @@ Note: Edits to source files take effect after rebuilding the package via `pnpm b
 
 # Running Tests
 
-- Run `pnpm test` in the workspace root to run the full suite (slow).
-- Run `pnpm -C <package-directory> test` to run a package’s tests (example: `pnpm -C packages/astro test`, `pnpm -C packages/integrations/react test`).
-- Run an individual test file with `node path/to/test.js` (non-E2E).
-- For focused Astro package runs: `pnpm -C packages/astro test:unit`, `pnpm -C packages/astro test:integration`, `pnpm -C packages/astro test:cli`, `pnpm -C packages/astro test:types`.
-- For matching a subset by test name in Astro: `pnpm -C packages/astro test:match -- "<pattern>"`.
-- For E2E: `pnpm test:e2e` or `pnpm test:e2e:match -- "<pattern>"`.
+- Run `pnpm test` in the workspace root or a package directory to run the full test suite (can be slow!)
+- Integration tests live in special `packages/integrations` folders.
+- Example: `pnpm -C <package-directory> exec astro-scripts test` - Run a single package test suite
+- Example: `pnpm -C <package-directory> exec astro-scripts test "test/actions.test.js"` - Run a single test file
+- Example: `pnpm -C <package-directory> exec astro-scripts test "test/**/*.test.js" --match "CSS"` - Run specific tests matching a string or regex pattern
+- Example: `pnpm -C <package-directory> exec astro-scripts test "test/{actions,css,middleware}.test.js"` - Run multiple test files
+- Key flags:
+  - `--match` / `-m`: Filter tests by name pattern (regex)
+  - `--only` / `-o`: Run only tests marked with `.only`
+  - `--parallel` / `-p`: Run tests in parallel (default is sequential)
+  - `--timeout` / `-t`: Set timeout in milliseconds
+  - `--watch` / `-w`: Watch mode
 
 # Astro Quick Reference
 
@@ -43,7 +49,7 @@ Note: Edits to source files take effect after rebuilding the package via `pnpm b
 
 - Use `astro dev` and `astro preview` in the background to prevent hanging your entire session, and use `&` to run them in the background. Use `--port RANDOM_NUMBER --strictPort` to avoid port conflicts. Cleanup old servers when you're done.
 - Use `astro dev` and `astro preview` as web servers for Astro project. They are reliable. Don't use other web servers for testing.
-- Use `pnpm -C <dir> <command>` for project-local commands when working in packages/examples/triage directories. Only omit `-C` flag when intentionally working in the monorepo root. (Example: `pnpm -C packages/astro build`, `pnpm -C examples/blog dev`)
+- Use `pnpm -C <dir> <command>` for project-local script commands when working in packages/examples/triage directories. Only omit `-C` flag when intentionally working in the monorepo root. (Example: `pnpm -C packages/astro build`, `pnpm -C examples/blog dev`)
 - Use `agent-browser` for web automation or when UI interaction, long-running browsers, or HMR testing is required. Use `agent-browser --help` for all commands. Use this core workflow:
   - Example: `agent-browser open <url>` - Navigate to page
   - Example: `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)
diff --git a/package.json b/package.json
index 72304524c62b..24529b517184 100644
--- a/package.json
+++ b/package.json
@@ -64,7 +64,7 @@
     "@biomejs/biome": "2.3.6",
     "@changesets/changelog-github": "^0.5.2",
     "@changesets/cli": "^2.29.8",
-    "@flue/cli": "^0.0.30",
+    "@flue/cli": "^0.0.31",
     "@flue/client": "^0.0.17",
     "@types/node": "^18.19.115",
     "esbuild": "0.25.5",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index cf0f821114b8..44fac7ffba04 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -25,8 +25,8 @@ importers:
         specifier: ^2.29.8
         version: 2.29.8(@types/node@18.19.130)
       '@flue/cli':
-        specifier: ^0.0.30
-        version: 0.0.30(typescript@5.9.3)
+        specifier: ^0.0.31
+        version: 0.0.31(typescript@5.9.3)
       '@flue/client':
         specifier: ^0.0.17
         version: 0.0.17(typescript@5.9.3)
@@ -8821,8 +8821,8 @@ packages:
   '@fastify/static@9.0.0':
     resolution: {integrity: sha512-r64H8Woe/vfilg5RTy7lwWlE8ZZcTrc3kebYFMEUBrMqlydhQyoiExQXdYAy2REVpST/G35+stAM8WYp1WGmMA==}
 
-  '@flue/cli@0.0.30':
-    resolution: {integrity: sha512-wpVTFO1IF+mRI60Zixy9JK1RFbMVBat8JhyZwOAEDVbuKKKSp63SnPn1N2p6mmI2XWpn7mTPtESjfQV2e1E9VA==}
+  '@flue/cli@0.0.31':
+    resolution: {integrity: sha512-Y9naqmmHGXZw2nCCmeJ1yap5RPFdgABJmb2yVJ2Dx2blxTkR+cadcJ6G+uQRTiUckUl788KXtxTUVgUv/Oxpig==}
     hasBin: true
 
   '@flue/client@0.0.17':
@@ -18078,7 +18078,7 @@ snapshots:
       fastq: 1.19.1
       glob: 13.0.1
 
-  '@flue/cli@0.0.30(typescript@5.9.3)':
+  '@flue/cli@0.0.31(typescript@5.9.3)':
     dependencies:
       '@flue/client': 0.0.17(typescript@5.9.3)
     transitivePeerDependencies: