diff --git a/.codex/skills/gstack-browse/SKILL.md b/.codex/skills/gstack-browse/SKILL.md new file mode 100644 index 0000000..fadc46d --- /dev/null +++ b/.codex/skills/gstack-browse/SKILL.md @@ -0,0 +1,25 @@ +--- +name: gstack-browse +description: Use gstack's compiled browser engine for browsing, UI verification, screenshots, and dogfooding flows. +--- + +# gstack-browse + +Use this skill when the user says `/browse` or asks to browse a site, click through a flow, capture screenshots, verify UI behavior, or dogfood a web app with gstack's browser engine. + +## First steps + +1. Read `../../../references/workflows/compatibility.md`. +2. Read `../../../references/workflows/browse.md`. +3. If the browser binary is missing, run `../../../setup --host codex`. + +## Tool expectations + +- Prefer shell execution via `../../../browse/bin/find-browse` or `../../../browse/dist/browse`. +- Use the compiled gstack browser instead of host-native browser tooling whenever it can do the job. +- Pull in `../../../BROWSER.md` only when you need deeper command coverage. + +## Boundaries + +- Switch to `gstack-qa` for a structured QA report. +- Switch to `gstack-browser-cookies` for cookie/session import. diff --git a/.codex/skills/gstack-browser-cookies/SKILL.md b/.codex/skills/gstack-browser-cookies/SKILL.md new file mode 100644 index 0000000..bcceb77 --- /dev/null +++ b/.codex/skills/gstack-browser-cookies/SKILL.md @@ -0,0 +1,24 @@ +--- +name: gstack-browser-cookies +description: Import real browser cookies into gstack's browser session for authenticated testing. +--- + +# gstack-browser-cookies + +Use this skill when the user says `/setup-browser-cookies` or asks to import browser cookies for authenticated testing. + +## First steps + +1. Read `../../../references/workflows/browser-cookies.md`. +2. Read `../../../references/workflows/compatibility.md`. +3. If the browser binary is missing, run `../../../setup --host codex`. 
+ +## Tool expectations + +- Use the compiled gstack browser CLI for the import. +- Verify the resulting session against a real authenticated page. +- Keep secrets out of the transcript. + +## Boundaries + +- If the user wants broader verification after import, switch to `gstack-qa` or `gstack-browse`. diff --git a/.codex/skills/gstack-plan-ceo-review/SKILL.md b/.codex/skills/gstack-plan-ceo-review/SKILL.md new file mode 100644 index 0000000..b5d99c1 --- /dev/null +++ b/.codex/skills/gstack-plan-ceo-review/SKILL.md @@ -0,0 +1,24 @@ +--- +name: gstack-plan-ceo-review +description: Pressure-test a plan in founder mode and decide whether to expand, hold, or reduce scope. +--- + +# gstack-plan-ceo-review + +Use this skill when the user says `/plan-ceo-review` or wants a founder-level product review of a plan, feature, or roadmap item. + +## First steps + +1. Read `../../../references/workflows/plan-ceo-review.md`. +2. Read `../../../references/workflows/compatibility.md` if the request used slash-style aliases. + +## Tool expectations + +- Review the plan, not the code. +- Pressure-test the premise, ambition level, and 12-month trajectory. +- Make the scope mode explicit: expansion, hold, or reduction. +- Use Codex-native user input only when ambition level or success criteria are genuinely ambiguous. + +## Boundaries + +- Hand off to `gstack-plan-eng-review` when the product direction is locked and technical review is next. diff --git a/.codex/skills/gstack-plan-eng-review/SKILL.md b/.codex/skills/gstack-plan-eng-review/SKILL.md new file mode 100644 index 0000000..5e7352e --- /dev/null +++ b/.codex/skills/gstack-plan-eng-review/SKILL.md @@ -0,0 +1,24 @@ +--- +name: gstack-plan-eng-review +description: Pressure-test a plan technically for architecture, failure modes, rollout, and tests. +--- + +# gstack-plan-eng-review + +Use this skill when the user says `/plan-eng-review` or wants technical review of a plan before implementation. + +## First steps + +1. 
Read `../../../references/workflows/plan-eng-review.md`. +2. Read `../../../references/workflows/compatibility.md` if the request used slash-style aliases. + +## Tool expectations + +- Audit the existing system surface first. +- Review architecture, failure modes, security, rollout, rollback, and tests. +- Prefer concrete diagrams and explicit invariants over generic advice. +- Use Codex-native user input only for real architecture or scope decisions. + +## Boundaries + +- Do not start implementing in this mode. diff --git a/.codex/skills/gstack-qa/SKILL.md b/.codex/skills/gstack-qa/SKILL.md new file mode 100644 index 0000000..7659228 --- /dev/null +++ b/.codex/skills/gstack-qa/SKILL.md @@ -0,0 +1,26 @@ +--- +name: gstack-qa +description: Run structured QA passes with gstack's browser engine and report template. +--- + +# gstack-qa + +Use this skill when the user says `/qa` or asks for a smoke test, systematic QA pass, diff-aware verification, or a structured bug report. + +## First steps + +1. Read `../../../references/workflows/compatibility.md`. +2. Read `../../../references/workflows/qa.md`. +3. Read `../../../qa/templates/qa-report-template.md`. +4. Read `../../../qa/references/issue-taxonomy.md`. +5. If the browser binary is missing, run `../../../setup --host codex`. + +## Tool expectations + +- Drive the session with the compiled gstack browser. +- Execute the QA pass and write the report; do not stop at analysis. +- Use Codex-native user input only when auth, CAPTCHA, or a missing target URL blocks progress. + +## Boundaries + +- If the user only wants browser interaction without a QA report, use `gstack-browse`. diff --git a/.codex/skills/gstack-retro/SKILL.md b/.codex/skills/gstack-retro/SKILL.md new file mode 100644 index 0000000..611d62a --- /dev/null +++ b/.codex/skills/gstack-retro/SKILL.md @@ -0,0 +1,23 @@ +--- +name: gstack-retro +description: Produce an engineering retrospective grounded in git history and recent delivery outcomes. 
+--- + +# gstack-retro + +Use this skill when the user says `/retro` or asks for an engineering retrospective based on recent project activity. + +## First steps + +1. Read `../../../references/workflows/retro.md`. +2. Read `../../../references/workflows/compatibility.md` if the request used slash-style aliases. + +## Tool expectations + +- Ground the retro in git history, open TODOs, and recent delivery outcomes. +- Separate wins, failures, and follow-ups clearly. +- Keep praise specific and criticism operational. + +## Boundaries + +- This mode is for analysis and synthesis, not implementation. diff --git a/.codex/skills/gstack-review/SKILL.md b/.codex/skills/gstack-review/SKILL.md new file mode 100644 index 0000000..b7ce311 --- /dev/null +++ b/.codex/skills/gstack-review/SKILL.md @@ -0,0 +1,26 @@ +--- +name: gstack-review +description: Run a findings-first pre-landing review of the current diff using gstack's review checklist. +--- + +# gstack-review + +Use this skill when the user says `/review` or wants a findings-first pre-landing review of the current diff. + +## First steps + +1. Read `../../../references/workflows/review.md`. +2. Read `../../../review/checklist.md`. +3. Read `../../../review/TODOS-format.md`. +4. Read `../../../review/greptile-triage.md` only if Greptile comments are relevant. + +## Tool expectations + +- Use repo inspection, `git diff`, and shell commands directly. +- Review the full diff before writing findings. +- Keep findings ordered by severity with file references. +- Use Codex-native user input only for real blocking decisions. + +## Boundaries + +- Do not drift into implementation unless the user explicitly asks for fixes. diff --git a/.codex/skills/gstack-ship/SKILL.md b/.codex/skills/gstack-ship/SKILL.md new file mode 100644 index 0000000..a45e057 --- /dev/null +++ b/.codex/skills/gstack-ship/SKILL.md @@ -0,0 +1,25 @@ +--- +name: gstack-ship +description: Execute deterministic shell-driven release steps for a ready branch. 
+--- + +# gstack-ship + +Use this skill when the user says `/ship` or wants deterministic shell-driven release steps for a ready branch. + +## First steps + +1. Read `../../../references/workflows/ship.md`. +2. Read `../../../references/workflows/review.md` if a pre-landing review has not already been done. + +## Tool expectations + +- Execute the repo’s real validation and release commands. +- Prefer documented wrappers and scripts over ad hoc command chains. +- Stop on merge conflicts, failing validation, or missing release prerequisites. +- Use Codex-native user input only when a real release decision is required. + +## Boundaries + +- Do not ship from `main`. +- Do not invent project commands that the repo does not actually have. diff --git a/.gitignore b/.gitignore index cc41a3e..9bc7a63 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ node_modules/ browse/dist/ .gstack/ .claude/skills/ +.codex/skills/gstack +.agents/skills/gstack /tmp/ *.log bun.lock diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..7965979 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,28 @@ +# gstack for Codex + +This repo now has two layers: + +- Portable core: `browse/`, the shared build/setup scripts, QA/review assets, and the workflow reference docs under `references/workflows/`. +- Host glue: `CLAUDE.md` plus top-level `*/SKILL.md` for Claude, and `.codex/skills/*` for Codex. + +## Codex rules + +- If the user says `/browse`, `/qa`, `/review`, `/ship`, `/plan-ceo-review`, `/plan-eng-review`, `/setup-browser-cookies`, or `/retro`, map that to the corresponding Codex skill: + - `/browse` → `gstack-browse` + - `/qa` → `gstack-qa` + - `/review` → `gstack-review` + - `/ship` → `gstack-ship` + - `/plan-ceo-review` → `gstack-plan-ceo-review` + - `/plan-eng-review` → `gstack-plan-eng-review` + - `/setup-browser-cookies` → `gstack-browser-cookies` + - `/retro` → `gstack-retro` +- Prefer the gstack browser binary first for browser automation and QA. 
Do not reach for host-native browser tooling when `browse/dist/browse` can do the job. +- If the browser binary is missing, run `./setup --host codex` from the gstack root before doing browser work. +- Codex skills live under `.codex/skills/`. Their detailed workflow contracts live under `references/workflows/`. +- Claude-only assets stay in `CLAUDE.md` and the top-level workflow directories. Do not rewrite those when a Codex-only change will solve the problem. + +## Editing guidance + +- Keep the browser runtime host-neutral. Path resolution and installation may branch by host, but the CLI/server behavior should stay shared. +- When workflow behavior changes, update the shared workflow reference in `references/workflows/` first, then touch Codex skills and Claude templates only where host-specific wording differs. +- Do not add Claude-only tool names or `.claude/skills` paths to `.codex/skills/*`. diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 07d03ba..2c06700 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -1,10 +1,15 @@ # Architecture -This document explains **why** gstack is built the way it is. For setup and commands, see CLAUDE.md. For contributing, see CONTRIBUTING.md. +This document explains **why** gstack is built the way it is. For Codex behavior, see AGENTS.md. For Claude-specific setup and commands, see CLAUDE.md. For contributing, see CONTRIBUTING.md. ## The core idea -gstack gives Claude Code a persistent browser and a set of opinionated workflow skills. The browser is the hard part — everything else is Markdown. +gstack gives coding agents a persistent browser and a set of opinionated workflow skills. The browser is the hard part — everything else is Markdown. 
+ +The repo is now split into a portable core plus host-specific glue: + +- Portable core: `browse/`, shared setup/build scripts, QA/review references, and `references/workflows/*` +- Host glue: top-level `*/SKILL.md` plus `CLAUDE.md` for Claude, `.codex/skills/*` plus `AGENTS.md` for Codex The key insight: an AI agent interacting with a browser needs **sub-second latency** and **persistent state**. If every command cold-starts a browser, you're waiting 3-5 seconds per tool call. If the browser dies between commands, you lose cookies, tabs, and login sessions. So gstack runs a long-lived Chromium daemon that the CLI talks to over localhost HTTP. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 34e502e..3eaf845 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,12 @@ Thanks for wanting to make gstack better. Whether you're fixing a typo in a skil ## Quick start -gstack skills are Markdown files that Claude Code discovers from a `skills/` directory. Normally they live at `~/.claude/skills/gstack/` (your global install). But when you're developing gstack itself, you want Claude Code to use the skills *in your working tree* — so edits take effect instantly without copying or deploying anything. +gstack now has two runtime surfaces: + +- Claude: top-level workflow directories plus `CLAUDE.md` +- Codex: `.codex/skills/*` plus `AGENTS.md` + +Codex does not need a special dev-mode shim when you are working in this repo: the repo-local `.codex/skills/` tree is already part of the checkout. Claude still uses dev mode so its `skills/` loader points at your working tree instead of a copied global install. That's what dev mode does. It symlinks your repo into the local `.claude/skills/` directory so Claude Code reads skills straight from your checkout. @@ -62,6 +67,8 @@ bin/dev-teardown ## Testing & evals +Static packaging checks are host-agnostic. 
Claude E2E and LLM eval tiers remain Claude-specific because they shell out to `claude -p` or call Anthropic directly. + ### Setup ```bash @@ -79,14 +86,16 @@ Bun auto-loads `.env` — no extra config. Conductor workspaces inherit `.env` f | Tier | Command | Cost | What it tests | |------|---------|------|---------------| -| 1 — Static | `bun test` | Free | Command validation, snapshot flags, SKILL.md correctness, TODOS-format.md refs, observability unit tests | -| 2 — E2E | `bun run test:e2e` | ~$3.85 | Full skill execution via `claude -p` subprocess | +| 1 — Static | `bun test` | Free | Command validation, snapshot flags, SKILL.md correctness, Codex packaging checks, TODOS-format.md refs, observability unit tests | +| 2 — E2E (Claude) | `bun run test:e2e` | ~$3.85 | Full skill execution via `claude -p` subprocess | +| 2 — E2E (Codex) | `bun run test:codex-e2e` | variable | Codex noninteractive smoke against repo-local `.codex/skills/*` | | 3 — LLM eval | `bun run test:evals` | ~$0.15 standalone | LLM-as-judge scoring of generated SKILL.md docs | | 2+3 | `bun run test:evals` | ~$4 combined | E2E + LLM-as-judge (runs both) | ```bash bun test # Tier 1 only (runs on every commit, <5s) bun run test:e2e # Tier 2: E2E only (needs EVALS=1, can't run inside Claude Code) +bun run test:codex-e2e # Tier 2: Codex E2E only (needs CODEX_EVALS=1) bun run test:evals # Tier 2 + 3 combined (~$4/run) ``` @@ -96,6 +105,7 @@ Runs automatically with `bun test`. No API keys needed. - **Skill parser tests** (`test/skill-parser.test.ts`) — Extracts every `$B` command from SKILL.md bash code blocks and validates against the command registry in `browse/src/commands.ts`. Catches typos, removed commands, and invalid snapshot flags. - **Skill validation tests** (`test/skill-validation.test.ts`) — Validates that SKILL.md files reference only real commands and flags, and that command descriptions meet quality thresholds. 
+- **Codex packaging tests** (`test/codex-compat.test.ts`) — Validates that `.codex/skills/*` exist, avoid Claude-only references, and that the compatibility alias wrappers emit the correct skill names. - **Generator tests** (`test/gen-skill-docs.test.ts`) — Tests the template system: verifies placeholders resolve correctly, output includes value hints for flags (e.g. `-d ` not just `-d`), enriched descriptions for key commands (e.g. `is` lists valid states, `press` lists key examples). ### Tier 2: E2E via `claude -p` (~$3.85/run) @@ -114,6 +124,19 @@ EVALS=1 bun test test/skill-e2e.test.ts - Saves full NDJSON transcripts and failure JSON for debugging - Tests live in `test/skill-e2e.test.ts`, runner logic in `test/helpers/session-runner.ts` +### Tier 2b: E2E via `codex exec --json` + +Runs a small noninteractive smoke suite against the repo-local Codex skills. + +```bash +CODEX_EVALS=1 bun test test/codex-e2e.test.ts +``` + +- Gated by `CODEX_EVALS=1` +- Uses `codex exec --json` instead of `claude -p` +- Validates that repo-local `.codex/skills/*` load cleanly and can drive basic shell-backed workflows +- Tests live in `test/codex-e2e.test.ts`, runner logic in `test/helpers/codex-session-runner.ts` + ### E2E observability When E2E tests run, they produce machine-readable artifacts in `~/.gstack-dev/`: @@ -166,6 +189,8 @@ Tests run against the browse binary directly — they don't require dev mode. SKILL.md files are **generated** from `.tmpl` templates. Don't edit the `.md` directly — your changes will be overwritten on the next build. +This applies to the Claude-facing top-level workflow skills. The Codex-facing `.codex/skills/*/SKILL.md` files are maintained directly and should stay short; the durable shared behavior belongs in `references/workflows/*`. + ```bash # 1. 
Edit the template vim SKILL.md.tmpl # or browse/SKILL.md.tmpl diff --git a/README.md b/README.md index 2754806..3f9b983 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # gstack -**gstack turns Claude Code from one generic assistant into a team of specialists you can summon on demand.** +**gstack turns one generic coding agent into a team of specialists you can summon on demand.** -Eight opinionated workflow skills for [Claude Code](https://docs.anthropic.com/en/docs/claude-code). Plan review, code review, one-command shipping, browser automation, QA testing, and engineering retrospectives — all as slash commands. +Eight opinionated workflow modes for [Claude Code](https://docs.anthropic.com/en/docs/claude-code), plus a Codex-compatible layer that reuses the same browser engine, workflow references, and setup/build flow. Plan review, code review, one-command shipping, browser automation, QA testing, and engineering retrospectives — without maintaining two forks. ### Without gstack @@ -85,6 +85,22 @@ You already use Claude Code heavily and want consistent, high-rigor workflows in This is not a prompt pack for beginners. It is an operating system for people who ship. +## Codex compatibility + +gstack now ships as two layers: + +- Portable core: `browse/`, shared build/setup scripts, QA/review assets, and `references/workflows/*` +- Host glue: `CLAUDE.md` + top-level skill prompts for Claude, `AGENTS.md` + `.codex/skills/*` for Codex + +Codex does not try to emulate Claude slash-command registration. Instead it gets: + +- repo-local skills in `.codex/skills/` +- host-aware install via `./setup --host codex` or `./setup --host auto` +- compatibility wrappers like `bin/gstack-browse`, `bin/gstack-qa`, and `bin/gstack-review` +- an explicit alias contract: `/browse` maps to `gstack-browse`, `/qa` to `gstack-qa`, and so on + +The compiled browser binary stays unchanged. Browser/QA flows still run through `browse/dist/browse`, not host-native browser tools. 
+ ## How to fly: 10 sessions at once gstack is powerful with one Claude Code session. It is transformative with ten. @@ -97,29 +113,74 @@ This is the setup I use. One person, ten parallel agents, each with the right co ## Install -**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+. `/browse` compiles a native binary — works on macOS and Linux (x64 and arm64). +**Requirements:** [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+, and either [Claude Code](https://docs.anthropic.com/en/docs/claude-code) or Codex. `/browse` compiles a native binary — works on macOS and Linux (x64 and arm64). + +### Codex: global install + +Clone anywhere, then register Codex skills globally: + +```bash +git clone https://github.com/garrytan/gstack.git ~/src/gstack +cd ~/src/gstack +./setup --host codex +``` + +This creates `~/.codex/skills/gstack` plus sibling skills: + +- `gstack-browse` +- `gstack-qa` +- `gstack-review` +- `gstack-plan-ceo-review` +- `gstack-plan-eng-review` +- `gstack-browser-cookies` +- `gstack-ship` +- `gstack-retro` + +### Codex: add to your repo so teammates get it -### Step 1: Install on your machine +```bash +mkdir -p .codex/skills +cp -Rf ~/src/gstack .codex/skills/gstack +rm -rf .codex/skills/gstack/.git +cd .codex/skills/gstack +./setup --host codex +``` -Open Claude Code and paste this. Claude will do the rest. +Then add an `AGENTS.md` section telling Codex to map `/browse`, `/qa`, `/review`, `/ship`, `/plan-ceo-review`, `/plan-eng-review`, `/setup-browser-cookies`, and `/retro` to the corresponding `gstack-*` skills. 
-> Install gstack: run `git clone https://github.com/garrytan/gstack.git ~/.claude/skills/gstack && cd ~/.claude/skills/gstack && ./setup` then add a "gstack" section to CLAUDE.md that says to use the /browse skill from gstack for all web browsing, never use mcp\_\_claude-in-chrome\_\_\* tools, and lists the available skills: /plan-ceo-review, /plan-eng-review, /review, /ship, /browse, /qa, /setup-browser-cookies, /retro. Then ask the user if they also want to add gstack to the current project so teammates get it. +### Claude: existing install path -### Step 2: Add to your repo so teammates get it (optional) +Global install still works: -> Add gstack to this project: run `cp -Rf ~/.claude/skills/gstack .claude/skills/gstack && rm -rf .claude/skills/gstack/.git && cd .claude/skills/gstack && ./setup` then add a "gstack" section to this project's CLAUDE.md that says to use the /browse skill from gstack for all web browsing, never use mcp\_\_claude-in-chrome\_\_\* tools, lists the available skills: /plan-ceo-review, /plan-eng-review, /review, /ship, /browse, /qa, /setup-browser-cookies, /retro, and tells Claude that if gstack skills aren't working, run `cd .claude/skills/gstack && ./setup` to build the binary and register skills. +```bash +git clone https://github.com/garrytan/gstack.git ~/.claude/skills/gstack +cd ~/.claude/skills/gstack +./setup --host claude +``` + +Project-local vendoring still works: + +```bash +cp -Rf ~/.claude/skills/gstack .claude/skills/gstack +rm -rf .claude/skills/gstack/.git +cd .claude/skills/gstack +./setup --host claude +``` -Real files get committed to your repo (not a submodule), so `git clone` just works. The binary and node\_modules are gitignored — teammates just need to run `cd .claude/skills/gstack && ./setup` once to build (or `/browse` handles it automatically on first use). +The Claude path is intentionally preserved. The Codex layer is additive, not a rewrite. 
### What gets installed -- Skill files (Markdown prompts) in `~/.claude/skills/gstack/` (or `.claude/skills/gstack/` for project installs) -- Symlinks at `~/.claude/skills/browse`, `~/.claude/skills/qa`, `~/.claude/skills/review`, etc. pointing into the gstack directory +- Host root: `~/.claude/skills/gstack` or `~/.codex/skills/gstack` depending on setup host +- Host-visible skill aliases: + - Claude: `~/.claude/skills/browse`, `~/.claude/skills/qa`, `~/.claude/skills/review`, etc. + - Codex: `~/.codex/skills/gstack-browse`, `~/.codex/skills/gstack-qa`, `~/.codex/skills/gstack-review`, etc. - Browser binary at `browse/dist/browse` (~58MB, gitignored) - `node_modules/` (gitignored) +- Shared workflow references in `references/workflows/` - `/retro` saves JSON snapshots to `.context/retros/` in your project for trend tracking -Everything lives inside `.claude/`. Nothing touches your PATH or runs in the background. +Nothing touches your PATH or runs in the background. The compatibility wrapper scripts in `bin/` just print the exact Codex skill invocation text. 
--- diff --git a/SKILL.md b/SKILL.md index 9cc9acd..a46bcef 100644 --- a/SKILL.md +++ b/SKILL.md @@ -19,11 +19,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. # gstack browse: QA Testing & Dogfooding @@ -35,8 +38,9 @@ Auto-shuts down after 30 min idle. 
State persists between calls (cookies, tabs, ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +for _cand in "$_ROOT/.codex/skills/gstack/browse/dist/browse" "$_ROOT/.agents/skills/gstack/browse/dist/browse" "$_ROOT/.claude/skills/gstack/browse/dist/browse" "$HOME/.codex/skills/gstack/browse/dist/browse" "$HOME/.agents/skills/gstack/browse/dist/browse" "$HOME/.claude/skills/gstack/browse/dist/browse"; do + [ -n "$_cand" ] && [ -x "$_cand" ] && B="$_cand" && break +done if [ -x "$B" ]; then echo "READY: $B" else diff --git a/bin/gstack-alias b/bin/gstack-alias new file mode 100755 index 0000000..4d123c2 --- /dev/null +++ b/bin/gstack-alias @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [ $# -lt 3 ]; then + echo "Usage: gstack-alias SKILL_NAME SLASH_ALIAS WORKFLOW_DOC [request...]" >&2 + exit 1 +fi + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +SKILL_NAME="$1" +SLASH_ALIAS="$2" +WORKFLOW_DOC="$3" +shift 3 + +REQUEST="" +if [ $# -gt 0 ]; then + # Join the request verbatim: it is pasted as prose, and printf %q would backslash-escape URLs and spaces. + REQUEST="$*" +fi + +echo "Codex skill: $SKILL_NAME" +echo "Compatibility alias: $SLASH_ALIAS" +echo "Workflow doc: $ROOT/$WORKFLOW_DOC" +echo "" +echo "Prompt to paste into Codex:" +if [ -n "$REQUEST" ]; then + echo "Use the \`$SKILL_NAME\` skill from gstack. Request: $REQUEST" +else + echo "Use the \`$SKILL_NAME\` skill from gstack." 
+fi diff --git a/bin/gstack-browse b/bin/gstack-browse new file mode 100755 index 0000000..cfb739f --- /dev/null +++ b/bin/gstack-browse @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-browse /browse references/workflows/browse.md "$@" diff --git a/bin/gstack-browser-cookies b/bin/gstack-browser-cookies new file mode 100755 index 0000000..9c66325 --- /dev/null +++ b/bin/gstack-browser-cookies @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-browser-cookies /setup-browser-cookies references/workflows/browser-cookies.md "$@" diff --git a/bin/gstack-config b/bin/gstack-config index e99a940..a7dde79 100755 --- a/bin/gstack-config +++ b/bin/gstack-config @@ -23,7 +23,24 @@ case "${1:-}" in VALUE="${3:?Usage: gstack-config set }" mkdir -p "$STATE_DIR" if grep -qE "^${KEY}:" "$CONFIG_FILE" 2>/dev/null; then - sed -i '' "s/^${KEY}:.*/${KEY}: ${VALUE}/" "$CONFIG_FILE" + TMP_FILE="$(mktemp "${CONFIG_FILE}.XXXXXX")" + awk -v key="$KEY" -v value="$VALUE" ' + BEGIN { replaced = 0 } + $1 == key ":" { + if (!replaced) { + print key ": " value + replaced = 1 + } + next + } + { print } + END { + if (!replaced) { + print key ": " value + } + } + ' "$CONFIG_FILE" > "$TMP_FILE" + mv "$TMP_FILE" "$CONFIG_FILE" else echo "${KEY}: ${VALUE}" >> "$CONFIG_FILE" fi diff --git a/bin/gstack-plan-ceo-review b/bin/gstack-plan-ceo-review new file mode 100755 index 0000000..54d9aff --- /dev/null +++ b/bin/gstack-plan-ceo-review @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-plan-ceo-review /plan-ceo-review references/workflows/plan-ceo-review.md "$@" diff --git a/bin/gstack-plan-eng-review b/bin/gstack-plan-eng-review new file mode 100755 index 0000000..75eb3b5 --- /dev/null +++ b/bin/gstack-plan-eng-review @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" 
gstack-plan-eng-review /plan-eng-review references/workflows/plan-eng-review.md "$@" diff --git a/bin/gstack-qa b/bin/gstack-qa new file mode 100755 index 0000000..54e976d --- /dev/null +++ b/bin/gstack-qa @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-qa /qa references/workflows/qa.md "$@" diff --git a/bin/gstack-retro b/bin/gstack-retro new file mode 100755 index 0000000..c9d05a7 --- /dev/null +++ b/bin/gstack-retro @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-retro /retro references/workflows/retro.md "$@" diff --git a/bin/gstack-review b/bin/gstack-review new file mode 100755 index 0000000..6794553 --- /dev/null +++ b/bin/gstack-review @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-review /review references/workflows/review.md "$@" diff --git a/bin/gstack-ship b/bin/gstack-ship new file mode 100755 index 0000000..945bad0 --- /dev/null +++ b/bin/gstack-ship @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euo pipefail +"$(cd "$(dirname "$0")" && pwd)/gstack-alias" gstack-ship /ship references/workflows/ship.md "$@" diff --git a/browse/SKILL.md b/browse/SKILL.md index 22c5d88..d8f7b0f 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -19,11 +19,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; 
} || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. # browse: QA Testing & Dogfooding @@ -35,8 +38,9 @@ State persists between calls (cookies, tabs, login sessions). ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +for _cand in "$_ROOT/.codex/skills/gstack/browse/dist/browse" "$_ROOT/.agents/skills/gstack/browse/dist/browse" "$_ROOT/.claude/skills/gstack/browse/dist/browse" "$HOME/.codex/skills/gstack/browse/dist/browse" "$HOME/.agents/skills/gstack/browse/dist/browse" "$HOME/.claude/skills/gstack/browse/dist/browse"; do + [ -n "$_cand" ] && [ -x "$_cand" ] && B="$_cand" && break +done if [ -x "$B" ]; then echo "READY: $B" else diff --git a/browse/bin/find-browse b/browse/bin/find-browse index 9cbd7f8..89c725a 100755 --- a/browse/bin/find-browse +++ b/browse/bin/find-browse @@ -7,11 +7,23 @@ if test -x "$DIR/find-browse"; then fi # Fallback: basic discovery ROOT=$(git rev-parse --show-toplevel 2>/dev/null) -if [ -n "$ROOT" ] && test -x "$ROOT/.claude/skills/gstack/browse/dist/browse"; then - echo "$ROOT/.claude/skills/gstack/browse/dist/browse" 
-elif test -x "$HOME/.claude/skills/gstack/browse/dist/browse"; then - echo "$HOME/.claude/skills/gstack/browse/dist/browse" -else - echo "ERROR: browse binary not found. Run: cd && ./setup" >&2 - exit 1 +check_candidate() { + local candidate="$1" + if test -x "$candidate"; then + echo "$candidate" + exit 0 + fi +} + +if [ -n "$ROOT" ]; then + check_candidate "$ROOT/.codex/skills/gstack/browse/dist/browse" + check_candidate "$ROOT/.agents/skills/gstack/browse/dist/browse" + check_candidate "$ROOT/.claude/skills/gstack/browse/dist/browse" fi + +check_candidate "$HOME/.codex/skills/gstack/browse/dist/browse" +check_candidate "$HOME/.agents/skills/gstack/browse/dist/browse" +check_candidate "$HOME/.claude/skills/gstack/browse/dist/browse" + +echo "ERROR: browse binary not found. Run: cd && ./setup --host codex" >&2 +exit 1 diff --git a/browse/src/find-browse.ts b/browse/src/find-browse.ts index 44d76b4..62ea593 100644 --- a/browse/src/find-browse.ts +++ b/browse/src/find-browse.ts @@ -5,17 +5,17 @@ * Outputs the absolute path to the browse binary on stdout, or exits 1 if not found. 
*/ -import { existsSync } from 'fs'; -import { join } from 'path'; -import { homedir } from 'os'; +import { existsSync } from "fs"; +import { join } from "path"; +import { homedir } from "os"; // ─── Binary Discovery ─────────────────────────────────────────── function getGitRoot(): string | null { try { - const proc = Bun.spawnSync(['git', 'rev-parse', '--show-toplevel'], { - stdout: 'pipe', - stderr: 'pipe', + const proc = Bun.spawnSync(["git", "rev-parse", "--show-toplevel"], { + stdout: "pipe", + stderr: "pipe", }); if (proc.exitCode !== 0) return null; return proc.stdout.toString().trim(); @@ -24,19 +24,30 @@ function getGitRoot(): string | null { } } -export function locateBinary(): string | null { - const root = getGitRoot(); - const home = homedir(); +export function getBrowseCandidatePaths(root = getGitRoot(), home = homedir()): string[] { + const candidates: string[] = []; - // Workspace-local takes priority (for development) if (root) { - const local = join(root, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse'); - if (existsSync(local)) return local; + candidates.push( + join(root, ".codex", "skills", "gstack", "browse", "dist", "browse"), + join(root, ".agents", "skills", "gstack", "browse", "dist", "browse"), + join(root, ".claude", "skills", "gstack", "browse", "dist", "browse"), + ); } - // Global fallback - const global = join(home, '.claude', 'skills', 'gstack', 'browse', 'dist', 'browse'); - if (existsSync(global)) return global; + candidates.push( + join(home, ".codex", "skills", "gstack", "browse", "dist", "browse"), + join(home, ".agents", "skills", "gstack", "browse", "dist", "browse"), + join(home, ".claude", "skills", "gstack", "browse", "dist", "browse"), + ); + + return candidates; +} + +export function locateBinary(root = getGitRoot(), home = homedir()): string | null { + for (const candidate of getBrowseCandidatePaths(root, home)) { + if (existsSync(candidate)) return candidate; + } return null; } @@ -46,7 +57,7 @@ export 
function locateBinary(): string | null { function main() { const bin = locateBinary(); if (!bin) { - process.stderr.write('ERROR: browse binary not found. Run: cd && ./setup\n'); + process.stderr.write("ERROR: browse binary not found. Run: cd && ./setup\n"); process.exit(1); } diff --git a/browse/test/find-browse.test.ts b/browse/test/find-browse.test.ts index 7ac5a3f..3da8823 100644 --- a/browse/test/find-browse.test.ts +++ b/browse/test/find-browse.test.ts @@ -2,20 +2,33 @@ * Tests for find-browse binary locator. */ -import { describe, test, expect } from 'bun:test'; -import { locateBinary } from '../src/find-browse'; -import { existsSync } from 'fs'; +import { describe, test, expect } from "bun:test"; +import { getBrowseCandidatePaths, locateBinary } from "../src/find-browse"; +import { existsSync } from "fs"; -describe('locateBinary', () => { - test('returns null when no binary exists at known paths', () => { +describe("locateBinary", () => { + test("checks Codex and Claude-style install roots in priority order", () => { + const paths = getBrowseCandidatePaths("/repo", "/home/tester"); + expect(paths).toEqual([ + "/repo/.codex/skills/gstack/browse/dist/browse", + "/repo/.agents/skills/gstack/browse/dist/browse", + "/repo/.claude/skills/gstack/browse/dist/browse", + "/home/tester/.codex/skills/gstack/browse/dist/browse", + "/home/tester/.agents/skills/gstack/browse/dist/browse", + "/home/tester/.claude/skills/gstack/browse/dist/browse", + ]); + }); + + test("returns null when no binary exists at known paths", () => { // This test depends on the test environment — if a real binary exists at - // ~/.claude/skills/gstack/browse/dist/browse, it will find it. + // ~/.codex/skills/gstack/browse/dist/browse or ~/.claude/skills/gstack/browse/dist/browse, + // it will find it. // We mainly test that the function doesn't throw. 
const result = locateBinary(); - expect(result === null || typeof result === 'string').toBe(true); + expect(result === null || typeof result === "string").toBe(true); }); - test('returns string path when binary exists', () => { + test("returns string path when binary exists", () => { const result = locateBinary(); if (result !== null) { expect(existsSync(result)).toBe(true); diff --git a/gstack-upgrade/SKILL.md b/gstack-upgrade/SKILL.md index 1cf7d54..472ded4 100644 --- a/gstack-upgrade/SKILL.md +++ b/gstack-upgrade/SKILL.md @@ -27,7 +27,12 @@ First, check if auto-upgrade is enabled: ```bash _AUTO="" [ "${GSTACK_AUTO_UPGRADE:-}" = "1" ] && _AUTO="true" -[ -z "$_AUTO" ] && _AUTO=$(~/.claude/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true) +[ -z "$_AUTO" ] && _AUTO=$( + "$HOME/.codex/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + "$HOME/.agents/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + "$HOME/.claude/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + true +) echo "AUTO_UPGRADE=$_AUTO" ``` @@ -41,7 +46,7 @@ echo "AUTO_UPGRADE=$_AUTO" **If "Always keep me up to date":** ```bash -~/.claude/skills/gstack/bin/gstack-config set auto_upgrade true +"$INSTALL_DIR/bin/gstack-config" set auto_upgrade true ``` Tell user: "Auto-upgrade enabled. Future updates will install automatically." Then proceed to Step 2. @@ -67,30 +72,35 @@ Tell user the snooze duration: "Next reminder in 24h" (or 48h or 1 week, dependi **If "Never ask again":** ```bash -~/.claude/skills/gstack/bin/gstack-config set update_check false +"$INSTALL_DIR/bin/gstack-config" set update_check false ``` -Tell user: "Update checks disabled. Run `~/.claude/skills/gstack/bin/gstack-config set update_check true` to re-enable." +Tell user: "Update checks disabled. Run `$INSTALL_DIR/bin/gstack-config set update_check true` to re-enable." Continue with the current skill. 
### Step 2: Detect install type ```bash -if [ -d "$HOME/.claude/skills/gstack/.git" ]; then - INSTALL_TYPE="global-git" - INSTALL_DIR="$HOME/.claude/skills/gstack" -elif [ -d ".claude/skills/gstack/.git" ]; then - INSTALL_TYPE="local-git" - INSTALL_DIR=".claude/skills/gstack" -elif [ -d ".claude/skills/gstack" ]; then +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +INSTALL_TYPE="" +INSTALL_DIR="" +for _candidate in \ + "$HOME/.codex/skills/gstack" \ + "$HOME/.agents/skills/gstack" \ + "$HOME/.claude/skills/gstack" \ + "$_ROOT/.codex/skills/gstack" \ + "$_ROOT/.agents/skills/gstack" \ + "$_ROOT/.claude/skills/gstack"; do + [ -d "$_candidate" ] || continue + if [ -d "$_candidate/.git" ]; then + INSTALL_TYPE="git" + INSTALL_DIR="$_candidate" + break + fi INSTALL_TYPE="vendored" - INSTALL_DIR=".claude/skills/gstack" -elif [ -d "$HOME/.claude/skills/gstack" ]; then - INSTALL_TYPE="vendored-global" - INSTALL_DIR="$HOME/.claude/skills/gstack" -else - echo "ERROR: gstack not found" - exit 1 -fi + INSTALL_DIR="$_candidate" + break +done +[ -n "$INSTALL_DIR" ] || { echo "ERROR: gstack not found"; exit 1; } echo "Install type: $INSTALL_TYPE at $INSTALL_DIR" ``` @@ -102,7 +112,7 @@ OLD_VERSION=$(cat "$INSTALL_DIR/VERSION" 2>/dev/null || echo "unknown") ### Step 4: Upgrade -**For git installs** (global-git, local-git): +**For git installs**: ```bash cd "$INSTALL_DIR" STASH_OUTPUT=$(git stash 2>&1) @@ -112,7 +122,7 @@ git reset --hard origin/main ``` If `$STASH_OUTPUT` contains "Saved working directory", warn the user: "Note: local changes were stashed. Run `git stash pop` in the skill directory to restore them." 
-**For vendored installs** (vendored, vendored-global): +**For vendored installs**: ```bash PARENT=$(dirname "$INSTALL_DIR") TMP_DIR=$(mktemp -d) @@ -130,17 +140,22 @@ After upgrading the primary install, check if there's also a local copy in the c ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) LOCAL_GSTACK="" -if [ -n "$_ROOT" ] && [ -d "$_ROOT/.claude/skills/gstack" ]; then - _RESOLVED_LOCAL=$(cd "$_ROOT/.claude/skills/gstack" && pwd -P) - _RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P) +_RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P) +for _candidate in \ + "$_ROOT/.codex/skills/gstack" \ + "$_ROOT/.agents/skills/gstack" \ + "$_ROOT/.claude/skills/gstack"; do + [ -d "$_candidate" ] || continue + _RESOLVED_LOCAL=$(cd "$_candidate" && pwd -P) if [ "$_RESOLVED_LOCAL" != "$_RESOLVED_PRIMARY" ]; then - LOCAL_GSTACK="$_ROOT/.claude/skills/gstack" + LOCAL_GSTACK="$_candidate" + break fi -fi +done echo "LOCAL_GSTACK=$LOCAL_GSTACK" ``` -If `LOCAL_GSTACK` is non-empty, update it by copying from the freshly-upgraded primary install (same approach as README vendored install): +If `LOCAL_GSTACK` is non-empty, update it by copying from the freshly-upgraded primary install: ```bash mv "$LOCAL_GSTACK" "$LOCAL_GSTACK.bak" cp -Rf "$INSTALL_DIR" "$LOCAL_GSTACK" @@ -148,7 +163,7 @@ rm -rf "$LOCAL_GSTACK/.git" cd "$LOCAL_GSTACK" && ./setup rm -rf "$LOCAL_GSTACK.bak" ``` -Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit `.claude/skills/gstack/` when you're ready." +Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit that host-local `gstack/` directory when you're ready." 
### Step 5: Write marker + clear cache diff --git a/gstack-upgrade/SKILL.md.tmpl b/gstack-upgrade/SKILL.md.tmpl index 4a124be..a236df5 100644 --- a/gstack-upgrade/SKILL.md.tmpl +++ b/gstack-upgrade/SKILL.md.tmpl @@ -25,7 +25,12 @@ First, check if auto-upgrade is enabled: ```bash _AUTO="" [ "${GSTACK_AUTO_UPGRADE:-}" = "1" ] && _AUTO="true" -[ -z "$_AUTO" ] && _AUTO=$(~/.claude/skills/gstack/bin/gstack-config get auto_upgrade 2>/dev/null || true) +[ -z "$_AUTO" ] && _AUTO=$( + "$HOME/.codex/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + "$HOME/.agents/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + "$HOME/.claude/skills/gstack/bin/gstack-config" get auto_upgrade 2>/dev/null || \ + true +) echo "AUTO_UPGRADE=$_AUTO" ``` @@ -39,7 +44,7 @@ echo "AUTO_UPGRADE=$_AUTO" **If "Always keep me up to date":** ```bash -~/.claude/skills/gstack/bin/gstack-config set auto_upgrade true +"$INSTALL_DIR/bin/gstack-config" set auto_upgrade true ``` Tell user: "Auto-upgrade enabled. Future updates will install automatically." Then proceed to Step 2. @@ -65,30 +70,35 @@ Tell user the snooze duration: "Next reminder in 24h" (or 48h or 1 week, dependi **If "Never ask again":** ```bash -~/.claude/skills/gstack/bin/gstack-config set update_check false +"$INSTALL_DIR/bin/gstack-config" set update_check false ``` -Tell user: "Update checks disabled. Run `~/.claude/skills/gstack/bin/gstack-config set update_check true` to re-enable." +Tell user: "Update checks disabled. Run `$INSTALL_DIR/bin/gstack-config set update_check true` to re-enable." Continue with the current skill. 
### Step 2: Detect install type ```bash -if [ -d "$HOME/.claude/skills/gstack/.git" ]; then - INSTALL_TYPE="global-git" - INSTALL_DIR="$HOME/.claude/skills/gstack" -elif [ -d ".claude/skills/gstack/.git" ]; then - INSTALL_TYPE="local-git" - INSTALL_DIR=".claude/skills/gstack" -elif [ -d ".claude/skills/gstack" ]; then +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd) +INSTALL_TYPE="" +INSTALL_DIR="" +for _candidate in \ + "$HOME/.codex/skills/gstack" \ + "$HOME/.agents/skills/gstack" \ + "$HOME/.claude/skills/gstack" \ + "$_ROOT/.codex/skills/gstack" \ + "$_ROOT/.agents/skills/gstack" \ + "$_ROOT/.claude/skills/gstack"; do + [ -d "$_candidate" ] || continue + if [ -d "$_candidate/.git" ]; then + INSTALL_TYPE="git" + INSTALL_DIR="$_candidate" + break + fi INSTALL_TYPE="vendored" - INSTALL_DIR=".claude/skills/gstack" -elif [ -d "$HOME/.claude/skills/gstack" ]; then - INSTALL_TYPE="vendored-global" - INSTALL_DIR="$HOME/.claude/skills/gstack" -else - echo "ERROR: gstack not found" - exit 1 -fi + INSTALL_DIR="$_candidate" + break +done +[ -n "$INSTALL_DIR" ] || { echo "ERROR: gstack not found"; exit 1; } echo "Install type: $INSTALL_TYPE at $INSTALL_DIR" ``` @@ -100,7 +110,7 @@ OLD_VERSION=$(cat "$INSTALL_DIR/VERSION" 2>/dev/null || echo "unknown") ### Step 4: Upgrade -**For git installs** (global-git, local-git): +**For git installs**: ```bash cd "$INSTALL_DIR" STASH_OUTPUT=$(git stash 2>&1) @@ -110,7 +120,7 @@ git reset --hard origin/main ``` If `$STASH_OUTPUT` contains "Saved working directory", warn the user: "Note: local changes were stashed. Run `git stash pop` in the skill directory to restore them." 
-**For vendored installs** (vendored, vendored-global): +**For vendored installs**: ```bash PARENT=$(dirname "$INSTALL_DIR") TMP_DIR=$(mktemp -d) @@ -128,17 +138,22 @@ After upgrading the primary install, check if there's also a local copy in the c ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) LOCAL_GSTACK="" -if [ -n "$_ROOT" ] && [ -d "$_ROOT/.claude/skills/gstack" ]; then - _RESOLVED_LOCAL=$(cd "$_ROOT/.claude/skills/gstack" && pwd -P) - _RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P) +_RESOLVED_PRIMARY=$(cd "$INSTALL_DIR" && pwd -P) +for _candidate in \ + "$_ROOT/.codex/skills/gstack" \ + "$_ROOT/.agents/skills/gstack" \ + "$_ROOT/.claude/skills/gstack"; do + [ -d "$_candidate" ] || continue + _RESOLVED_LOCAL=$(cd "$_candidate" && pwd -P) if [ "$_RESOLVED_LOCAL" != "$_RESOLVED_PRIMARY" ]; then - LOCAL_GSTACK="$_ROOT/.claude/skills/gstack" + LOCAL_GSTACK="$_candidate" + break fi -fi +done echo "LOCAL_GSTACK=$LOCAL_GSTACK" ``` -If `LOCAL_GSTACK` is non-empty, update it by copying from the freshly-upgraded primary install (same approach as README vendored install): +If `LOCAL_GSTACK` is non-empty, update it by copying from the freshly-upgraded primary install: ```bash mv "$LOCAL_GSTACK" "$LOCAL_GSTACK.bak" cp -Rf "$INSTALL_DIR" "$LOCAL_GSTACK" @@ -146,7 +161,7 @@ rm -rf "$LOCAL_GSTACK/.git" cd "$LOCAL_GSTACK" && ./setup rm -rf "$LOCAL_GSTACK.bak" ``` -Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit `.claude/skills/gstack/` when you're ready." +Tell user: "Also updated vendored copy at `$LOCAL_GSTACK` — commit that host-local `gstack/` directory when you're ready." ### Step 5: Write marker + clear cache diff --git a/package.json b/package.json index a5044b7..f681b08 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "gstack", "version": "0.3.3", - "description": "Garry's Stack — Claude Code skills + fast headless browser. 
One repo, one install, entire AI engineering workflow.", + "description": "Garry's Stack — agent workflows + fast headless browser for Claude Code and Codex from one shared repo.", "license": "MIT", "type": "module", "bin": { @@ -15,6 +15,7 @@ "test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts", "test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts", "test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts", + "test:codex-e2e": "CODEX_EVALS=1 bun test test/codex-e2e.test.ts", "skill:check": "bun run scripts/skill-check.ts", "dev:skill": "bun run scripts/dev-skill.ts", "start": "bun run browse/src/server.ts", @@ -37,6 +38,7 @@ "headless", "cli", "claude", + "codex", "ai-agent", "devtools" ], diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 7bb5dad..10fed9a 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -19,11 +19,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE <old> <new>`: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). 
If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE <old> <new>`: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. # Mega Plan Review Mode diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index d8a052a..e48837b 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -18,11 +18,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE <old> <new>`: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE <old> <new>`: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). 
If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. # Plan Review Mode diff --git a/qa/SKILL.md b/qa/SKILL.md index dd4b888..1a1da6c 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -19,11 +19,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE <old> <new>`: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE <old> <new>`: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED <from> <to>`: tell user "Running gstack v{to} (just updated!)" and continue. # /qa: Systematic QA Testing @@ -50,8 +53,9 @@ You are a QA engineer. 
Test web applications like a real user — click everythi ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +for _cand in "$_ROOT/.codex/skills/gstack/browse/dist/browse" "$_ROOT/.agents/skills/gstack/browse/dist/browse" "$_ROOT/.claude/skills/gstack/browse/dist/browse" "$HOME/.codex/skills/gstack/browse/dist/browse" "$HOME/.agents/skills/gstack/browse/dist/browse" "$HOME/.claude/skills/gstack/browse/dist/browse"; do + [ -n "$_cand" ] && [ -x "$_cand" ] && B="$_cand" && break +done if [ -x "$B" ]; then echo "READY: $B" else diff --git a/references/workflows/browse.md b/references/workflows/browse.md new file mode 100644 index 0000000..46c6ff4 --- /dev/null +++ b/references/workflows/browse.md @@ -0,0 +1,25 @@ +# gstack-browse + +Use this mode when the user wants browser automation, screenshots, UI verification, login flows, site dogfooding, or any task they would previously route to `/browse`. + +## Contract + +- Primary engine: `browse/dist/browse` +- Setup: run `./setup --host codex` from the gstack root if the binary or Chromium is missing +- Reference docs: `BROWSER.md`, `ARCHITECTURE.md`, and `browse/bin/find-browse` +- Evidence: prefer snapshots, screenshots, console logs, and direct command output over narrative summaries + +## Operating rules + +1. Resolve the browse binary using the compatibility path order in `references/workflows/compatibility.md`. +2. Prefer the compiled browser binary over host-native browser MCPs. The portability win comes from reusing the same engine across Claude and Codex. +3. Start with `goto`, `snapshot -i`, and `console` so the page model is explicit before interacting. +4. For longer flows, use `chain` or stepwise commands that leave reproducible evidence. +5. 
If the request is really a structured QA pass, switch to `gstack-qa`. If it is cookie/session import, switch to `gstack-browser-cookies`. + +## Output + +- What was tested +- Evidence captured +- Failures or regressions +- Exact next action if setup or auth blocked progress diff --git a/references/workflows/browser-cookies.md b/references/workflows/browser-cookies.md new file mode 100644 index 0000000..ade95ed --- /dev/null +++ b/references/workflows/browser-cookies.md @@ -0,0 +1,22 @@ +# gstack-browser-cookies + +Use this mode when the user wants to import real browser cookies into the gstack browser session for authenticated testing. + +## Contract + +- Primary engine: `browse/dist/browse` +- Primary command: `cookie-import-browser` +- Platform caveat: this flow is currently macOS-centric because it reads browser profiles and Keychain-backed cookies + +## Workflow + +1. Resolve the gstack browse binary. +2. Confirm the target browser and domain if the user provided them. +3. Run the import using the compiled browser CLI. +4. Verify the imported session by visiting an authenticated page. + +## Operating rules + +- Do not echo raw cookie values into the transcript. +- If the platform or browser is unsupported, say so quickly and fall back to manual login guidance. +- If the task becomes full QA after import, switch to `gstack-qa`. diff --git a/references/workflows/compatibility.md b/references/workflows/compatibility.md new file mode 100644 index 0000000..e462348 --- /dev/null +++ b/references/workflows/compatibility.md @@ -0,0 +1,37 @@ +# Codex Compatibility + +gstack keeps one upstream repo and exposes host-specific entrypoints on top of it. + +## Host layout + +- Claude global install: `~/.claude/skills/gstack` plus sibling skill aliases like `~/.claude/skills/browse`. +- Codex global install: `~/.codex/skills/gstack` plus sibling skills like `~/.codex/skills/gstack-browse`. 
+- Codex repo-local install: project `.codex/skills/gstack` plus sibling `.codex/skills/gstack-*` aliases created by `./setup --host codex`. + +`./setup --host codex` is the canonical registration step for Codex. `./setup --host auto` resolves from the current install path when possible and otherwise prefers Codex. + +## Alias mapping + +- `/browse` → `gstack-browse` +- `/qa` → `gstack-qa` +- `/review` → `gstack-review` +- `/ship` → `gstack-ship` +- `/plan-ceo-review` → `gstack-plan-ceo-review` +- `/plan-eng-review` → `gstack-plan-eng-review` +- `/setup-browser-cookies` → `gstack-browser-cookies` +- `/retro` → `gstack-retro` + +The shell wrappers in `bin/gstack-*` are compatibility sugar. They print the exact Codex-facing skill name and preserve the old gstack mental model without pretending Codex has Claude-style slash-command registration. + +## Browser resolution + +Prefer these browse roots, in order: + +1. project `.codex/skills/gstack` +2. project `.agents/skills/gstack` +3. project `.claude/skills/gstack` +4. `~/.codex/skills/gstack` +5. `~/.agents/skills/gstack` +6. `~/.claude/skills/gstack` + +Use `browse/bin/find-browse` or `browse/dist/browse` from the resolved root. The compiled browser remains the primary cross-host engine. diff --git a/references/workflows/plan-ceo-review.md b/references/workflows/plan-ceo-review.md new file mode 100644 index 0000000..8315334 --- /dev/null +++ b/references/workflows/plan-ceo-review.md @@ -0,0 +1,27 @@ +# gstack-plan-ceo-review + +Use this mode when the user wants founder-level product pressure on a plan, feature request, or roadmap item. + +## Contract + +- Goal: challenge the premise, identify the real user outcome, and decide whether the plan should expand, hold scope, or shrink +- Default posture: ambitious but concrete; no code changes +- Output: a decisive plan review, not implementation prose + +## Review spine + +1. Restate the real user and business outcome. +2. Challenge the current framing. +3. 
Map the current state, the proposed delta, and the 12-month ideal. +4. Pick the right mode: + - Scope expansion + - Hold scope + - Scope reduction +5. Produce the strongest version of the plan for that mode. + +## Operating rules + +- Be explicit about what is non-negotiable and what is optional. +- Prefer concrete product bets over vague ambition. +- Ask for user input only when the desired ambition level or success metric is genuinely ambiguous. +- Leave a clean handoff for `gstack-plan-eng-review` if the plan survives. diff --git a/references/workflows/plan-eng-review.md b/references/workflows/plan-eng-review.md new file mode 100644 index 0000000..50f5b72 --- /dev/null +++ b/references/workflows/plan-eng-review.md @@ -0,0 +1,25 @@ +# gstack-plan-eng-review + +Use this mode when the user wants technical review of a proposed plan before implementation. + +## Contract + +- Goal: harden architecture, state transitions, failure handling, security, observability, rollout, and tests +- Default posture: rigorous, explicit, and implementation-ready +- Output: a review that an engineer can execute without guessing + +## Review spine + +1. Audit the current system surface relevant to the plan. +2. Pressure-test architecture and dependency boundaries. +3. Enumerate failure modes and error handling. +4. Review security and trust boundaries. +5. Define observability, rollout, rollback, and test expectations. +6. Surface open decisions early instead of letting them leak into implementation. + +## Operating rules + +- Prefer diagrams, tables, and explicit invariants over vague advice. +- Bias toward the smallest safe implementation that still meets the plan. +- Use Codex-native user input only when a real architecture or scope decision blocks the review. +- Do not start implementation in this mode. 
diff --git a/references/workflows/qa.md b/references/workflows/qa.md new file mode 100644 index 0000000..226ab83 --- /dev/null +++ b/references/workflows/qa.md @@ -0,0 +1,28 @@ +# gstack-qa + +Use this mode when the user wants a systematic QA pass, a smoke test, diff-aware verification of a branch, or a structured bug report. + +## Contract + +- Primary engine: `gstack-browse` via `browse/dist/browse` +- Primary assets: `qa/templates/qa-report-template.md` and `qa/references/issue-taxonomy.md` +- Default posture: execute the QA pass, collect evidence, and write a report instead of explaining how QA should work + +## Workflow + +1. Resolve the browse binary using `references/workflows/compatibility.md`. +2. Create `.gstack/qa-reports/` and `screenshots/` if they do not exist. +3. Choose mode: + - Diff-aware: no URL provided and current branch is not `main` + - Full: user gave a target URL and wants broad coverage + - Quick: smoke test or `--quick` + - Regression: compare against a previous QA report or baseline JSON +4. Use the issue taxonomy while exploring and document problems immediately when they are found. +5. For each finding, capture the minimum evidence needed to reproduce it cleanly. + +## Operating rules + +- Prefer real interactions over speculative findings. +- Keep the report findings-first and severity-scored. +- Use `request_user_input` only when auth, CAPTCHA, or a missing target URL truly blocks execution. +- If the user only wants browser navigation, do not over-promote to QA mode. diff --git a/references/workflows/retro.md b/references/workflows/retro.md new file mode 100644 index 0000000..ed51ed6 --- /dev/null +++ b/references/workflows/retro.md @@ -0,0 +1,23 @@ +# gstack-retro + +Use this mode for engineering retrospectives grounded in git history, TODOs, and recent delivery outcomes. 
+ +## Contract + +- Goal: summarize what happened, what got better, what regressed, and what the team should change next +- Primary sources: `git log`, `TODOS.md`, release notes, and issue history when available +- Output: concise retrospective notes with actionable follow-ups + +## Workflow + +1. Define the review window from the user request or default to the recent working period. +2. Inspect commits, contributors, and major themes. +3. Cross-reference open TODOs and recently closed work. +4. Separate wins, failures, and process gaps. +5. End with concrete actions, not vague observations. + +## Operating rules + +- Attribute work carefully; do not guess ownership. +- Keep praise specific and keep criticism operational. +- Do not rewrite history to sound positive. The retro should be useful. diff --git a/references/workflows/review.md b/references/workflows/review.md new file mode 100644 index 0000000..f757b78 --- /dev/null +++ b/references/workflows/review.md @@ -0,0 +1,24 @@ +# gstack-review + +Use this mode for a findings-first pre-landing review of the current diff against `main`. + +## Contract + +- Primary references: `review/checklist.md`, `review/greptile-triage.md`, `review/TODOS-format.md` +- Focus: correctness, trust boundaries, data safety, test gaps, and release risk +- Default posture: review non-interactively and only ask for user input when there is a real tradeoff that blocks action + +## Workflow + +1. Confirm the branch is not `main` and that there is a diff to review. +2. Read `review/checklist.md` before writing any findings. +3. Review the full diff against the latest `origin/main`. +4. Cross-reference `TODOS.md` when it exists. +5. If Greptile review comments are relevant, apply `review/greptile-triage.md`. + +## Operating rules + +- Findings come first, ordered by severity, with file and line references. +- Do not generate busywork. Skip style nits unless they create real risk. 
+- Use Codex-native user input only for blocking decisions such as “fix now vs acknowledge”. +- Never depend on Claude-only tool names or slash-command behavior. diff --git a/references/workflows/ship.md b/references/workflows/ship.md new file mode 100644 index 0000000..32a49da --- /dev/null +++ b/references/workflows/ship.md @@ -0,0 +1,25 @@ +# gstack-ship + +Use this mode for deterministic shell-driven release steps on a ready feature branch. + +## Contract + +- Goal: converge the branch with main, run the repo’s canonical validation, and prepare the branch for review or merge +- Default posture: execute, do not brainstorm +- Scope: deterministic shell steps only; avoid host-specific slash-command assumptions + +## Workflow + +1. Refuse to ship directly from `main`. +2. Inspect the diff and recent commits. +3. Sync with `origin/main`. +4. Run the repository’s existing validation commands. Prefer documented wrappers over ad hoc command chains. +5. Run `gstack-review` logic if a pre-landing review has not been done. +6. If the repo uses versioning or changelog files, update them only when the project’s conventions require it. +7. Push and open or update a PR when the repo is configured for that flow. + +## Operating rules + +- Stop on merge conflicts, broken validation, or missing release prerequisites. +- Use user input only when a real release decision is required. +- Keep the output brief: branch state, validation results, and next release artifact. 
diff --git a/retro/SKILL.md b/retro/SKILL.md index f1e92c2..e9f29c9 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -18,11 +18,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. 
# /retro — Weekly Engineering Retrospective diff --git a/review/SKILL.md b/review/SKILL.md index b572283..f9359c4 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -19,11 +19,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. 
# Pre-Landing PR Review diff --git a/review/greptile-triage.md b/review/greptile-triage.md index 3cb6e8d..d732f60 100644 --- a/review/greptile-triage.md +++ b/review/greptile-triage.md @@ -34,7 +34,13 @@ The `position != null` filter on line-level comments automatically skips outdate Derive the project-specific history path: ```bash -REMOTE_SLUG=$(browse/bin/remote-slug 2>/dev/null || ~/.claude/skills/gstack/browse/bin/remote-slug 2>/dev/null || basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") +REMOTE_SLUG=$( + browse/bin/remote-slug 2>/dev/null || \ + "$HOME/.codex/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + "$HOME/.agents/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + "$HOME/.claude/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +) PROJECT_HISTORY="$HOME/.gstack/projects/$REMOTE_SLUG/greptile-history.md" ``` @@ -183,7 +189,13 @@ When classifying comments, also assess whether Greptile's implied severity match Before writing, ensure both directories exist: ```bash -REMOTE_SLUG=$(browse/bin/remote-slug 2>/dev/null || ~/.claude/skills/gstack/browse/bin/remote-slug 2>/dev/null || basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)") +REMOTE_SLUG=$( + browse/bin/remote-slug 2>/dev/null || \ + "$HOME/.codex/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + "$HOME/.agents/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + "$HOME/.claude/skills/gstack/browse/bin/remote-slug" 2>/dev/null || \ + basename "$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +) mkdir -p "$HOME/.gstack/projects/$REMOTE_SLUG" mkdir -p ~/.gstack ``` diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 7f6bd24..aab9d14 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -98,11 +98,20 @@ function generateUpdateCheck(): string { return `## Update Check (run first) \`\`\`bash 
-_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || \ + "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || \ + "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || \ + { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || \ + { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || \ + { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || \ + true +) [ -n "$_UPD" ] && echo "$_UPD" || true \`\`\` -If output shows \`UPGRADE_AVAILABLE \`: read \`~/.claude/skills/gstack/gstack-upgrade/SKILL.md\` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If \`JUST_UPGRADED \`: tell user "Running gstack v{to} (just updated!)" and continue.`; +If output shows \`UPGRADE_AVAILABLE \`: read the installed \`gstack-upgrade/SKILL.md\` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). 
If \`JUST_UPGRADED \`: tell user "Running gstack v{to} (just updated!)" and continue.`; } function generateBrowseSetup(): string { @@ -111,8 +120,15 @@ function generateBrowseSetup(): string { \`\`\`bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +for _cand in \ + "$_ROOT/.codex/skills/gstack/browse/dist/browse" \ + "$_ROOT/.agents/skills/gstack/browse/dist/browse" \ + "$_ROOT/.claude/skills/gstack/browse/dist/browse" \ + "$HOME/.codex/skills/gstack/browse/dist/browse" \ + "$HOME/.agents/skills/gstack/browse/dist/browse" \ + "$HOME/.claude/skills/gstack/browse/dist/browse"; do + [ -n "$_cand" ] && [ -x "$_cand" ] && B="$_cand" && break +done if [ -x "$B" ]; then echo "READY: $B" else diff --git a/setup b/setup index 607c277..75a17eb 100755 --- a/setup +++ b/setup @@ -1,10 +1,52 @@ #!/usr/bin/env bash -# gstack setup — build browser binary + register all skills with Claude Code -set -e +# gstack setup — build browser binary + register skills for Claude or Codex. +set -euo pipefail GSTACK_DIR="$(cd "$(dirname "$0")" && pwd)" -SKILLS_DIR="$(dirname "$GSTACK_DIR")" BROWSE_BIN="$GSTACK_DIR/browse/dist/browse" +CLAUDE_SKILLS_DIR="${GSTACK_CLAUDE_SKILLS_DIR:-$HOME/.claude/skills}" +CODEX_SKILLS_DIR="${GSTACK_CODEX_SKILLS_DIR:-$HOME/.codex/skills}" +HOST="auto" +FIRST_RUN=0 + +[ -d "$HOME/.gstack" ] || FIRST_RUN=1 + +usage() { + cat <<'EOF' +Usage: ./setup [--host claude|codex|auto] + +Builds the browse binary, ensures Playwright Chromium is installed, and +registers workflow aliases for the selected host. + +Host resolution: + auto Detect from install path when possible, otherwise prefer Codex. + claude Register Claude slash-skill symlinks under ~/.claude/skills. + codex Register Codex skills under ~/.codex/skills. 
+ +Overrides: + GSTACK_CLAUDE_SKILLS_DIR Override the Claude skills root. + GSTACK_CODEX_SKILLS_DIR Override the Codex skills root. +EOF +} + +while [ $# -gt 0 ]; do + case "$1" in + --host) + [ $# -ge 2 ] || { echo "Missing value for --host" >&2; exit 1; } + HOST="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done ensure_playwright_browser() { ( @@ -13,6 +55,135 @@ ensure_playwright_browser() { ) >/dev/null 2>&1 } +resolve_embedded_host() { + case "$GSTACK_DIR" in + */.claude/skills/gstack) echo "claude" ;; + */.codex/skills/gstack|*/.agents/skills/gstack) echo "codex" ;; + *) echo "" ;; + esac +} + +resolve_host() { + case "$HOST" in + claude|codex) echo "$HOST" ;; + auto) + local embedded + embedded="$(resolve_embedded_host)" + if [ -n "$embedded" ]; then + echo "$embedded" + elif [ -d "$CODEX_SKILLS_DIR" ] && [ ! -d "$CLAUDE_SKILLS_DIR" ]; then + echo "codex" + elif [ -d "$CLAUDE_SKILLS_DIR" ] && [ ! -d "$CODEX_SKILLS_DIR" ]; then + echo "claude" + else + echo "codex" + fi + ;; + *) + echo "Invalid host: $HOST" >&2 + exit 1 + ;; + esac +} + +resolve_install_root() { + local selected_host="$1" + + case "$GSTACK_DIR" in + */.claude/skills/gstack) + if [ "$selected_host" = "claude" ]; then + echo "${GSTACK_DIR%/gstack}" + return + fi + ;; + */.codex/skills/gstack|*/.agents/skills/gstack) + if [ "$selected_host" = "codex" ]; then + echo "${GSTACK_DIR%/gstack}" + return + fi + ;; + esac + + if [ "$selected_host" = "claude" ]; then + echo "$CLAUDE_SKILLS_DIR" + else + echo "$CODEX_SKILLS_DIR" + fi +} + +link_root_if_needed() { + local install_root="$1" + local root_link="$install_root/gstack" + + mkdir -p "$install_root" + + if [ "$GSTACK_DIR" = "$root_link" ]; then + return + fi + + if [ -L "$root_link" ] || [ ! 
-e "$root_link" ]; then + ln -snf "$GSTACK_DIR" "$root_link" + return + fi + + if [ -d "$root_link" ] && [ "$(cd "$root_link" && pwd -P)" = "$(cd "$GSTACK_DIR" && pwd -P)" ]; then + return + fi + + echo "gstack setup warning: $root_link already exists and was left unchanged." >&2 +} + +link_claude_skills() { + local install_root="$1" + local linked=() + local skill_dir skill_name target + + for skill_dir in "$GSTACK_DIR"/*/; do + if [ -f "$skill_dir/SKILL.md" ]; then + skill_name="$(basename "$skill_dir")" + [ "$skill_name" = "node_modules" ] && continue + target="$install_root/$skill_name" + if [ -L "$target" ] || [ ! -e "$target" ]; then + ln -snf "$skill_dir" "$target" + linked+=("$skill_name") + fi + fi + done + + echo "${linked[*]}" +} + +link_codex_skills() { + local install_root="$1" + local skill_names=( + gstack-browse + gstack-qa + gstack-review + gstack-plan-ceo-review + gstack-plan-eng-review + gstack-browser-cookies + gstack-ship + gstack-retro + ) + local linked=() + local skill_name source_dir target + + for skill_name in "${skill_names[@]}"; do + source_dir="$GSTACK_DIR/.codex/skills/$skill_name" + target="$install_root/$skill_name" + if [ ! -d "$source_dir" ]; then + echo "gstack setup warning: missing Codex skill at $source_dir" >&2 + continue + fi + if [ -L "$target" ] || [ ! -e "$target" ]; then + ln -snf "$source_dir" "$target" + linked+=("$skill_name") + fi + done + + echo "${linked[*]}" +} + # 1. Build browse binary if needed (smart rebuild: stale sources, package.json, lock) NEEDS_BUILD=0 if [ ! -x "$BROWSE_BIN" ]; then @@ -32,7 +203,6 @@ if [ "$NEEDS_BUILD" -eq 1 ]; then bun install bun run build ) - # Safety net: write .version if build script didn't (e.g., git not available during build) if [ ! -f "$GSTACK_DIR/browse/dist/.version" ]; then git -C "$GSTACK_DIR" rev-parse HEAD > "$GSTACK_DIR/browse/dist/.version" 2>/dev/null || true fi @@ -60,38 +230,26 @@ fi # 3. 
Ensure ~/.gstack global state directory exists mkdir -p "$HOME/.gstack/projects" -# 4. Only create skill symlinks if we're inside a .claude/skills directory -SKILLS_BASENAME="$(basename "$SKILLS_DIR")" -if [ "$SKILLS_BASENAME" = "skills" ]; then - linked=() - for skill_dir in "$GSTACK_DIR"/*/; do - if [ -f "$skill_dir/SKILL.md" ]; then - skill_name="$(basename "$skill_dir")" - # Skip node_modules - [ "$skill_name" = "node_modules" ] && continue - target="$SKILLS_DIR/$skill_name" - # Create or update symlink; skip if a real file/directory exists - if [ -L "$target" ] || [ ! -e "$target" ]; then - ln -snf "gstack/$skill_name" "$target" - linked+=("$skill_name") - fi - fi - done +# 4. Register host-specific skills +SELECTED_HOST="$(resolve_host)" +INSTALL_ROOT="$(resolve_install_root "$SELECTED_HOST")" +link_root_if_needed "$INSTALL_ROOT" - echo "gstack ready." - echo " browse: $BROWSE_BIN" - if [ ${#linked[@]} -gt 0 ]; then - echo " linked skills: ${linked[*]}" - fi +if [ "$SELECTED_HOST" = "claude" ]; then + LINKED_SKILLS="$(link_claude_skills "$INSTALL_ROOT")" else - echo "gstack ready." - echo " browse: $BROWSE_BIN" - echo " (skipped skill symlinks — not inside .claude/skills/)" + LINKED_SKILLS="$(link_codex_skills "$INSTALL_ROOT")" fi -# 4. First-time welcome + legacy cleanup -if [ ! -d "$HOME/.gstack" ]; then +echo "gstack ready." +echo " host: $SELECTED_HOST" +echo " browse: $BROWSE_BIN" +echo " skills: $INSTALL_ROOT" +[ -n "$LINKED_SKILLS" ] && echo " linked skills: $LINKED_SKILLS" + +# 5. First-time welcome + legacy cleanup +if [ "$FIRST_RUN" -eq 1 ]; then mkdir -p "$HOME/.gstack" - echo " Welcome! Run /gstack-upgrade anytime to stay current." + echo " Welcome! Run gstack-upgrade or ./setup again after updates." 
fi rm -f /tmp/gstack-latest-version diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index b2f8fc6..07b1915 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -16,11 +16,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. 
# Setup Browser Cookies @@ -42,8 +45,9 @@ Import logged-in sessions from your real Chromium browser into the headless brow ```bash _ROOT=$(git rev-parse --show-toplevel 2>/dev/null) B="" -[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" -[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +for _cand in "$_ROOT/.codex/skills/gstack/browse/dist/browse" "$_ROOT/.agents/skills/gstack/browse/dist/browse" "$_ROOT/.claude/skills/gstack/browse/dist/browse" "$HOME/.codex/skills/gstack/browse/dist/browse" "$HOME/.agents/skills/gstack/browse/dist/browse" "$HOME/.claude/skills/gstack/browse/dist/browse"; do + [ -n "$_cand" ] && [ -x "$_cand" ] && B="$_cand" && break +done if [ -x "$B" ]; then echo "READY: $B" else diff --git a/ship/SKILL.md b/ship/SKILL.md index 386299b..7c32625 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -18,11 +18,14 @@ allowed-tools: ## Update Check (run first) ```bash -_UPD=$(~/.claude/skills/gstack/bin/gstack-update-check 2>/dev/null || .claude/skills/gstack/bin/gstack-update-check 2>/dev/null || true) +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +_UPD=$( + "$HOME/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null || "$HOME/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null || { [ -n "$_ROOT" ] && "$_ROOT/.codex/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.agents/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || { [ -n "$_ROOT" ] && "$_ROOT/.claude/skills/gstack/bin/gstack-update-check" 2>/dev/null; } || true +) [ -n "$_UPD" ] && echo "$_UPD" || true ``` -If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). 
If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. +If output shows `UPGRADE_AVAILABLE `: read the installed `gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. # Ship: Fully Automated Ship Workflow diff --git a/test/codex-compat.test.ts b/test/codex-compat.test.ts new file mode 100644 index 0000000..d4b3d1b --- /dev/null +++ b/test/codex-compat.test.ts @@ -0,0 +1,69 @@ +import { describe, test, expect } from "bun:test"; +import * as fs from "fs"; +import * as path from "path"; + +const ROOT = path.resolve(import.meta.dir, ".."); +const CODEX_SKILLS = [ + "gstack-browse", + "gstack-qa", + "gstack-review", + "gstack-plan-ceo-review", + "gstack-plan-eng-review", + "gstack-browser-cookies", + "gstack-ship", + "gstack-retro", +] as const; + +describe("Codex skill packaging", () => { + test("all Codex skills exist and stay free of Claude-only references", () => { + for (const skill of CODEX_SKILLS) { + const skillPath = path.join(ROOT, ".codex", "skills", skill, "SKILL.md"); + expect(fs.existsSync(skillPath)).toBe(true); + + const content = fs.readFileSync(skillPath, "utf-8"); + expect(content.startsWith("---\n")).toBe(true); + expect(content).toContain("references/workflows/"); + expect(content).not.toContain(".claude/skills"); + expect(content).not.toContain("AskUserQuestion"); + expect(content).not.toContain("mcp__claude-in-chrome"); + } + }); + + test("AGENTS.md documents the slash-to-skill compatibility contract", () => { + const content = fs.readFileSync(path.join(ROOT, "AGENTS.md"), "utf-8"); + expect(content).toContain("/browse"); + expect(content).toContain("gstack-browse"); + expect(content).toContain("/qa"); + expect(content).toContain("gstack-qa"); + }); +}); + +describe("Codex alias wrappers", () => { + test("gstack-browse 
wrapper emits the Codex skill name and slash alias", () => { + const result = Bun.spawnSync( + ["bash", path.join(ROOT, "bin", "gstack-browse"), "https://example.com"], + { + stdout: "pipe", + stderr: "pipe", + }, + ); + + expect(result.exitCode).toBe(0); + const stdout = result.stdout.toString(); + expect(stdout).toContain("Codex skill: gstack-browse"); + expect(stdout).toContain("Compatibility alias: /browse"); + expect(stdout).toContain("https://example.com"); + }); + + test("gstack-qa wrapper emits the Codex skill name and workflow doc", () => { + const result = Bun.spawnSync(["bash", path.join(ROOT, "bin", "gstack-qa")], { + stdout: "pipe", + stderr: "pipe", + }); + + expect(result.exitCode).toBe(0); + const stdout = result.stdout.toString(); + expect(stdout).toContain("Codex skill: gstack-qa"); + expect(stdout).toContain("references/workflows/qa.md"); + }); +}); diff --git a/test/codex-e2e.test.ts b/test/codex-e2e.test.ts new file mode 100644 index 0000000..8f246b1 --- /dev/null +++ b/test/codex-e2e.test.ts @@ -0,0 +1,47 @@ +import { describe, test, expect } from "bun:test"; +import { runCodexSkillTest } from "./helpers/codex-session-runner"; +import * as path from "path"; + +const ROOT = path.resolve(import.meta.dir, ".."); +const codexEvalsEnabled = !!process.env.CODEX_EVALS; +const describeCodex = codexEvalsEnabled ? 
describe : describe.skip; +const runId = new Date().toISOString().replace(/[:.]/g, "").replace("T", "-").slice(0, 15); + +describeCodex("Codex skill E2E tests", () => { + test("Codex loads repo-local skills without frontmatter/runtime errors", async () => { + const result = await runCodexSkillTest({ + prompt: [ + "Use the `gstack-browse` skill from this repository.", + "Do not run any browser commands.", + "Summarize the first two setup steps in one short paragraph.", + ].join(" "), + workingDirectory: ROOT, + timeout: 90_000, + testName: "codex-skill-load", + runId, + }); + + expect(result.exitReason).toBe("success"); + expect(result.stderr).not.toContain("failed to load skill"); + expect(result.output.toLowerCase()).toContain("setup"); + }, 120_000); + + test("Codex can execute a simple shell-backed workflow prompt", async () => { + const result = await runCodexSkillTest({ + prompt: [ + "Run `pwd` in the current workspace and report the result using the required four-part output contract.", + "Keep it concise.", + ].join(" "), + workingDirectory: ROOT, + timeout: 90_000, + testName: "codex-basic-shell", + runId, + }); + + expect(result.exitReason).toBe("success"); + expect(result.toolCalls.some((call) => String(call.input.command || "").includes("pwd"))).toBe( + true, + ); + // Compare against the workspace root passed as workingDirectory, not a machine-specific absolute path. + expect(result.output).toContain(ROOT); + }, 120_000); +}); diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 264cb90..b9c59d2 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -129,6 +129,18 @@ describe('gen-skill-docs', () => { expect(browseTmpl).toContain('{{COMMAND_REFERENCE}}'); expect(browseTmpl).toContain('{{SNAPSHOT_FLAGS}}'); }); + + test('generated browse setup block includes Codex install paths', () => { + const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8'); + expect(content).toContain('.codex/skills/gstack/browse/dist/browse'); +
expect(content).toContain('.agents/skills/gstack/browse/dist/browse'); + }); + + test('generated update check block includes Codex install paths', () => { + const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8'); + expect(content).toContain('.codex/skills/gstack/bin/gstack-update-check'); + expect(content).toContain('.agents/skills/gstack/bin/gstack-update-check'); + }); }); /** diff --git a/test/helpers/codex-session-runner.test.ts b/test/helpers/codex-session-runner.test.ts new file mode 100644 index 0000000..43510bc --- /dev/null +++ b/test/helpers/codex-session-runner.test.ts @@ -0,0 +1,47 @@ +import { describe, test, expect } from "bun:test"; +import { parseCodexJSONL, sanitizeCodexTestName } from "./codex-session-runner"; + +const FIXTURE_LINES = [ + '{"type":"thread.started","thread_id":"abc"}', + '{"type":"turn.started"}', + '{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"I will run pwd."}}', + '{"type":"item.started","item":{"id":"item_1","type":"command_execution","command":"/bin/zsh -lc pwd","aggregated_output":"","exit_code":null,"status":"in_progress"}}', + '{"type":"item.completed","item":{"id":"item_1","type":"command_execution","command":"/bin/zsh -lc pwd","aggregated_output":"/repo\\n","exit_code":0,"status":"completed"}}', + '{"type":"item.completed","item":{"id":"item_2","type":"agent_message","text":"Done."}}', + '{"type":"turn.completed","usage":{"input_tokens":100,"cached_input_tokens":20,"output_tokens":10}}', +]; + +describe("sanitizeCodexTestName", () => { + test("strips leading slashes and replaces path separators", () => { + expect(sanitizeCodexTestName("/qa/smoke")).toBe("qa-smoke"); + expect(sanitizeCodexTestName("plain-name")).toBe("plain-name"); + }); +}); + +describe("parseCodexJSONL", () => { + test("parses agent messages, command executions, turns, and usage", () => { + const parsed = parseCodexJSONL(FIXTURE_LINES); + expect(parsed.transcript).toHaveLength(7); + 
expect(parsed.agentMessages).toEqual(["I will run pwd.", "Done."]); + expect(parsed.toolCalls).toEqual([ + { + tool: "command_execution", + input: { command: "/bin/zsh -lc pwd" }, + output: "/repo\n", + }, + ]); + expect(parsed.turnCount).toBe(1); + expect(parsed.usageTokens).toBe(130); + }); + + test("skips malformed lines without throwing", () => { + const parsed = parseCodexJSONL([ + '{"type":"turn.started"}', + "not json", + '{"type":"turn.completed","usage":{"input_tokens":1,"cached_input_tokens":0,"output_tokens":2}}', + ]); + expect(parsed.transcript).toHaveLength(2); + expect(parsed.turnCount).toBe(1); + expect(parsed.usageTokens).toBe(3); + }); +}); diff --git a/test/helpers/codex-session-runner.ts b/test/helpers/codex-session-runner.ts new file mode 100644 index 0000000..705d5d9 --- /dev/null +++ b/test/helpers/codex-session-runner.ts @@ -0,0 +1,307 @@ +/** + * Codex CLI subprocess runner for skill E2E testing. + * + * Spawns `codex exec --json` as an independent process, parses JSONL events, + * and extracts command executions plus final agent messages. 
/**
 * Runner implementation.
 *
 * Depends only on Node built-ins (`fs`, `path`, `os`) plus the `Bun` runtime
 * global, which is used to spawn the `codex` process.
 */

import * as fs from "fs";
import * as path from "path";
import * as os from "os";

/** Directory under the user's home that holds per-run artifacts and the heartbeat file. */
const GSTACK_DEV_DIR = path.join(os.homedir(), ".gstack-dev");

/** Heartbeat file rewritten after each completed command when both `runId` and `testName` are set. */
const HEARTBEAT_PATH = path.join(GSTACK_DEV_DIR, "codex-e2e-live.json");

/**
 * Turns a test name into a flat file-name stem.
 *
 * Leading slashes are dropped and every remaining `/` becomes `-`
 * (for example `/qa/smoke` becomes `qa-smoke`).
 *
 * @param name - Test name, possibly written like a slash command or path.
 * @returns The name with no `/` characters.
 */
export function sanitizeCodexTestName(name: string): string {
  return name.replace(/^\/+/, "").replace(/\//g, "-");
}

/**
 * Writes `data` to `filePath` via a sibling `.tmp` file followed by a rename,
 * so readers never observe a partially written file.
 */
function atomicWriteSync(filePath: string, data: string): void {
  const tmp = filePath + ".tmp";
  fs.writeFileSync(tmp, data);
  fs.renameSync(tmp, filePath);
}

/** Token and turn accounting for one Codex run. */
export interface CodexCostEstimate {
  estimatedTokens: number;
  estimatedCost: number;
  turnsUsed: number;
}

/** One completed tool invocation extracted from the JSONL event stream. */
export interface CodexToolCall {
  tool: string;
  input: Record<string, unknown>;
  output: string;
}

/** Everything a test needs to assert on after a Codex run. */
export interface CodexSkillTestResult {
  toolCalls: CodexToolCall[];
  browseErrors: string[];
  exitReason: string;
  duration: number;
  output: string;
  costEstimate: CodexCostEstimate;
  transcript: any[];
  stderr: string;
}

/** Structured view of a Codex JSONL event stream. */
export interface ParsedCodexJSONL {
  transcript: any[];
  toolCalls: CodexToolCall[];
  agentMessages: string[];
  turnCount: number;
  usageTokens: number;
}

/** Output fragments that mark a run as a runtime failure even when the process exits 0. */
const BROWSE_ERROR_PATTERNS = [
  /failed to load skill .*missing YAML frontmatter/i,
  /Unknown command: \w+/,
  /Unknown snapshot flag: .+/,
  /ERROR: browse binary not found/,
  /Server failed to start/,
  /no such file or directory.*browse/i,
];

/** Loosely typed shape of the event fields this module reads; every field is validated before use. */
interface CodexEvent {
  type?: unknown;
  usage?: {
    input_tokens?: unknown;
    cached_input_tokens?: unknown;
    output_tokens?: unknown;
  } | null;
  item?: {
    type?: unknown;
    text?: unknown;
    command?: unknown;
    aggregated_output?: unknown;
  } | null;
}

/**
 * Parses one JSONL line into an event object.
 *
 * @returns The event, or `null` when the line is not valid JSON or does not
 *   decode to a plain object (e.g. `null`, a number, or an array).
 */
function parseEvent(line: string): CodexEvent | null {
  try {
    const value: unknown = JSON.parse(line);
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
      return null;
    }
    return value as CodexEvent;
  } catch {
    return null;
  }
}

/** Returns `value` when it is a finite number, otherwise 0, so bad usage fields cannot corrupt the sum. */
function asTokenCount(value: unknown): number {
  return typeof value === "number" && Number.isFinite(value) ? value : 0;
}

/**
 * Extracts agent messages, command executions, turn count, and token usage
 * from raw `codex exec --json` output lines.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects are skipped without throwing.
 *
 * @param lines - Raw stdout lines, one JSON event per line.
 * @returns The accepted events in order plus the values derived from them.
 */
export function parseCodexJSONL(lines: string[]): ParsedCodexJSONL {
  const parsed: ParsedCodexJSONL = {
    transcript: [],
    toolCalls: [],
    agentMessages: [],
    turnCount: 0,
    usageTokens: 0,
  };

  for (const line of lines) {
    if (!line.trim()) continue;

    const event = parseEvent(line);
    if (event === null) continue;
    parsed.transcript.push(event);

    if (event.type === "turn.completed") {
      parsed.turnCount += 1;
      parsed.usageTokens +=
        asTokenCount(event.usage?.input_tokens) +
        asTokenCount(event.usage?.cached_input_tokens) +
        asTokenCount(event.usage?.output_tokens);
    }

    if (event.type === "item.completed") {
      const item = event.item;
      if (item?.type === "agent_message" && typeof item.text === "string") {
        parsed.agentMessages.push(item.text);
      }
      if (item?.type === "command_execution") {
        parsed.toolCalls.push({
          tool: "command_execution",
          input: { command: item.command || "" },
          output: typeof item.aggregated_output === "string" ? item.aggregated_output : "",
        });
      }
    }
  }

  return parsed;
}

/**
 * Runs `codex exec --json` with the prompt on stdin and collects the result.
 *
 * While the process runs, each completed command is echoed to stderr. When
 * `runId` is given, progress and the raw JSONL stream are mirrored under
 * `~/.gstack-dev/codex-e2e-runs/<runId>/`. All artifact writes are best-effort
 * and never fail the run.
 *
 * @param options.prompt - Prompt text written to the process's stdin.
 * @param options.workingDirectory - Directory passed to `codex` via `-C`.
 * @param options.timeout - Milliseconds before the process is killed (default 120000).
 * @param options.testName - Name used for artifact files and the heartbeat.
 * @param options.runId - Groups artifacts of one test session in a shared directory.
 * @returns Parsed tool calls, the last agent message, stderr, matched error
 *   patterns, and an `exitReason` of `success`, `timeout`, `exit_code_<n>`,
 *   or `error_runtime`.
 */
export async function runCodexSkillTest(options: {
  prompt: string;
  workingDirectory: string;
  timeout?: number;
  testName?: string;
  runId?: string;
}): Promise<CodexSkillTestResult> {
  const { prompt, workingDirectory, timeout = 120_000, testName, runId } = options;

  const startTime = Date.now();
  const startedAt = new Date().toISOString();
  let runDir: string | null = null;
  const safeName = testName ? sanitizeCodexTestName(testName) : null;

  if (runId) {
    try {
      runDir = path.join(GSTACK_DEV_DIR, "codex-e2e-runs", runId);
      fs.mkdirSync(runDir, { recursive: true });
    } catch {
      /* non-fatal */
    }
  }

  const proc = Bun.spawn(
    [
      "codex",
      "exec",
      "--json",
      "--dangerously-bypass-approvals-and-sandbox",
      "--skip-git-repo-check",
      "-C",
      workingDirectory,
      "-",
    ],
    {
      stdin: "pipe",
      stdout: "pipe",
      stderr: "pipe",
    },
  );

  proc.stdin.write(prompt);
  proc.stdin.end();

  let stderr = "";
  let timedOut = false;

  const timeoutId = setTimeout(() => {
    timedOut = true;
    proc.kill();
  }, timeout);

  const stderrPromise = new Response(proc.stderr).text();
  const reader = proc.stdout.getReader();
  const decoder = new TextDecoder();
  const collectedLines: string[] = [];
  let buf = "";
  let liveTurnCount = 0;
  let liveToolCount = 0;

  /** Echoes one completed command to stderr, progress.log, and the heartbeat file (all best-effort). */
  const reportCommand = (command: unknown): void => {
    liveToolCount += 1;
    const elapsed = Math.round((Date.now() - startTime) / 1000);
    const progressLine = ` [${elapsed}s] codex turn ${liveTurnCount} command #${liveToolCount}: ${String(command)}\n`;
    process.stderr.write(progressLine);

    if (runDir) {
      try {
        fs.appendFileSync(path.join(runDir, "progress.log"), progressLine);
      } catch {
        /* non-fatal */
      }
    }

    if (runId && testName) {
      try {
        atomicWriteSync(
          HEARTBEAT_PATH,
          JSON.stringify(
            {
              runId,
              startedAt,
              currentTest: testName,
              status: "running",
              turn: liveTurnCount,
              toolCount: liveToolCount,
              lastTool: command,
              lastToolAt: new Date().toISOString(),
              elapsedSec: elapsed,
            },
            null,
            2,
          ) + "\n",
        );
      } catch {
        /* non-fatal */
      }
    }
  };

  /** Buffers one raw stdout line for later parsing, reports live progress, and mirrors it to disk. */
  const handleLine = (line: string): void => {
    if (!line.trim()) return;
    collectedLines.push(line);

    try {
      // Malformed lines yield null here; parseCodexJSONL skips them again later.
      const event = parseEvent(line);
      if (event?.type === "turn.completed") {
        liveTurnCount += 1;
      }
      if (event?.type === "item.completed" && event.item?.type === "command_execution") {
        reportCommand(event.item.command);
      }
    } catch {
      /* progress reporting is best-effort */
    }

    if (runDir && safeName) {
      try {
        fs.appendFileSync(path.join(runDir, `${safeName}.jsonl`), line + "\n");
      } catch {
        /* non-fatal */
      }
    }
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buf += decoder.decode(value, { stream: true });
      const lines = buf.split("\n");
      // The last piece may be an incomplete line; keep it for the next chunk.
      buf = lines.pop() ?? "";
      for (const line of lines) handleLine(line);
    }
    // Flush bytes the streaming decoder is still holding back.
    buf += decoder.decode();
  } catch {
    // fall through to exit handling
  }

  // A final line without a trailing newline gets the same treatment as every other line.
  handleLine(buf);
  buf = "";

  let exitCode = 0;
  try {
    stderr = await stderrPromise;
    exitCode = await proc.exited;
  } finally {
    // Always release the timer so a failed wait cannot leave it pending.
    clearTimeout(timeoutId);
  }

  const duration = Date.now() - startTime;
  const parsed = parseCodexJSONL(collectedLines);
  const browseErrors: string[] = [];
  const allText = collectedLines.join("\n") + "\n" + stderr;

  for (const pattern of BROWSE_ERROR_PATTERNS) {
    const match = allText.match(pattern);
    if (match) browseErrors.push(match[0].slice(0, 200));
  }

  let exitReason = "success";
  if (timedOut) exitReason = "timeout";
  else if (exitCode !== 0) exitReason = `exit_code_${exitCode}`;

  // A clean exit still counts as a failure when a known error pattern appeared.
  if (browseErrors.length > 0 && exitReason === "success") {
    exitReason = "error_runtime";
  }

  const output = parsed.agentMessages[parsed.agentMessages.length - 1] || "";

  if (browseErrors.length > 0 || exitReason !== "success") {
    try {
      const failureDir = runDir || path.join(workingDirectory, ".gstack", "test-transcripts");
      fs.mkdirSync(failureDir, { recursive: true });
      const failureName = safeName
        ? `${safeName}-failure.json`
        : `codex-e2e-${new Date().toISOString().replace(/[:.]/g, "-")}.json`;
      fs.writeFileSync(
        path.join(failureDir, failureName),
        JSON.stringify(
          {
            prompt: prompt.slice(0, 500),
            testName: testName || "unknown",
            exitReason,
            browseErrors,
            duration,
            stderr: stderr.slice(0, 4000),
            output: output.slice(0, 1000),
          },
          null,
          2,
        ),
      );
    } catch {
      /* non-fatal */
    }
  }

  return {
    toolCalls: parsed.toolCalls,
    browseErrors,
    exitReason,
    duration,
    output,
    stderr,
    costEstimate: {
      estimatedTokens: parsed.usageTokens,
      estimatedCost: 0,
      turnsUsed: parsed.turnCount,
    },
    transcript: parsed.transcript,
  };
}