diff --git a/.gitignore b/.gitignore index cc41a3e..9010bcc 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ bun.lock .env.local .env.* !.env.example +.DS_Store +gemini-port/generated/ diff --git a/TODOS.md b/TODOS.md index 4916c23..eaecd93 100644 --- a/TODOS.md +++ b/TODOS.md @@ -388,3 +388,23 @@ ### Auto-upgrade mode + smart update check - Config CLI (`bin/gstack-config`), auto-upgrade via `~/.gstack/config.yaml`, 12h cache TTL, exponential snooze backoff (24h→48h→1wk), "never ask again" option, vendored copy sync on upgrade **Completed:** v0.3.8 + +### Gemini CLI port +- `gemini-port/` with native Gemini skill system integration +- `gstack-browse`: stateful Playwright engine with 14 commands, `@eN` ref selector, annotated screenshots, diff mode, console/network capture +- `gstack-setup-browser-cookies`: macOS Keychain cookie extraction with full attribute normalization +- `install.sh`: generates Gemini-adapted versions of 6 main skills at install time (single source of truth), links all 8 skills via `gemini skills link` +- Output truncation for `text`/`html` commands (50k char limit) +**Completed:** feat/gemini-port + +## Gemini CLI Port + +### Linux/Windows support for gstack-setup-browser-cookies + +**What:** The `setup-cookies.js` script relies on `chrome-cookies-secure` which uses the macOS Keychain. Add support for GNOME Keyring (Linux) and DPAPI (Windows). + +**Why:** Cross-platform cookie import. Currently macOS-only. + +**Effort:** M +**Priority:** P3 +**Depends on:** Linux/Windows cookie decryption (Browse section) diff --git a/gemini-port/README.md b/gemini-port/README.md new file mode 100644 index 0000000..7bcd6f2 --- /dev/null +++ b/gemini-port/README.md @@ -0,0 +1,94 @@ +# Gemini gstack: Team of Specialists for Gemini CLI + +This directory ports the [gstack](https://github.com/garrytan/gstack) toolkit to Gemini CLI. + +gstack transforms Gemini CLI from a general-purpose assistant into a team of opinionated specialists. 
+ +## Architecture + +Only two components live permanently in this directory — the ones that are genuinely Gemini-specific: + +| Directory | What it is | +| :--- | :--- | +| `gstack-browse/` | Stateful Playwright browser engine for Gemini CLI | +| `gstack-setup-browser-cookies/` | macOS Keychain cookie extractor for authenticated testing | + +The 6 "content" skills (ship, reviewer, qa, retro, ceo, eng-lead) are **generated at install time** from the main gstack repo. `install.sh` strips Claude-specific frontmatter and tooling from each SKILL.md and links the result into Gemini CLI. There is no duplication — the main repo is the single source of truth. + +## Installation + +```bash +# From the gstack repo root: +bash gemini-port/install.sh + +# Or with workspace scope (installs for a single Gemini workspace): +bash gemini-port/install.sh --scope workspace +``` + +Then reload in Gemini: +``` +/skills reload +``` + +That's it. The script handles npm deps, skill generation, and `gemini skills link` for all 8 skills. + +## Updating + +When the main gstack skills evolve, regenerate the Gemini versions by re-running: + +```bash +bash gemini-port/install.sh +``` + +## Skills + +| Command | Role | Focus | +| :--- | :--- | :--- | +| `gstack-ceo` | **CEO / Founder** | Product vision, 10-star experience, problem scoping. | +| `gstack-eng-lead` | **Tech Lead** | Architecture, state machines, test matrices, Mermaid diagrams. | +| `gstack-reviewer` | **Paranoid Reviewer** | Structural audit, race conditions, N+1 queries, security. | +| `gstack-ship` | **Release Engineer** | Git sync, test execution, PR creation. | +| `gstack-browse` | **QA Engineer** | Stateful browser automation via Playwright. | +| `gstack-qa` | **QA Lead** | Automated regression testing based on git diffs. | +| `gstack-setup-browser-cookies` | **Session Manager** | macOS Keychain cookie extraction for authenticated testing. | +| `gstack-retro` | **EM Retro** | Data-driven weekly engineering retrospectives. 
| + +## Technical Notes + +### Stateful Browsing + +Unlike the original Claude gstack (which requires a persistent Bun daemon), this port uses a **chained command architecture** in Playwright. A sequence of actions (`goto -> click -> fill -> screenshot`) runs in a single Node.js execution context, preserving DOM state without a background process. + +### Cookie Extraction + +`gstack-setup-browser-cookies` uses the macOS Keychain to decrypt local Chrome cookies, normalizing `SameSite` and `Expires` attributes for Playwright ingestion. + +### Skill Generation + +`install.sh` applies the following transforms to each main SKILL.md: + +1. Strip Claude frontmatter (`allowed-tools`, `version`) +2. Add minimal Gemini frontmatter (`name`, `description`) +3. Remove the update-check block (Claude-specific self-upgrade logic) +4. Replace the `$B` binary discovery block with `B="node $HOME/.gemini/skills/gstack-browse/scripts/browse.js"` +5. Remove HTML generator comments + +Generated files land in `generated/` (gitignored — they are build artifacts). + +## Rebuilding Skill Bundles + +The `.skill` files are pre-packaged archives for `gemini skills install` (offline use). After modifying source files, rebuild with: + +```bash +# Rebuild gstack-browse +node /path/to/skill-creator/scripts/package_skill.cjs gstack-browse + +# Rebuild gstack-setup-browser-cookies +node /path/to/skill-creator/scripts/package_skill.cjs gstack-setup-browser-cookies +``` + +The `SKILL.md` files in each skill subdirectory are the authoritative source. The `.skill` bundles are distribution artifacts. 
+ +--- +*Ported by Rémi Al Ajroudi from [gstack](https://github.com/garrytan/gstack).* + diff --git a/gemini-port/gstack-browse.skill b/gemini-port/gstack-browse.skill new file mode 100644 index 0000000..744ad28 Binary files /dev/null and b/gemini-port/gstack-browse.skill differ diff --git a/gemini-port/gstack-browse/SKILL.md b/gemini-port/gstack-browse/SKILL.md new file mode 100644 index 0000000..62d878c --- /dev/null +++ b/gemini-port/gstack-browse/SKILL.md @@ -0,0 +1,190 @@ +--- +name: gstack-browse +description: QA Engineer Mode (Browsing). Use when asked to test a UI, navigate a site, verify rendering, fill forms, or perform automated browser tasks. Gives the agent "eyes" and hands to interact with live applications. +--- + +# QA Engineer Mode (Browsing) + +You are acting as a QA Engineer with a live Playwright browser. Your goal is to interact with web applications, verify UI state, document bugs, and catch regressions. + +The browser uses a **persistent Chromium profile** at `~/.gstack/gemini-browser-data` — cookies, login sessions, and storage persist across runs. SSL certificate errors are ignored by default (supports local/staging servers). + +**All commands run in the same browser context** — chain them in a single `run_shell_command` call to preserve state (DOM, cookies, navigation history). + +--- + +## Setup + +Before using any browse command, ensure Node.js and the Playwright dependency are installed: + +```bash +cd <skills-dir>/gstack-browse && node -e "require('playwright')" 2>/dev/null || npm install +``` + +Then run commands via: +```bash +node <skills-dir>/gstack-browse/scripts/browse.js <command> [args...] [<command2> [args...] ...] +``` + +For brevity in this document, `$B` means `node <skills-dir>/gstack-browse/scripts/browse.js`, where `<skills-dir>` is the directory the skill was linked into (for example `$HOME/.gemini/skills`). + +--- + +## Core QA Patterns + +### 1. Verify a page loads correctly +```bash +$B goto https://yourapp.com +$B text # content loads? +$B console --errors # JS errors? +$B network --errors # failed requests? +$B is visible ".main-content" # key elements present? +``` + +### 2. 
Test a login flow +```bash +$B goto https://app.com/login +$B snapshot -i # see all interactive elements → get @eN refs +$B fill @e2 "user@test.com" +$B fill @e3 "password" +$B click @e4 # submit +$B snapshot -D # diff: what changed after submit? +$B is visible ".dashboard" # success state? +``` + +### 3. Verify an action worked +```bash +$B snapshot # baseline DOM snapshot +$B click @e3 # do something +$B snapshot -D # unified diff — shows exactly what changed +``` + +### 4. Visual evidence for bug reports +```bash +$B snapshot -i -a -o /tmp/annotated.png # annotated screenshot with @eN labels +$B screenshot /tmp/bug.png # plain full-page screenshot +$B console --errors # error log +``` + +### 5. Find all clickable elements (including non-ARIA divs) +```bash +$B snapshot -C # includes cursor:pointer, onclick elements +$B click @c1 +``` + +### 6. Assert element states +```bash +$B is visible ".modal" +$B is enabled "#submit-btn" +$B is disabled "#submit-btn" +$B is checked "#agree-checkbox" +$B is editable "#name-field" +``` + +### 7. Test responsive layouts +```bash +$B viewport 375x812 +$B screenshot /tmp/mobile.png +$B viewport 1280x720 +``` + +### 8. Fill forms and select dropdowns +```bash +$B fill "#username" "alice" +$B select "#country" "United States" +$B press Tab +$B press Enter +``` + +### 9. Test page with authentication (cookies already imported) +```bash +$B goto https://app.com/dashboard # session cookies loaded from persistent profile +$B snapshot -i -a -o /tmp/dash.png +``` + +--- + +## Available Commands + +### Navigation +| Command | Args | Description | +|---------|------|-------------| +| `goto` | `<url>` | Navigate to a URL. Waits for network idle. | +| `wait` | `<selector>` or `<ms>` | Wait for CSS selector to appear, or pause N milliseconds. | +| `scroll` | `<direction>` or `<selector>` | Scroll page (`top`, `bottom`, `up`, `down`) or bring element into view. | +| `viewport` | `<WxH>` | Set browser viewport. Example: `375x812`, `1280x720`. 
| + +### Interaction +| Command | Args | Description | +|---------|------|-------------| +| `click` | `` | Click an element. Accepts CSS selectors or `@eN` refs. | +| `fill` | ` ` | Fill an input field. Wrap multi-word text in quotes. | +| `hover` | `` | Mouse over an element (reveals tooltips, dropdowns). | +| `press` | `` | Keyboard press. Examples: `Enter`, `Tab`, `Escape`, `ArrowDown`. | +| `select` | ` ` | Choose an option in a ` element by value or label. + * The option label is tried first; if that attempt fails, the option value is tried. + * The selector may be an @eN ref (looked up in ctx.refs) or a plain selector. + */ + select: { + argc: 2, + fn: async (page, [selector, value], ctx) => { + log(`Selecting "${value}" in ${selector}...`); + const resolved = ctx.refs[selector] || selector; + await page.waitForSelector(resolved, { timeout: 10000 }); + await page.selectOption(resolved, { label: value }).catch(() => + page.selectOption(resolved, { value }) + ); + log('Selected.'); + } + }, + + /** + * text + * Output the page's visible text content. + * Output is cut off after 50,000 characters. + */ + text: { + argc: 0, + fn: async (page) => { + const textContent = await page.evaluate(() => document.body.innerText); + out(textContent.slice(0, 50000)); + } + }, + + /** + * html + * Output the page's full HTML. + * Output is cut off after 50,000 characters. + */ + html: { + argc: 0, + fn: async (page) => { + const html = await page.evaluate(() => document.documentElement.outerHTML); + out(html.slice(0, 50000)); + } + }, + + /** + * links + * List every a[href] element on the page as its href plus link text (text capped at 80 chars). + * Links whose href starts with "javascript:" are left out. No status check is performed. + */ + links: { + argc: 0, + fn: async (page) => { + const links = await page.evaluate(() => { + return Array.from(document.querySelectorAll('a[href]')).map(a => ({ + href: a.href, + text: (a.textContent || '').trim().slice(0, 80) + })).filter(l => l.href && !l.href.startsWith('javascript:')); + }); + if (links.length === 0) { + out('No links found.'); + } else { + out(`${links.length} links:`); + links.forEach(l => out(` ${l.href} ${l.text ? `"${l.text}"` : ''}`)); + } + } + }, + + /** + * snapshot [-i] [-C] [-D] [-a] [-o <path>] + * + * Without flags: Full accessibility tree text snapshot. 
+ * -i Interactive elements only (inputs, buttons, links, selects). + * -C Include non-ARIA clickable divs (cursor:pointer, onclick). + * Also switches to the interactive-element listing, even without -i. + * -D Diff mode: show changes since last snapshot call. + * -a -o <path> Annotated screenshot with @eN labels saved to <path>. + * When both -a and -o are given, only the screenshot is taken: + * no text snapshot is printed or saved for a later -D. + */ + snapshot: { + parseArgs: (argv, offset) => { + return parseFlags(argv, offset, ['-i', '-C', '-D', '-a', '-o']); + }, + fn: async (page, flags) => { + const interactive = flags['-i'] || flags['-C'] || false; + const clickable = flags['-C'] || false; + const diff = flags['-D'] || false; + const annotate = flags['-a'] || false; + const outputPath = flags['-o']; + + if (annotate && outputPath) { + log(`Annotated screenshot → ${outputPath}`); + const elements = await getInteractiveElements(page, clickable); + await screenshotAnnotated(page, elements, outputPath); + out(`Annotated screenshot saved: ${outputPath} (${elements.length} elements labeled)`); + return; + } + + // Build text snapshot + if (interactive) { + const elements = await getInteractiveElements(page, clickable); + /* NOTE(review): refs are written to page.__gstack_refs here, while other commands look them up via ctx.refs; confirm the two are wired together outside this view. */ + // Store @eN → CSS selector map in context + page.__gstack_refs = {}; + elements.forEach(el => { page.__gstack_refs[el.ref] = el.selector; }); + + const lines = elements.map(el => { + const parts = [el.ref.padEnd(5), el.tag.padEnd(10)]; + if (el.type) parts.push(el.type.padEnd(8)); + if (el.label) parts.push(`"${el.label.slice(0, 50)}"`); + parts.push(el.selector); + return parts.join(' '); + }); + + const snapshotText = lines.join('\n'); + if (diff) { + const prev = loadSnapshot(); + out(diffText(prev, snapshotText)); + } else { + out(`${elements.length} interactive element${elements.length !== 1 ? 's' : ''}:`); + out(snapshotText); + } + saveSnapshot(snapshotText); + } else { + // Full accessibility tree + const snapshotText = await page.evaluate(() => { + function nodeText(el, depth) { + const tag = el.tagName ? 
el.tagName.toLowerCase() : ''; + /* script, style, noscript and svg subtrees are skipped entirely */ + if (['script', 'style', 'noscript', 'svg'].includes(tag)) return ''; + const text = (el.textContent || '').trim().slice(0, 120); + const role = el.getAttribute ? el.getAttribute('role') : ''; + const label = el.getAttribute ? (el.getAttribute('aria-label') || el.getAttribute('alt') || '') : ''; + /* indentation stops growing after depth 6 */ + const indent = ' '.repeat(Math.min(depth, 6)); + let line = ''; + if (text || role || label) { + line = `${indent}${tag}${role ? `[${role}]` : ''}${label ? ` "${label}"` : ''}${text ? `: ${text.slice(0, 80)}` : ''}`; + } + const children = Array.from(el.children || []).map(c => nodeText(c, depth + 1)).filter(Boolean); + return [line, ...children].filter(Boolean).join('\n'); + } + return nodeText(document.body, 0); + }); + + if (diff) { + const prev = loadSnapshot(); + out(diffText(prev, snapshotText)); + } else { + out(snapshotText); + } + saveSnapshot(snapshotText); + } + } + }, + + /** + * is <state> <selector> + * Assert element state. States: visible, hidden, enabled, disabled, checked, editable. + * Exits non-zero if assertion fails. + * A failed assertion or an unknown state sets process.exitCode to 1; an error thrown + * while checking is treated as a failed assertion. + * The selector may be an @eN ref (looked up in ctx.refs) or a plain selector. + */ + is: { + argc: 2, + fn: async (page, [state, selector], ctx) => { + const resolved = ctx.refs[selector] || selector; + log(`Checking ${selector} is ${state}...`); + + let result = false; + try { + switch (state) { + case 'visible': result = await page.isVisible(resolved, { timeout: 5000 }); break; + case 'hidden': result = !(await page.isVisible(resolved, { timeout: 5000 })); break; + case 'enabled': result = await page.isEnabled(resolved, { timeout: 5000 }); break; + case 'disabled': result = await page.isDisabled(resolved, { timeout: 5000 }); break; + case 'checked': result = await page.isChecked(resolved, { timeout: 5000 }); break; + case 'editable': result = await page.isEditable(resolved, { timeout: 5000 }); break; + default: + process.stderr.write(`Unknown state: ${state}. 
Valid: visible, hidden, enabled, disabled, checked, editable\n`); + process.exitCode = 1; + return; + } + } catch (e) { + result = false; + } + + if (result) { + out(`✓ ${selector} is ${state}`); + } else { + out(`✗ FAIL: ${selector} is NOT ${state}`); + process.exitCode = 1; + } + } + }, + + /** + * count <selector> + * Output the number of elements matching the selector. + * NOTE(review): the selector goes to page.locator() as-is; unlike select/is/scroll + * there is no @eN lookup through ctx.refs here. Confirm whether that is intended. + */ + count: { + argc: 1, + fn: async (page, [selector]) => { + const n = await page.locator(selector).count(); + out(`${n} element${n !== 1 ? 's' : ''} match "${selector}"`); + } + }, + + /** + * wait <selector|ms> + * Wait for a CSS selector to appear, or pause for N milliseconds. + * Example: wait .modal-open wait 2000 + * An all-digit argument is a pause in milliseconds; anything else is awaited as a + * selector for up to 15 seconds. + * NOTE(review): the selector is not resolved through ctx.refs, so @eN refs are not + * translated here. Confirm whether that is intended. + */ + wait: { + argc: 1, + fn: async (page, [arg]) => { + if (/^\d+$/.test(arg)) { + const ms = parseInt(arg, 10); + log(`Waiting ${ms}ms...`); + await page.waitForTimeout(ms); + log(`Done waiting.`); + } else { + log(`Waiting for "${arg}"...`); + await page.waitForSelector(arg, { timeout: 15000 }); + log(`"${arg}" appeared.`); + } + } + }, + + /** + * scroll <direction|selector> + * Scroll the page or an element into view. + * direction: top | bottom | up | down + * (up and down move by 80% of the window height) + * selector: any CSS selector — scrolls that element into view + * (an @eN ref is looked up in ctx.refs first; waits up to 5 seconds for the element) + */ + scroll: { + argc: 1, + fn: async (page, [arg], ctx) => { + if (['top', 'bottom', 'up', 'down'].includes(arg)) { + log(`Scrolling ${arg}...`); + await page.evaluate((dir) => { + if (dir === 'top') window.scrollTo(0, 0); + else if (dir === 'bottom') window.scrollTo(0, document.body.scrollHeight); + else if (dir === 'up') window.scrollBy(0, -window.innerHeight * 0.8); + else if (dir === 'down') window.scrollBy(0, window.innerHeight * 0.8); + }, arg); + } else { + const resolved = ctx.refs[arg] || arg; + log(`Scrolling ${resolved} into view...`); + await page.waitForSelector(resolved, { timeout: 5000 }); + await page.locator(resolved).scrollIntoViewIfNeeded(); + } + log('Scrolled.'); + } + }, + + /** + * viewport <WxH> + * Resize the browser viewport. 
Example: viewport 375x812 viewport 1280x720 + * Width and height may be separated by x, X or ×; any other format is rejected on + * stderr and process.exitCode is set to 1. + */ + viewport: { + argc: 1, + fn: async (page, [dimensions]) => { + const match = dimensions.match(/^(\d+)[x×](\d+)$/i); + if (!match) { + process.stderr.write(`Invalid viewport format: "${dimensions}". Use WxH, e.g. 375x812\n`); + process.exitCode = 1; + return; + } + const width = parseInt(match[1], 10); + const height = parseInt(match[2], 10); + log(`Setting viewport to ${width}x${height}...`); + await page.setViewportSize({ width, height }); + log('Viewport set.'); + } + }, + + /** + * js <code> + * Execute JavaScript in the page context. Result is printed to stdout. + * Wrap code in quotes: js "document.title" + * The result is serialized with JSON.stringify (2-space indent) before printing. + */ + js: { + argc: 1, + fn: async (page, [code]) => { + log(`Executing JS...`); + const result = await page.evaluate(code); + out(JSON.stringify(result, null, 2)); + } + }, + + /** + * console [--errors] + * Output collected browser console messages. + * --errors Only show messages whose type is error or warning. + */ + console: { + parseArgs: (argv, offset) => { + return parseFlags(argv, offset, ['--errors']); + }, + fn: async (_page, flags) => { + const onlyErrors = flags['--errors'] || false; + const msgs = onlyErrors + ? consoleMessages.filter(m => m.type === 'error' || m.type === 'warning') + : consoleMessages; + if (msgs.length === 0) { + out(onlyErrors ? 'No console errors.' : 'No console messages.'); + } else { + out(`${msgs.length} console message${msgs.length !== 1 ? 's' : ''}:`); + msgs.forEach(m => out(` [${m.type}] ${m.text}`)); + } + } + }, + + /** + * network [--errors] + * Output network requests made during the session. + * Without --errors only the 30 most recent requests are printed, followed by a count of the rest. + * --errors Only show failed requests (4xx, 5xx, aborted). 
+ */ + network: { + parseArgs: (argv, offset) => { + return parseFlags(argv, offset, ['--errors']); + }, + fn: async (_page, flags) => { + const onlyErrors = flags['--errors'] || false; + if (onlyErrors) { + if (networkErrors.length === 0) { + out('No network errors.'); + } else { + out(`${networkErrors.length} network error${networkErrors.length !== 1 ? 's' : ''}:`); + networkErrors.forEach(e => out(` [${e.status || 'FAIL'}] ${e.url}${e.failure ? ` — ${e.failure}` : ''}`)); + } + } else { + const all = networkRequests; + out(`${all.length} request${all.length !== 1 ? 's' : ''}:`); + all.slice(-30).forEach(r => out(` ${r.method} ${r.url}`)); + if (all.length > 30) out(` ... and ${all.length - 30} more`); + } + } + }, + + /** + * cookie-import + * Import cookies from a JSON file (Playwright cookie format) into the session. + * Useful for sharing sessions exported from setup-browser-cookies. + */ + 'cookie-import': { + argc: 1, + fn: async (page, [filepath]) => { + log(`Importing cookies from ${filepath}...`); + const raw = fs.readFileSync(filepath, 'utf8'); + const cookies = JSON.parse(raw); + await page.context().addCookies(cookies); + log(`Imported ${cookies.length} cookies.`); + } + } + +}; + +// ─── Argument Parser ────────────────────────────────────────────────────────── + +function parseArgs(argv) { + const queue = []; + let i = 0; + while (i < argv.length) { + const cmdName = argv[i]; + const definition = COMMANDS[cmdName]; + if (!definition) { + process.stderr.write(`Error: Unknown command "${cmdName}"\n`); + process.stderr.write(`Available: ${Object.keys(COMMANDS).join(', ')}\n`); + process.exit(1); + } + + let cmdArgs; + let consumed; + + if (definition.parseArgs) { + const result = definition.parseArgs(argv, i + 1); + cmdArgs = result.flags; + consumed = result.consumed; + } else { + const argc = definition.argc; + if (i + argc >= argv.length && argc > 0) { + process.stderr.write(`Error: "${cmdName}" requires ${argc} argument(s).\n`); + process.exit(1); 
+ } + cmdArgs = argv.slice(i + 1, i + 1 + argc); + consumed = argc; + } + + queue.push({ name: cmdName, fn: definition.fn, args: cmdArgs }); + i += 1 + consumed; + } + return queue; +} + +// ─── Main ───────────────────────────────────────────────────────────────────── + +async function main() { + const argv = process.argv.slice(2); + + if (argv.length === 0 || argv[0] === '--help' || argv[0] === '-h') { + process.stderr.write([ + 'Usage: node browse.js [args...] [command2] [args...] ...', + '', + 'Commands:', + ' goto Navigate to URL', + ' screenshot Full-page screenshot', + ' click Click element', + ' fill Fill input field', + ' hover Mouse over element', + ' press Keyboard key press (Enter, Tab, Escape, ...)', + ' select Choose option in