From 01c8593cdbf23e910c2eb2d6cff45c3aa6329510 Mon Sep 17 00:00:00 2001
From: ebembi-crdb <ebembi@cockroachlabs.com>
Date: Thu, 26 Mar 2026 19:31:36 +0530
Subject: [PATCH 1/2] Add branch existence check for generated-diagrams
 (EDUENG-614)

Adds a script and daily workflow that verify every crdb_branch_name entry in
versions.csv exists as a branch in cockroachdb/generated-diagrams, and flags
entries where a proper release-X.Y branch now exists but versions.csv still
points to an older one (e.g. v26.2 -> release-26.1 after release-26.2
is created).

Files added:
- .github/scripts/validate_branch_existence.py
- .github/workflows/validate-branch-existence.yml
---
 .github/scripts/validate_branch_existence.py  | 189 ++++++++++++++++++
 .../workflows/validate-branch-existence.yml   | 133 ++++++++++++
 2 files changed, 322 insertions(+)
 create mode 100644 .github/scripts/validate_branch_existence.py
 create mode 100644 .github/workflows/validate-branch-existence.yml

diff --git a/.github/scripts/validate_branch_existence.py b/.github/scripts/validate_branch_existence.py
new file mode 100644
index 00000000000..8b067e53707
--- /dev/null
+++ b/.github/scripts/validate_branch_existence.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""
+validate_branch_existence.py  (EDUENG-614)
+
+For every row in src/current/_data/versions.csv, verifies that the listed
+crdb_branch_name exists as a branch in cockroachdb/generated-diagrams.
+
+Also flags entries where versions.csv still points to an older branch even
+though the "natural" release-X.Y branch for that version now exists
+(e.g. v26.2 pointing to release-26.1 after release-26.2 is created).
+
+Usage:
+  python .github/scripts/validate_branch_existence.py
+
+Exit codes:
+  0  all checks passed
+  1  one or more issues found
+  2  fatal error (versions.csv not found)
+
+Environment:
+  GITHUB_TOKEN    Optional. Raises API rate limit from 60 to 5000 req/hr.
+  GITHUB_ACTIONS  Set automatically in CI. Enables pr-comment.md output.
+"""
+
+import csv
+import json
+import os
+import re
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+GENERATED_DIAGRAMS_REPO = "cockroachdb/generated-diagrams"
+GITHUB_API_BASE = "https://api.github.com"
+VERSIONS_CSV = Path("src/current/_data/versions.csv")
+
+# ---------------------------------------------------------------------------
+# HTTP
+# ---------------------------------------------------------------------------
+
+def _api_get(path: str) -> dict | None:
+    url = f"{GITHUB_API_BASE}/{path}"
+    req = urllib.request.Request(url)
+    req.add_header("Accept", "application/vnd.github+json")
+    req.add_header("X-GitHub-Api-Version", "2022-11-28")
+    token = os.environ.get("GITHUB_TOKEN")
+    if token:
+        req.add_header("Authorization", f"Bearer {token}")
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return json.loads(resp.read().decode())
+    except urllib.error.HTTPError as exc:
+        if exc.code == 404:
+            return None
+        raise
+    except Exception as exc:
+        print(f"  Warning: request to {url} failed: {exc}", file=sys.stderr)
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Core logic
+# ---------------------------------------------------------------------------
+
+_cache: dict[str, bool] = {}
+
+
+def branch_exists(branch: str) -> bool:
+    if branch not in _cache:
+        result = _api_get(f"repos/{GENERATED_DIAGRAMS_REPO}/branches/{branch}")
+        _cache[branch] = result is not None
+    return _cache[branch]
+
+
+def load_versions_csv() -> list[dict]:
+    if not VERSIONS_CSV.exists():
+        print(f"Error: {VERSIONS_CSV} not found. Run from the repo root.", file=sys.stderr)
+        sys.exit(2)
+    with open(VERSIONS_CSV, newline="") as f:
+        return list(csv.DictReader(f))
+
+
+def run_checks(rows: list[dict]) -> list[dict]:
+    failures = []
+    checked: set[str] = set()
+
+    for row in rows:
+        version = row.get("major_version", "").strip()
+        branch  = row.get("crdb_branch_name", "").strip()
+        if not branch or branch == "N/A":
+            continue
+
+        # (a) Does the listed branch exist?
+        if branch not in checked:
+            checked.add(branch)
+            print(f"  {version:8s} → {branch} ...", end=" ", flush=True)
+            if branch_exists(branch):
+                print("OK")
+            else:
+                print("MISSING")
+                failures.append({
+                    "type": "branch_missing",
+                    "version": version,
+                    "branch": branch,
+                    "message": (
+                        f"{version}: crdb_branch_name={branch!r} does not exist "
+                        f"in cockroachdb/generated-diagrams."
+                    ),
+                })
+                continue
+
+        # (b) Is the version still pointing to an older branch?
+        #     e.g. v26.2 → release-26.1 when release-26.2 now exists.
+        expected = f"release-{version.lstrip('v')}"
+        if branch != expected and expected not in checked:
+            if branch_exists(expected):
+                checked.add(expected)
+                failures.append({
+                    "type": "branch_mismatch",
+                    "version": version,
+                    "branch": branch,
+                    "expected": expected,
+                    "message": (
+                        f"{version}: crdb_branch_name={branch!r} but {expected!r} "
+                        f"now exists in cockroachdb/generated-diagrams. "
+                        f"Update versions.csv to use {expected!r}."
+                    ),
+                })
+
+    return failures
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def format_comment(failures: list[dict]) -> str:
+    if not failures:
+        return (
+            "## Branch Existence Check: Passed\n\n"
+            "All `crdb_branch_name` entries in `versions.csv` exist in "
+            "`cockroachdb/generated-diagrams`."
+        )
+
+    lines = [
+        "## Branch Existence Check: Failed",
+        "",
+        f"Found **{len(failures)}** issue(s) in `versions.csv`:",
+        "",
+        "> **Context**: [EDUENG-614](https://cockroachlabs.atlassian.net/browse/EDUENG-614)",
+        "",
+    ]
+    for f in failures:
+        icon = ":warning:" if f["type"] == "branch_mismatch" else ":x:"
+        lines.append(f"- {icon} {f['message']}")
+
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    rows = load_versions_csv()
+    print(f"Checking {len(rows)} versions.csv entries against cockroachdb/generated-diagrams...\n")
+    failures = run_checks(rows)
+
+    comment = format_comment(failures)
+    if os.environ.get("GITHUB_ACTIONS"):
+        summary = os.environ.get("GITHUB_STEP_SUMMARY")
+        if summary:
+            Path(summary).write_text(comment, encoding="utf-8")
+        Path("pr-comment.md").write_text(comment, encoding="utf-8")
+
+    if failures:
+        print(f"\n--- Issues ---", file=sys.stderr)
+        for f in failures:
+            print(f"  [{f['type']}] {f['message']}", file=sys.stderr)
+        print(f"\nTotal: {len(failures)} issue(s).", file=sys.stderr)
+        sys.exit(1)
+    else:
+        print("\nAll branch existence checks passed.")
+        sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/validate-branch-existence.yml b/.github/workflows/validate-branch-existence.yml
new file mode 100644
index 00000000000..abaadd47566
--- /dev/null
+++ b/.github/workflows/validate-branch-existence.yml
@@ -0,0 +1,133 @@
+name: Validate Branch Existence
+
+# EDUENG-614
+# Verifies that every crdb_branch_name in versions.csv exists as a branch in
+# cockroachdb/generated-diagrams, and flags entries where a proper release-X.Y
+# branch has been created but versions.csv still points to an older one.
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - 'src/current/_data/versions.csv'
+  schedule:
+    # Daily at 07:00 UTC.
+    - cron: '0 7 * * *'
+  workflow_dispatch:
+
+jobs:
+  validate-branch-existence:
+    name: Check crdb_branch_name entries against generated-diagrams
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      - name: Run branch existence check
+        id: validate
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_ACTIONS: 'true'
+        run: python .github/scripts/validate_branch_existence.py
+        continue-on-error: true
+
+      - name: Post PR comment
+        if: github.event_name == 'pull_request'
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            const marker = '<!-- branch-existence-check -->';
+
+            let body = marker + '\n';
+            try {
+              body += fs.readFileSync('pr-comment.md', 'utf8');
+            } catch {
+              body += '### Branch Existence Check\n\nCheck ran but could not generate a detailed report.';
+            }
+
+            const { data: comments } = await github.rest.issues.listComments({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+
+            const existing = comments.find(
+              c => c.user.type === 'Bot' && c.body.includes(marker)
+            );
+
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existing.id,
+                body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body,
+              });
+            }
+
+      - name: Fail on PR issues
+        if: github.event_name == 'pull_request' && steps.validate.outcome == 'failure'
+        run: |
+          echo "Branch existence check failed. See the PR comment for details."
+          exit 1
+
+      - name: Open or update tracking issue (scheduled failure)
+        if: github.event_name != 'pull_request' && steps.validate.outcome == 'failure'
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            const runUrl = `${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`;
+
+            let detail = '';
+            try {
+              detail = fs.readFileSync('pr-comment.md', 'utf8');
+            } catch {
+              detail = `Check failed. See [workflow run](${runUrl}) for details.`;
+            }
+
+            const label = 'sql-diagram-validation';
+            const { data: issues } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: label,
+            });
+
+            if (issues.length === 0) {
+              await github.rest.issues.create({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                title: 'Branch existence check failure (automated)',
+                body: [
+                  'Opened automatically by the nightly branch existence workflow.',
+                  '',
+                  detail,
+                  '',
+                  `[Workflow run](${runUrl})`,
+                ].join('\n'),
+                labels: [label],
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: issues[0].number,
+                body: `**Nightly update** — [run ${{ github.run_id }}](${runUrl}):\n\n${detail}`,
+              });
+            }

From a3b864d26c28ee5da43d3683e695234ad3ffe4f0 Mon Sep 17 00:00:00 2001
From: ebembi-crdb <ebembi@cockroachlabs.com>
Date: Tue, 31 Mar 2026 19:25:36 +0530
Subject: [PATCH 2/2] Address review: URL-encode branch names, add self-tests,
 injectable exists_fn

- URL-encode branch name with urllib.parse.quote before interpolating
  into the /repos/.../branches/{branch} API path
- Make run_checks accept an optional _exists_fn parameter so the core
  logic can be tested without network access
- Add _run_self_tests() covering branch_missing, all-OK, branch_mismatch,
  N/A skip, and empty-field skip; invoke with --self-test flag
---
 .github/scripts/validate_branch_existence.py | 69 ++++++++++++++++++--
 1 file changed, 65 insertions(+), 4 deletions(-)

diff --git a/.github/scripts/validate_branch_existence.py b/.github/scripts/validate_branch_existence.py
index 8b067e53707..15a9e46a590 100644
--- a/.github/scripts/validate_branch_existence.py
+++ b/.github/scripts/validate_branch_existence.py
@@ -12,6 +12,9 @@
 Usage:
   python .github/scripts/validate_branch_existence.py
 
+  # Run built-in unit tests (no network required):
+  python .github/scripts/validate_branch_existence.py --self-test
+
 Exit codes:
   0  all checks passed
   1  one or more issues found
@@ -22,12 +25,15 @@
   GITHUB_ACTIONS  Set automatically in CI. Enables pr-comment.md output.
 """
 
+import contextlib
 import csv
+import io
 import json
 import os
 import re
 import sys
 import urllib.error
+import urllib.parse
 import urllib.request
 from pathlib import Path
 
@@ -68,7 +74,8 @@ def _api_get(path: str) -> dict | None:
 
 def branch_exists(branch: str) -> bool:
     if branch not in _cache:
-        result = _api_get(f"repos/{GENERATED_DIAGRAMS_REPO}/branches/{branch}")
+        encoded = urllib.parse.quote(branch, safe="")
+        result = _api_get(f"repos/{GENERATED_DIAGRAMS_REPO}/branches/{encoded}")
         _cache[branch] = result is not None
     return _cache[branch]
 
@@ -81,7 +88,14 @@ def load_versions_csv() -> list[dict]:
         return list(csv.DictReader(f))
 
 
-def run_checks(rows: list[dict]) -> list[dict]:
+def run_checks(rows: list[dict], _exists_fn=None) -> list[dict]:
+    """Check each versions.csv row for branch existence and staleness.
+
+    _exists_fn is injectable for unit tests; defaults to branch_exists.
+    """
+    if _exists_fn is None:
+        _exists_fn = branch_exists
+
     failures = []
     checked: set[str] = set()
 
@@ -95,7 +109,7 @@ def run_checks(rows: list[dict]) -> list[dict]:
         if branch not in checked:
             checked.add(branch)
             print(f"  {version:8s} → {branch} ...", end=" ", flush=True)
-            if branch_exists(branch):
+            if _exists_fn(branch):
                 print("OK")
             else:
                 print("MISSING")
@@ -114,7 +128,7 @@ def run_checks(rows: list[dict]) -> list[dict]:
         #     e.g. v26.2 → release-26.1 when release-26.2 now exists.
         expected = f"release-{version.lstrip('v')}"
         if branch != expected and expected not in checked:
-            if branch_exists(expected):
+            if _exists_fn(expected):
                 checked.add(expected)
                 failures.append({
                     "type": "branch_mismatch",
@@ -158,11 +172,58 @@ def format_comment(failures: list[dict]) -> str:
     return "\n".join(lines)
 
 
+# ---------------------------------------------------------------------------
+# Self-tests (no network required)
+# ---------------------------------------------------------------------------
+
+def _run_self_tests() -> None:
+    """Unit tests for run_checks logic using injected exists functions."""
+
+    def _quiet(rows, exists_fn):
+        with contextlib.redirect_stdout(io.StringIO()):
+            return run_checks(rows, _exists_fn=exists_fn)
+
+    # branch_missing: listed branch does not exist
+    rows = [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}]
+    failures = _quiet(rows, lambda b: False)
+    assert len(failures) == 1, failures
+    assert failures[0]["type"] == "branch_missing", failures
+
+    # all OK: branch exists and matches expected
+    rows = [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}]
+    failures = _quiet(rows, lambda b: True)
+    assert failures == [], failures
+
+    # branch_mismatch: listed branch exists but a newer canonical branch also exists
+    rows = [{"major_version": "v26.2", "crdb_branch_name": "release-26.1"}]
+    known = {"release-26.1", "release-26.2"}
+    failures = _quiet(rows, lambda b: b in known)
+    assert len(failures) == 1, failures
+    assert failures[0]["type"] == "branch_mismatch", failures
+    assert failures[0]["expected"] == "release-26.2", failures
+
+    # N/A entries are skipped entirely
+    rows = [{"major_version": "v24.1", "crdb_branch_name": "N/A"}]
+    failures = _quiet(rows, lambda b: (_ for _ in ()).throw(AssertionError("unexpected call")))
+    assert failures == [], failures
+
+    # empty branch field is skipped
+    rows = [{"major_version": "v25.1", "crdb_branch_name": ""}]
+    failures = _quiet(rows, lambda b: (_ for _ in ()).throw(AssertionError("unexpected call")))
+    assert failures == [], failures
+
+    print("All self-tests passed.")
+    sys.exit(0)
+
+
 # ---------------------------------------------------------------------------
 # Entry point
 # ---------------------------------------------------------------------------
 
 def main() -> None:
+    if "--self-test" in sys.argv:
+        _run_self_tests()
+
     rows = load_versions_csv()
     print(f"Checking {len(rows)} versions.csv entries against cockroachdb/generated-diagrams...\n")
     failures = run_checks(rows)