Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 250 additions & 0 deletions .github/scripts/validate_branch_existence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#!/usr/bin/env python3
"""
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we check the teamcity job ? How it's gonna affect if we have both the flows? As that teamcity job is still active?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checked it — no conflict. The TC job is a write path: it generates diagrams via Bazel and force-pushes them to generated-diagrams. This check is read-only; it just asks the API whether a branch exists. They're actually complementary — TC creates branches, this validates that versions.csv stays in sync with what TC has already pushed.

validate_branch_existence.py (EDUENG-614)

For every row in src/current/_data/versions.csv, verifies that the listed
crdb_branch_name exists as a branch in cockroachdb/generated-diagrams.

Also flags entries where versions.csv still points to an older branch even
though the "natural" release-X.Y branch for that version now exists
(e.g. v26.2 pointing to release-26.1 after release-26.2 is created).

Usage:
python .github/scripts/validate_branch_existence.py

# Run built-in unit tests (no network required):
python .github/scripts/validate_branch_existence.py --self-test

Exit codes:
0 all checks passed
1 one or more issues found
2 fatal error (versions.csv not found)

Environment:
GITHUB_TOKEN Optional. Raises API rate limit from 60 to 5000 req/hr.
GITHUB_ACTIONS Set automatically in CI. Enables pr-comment.md output.
"""

import contextlib
import csv
import io
import json
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path

GENERATED_DIAGRAMS_REPO = "cockroachdb/generated-diagrams"
GITHUB_API_BASE = "https://api.github.com"
VERSIONS_CSV = Path("src/current/_data/versions.csv")

# ---------------------------------------------------------------------------
# HTTP
# ---------------------------------------------------------------------------

def _api_get(path: str) -> dict | None:
url = f"{GITHUB_API_BASE}/{path}"
req = urllib.request.Request(url)
req.add_header("Accept", "application/vnd.github+json")
req.add_header("X-GitHub-Api-Version", "2022-11-28")
token = os.environ.get("GITHUB_TOKEN")
if token:
req.add_header("Authorization", f"Bearer {token}")
try:
with urllib.request.urlopen(req, timeout=15) as resp:
return json.loads(resp.read().decode())
except urllib.error.HTTPError as exc:
if exc.code == 404:
return None
raise
except Exception as exc:
print(f" Warning: request to {url} failed: {exc}", file=sys.stderr)
return None


# ---------------------------------------------------------------------------
# Core logic
# ---------------------------------------------------------------------------

_cache: dict[str, bool] = {}


def branch_exists(branch: str) -> bool:
if branch not in _cache:
encoded = urllib.parse.quote(branch, safe="")
result = _api_get(f"repos/{GENERATED_DIAGRAMS_REPO}/branches/{encoded}")
_cache[branch] = result is not None
return _cache[branch]


def load_versions_csv() -> list[dict]:
if not VERSIONS_CSV.exists():
print(f"Error: {VERSIONS_CSV} not found. Run from the repo root.", file=sys.stderr)
sys.exit(2)
with open(VERSIONS_CSV, newline="") as f:
return list(csv.DictReader(f))


def run_checks(rows: list[dict], _exists_fn=None) -> list[dict]:
"""Check each versions.csv row for branch existence and staleness.

_exists_fn is injectable for unit tests; defaults to branch_exists.
"""
if _exists_fn is None:
_exists_fn = branch_exists

failures = []
checked: set[str] = set()

for row in rows:
version = row.get("major_version", "").strip()
branch = row.get("crdb_branch_name", "").strip()
if not branch or branch == "N/A":
continue

# (a) Does the listed branch exist?
if branch not in checked:
checked.add(branch)
print(f" {version:8s} → {branch} ...", end=" ", flush=True)
if _exists_fn(branch):
print("OK")
else:
print("MISSING")
failures.append({
"type": "branch_missing",
"version": version,
"branch": branch,
"message": (
f"{version}: crdb_branch_name={branch!r} does not exist "
f"in cockroachdb/generated-diagrams."
),
})
continue

# (b) Is the version still pointing to an older branch?
# e.g. v26.2 → release-26.1 when release-26.2 now exists.
expected = f"release-{version.lstrip('v')}"
if branch != expected and expected not in checked:
if _exists_fn(expected):
checked.add(expected)
failures.append({
"type": "branch_mismatch",
"version": version,
"branch": branch,
"expected": expected,
"message": (
f"{version}: crdb_branch_name={branch!r} but {expected!r} "
f"now exists in cockroachdb/generated-diagrams. "
f"Update versions.csv to use {expected!r}."
),
})

return failures


# ---------------------------------------------------------------------------
# Output
# ---------------------------------------------------------------------------

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR includes a manual test plan — consider adding a lightweight unit test (or an integration mode that runs against a small sample versions.csv) to make local iteration faster.

def format_comment(failures: list[dict]) -> str:
if not failures:
return (
"## Branch Existence Check: Passed\n\n"
"All `crdb_branch_name` entries in `versions.csv` exist in "
"`cockroachdb/generated-diagrams`."
)

lines = [
"## Branch Existence Check: Failed",
"",
f"Found **{len(failures)}** issue(s) in `versions.csv`:",
"",
"> **Context**: [EDUENG-614](https://cockroachlabs.atlassian.net/browse/EDUENG-614)",
"",
]
for f in failures:
icon = ":warning:" if f["type"] == "branch_mismatch" else ":x:"
lines.append(f"- {icon} {f['message']}")

return "\n".join(lines)


# ---------------------------------------------------------------------------
# Self-tests (no network required)
# ---------------------------------------------------------------------------

def _run_self_tests() -> None:
"""Unit tests for run_checks logic using injected exists functions."""

def _quiet(rows, exists_fn):
with contextlib.redirect_stdout(io.StringIO()):
return run_checks(rows, _exists_fn=exists_fn)

# branch_missing: listed branch does not exist
rows = [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}]
failures = _quiet(rows, lambda b: False)
assert len(failures) == 1, failures
assert failures[0]["type"] == "branch_missing", failures

# all OK: branch exists and matches expected
rows = [{"major_version": "v26.1", "crdb_branch_name": "release-26.1"}]
failures = _quiet(rows, lambda b: True)
assert failures == [], failures

# branch_mismatch: listed branch exists but a newer canonical branch also exists
rows = [{"major_version": "v26.2", "crdb_branch_name": "release-26.1"}]
known = {"release-26.1", "release-26.2"}
failures = _quiet(rows, lambda b: b in known)
assert len(failures) == 1, failures
assert failures[0]["type"] == "branch_mismatch", failures
assert failures[0]["expected"] == "release-26.2", failures

# N/A entries are skipped entirely
rows = [{"major_version": "v24.1", "crdb_branch_name": "N/A"}]
failures = _quiet(rows, lambda b: (_ for _ in ()).throw(AssertionError("unexpected call")))
assert failures == [], failures

# empty branch field is skipped
rows = [{"major_version": "v25.1", "crdb_branch_name": ""}]
failures = _quiet(rows, lambda b: (_ for _ in ()).throw(AssertionError("unexpected call")))
assert failures == [], failures

print("All self-tests passed.")
sys.exit(0)


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

def main() -> None:
if "--self-test" in sys.argv:
_run_self_tests()

rows = load_versions_csv()
print(f"Checking {len(rows)} versions.csv entries against cockroachdb/generated-diagrams...\n")
failures = run_checks(rows)

comment = format_comment(failures)
if os.environ.get("GITHUB_ACTIONS"):
summary = os.environ.get("GITHUB_STEP_SUMMARY")
if summary:
Path(summary).write_text(comment, encoding="utf-8")
Path("pr-comment.md").write_text(comment, encoding="utf-8")

if failures:
print(f"\n--- Issues ---", file=sys.stderr)
for f in failures:
print(f" [{f['type']}] {f['message']}", file=sys.stderr)
print(f"\nTotal: {len(failures)} issue(s).", file=sys.stderr)
sys.exit(1)
else:
print("\nAll branch existence checks passed.")
sys.exit(0)


if __name__ == "__main__":
main()
133 changes: 133 additions & 0 deletions .github/workflows/validate-branch-existence.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
name: Validate Branch Existence

# EDUENG-614
# Verifies that every crdb_branch_name in versions.csv exists as a branch in
# cockroachdb/generated-diagrams, and flags entries where a proper release-X.Y
# branch has been created but versions.csv still points to an older one.

on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- 'src/current/_data/versions.csv'
schedule:
# Daily at 07:00 UTC.
- cron: '0 7 * * *'
workflow_dispatch:

jobs:
validate-branch-existence:
name: Check crdb_branch_name entries against generated-diagrams
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'

- name: Run branch existence check
id: validate
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_ACTIONS: 'true'
run: python .github/scripts/validate_branch_existence.py
continue-on-error: true

- name: Post PR comment
if: github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const fs = require('fs');
const marker = '<!-- branch-existence-check -->';

let body = marker + '\n';
try {
body += fs.readFileSync('pr-comment.md', 'utf8');
} catch {
body += '### Branch Existence Check\n\nCheck ran but could not generate a detailed report.';
}

const { data: comments } = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
});

const existing = comments.find(
c => c.user.type === 'Bot' && c.body.includes(marker)
);

if (existing) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existing.id,
body,
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body,
});
}

- name: Fail on PR issues
if: github.event_name == 'pull_request' && steps.validate.outcome == 'failure'
run: |
echo "Branch existence check failed. See the PR comment for details."
exit 1

- name: Open or update tracking issue (scheduled failure)
if: github.event_name != 'pull_request' && steps.validate.outcome == 'failure'
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const fs = require('fs');
const runUrl = `${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`;

let detail = '';
try {
detail = fs.readFileSync('pr-comment.md', 'utf8');
} catch {
detail = `Check failed. See [workflow run](${runUrl}) for details.`;
}

const label = 'sql-diagram-validation';
const { data: issues } = await github.rest.issues.listForRepo({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'open',
labels: label,
});

if (issues.length === 0) {
await github.rest.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: 'Branch existence check failure (automated)',
body: [
'Opened automatically by the nightly branch existence workflow.',
'',
detail,
'',
`[Workflow run](${runUrl})`,
].join('\n'),
labels: [label],
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: issues[0].number,
body: `**Nightly update** — [run ${{ github.run_id }}](${runUrl}):\n\n${detail}`,
});
}
Loading