From a80010e0de8d8e3f74f5e6595fdb029b5284e376 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:27:46 +0200 Subject: [PATCH 1/5] chore(ci): add benchmark workflow concurrency guardrails (#20) --- .github/workflows/ci.yml | 7 +++++ Makefile | 5 +++- scripts/workflow-policy-check.py | 48 ++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 scripts/workflow-policy-check.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d49713d..8c52723 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,10 @@ on: - main pull_request: +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: pr-title: name: Validate PR title @@ -35,6 +39,9 @@ jobs: scripts: name: Script smoke tests (skipped targets expected) runs-on: ubuntu-latest + concurrency: + group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true steps: - uses: actions/checkout@v5 - uses: actions/setup-go@v5 diff --git a/Makefile b/Makefile index f6b7f72..28ad2a0 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check benchmark: bash scripts/run-all.sh @@ -99,3 +99,6 @@ benchmark-benchstat-check: ci-benchmark-quality-check: $(PYTHON) scripts/benchmark-quality-check.py ci-check + +workflow-concurrency-check: + $(PYTHON) scripts/workflow-policy-check.py concurrency-check diff --git a/scripts/workflow-policy-check.py b/scripts/workflow-policy-check.py new file mode 100644 index 0000000..d3bc395 --- /dev/null +++ b/scripts/workflow-policy-check.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent.parent +CI_WORKFLOW = ROOT / ".github" / "workflows" / "ci.yml" + + +def read_text(path: Path) -> str: + if not path.exists(): + raise SystemExit(f"Workflow file not found: {path}") + return path.read_text(encoding="utf-8") + + +def assert_contains(text: str, needle: str, err: str) -> None: + if needle not in text: + raise SystemExit(err) + + +def check_concurrency() -> None: + text = read_text(CI_WORKFLOW) + assert_contains( + text, + "concurrency:\n group: ci-${{ github.workflow }}-${{ github.ref }}\n cancel-in-progress: true", + "workflow-concurrency-check failed: top-level workflow concurrency with cancel-in-progress=true is required", + ) + assert_contains( + text, + " scripts:\n name: Script smoke tests (skipped targets expected)\n runs-on: ubuntu-latest\n concurrency:\n group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}\n cancel-in-progress: true", + "workflow-concurrency-check failed: scripts job must define benchmark concurrency with cancel-in-progress=true", + ) + print("workflow-concurrency-check: validated workflow and benchmark job concurrency controls") + + +def main() -> None: + parser = argparse.ArgumentParser(description="Validate benchmark workflow safety policies") + parser.add_argument("cmd", choices=["concurrency-check"]) + args = parser.parse_args() + + if args.cmd == "concurrency-check": + check_concurrency() + + +if __name__ == "__main__": + main() From c3523fac507fa22336ae8edbc1f9f96618c48515 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:28:47 +0200 Subject: [PATCH 2/5] chore(ci): add benchmark timeout and retention budget policy (#21) --- .github/workflows/ci.yml | 2 ++ Makefile | 5 ++++- docs/guides/benchmark-workflow.md | 6 ++++++ scripts/workflow-policy-check.py | 19 ++++++++++++++++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c52723..fd37a70 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: scripts: name: Script smoke tests (skipped targets expected) runs-on: ubuntu-latest + timeout-minutes: 25 concurrency: group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true @@ -66,3 +67,4 @@ jobs: with: name: benchmark-quality-summary path: results/latest/benchmark-quality-summary.json + retention-days: 14 diff --git a/Makefile b/Makefile index 28ad2a0..09f6484 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check benchmark: bash scripts/run-all.sh @@ -102,3 +102,6 @@ ci-benchmark-quality-check: workflow-concurrency-check: $(PYTHON) scripts/workflow-policy-check.py concurrency-check + +workflow-budget-check: + $(PYTHON) scripts/workflow-policy-check.py budget-check diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md index 216828b..cfd3d05 100644 --- a/docs/guides/benchmark-workflow.md +++ b/docs/guides/benchmark-workflow.md @@ -77,3 +77,9 @@ Quality thresholds and required metrics are versioned in `stats-policy.yaml`. - run from a clean working tree when possible - keep runtime versions stable - include host and Docker metadata in report notes + +## CI budget policy + +- benchmark smoke job timeout budget: 25 minutes +- benchmark quality summary artifact retention: 14 days +- expected CI compute envelope: one benchmark smoke run per ref due to concurrency cancellation; superseded runs are canceled before full benchmark execution diff --git a/scripts/workflow-policy-check.py b/scripts/workflow-policy-check.py index d3bc395..a1b2d44 100644 --- a/scripts/workflow-policy-check.py +++ b/scripts/workflow-policy-check.py @@ -35,13 +35,30 @@ def check_concurrency() -> None: print("workflow-concurrency-check: validated workflow and benchmark job concurrency controls") +def check_budget() -> None: + text = read_text(CI_WORKFLOW) + assert_contains( + text, + " scripts:\n name: Script smoke tests (skipped targets expected)\n runs-on: ubuntu-latest\n timeout-minutes: 25", + "workflow-budget-check failed: scripts job timeout-minutes budget must be set to 25", + ) + assert_contains( + text, + " retention-days: 14", + "workflow-budget-check failed: benchmark-quality-summary artifact retention-days must be set", + ) + print("workflow-budget-check: validated timeout budget and artifact retention policy") + + def main() -> None: parser = argparse.ArgumentParser(description="Validate benchmark workflow safety policies") - parser.add_argument("cmd", choices=["concurrency-check"]) + parser.add_argument("cmd", choices=["concurrency-check", "budget-check"]) args = parser.parse_args() if args.cmd == "concurrency-check": check_concurrency() + elif args.cmd == "budget-check": + check_budget() if __name__ == "__main__": From a7aeff2ffb1fceb0aaad62eeb15debab0c14ebf5 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:30:55 +0200 Subject: [PATCH 3/5] feat(ci): add bounded manual benchmark workflow inputs (#22) --- .github/workflows/benchmark-manual.yml | 133 +++++++++++++++++++++++++ Makefile | 5 +- docs/guides/benchmark-workflow.md | 10 ++ scripts/workflow-policy-check.py | 49 ++++++++- 4 files changed, 193 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/benchmark-manual.yml diff --git a/.github/workflows/benchmark-manual.yml b/.github/workflows/benchmark-manual.yml new file mode 100644 index 0000000..1037be8 --- /dev/null +++ b/.github/workflows/benchmark-manual.yml @@ -0,0 +1,133 @@ +name: benchmark-manual + +on: + workflow_dispatch: + inputs: + frameworks: + description: "Comma-separated frameworks (modkit,nestjs,baseline,wire,fx,do)" + required: true + default: "modkit,nestjs" + type: string + runs: + description: "Benchmark runs per framework (1-10)" + required: true + default: "3" + type: string + benchmark_requests: + description: "Benchmark requests per run (50-1000)" + required: true + default: "300" + type: string + +concurrency: + group: benchmark-manual-${{ github.ref }}-${{ github.event.inputs.frameworks }} + cancel-in-progress: true + +jobs: + benchmark: + name: Manual bounded benchmark run + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - name: Install benchmark quality tools + run: | + sudo apt-get update + sudo apt-get install -y hyperfine + go install golang.org/x/perf/cmd/benchstat@latest + echo "$(go env GOPATH)/bin" >> "$GITHUB_PATH" + - name: Validate and normalize workflow inputs + id: normalize + shell: bash + env: + INPUT_FRAMEWORKS: ${{ github.event.inputs.frameworks }} + INPUT_RUNS: ${{ github.event.inputs.runs }} + INPUT_BENCHMARK_REQUESTS: ${{ github.event.inputs.benchmark_requests }} + run: | + set -euo pipefail + allowed="modkit nestjs baseline wire fx do" + framework_csv="$(printf '%s' "$INPUT_FRAMEWORKS" | tr -d '[:space:]')" + if [[ -z "$framework_csv" ]]; then + echo "frameworks input must not be empty" >&2 + exit 1 + fi + + IFS=',' read -r -a raw_frameworks <<< "$framework_csv" + if [[ ${#raw_frameworks[@]} -eq 0 || ${#raw_frameworks[@]} -gt 6 ]]; then + echo "frameworks input must contain 1-6 entries" >&2 + exit 1 + fi + + normalized=() + for framework in "${raw_frameworks[@]}"; do + case " $allowed " in + *" $framework "*) normalized+=("$framework") ;; + *) + echo "unsupported framework: $framework" >&2 + exit 1 + ;; + esac + done + + if ! [[ "$INPUT_RUNS" =~ ^[0-9]+$ ]]; then + echo "runs must be an integer" >&2 + exit 1 + fi + if ! [[ "$INPUT_BENCHMARK_REQUESTS" =~ ^[0-9]+$ ]]; then + echo "benchmark_requests must be an integer" >&2 + exit 1 + fi + + BENCH_RUNS="$INPUT_RUNS" + BENCH_REQUESTS="$INPUT_BENCHMARK_REQUESTS" + + if (( BENCH_RUNS < 1 || BENCH_RUNS > 10 )); then + echo "runs must be between 1 and 10" >&2 + exit 1 + fi + if (( BENCH_REQUESTS < 50 || BENCH_REQUESTS > 1000 )); then + echo "benchmark_requests must be between 50 and 1000" >&2 + exit 1 + fi + + { + echo "frameworks=$(IFS=,; echo "${normalized[*]}")" + echo "bench_runs=$BENCH_RUNS" + echo "bench_requests=$BENCH_REQUESTS" + } >> "$GITHUB_OUTPUT" + - name: Run bounded benchmarks + shell: bash + env: + BENCH_ENGINE: hyperfine + run: | + set -euo pipefail + python3 scripts/environment-manifest.py collect-fingerprint --out results/latest/environment.fingerprint.json + IFS=',' read -r -a frameworks <<< "${{ steps.normalize.outputs.frameworks }}" + for framework in "${frameworks[@]}"; do + BENCHMARK_METADATA_MANAGED=1 \ + BENCHMARK_RUNS="${{ steps.normalize.outputs.bench_runs }}" \ + BENCHMARK_REQUESTS="${{ steps.normalize.outputs.bench_requests }}" \ + bash scripts/run-single.sh "$framework" + done + python3 scripts/validate-result-schemas.py raw-check --raw-dir results/latest/raw + python3 scripts/environment-manifest.py write-manifest --raw-dir results/latest/raw --fingerprint results/latest/environment.fingerprint.json --out results/latest/environment.manifest.json + - name: Generate report and policy checks + run: | + python3 scripts/generate-report.py + make benchmark-schema-validate + make ci-benchmark-quality-check + - name: Upload manual benchmark artifacts + uses: actions/upload-artifact@v4 + with: + name: benchmark-manual-results + path: | + results/latest/raw + results/latest/summary.json + results/latest/report.md + results/latest/benchmark-quality-summary.json + results/latest/environment.fingerprint.json + results/latest/environment.manifest.json + retention-days: 14 diff --git a/Makefile b/Makefile index 09f6484..4584c5b 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check benchmark: bash scripts/run-all.sh @@ -105,3 +105,6 @@ workflow-concurrency-check: workflow-budget-check: $(PYTHON) scripts/workflow-policy-check.py budget-check + +workflow-inputs-check: + $(PYTHON) scripts/workflow-policy-check.py inputs-check diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md index cfd3d05..2519eb6 100644 --- a/docs/guides/benchmark-workflow.md +++ b/docs/guides/benchmark-workflow.md @@ -25,6 +25,16 @@ make benchmark-nestjs Per-target runs also emit `results/latest/environment.fingerprint.json` and `results/latest/environment.manifest.json`. +## Manual bounded CI run + +Use GitHub Actions workflow `benchmark-manual` with bounded `workflow_dispatch` inputs: + +- `frameworks`: comma-separated subset of `modkit,nestjs,baseline,wire,fx,do` +- `runs`: integer in range `1..10` +- `benchmark_requests`: integer in range `50..1000` + +Runs that exceed bounds are rejected before benchmark execution. + Optional OSS measurement engine: ```bash diff --git a/scripts/workflow-policy-check.py b/scripts/workflow-policy-check.py index a1b2d44..39c820a 100644 --- a/scripts/workflow-policy-check.py +++ b/scripts/workflow-policy-check.py @@ -7,6 +7,7 @@ ROOT = Path(__file__).resolve().parent.parent CI_WORKFLOW = ROOT / ".github" / "workflows" / "ci.yml" +MANUAL_WORKFLOW = ROOT / ".github" / "workflows" / "benchmark-manual.yml" def read_text(path: Path) -> str: @@ -29,8 +30,18 @@ def check_concurrency() -> None: ) assert_contains( text, - " scripts:\n name: Script smoke tests (skipped targets expected)\n runs-on: ubuntu-latest\n concurrency:\n group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}\n cancel-in-progress: true", - "workflow-concurrency-check failed: scripts job must define benchmark concurrency with cancel-in-progress=true", + " scripts:\n name: Script smoke tests (skipped targets expected)", + "workflow-concurrency-check failed: scripts benchmark smoke job is missing", + ) + assert_contains( + text, + " group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}", + "workflow-concurrency-check failed: scripts job benchmark concurrency group is missing", + ) + assert_contains( + text, + " cancel-in-progress: true", + "workflow-concurrency-check failed: scripts job cancel-in-progress=true is required", ) print("workflow-concurrency-check: validated workflow and benchmark job concurrency controls") @@ -50,15 +61,47 @@ def check_budget() -> None: print("workflow-budget-check: validated timeout budget and artifact retention policy") +def check_inputs() -> None: + text = read_text(MANUAL_WORKFLOW) + assert_contains( + text, + "workflow_dispatch:", + "workflow-inputs-check failed: benchmark-manual workflow_dispatch is required", + ) + for key in ("frameworks", "runs", "benchmark_requests"): + assert_contains( + text, + f" {key}:", + f"workflow-inputs-check failed: missing workflow_dispatch input '{key}'", + ) + for token in ( + "INPUT_FRAMEWORKS", + "INPUT_RUNS", + "INPUT_BENCHMARK_REQUESTS", + "BENCH_RUNS", + "BENCH_REQUESTS", + "runs must be between 1 and 10", + "benchmark_requests must be between 50 and 1000", + ): + assert_contains( + text, + token, + f"workflow-inputs-check failed: missing bounded input token '{token}'", + ) + print("workflow-inputs-check: validated bounded manual workflow inputs") + + def main() -> None: parser = argparse.ArgumentParser(description="Validate benchmark workflow safety policies") - parser.add_argument("cmd", choices=["concurrency-check", "budget-check"]) + parser.add_argument("cmd", choices=["concurrency-check", "budget-check", "inputs-check"]) args = parser.parse_args() if args.cmd == "concurrency-check": check_concurrency() elif args.cmd == "budget-check": check_budget() + elif args.cmd == "inputs-check": + check_inputs() if __name__ == "__main__": From 91cc6601181d650dcfa600463a9ed344a102a161 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:36:52 +0200 Subject: [PATCH 4/5] fix(ci): tighten manual benchmark input and retention assertions --- .github/workflows/benchmark-manual.yml | 10 +++++++++- scripts/workflow-policy-check.py | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark-manual.yml b/.github/workflows/benchmark-manual.yml index 1037be8..1b7a5e0 100644 --- a/.github/workflows/benchmark-manual.yml +++ b/.github/workflows/benchmark-manual.yml @@ -62,9 +62,17 @@ jobs: fi normalized=() + seen_frameworks="," for framework in "${raw_frameworks[@]}"; do case " $allowed " in - *" $framework "*) normalized+=("$framework") ;; + *" $framework "*) + if [[ "$seen_frameworks" == *",$framework,"* ]]; then + echo "duplicate framework not allowed: $framework" >&2 + exit 1 + fi + normalized+=("$framework") + seen_frameworks+="$framework," + ;; *) echo "unsupported framework: $framework" >&2 exit 1 diff --git a/scripts/workflow-policy-check.py b/scripts/workflow-policy-check.py index 39c820a..bc652c0 100644 --- a/scripts/workflow-policy-check.py +++ b/scripts/workflow-policy-check.py @@ -55,7 +55,7 @@ def check_budget() -> None: ) assert_contains( text, - " retention-days: 14", + " - name: Upload benchmark quality summary\n uses: actions/upload-artifact@v4\n with:\n name: benchmark-quality-summary\n path: results/latest/benchmark-quality-summary.json\n retention-days: 14", "workflow-budget-check failed: benchmark-quality-summary artifact retention-days must be set", ) print("workflow-budget-check: validated timeout budget and artifact retention policy") @@ -82,6 +82,7 @@ def check_inputs() -> None: "BENCH_REQUESTS", "runs must be between 1 and 10", "benchmark_requests must be between 50 and 1000", + "duplicate framework not allowed", ): assert_contains( text, From 70b5f5401d82157d1a6afa504f6de1f104126adb Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:42:24 +0200 Subject: [PATCH 5/5] docs: document manual benchmark workflow bounds --- METHODOLOGY.md | 1 + README.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/METHODOLOGY.md b/METHODOLOGY.md index 77df593..3efddca 100644 --- a/METHODOLOGY.md +++ b/METHODOLOGY.md @@ -36,6 +36,7 @@ - thresholds and required metrics are defined in `stats-policy.yaml` - `make ci-benchmark-quality-check` enforces policy locally and in CI - benchstat comparisons are evaluated against policy baseline framework (`baseline` by default) +- manual CI benchmark runs use bounded workflow inputs (`frameworks` subset, `runs` 1..10, `benchmark_requests` 50..1000) ## Reporting diff --git a/README.md b/README.md index d407be4..b5d1201 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ make ci-benchmark-quality-check Benchmark/report flow enforces schema validation for raw and summary artifacts before quality gates. +Manual bounded benchmark workflow is available in GitHub Actions as `benchmark-manual`. +See `docs/guides/benchmark-workflow.md` for input bounds and execution details. + Use OSS measurement engine (optional): ```bash