From 1600524cb8fc7f8c80382fb71bbc98031359eaa4 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:56:07 +0200 Subject: [PATCH 1/4] feat(issue-23): add fairness disclaimer template and validator --- Makefile | 5 +++- scripts/generate-report.py | 11 +++++++ scripts/publication-policy-check.py | 45 +++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 scripts/publication-policy-check.py diff --git a/Makefile b/Makefile index 4584c5b..c0665b2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check benchmark: bash scripts/run-all.sh @@ -100,6 +100,9 @@ benchmark-benchstat-check: ci-benchmark-quality-check: $(PYTHON) scripts/benchmark-quality-check.py ci-check +report-disclaimer-check: + $(PYTHON) scripts/publication-policy-check.py + workflow-concurrency-check: $(PYTHON) scripts/workflow-policy-check.py concurrency-check diff --git a/scripts/generate-report.py b/scripts/generate-report.py index faeb914..f792082 100755 --- a/scripts/generate-report.py +++ b/scripts/generate-report.py @@ -113,6 +113,17 @@ def write_report(summary): lines.extend( [ + "", + "## Fairness Disclaimer", + "", + "- Language-vs-framework caveat: cross-language results include runtime and ecosystem effects and must not be treated as framework-only deltas.", + "- Cross-language baseline: compare implementations with equivalent API behavior, workload profile, and environment constraints before drawing conclusions.", + "", + "## Anti-Misinterpretation Guidance", + "", + "- Do not rank frameworks across languages as absolute winners; use results as scenario-specific signals.", + "- Treat large cross-language deltas as prompts for deeper profiling (runtime, I/O, GC, and dependency effects), not as standalone product claims.", + "- Parity failures invalidate performance interpretation until correctness is restored.", "", "## Raw Artifacts", "", diff --git a/scripts/publication-policy-check.py b/scripts/publication-policy-check.py new file mode 100644 index 0000000..f06747f --- /dev/null +++ b/scripts/publication-policy-check.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent.parent +REPORT = ROOT / "results" / "latest" / "report.md" +REPORT_GENERATOR = ROOT / "scripts" / "generate-report.py" + + +def report_content() -> str: + if REPORT.exists(): + return REPORT.read_text(encoding="utf-8") + return "" + + +def generator_template_content() -> str: + if not REPORT_GENERATOR.exists(): + raise SystemExit(f"report-disclaimer-check failed: missing generator at {REPORT_GENERATOR}") + return REPORT_GENERATOR.read_text(encoding="utf-8") + + +def disclaimer_check() -> None: + content = report_content() + template = generator_template_content() + required = [ + "## Fairness Disclaimer", + "Language-vs-framework caveat", + "## Anti-Misinterpretation Guidance", + "cross-language", + ] + for token in required: + if token not in template: + raise SystemExit(f"report-disclaimer-check failed: missing '{token}' in scripts/generate-report.py") + if REPORT.exists() and token not in content: + raise SystemExit(f"report-disclaimer-check failed: missing '{token}' in results/latest/report.md") + source = "report + generator" if REPORT.exists() else "generator template" + print(f"report-disclaimer-check: validated disclaimer sections via {source}") + + +def main() -> None: + disclaimer_check() + + +if __name__ == "__main__": + main() From fa996eccb604d558a9c217a3950f16f30e05df4a Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:57:07 +0200 Subject: [PATCH 2/4] feat(issue-24): add methodology changelog policy and check --- METHODOLOGY.md | 22 +++++++++++++++ Makefile | 7 +++-- scripts/publication-policy-check.py | 42 ++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/METHODOLOGY.md b/METHODOLOGY.md index 3efddca..4932a7f 100644 --- a/METHODOLOGY.md +++ b/METHODOLOGY.md @@ -46,6 +46,28 @@ - quality summary: `results/latest/benchmark-quality-summary.json` - optional tool artifacts: `results/latest/tooling/benchstat/*.txt` +## Methodology changelog policy + +### Update rules + +- update this changelog whenever benchmark process, tooling, schema, thresholds, runtime constraints, or interpretation rules change +- classify each entry as `comparability-impacting` or `non-comparability-impacting` +- for `comparability-impacting` changes, include migration notes and baseline reset guidance +- do not publish new benchmark claims without a corresponding changelog entry when methodology or version changed + +### Entry format + +Use one row per change with required fields: + +`version | date (UTC) | change_type | summary | comparability_impact | required_action` + +### Changelog + +| version | date (UTC) | change_type | summary | comparability_impact | required_action | +|---|---|---|---|---|---| +| 1.1.0 | 2026-02-07 | policy | Added publication fairness disclaimer template and README/report sync policy checks | comparability-impacting | Rebaseline external comparisons and reference this version in publication notes | +| 1.0.0 | 2026-02-05 | baseline | Established parity-gated benchmark workflow, schema validation, and quality gates | comparability-impacting | Treat pre-1.0 outputs as non-comparable to current policy | + ## Interpretation guidance - treat parity failures as correctness blockers, not performance regressions diff --git a/Makefile b/Makefile index c0665b2..fb4ffe2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check methodology-changelog-check benchmark: bash scripts/run-all.sh @@ -101,7 +101,10 @@ ci-benchmark-quality-check: $(PYTHON) scripts/benchmark-quality-check.py ci-check report-disclaimer-check: - $(PYTHON) scripts/publication-policy-check.py + $(PYTHON) scripts/publication-policy-check.py report-disclaimer-check + +methodology-changelog-check: + $(PYTHON) scripts/publication-policy-check.py methodology-changelog-check workflow-concurrency-check: $(PYTHON) scripts/workflow-policy-check.py concurrency-check diff --git a/scripts/publication-policy-check.py b/scripts/publication-policy-check.py index f06747f..e396668 100644 --- a/scripts/publication-policy-check.py +++ b/scripts/publication-policy-check.py @@ -1,10 +1,12 @@ #!/usr/bin/env python3 +import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent REPORT = ROOT / "results" / "latest" / "report.md" REPORT_GENERATOR = ROOT / "scripts" / "generate-report.py" +METHODOLOGY = ROOT / "METHODOLOGY.md" def report_content() -> str: @@ -37,8 +39,46 @@ def disclaimer_check() -> None: print(f"report-disclaimer-check: validated disclaimer sections via {source}") +def changelog_check() -> None: + if not METHODOLOGY.exists(): + raise SystemExit(f"methodology-changelog-check failed: missing {METHODOLOGY}") + + content = METHODOLOGY.read_text(encoding="utf-8") + required = [ + "## Methodology changelog policy", + "### Update rules", + "### Entry format", + "### Changelog", + "comparability-impacting", + "| version | date (UTC) | change_type | summary | comparability_impact | required_action |", + ] + for token in required: + if token not in content: + raise SystemExit(f"methodology-changelog-check failed: missing '{token}' in METHODOLOGY.md") + + changelog_rows = [ + line + for line in content.splitlines() + if line.startswith("|") and "comparability-impacting" in line + ] + if not changelog_rows: + raise SystemExit( + "methodology-changelog-check failed: changelog requires at least one comparability-impacting entry" + ) + print("methodology-changelog-check: validated changelog policy and comparability entries") + + def main() -> None: - disclaimer_check() + command = sys.argv[1] if len(sys.argv) > 1 else "report-disclaimer-check" + if command == "report-disclaimer-check": + disclaimer_check() + return + if command == "methodology-changelog-check": + changelog_check() + return + raise SystemExit( + "usage: publication-policy-check.py [report-disclaimer-check|methodology-changelog-check]" + ) if __name__ == "__main__": From adf4d459680dff0d9600aaba44c9a852e7b41f5f Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:58:30 +0200 Subject: [PATCH 3/4] feat(issue-25): enforce README and report publication sync checks --- .github/workflows/ci.yml | 6 +++++ Makefile | 5 +++- README.md | 12 +++++++++ docs/guides/benchmark-workflow.md | 3 +++ scripts/publication-policy-check.py | 40 ++++++++++++++++++++++++++++- 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd37a70..2d500d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -62,6 +62,12 @@ jobs: run: make benchmark-schema-validate - name: Run statistical quality gate run: make ci-benchmark-quality-check + - name: Validate publication disclaimer policy + run: make report-disclaimer-check + - name: Validate methodology changelog policy + run: make methodology-changelog-check + - name: Validate README/report publication sync + run: make publication-sync-check - name: Upload benchmark quality summary uses: actions/upload-artifact@v4 with: diff --git a/Makefile b/Makefile index fb4ffe2..94c0571 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH) GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check methodology-changelog-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check methodology-changelog-check publication-sync-check benchmark: bash scripts/run-all.sh @@ -106,6 +106,9 @@ report-disclaimer-check: methodology-changelog-check: $(PYTHON) scripts/publication-policy-check.py methodology-changelog-check +publication-sync-check: + $(PYTHON) scripts/publication-policy-check.py publication-sync-check + workflow-concurrency-check: $(PYTHON) scripts/workflow-policy-check.py concurrency-check diff --git a/README.md b/README.md index b5d1201..b50d7c6 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,18 @@ benchmarks/ - fixture contract is source-of-truth for expected API behavior - matcher changes require fixture updates and design doc updates +## Publication policy + +- latest-results source of truth: `results/latest/summary.json` and `results/latest/report.md` +- report and summary are generated from `results/latest/raw/*.json` via `python3 scripts/generate-report.py` +- README must not publish standalone benchmark numbers; publication references must point to generated artifacts + +### Fairness disclaimer (publication-wide) + +- Language-vs-framework caveat: cross-language comparisons include runtime/ecosystem effects and are not framework-only deltas +- Cross-language interpretation must be treated as directional evidence, not absolute winner claims +- Parity failures invalidate performance interpretation until correctness is restored + ## Documentation - `docs/design/002-api-parity-contract.md` - parity contract rationale diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md index 2519eb6..9f050e7 100644 --- a/docs/guides/benchmark-workflow.md +++ b/docs/guides/benchmark-workflow.md @@ -78,6 +78,9 @@ make benchmark-stats-check make benchmark-variance-check make benchmark-benchstat-check make ci-benchmark-quality-check +make report-disclaimer-check +make methodology-changelog-check +make publication-sync-check ``` Quality thresholds and required metrics are versioned in `stats-policy.yaml`. diff --git a/scripts/publication-policy-check.py b/scripts/publication-policy-check.py index e396668..73c31a1 100644 --- a/scripts/publication-policy-check.py +++ b/scripts/publication-policy-check.py @@ -7,6 +7,7 @@ REPORT = ROOT / "results" / "latest" / "report.md" REPORT_GENERATOR = ROOT / "scripts" / "generate-report.py" METHODOLOGY = ROOT / "METHODOLOGY.md" +README = ROOT / "README.md" def report_content() -> str: @@ -68,6 +69,40 @@ def changelog_check() -> None: print("methodology-changelog-check: validated changelog policy and comparability entries") +def publication_sync_check() -> None: + if not README.exists(): + raise SystemExit(f"publication-sync-check failed: missing {README}") + + readme = README.read_text(encoding="utf-8") + report = report_content() + template = generator_template_content() + + readme_required = [ + "## Publication policy", + "latest-results source of truth: `results/latest/summary.json` and `results/latest/report.md`", + "README must not publish standalone benchmark numbers", + ] + for token in readme_required: + if token not in readme: + raise SystemExit(f"publication-sync-check failed: missing '{token}' in README.md") + + shared_caveats = [ + "Language-vs-framework caveat", + "cross-language", + "Parity failures invalidate performance interpretation", + ] + for token in shared_caveats: + if token not in readme: + raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in README.md") + if token not in template: + raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in scripts/generate-report.py") + if REPORT.exists() and token not in report: + raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in results/latest/report.md") + + report_source = "report + generator" if REPORT.exists() else "generator template" + print(f"publication-sync-check: validated README/report caveat sync via {report_source}") + + def main() -> None: command = sys.argv[1] if len(sys.argv) > 1 else "report-disclaimer-check" if command == "report-disclaimer-check": @@ -76,8 +111,11 @@ def main() -> None: if command == "methodology-changelog-check": changelog_check() return + if command == "publication-sync-check": + publication_sync_check() + return raise SystemExit( - "usage: publication-policy-check.py [report-disclaimer-check|methodology-changelog-check]" + "usage: publication-policy-check.py [report-disclaimer-check|methodology-changelog-check|publication-sync-check]" ) From ec8a091478798b8ca0b2d0c56e993968bb74c686 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 21:13:50 +0200 Subject: [PATCH 4/4] fix(pr-32): address review findings for policy checks --- .github/workflows/ci.yml | 15 +++++++++++++++ scripts/publication-policy-check.py | 10 +++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d500d7..dfa8504 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,21 @@ concurrency: cancel-in-progress: true jobs: + codeql: + name: CodeQL analysis + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@v5 + - uses: github/codeql-action/init@v4 + with: + languages: go + - uses: github/codeql-action/autobuild@v4 + - uses: github/codeql-action/analyze@v4 + pr-title: name: Validate PR title if: github.event_name == 'pull_request' diff --git a/scripts/publication-policy-check.py b/scripts/publication-policy-check.py index 73c31a1..a95584b 100644 --- a/scripts/publication-policy-check.py +++ b/scripts/publication-policy-check.py @@ -76,6 +76,9 @@ def publication_sync_check() -> None: readme = README.read_text(encoding="utf-8") report = report_content() template = generator_template_content() + readme_folded = readme.casefold() + template_folded = template.casefold() + report_folded = report.casefold() readme_required = [ "## Publication policy", @@ -92,11 +95,12 @@ def publication_sync_check() -> None: "Parity failures invalidate performance interpretation", ] for token in shared_caveats: - if token not in readme: + token_folded = token.casefold() + if token_folded not in readme_folded: raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in README.md") - if token not in template: + if token_folded not in template_folded: raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in scripts/generate-report.py") - if REPORT.exists() and token not in report: + if REPORT.exists() and token_folded not in report_folded: raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in results/latest/report.md") report_source = "report + generator" if REPORT.exists() else "generator template"