21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
@@ -11,6 +11,21 @@ concurrency:
  cancel-in-progress: true

jobs:
  codeql:
    name: CodeQL analysis
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write
    steps:
      - uses: actions/checkout@v5
      - uses: github/codeql-action/init@v4
        with:
          languages: go
      - uses: github/codeql-action/autobuild@v4
      - uses: github/codeql-action/analyze@v4

  pr-title:
    name: Validate PR title
    if: github.event_name == 'pull_request'
@@ -62,6 +77,12 @@ jobs:
        run: make benchmark-schema-validate
      - name: Run statistical quality gate
        run: make ci-benchmark-quality-check
      - name: Validate publication disclaimer policy
        run: make report-disclaimer-check
      - name: Validate methodology changelog policy
        run: make methodology-changelog-check
      - name: Validate README/report publication sync
        run: make publication-sync-check
      - name: Upload benchmark quality summary
        uses: actions/upload-artifact@v4
        with:
22 changes: 22 additions & 0 deletions METHODOLOGY.md
@@ -46,6 +46,28 @@
- quality summary: `results/latest/benchmark-quality-summary.json`
- optional tool artifacts: `results/latest/tooling/benchstat/*.txt`

## Methodology changelog policy

### Update rules

- update this changelog whenever benchmark process, tooling, schema, thresholds, runtime constraints, or interpretation rules change
- classify each entry as `comparability-impacting` or `non-comparability-impacting`
- for `comparability-impacting` changes, include migration notes and baseline reset guidance
- do not publish new benchmark claims without a corresponding changelog entry when the methodology or its version has changed

### Entry format

Use one row per change with required fields:

`version | date (UTC) | change_type | summary | comparability_impact | required_action`

### Changelog

| version | date (UTC) | change_type | summary | comparability_impact | required_action |
|---|---|---|---|---|---|
| 1.1.0 | 2026-02-07 | policy | Added publication fairness disclaimer template and README/report sync policy checks | comparability-impacting | Rebaseline external comparisons and reference this version in publication notes |
| 1.0.0 | 2026-02-05 | baseline | Established parity-gated benchmark workflow, schema validation, and quality gates | comparability-impacting | Treat pre-1.0 outputs as non-comparable to current policy |

## Interpretation guidance

- treat parity failures as correctness blockers, not performance regressions
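As a note on the entry format defined above: each changelog row is a pipe-delimited record with six fields. The sketch below shows one way such a row could be split into named fields; it is purely illustrative (the field names and helper are hypothetical) and is not how `scripts/publication-policy-check.py` validates the table, which only checks for the required headings and at least one comparability-impacting row.

```python
# Illustrative only: parse one METHODOLOGY.md changelog row of the form
# "| version | date (UTC) | change_type | summary | comparability_impact | required_action |".
# The repository's actual check does not parse rows this way.
FIELDS = ["version", "date_utc", "change_type", "summary", "comparability_impact", "required_action"]


def parse_changelog_row(row: str) -> dict:
    cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
    if len(cells) != len(FIELDS):
        raise ValueError(f"expected {len(FIELDS)} fields, got {len(cells)}: {row!r}")
    return dict(zip(FIELDS, cells))


entry = parse_changelog_row(
    "| 1.0.0 | 2026-02-05 | baseline | Established parity-gated benchmark workflow, "
    "schema validation, and quality gates | comparability-impacting | "
    "Treat pre-1.0 outputs as non-comparable to current policy |"
)
assert entry["comparability_impact"] == "comparability-impacting"
```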
11 changes: 10 additions & 1 deletion Makefile
@@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH)
GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover
MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;)

.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check
.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check report-disclaimer-check methodology-changelog-check publication-sync-check

benchmark:
	bash scripts/run-all.sh
@@ -100,6 +100,15 @@ benchmark-benchstat-check:
ci-benchmark-quality-check:
	$(PYTHON) scripts/benchmark-quality-check.py ci-check

report-disclaimer-check:
	$(PYTHON) scripts/publication-policy-check.py report-disclaimer-check

methodology-changelog-check:
	$(PYTHON) scripts/publication-policy-check.py methodology-changelog-check

publication-sync-check:
	$(PYTHON) scripts/publication-policy-check.py publication-sync-check

workflow-concurrency-check:
	$(PYTHON) scripts/workflow-policy-check.py concurrency-check

12 changes: 12 additions & 0 deletions README.md
@@ -62,6 +62,18 @@ benchmarks/
- fixture contract is source-of-truth for expected API behavior
- matcher changes require fixture updates and design doc updates

## Publication policy

- latest-results source of truth: `results/latest/summary.json` and `results/latest/report.md`
- report and summary are generated from `results/latest/raw/*.json` via `python3 scripts/generate-report.py`
- README must not publish standalone benchmark numbers; publication references must point to generated artifacts

### Fairness disclaimer (publication-wide)

- Language-vs-framework caveat: cross-language comparisons include runtime/ecosystem effects and are not framework-only deltas
- Cross-language comparisons must be interpreted as directional evidence, not as absolute winner claims
- Parity failures invalidate performance interpretation until correctness is restored

## Documentation

- `docs/design/002-api-parity-contract.md` - parity contract rationale
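For orientation, the publication policy above implies a single pipeline: per-framework raw results under `results/latest/raw/*.json` are aggregated by `scripts/generate-report.py` into `results/latest/summary.json` and `results/latest/report.md`. A minimal sketch of that flow follows; the function names and summary structure are assumptions for illustration, not the generator's actual implementation.

```python
# Illustrative sketch of the raw -> summary flow described in the README publication policy.
# Paths match the policy; the aggregation shape is an assumption, not the real schema.
import json
from pathlib import Path

RAW_DIR = Path("results/latest/raw")
SUMMARY_PATH = Path("results/latest/summary.json")


def collect_raw_runs() -> dict:
    """Load every per-framework raw benchmark file under results/latest/raw/."""
    return {
        raw_file.stem: json.loads(raw_file.read_text(encoding="utf-8"))
        for raw_file in sorted(RAW_DIR.glob("*.json"))
    }


def write_summary(runs: dict) -> None:
    """Persist the aggregated summary that report.md is later rendered from."""
    SUMMARY_PATH.write_text(json.dumps(runs, indent=2) + "\n", encoding="utf-8")
```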
3 changes: 3 additions & 0 deletions docs/guides/benchmark-workflow.md
@@ -78,6 +78,9 @@ make benchmark-stats-check
make benchmark-variance-check
make benchmark-benchstat-check
make ci-benchmark-quality-check
make report-disclaimer-check
make methodology-changelog-check
make publication-sync-check
```

Quality thresholds and required metrics are versioned in `stats-policy.yaml`.
11 changes: 11 additions & 0 deletions scripts/generate-report.py
@@ -113,6 +113,17 @@ def write_report(summary):

    lines.extend(
        [
            "",
            "## Fairness Disclaimer",
            "",
            "- Language-vs-framework caveat: cross-language results include runtime and ecosystem effects and must not be treated as framework-only deltas.",
            "- Cross-language baseline: compare implementations with equivalent API behavior, workload profile, and environment constraints before drawing conclusions.",
            "",
            "## Anti-Misinterpretation Guidance",
            "",
            "- Do not rank frameworks across languages as absolute winners; use results as scenario-specific signals.",
            "- Treat large cross-language deltas as prompts for deeper profiling (runtime, I/O, GC, and dependency effects), not as standalone product claims.",
            "- Parity failures invalidate performance interpretation until correctness is restored.",
            "",
            "## Raw Artifacts",
            "",
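The hunk above shows only the `lines.extend(...)` call inside `write_report(summary)`; the rest of the function is outside the diff. Assuming the usual accumulate-then-write pattern, the tail of the function presumably resembles the sketch below (the output path and joining logic are assumptions, not code from this PR).

```python
# Assumed tail of write_report(summary): join the accumulated Markdown lines and
# write results/latest/report.md. Shown for context only; not part of this diff.
from pathlib import Path

REPORT_PATH = Path("results/latest/report.md")


def finish_report(lines: list[str]) -> None:
    REPORT_PATH.write_text("\n".join(lines) + "\n", encoding="utf-8")
```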
127 changes: 127 additions & 0 deletions scripts/publication-policy-check.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import sys
from pathlib import Path


ROOT = Path(__file__).resolve().parent.parent
REPORT = ROOT / "results" / "latest" / "report.md"
REPORT_GENERATOR = ROOT / "scripts" / "generate-report.py"
METHODOLOGY = ROOT / "METHODOLOGY.md"
README = ROOT / "README.md"


def report_content() -> str:
    if REPORT.exists():
        return REPORT.read_text(encoding="utf-8")
    return ""


def generator_template_content() -> str:
    if not REPORT_GENERATOR.exists():
        raise SystemExit(f"report-disclaimer-check failed: missing generator at {REPORT_GENERATOR}")
    return REPORT_GENERATOR.read_text(encoding="utf-8")


def disclaimer_check() -> None:
    content = report_content()
    template = generator_template_content()
    required = [
        "## Fairness Disclaimer",
        "Language-vs-framework caveat",
        "## Anti-Misinterpretation Guidance",
        "cross-language",
    ]
    for token in required:
        if token not in template:
            raise SystemExit(f"report-disclaimer-check failed: missing '{token}' in scripts/generate-report.py")
        if REPORT.exists() and token not in content:
            raise SystemExit(f"report-disclaimer-check failed: missing '{token}' in results/latest/report.md")
    source = "report + generator" if REPORT.exists() else "generator template"
    print(f"report-disclaimer-check: validated disclaimer sections via {source}")


def changelog_check() -> None:
    if not METHODOLOGY.exists():
        raise SystemExit(f"methodology-changelog-check failed: missing {METHODOLOGY}")

    content = METHODOLOGY.read_text(encoding="utf-8")
    required = [
        "## Methodology changelog policy",
        "### Update rules",
        "### Entry format",
        "### Changelog",
        "comparability-impacting",
        "| version | date (UTC) | change_type | summary | comparability_impact | required_action |",
    ]
    for token in required:
        if token not in content:
            raise SystemExit(f"methodology-changelog-check failed: missing '{token}' in METHODOLOGY.md")

    changelog_rows = [
        line
        for line in content.splitlines()
        if line.startswith("|") and "comparability-impacting" in line
    ]
    if not changelog_rows:
        raise SystemExit(
            "methodology-changelog-check failed: changelog requires at least one comparability-impacting entry"
        )
    print("methodology-changelog-check: validated changelog policy and comparability entries")


def publication_sync_check() -> None:
    if not README.exists():
        raise SystemExit(f"publication-sync-check failed: missing {README}")

    readme = README.read_text(encoding="utf-8")
    report = report_content()
    template = generator_template_content()
    readme_folded = readme.casefold()
    template_folded = template.casefold()
    report_folded = report.casefold()

    readme_required = [
        "## Publication policy",
        "latest-results source of truth: `results/latest/summary.json` and `results/latest/report.md`",
        "README must not publish standalone benchmark numbers",
    ]
    for token in readme_required:
        if token not in readme:
            raise SystemExit(f"publication-sync-check failed: missing '{token}' in README.md")

    shared_caveats = [
        "Language-vs-framework caveat",
        "cross-language",
        "Parity failures invalidate performance interpretation",
    ]
    for token in shared_caveats:
        token_folded = token.casefold()
        if token_folded not in readme_folded:
            raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in README.md")
        if token_folded not in template_folded:
            raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in scripts/generate-report.py")
        if REPORT.exists() and token_folded not in report_folded:
            raise SystemExit(f"publication-sync-check failed: missing caveat '{token}' in results/latest/report.md")

    report_source = "report + generator" if REPORT.exists() else "generator template"
    print(f"publication-sync-check: validated README/report caveat sync via {report_source}")


def main() -> None:
    command = sys.argv[1] if len(sys.argv) > 1 else "report-disclaimer-check"
    if command == "report-disclaimer-check":
        disclaimer_check()
        return
    if command == "methodology-changelog-check":
        changelog_check()
        return
    if command == "publication-sync-check":
        publication_sync_check()
        return
    raise SystemExit(
        "usage: publication-policy-check.py [report-disclaimer-check|methodology-changelog-check|publication-sync-check]"
    )


if __name__ == "__main__":
    main()
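The script is driven by a single positional subcommand and defaults to `report-disclaimer-check`. The supported entry points are the new `make` targets, but a quick local smoke test of all three subcommands could look like the sketch below (assumes it is run from the repository root).

```python
# Illustrative smoke test: run each publication-policy subcommand and require exit code 0.
import subprocess
import sys

CHECKS = [
    "report-disclaimer-check",
    "methodology-changelog-check",
    "publication-sync-check",
]

for check in CHECKS:
    result = subprocess.run(
        [sys.executable, "scripts/publication-policy-check.py", check],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, f"{check} failed:\n{result.stdout}{result.stderr}"
    print(result.stdout.strip())
```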