From afb30f5fe6bf04ed7c2439d947521231927640b2 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 19:38:33 +0200 Subject: [PATCH 1/8] feat: define raw benchmark schema v1 for issue #17 --- Makefile | 5 +- schemas/benchmark-raw-v1.schema.json | 94 ++++++++++++++++++++++++++++ scripts/benchmark-measure.py | 2 + scripts/run-single.sh | 2 + scripts/validate-result-schemas.py | 90 ++++++++++++++++++++++++++ 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 schemas/benchmark-raw-v1.schema.json create mode 100644 scripts/validate-result-schemas.py diff --git a/Makefile b/Makefile index 0755546..3db5781 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: bash scripts/run-all.sh @@ -45,6 +45,9 @@ benchmark-limits-check: benchmark-manifest-check: python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json +benchmark-raw-schema-check: + python3 scripts/validate-result-schemas.py raw-check + benchmark-stats-check: python3 scripts/benchmark-quality-check.py stats-check diff --git a/schemas/benchmark-raw-v1.schema.json b/schemas/benchmark-raw-v1.schema.json new file mode 100644 index 0000000..f28be4c --- /dev/null +++ b/schemas/benchmark-raw-v1.schema.json @@ -0,0 +1,94 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://go-modkit.dev/schemas/benchmark-raw-v1.schema.json", + "title": "Benchmark Raw Result v1", + "type": "object", + "required": [ + "schema_version", + "framework", + "target", + "status" + ], + "additionalProperties": true, + "properties": { + "schema_version": { + "const": "raw-v1" + }, + "framework": { + "type": "string", + "minLength": 1 + }, + "target": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "ok", + "skipped" + ] + }, + "reason": { + "type": "string" + }, + "parity": { + "type": "string" + }, + "engine": { + "type": "string" + }, + "metric_units": { + "type": "object" + }, + "benchmark": { + "type": "object" + }, + "docker": { + "type": "object" + }, + "resources_normalized": { + "type": "object" + } + }, + "allOf": [ + { + "if": { + "properties": { + "status": { + "const": "ok" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "parity", + "engine", + "metric_units", + "benchmark", + "resources_normalized" + ] + } + }, + { + "if": { + "properties": { + "status": { + "const": "skipped" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "reason" + ] + } + } + ] +} diff --git a/scripts/benchmark-measure.py b/scripts/benchmark-measure.py index 166e815..cb23599 100644 --- a/scripts/benchmark-measure.py +++ b/scripts/benchmark-measure.py @@ -241,6 +241,7 @@ def main(): if not run_stats: payload = { + "schema_version": "raw-v1", "framework": args.framework, "target": args.target, "status": "skipped", @@ -279,6 +280,7 @@ def main(): docker_stats = collect_docker_stats(args.framework) payload = { + "schema_version": "raw-v1", "framework": args.framework, "target": args.target, "status": "ok", diff --git a/scripts/run-single.sh b/scripts/run-single.sh index 2e86241..cbfd7cf 100755 --- a/scripts/run-single.sh +++ b/scripts/run-single.sh @@ -96,6 +96,7 @@ if ! curl -fsS "$target/health" >/dev/null 2>&1; then import json, sys framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3] payload = { + "schema_version": "raw-v1", "framework": framework, "target": target, "status": "skipped", @@ -116,6 +117,7 @@ else import json, sys framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3] payload = { + "schema_version": "raw-v1", "framework": framework, "target": target, "status": "skipped", diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py new file mode 100644 index 0000000..10d1116 --- /dev/null +++ b/scripts/validate-result-schemas.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +import argparse +import json +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent.parent +RAW_DIR = ROOT / "results" / "latest" / "raw" +RAW_SCHEMA = ROOT / "schemas" / "benchmark-raw-v1.schema.json" + + +def load_json(path): + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + + +def validate_raw_row(path, payload, schema_version): + required = ("schema_version", "framework", "target", "status") + for field in required: + if field not in payload: + raise SystemExit(f"Raw schema validation failed for {path}: missing {field}") + + if payload.get("schema_version") != schema_version: + raise SystemExit( + f"Raw schema validation failed for {path}: schema_version={payload.get('schema_version')!r}, expected {schema_version!r}" + ) + + status = payload.get("status") + if status not in ("ok", "skipped"): + raise SystemExit(f"Raw schema validation failed for {path}: status={status!r} must be 'ok' or 'skipped'") + + if not isinstance(payload.get("framework"), str) or not payload.get("framework"): + raise SystemExit(f"Raw schema validation failed for {path}: framework must be non-empty string") + if not isinstance(payload.get("target"), str) or not payload.get("target"): + raise SystemExit(f"Raw schema validation failed for {path}: target must be non-empty string") + + if status == "skipped": + reason = payload.get("reason") + if not isinstance(reason, str) or not reason: + raise SystemExit(f"Raw schema validation failed for {path}: skipped rows require non-empty reason") + return + + for field in ("parity", "engine", "metric_units", "benchmark", "resources_normalized"): + if field not in payload: + raise SystemExit(f"Raw schema validation failed for {path}: missing {field}") + + benchmark = payload.get("benchmark") + if not isinstance(benchmark, dict): + raise SystemExit(f"Raw schema validation failed for {path}: benchmark must be object") + + for metric_field in ("run_stats", "median"): + if metric_field not in benchmark: + raise SystemExit(f"Raw schema validation failed for {path}: benchmark.{metric_field} is required") + + +def validate_raw(raw_dir, schema_path): + schema = load_json(schema_path) + schema_version = (schema.get("properties") or {}).get("schema_version", {}).get("const") + if not isinstance(schema_version, str) or not schema_version: + raise SystemExit(f"Raw schema file missing properties.schema_version.const: {schema_path}") + + files = sorted(raw_dir.glob("*.json")) + if not files: + raise SystemExit(f"No raw benchmark files found in: {raw_dir}") + + for path in files: + payload = load_json(path) + validate_raw_row(path, payload, schema_version) + + print(f"benchmark-raw-schema-check: validated {len(files)} raw artifact(s)") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Validate benchmark result schemas") + parser.add_argument("cmd", choices=["raw-check"]) + parser.add_argument("--raw-dir", type=Path, default=RAW_DIR) + parser.add_argument("--raw-schema", type=Path, default=RAW_SCHEMA) + return parser.parse_args() + + +def main(): + args = parse_args() + if args.cmd == "raw-check": + validate_raw(args.raw_dir, args.raw_schema) + return + raise SystemExit(f"Unknown command: {args.cmd}") + + +if __name__ == "__main__": + main() From f773796b315119ad28b6970ab459bf6c6ceb0000 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 19:40:09 +0200 Subject: [PATCH 2/8] feat: define summary benchmark schema v1 for issue #18 --- Makefile | 5 +- schemas/benchmark-summary-v1.schema.json | 91 ++++++++++++++++++++++++ scripts/generate-report.py | 15 +++- scripts/validate-result-schemas.py | 61 +++++++++++++++- 4 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 schemas/benchmark-summary-v1.schema.json diff --git a/Makefile b/Makefile index 3db5781..515a7b6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: bash scripts/run-all.sh @@ -48,6 +48,9 @@ benchmark-manifest-check: benchmark-raw-schema-check: python3 scripts/validate-result-schemas.py raw-check +benchmark-summary-schema-check: + python3 scripts/validate-result-schemas.py summary-check + benchmark-stats-check: python3 scripts/benchmark-quality-check.py stats-check diff --git a/schemas/benchmark-summary-v1.schema.json b/schemas/benchmark-summary-v1.schema.json new file mode 100644 index 0000000..84a47b6 --- /dev/null +++ b/schemas/benchmark-summary-v1.schema.json @@ -0,0 +1,91 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://go-modkit.dev/schemas/benchmark-summary-v1.schema.json", + "title": "Benchmark Summary v1", + "type": "object", + "required": [ + "schema_version", + "generated_at", + "total_targets", + "successful_targets", + "skipped_targets", + "targets" + ], + "additionalProperties": true, + "properties": { + "schema_version": { + "const": "summary-v1" + }, + "generated_at": { + "type": "string", + "minLength": 1 + }, + "total_targets": { + "type": "integer", + "minimum": 0 + }, + "successful_targets": { + "type": "integer", + "minimum": 0 + }, + "skipped_targets": { + "type": "integer", + "minimum": 0 + }, + "targets": { + "type": "array", + "items": { + "type": "object", + "required": [ + "framework", + "status", + "target", + "provenance" + ], + "additionalProperties": true, + "properties": { + "framework": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "ok", + "skipped" + ] + }, + "target": { + "type": "string", + "minLength": 1 + }, + "reason": { + "type": "string" + }, + "median": { + "type": "object" + }, + "uncertainty": { + "type": "object" + }, + "provenance": { + "type": "object", + "required": [ + "raw_source" + ], + "properties": { + "raw_source": { + "type": "string", + "minLength": 1 + }, + "manifest": { + "type": "string", + "minLength": 1 + } + } + } + } + } + } + } +} diff --git a/scripts/generate-report.py b/scripts/generate-report.py index 3da31ba..b1c2475 100755 --- a/scripts/generate-report.py +++ b/scripts/generate-report.py @@ -18,7 +18,9 @@ def load_raw_files(): for path in sorted(RAW_DIR.glob("*.json")): try: with path.open("r", encoding="utf-8") as f: - rows.append(json.load(f)) + payload = json.load(f) + payload["_source_file"] = path.name + rows.append(payload) except json.JSONDecodeError as exc: print(f"Warning: skipping malformed JSON {path}: {exc}") return rows @@ -27,6 +29,7 @@ def load_raw_files(): def build_summary(rows): generated_at = datetime.now(timezone.utc).isoformat() summary = { + "schema_version": "summary-v1", "generated_at": generated_at, "total_targets": len(rows), "successful_targets": sum(1 for r in rows if r.get("status") == "ok"), @@ -39,8 +42,18 @@ def build_summary(rows): "status": row.get("status"), "target": row.get("target"), "reason": row.get("reason"), + "provenance": { + "raw_source": f"results/latest/raw/{row.get('_source_file', 'unknown')}" + }, } bench = row.get("benchmark") or {} + quality = (bench.get("quality") or {}).get("variance") or {} + if quality: + target["uncertainty"] = { + "rps_cv": quality.get("rps_cv"), + "latency_ms_p95_cv": quality.get("latency_ms_p95_cv"), + "latency_ms_p99_cv": quality.get("latency_ms_p99_cv"), + } median = bench.get("median") or {} if median: target["median"] = { diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py index 10d1116..ffa782e 100644 --- a/scripts/validate-result-schemas.py +++ b/scripts/validate-result-schemas.py @@ -7,6 +7,8 @@ ROOT = Path(__file__).resolve().parent.parent RAW_DIR = ROOT / "results" / "latest" / "raw" RAW_SCHEMA = ROOT / "schemas" / "benchmark-raw-v1.schema.json" +SUMMARY_FILE = ROOT / "results" / "latest" / "summary.json" +SUMMARY_SCHEMA = ROOT / "schemas" / "benchmark-summary-v1.schema.json" def load_json(path): @@ -70,11 +72,65 @@ def validate_raw(raw_dir, schema_path): print(f"benchmark-raw-schema-check: validated {len(files)} raw artifact(s)") +def validate_summary(summary_file, schema_path): + schema = load_json(schema_path) + schema_version = (schema.get("properties") or {}).get("schema_version", {}).get("const") + if not isinstance(schema_version, str) or not schema_version: + raise SystemExit(f"Summary schema file missing properties.schema_version.const: {schema_path}") + + if not summary_file.exists(): + raise SystemExit(f"Summary file not found: {summary_file}") + + payload = load_json(summary_file) + required = ( + "schema_version", + "generated_at", + "total_targets", + "successful_targets", + "skipped_targets", + "targets", + ) + for field in required: + if field not in payload: + raise SystemExit(f"Summary schema validation failed for {summary_file}: missing {field}") + + if payload.get("schema_version") != schema_version: + raise SystemExit( + f"Summary schema validation failed for {summary_file}: schema_version={payload.get('schema_version')!r}, expected {schema_version!r}" + ) + + targets = payload.get("targets") + if not isinstance(targets, list): + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets must be array") + + for idx, target in enumerate(targets): + if not isinstance(target, dict): + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets[{idx}] must be object") + for field in ("framework", "status", "target", "provenance"): + if field not in target: + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets[{idx}] missing {field}") + provenance = target.get("provenance") + if not isinstance(provenance, dict) or not provenance.get("raw_source"): + raise SystemExit( + f"Summary schema validation failed for {summary_file}: targets[{idx}].provenance.raw_source is required" + ) + + status = target.get("status") + if status == "ok" and "uncertainty" not in target: + raise SystemExit( + f"Summary schema validation failed for {summary_file}: targets[{idx}] missing uncertainty for status=ok" + ) + + print("benchmark-summary-schema-check: validated summary artifact") + + def parse_args(): parser = argparse.ArgumentParser(description="Validate benchmark result schemas") - parser.add_argument("cmd", choices=["raw-check"]) + parser.add_argument("cmd", choices=["raw-check", "summary-check"]) parser.add_argument("--raw-dir", type=Path, default=RAW_DIR) parser.add_argument("--raw-schema", type=Path, default=RAW_SCHEMA) + parser.add_argument("--summary-file", type=Path, default=SUMMARY_FILE) + parser.add_argument("--summary-schema", type=Path, default=SUMMARY_SCHEMA) return parser.parse_args() @@ -83,6 +139,9 @@ def main(): if args.cmd == "raw-check": validate_raw(args.raw_dir, args.raw_schema) return + if args.cmd == "summary-check": + validate_summary(args.summary_file, args.summary_schema) + return raise SystemExit(f"Unknown command: {args.cmd}") From 04b1732745e33278e0eca49a7ceefee6bbf98573 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 19:42:03 +0200 Subject: [PATCH 3/8] feat: enforce schema validation in scripts and CI for issue #19 --- .github/workflows/ci.yml | 2 ++ Makefile | 6 +++++- scripts/generate-report.py | 11 +++++++++++ scripts/run-all.sh | 2 ++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1f2a95..d49713d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,8 @@ jobs: run: bash scripts/run-all.sh - name: Generate report from raw results run: python3 scripts/generate-report.py + - name: Validate benchmark result schemas + run: make benchmark-schema-validate - name: Run statistical quality gate run: make ci-benchmark-quality-check - name: Upload benchmark quality summary diff --git a/Makefile b/Makefile index 515a7b6..8baa296 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: bash scripts/run-all.sh @@ -51,6 +51,10 @@ benchmark-raw-schema-check: benchmark-summary-schema-check: python3 scripts/validate-result-schemas.py summary-check +benchmark-schema-validate: + $(MAKE) benchmark-raw-schema-check + $(MAKE) benchmark-summary-schema-check + benchmark-stats-check: python3 scripts/benchmark-quality-check.py stats-check diff --git a/scripts/generate-report.py b/scripts/generate-report.py index b1c2475..faeb914 100755 --- a/scripts/generate-report.py +++ b/scripts/generate-report.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 import json +import subprocess +import sys from datetime import datetime, timezone from pathlib import Path @@ -11,6 +13,13 @@ REPORT_PATH = RESULTS_LATEST / "report.md" +def run_schema_check(command): + completed = subprocess.run(command, capture_output=True, text=True, check=False) + if completed.returncode != 0: + message = completed.stderr.strip() or completed.stdout.strip() or "schema validation failed" + raise SystemExit(message) + + def load_raw_files(): if not RAW_DIR.exists(): return [] @@ -117,9 +126,11 @@ def write_report(summary): def main(): + run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "raw-check"]) rows = load_raw_files() summary = build_summary(rows) write_summary(summary) + run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "summary-check"]) write_report(summary) print(f"Wrote: {SUMMARY_PATH}") print(f"Wrote: {REPORT_PATH}") diff --git a/scripts/run-all.sh b/scripts/run-all.sh index 52f6e4f..8cbebf6 100755 --- a/scripts/run-all.sh +++ b/scripts/run-all.sh @@ -71,6 +71,8 @@ for framework in "${frameworks[@]}"; do BENCHMARK_METADATA_MANAGED=1 bash scripts/run-single.sh "$framework" done +python3 scripts/validate-result-schemas.py raw-check --raw-dir "$raw_dir" + python3 scripts/environment-manifest.py write-manifest --raw-dir "$raw_dir" --fingerprint "$fingerprint_file" --out "$manifest_file" echo "Raw benchmark files generated in: $raw_dir" From 17884ed8205f72be5a2f63bc58e2d25ec36bd829 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 19:53:22 +0200 Subject: [PATCH 4/8] fix: align schema contracts and validation errors for issue #19 --- schemas/benchmark-raw-v1.schema.json | 20 +++++++++-- schemas/benchmark-summary-v1.schema.json | 43 +++++++++++++++++++++++- scripts/validate-result-schemas.py | 7 ++-- 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/schemas/benchmark-raw-v1.schema.json b/schemas/benchmark-raw-v1.schema.json index f28be4c..0c08480 100644 --- a/schemas/benchmark-raw-v1.schema.json +++ b/schemas/benchmark-raw-v1.schema.json @@ -42,7 +42,15 @@ "type": "object" }, "benchmark": { - "type": "object" + "type": "object", + "properties": { + "run_stats": { + "type": "array" + }, + "median": { + "type": "object" + } + } }, "docker": { "type": "object" @@ -70,7 +78,15 @@ "metric_units", "benchmark", "resources_normalized" - ] + ], + "properties": { + "benchmark": { + "required": [ + "run_stats", + "median" + ] + } + } } }, { diff --git a/schemas/benchmark-summary-v1.schema.json b/schemas/benchmark-summary-v1.schema.json index 84a47b6..aa98a05 100644 --- a/schemas/benchmark-summary-v1.schema.json +++ b/schemas/benchmark-summary-v1.schema.json @@ -87,5 +87,46 @@ } } } - } + }, + "allOf": [ + { + "if": { + "properties": { + "targets": { + "type": "array" + } + }, + "required": [ + "targets" + ] + }, + "then": { + "properties": { + "targets": { + "items": { + "allOf": [ + { + "if": { + "properties": { + "status": { + "const": "ok" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "uncertainty" + ] + } + } + ] + } + } + } + } + } + ] } diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py index ffa782e..66b3dfc 100644 --- a/scripts/validate-result-schemas.py +++ b/scripts/validate-result-schemas.py @@ -12,8 +12,11 @@ def load_json(path): - with path.open("r", encoding="utf-8") as handle: - return json.load(handle) + try: + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + except json.JSONDecodeError as exc: + raise SystemExit(f"Malformed JSON in {path}: {exc.msg} at line {exc.lineno}, column {exc.colno}") from exc def validate_raw_row(path, payload, schema_version): From d0c1854cf2a9da741c21882945a84c84bc7d20c7 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:02:26 +0200 Subject: [PATCH 5/8] chore: normalize make shell and tool command variables --- Makefile | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 8baa296..67dd288 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,7 @@ +SHELL := /bin/sh +PYTHON ?= python3 +GO ?= go + .PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: @@ -22,10 +26,10 @@ benchmark-do: bash scripts/run-single.sh do report: - python3 scripts/generate-report.py + $(PYTHON) scripts/generate-report.py test: - go test ./... + $(GO) test ./... parity-check: TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh @@ -37,32 +41,32 @@ parity-check-nestjs: TARGET=http://localhost:3002 bash scripts/parity-check.sh benchmark-fingerprint-check: - python3 scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json + $(PYTHON) scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json benchmark-limits-check: - python3 scripts/environment-manifest.py check-limits --compose docker-compose.yml + $(PYTHON) scripts/environment-manifest.py check-limits --compose docker-compose.yml benchmark-manifest-check: - python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json + $(PYTHON) scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json benchmark-raw-schema-check: - python3 scripts/validate-result-schemas.py raw-check + $(PYTHON) scripts/validate-result-schemas.py raw-check benchmark-summary-schema-check: - python3 scripts/validate-result-schemas.py summary-check + $(PYTHON) scripts/validate-result-schemas.py summary-check benchmark-schema-validate: $(MAKE) benchmark-raw-schema-check $(MAKE) benchmark-summary-schema-check benchmark-stats-check: - python3 scripts/benchmark-quality-check.py stats-check + $(PYTHON) scripts/benchmark-quality-check.py stats-check benchmark-variance-check: - python3 scripts/benchmark-quality-check.py variance-check + $(PYTHON) scripts/benchmark-quality-check.py variance-check benchmark-benchstat-check: - python3 scripts/benchmark-quality-check.py benchstat-check + $(PYTHON) scripts/benchmark-quality-check.py benchstat-check ci-benchmark-quality-check: - python3 scripts/benchmark-quality-check.py ci-check + $(PYTHON) scripts/benchmark-quality-check.py ci-check From 785eac1b6d69ef2284cf6120d588229a9c8a9712 Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:07:53 +0200 Subject: [PATCH 6/8] chore: add coverage targets and document schema checks --- CONTRIBUTING.md | 13 +++++++++++++ Makefile | 31 ++++++++++++++++++++++++++++++- README.md | 4 ++++ docs/guides/benchmark-workflow.md | 4 ++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a5289d9..98201f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,15 +23,28 @@ Run these before opening a PR: ```bash go test ./... +make test-coverage TARGET=http://localhost:3001 bash scripts/parity-check.sh ``` +For patch coverage against `origin/main`: + +```bash +make test-patch-coverage +``` + If you changed scripts, also run shell linting if available: ```bash shellcheck scripts/*.sh ``` +If you changed benchmark artifacts or report generation, also run schema validation: + +```bash +make benchmark-schema-validate +``` + ## Pull request process 1. Create a branch from `main`. diff --git a/Makefile b/Makefile index 67dd288..f6b7f72 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,11 @@ SHELL := /bin/sh PYTHON ?= python3 GO ?= go +GOPATH ?= $(shell $(GO) env GOPATH) +GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover +MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: bash scripts/run-all.sh @@ -31,6 +34,32 @@ report: test: $(GO) test ./... +test-coverage: + @mkdir -p .coverage + @echo "mode: atomic" > .coverage/coverage.out + @for mod in $(MODULES); do \ + echo "Testing coverage for module: $$mod"; \ + (cd $$mod && $(GO) test -coverprofile=profile.out -covermode=atomic ./...) || exit 1; \ + if [ -f $$mod/profile.out ]; then \ + tail -n +2 $$mod/profile.out >> .coverage/coverage.out; \ + rm $$mod/profile.out; \ + fi; \ + done + @printf "\nTotal Coverage:\n" + @$(GO) tool cover -func=.coverage/coverage.out | grep "total:" + +test-patch-coverage: tools test-coverage + @echo "Comparing against origin/main..." + @git diff -U0 --no-color origin/main...HEAD > .coverage/diff.patch + @$(GO_PATCH_COVER) .coverage/coverage.out .coverage/diff.patch > .coverage/patch_coverage.out + @echo "Patch Coverage Report:" + @cat .coverage/patch_coverage.out + +tools: + @echo "Installing development tools..." + @$(GO) install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest + @echo "Done: go-patch-cover installed" + parity-check: TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh diff --git a/README.md b/README.md index a0865ec..d407be4 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,12 @@ Run benchmark orchestration and generate a report: ```bash make benchmark make report +make benchmark-schema-validate make ci-benchmark-quality-check ``` +Benchmark/report flow enforces schema validation for raw and summary artifacts before quality gates. + Use OSS measurement engine (optional): ```bash @@ -36,6 +39,7 @@ BENCH_ENGINE=hyperfine make benchmark - Python 3 - hyperfine (optional benchmark engine) - benchstat (`go install golang.org/x/perf/cmd/benchstat@latest`) +- go-patch-cover (`go install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest`, for `make test-patch-coverage`) ## Repository layout diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md index 8485d4c..216828b 100644 --- a/docs/guides/benchmark-workflow.md +++ b/docs/guides/benchmark-workflow.md @@ -13,6 +13,7 @@ ```bash make benchmark make report +make benchmark-schema-validate ``` ## Per-target run @@ -56,10 +57,13 @@ Benchmark scripts must run parity first for each target. If parity fails, skip b - `results/latest/report.md` - markdown report - `results/latest/benchmark-quality-summary.json` - policy quality gate output - `results/latest/tooling/benchstat/*.txt` - benchstat comparison outputs +- `schemas/benchmark-raw-v1.schema.json` - raw benchmark artifact contract +- `schemas/benchmark-summary-v1.schema.json` - summary artifact contract ## Quality checks ```bash +make benchmark-schema-validate make benchmark-stats-check make benchmark-variance-check make benchmark-benchstat-check From c7a97aefe569de2920f6f06e1e651ca55a946cbb Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:13:00 +0200 Subject: [PATCH 7/8] fix: handle missing schema files with clear validator errors --- scripts/validate-result-schemas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py index 66b3dfc..658ae58 100644 --- a/scripts/validate-result-schemas.py +++ b/scripts/validate-result-schemas.py @@ -15,6 +15,8 @@ def load_json(path): try: with path.open("r", encoding="utf-8") as handle: return json.load(handle) + except FileNotFoundError as exc: + raise SystemExit(f"File not found: {path}") from exc except json.JSONDecodeError as exc: raise SystemExit(f"Malformed JSON in {path}: {exc.msg} at line {exc.lineno}, column {exc.colno}") from exc From b76d03f373015858ce23b9d03841074a21f8d21a Mon Sep 17 00:00:00 2001 From: Arye Kogan Date: Sat, 7 Feb 2026 20:16:26 +0200 Subject: [PATCH 8/8] chore: ignore local worktrees directory --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 280124e..92cb619 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ coverage.out # Python cache/bytecode **/__pycache__/ *.py[cod] + +# Local worktrees +.worktrees/