From afb30f5fe6bf04ed7c2439d947521231927640b2 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 19:38:33 +0200
Subject: [PATCH 1/8] feat: define raw benchmark schema v1 for issue #17

---
 Makefile                             |  5 +-
 schemas/benchmark-raw-v1.schema.json | 94 ++++++++++++++++++++++++++++
 scripts/benchmark-measure.py         |  2 +
 scripts/run-single.sh                |  2 +
 scripts/validate-result-schemas.py   | 90 ++++++++++++++++++++++++++
 5 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 schemas/benchmark-raw-v1.schema.json
 create mode 100644 scripts/validate-result-schemas.py

diff --git a/Makefile b/Makefile
index 0755546..3db5781 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
+.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
 
 benchmark:
 	bash scripts/run-all.sh
@@ -45,6 +45,9 @@ benchmark-limits-check:
 benchmark-manifest-check:
 	python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json
 
+benchmark-raw-schema-check:
+	python3 scripts/validate-result-schemas.py raw-check
+
 benchmark-stats-check:
 	python3 scripts/benchmark-quality-check.py stats-check
 
diff --git a/schemas/benchmark-raw-v1.schema.json b/schemas/benchmark-raw-v1.schema.json
new file mode 100644
index 0000000..f28be4c
--- /dev/null
+++ b/schemas/benchmark-raw-v1.schema.json
@@ -0,0 +1,94 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://go-modkit.dev/schemas/benchmark-raw-v1.schema.json",
+  "title": "Benchmark Raw Result v1",
+  "type": "object",
+  "required": [
+    "schema_version",
+    "framework",
+    "target",
+    "status"
+  ],
+  "additionalProperties": true,
+  "properties": {
+    "schema_version": {
+      "const": "raw-v1"
+    },
+    "framework": {
+      "type": "string",
+      "minLength": 1
+    },
+    "target": {
+      "type": "string",
+      "minLength": 1
+    },
+    "status": {
+      "type": "string",
+      "enum": [
+        "ok",
+        "skipped"
+      ]
+    },
+    "reason": {
+      "type": "string"
+    },
+    "parity": {
+      "type": "string"
+    },
+    "engine": {
+      "type": "string"
+    },
+    "metric_units": {
+      "type": "object"
+    },
+    "benchmark": {
+      "type": "object"
+    },
+    "docker": {
+      "type": "object"
+    },
+    "resources_normalized": {
+      "type": "object"
+    }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "status": {
+            "const": "ok"
+          }
+        },
+        "required": [
+          "status"
+        ]
+      },
+      "then": {
+        "required": [
+          "parity",
+          "engine",
+          "metric_units",
+          "benchmark",
+          "resources_normalized"
+        ]
+      }
+    },
+    {
+      "if": {
+        "properties": {
+          "status": {
+            "const": "skipped"
+          }
+        },
+        "required": [
+          "status"
+        ]
+      },
+      "then": {
+        "required": [
+          "reason"
+        ]
+      }
+    }
+  ]
+}
diff --git a/scripts/benchmark-measure.py b/scripts/benchmark-measure.py
index 166e815..cb23599 100644
--- a/scripts/benchmark-measure.py
+++ b/scripts/benchmark-measure.py
@@ -241,6 +241,7 @@ def main():
 
     if not run_stats:
         payload = {
+            "schema_version": "raw-v1",
             "framework": args.framework,
             "target": args.target,
             "status": "skipped",
@@ -279,6 +280,7 @@ def main():
     docker_stats = collect_docker_stats(args.framework)
 
     payload = {
+        "schema_version": "raw-v1",
         "framework": args.framework,
         "target": args.target,
         "status": "ok",
diff --git a/scripts/run-single.sh b/scripts/run-single.sh
index 2e86241..cbfd7cf 100755
--- a/scripts/run-single.sh
+++ b/scripts/run-single.sh
@@ -96,6 +96,7 @@ if ! curl -fsS "$target/health" >/dev/null 2>&1; then
 import json, sys
 framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3]
 payload = {
+    "schema_version": "raw-v1",
     "framework": framework,
     "target": target,
     "status": "skipped",
@@ -116,6 +117,7 @@ else
 import json, sys
 framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3]
 payload = {
+    "schema_version": "raw-v1",
     "framework": framework,
     "target": target,
     "status": "skipped",
diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py
new file mode 100644
index 0000000..10d1116
--- /dev/null
+++ b/scripts/validate-result-schemas.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+import argparse
+import json
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+RAW_DIR = ROOT / "results" / "latest" / "raw"
+RAW_SCHEMA = ROOT / "schemas" / "benchmark-raw-v1.schema.json"
+
+
+def load_json(path):
+    with path.open("r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def validate_raw_row(path, payload, schema_version):
+    """Validate one raw artifact (payload, loaded from path) against the raw-v1 contract; raise SystemExit on the first violation."""
+
+    def fail(detail):
+        raise SystemExit(f"Raw schema validation failed for {path}: {detail}")
+
+    # A non-object document (null, number, list) would otherwise crash with TypeError/AttributeError.
+    if not isinstance(payload, dict):
+        fail("top-level JSON value must be object")
+    for field in ("schema_version", "framework", "target", "status"):
+        if field not in payload:
+            fail(f"missing {field}")
+    if payload["schema_version"] != schema_version:
+        fail(f"schema_version={payload['schema_version']!r}, expected {schema_version!r}")
+    status = payload["status"]
+    if status not in ("ok", "skipped"):
+        fail(f"status={status!r} must be 'ok' or 'skipped'")
+    for field in ("framework", "target"):
+        if not isinstance(payload[field], str) or not payload[field]:
+            fail(f"{field} must be non-empty string")
+    if status == "skipped":
+        if not isinstance(payload.get("reason"), str) or not payload["reason"]:
+            fail("skipped rows require non-empty reason")
+        return
+
+    # Enforce the property types the schema declares for ok rows, not just key presence.
+    ok_types = {"parity": str, "engine": str, "metric_units": dict, "benchmark": dict, "resources_normalized": dict}
+    for field, kind in ok_types.items():
+        if field not in payload:
+            fail(f"missing {field}")
+        if not isinstance(payload[field], kind):
+            fail(f"{field} must be {'string' if kind is str else 'object'}")
+    for metric_field, kind in (("run_stats", list), ("median", dict)):
+        if metric_field not in payload["benchmark"]:
+            fail(f"benchmark.{metric_field} is required")
+        if not isinstance(payload["benchmark"][metric_field], kind):
+            fail(f"benchmark.{metric_field} must be {'array' if kind is list else 'object'}")
+
+
+def validate_raw(raw_dir, schema_path):
+    """Validate every *.json file in raw_dir against the schema_version const read from the schema at schema_path."""
+    properties = load_json(schema_path).get("properties") or {}
+    expected_version = properties.get("schema_version", {}).get("const")
+    if not (isinstance(expected_version, str) and expected_version):
+        raise SystemExit(f"Raw schema file missing properties.schema_version.const: {schema_path}")
+
+    files = sorted(raw_dir.glob("*.json"))
+    if len(files) == 0:
+        raise SystemExit(f"No raw benchmark files found in: {raw_dir}")
+
+    for artifact in files:
+        validate_raw_row(artifact, load_json(artifact), expected_version)
+
+    print(f"benchmark-raw-schema-check: validated {len(files)} raw artifact(s)")
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Validate benchmark result schemas")
+    parser.add_argument("cmd", choices=["raw-check"])
+    parser.add_argument("--raw-dir", type=Path, default=RAW_DIR)
+    parser.add_argument("--raw-schema", type=Path, default=RAW_SCHEMA)
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    if args.cmd == "raw-check":
+        validate_raw(args.raw_dir, args.raw_schema)
+        return
+    raise SystemExit(f"Unknown command: {args.cmd}")
+
+
+if __name__ == "__main__":
+    main()

From f773796b315119ad28b6970ab459bf6c6ceb0000 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 19:40:09 +0200
Subject: [PATCH 2/8] feat: define summary benchmark schema v1 for issue #18

---
 Makefile                                 |  5 +-
 schemas/benchmark-summary-v1.schema.json | 91 ++++++++++++++++++++++++
 scripts/generate-report.py               | 15 +++-
 scripts/validate-result-schemas.py       | 61 +++++++++++++++-
 4 files changed, 169 insertions(+), 3 deletions(-)
 create mode 100644 schemas/benchmark-summary-v1.schema.json

diff --git a/Makefile b/Makefile
index 3db5781..515a7b6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
+.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
 
 benchmark:
 	bash scripts/run-all.sh
@@ -48,6 +48,9 @@ benchmark-manifest-check:
 benchmark-raw-schema-check:
 	python3 scripts/validate-result-schemas.py raw-check
 
+benchmark-summary-schema-check:
+	python3 scripts/validate-result-schemas.py summary-check
+
 benchmark-stats-check:
 	python3 scripts/benchmark-quality-check.py stats-check
 
diff --git a/schemas/benchmark-summary-v1.schema.json b/schemas/benchmark-summary-v1.schema.json
new file mode 100644
index 0000000..84a47b6
--- /dev/null
+++ b/schemas/benchmark-summary-v1.schema.json
@@ -0,0 +1,91 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://go-modkit.dev/schemas/benchmark-summary-v1.schema.json",
+  "title": "Benchmark Summary v1",
+  "type": "object",
+  "required": [
+    "schema_version",
+    "generated_at",
+    "total_targets",
+    "successful_targets",
+    "skipped_targets",
+    "targets"
+  ],
+  "additionalProperties": true,
+  "properties": {
+    "schema_version": {
+      "const": "summary-v1"
+    },
+    "generated_at": {
+      "type": "string",
+      "minLength": 1
+    },
+    "total_targets": {
+      "type": "integer",
+      "minimum": 0
+    },
+    "successful_targets": {
+      "type": "integer",
+      "minimum": 0
+    },
+    "skipped_targets": {
+      "type": "integer",
+      "minimum": 0
+    },
+    "targets": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": [
+          "framework",
+          "status",
+          "target",
+          "provenance"
+        ],
+        "additionalProperties": true,
+        "properties": {
+          "framework": {
+            "type": "string",
+            "minLength": 1
+          },
+          "status": {
+            "type": "string",
+            "enum": [
+              "ok",
+              "skipped"
+            ]
+          },
+          "target": {
+            "type": "string",
+            "minLength": 1
+          },
+          "reason": {
+            "type": ["string", "null"]
+          },
+          "median": {
+            "type": "object"
+          },
+          "uncertainty": {
+            "type": "object"
+          },
+          "provenance": {
+            "type": "object",
+            "required": [
+              "raw_source"
+            ],
+            "properties": {
+              "raw_source": {
+                "type": "string",
+                "minLength": 1
+              },
+              "manifest": {
+                "type": "string",
+                "minLength": 1
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/scripts/generate-report.py b/scripts/generate-report.py
index 3da31ba..b1c2475 100755
--- a/scripts/generate-report.py
+++ b/scripts/generate-report.py
@@ -18,7 +18,9 @@ def load_raw_files():
     for path in sorted(RAW_DIR.glob("*.json")):
         try:
             with path.open("r", encoding="utf-8") as f:
-                rows.append(json.load(f))
+                payload = json.load(f)
+                payload["_source_file"] = path.name
+                rows.append(payload)
         except json.JSONDecodeError as exc:
             print(f"Warning: skipping malformed JSON {path}: {exc}")
     return rows
@@ -27,6 +29,7 @@ def load_raw_files():
 def build_summary(rows):
     generated_at = datetime.now(timezone.utc).isoformat()
     summary = {
+        "schema_version": "summary-v1",
         "generated_at": generated_at,
         "total_targets": len(rows),
         "successful_targets": sum(1 for r in rows if r.get("status") == "ok"),
@@ -39,8 +42,18 @@ def build_summary(rows):
             "status": row.get("status"),
             "target": row.get("target"),
             "reason": row.get("reason"),
+            "provenance": {
+                "raw_source": f"results/latest/raw/{row.get('_source_file', 'unknown')}"
+            },
         }
         bench = row.get("benchmark") or {}
+        quality = (bench.get("quality") or {}).get("variance") or {}
+        if quality:
+            target["uncertainty"] = {
+                "rps_cv": quality.get("rps_cv"),
+                "latency_ms_p95_cv": quality.get("latency_ms_p95_cv"),
+                "latency_ms_p99_cv": quality.get("latency_ms_p99_cv"),
+            }
         median = bench.get("median") or {}
         if median:
             target["median"] = {
diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py
index 10d1116..ffa782e 100644
--- a/scripts/validate-result-schemas.py
+++ b/scripts/validate-result-schemas.py
@@ -7,6 +7,8 @@
 ROOT = Path(__file__).resolve().parent.parent
 RAW_DIR = ROOT / "results" / "latest" / "raw"
 RAW_SCHEMA = ROOT / "schemas" / "benchmark-raw-v1.schema.json"
+SUMMARY_FILE = ROOT / "results" / "latest" / "summary.json"
+SUMMARY_SCHEMA = ROOT / "schemas" / "benchmark-summary-v1.schema.json"
 
 
 def load_json(path):
@@ -70,11 +72,65 @@ def validate_raw(raw_dir, schema_path):
     print(f"benchmark-raw-schema-check: validated {len(files)} raw artifact(s)")
 
 
+def validate_summary(summary_file, schema_path):
+    """Validate the summary artifact at summary_file against the summary schema at schema_path; raise SystemExit on the first violation."""
+
+    def fail(detail):
+        raise SystemExit(f"Summary schema validation failed for {summary_file}: {detail}")
+
+    schema = load_json(schema_path)
+    schema_version = (schema.get("properties") or {}).get("schema_version", {}).get("const")
+    if not isinstance(schema_version, str) or not schema_version:
+        raise SystemExit(f"Summary schema file missing properties.schema_version.const: {schema_path}")
+    if not summary_file.exists():
+        raise SystemExit(f"Summary file not found: {summary_file}")
+
+    payload = load_json(summary_file)
+    # A non-object document (list, null, number) would otherwise crash on the lookups below.
+    if not isinstance(payload, dict):
+        fail("top-level JSON value must be object")
+    counters = ("total_targets", "successful_targets", "skipped_targets")
+    for field in ("schema_version", "generated_at", *counters, "targets"):
+        if field not in payload:
+            fail(f"missing {field}")
+    if payload["schema_version"] != schema_version:
+        fail(f"schema_version={payload['schema_version']!r}, expected {schema_version!r}")
+    for field in counters:
+        value = payload[field]
+        # bool is an int subclass in Python but is not a JSON integer.
+        if isinstance(value, bool) or not isinstance(value, int) or value < 0:
+            fail(f"{field} must be non-negative integer")
+
+    targets = payload["targets"]
+    if not isinstance(targets, list):
+        fail("targets must be array")
+    for idx, target in enumerate(targets):
+        if not isinstance(target, dict):
+            fail(f"targets[{idx}] must be object")
+        for field in ("framework", "status", "target", "provenance"):
+            if field not in target:
+                fail(f"targets[{idx}] missing {field}")
+        for field in ("framework", "target"):
+            if not isinstance(target[field], str) or not target[field]:
+                fail(f"targets[{idx}].{field} must be non-empty string")
+        if target["status"] not in ("ok", "skipped"):
+            fail(f"targets[{idx}].status={target['status']!r} must be 'ok' or 'skipped'")
+        provenance = target["provenance"]
+        if not isinstance(provenance, dict) or not provenance.get("raw_source"):
+            fail(f"targets[{idx}].provenance.raw_source is required")
+        if target["status"] == "ok" and "uncertainty" not in target:
+            fail(f"targets[{idx}] missing uncertainty for status=ok")
+
+    print("benchmark-summary-schema-check: validated summary artifact")
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Validate benchmark result schemas")
-    parser.add_argument("cmd", choices=["raw-check"])
+    parser.add_argument("cmd", choices=["raw-check", "summary-check"])
     parser.add_argument("--raw-dir", type=Path, default=RAW_DIR)
     parser.add_argument("--raw-schema", type=Path, default=RAW_SCHEMA)
+    parser.add_argument("--summary-file", type=Path, default=SUMMARY_FILE)
+    parser.add_argument("--summary-schema", type=Path, default=SUMMARY_SCHEMA)
     return parser.parse_args()
 
 
@@ -83,6 +139,9 @@ def main():
     if args.cmd == "raw-check":
         validate_raw(args.raw_dir, args.raw_schema)
         return
+    if args.cmd == "summary-check":
+        validate_summary(args.summary_file, args.summary_schema)
+        return
     raise SystemExit(f"Unknown command: {args.cmd}")
 
 

From 04b1732745e33278e0eca49a7ceefee6bbf98573 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 19:42:03 +0200
Subject: [PATCH 3/8] feat: enforce schema validation in scripts and CI for
 issue #19

---
 .github/workflows/ci.yml   |  2 ++
 Makefile                   |  6 +++++-
 scripts/generate-report.py | 11 +++++++++++
 scripts/run-all.sh         |  2 ++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a1f2a95..d49713d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -50,6 +50,8 @@ jobs:
         run: bash scripts/run-all.sh
       - name: Generate report from raw results
         run: python3 scripts/generate-report.py
+      - name: Validate benchmark result schemas
+        run: make benchmark-schema-validate
       - name: Run statistical quality gate
         run: make ci-benchmark-quality-check
       - name: Upload benchmark quality summary
diff --git a/Makefile b/Makefile
index 515a7b6..8baa296 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
+.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
 
 benchmark:
 	bash scripts/run-all.sh
@@ -51,6 +51,10 @@ benchmark-raw-schema-check:
 benchmark-summary-schema-check:
 	python3 scripts/validate-result-schemas.py summary-check
 
+benchmark-schema-validate:
+	$(MAKE) benchmark-raw-schema-check
+	$(MAKE) benchmark-summary-schema-check
+
 benchmark-stats-check:
 	python3 scripts/benchmark-quality-check.py stats-check
 
diff --git a/scripts/generate-report.py b/scripts/generate-report.py
index b1c2475..faeb914 100755
--- a/scripts/generate-report.py
+++ b/scripts/generate-report.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 import json
+import subprocess
+import sys
 from datetime import datetime, timezone
 from pathlib import Path
 
@@ -11,6 +13,13 @@
 REPORT_PATH = RESULTS_LATEST / "report.md"
 
 
+def run_schema_check(command):
+    """Run a validator command from the directory above scripts/ (so its relative script path resolves from any cwd); exit with its output on failure."""
+    completed = subprocess.run(command, cwd=Path(__file__).resolve().parent.parent, capture_output=True, text=True, check=False)
+    if completed.returncode != 0:
+        raise SystemExit(completed.stderr.strip() or completed.stdout.strip() or "schema validation failed")
+
+
 def load_raw_files():
     if not RAW_DIR.exists():
         return []
@@ -117,9 +126,11 @@ def write_report(summary):
 
 
 def main():
+    run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "raw-check"])
     rows = load_raw_files()
     summary = build_summary(rows)
     write_summary(summary)
+    run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "summary-check"])
     write_report(summary)
     print(f"Wrote: {SUMMARY_PATH}")
     print(f"Wrote: {REPORT_PATH}")
diff --git a/scripts/run-all.sh b/scripts/run-all.sh
index 52f6e4f..8cbebf6 100755
--- a/scripts/run-all.sh
+++ b/scripts/run-all.sh
@@ -71,6 +71,8 @@ for framework in "${frameworks[@]}"; do
   BENCHMARK_METADATA_MANAGED=1 bash scripts/run-single.sh "$framework"
 done
 
+python3 scripts/validate-result-schemas.py raw-check --raw-dir "$raw_dir"
+
 python3 scripts/environment-manifest.py write-manifest --raw-dir "$raw_dir" --fingerprint "$fingerprint_file" --out "$manifest_file"
 
 echo "Raw benchmark files generated in: $raw_dir"

From 17884ed8205f72be5a2f63bc58e2d25ec36bd829 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 19:53:22 +0200
Subject: [PATCH 4/8] fix: align schema contracts and validation errors for
 issue #19

---
 schemas/benchmark-raw-v1.schema.json     | 20 +++++++++--
 schemas/benchmark-summary-v1.schema.json | 43 +++++++++++++++++++++++-
 scripts/validate-result-schemas.py       |  7 ++--
 3 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/schemas/benchmark-raw-v1.schema.json b/schemas/benchmark-raw-v1.schema.json
index f28be4c..0c08480 100644
--- a/schemas/benchmark-raw-v1.schema.json
+++ b/schemas/benchmark-raw-v1.schema.json
@@ -42,7 +42,15 @@
       "type": "object"
     },
     "benchmark": {
-      "type": "object"
+      "type": "object",
+      "properties": {
+        "run_stats": {
+          "type": "array"
+        },
+        "median": {
+          "type": "object"
+        }
+      }
     },
     "docker": {
       "type": "object"
@@ -70,7 +78,15 @@
           "metric_units",
           "benchmark",
           "resources_normalized"
-        ]
+        ],
+        "properties": {
+          "benchmark": {
+            "required": [
+              "run_stats",
+              "median"
+            ]
+          }
+        }
       }
     },
     {
diff --git a/schemas/benchmark-summary-v1.schema.json b/schemas/benchmark-summary-v1.schema.json
index 84a47b6..aa98a05 100644
--- a/schemas/benchmark-summary-v1.schema.json
+++ b/schemas/benchmark-summary-v1.schema.json
@@ -87,5 +87,46 @@
         }
       }
     }
-  }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "targets": {
+            "type": "array"
+          }
+        },
+        "required": [
+          "targets"
+        ]
+      },
+      "then": {
+        "properties": {
+          "targets": {
+            "items": {
+              "allOf": [
+                {
+                  "if": {
+                    "properties": {
+                      "status": {
+                        "const": "ok"
+                      }
+                    },
+                    "required": [
+                      "status"
+                    ]
+                  },
+                  "then": {
+                    "required": [
+                      "uncertainty"
+                    ]
+                  }
+                }
+              ]
+            }
+          }
+        }
+      }
+    }
+  ]
 }
diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py
index ffa782e..66b3dfc 100644
--- a/scripts/validate-result-schemas.py
+++ b/scripts/validate-result-schemas.py
@@ -12,8 +12,11 @@
 
 
 def load_json(path):
-    with path.open("r", encoding="utf-8") as handle:
-        return json.load(handle)
+    try:
+        with path.open("r", encoding="utf-8") as handle:
+            return json.load(handle)
+    except json.JSONDecodeError as exc:
+        raise SystemExit(f"Malformed JSON in {path}: {exc.msg} at line {exc.lineno}, column {exc.colno}") from exc
 
 
 def validate_raw_row(path, payload, schema_version):

From d0c1854cf2a9da741c21882945a84c84bc7d20c7 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 20:02:26 +0200
Subject: [PATCH 5/8] chore: normalize make shell and tool command variables

---
 Makefile | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index 8baa296..67dd288 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,7 @@
+SHELL := /bin/sh
+PYTHON ?= python3
+GO ?= go
+
 .PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
 
 benchmark:
@@ -22,10 +26,10 @@ benchmark-do:
 	bash scripts/run-single.sh do
 
 report:
-	python3 scripts/generate-report.py
+	$(PYTHON) scripts/generate-report.py
 
 test:
-	go test ./...
+	$(GO) test ./...
 
 parity-check:
 	TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh
@@ -37,32 +41,32 @@ parity-check-nestjs:
 	TARGET=http://localhost:3002 bash scripts/parity-check.sh
 
 benchmark-fingerprint-check:
-	python3 scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json
+	$(PYTHON) scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json
 
 benchmark-limits-check:
-	python3 scripts/environment-manifest.py check-limits --compose docker-compose.yml
+	$(PYTHON) scripts/environment-manifest.py check-limits --compose docker-compose.yml
 
 benchmark-manifest-check:
-	python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json
+	$(PYTHON) scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json
 
 benchmark-raw-schema-check:
-	python3 scripts/validate-result-schemas.py raw-check
+	$(PYTHON) scripts/validate-result-schemas.py raw-check
 
 benchmark-summary-schema-check:
-	python3 scripts/validate-result-schemas.py summary-check
+	$(PYTHON) scripts/validate-result-schemas.py summary-check
 
 benchmark-schema-validate:
 	$(MAKE) benchmark-raw-schema-check
 	$(MAKE) benchmark-summary-schema-check
 
 benchmark-stats-check:
-	python3 scripts/benchmark-quality-check.py stats-check
+	$(PYTHON) scripts/benchmark-quality-check.py stats-check
 
 benchmark-variance-check:
-	python3 scripts/benchmark-quality-check.py variance-check
+	$(PYTHON) scripts/benchmark-quality-check.py variance-check
 
 benchmark-benchstat-check:
-	python3 scripts/benchmark-quality-check.py benchstat-check
+	$(PYTHON) scripts/benchmark-quality-check.py benchstat-check
 
 ci-benchmark-quality-check:
-	python3 scripts/benchmark-quality-check.py ci-check
+	$(PYTHON) scripts/benchmark-quality-check.py ci-check

From 785eac1b6d69ef2284cf6120d588229a9c8a9712 Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 20:07:53 +0200
Subject: [PATCH 6/8] chore: add coverage targets and document schema checks

---
 CONTRIBUTING.md                   | 13 +++++++++++++
 Makefile                          | 31 ++++++++++++++++++++++++++++++-
 README.md                         |  4 ++++
 docs/guides/benchmark-workflow.md |  4 ++++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a5289d9..98201f6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -23,15 +23,28 @@ Run these before opening a PR:
 
 ```bash
 go test ./...
+make test-coverage
 TARGET=http://localhost:3001 bash scripts/parity-check.sh
 ```
 
+For patch coverage against `origin/main`:
+
+```bash
+make test-patch-coverage
+```
+
 If you changed scripts, also run shell linting if available:
 
 ```bash
 shellcheck scripts/*.sh
 ```
 
+If you changed benchmark artifacts or report generation, also run schema validation:
+
+```bash
+make benchmark-schema-validate
+```
+
 ## Pull request process
 
 1. Create a branch from `main`.
diff --git a/Makefile b/Makefile
index 67dd288..f6b7f72 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,11 @@
 SHELL := /bin/sh
 PYTHON ?= python3
 GO ?= go
+GOPATH ?= $(shell $(GO) env GOPATH)
+GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover
+MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;)
 
-.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
+.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
 
 benchmark:
 	bash scripts/run-all.sh
@@ -31,6 +34,32 @@ report:
 test:
 	$(GO) test ./...
 
+test-coverage:
+	@mkdir -p .coverage
+	@echo "mode: atomic" > .coverage/coverage.out
+	@for mod in $(MODULES); do \
+		echo "Testing coverage for module: $$mod"; \
+		(cd $$mod && $(GO) test -coverprofile=profile.out -covermode=atomic ./...) || exit 1; \
+		if [ -f $$mod/profile.out ]; then \
+			tail -n +2 $$mod/profile.out >> .coverage/coverage.out; \
+			rm $$mod/profile.out; \
+		fi; \
+	done
+	@printf "\nTotal Coverage:\n"
+	@$(GO) tool cover -func=.coverage/coverage.out | grep "total:"
+
+test-patch-coverage: tools test-coverage
+	@echo "Comparing against origin/main..."
+	@git diff -U0 --no-color origin/main...HEAD > .coverage/diff.patch
+	@$(GO_PATCH_COVER) .coverage/coverage.out .coverage/diff.patch > .coverage/patch_coverage.out
+	@echo "Patch Coverage Report:"
+	@cat .coverage/patch_coverage.out
+
+tools:
+	@echo "Installing development tools..."
+	@$(GO) install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest
+	@echo "Done: go-patch-cover installed"
+
 parity-check:
 	TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh
 
diff --git a/README.md b/README.md
index a0865ec..d407be4 100644
--- a/README.md
+++ b/README.md
@@ -21,9 +21,12 @@ Run benchmark orchestration and generate a report:
 ```bash
 make benchmark
 make report
+make benchmark-schema-validate
 make ci-benchmark-quality-check
 ```
 
+The benchmark and report flows enforce schema validation of raw and summary artifacts before the quality gates run.
+
 Use OSS measurement engine (optional):
 
 ```bash
@@ -36,6 +39,7 @@ BENCH_ENGINE=hyperfine make benchmark
 - Python 3
 - hyperfine (optional benchmark engine)
 - benchstat (`go install golang.org/x/perf/cmd/benchstat@latest`)
+- go-patch-cover (`go install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest`, for `make test-patch-coverage`)
 
 ## Repository layout
 
diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md
index 8485d4c..216828b 100644
--- a/docs/guides/benchmark-workflow.md
+++ b/docs/guides/benchmark-workflow.md
@@ -13,6 +13,7 @@
 ```bash
 make benchmark
 make report
+make benchmark-schema-validate
 ```
 
 ## Per-target run
@@ -56,10 +57,13 @@ Benchmark scripts must run parity first for each target. If parity fails, skip b
 - `results/latest/report.md` - markdown report
 - `results/latest/benchmark-quality-summary.json` - policy quality gate output
 - `results/latest/tooling/benchstat/*.txt` - benchstat comparison outputs
+- `schemas/benchmark-raw-v1.schema.json` - raw benchmark artifact contract
+- `schemas/benchmark-summary-v1.schema.json` - summary artifact contract
 
 ## Quality checks
 
 ```bash
+make benchmark-schema-validate
 make benchmark-stats-check
 make benchmark-variance-check
 make benchmark-benchstat-check

From c7a97aefe569de2920f6f06e1e651ca55a946cbb Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 20:13:00 +0200
Subject: [PATCH 7/8] fix: handle missing schema files with clear validator
 errors

---
 scripts/validate-result-schemas.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py
index 66b3dfc..658ae58 100644
--- a/scripts/validate-result-schemas.py
+++ b/scripts/validate-result-schemas.py
@@ -15,6 +15,8 @@ def load_json(path):
     try:
         with path.open("r", encoding="utf-8") as handle:
             return json.load(handle)
+    except FileNotFoundError as exc:
+        raise SystemExit(f"File not found: {path}") from exc
     except json.JSONDecodeError as exc:
         raise SystemExit(f"Malformed JSON in {path}: {exc.msg} at line {exc.lineno}, column {exc.colno}") from exc
 

From b76d03f373015858ce23b9d03841074a21f8d21a Mon Sep 17 00:00:00 2001
From: Arye Kogan <aryekogan@gmail.com>
Date: Sat, 7 Feb 2026 20:16:26 +0200
Subject: [PATCH 8/8] chore: ignore local worktrees directory

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 280124e..92cb619 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,6 @@ coverage.out
 # Python cache/bytecode
 **/__pycache__/
 *.py[cod]
+
+# Local worktrees
+.worktrees/