From ec6ad9e379c7e9d8f35fcfceeb6a0a2d8fbeea21 Mon Sep 17 00:00:00 2001
From: Mayk Thewessen <mayk_12@hotmail.com>
Date: Fri, 13 Mar 2026 22:23:58 +0100
Subject: [PATCH 1/4] perf: use numpy array lookup for solution unpacking

Convert the primal/dual pandas Series to a dense numpy lookup array
before the per-variable/per-constraint unpacking loop. This replaces
pandas indexing (sol[idx].values) with direct numpy array indexing
(sol_arr[idx]), avoiding pandas overhead per variable type.

The loop over variable/constraint types still exists (needed to set
each variable's .solution xr.DataArray), but the inner indexing
operation is now pure numpy instead of pandas Series.__getitem__.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 linopy/model.py | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/linopy/model.py b/linopy/model.py
index 54334411..cd9a2215 100644
--- a/linopy/model.py
+++ b/linopy/model.py
@@ -1572,26 +1572,42 @@ def solve(
             sol = set_int_index(sol)
             sol.loc[-1] = nan
 
+            # Convert to numpy array for fast label-based lookup.
+            # Labels are integers; build a dense lookup array so each
+            # variable can index directly without pandas overhead.
+            sol_max_idx = max(sol.index.max(), 0)
+            sol_arr = np.full(sol_max_idx + 1, nan)
+            sol_arr[sol.index[sol.index >= 0]] = sol.values[sol.index >= 0]
+
             for name, var in self.variables.items():
                 idx = np.ravel(var.labels)
-                try:
-                    vals = sol[idx].values.reshape(var.labels.shape)
-                except KeyError:
-                    vals = sol.reindex(idx).values.reshape(var.labels.shape)
-                var.solution = xr.DataArray(vals, var.coords)
+                # Use numpy indexing: labels ≥ 0 map to sol_arr, -1 maps to NaN
+                safe_idx = np.clip(idx, 0, sol_max_idx)
+                vals = sol_arr[safe_idx]
+                vals[idx < 0] = nan
+                var.solution = xr.DataArray(
+                    vals.reshape(var.labels.shape), var.coords
+                )
 
             if not result.solution.dual.empty:
                 dual = result.solution.dual.copy()
                 dual = set_int_index(dual)
                 dual.loc[-1] = nan
 
+                dual_max_idx = max(dual.index.max(), 0)
+                dual_arr = np.full(dual_max_idx + 1, nan)
+                dual_arr[dual.index[dual.index >= 0]] = dual.values[
+                    dual.index >= 0
+                ]
+
                 for name, con in self.constraints.items():
                     idx = np.ravel(con.labels)
-                    try:
-                        vals = dual[idx].values.reshape(con.labels.shape)
-                    except KeyError:
-                        vals = dual.reindex(idx).values.reshape(con.labels.shape)
-                    con.dual = xr.DataArray(vals, con.labels.coords)
+                    safe_idx = np.clip(idx, 0, dual_max_idx)
+                    vals = dual_arr[safe_idx]
+                    vals[idx < 0] = nan
+                    con.dual = xr.DataArray(
+                        vals.reshape(con.labels.shape), con.labels.coords
+                    )
 
             return result.status.status.value, result.status.termination_condition.value
         finally:

From 0e9abcf0000dda8da11a3a08d22a1e96d9560502 Mon Sep 17 00:00:00 2001
From: Mayk Thewessen <mayk_12@hotmail.com>
Date: Tue, 17 Mar 2026 21:57:11 +0100
Subject: [PATCH 2/4] =?UTF-8?q?Add=20reproducible=20benchmark=20script=20f?=
 =?UTF-8?q?or=20PRs=20#616=E2=80=93#619?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds benchmark/scripts/benchmark_matrix_gen.py covering all four
performance code paths:
  - #616  cached_property on MatrixAccessor (flat_vars / flat_cons)
  - #617  np.char.add label string concatenation
  - #618  single-step sparse matrix slicing
  - #619  numpy dense-array solution unpacking

Reproduce with:
  python benchmark/scripts/benchmark_matrix_gen.py -o results.json
  python benchmark/scripts/benchmark_matrix_gen.py --include-solve   # PR #619 path
  python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 benchmark/scripts/benchmark_matrix_gen.py | 386 ++++++++++++++++++++++
 linopy/model.py                           |   8 +-
 2 files changed, 388 insertions(+), 6 deletions(-)
 create mode 100644 benchmark/scripts/benchmark_matrix_gen.py

diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py
new file mode 100644
index 00000000..8fa77367
--- /dev/null
+++ b/benchmark/scripts/benchmark_matrix_gen.py
@@ -0,0 +1,386 @@
+#!/usr/bin/env python3
+"""
+Benchmark script for linopy matrix generation and solution-unpacking performance.
+
+Covers the code paths optimised by PRs #616–#619:
+  - #616  cached_property on MatrixAccessor (flat_vars / flat_cons)
+  - #617  np.char.add for label string concatenation
+  - #618  sparse matrix slicing in MatrixAccessor.A
+  - #619  numpy solution unpacking in Model.solve
+
+Usage
+-----
+    # Quick run (24 snapshots only):
+    python benchmark/scripts/benchmark_matrix_gen.py --quick
+
+    # Full matrix-generation sweep with JSON output:
+    python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616"
+
+    # Include solution-unpacking benchmark (requires HiGHS solver, #619):
+    python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json
+
+    # Compare two runs:
+    python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json
+"""
+
+from __future__ import annotations
+
+import argparse
+import gc
+import json
+import platform
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+
+# ---------------------------------------------------------------------------
+# Model builders
+# ---------------------------------------------------------------------------
+
+def build_scigrid_network(n_snapshots: int):
+    """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model."""
+    import pypsa
+
+    n = pypsa.examples.scigrid_de()
+    orig_snapshots = n.snapshots
+    orig_len = len(orig_snapshots)
+
+    new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h")
+    n.set_snapshots(new_snapshots)
+
+    for component_t in (n.generators_t, n.loads_t, n.storage_units_t):
+        for attr in list(component_t):
+            df = getattr(component_t, attr)
+            if df is not None and not df.empty:
+                tiles = int(np.ceil(n_snapshots / orig_len)) + 1
+                tiled = np.tile(df.values, (tiles, 1))[:n_snapshots]
+                setattr(component_t, attr, pd.DataFrame(
+                    tiled, index=new_snapshots, columns=df.columns,
+                ))
+    return n
+
+
+def build_scigrid(n_snapshots: int):
+    """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots."""
+    n = build_scigrid_network(n_snapshots)
+    n.optimize.create_model(include_objective_constant=False)
+    return n.model
+
+
+def build_synthetic(n: int):
+    """Return a linopy Model with 2×N² variables and 2×N² constraints."""
+    from linopy import Model
+
+    m = Model()
+    N = np.arange(n)
+    x = m.add_variables(coords=[N, N], name="x")
+    y = m.add_variables(coords=[N, N], name="y")
+    m.add_constraints(x - y >= N, name="lower")
+    m.add_constraints(x + y >= 0, name="upper")
+    m.add_objective(2 * x.sum() + y.sum())
+    return m
+
+
+# ---------------------------------------------------------------------------
+# Benchmark phases
+# ---------------------------------------------------------------------------
+
+def time_phase(func, label: str, repeats: int = 3) -> dict:
+    """Time a callable, return best-of-N result."""
+    times = []
+    for _ in range(repeats):
+        gc.collect()
+        gc.disable()
+        t0 = time.perf_counter()
+        result = func()
+        elapsed = time.perf_counter() - t0
+        gc.enable()
+        times.append(elapsed)
+        del result
+    return {"phase": label, "best_s": min(times), "median_s": sorted(times)[len(times) // 2],
+            "times": times}
+
+
+def benchmark_model(model, repeats: int = 3) -> list[dict]:
+    """Benchmark all matrix generation phases on a linopy Model."""
+    results = []
+    matrices = model.matrices
+
+    # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation)
+    def do_flat_vars():
+        matrices.clean_cached_properties()
+        return matrices.flat_vars
+
+    results.append(time_phase(do_flat_vars, "flat_vars", repeats))
+
+    # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation)
+    def do_flat_cons():
+        matrices.clean_cached_properties()
+        return matrices.flat_cons
+
+    results.append(time_phase(do_flat_cons, "flat_cons", repeats))
+
+    # Phase 3: vlabels + clabels (vector creation from flat data)
+    # Ensure flat data is cached first
+    matrices.clean_cached_properties()
+    _ = matrices.flat_vars
+    _ = matrices.flat_cons
+
+    def do_labels():
+        return (matrices.vlabels, matrices.clabels)
+
+    results.append(time_phase(do_labels, "vlabels+clabels", repeats))
+
+    # Phase 4: A matrix (exercises #618 sparse slicing)
+    def do_A():
+        return matrices.A
+
+    results.append(time_phase(do_A, "A_matrix", repeats))
+
+    # Phase 5: full get_matrix_data pipeline (end-to-end)
+    def do_full():
+        matrices.clean_cached_properties()
+        _ = matrices.vlabels
+        _ = matrices.clabels
+        _ = matrices.lb
+        _ = matrices.ub
+        _ = matrices.A
+        _ = matrices.b
+        _ = matrices.sense
+        return True
+
+    results.append(time_phase(do_full, "full_pipeline", repeats))
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Solution-unpacking benchmark (#619)
+# ---------------------------------------------------------------------------
+
+def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]:
+    """
+    Benchmark the solution-assignment loop in Model.solve (PR #619).
+
+    Strategy: solve once with HiGHS to get a real solution vector, then
+    re-run only the assignment loop (sol[idx] → var.solution) repeatedly
+    without re-solving, isolating the unpacking cost from solver time.
+    """
+    import xarray as xr
+
+    n = build_scigrid_network(n_snapshots)
+    n.optimize.create_model(include_objective_constant=False)
+    model = n.model
+
+    # Solve once to populate the raw solution
+    status, _ = model.solve(solver_name="highs", io_api="direct")
+    if status != "ok":
+        print(f"  WARNING: solve failed ({status}), skipping solution-unpack benchmark")
+        return []
+
+    # Reconstruct the raw solution Series (as returned by the solver):
+    # a float-indexed Series mapping variable label → solution value.
+    nan = float("nan")
+    parts = []
+    for name, var in model.variables.items():
+        if var.solution is None:
+            continue
+        labels = np.ravel(var.labels)
+        values = np.ravel(var.solution.values)
+        parts.append(pd.Series(values, index=labels.astype(float)))
+    if not parts:
+        print("  WARNING: no solution found on variables, was solve successful?")
+        return []
+    sol_series = pd.concat(parts).drop_duplicates()
+    sol_series.loc[-1] = nan
+
+    n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables)
+    results = []
+
+    # ----- Old path (pandas label-based, pre-#619) -----
+    def unpack_pandas():
+        for name, var in model.variables.items():
+            idx = np.ravel(var.labels).astype(float)
+            try:
+                vals = sol_series[idx].values.reshape(var.labels.shape)
+            except KeyError:
+                vals = sol_series.reindex(idx).values.reshape(var.labels.shape)
+            var.solution = xr.DataArray(vals, var.coords)
+
+    results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats))
+
+    # ----- New path (numpy dense array, #619) -----
+    def unpack_numpy():
+        sol_max_idx = int(max(sol_series.index.max(), 0))
+        sol_arr = np.full(sol_max_idx + 1, nan)
+        mask = sol_series.index >= 0
+        valid = sol_series.index[mask].astype(int)
+        sol_arr[valid] = sol_series.values[mask]
+        for name, var in model.variables.items():
+            idx = np.ravel(var.labels)
+            safe_idx = np.clip(idx, 0, sol_max_idx)
+            vals = sol_arr[safe_idx]
+            vals[idx < 0] = nan
+            var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords)
+
+    results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats))
+
+    for r in results:
+        r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0)
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Runners
+# ---------------------------------------------------------------------------
+
+QUICK_SNAPSHOTS = [24]
+FULL_SNAPSHOTS = [24, 100, 200, 500]
+SYNTHETIC_SIZES = [20, 50, 100, 200]
+
+
+def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]:
+    """Run benchmarks across problem sizes, return flat list of results."""
+    all_results = []
+
+    if model_type in ("scigrid", "all"):
+        sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS
+        for n_snap in sizes:
+            print(f"\n{'='*60}")
+            print(f"SciGrid-DE  {n_snap} snapshots")
+            print(f"{'='*60}")
+            model = build_scigrid(n_snap)
+            n_vars = len(model.variables.flat)
+            n_cons = len(model.constraints.flat)
+            print(f"  {n_vars:,} variables, {n_cons:,} constraints")
+
+            for r in benchmark_model(model, repeats):
+                r.update(model_type="scigrid", size=n_snap,
+                         n_vars=n_vars, n_cons=n_cons)
+                all_results.append(r)
+                print(f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+
+            del model
+            gc.collect()
+
+    if include_solve:
+        # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes)
+        solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100]
+        for n_snap in solve_sizes:
+            print(f"\n{'='*60}")
+            print(f"SciGrid-DE solve + unpack  {n_snap} snapshots  (#619)")
+            print(f"{'='*60}")
+            for r in benchmark_solution_unpack(n_snap, repeats):
+                all_results.append(r)
+                print(f"  {r['phase']:30s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+            gc.collect()
+
+    if model_type in ("synthetic", "all"):
+        sizes = [20, 50] if quick else SYNTHETIC_SIZES
+        for n in sizes:
+            print(f"\n{'='*60}")
+            print(f"Synthetic  N={n}  ({2*n*n} vars, {2*n*n} cons)")
+            print(f"{'='*60}")
+            model = build_synthetic(n)
+            n_vars = 2 * n * n
+            n_cons = 2 * n * n
+
+            for r in benchmark_model(model, repeats):
+                r.update(model_type="synthetic", size=n,
+                         n_vars=n_vars, n_cons=n_cons)
+                all_results.append(r)
+                print(f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+
+            del model
+            gc.collect()
+
+    return all_results
+
+
+def format_comparison(before: list[dict], after: list[dict]) -> str:
+    """Format a before/after comparison table."""
+    df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"])
+    df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"])
+    merged = df_b[["best_s"]].join(df_a[["best_s"]], lsuffix="_before", rsuffix="_after")
+    merged["speedup"] = merged["best_s_before"] / merged["best_s_after"]
+    lines = [
+        f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}",
+        "-" * 70,
+    ]
+    for (mtype, size, phase), row in merged.iterrows():
+        lines.append(
+            f"{mtype:>10s} {size:>6} {phase:>20s} "
+            f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s "
+            f"{row['speedup']:>7.2f}x"
+        )
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Benchmark linopy matrix generation (PRs #616–#619)"
+    )
+    parser.add_argument(
+        "--model", choices=["scigrid", "synthetic", "all"], default="all",
+        help="Model type to benchmark (default: all)",
+    )
+    parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)")
+    parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)")
+    parser.add_argument("-o", "--output", type=str, help="Save results to JSON file")
+    parser.add_argument("--label", type=str, default="", help="Label for this run")
+    parser.add_argument(
+        "--compare", nargs=2, metavar=("BEFORE", "AFTER"),
+        help="Compare two JSON result files instead of running benchmarks",
+    )
+    parser.add_argument(
+        "--include-solve", action="store_true",
+        help="Also benchmark solution unpacking (PR #619); requires HiGHS solver",
+    )
+    args = parser.parse_args()
+
+    if args.compare:
+        before = json.loads(Path(args.compare[0]).read_text())["results"]
+        after = json.loads(Path(args.compare[1]).read_text())["results"]
+        print(format_comparison(before, after))
+        return
+
+    print(f"linopy matrix generation benchmark")
+    print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, "
+          f"{platform.machine()}, {platform.system()}")
+
+    results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve)
+
+    if args.output:
+        out = {
+            "label": args.label,
+            "python": sys.version.split()[0],
+            "numpy": np.__version__,
+            "platform": f"{platform.system()} {platform.machine()}",
+            "results": results,
+        }
+        Path(args.output).write_text(json.dumps(out, indent=2, default=str))
+        print(f"\nResults saved to {args.output}")
+
+    # Summary table
+    print(f"\n{'='*60}")
+    print("Summary (best times)")
+    print(f"{'='*60}")
+    df = pd.DataFrame(results)
+    for (mtype, size), group in df.groupby(["model_type", "size"]):
+        n_vars = group.iloc[0]["n_vars"]
+        n_cons = group.iloc[0]["n_cons"]
+        print(f"\n  {mtype} size={size}  ({n_vars:,} vars, {n_cons:,} cons)")
+        for _, row in group.iterrows():
+            print(f"    {row['phase']:20s}  {row['best_s']:.4f}s")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/linopy/model.py b/linopy/model.py
index cd9a2215..85a7e9a3 100644
--- a/linopy/model.py
+++ b/linopy/model.py
@@ -1585,9 +1585,7 @@ def solve(
                 safe_idx = np.clip(idx, 0, sol_max_idx)
                 vals = sol_arr[safe_idx]
                 vals[idx < 0] = nan
-                var.solution = xr.DataArray(
-                    vals.reshape(var.labels.shape), var.coords
-                )
+                var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords)
 
             if not result.solution.dual.empty:
                 dual = result.solution.dual.copy()
@@ -1596,9 +1594,7 @@ def solve(
 
                 dual_max_idx = max(dual.index.max(), 0)
                 dual_arr = np.full(dual_max_idx + 1, nan)
-                dual_arr[dual.index[dual.index >= 0]] = dual.values[
-                    dual.index >= 0
-                ]
+                dual_arr[dual.index[dual.index >= 0]] = dual.values[dual.index >= 0]
 
                 for name, con in self.constraints.items():
                     idx = np.ravel(con.labels)

From c29ecf639bc019bd9b6bac7aec36645d943d1129 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 17 Mar 2026 21:14:40 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 benchmark/scripts/benchmark_matrix_gen.py | 102 +++++++++++++++-------
 1 file changed, 69 insertions(+), 33 deletions(-)

diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py
index 8fa77367..7572d36a 100644
--- a/benchmark/scripts/benchmark_matrix_gen.py
+++ b/benchmark/scripts/benchmark_matrix_gen.py
@@ -36,11 +36,11 @@
 import numpy as np
 import pandas as pd
 
-
 # ---------------------------------------------------------------------------
 # Model builders
 # ---------------------------------------------------------------------------
 
+
 def build_scigrid_network(n_snapshots: int):
     """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model."""
     import pypsa
@@ -58,9 +58,15 @@ def build_scigrid_network(n_snapshots: int):
             if df is not None and not df.empty:
                 tiles = int(np.ceil(n_snapshots / orig_len)) + 1
                 tiled = np.tile(df.values, (tiles, 1))[:n_snapshots]
-                setattr(component_t, attr, pd.DataFrame(
-                    tiled, index=new_snapshots, columns=df.columns,
-                ))
+                setattr(
+                    component_t,
+                    attr,
+                    pd.DataFrame(
+                        tiled,
+                        index=new_snapshots,
+                        columns=df.columns,
+                    ),
+                )
     return n
 
 
@@ -89,6 +95,7 @@ def build_synthetic(n: int):
 # Benchmark phases
 # ---------------------------------------------------------------------------
 
+
 def time_phase(func, label: str, repeats: int = 3) -> dict:
     """Time a callable, return best-of-N result."""
     times = []
@@ -101,8 +108,12 @@ def time_phase(func, label: str, repeats: int = 3) -> dict:
         gc.enable()
         times.append(elapsed)
         del result
-    return {"phase": label, "best_s": min(times), "median_s": sorted(times)[len(times) // 2],
-            "times": times}
+    return {
+        "phase": label,
+        "best_s": min(times),
+        "median_s": sorted(times)[len(times) // 2],
+        "times": times,
+    }
 
 
 def benchmark_model(model, repeats: int = 3) -> list[dict]:
@@ -162,6 +173,7 @@ def do_full():
 # Solution-unpacking benchmark (#619)
 # ---------------------------------------------------------------------------
 
+
 def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]:
     """
     Benchmark the solution-assignment loop in Model.solve (PR #619).
@@ -198,7 +210,9 @@ def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]:
     sol_series = pd.concat(parts).drop_duplicates()
     sol_series.loc[-1] = nan
 
-    n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables)
+    n_vars = sum(
+        np.ravel(model.variables[name].labels).size for name in model.variables
+    )
     results = []
 
     # ----- Old path (pandas label-based, pre-#619) -----
@@ -243,26 +257,31 @@ def unpack_numpy():
 SYNTHETIC_SIZES = [20, 50, 100, 200]
 
 
-def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]:
+def run_benchmarks(
+    model_type: str, quick: bool, repeats: int, include_solve: bool = False
+) -> list[dict]:
     """Run benchmarks across problem sizes, return flat list of results."""
     all_results = []
 
     if model_type in ("scigrid", "all"):
         sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS
         for n_snap in sizes:
-            print(f"\n{'='*60}")
+            print(f"\n{'=' * 60}")
             print(f"SciGrid-DE  {n_snap} snapshots")
-            print(f"{'='*60}")
+            print(f"{'=' * 60}")
             model = build_scigrid(n_snap)
             n_vars = len(model.variables.flat)
             n_cons = len(model.constraints.flat)
             print(f"  {n_vars:,} variables, {n_cons:,} constraints")
 
             for r in benchmark_model(model, repeats):
-                r.update(model_type="scigrid", size=n_snap,
-                         n_vars=n_vars, n_cons=n_cons)
+                r.update(
+                    model_type="scigrid", size=n_snap, n_vars=n_vars, n_cons=n_cons
+                )
                 all_results.append(r)
-                print(f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+                print(
+                    f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
+                )
 
             del model
             gc.collect()
@@ -271,29 +290,32 @@ def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bo
         # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes)
         solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100]
         for n_snap in solve_sizes:
-            print(f"\n{'='*60}")
+            print(f"\n{'=' * 60}")
             print(f"SciGrid-DE solve + unpack  {n_snap} snapshots  (#619)")
-            print(f"{'='*60}")
+            print(f"{'=' * 60}")
             for r in benchmark_solution_unpack(n_snap, repeats):
                 all_results.append(r)
-                print(f"  {r['phase']:30s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+                print(
+                    f"  {r['phase']:30s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
+                )
             gc.collect()
 
     if model_type in ("synthetic", "all"):
         sizes = [20, 50] if quick else SYNTHETIC_SIZES
         for n in sizes:
-            print(f"\n{'='*60}")
-            print(f"Synthetic  N={n}  ({2*n*n} vars, {2*n*n} cons)")
-            print(f"{'='*60}")
+            print(f"\n{'=' * 60}")
+            print(f"Synthetic  N={n}  ({2 * n * n} vars, {2 * n * n} cons)")
+            print(f"{'=' * 60}")
             model = build_synthetic(n)
             n_vars = 2 * n * n
             n_cons = 2 * n * n
 
             for r in benchmark_model(model, repeats):
-                r.update(model_type="synthetic", size=n,
-                         n_vars=n_vars, n_cons=n_cons)
+                r.update(model_type="synthetic", size=n, n_vars=n_vars, n_cons=n_cons)
                 all_results.append(r)
-                print(f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)")
+                print(
+                    f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
+                )
 
             del model
             gc.collect()
@@ -305,7 +327,9 @@ def format_comparison(before: list[dict], after: list[dict]) -> str:
     """Format a before/after comparison table."""
     df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"])
     df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"])
-    merged = df_b[["best_s"]].join(df_a[["best_s"]], lsuffix="_before", rsuffix="_after")
+    merged = df_b[["best_s"]].join(
+        df_a[["best_s"]], lsuffix="_before", rsuffix="_after"
+    )
     merged["speedup"] = merged["best_s_before"] / merged["best_s_after"]
     lines = [
         f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}",
@@ -324,24 +348,34 @@ def format_comparison(before: list[dict], after: list[dict]) -> str:
 # CLI
 # ---------------------------------------------------------------------------
 
+
 def main():
     parser = argparse.ArgumentParser(
         description="Benchmark linopy matrix generation (PRs #616–#619)"
     )
     parser.add_argument(
-        "--model", choices=["scigrid", "synthetic", "all"], default="all",
+        "--model",
+        choices=["scigrid", "synthetic", "all"],
+        default="all",
         help="Model type to benchmark (default: all)",
     )
-    parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)")
-    parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)")
+    parser.add_argument(
+        "--quick", action="store_true", help="Quick mode (smallest sizes only)"
+    )
+    parser.add_argument(
+        "--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)"
+    )
     parser.add_argument("-o", "--output", type=str, help="Save results to JSON file")
     parser.add_argument("--label", type=str, default="", help="Label for this run")
     parser.add_argument(
-        "--compare", nargs=2, metavar=("BEFORE", "AFTER"),
+        "--compare",
+        nargs=2,
+        metavar=("BEFORE", "AFTER"),
         help="Compare two JSON result files instead of running benchmarks",
     )
     parser.add_argument(
-        "--include-solve", action="store_true",
+        "--include-solve",
+        action="store_true",
         help="Also benchmark solution unpacking (PR #619); requires HiGHS solver",
     )
     args = parser.parse_args()
@@ -352,9 +386,11 @@ def main():
         print(format_comparison(before, after))
         return
 
-    print(f"linopy matrix generation benchmark")
-    print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, "
-          f"{platform.machine()}, {platform.system()}")
+    print("linopy matrix generation benchmark")
+    print(
+        f"Python {sys.version.split()[0]}, numpy {np.__version__}, "
+        f"{platform.machine()}, {platform.system()}"
+    )
 
     results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve)
 
@@ -370,9 +406,9 @@ def main():
         print(f"\nResults saved to {args.output}")
 
     # Summary table
-    print(f"\n{'='*60}")
+    print(f"\n{'=' * 60}")
     print("Summary (best times)")
-    print(f"{'='*60}")
+    print(f"{'=' * 60}")
     df = pd.DataFrame(results)
     for (mtype, size), group in df.groupby(["model_type", "size"]):
         n_vars = group.iloc[0]["n_vars"]

From 7529977220fa2633eef1c1013b1da1a9ba1bc14f Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Wed, 18 Mar 2026 09:05:32 +0100
Subject: [PATCH 4/4] Delete benchmark/scripts/benchmark_matrix_gen.py

---
 benchmark/scripts/benchmark_matrix_gen.py | 422 ----------------------
 1 file changed, 422 deletions(-)
 delete mode 100644 benchmark/scripts/benchmark_matrix_gen.py

diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py
deleted file mode 100644
index 7572d36a..00000000
--- a/benchmark/scripts/benchmark_matrix_gen.py
+++ /dev/null
@@ -1,422 +0,0 @@
-#!/usr/bin/env python3
-"""
-Benchmark script for linopy matrix generation and solution-unpacking performance.
-
-Covers the code paths optimised by PRs #616–#619:
-  - #616  cached_property on MatrixAccessor (flat_vars / flat_cons)
-  - #617  np.char.add for label string concatenation
-  - #618  sparse matrix slicing in MatrixAccessor.A
-  - #619  numpy solution unpacking in Model.solve
-
-Usage
------
-    # Quick run (24 snapshots only):
-    python benchmark/scripts/benchmark_matrix_gen.py --quick
-
-    # Full matrix-generation sweep with JSON output:
-    python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616"
-
-    # Include solution-unpacking benchmark (requires HiGHS solver, #619):
-    python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json
-
-    # Compare two runs:
-    python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json
-"""
-
-from __future__ import annotations
-
-import argparse
-import gc
-import json
-import platform
-import sys
-import time
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-
-# ---------------------------------------------------------------------------
-# Model builders
-# ---------------------------------------------------------------------------
-
-
-def build_scigrid_network(n_snapshots: int):
-    """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model."""
-    import pypsa
-
-    n = pypsa.examples.scigrid_de()
-    orig_snapshots = n.snapshots
-    orig_len = len(orig_snapshots)
-
-    new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h")
-    n.set_snapshots(new_snapshots)
-
-    for component_t in (n.generators_t, n.loads_t, n.storage_units_t):
-        for attr in list(component_t):
-            df = getattr(component_t, attr)
-            if df is not None and not df.empty:
-                tiles = int(np.ceil(n_snapshots / orig_len)) + 1
-                tiled = np.tile(df.values, (tiles, 1))[:n_snapshots]
-                setattr(
-                    component_t,
-                    attr,
-                    pd.DataFrame(
-                        tiled,
-                        index=new_snapshots,
-                        columns=df.columns,
-                    ),
-                )
-    return n
-
-
-def build_scigrid(n_snapshots: int):
-    """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots."""
-    n = build_scigrid_network(n_snapshots)
-    n.optimize.create_model(include_objective_constant=False)
-    return n.model
-
-
-def build_synthetic(n: int):
-    """Return a linopy Model with 2×N² variables and 2×N² constraints."""
-    from linopy import Model
-
-    m = Model()
-    N = np.arange(n)
-    x = m.add_variables(coords=[N, N], name="x")
-    y = m.add_variables(coords=[N, N], name="y")
-    m.add_constraints(x - y >= N, name="lower")
-    m.add_constraints(x + y >= 0, name="upper")
-    m.add_objective(2 * x.sum() + y.sum())
-    return m
-
-
-# ---------------------------------------------------------------------------
-# Benchmark phases
-# ---------------------------------------------------------------------------
-
-
-def time_phase(func, label: str, repeats: int = 3) -> dict:
-    """Time a callable, return best-of-N result."""
-    times = []
-    for _ in range(repeats):
-        gc.collect()
-        gc.disable()
-        t0 = time.perf_counter()
-        result = func()
-        elapsed = time.perf_counter() - t0
-        gc.enable()
-        times.append(elapsed)
-        del result
-    return {
-        "phase": label,
-        "best_s": min(times),
-        "median_s": sorted(times)[len(times) // 2],
-        "times": times,
-    }
-
-
-def benchmark_model(model, repeats: int = 3) -> list[dict]:
-    """Benchmark all matrix generation phases on a linopy Model."""
-    results = []
-    matrices = model.matrices
-
-    # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation)
-    def do_flat_vars():
-        matrices.clean_cached_properties()
-        return matrices.flat_vars
-
-    results.append(time_phase(do_flat_vars, "flat_vars", repeats))
-
-    # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation)
-    def do_flat_cons():
-        matrices.clean_cached_properties()
-        return matrices.flat_cons
-
-    results.append(time_phase(do_flat_cons, "flat_cons", repeats))
-
-    # Phase 3: vlabels + clabels (vector creation from flat data)
-    # Ensure flat data is cached first
-    matrices.clean_cached_properties()
-    _ = matrices.flat_vars
-    _ = matrices.flat_cons
-
-    def do_labels():
-        return (matrices.vlabels, matrices.clabels)
-
-    results.append(time_phase(do_labels, "vlabels+clabels", repeats))
-
-    # Phase 4: A matrix (exercises #618 sparse slicing)
-    def do_A():
-        return matrices.A
-
-    results.append(time_phase(do_A, "A_matrix", repeats))
-
-    # Phase 5: full get_matrix_data pipeline (end-to-end)
-    def do_full():
-        matrices.clean_cached_properties()
-        _ = matrices.vlabels
-        _ = matrices.clabels
-        _ = matrices.lb
-        _ = matrices.ub
-        _ = matrices.A
-        _ = matrices.b
-        _ = matrices.sense
-        return True
-
-    results.append(time_phase(do_full, "full_pipeline", repeats))
-
-    return results
-
-
-# ---------------------------------------------------------------------------
-# Solution-unpacking benchmark (#619)
-# ---------------------------------------------------------------------------
-
-
-def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]:
-    """
-    Benchmark the solution-assignment loop in Model.solve (PR #619).
-
-    Strategy: solve once with HiGHS to get a real solution vector, then
-    re-run only the assignment loop (sol[idx] → var.solution) repeatedly
-    without re-solving, isolating the unpacking cost from solver time.
-    """
-    import xarray as xr
-
-    n = build_scigrid_network(n_snapshots)
-    n.optimize.create_model(include_objective_constant=False)
-    model = n.model
-
-    # Solve once to populate the raw solution
-    status, _ = model.solve(solver_name="highs", io_api="direct")
-    if status != "ok":
-        print(f"  WARNING: solve failed ({status}), skipping solution-unpack benchmark")
-        return []
-
-    # Reconstruct the raw solution Series (as returned by the solver):
-    # a float-indexed Series mapping variable label → solution value.
-    nan = float("nan")
-    parts = []
-    for name, var in model.variables.items():
-        if var.solution is None:
-            continue
-        labels = np.ravel(var.labels)
-        values = np.ravel(var.solution.values)
-        parts.append(pd.Series(values, index=labels.astype(float)))
-    if not parts:
-        print("  WARNING: no solution found on variables, was solve successful?")
-        return []
-    sol_series = pd.concat(parts).drop_duplicates()
-    sol_series.loc[-1] = nan
-
-    n_vars = sum(
-        np.ravel(model.variables[name].labels).size for name in model.variables
-    )
-    results = []
-
-    # ----- Old path (pandas label-based, pre-#619) -----
-    def unpack_pandas():
-        for name, var in model.variables.items():
-            idx = np.ravel(var.labels).astype(float)
-            try:
-                vals = sol_series[idx].values.reshape(var.labels.shape)
-            except KeyError:
-                vals = sol_series.reindex(idx).values.reshape(var.labels.shape)
-            var.solution = xr.DataArray(vals, var.coords)
-
-    results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats))
-
-    # ----- New path (numpy dense array, #619) -----
-    def unpack_numpy():
-        sol_max_idx = int(max(sol_series.index.max(), 0))
-        sol_arr = np.full(sol_max_idx + 1, nan)
-        mask = sol_series.index >= 0
-        valid = sol_series.index[mask].astype(int)
-        sol_arr[valid] = sol_series.values[mask]
-        for name, var in model.variables.items():
-            idx = np.ravel(var.labels)
-            safe_idx = np.clip(idx, 0, sol_max_idx)
-            vals = sol_arr[safe_idx]
-            vals[idx < 0] = nan
-            var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords)
-
-    results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats))
-
-    for r in results:
-        r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0)
-    return results
-
-
-# ---------------------------------------------------------------------------
-# Runners
-# ---------------------------------------------------------------------------
-
-QUICK_SNAPSHOTS = [24]
-FULL_SNAPSHOTS = [24, 100, 200, 500]
-SYNTHETIC_SIZES = [20, 50, 100, 200]
-
-
-def run_benchmarks(
-    model_type: str, quick: bool, repeats: int, include_solve: bool = False
-) -> list[dict]:
-    """Run benchmarks across problem sizes, return flat list of results."""
-    all_results = []
-
-    if model_type in ("scigrid", "all"):
-        sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS
-        for n_snap in sizes:
-            print(f"\n{'=' * 60}")
-            print(f"SciGrid-DE  {n_snap} snapshots")
-            print(f"{'=' * 60}")
-            model = build_scigrid(n_snap)
-            n_vars = len(model.variables.flat)
-            n_cons = len(model.constraints.flat)
-            print(f"  {n_vars:,} variables, {n_cons:,} constraints")
-
-            for r in benchmark_model(model, repeats):
-                r.update(
-                    model_type="scigrid", size=n_snap, n_vars=n_vars, n_cons=n_cons
-                )
-                all_results.append(r)
-                print(
-                    f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
-                )
-
-            del model
-            gc.collect()
-
-    if include_solve:
-        # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes)
-        solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100]
-        for n_snap in solve_sizes:
-            print(f"\n{'=' * 60}")
-            print(f"SciGrid-DE solve + unpack  {n_snap} snapshots  (#619)")
-            print(f"{'=' * 60}")
-            for r in benchmark_solution_unpack(n_snap, repeats):
-                all_results.append(r)
-                print(
-                    f"  {r['phase']:30s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
-                )
-            gc.collect()
-
-    if model_type in ("synthetic", "all"):
-        sizes = [20, 50] if quick else SYNTHETIC_SIZES
-        for n in sizes:
-            print(f"\n{'=' * 60}")
-            print(f"Synthetic  N={n}  ({2 * n * n} vars, {2 * n * n} cons)")
-            print(f"{'=' * 60}")
-            model = build_synthetic(n)
-            n_vars = 2 * n * n
-            n_cons = 2 * n * n
-
-            for r in benchmark_model(model, repeats):
-                r.update(model_type="synthetic", size=n, n_vars=n_vars, n_cons=n_cons)
-                all_results.append(r)
-                print(
-                    f"  {r['phase']:20s}  {r['best_s']:.4f}s  (median {r['median_s']:.4f}s)"
-                )
-
-            del model
-            gc.collect()
-
-    return all_results
-
-
-def format_comparison(before: list[dict], after: list[dict]) -> str:
-    """Format a before/after comparison table."""
-    df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"])
-    df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"])
-    merged = df_b[["best_s"]].join(
-        df_a[["best_s"]], lsuffix="_before", rsuffix="_after"
-    )
-    merged["speedup"] = merged["best_s_before"] / merged["best_s_after"]
-    lines = [
-        f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}",
-        "-" * 70,
-    ]
-    for (mtype, size, phase), row in merged.iterrows():
-        lines.append(
-            f"{mtype:>10s} {size:>6} {phase:>20s} "
-            f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s "
-            f"{row['speedup']:>7.2f}x"
-        )
-    return "\n".join(lines)
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Benchmark linopy matrix generation (PRs #616–#619)"
-    )
-    parser.add_argument(
-        "--model",
-        choices=["scigrid", "synthetic", "all"],
-        default="all",
-        help="Model type to benchmark (default: all)",
-    )
-    parser.add_argument(
-        "--quick", action="store_true", help="Quick mode (smallest sizes only)"
-    )
-    parser.add_argument(
-        "--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)"
-    )
-    parser.add_argument("-o", "--output", type=str, help="Save results to JSON file")
-    parser.add_argument("--label", type=str, default="", help="Label for this run")
-    parser.add_argument(
-        "--compare",
-        nargs=2,
-        metavar=("BEFORE", "AFTER"),
-        help="Compare two JSON result files instead of running benchmarks",
-    )
-    parser.add_argument(
-        "--include-solve",
-        action="store_true",
-        help="Also benchmark solution unpacking (PR #619); requires HiGHS solver",
-    )
-    args = parser.parse_args()
-
-    if args.compare:
-        before = json.loads(Path(args.compare[0]).read_text())["results"]
-        after = json.loads(Path(args.compare[1]).read_text())["results"]
-        print(format_comparison(before, after))
-        return
-
-    print("linopy matrix generation benchmark")
-    print(
-        f"Python {sys.version.split()[0]}, numpy {np.__version__}, "
-        f"{platform.machine()}, {platform.system()}"
-    )
-
-    results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve)
-
-    if args.output:
-        out = {
-            "label": args.label,
-            "python": sys.version.split()[0],
-            "numpy": np.__version__,
-            "platform": f"{platform.system()} {platform.machine()}",
-            "results": results,
-        }
-        Path(args.output).write_text(json.dumps(out, indent=2, default=str))
-        print(f"\nResults saved to {args.output}")
-
-    # Summary table
-    print(f"\n{'=' * 60}")
-    print("Summary (best times)")
-    print(f"{'=' * 60}")
-    df = pd.DataFrame(results)
-    for (mtype, size), group in df.groupby(["model_type", "size"]):
-        n_vars = group.iloc[0]["n_vars"]
-        n_cons = group.iloc[0]["n_cons"]
-        print(f"\n  {mtype} size={size}  ({n_vars:,} vars, {n_cons:,} cons)")
-        for _, row in group.iterrows():
-            print(f"    {row['phase']:20s}  {row['best_s']:.4f}s")
-
-
-if __name__ == "__main__":
-    main()