From ec6ad9e379c7e9d8f35fcfceeb6a0a2d8fbeea21 Mon Sep 17 00:00:00 2001 From: Mayk Thewessen Date: Fri, 13 Mar 2026 22:23:58 +0100 Subject: [PATCH 1/4] perf: use numpy array lookup for solution unpacking Convert the primal/dual pandas Series to a dense numpy lookup array before the per-variable/per-constraint unpacking loop. This replaces pandas indexing (sol[idx].values) with direct numpy array indexing (sol_arr[idx]), avoiding pandas overhead per variable type. The loop over variable/constraint types still exists (needed to set each variable's .solution xr.DataArray), but the inner indexing operation is now pure numpy instead of pandas Series.__getitem__. Co-Authored-By: Claude Opus 4.6 --- linopy/model.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/linopy/model.py b/linopy/model.py index 54334411..cd9a2215 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -1572,26 +1572,42 @@ def solve( sol = set_int_index(sol) sol.loc[-1] = nan + # Convert to numpy array for fast label-based lookup. + # Labels are integers; build a dense lookup array so each + # variable can index directly without pandas overhead. + sol_max_idx = max(sol.index.max(), 0) + sol_arr = np.full(sol_max_idx + 1, nan) + sol_arr[sol.index[sol.index >= 0]] = sol.values[sol.index >= 0] + for name, var in self.variables.items(): idx = np.ravel(var.labels) - try: - vals = sol[idx].values.reshape(var.labels.shape) - except KeyError: - vals = sol.reindex(idx).values.reshape(var.labels.shape) - var.solution = xr.DataArray(vals, var.coords) + # Use numpy indexing: labels ≥ 0 map to sol_arr, -1 maps to NaN + safe_idx = np.clip(idx, 0, sol_max_idx) + vals = sol_arr[safe_idx] + vals[idx < 0] = nan + var.solution = xr.DataArray( + vals.reshape(var.labels.shape), var.coords + ) if not result.solution.dual.empty: dual = result.solution.dual.copy() dual = set_int_index(dual) dual.loc[-1] = nan + dual_max_idx = max(dual.index.max(), 0) + dual_arr = np.full(dual_max_idx + 1, nan) + dual_arr[dual.index[dual.index >= 0]] = dual.values[ + dual.index >= 0 + ] + for name, con in self.constraints.items(): idx = np.ravel(con.labels) - try: - vals = dual[idx].values.reshape(con.labels.shape) - except KeyError: - vals = dual.reindex(idx).values.reshape(con.labels.shape) - con.dual = xr.DataArray(vals, con.labels.coords) + safe_idx = np.clip(idx, 0, dual_max_idx) + vals = dual_arr[safe_idx] + vals[idx < 0] = nan + con.dual = xr.DataArray( + vals.reshape(con.labels.shape), con.labels.coords + ) return result.status.status.value, result.status.termination_condition.value finally: From 0e9abcf0000dda8da11a3a08d22a1e96d9560502 Mon Sep 17 00:00:00 2001 From: Mayk Thewessen Date: Tue, 17 Mar 2026 21:57:11 +0100 Subject: [PATCH 2/4] =?UTF-8?q?Add=20reproducible=20benchmark=20script=20f?= =?UTF-8?q?or=20PRs=20#616=E2=80=93#619?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds benchmark/scripts/benchmark_matrix_gen.py covering all four performance code paths: - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) - #617 np.char.add label string concatenation - #618 single-step sparse matrix slicing - #619 numpy dense-array solution unpacking Reproduce with: python benchmark/scripts/benchmark_matrix_gen.py -o results.json python benchmark/scripts/benchmark_matrix_gen.py --include-solve # PR #619 path python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json Co-Authored-By: Claude Sonnet 4.6 --- benchmark/scripts/benchmark_matrix_gen.py | 386 ++++++++++++++++++++++ linopy/model.py | 8 +- 2 files changed, 388 insertions(+), 6 deletions(-) create mode 100644 benchmark/scripts/benchmark_matrix_gen.py diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py new file mode 100644 index 00000000..8fa77367 --- /dev/null +++ b/benchmark/scripts/benchmark_matrix_gen.py @@ -0,0 +1,386 @@ +#!/usr/bin/env python3 +""" +Benchmark script for linopy matrix generation and solution-unpacking performance. + +Covers the code paths optimised by PRs #616–#619: + - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) + - #617 np.char.add for label string concatenation + - #618 sparse matrix slicing in MatrixAccessor.A + - #619 numpy solution unpacking in Model.solve + +Usage +----- + # Quick run (24 snapshots only): + python benchmark/scripts/benchmark_matrix_gen.py --quick + + # Full matrix-generation sweep with JSON output: + python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616" + + # Include solution-unpacking benchmark (requires HiGHS solver, #619): + python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json + + # Compare two runs: + python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json +""" + +from __future__ import annotations + +import argparse +import gc +import json +import platform +import sys +import time +from pathlib import Path + +import numpy as np +import pandas as pd + + +# --------------------------------------------------------------------------- +# Model builders +# --------------------------------------------------------------------------- + +def build_scigrid_network(n_snapshots: int): + """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.""" + import pypsa + + n = pypsa.examples.scigrid_de() + orig_snapshots = n.snapshots + orig_len = len(orig_snapshots) + + new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h") + n.set_snapshots(new_snapshots) + + for component_t in (n.generators_t, n.loads_t, n.storage_units_t): + for attr in list(component_t): + df = getattr(component_t, attr) + if df is not None and not df.empty: + tiles = int(np.ceil(n_snapshots / orig_len)) + 1 + tiled = np.tile(df.values, (tiles, 1))[:n_snapshots] + setattr(component_t, attr, pd.DataFrame( + tiled, index=new_snapshots, columns=df.columns, + )) + return n + + +def build_scigrid(n_snapshots: int): + """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots.""" + n = build_scigrid_network(n_snapshots) + n.optimize.create_model(include_objective_constant=False) + return n.model + + +def build_synthetic(n: int): + """Return a linopy Model with 2×N² variables and 2×N² constraints.""" + from linopy import Model + + m = Model() + N = np.arange(n) + x = m.add_variables(coords=[N, N], name="x") + y = m.add_variables(coords=[N, N], name="y") + m.add_constraints(x - y >= N, name="lower") + m.add_constraints(x + y >= 0, name="upper") + m.add_objective(2 * x.sum() + y.sum()) + return m + + +# --------------------------------------------------------------------------- +# Benchmark phases +# --------------------------------------------------------------------------- + +def time_phase(func, label: str, repeats: int = 3) -> dict: + """Time a callable, return best-of-N result.""" + times = [] + for _ in range(repeats): + gc.collect() + gc.disable() + t0 = time.perf_counter() + result = func() + elapsed = time.perf_counter() - t0 + gc.enable() + times.append(elapsed) + del result + return {"phase": label, "best_s": min(times), "median_s": sorted(times)[len(times) // 2], + "times": times} + + +def benchmark_model(model, repeats: int = 3) -> list[dict]: + """Benchmark all matrix generation phases on a linopy Model.""" + results = [] + matrices = model.matrices + + # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation) + def do_flat_vars(): + matrices.clean_cached_properties() + return matrices.flat_vars + + results.append(time_phase(do_flat_vars, "flat_vars", repeats)) + + # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation) + def do_flat_cons(): + matrices.clean_cached_properties() + return matrices.flat_cons + + results.append(time_phase(do_flat_cons, "flat_cons", repeats)) + + # Phase 3: vlabels + clabels (vector creation from flat data) + # Ensure flat data is cached first + matrices.clean_cached_properties() + _ = matrices.flat_vars + _ = matrices.flat_cons + + def do_labels(): + return (matrices.vlabels, matrices.clabels) + + results.append(time_phase(do_labels, "vlabels+clabels", repeats)) + + # Phase 4: A matrix (exercises #618 sparse slicing) + def do_A(): + return matrices.A + + results.append(time_phase(do_A, "A_matrix", repeats)) + + # Phase 5: full get_matrix_data pipeline (end-to-end) + def do_full(): + matrices.clean_cached_properties() + _ = matrices.vlabels + _ = matrices.clabels + _ = matrices.lb + _ = matrices.ub + _ = matrices.A + _ = matrices.b + _ = matrices.sense + return True + + results.append(time_phase(do_full, "full_pipeline", repeats)) + + return results + + +# --------------------------------------------------------------------------- +# Solution-unpacking benchmark (#619) +# --------------------------------------------------------------------------- + +def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: + """ + Benchmark the solution-assignment loop in Model.solve (PR #619). + + Strategy: solve once with HiGHS to get a real solution vector, then + re-run only the assignment loop (sol[idx] → var.solution) repeatedly + without re-solving, isolating the unpacking cost from solver time. + """ + import xarray as xr + + n = build_scigrid_network(n_snapshots) + n.optimize.create_model(include_objective_constant=False) + model = n.model + + # Solve once to populate the raw solution + status, _ = model.solve(solver_name="highs", io_api="direct") + if status != "ok": + print(f" WARNING: solve failed ({status}), skipping solution-unpack benchmark") + return [] + + # Reconstruct the raw solution Series (as returned by the solver): + # a float-indexed Series mapping variable label → solution value. + nan = float("nan") + parts = [] + for name, var in model.variables.items(): + if var.solution is None: + continue + labels = np.ravel(var.labels) + values = np.ravel(var.solution.values) + parts.append(pd.Series(values, index=labels.astype(float))) + if not parts: + print(" WARNING: no solution found on variables, was solve successful?") + return [] + sol_series = pd.concat(parts).drop_duplicates() + sol_series.loc[-1] = nan + + n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables) + results = [] + + # ----- Old path (pandas label-based, pre-#619) ----- + def unpack_pandas(): + for name, var in model.variables.items(): + idx = np.ravel(var.labels).astype(float) + try: + vals = sol_series[idx].values.reshape(var.labels.shape) + except KeyError: + vals = sol_series.reindex(idx).values.reshape(var.labels.shape) + var.solution = xr.DataArray(vals, var.coords) + + results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats)) + + # ----- New path (numpy dense array, #619) ----- + def unpack_numpy(): + sol_max_idx = int(max(sol_series.index.max(), 0)) + sol_arr = np.full(sol_max_idx + 1, nan) + mask = sol_series.index >= 0 + valid = sol_series.index[mask].astype(int) + sol_arr[valid] = sol_series.values[mask] + for name, var in model.variables.items(): + idx = np.ravel(var.labels) + safe_idx = np.clip(idx, 0, sol_max_idx) + vals = sol_arr[safe_idx] + vals[idx < 0] = nan + var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords) + + results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats)) + + for r in results: + r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0) + return results + + +# --------------------------------------------------------------------------- +# Runners +# --------------------------------------------------------------------------- + +QUICK_SNAPSHOTS = [24] +FULL_SNAPSHOTS = [24, 100, 200, 500] +SYNTHETIC_SIZES = [20, 50, 100, 200] + + +def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]: + """Run benchmarks across problem sizes, return flat list of results.""" + all_results = [] + + if model_type in ("scigrid", "all"): + sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS + for n_snap in sizes: + print(f"\n{'='*60}") + print(f"SciGrid-DE {n_snap} snapshots") + print(f"{'='*60}") + model = build_scigrid(n_snap) + n_vars = len(model.variables.flat) + n_cons = len(model.constraints.flat) + print(f" {n_vars:,} variables, {n_cons:,} constraints") + + for r in benchmark_model(model, repeats): + r.update(model_type="scigrid", size=n_snap, + n_vars=n_vars, n_cons=n_cons) + all_results.append(r) + print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + + del model + gc.collect() + + if include_solve: + # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes) + solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100] + for n_snap in solve_sizes: + print(f"\n{'='*60}") + print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)") + print(f"{'='*60}") + for r in benchmark_solution_unpack(n_snap, repeats): + all_results.append(r) + print(f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + gc.collect() + + if model_type in ("synthetic", "all"): + sizes = [20, 50] if quick else SYNTHETIC_SIZES + for n in sizes: + print(f"\n{'='*60}") + print(f"Synthetic N={n} ({2*n*n} vars, {2*n*n} cons)") + print(f"{'='*60}") + model = build_synthetic(n) + n_vars = 2 * n * n + n_cons = 2 * n * n + + for r in benchmark_model(model, repeats): + r.update(model_type="synthetic", size=n, + n_vars=n_vars, n_cons=n_cons) + all_results.append(r) + print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + + del model + gc.collect() + + return all_results + + +def format_comparison(before: list[dict], after: list[dict]) -> str: + """Format a before/after comparison table.""" + df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"]) + df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"]) + merged = df_b[["best_s"]].join(df_a[["best_s"]], lsuffix="_before", rsuffix="_after") + merged["speedup"] = merged["best_s_before"] / merged["best_s_after"] + lines = [ + f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}", + "-" * 70, + ] + for (mtype, size, phase), row in merged.iterrows(): + lines.append( + f"{mtype:>10s} {size:>6} {phase:>20s} " + f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s " + f"{row['speedup']:>7.2f}x" + ) + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + description="Benchmark linopy matrix generation (PRs #616–#619)" + ) + parser.add_argument( + "--model", choices=["scigrid", "synthetic", "all"], default="all", + help="Model type to benchmark (default: all)", + ) + parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)") + parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)") + parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") + parser.add_argument("--label", type=str, default="", help="Label for this run") + parser.add_argument( + "--compare", nargs=2, metavar=("BEFORE", "AFTER"), + help="Compare two JSON result files instead of running benchmarks", + ) + parser.add_argument( + "--include-solve", action="store_true", + help="Also benchmark solution unpacking (PR #619); requires HiGHS solver", + ) + args = parser.parse_args() + + if args.compare: + before = json.loads(Path(args.compare[0]).read_text())["results"] + after = json.loads(Path(args.compare[1]).read_text())["results"] + print(format_comparison(before, after)) + return + + print(f"linopy matrix generation benchmark") + print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, " + f"{platform.machine()}, {platform.system()}") + + results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve) + + if args.output: + out = { + "label": args.label, + "python": sys.version.split()[0], + "numpy": np.__version__, + "platform": f"{platform.system()} {platform.machine()}", + "results": results, + } + Path(args.output).write_text(json.dumps(out, indent=2, default=str)) + print(f"\nResults saved to {args.output}") + + # Summary table + print(f"\n{'='*60}") + print("Summary (best times)") + print(f"{'='*60}") + df = pd.DataFrame(results) + for (mtype, size), group in df.groupby(["model_type", "size"]): + n_vars = group.iloc[0]["n_vars"] + n_cons = group.iloc[0]["n_cons"] + print(f"\n {mtype} size={size} ({n_vars:,} vars, {n_cons:,} cons)") + for _, row in group.iterrows(): + print(f" {row['phase']:20s} {row['best_s']:.4f}s") + + +if __name__ == "__main__": + main() diff --git a/linopy/model.py b/linopy/model.py index cd9a2215..85a7e9a3 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -1585,9 +1585,7 @@ def solve( safe_idx = np.clip(idx, 0, sol_max_idx) vals = sol_arr[safe_idx] vals[idx < 0] = nan - var.solution = xr.DataArray( - vals.reshape(var.labels.shape), var.coords - ) + var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords) if not result.solution.dual.empty: dual = result.solution.dual.copy() @@ -1596,9 +1594,7 @@ def solve( dual_max_idx = max(dual.index.max(), 0) dual_arr = np.full(dual_max_idx + 1, nan) - dual_arr[dual.index[dual.index >= 0]] = dual.values[ - dual.index >= 0 - ] + dual_arr[dual.index[dual.index >= 0]] = dual.values[dual.index >= 0] for name, con in self.constraints.items(): idx = np.ravel(con.labels) From c29ecf639bc019bd9b6bac7aec36645d943d1129 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 21:14:40 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- benchmark/scripts/benchmark_matrix_gen.py | 102 +++++++++++++++------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py index 8fa77367..7572d36a 100644 --- a/benchmark/scripts/benchmark_matrix_gen.py +++ b/benchmark/scripts/benchmark_matrix_gen.py @@ -36,11 +36,11 @@ import numpy as np import pandas as pd - # --------------------------------------------------------------------------- # Model builders # --------------------------------------------------------------------------- + def build_scigrid_network(n_snapshots: int): """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.""" import pypsa @@ -58,9 +58,15 @@ def build_scigrid_network(n_snapshots: int): if df is not None and not df.empty: tiles = int(np.ceil(n_snapshots / orig_len)) + 1 tiled = np.tile(df.values, (tiles, 1))[:n_snapshots] - setattr(component_t, attr, pd.DataFrame( - tiled, index=new_snapshots, columns=df.columns, - )) + setattr( + component_t, + attr, + pd.DataFrame( + tiled, + index=new_snapshots, + columns=df.columns, + ), + ) return n @@ -89,6 +95,7 @@ def build_synthetic(n: int): # Benchmark phases # --------------------------------------------------------------------------- + def time_phase(func, label: str, repeats: int = 3) -> dict: """Time a callable, return best-of-N result.""" times = [] @@ -101,8 +108,12 @@ def time_phase(func, label: str, repeats: int = 3) -> dict: gc.enable() times.append(elapsed) del result - return {"phase": label, "best_s": min(times), "median_s": sorted(times)[len(times) // 2], - "times": times} + return { + "phase": label, + "best_s": min(times), + "median_s": sorted(times)[len(times) // 2], + "times": times, + } def benchmark_model(model, repeats: int = 3) -> list[dict]: @@ -162,6 +173,7 @@ def do_full(): # Solution-unpacking benchmark (#619) # --------------------------------------------------------------------------- + def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: """ Benchmark the solution-assignment loop in Model.solve (PR #619). @@ -198,7 +210,9 @@ def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: sol_series = pd.concat(parts).drop_duplicates() sol_series.loc[-1] = nan - n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables) + n_vars = sum( + np.ravel(model.variables[name].labels).size for name in model.variables + ) results = [] # ----- Old path (pandas label-based, pre-#619) ----- @@ -243,26 +257,31 @@ def unpack_numpy(): SYNTHETIC_SIZES = [20, 50, 100, 200] -def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]: +def run_benchmarks( + model_type: str, quick: bool, repeats: int, include_solve: bool = False +) -> list[dict]: """Run benchmarks across problem sizes, return flat list of results.""" all_results = [] if model_type in ("scigrid", "all"): sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS for n_snap in sizes: - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"SciGrid-DE {n_snap} snapshots") - print(f"{'='*60}") + print(f"{'=' * 60}") model = build_scigrid(n_snap) n_vars = len(model.variables.flat) n_cons = len(model.constraints.flat) print(f" {n_vars:,} variables, {n_cons:,} constraints") for r in benchmark_model(model, repeats): - r.update(model_type="scigrid", size=n_snap, - n_vars=n_vars, n_cons=n_cons) + r.update( + model_type="scigrid", size=n_snap, n_vars=n_vars, n_cons=n_cons + ) all_results.append(r) - print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + print( + f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) del model gc.collect() @@ -271,29 +290,32 @@ def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bo # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes) solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100] for n_snap in solve_sizes: - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)") - print(f"{'='*60}") + print(f"{'=' * 60}") for r in benchmark_solution_unpack(n_snap, repeats): all_results.append(r) - print(f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + print( + f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) gc.collect() if model_type in ("synthetic", "all"): sizes = [20, 50] if quick else SYNTHETIC_SIZES for n in sizes: - print(f"\n{'='*60}") - print(f"Synthetic N={n} ({2*n*n} vars, {2*n*n} cons)") - print(f"{'='*60}") + print(f"\n{'=' * 60}") + print(f"Synthetic N={n} ({2 * n * n} vars, {2 * n * n} cons)") + print(f"{'=' * 60}") model = build_synthetic(n) n_vars = 2 * n * n n_cons = 2 * n * n for r in benchmark_model(model, repeats): - r.update(model_type="synthetic", size=n, - n_vars=n_vars, n_cons=n_cons) + r.update(model_type="synthetic", size=n, n_vars=n_vars, n_cons=n_cons) all_results.append(r) - print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") + print( + f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" + ) del model gc.collect() @@ -305,7 +327,9 @@ def format_comparison(before: list[dict], after: list[dict]) -> str: """Format a before/after comparison table.""" df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"]) df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"]) - merged = df_b[["best_s"]].join(df_a[["best_s"]], lsuffix="_before", rsuffix="_after") + merged = df_b[["best_s"]].join( + df_a[["best_s"]], lsuffix="_before", rsuffix="_after" + ) merged["speedup"] = merged["best_s_before"] / merged["best_s_after"] lines = [ f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}", @@ -324,24 +348,34 @@ def format_comparison(before: list[dict], after: list[dict]) -> str: # CLI # --------------------------------------------------------------------------- + def main(): parser = argparse.ArgumentParser( description="Benchmark linopy matrix generation (PRs #616–#619)" ) parser.add_argument( - "--model", choices=["scigrid", "synthetic", "all"], default="all", + "--model", + choices=["scigrid", "synthetic", "all"], + default="all", help="Model type to benchmark (default: all)", ) - parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)") - parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)") + parser.add_argument( + "--quick", action="store_true", help="Quick mode (smallest sizes only)" + ) + parser.add_argument( + "--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)" + ) parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") parser.add_argument("--label", type=str, default="", help="Label for this run") parser.add_argument( - "--compare", nargs=2, metavar=("BEFORE", "AFTER"), + "--compare", + nargs=2, + metavar=("BEFORE", "AFTER"), help="Compare two JSON result files instead of running benchmarks", ) parser.add_argument( - "--include-solve", action="store_true", + "--include-solve", + action="store_true", help="Also benchmark solution unpacking (PR #619); requires HiGHS solver", ) args = parser.parse_args() @@ -352,9 +386,11 @@ def main(): print(format_comparison(before, after)) return - print(f"linopy matrix generation benchmark") - print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, " - f"{platform.machine()}, {platform.system()}") + print("linopy matrix generation benchmark") + print( + f"Python {sys.version.split()[0]}, numpy {np.__version__}, " + f"{platform.machine()}, {platform.system()}" + ) results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve) @@ -370,9 +406,9 @@ def main(): print(f"\nResults saved to {args.output}") # Summary table - print(f"\n{'='*60}") + print(f"\n{'=' * 60}") print("Summary (best times)") - print(f"{'='*60}") + print(f"{'=' * 60}") df = pd.DataFrame(results) for (mtype, size), group in df.groupby(["model_type", "size"]): n_vars = group.iloc[0]["n_vars"] From 7529977220fa2633eef1c1013b1da1a9ba1bc14f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:05:32 +0100 Subject: [PATCH 4/4] Delete benchmark/scripts/benchmark_matrix_gen.py --- benchmark/scripts/benchmark_matrix_gen.py | 422 ---------------------- 1 file changed, 422 deletions(-) delete mode 100644 benchmark/scripts/benchmark_matrix_gen.py diff --git a/benchmark/scripts/benchmark_matrix_gen.py b/benchmark/scripts/benchmark_matrix_gen.py deleted file mode 100644 index 7572d36a..00000000 --- a/benchmark/scripts/benchmark_matrix_gen.py +++ /dev/null @@ -1,422 +0,0 @@ -#!/usr/bin/env python3 -""" -Benchmark script for linopy matrix generation and solution-unpacking performance. - -Covers the code paths optimised by PRs #616–#619: - - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) - - #617 np.char.add for label string concatenation - - #618 sparse matrix slicing in MatrixAccessor.A - - #619 numpy solution unpacking in Model.solve - -Usage ------ - # Quick run (24 snapshots only): - python benchmark/scripts/benchmark_matrix_gen.py --quick - - # Full matrix-generation sweep with JSON output: - python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616" - - # Include solution-unpacking benchmark (requires HiGHS solver, #619): - python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json - - # Compare two runs: - python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json -""" - -from __future__ import annotations - -import argparse -import gc -import json -import platform -import sys -import time -from pathlib import Path - -import numpy as np -import pandas as pd - -# --------------------------------------------------------------------------- -# Model builders -# --------------------------------------------------------------------------- - - -def build_scigrid_network(n_snapshots: int): - """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.""" - import pypsa - - n = pypsa.examples.scigrid_de() - orig_snapshots = n.snapshots - orig_len = len(orig_snapshots) - - new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h") - n.set_snapshots(new_snapshots) - - for component_t in (n.generators_t, n.loads_t, n.storage_units_t): - for attr in list(component_t): - df = getattr(component_t, attr) - if df is not None and not df.empty: - tiles = int(np.ceil(n_snapshots / orig_len)) + 1 - tiled = np.tile(df.values, (tiles, 1))[:n_snapshots] - setattr( - component_t, - attr, - pd.DataFrame( - tiled, - index=new_snapshots, - columns=df.columns, - ), - ) - return n - - -def build_scigrid(n_snapshots: int): - """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots.""" - n = build_scigrid_network(n_snapshots) - n.optimize.create_model(include_objective_constant=False) - return n.model - - -def build_synthetic(n: int): - """Return a linopy Model with 2×N² variables and 2×N² constraints.""" - from linopy import Model - - m = Model() - N = np.arange(n) - x = m.add_variables(coords=[N, N], name="x") - y = m.add_variables(coords=[N, N], name="y") - m.add_constraints(x - y >= N, name="lower") - m.add_constraints(x + y >= 0, name="upper") - m.add_objective(2 * x.sum() + y.sum()) - return m - - -# --------------------------------------------------------------------------- -# Benchmark phases -# --------------------------------------------------------------------------- - - -def time_phase(func, label: str, repeats: int = 3) -> dict: - """Time a callable, return best-of-N result.""" - times = [] - for _ in range(repeats): - gc.collect() - gc.disable() - t0 = time.perf_counter() - result = func() - elapsed = time.perf_counter() - t0 - gc.enable() - times.append(elapsed) - del result - return { - "phase": label, - "best_s": min(times), - "median_s": sorted(times)[len(times) // 2], - "times": times, - } - - -def benchmark_model(model, repeats: int = 3) -> list[dict]: - """Benchmark all matrix generation phases on a linopy Model.""" - results = [] - matrices = model.matrices - - # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation) - def do_flat_vars(): - matrices.clean_cached_properties() - return matrices.flat_vars - - results.append(time_phase(do_flat_vars, "flat_vars", repeats)) - - # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation) - def do_flat_cons(): - matrices.clean_cached_properties() - return matrices.flat_cons - - results.append(time_phase(do_flat_cons, "flat_cons", repeats)) - - # Phase 3: vlabels + clabels (vector creation from flat data) - # Ensure flat data is cached first - matrices.clean_cached_properties() - _ = matrices.flat_vars - _ = matrices.flat_cons - - def do_labels(): - return (matrices.vlabels, matrices.clabels) - - results.append(time_phase(do_labels, "vlabels+clabels", repeats)) - - # Phase 4: A matrix (exercises #618 sparse slicing) - def do_A(): - return matrices.A - - results.append(time_phase(do_A, "A_matrix", repeats)) - - # Phase 5: full get_matrix_data pipeline (end-to-end) - def do_full(): - matrices.clean_cached_properties() - _ = matrices.vlabels - _ = matrices.clabels - _ = matrices.lb - _ = matrices.ub - _ = matrices.A - _ = matrices.b - _ = matrices.sense - return True - - results.append(time_phase(do_full, "full_pipeline", repeats)) - - return results - - -# --------------------------------------------------------------------------- -# Solution-unpacking benchmark (#619) -# --------------------------------------------------------------------------- - - -def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: - """ - Benchmark the solution-assignment loop in Model.solve (PR #619). - - Strategy: solve once with HiGHS to get a real solution vector, then - re-run only the assignment loop (sol[idx] → var.solution) repeatedly - without re-solving, isolating the unpacking cost from solver time. - """ - import xarray as xr - - n = build_scigrid_network(n_snapshots) - n.optimize.create_model(include_objective_constant=False) - model = n.model - - # Solve once to populate the raw solution - status, _ = model.solve(solver_name="highs", io_api="direct") - if status != "ok": - print(f" WARNING: solve failed ({status}), skipping solution-unpack benchmark") - return [] - - # Reconstruct the raw solution Series (as returned by the solver): - # a float-indexed Series mapping variable label → solution value. - nan = float("nan") - parts = [] - for name, var in model.variables.items(): - if var.solution is None: - continue - labels = np.ravel(var.labels) - values = np.ravel(var.solution.values) - parts.append(pd.Series(values, index=labels.astype(float))) - if not parts: - print(" WARNING: no solution found on variables, was solve successful?") - return [] - sol_series = pd.concat(parts).drop_duplicates() - sol_series.loc[-1] = nan - - n_vars = sum( - np.ravel(model.variables[name].labels).size for name in model.variables - ) - results = [] - - # ----- Old path (pandas label-based, pre-#619) ----- - def unpack_pandas(): - for name, var in model.variables.items(): - idx = np.ravel(var.labels).astype(float) - try: - vals = sol_series[idx].values.reshape(var.labels.shape) - except KeyError: - vals = sol_series.reindex(idx).values.reshape(var.labels.shape) - var.solution = xr.DataArray(vals, var.coords) - - results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats)) - - # ----- New path (numpy dense array, #619) ----- - def unpack_numpy(): - sol_max_idx = int(max(sol_series.index.max(), 0)) - sol_arr = np.full(sol_max_idx + 1, nan) - mask = sol_series.index >= 0 - valid = sol_series.index[mask].astype(int) - sol_arr[valid] = sol_series.values[mask] - for name, var in model.variables.items(): - idx = np.ravel(var.labels) - safe_idx = np.clip(idx, 0, sol_max_idx) - vals = sol_arr[safe_idx] - vals[idx < 0] = nan - var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords) - - results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats)) - - for r in results: - r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0) - return results - - -# --------------------------------------------------------------------------- -# Runners -# --------------------------------------------------------------------------- - -QUICK_SNAPSHOTS = [24] -FULL_SNAPSHOTS = [24, 100, 200, 500] -SYNTHETIC_SIZES = [20, 50, 100, 200] - - -def run_benchmarks( - model_type: str, quick: bool, repeats: int, include_solve: bool = False -) -> list[dict]: - """Run benchmarks across problem sizes, return flat list of results.""" - all_results = [] - - if model_type in ("scigrid", "all"): - sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS - for n_snap in sizes: - print(f"\n{'=' * 60}") - print(f"SciGrid-DE {n_snap} snapshots") - print(f"{'=' * 60}") - model = build_scigrid(n_snap) - n_vars = len(model.variables.flat) - n_cons = len(model.constraints.flat) - print(f" {n_vars:,} variables, {n_cons:,} constraints") - - for r in benchmark_model(model, repeats): - r.update( - model_type="scigrid", size=n_snap, n_vars=n_vars, n_cons=n_cons - ) - all_results.append(r) - print( - f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" - ) - - del model - gc.collect() - - if include_solve: - # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes) - solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100] - for n_snap in solve_sizes: - print(f"\n{'=' * 60}") - print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)") - print(f"{'=' * 60}") - for r in benchmark_solution_unpack(n_snap, repeats): - all_results.append(r) - print( - f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" - ) - gc.collect() - - if model_type in ("synthetic", "all"): - sizes = [20, 50] if quick else SYNTHETIC_SIZES - for n in sizes: - print(f"\n{'=' * 60}") - print(f"Synthetic N={n} ({2 * n * n} vars, {2 * n * n} cons)") - print(f"{'=' * 60}") - model = build_synthetic(n) - n_vars = 2 * n * n - n_cons = 2 * n * n - - for r in benchmark_model(model, repeats): - r.update(model_type="synthetic", size=n, n_vars=n_vars, n_cons=n_cons) - all_results.append(r) - print( - f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)" - ) - - del model - gc.collect() - - return all_results - - -def format_comparison(before: list[dict], after: list[dict]) -> str: - """Format a before/after comparison table.""" - df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"]) - df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"]) - merged = df_b[["best_s"]].join( - df_a[["best_s"]], lsuffix="_before", rsuffix="_after" - ) - merged["speedup"] = merged["best_s_before"] / merged["best_s_after"] - lines = [ - f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}", - "-" * 70, - ] - for (mtype, size, phase), row in merged.iterrows(): - lines.append( - f"{mtype:>10s} {size:>6} {phase:>20s} " - f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s " - f"{row['speedup']:>7.2f}x" - ) - return "\n".join(lines) - - -# --------------------------------------------------------------------------- -# CLI -# --------------------------------------------------------------------------- - - -def main(): - parser = argparse.ArgumentParser( - description="Benchmark linopy matrix generation (PRs #616–#619)" - ) - parser.add_argument( - "--model", - choices=["scigrid", "synthetic", "all"], - default="all", - help="Model type to benchmark (default: all)", - ) - parser.add_argument( - "--quick", action="store_true", help="Quick mode (smallest sizes only)" - ) - parser.add_argument( - "--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)" - ) - parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") - parser.add_argument("--label", type=str, default="", help="Label for this run") - parser.add_argument( - "--compare", - nargs=2, - metavar=("BEFORE", "AFTER"), - help="Compare two JSON result files instead of running benchmarks", - ) - parser.add_argument( - "--include-solve", - action="store_true", - help="Also benchmark solution unpacking (PR #619); requires HiGHS solver", - ) - args = parser.parse_args() - - if args.compare: - before = json.loads(Path(args.compare[0]).read_text())["results"] - after = json.loads(Path(args.compare[1]).read_text())["results"] - print(format_comparison(before, after)) - return - - print("linopy matrix generation benchmark") - print( - f"Python {sys.version.split()[0]}, numpy {np.__version__}, " - f"{platform.machine()}, {platform.system()}" - ) - - results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve) - - if args.output: - out = { - "label": args.label, - "python": sys.version.split()[0], - "numpy": np.__version__, - "platform": f"{platform.system()} {platform.machine()}", - "results": results, - } - Path(args.output).write_text(json.dumps(out, indent=2, default=str)) - print(f"\nResults saved to {args.output}") - - # Summary table - print(f"\n{'=' * 60}") - print("Summary (best times)") - print(f"{'=' * 60}") - df = pd.DataFrame(results) - for (mtype, size), group in df.groupby(["model_type", "size"]): - n_vars = group.iloc[0]["n_vars"] - n_cons = group.iloc[0]["n_cons"] - print(f"\n {mtype} size={size} ({n_vars:,} vars, {n_cons:,} cons)") - for _, row in group.iterrows(): - print(f" {row['phase']:20s} {row['best_s']:.4f}s") - - -if __name__ == "__main__": - main()