|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Benchmark script for linopy matrix generation and solution-unpacking performance. |
| 4 | +
|
| 5 | +Covers the code paths optimised by PRs #616–#619: |
| 6 | + - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) |
| 7 | + - #617 np.char.add for label string concatenation |
| 8 | + - #618 sparse matrix slicing in MatrixAccessor.A |
| 9 | + - #619 numpy solution unpacking in Model.solve |
| 10 | +
|
| 11 | +Usage |
| 12 | +----- |
| 13 | + # Quick run (24 snapshots only): |
| 14 | + python benchmark/scripts/benchmark_matrix_gen.py --quick |
| 15 | +
|
| 16 | + # Full matrix-generation sweep with JSON output: |
| 17 | + python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616" |
| 18 | +
|
| 19 | + # Include solution-unpacking benchmark (requires HiGHS solver, #619): |
| 20 | + python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json |
| 21 | +
|
| 22 | + # Compare two runs: |
| 23 | + python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json |
| 24 | +""" |
| 25 | + |
| 26 | +from __future__ import annotations |
| 27 | + |
| 28 | +import argparse |
| 29 | +import gc |
| 30 | +import json |
| 31 | +import platform |
| 32 | +import sys |
| 33 | +import time |
| 34 | +from pathlib import Path |
| 35 | + |
| 36 | +import numpy as np |
| 37 | +import pandas as pd |
| 38 | + |
| 39 | + |
| 40 | +# --------------------------------------------------------------------------- |
| 41 | +# Model builders |
| 42 | +# --------------------------------------------------------------------------- |
| 43 | + |
| 44 | +def build_scigrid_network(n_snapshots: int): |
| 45 | + """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.""" |
| 46 | + import pypsa |
| 47 | + |
| 48 | + n = pypsa.examples.scigrid_de() |
| 49 | + orig_snapshots = n.snapshots |
| 50 | + orig_len = len(orig_snapshots) |
| 51 | + |
| 52 | + new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h") |
| 53 | + n.set_snapshots(new_snapshots) |
| 54 | + |
| 55 | + for component_t in (n.generators_t, n.loads_t, n.storage_units_t): |
| 56 | + for attr in list(component_t): |
| 57 | + df = getattr(component_t, attr) |
| 58 | + if df is not None and not df.empty: |
| 59 | + tiles = int(np.ceil(n_snapshots / orig_len)) + 1 |
| 60 | + tiled = np.tile(df.values, (tiles, 1))[:n_snapshots] |
| 61 | + setattr(component_t, attr, pd.DataFrame( |
| 62 | + tiled, index=new_snapshots, columns=df.columns, |
| 63 | + )) |
| 64 | + return n |
| 65 | + |
| 66 | + |
| 67 | +def build_scigrid(n_snapshots: int): |
| 68 | + """Return a linopy Model from PyPSA SciGrid-DE with extended snapshots.""" |
| 69 | + n = build_scigrid_network(n_snapshots) |
| 70 | + n.optimize.create_model(include_objective_constant=False) |
| 71 | + return n.model |
| 72 | + |
| 73 | + |
| 74 | +def build_synthetic(n: int): |
| 75 | + """Return a linopy Model with 2×N² variables and 2×N² constraints.""" |
| 76 | + from linopy import Model |
| 77 | + |
| 78 | + m = Model() |
| 79 | + N = np.arange(n) |
| 80 | + x = m.add_variables(coords=[N, N], name="x") |
| 81 | + y = m.add_variables(coords=[N, N], name="y") |
| 82 | + m.add_constraints(x - y >= N, name="lower") |
| 83 | + m.add_constraints(x + y >= 0, name="upper") |
| 84 | + m.add_objective(2 * x.sum() + y.sum()) |
| 85 | + return m |
| 86 | + |
| 87 | + |
| 88 | +# --------------------------------------------------------------------------- |
| 89 | +# Benchmark phases |
| 90 | +# --------------------------------------------------------------------------- |
| 91 | + |
| 92 | +def time_phase(func, label: str, repeats: int = 3) -> dict: |
| 93 | + """Time a callable, return best-of-N result.""" |
| 94 | + times = [] |
| 95 | + for _ in range(repeats): |
| 96 | + gc.collect() |
| 97 | + gc.disable() |
| 98 | + t0 = time.perf_counter() |
| 99 | + result = func() |
| 100 | + elapsed = time.perf_counter() - t0 |
| 101 | + gc.enable() |
| 102 | + times.append(elapsed) |
| 103 | + del result |
| 104 | + return {"phase": label, "best_s": min(times), "median_s": sorted(times)[len(times) // 2], |
| 105 | + "times": times} |
| 106 | + |
| 107 | + |
| 108 | +def benchmark_model(model, repeats: int = 3) -> list[dict]: |
| 109 | + """Benchmark all matrix generation phases on a linopy Model.""" |
| 110 | + results = [] |
| 111 | + matrices = model.matrices |
| 112 | + |
| 113 | + # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation) |
| 114 | + def do_flat_vars(): |
| 115 | + matrices.clean_cached_properties() |
| 116 | + return matrices.flat_vars |
| 117 | + |
| 118 | + results.append(time_phase(do_flat_vars, "flat_vars", repeats)) |
| 119 | + |
| 120 | + # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation) |
| 121 | + def do_flat_cons(): |
| 122 | + matrices.clean_cached_properties() |
| 123 | + return matrices.flat_cons |
| 124 | + |
| 125 | + results.append(time_phase(do_flat_cons, "flat_cons", repeats)) |
| 126 | + |
| 127 | + # Phase 3: vlabels + clabels (vector creation from flat data) |
| 128 | + # Ensure flat data is cached first |
| 129 | + matrices.clean_cached_properties() |
| 130 | + _ = matrices.flat_vars |
| 131 | + _ = matrices.flat_cons |
| 132 | + |
| 133 | + def do_labels(): |
| 134 | + return (matrices.vlabels, matrices.clabels) |
| 135 | + |
| 136 | + results.append(time_phase(do_labels, "vlabels+clabels", repeats)) |
| 137 | + |
| 138 | + # Phase 4: A matrix (exercises #618 sparse slicing) |
| 139 | + def do_A(): |
| 140 | + return matrices.A |
| 141 | + |
| 142 | + results.append(time_phase(do_A, "A_matrix", repeats)) |
| 143 | + |
| 144 | + # Phase 5: full get_matrix_data pipeline (end-to-end) |
| 145 | + def do_full(): |
| 146 | + matrices.clean_cached_properties() |
| 147 | + _ = matrices.vlabels |
| 148 | + _ = matrices.clabels |
| 149 | + _ = matrices.lb |
| 150 | + _ = matrices.ub |
| 151 | + _ = matrices.A |
| 152 | + _ = matrices.b |
| 153 | + _ = matrices.sense |
| 154 | + return True |
| 155 | + |
| 156 | + results.append(time_phase(do_full, "full_pipeline", repeats)) |
| 157 | + |
| 158 | + return results |
| 159 | + |
| 160 | + |
| 161 | +# --------------------------------------------------------------------------- |
| 162 | +# Solution-unpacking benchmark (#619) |
| 163 | +# --------------------------------------------------------------------------- |
| 164 | + |
| 165 | +def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]: |
| 166 | + """ |
| 167 | + Benchmark the solution-assignment loop in Model.solve (PR #619). |
| 168 | +
|
| 169 | + Strategy: solve once with HiGHS to get a real solution vector, then |
| 170 | + re-run only the assignment loop (sol[idx] → var.solution) repeatedly |
| 171 | + without re-solving, isolating the unpacking cost from solver time. |
| 172 | + """ |
| 173 | + import xarray as xr |
| 174 | + |
| 175 | + n = build_scigrid_network(n_snapshots) |
| 176 | + n.optimize.create_model(include_objective_constant=False) |
| 177 | + model = n.model |
| 178 | + |
| 179 | + # Solve once to populate the raw solution |
| 180 | + status, _ = model.solve(solver_name="highs", io_api="direct") |
| 181 | + if status != "ok": |
| 182 | + print(f" WARNING: solve failed ({status}), skipping solution-unpack benchmark") |
| 183 | + return [] |
| 184 | + |
| 185 | + # Reconstruct the raw solution Series (as returned by the solver): |
| 186 | + # a float-indexed Series mapping variable label → solution value. |
| 187 | + nan = float("nan") |
| 188 | + parts = [] |
| 189 | + for name, var in model.variables.items(): |
| 190 | + if var.solution is None: |
| 191 | + continue |
| 192 | + labels = np.ravel(var.labels) |
| 193 | + values = np.ravel(var.solution.values) |
| 194 | + parts.append(pd.Series(values, index=labels.astype(float))) |
| 195 | + if not parts: |
| 196 | + print(" WARNING: no solution found on variables, was solve successful?") |
| 197 | + return [] |
| 198 | + sol_series = pd.concat(parts).drop_duplicates() |
| 199 | + sol_series.loc[-1] = nan |
| 200 | + |
| 201 | + n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables) |
| 202 | + results = [] |
| 203 | + |
| 204 | + # ----- Old path (pandas label-based, pre-#619) ----- |
| 205 | + def unpack_pandas(): |
| 206 | + for name, var in model.variables.items(): |
| 207 | + idx = np.ravel(var.labels).astype(float) |
| 208 | + try: |
| 209 | + vals = sol_series[idx].values.reshape(var.labels.shape) |
| 210 | + except KeyError: |
| 211 | + vals = sol_series.reindex(idx).values.reshape(var.labels.shape) |
| 212 | + var.solution = xr.DataArray(vals, var.coords) |
| 213 | + |
| 214 | + results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats)) |
| 215 | + |
| 216 | + # ----- New path (numpy dense array, #619) ----- |
| 217 | + def unpack_numpy(): |
| 218 | + sol_max_idx = int(max(sol_series.index.max(), 0)) |
| 219 | + sol_arr = np.full(sol_max_idx + 1, nan) |
| 220 | + mask = sol_series.index >= 0 |
| 221 | + valid = sol_series.index[mask].astype(int) |
| 222 | + sol_arr[valid] = sol_series.values[mask] |
| 223 | + for name, var in model.variables.items(): |
| 224 | + idx = np.ravel(var.labels) |
| 225 | + safe_idx = np.clip(idx, 0, sol_max_idx) |
| 226 | + vals = sol_arr[safe_idx] |
| 227 | + vals[idx < 0] = nan |
| 228 | + var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords) |
| 229 | + |
| 230 | + results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats)) |
| 231 | + |
| 232 | + for r in results: |
| 233 | + r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0) |
| 234 | + return results |
| 235 | + |
| 236 | + |
| 237 | +# --------------------------------------------------------------------------- |
| 238 | +# Runners |
| 239 | +# --------------------------------------------------------------------------- |
| 240 | + |
| 241 | +QUICK_SNAPSHOTS = [24] |
| 242 | +FULL_SNAPSHOTS = [24, 100, 200, 500] |
| 243 | +SYNTHETIC_SIZES = [20, 50, 100, 200] |
| 244 | + |
| 245 | + |
| 246 | +def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]: |
| 247 | + """Run benchmarks across problem sizes, return flat list of results.""" |
| 248 | + all_results = [] |
| 249 | + |
| 250 | + if model_type in ("scigrid", "all"): |
| 251 | + sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS |
| 252 | + for n_snap in sizes: |
| 253 | + print(f"\n{'='*60}") |
| 254 | + print(f"SciGrid-DE {n_snap} snapshots") |
| 255 | + print(f"{'='*60}") |
| 256 | + model = build_scigrid(n_snap) |
| 257 | + n_vars = len(model.variables.flat) |
| 258 | + n_cons = len(model.constraints.flat) |
| 259 | + print(f" {n_vars:,} variables, {n_cons:,} constraints") |
| 260 | + |
| 261 | + for r in benchmark_model(model, repeats): |
| 262 | + r.update(model_type="scigrid", size=n_snap, |
| 263 | + n_vars=n_vars, n_cons=n_cons) |
| 264 | + all_results.append(r) |
| 265 | + print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") |
| 266 | + |
| 267 | + del model |
| 268 | + gc.collect() |
| 269 | + |
| 270 | + if include_solve: |
| 271 | + # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes) |
| 272 | + solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100] |
| 273 | + for n_snap in solve_sizes: |
| 274 | + print(f"\n{'='*60}") |
| 275 | + print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)") |
| 276 | + print(f"{'='*60}") |
| 277 | + for r in benchmark_solution_unpack(n_snap, repeats): |
| 278 | + all_results.append(r) |
| 279 | + print(f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") |
| 280 | + gc.collect() |
| 281 | + |
| 282 | + if model_type in ("synthetic", "all"): |
| 283 | + sizes = [20, 50] if quick else SYNTHETIC_SIZES |
| 284 | + for n in sizes: |
| 285 | + print(f"\n{'='*60}") |
| 286 | + print(f"Synthetic N={n} ({2*n*n} vars, {2*n*n} cons)") |
| 287 | + print(f"{'='*60}") |
| 288 | + model = build_synthetic(n) |
| 289 | + n_vars = 2 * n * n |
| 290 | + n_cons = 2 * n * n |
| 291 | + |
| 292 | + for r in benchmark_model(model, repeats): |
| 293 | + r.update(model_type="synthetic", size=n, |
| 294 | + n_vars=n_vars, n_cons=n_cons) |
| 295 | + all_results.append(r) |
| 296 | + print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)") |
| 297 | + |
| 298 | + del model |
| 299 | + gc.collect() |
| 300 | + |
| 301 | + return all_results |
| 302 | + |
| 303 | + |
| 304 | +def format_comparison(before: list[dict], after: list[dict]) -> str: |
| 305 | + """Format a before/after comparison table.""" |
| 306 | + df_b = pd.DataFrame(before).set_index(["model_type", "size", "phase"]) |
| 307 | + df_a = pd.DataFrame(after).set_index(["model_type", "size", "phase"]) |
| 308 | + merged = df_b[["best_s"]].join(df_a[["best_s"]], lsuffix="_before", rsuffix="_after") |
| 309 | + merged["speedup"] = merged["best_s_before"] / merged["best_s_after"] |
| 310 | + lines = [ |
| 311 | + f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}", |
| 312 | + "-" * 70, |
| 313 | + ] |
| 314 | + for (mtype, size, phase), row in merged.iterrows(): |
| 315 | + lines.append( |
| 316 | + f"{mtype:>10s} {size:>6} {phase:>20s} " |
| 317 | + f"{row['best_s_before']:>7.4f}s {row['best_s_after']:>7.4f}s " |
| 318 | + f"{row['speedup']:>7.2f}x" |
| 319 | + ) |
| 320 | + return "\n".join(lines) |
| 321 | + |
| 322 | + |
| 323 | +# --------------------------------------------------------------------------- |
| 324 | +# CLI |
| 325 | +# --------------------------------------------------------------------------- |
| 326 | + |
| 327 | +def main(): |
| 328 | + parser = argparse.ArgumentParser( |
| 329 | + description="Benchmark linopy matrix generation (PRs #616–#619)" |
| 330 | + ) |
| 331 | + parser.add_argument( |
| 332 | + "--model", choices=["scigrid", "synthetic", "all"], default="all", |
| 333 | + help="Model type to benchmark (default: all)", |
| 334 | + ) |
| 335 | + parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)") |
| 336 | + parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)") |
| 337 | + parser.add_argument("-o", "--output", type=str, help="Save results to JSON file") |
| 338 | + parser.add_argument("--label", type=str, default="", help="Label for this run") |
| 339 | + parser.add_argument( |
| 340 | + "--compare", nargs=2, metavar=("BEFORE", "AFTER"), |
| 341 | + help="Compare two JSON result files instead of running benchmarks", |
| 342 | + ) |
| 343 | + parser.add_argument( |
| 344 | + "--include-solve", action="store_true", |
| 345 | + help="Also benchmark solution unpacking (PR #619); requires HiGHS solver", |
| 346 | + ) |
| 347 | + args = parser.parse_args() |
| 348 | + |
| 349 | + if args.compare: |
| 350 | + before = json.loads(Path(args.compare[0]).read_text())["results"] |
| 351 | + after = json.loads(Path(args.compare[1]).read_text())["results"] |
| 352 | + print(format_comparison(before, after)) |
| 353 | + return |
| 354 | + |
| 355 | + print(f"linopy matrix generation benchmark") |
| 356 | + print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, " |
| 357 | + f"{platform.machine()}, {platform.system()}") |
| 358 | + |
| 359 | + results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve) |
| 360 | + |
| 361 | + if args.output: |
| 362 | + out = { |
| 363 | + "label": args.label, |
| 364 | + "python": sys.version.split()[0], |
| 365 | + "numpy": np.__version__, |
| 366 | + "platform": f"{platform.system()} {platform.machine()}", |
| 367 | + "results": results, |
| 368 | + } |
| 369 | + Path(args.output).write_text(json.dumps(out, indent=2, default=str)) |
| 370 | + print(f"\nResults saved to {args.output}") |
| 371 | + |
| 372 | + # Summary table |
| 373 | + print(f"\n{'='*60}") |
| 374 | + print("Summary (best times)") |
| 375 | + print(f"{'='*60}") |
| 376 | + df = pd.DataFrame(results) |
| 377 | + for (mtype, size), group in df.groupby(["model_type", "size"]): |
| 378 | + n_vars = group.iloc[0]["n_vars"] |
| 379 | + n_cons = group.iloc[0]["n_cons"] |
| 380 | + print(f"\n {mtype} size={size} ({n_vars:,} vars, {n_cons:,} cons)") |
| 381 | + for _, row in group.iterrows(): |
| 382 | + print(f" {row['phase']:20s} {row['best_s']:.4f}s") |
| 383 | + |
| 384 | + |
| 385 | +if __name__ == "__main__": |
| 386 | + main() |
0 commit comments