Skip to content

Commit e639bb6

Browse files
MaykThewessen and claude
committed
Add reproducible benchmark script for PRs #616–#619
Adds benchmark/scripts/benchmark_matrix_gen.py covering all four performance code paths: - #616 cached_property on MatrixAccessor (flat_vars / flat_cons) - #617 np.char.add label string concatenation - #618 single-step sparse matrix slicing - #619 numpy dense-array solution unpacking Reproduce with: python benchmark/scripts/benchmark_matrix_gen.py -o results.json python benchmark/scripts/benchmark_matrix_gen.py --include-solve # PR #619 python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent a6db422 commit e639bb6

1 file changed

Lines changed: 386 additions & 0 deletions

File tree

Lines changed: 386 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,386 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Benchmark script for linopy matrix generation and solution-unpacking performance.
4+
5+
Covers the code paths optimised by PRs #616–#619:
6+
- #616 cached_property on MatrixAccessor (flat_vars / flat_cons)
7+
- #617 np.char.add for label string concatenation
8+
- #618 sparse matrix slicing in MatrixAccessor.A
9+
- #619 numpy solution unpacking in Model.solve
10+
11+
Usage
12+
-----
13+
# Quick run (24 snapshots only):
14+
python benchmark/scripts/benchmark_matrix_gen.py --quick
15+
16+
# Full matrix-generation sweep with JSON output:
17+
python benchmark/scripts/benchmark_matrix_gen.py -o results.json --label "after-PR-616"
18+
19+
# Include solution-unpacking benchmark (requires HiGHS solver, #619):
20+
python benchmark/scripts/benchmark_matrix_gen.py --include-solve -o results.json
21+
22+
# Compare two runs:
23+
python benchmark/scripts/benchmark_matrix_gen.py --compare before.json after.json
24+
"""
25+
26+
from __future__ import annotations
27+
28+
import argparse
29+
import gc
30+
import json
31+
import platform
32+
import sys
33+
import time
34+
from pathlib import Path
35+
36+
import numpy as np
37+
import pandas as pd
38+
39+
40+
# ---------------------------------------------------------------------------
41+
# Model builders
42+
# ---------------------------------------------------------------------------
43+
44+
def build_scigrid_network(n_snapshots: int):
    """Return a PyPSA Network (SciGrid-DE) with extended snapshots, without building the model.

    Parameters
    ----------
    n_snapshots : int
        Number of hourly snapshots the returned network should carry.

    Notes
    -----
    The bundled SciGrid-DE example only ships a limited horizon, so the
    time-series inputs (generators, loads, storage units) are tiled to
    cover the extended snapshot range; data repeats past the original
    length.
    """
    # Imported lazily so the script can run --compare without pypsa installed.
    import pypsa

    n = pypsa.examples.scigrid_de()
    orig_snapshots = n.snapshots
    orig_len = len(orig_snapshots)

    # Replace the snapshot index with an hourly range of the requested length,
    # anchored at the original start timestamp.
    new_snapshots = pd.date_range(orig_snapshots[0], periods=n_snapshots, freq="h")
    n.set_snapshots(new_snapshots)

    # Tile every non-empty time-series DataFrame to span the new horizon.
    for component_t in (n.generators_t, n.loads_t, n.storage_units_t):
        for attr in list(component_t):
            df = getattr(component_t, attr)
            if df is not None and not df.empty:
                # +1 extra tile guards against the ceil division coming up
                # one row short; the slice trims back to n_snapshots.
                tiles = int(np.ceil(n_snapshots / orig_len)) + 1
                tiled = np.tile(df.values, (tiles, 1))[:n_snapshots]
                setattr(component_t, attr, pd.DataFrame(
                    tiled, index=new_snapshots, columns=df.columns,
                ))
    return n
65+
66+
67+
def build_scigrid(n_snapshots: int):
    """Build the SciGrid-DE network and return its linopy optimisation model."""
    network = build_scigrid_network(n_snapshots)
    # Materialise the linopy model; the constant objective term is skipped
    # because it is irrelevant for matrix-generation timing.
    network.optimize.create_model(include_objective_constant=False)
    return network.model
72+
73+
74+
def build_synthetic(n: int):
    """Return a linopy Model with 2×N² variables and 2×N² constraints."""
    from linopy import Model

    model = Model()
    coord = np.arange(n)
    # Two N×N variable blocks give 2*N*N variables in total.
    x = model.add_variables(coords=[coord, coord], name="x")
    y = model.add_variables(coords=[coord, coord], name="y")
    # Two constraint blocks of the same shape give 2*N*N constraints.
    model.add_constraints(x - y >= coord, name="lower")
    model.add_constraints(x + y >= 0, name="upper")
    model.add_objective(2 * x.sum() + y.sum())
    return model
86+
87+
88+
# ---------------------------------------------------------------------------
89+
# Benchmark phases
90+
# ---------------------------------------------------------------------------
91+
92+
def time_phase(func, label: str, repeats: int = 3) -> dict:
    """Time a callable, return best-of-N result.

    Parameters
    ----------
    func : callable
        Zero-argument callable to time; its return value is discarded.
    label : str
        Name recorded under the ``"phase"`` key of the result.
    repeats : int, default 3
        Number of timed runs.

    Returns
    -------
    dict
        Keys: ``"phase"``, ``"best_s"`` (minimum), ``"median_s"``
        (upper median of the sorted times), ``"times"`` (all runs).
    """
    times = []
    for _ in range(repeats):
        gc.collect()
        gc.disable()  # keep collector pauses out of the measurement
        t0 = time.perf_counter()
        try:
            result = func()
            elapsed = time.perf_counter() - t0
        finally:
            # BUG FIX: re-enable gc even when func() raises, so a failed
            # phase cannot leave garbage collection switched off for the
            # rest of the process.
            gc.enable()
        times.append(elapsed)
        del result  # release the phase's output before the next run
    return {
        "phase": label,
        "best_s": min(times),
        "median_s": sorted(times)[len(times) // 2],
        "times": times,
    }
106+
107+
108+
def benchmark_model(model, repeats: int = 3) -> list[dict]:
    """Benchmark all matrix generation phases on a linopy Model."""
    acc = model.matrices
    timings: list[dict] = []

    # Phase 1: flat_vars (exercises #616 caching + #617 label vectorisation)
    def _flat_vars():
        acc.clean_cached_properties()
        return acc.flat_vars

    timings.append(time_phase(_flat_vars, "flat_vars", repeats))

    # Phase 2: flat_cons (exercises #616 caching + #617 label vectorisation)
    def _flat_cons():
        acc.clean_cached_properties()
        return acc.flat_cons

    timings.append(time_phase(_flat_cons, "flat_cons", repeats))

    # Phase 3: label vectors.  Warm the flat-data caches first so only the
    # label construction itself is timed.
    acc.clean_cached_properties()
    _ = acc.flat_vars
    _ = acc.flat_cons

    def _labels():
        return (acc.vlabels, acc.clabels)

    timings.append(time_phase(_labels, "vlabels+clabels", repeats))

    # Phase 4: A matrix (exercises #618 sparse slicing)
    def _a_matrix():
        return acc.A

    timings.append(time_phase(_a_matrix, "A_matrix", repeats))

    # Phase 5: full get_matrix_data pipeline, end-to-end from a cold cache.
    def _pipeline():
        acc.clean_cached_properties()
        _ = acc.vlabels
        _ = acc.clabels
        _ = acc.lb
        _ = acc.ub
        _ = acc.A
        _ = acc.b
        _ = acc.sense
        return True

    timings.append(time_phase(_pipeline, "full_pipeline", repeats))

    return timings
159+
160+
161+
# ---------------------------------------------------------------------------
162+
# Solution-unpacking benchmark (#619)
163+
# ---------------------------------------------------------------------------
164+
165+
def benchmark_solution_unpack(n_snapshots: int, repeats: int = 3) -> list[dict]:
    """
    Benchmark the solution-assignment loop in Model.solve (PR #619).

    Strategy: solve once with HiGHS to get a real solution vector, then
    re-run only the assignment loop (sol[idx] → var.solution) repeatedly
    without re-solving, isolating the unpacking cost from solver time.

    Parameters
    ----------
    n_snapshots : int
        SciGrid-DE horizon length passed to build_scigrid_network.
    repeats : int, default 3
        Timing repeats per unpacking strategy.

    Returns
    -------
    list[dict]
        One timing record per strategy (pandas "before" vs numpy "after"),
        tagged with model_type/size/n_vars; empty if the solve failed.
    """
    import xarray as xr

    n = build_scigrid_network(n_snapshots)
    n.optimize.create_model(include_objective_constant=False)
    model = n.model

    # Solve once to populate the raw solution
    status, _ = model.solve(solver_name="highs", io_api="direct")
    if status != "ok":
        print(f" WARNING: solve failed ({status}), skipping solution-unpack benchmark")
        return []

    # Reconstruct the raw solution Series (as returned by the solver):
    # a float-indexed Series mapping variable label → solution value.
    nan = float("nan")
    parts = []
    for name, var in model.variables.items():
        if var.solution is None:
            continue
        labels = np.ravel(var.labels)
        values = np.ravel(var.solution.values)
        parts.append(pd.Series(values, index=labels.astype(float)))
    if not parts:
        print(" WARNING: no solution found on variables, was solve successful?")
        return []
    sol_series = pd.concat(parts)
    # BUG FIX: the previous `.drop_duplicates()` dropped entries with
    # duplicate *values* (e.g. the many variables that solve to 0.0),
    # leaving most labels unmapped and NaN in both unpack paths.
    # Deduplicate on the label index instead.
    sol_series = sol_series[~sol_series.index.duplicated(keep="first")]
    sol_series.loc[-1] = nan  # sentinel for "no variable at this position"

    n_vars = sum(np.ravel(model.variables[name].labels).size for name in model.variables)
    results = []

    # ----- Old path (pandas label-based, pre-#619) -----
    def unpack_pandas():
        for name, var in model.variables.items():
            idx = np.ravel(var.labels).astype(float)
            try:
                vals = sol_series[idx].values.reshape(var.labels.shape)
            except KeyError:
                # Missing labels fall back to a (slower) reindex with NaN fill.
                vals = sol_series.reindex(idx).values.reshape(var.labels.shape)
            var.solution = xr.DataArray(vals, var.coords)

    results.append(time_phase(unpack_pandas, "unpack_pandas (before)", repeats))

    # ----- New path (numpy dense array, #619) -----
    def unpack_numpy():
        # Build a dense label→value array once, then index it per variable.
        sol_max_idx = int(max(sol_series.index.max(), 0))
        sol_arr = np.full(sol_max_idx + 1, nan)
        mask = sol_series.index >= 0
        valid = sol_series.index[mask].astype(int)
        sol_arr[valid] = sol_series.values[mask]
        for name, var in model.variables.items():
            idx = np.ravel(var.labels)
            # Clip keeps negative sentinel labels in bounds; fancy indexing
            # copies, so overwriting those positions with NaN is safe.
            safe_idx = np.clip(idx, 0, sol_max_idx)
            vals = sol_arr[safe_idx]
            vals[idx < 0] = nan
            var.solution = xr.DataArray(vals.reshape(var.labels.shape), var.coords)

    results.append(time_phase(unpack_numpy, "unpack_numpy (after)", repeats))

    for r in results:
        r.update(model_type="scigrid_solve", size=n_snapshots, n_vars=n_vars, n_cons=0)
    return results
235+
236+
237+
# ---------------------------------------------------------------------------
238+
# Runners
239+
# ---------------------------------------------------------------------------
240+
241+
# Problem sizes swept by the runners below.
QUICK_SNAPSHOTS = [24]  # --quick: one day of hourly snapshots
FULL_SNAPSHOTS = [24, 100, 200, 500]  # full SciGrid-DE sweep
SYNTHETIC_SIZES = [20, 50, 100, 200]  # N for build_synthetic (2*N*N vars/cons)
244+
245+
246+
def run_benchmarks(model_type: str, quick: bool, repeats: int, include_solve: bool = False) -> list[dict]:
    """Run benchmarks across problem sizes, return flat list of results.

    Parameters
    ----------
    model_type : str
        One of "scigrid", "synthetic", "all".
    quick : bool
        Use only the smallest problem sizes.
    repeats : int
        Timing repeats per phase, forwarded to time_phase.
    include_solve : bool, default False
        Also run the solution-unpacking benchmark (PR #619, needs HiGHS).
    """
    all_results = []

    if model_type in ("scigrid", "all"):
        sizes = QUICK_SNAPSHOTS if quick else FULL_SNAPSHOTS
        for n_snap in sizes:
            print(f"\n{'='*60}")
            print(f"SciGrid-DE {n_snap} snapshots")
            print(f"{'='*60}")
            model = build_scigrid(n_snap)
            n_vars = len(model.variables.flat)
            n_cons = len(model.constraints.flat)
            print(f" {n_vars:,} variables, {n_cons:,} constraints")

            # Tag each phase record with model metadata for later grouping.
            for r in benchmark_model(model, repeats):
                r.update(model_type="scigrid", size=n_snap,
                         n_vars=n_vars, n_cons=n_cons)
                all_results.append(r)
                print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)")

            # Free the (large) model before building the next size.
            del model
            gc.collect()

    # NOTE(review): the solve benchmark is SciGrid-DE based but keyed only on
    # the --include-solve flag here — confirm whether it should also require
    # model_type in ("scigrid", "all").
    if include_solve:
        # Solution-unpacking benchmark for PR #619 (SciGrid-DE only, small sizes)
        solve_sizes = QUICK_SNAPSHOTS if quick else [24, 100]
        for n_snap in solve_sizes:
            print(f"\n{'='*60}")
            print(f"SciGrid-DE solve + unpack {n_snap} snapshots (#619)")
            print(f"{'='*60}")
            for r in benchmark_solution_unpack(n_snap, repeats):
                all_results.append(r)
                print(f" {r['phase']:30s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)")
            gc.collect()

    if model_type in ("synthetic", "all"):
        sizes = [20, 50] if quick else SYNTHETIC_SIZES
        for n in sizes:
            print(f"\n{'='*60}")
            print(f"Synthetic N={n} ({2*n*n} vars, {2*n*n} cons)")
            print(f"{'='*60}")
            model = build_synthetic(n)
            # Sizes are known analytically for the synthetic model.
            n_vars = 2 * n * n
            n_cons = 2 * n * n

            for r in benchmark_model(model, repeats):
                r.update(model_type="synthetic", size=n,
                         n_vars=n_vars, n_cons=n_cons)
                all_results.append(r)
                print(f" {r['phase']:20s} {r['best_s']:.4f}s (median {r['median_s']:.4f}s)")

            del model
            gc.collect()

    return all_results
302+
303+
304+
def format_comparison(before: list[dict], after: list[dict]) -> str:
    """Format a before/after comparison table."""
    key = ["model_type", "size", "phase"]
    base = pd.DataFrame(before).set_index(key)[["best_s"]]
    new = pd.DataFrame(after).set_index(key)[["best_s"]]
    # Align the two runs on (model, size, phase); suffixes disambiguate
    # the identically-named best_s columns.
    joined = base.join(new, lsuffix="_before", rsuffix="_after")
    joined["speedup"] = joined["best_s_before"] / joined["best_s_after"]

    header = f"{'Model':>10s} {'Size':>6s} {'Phase':>20s} {'Before':>8s} {'After':>8s} {'Speedup':>8s}"
    rows = [header, "-" * 70]
    for (mtype, size, phase), rec in joined.iterrows():
        rows.append(
            f"{mtype:>10s} {size:>6} {phase:>20s} "
            f"{rec['best_s_before']:>7.4f}s {rec['best_s_after']:>7.4f}s "
            f"{rec['speedup']:>7.2f}x"
        )
    return "\n".join(rows)
321+
322+
323+
# ---------------------------------------------------------------------------
324+
# CLI
325+
# ---------------------------------------------------------------------------
326+
327+
def main():
    """CLI entry point: parse arguments, then run (or compare) benchmarks."""
    parser = argparse.ArgumentParser(
        description="Benchmark linopy matrix generation (PRs #616–#619)"
    )
    parser.add_argument(
        "--model", choices=["scigrid", "synthetic", "all"], default="all",
        help="Model type to benchmark (default: all)",
    )
    parser.add_argument("--quick", action="store_true", help="Quick mode (smallest sizes only)")
    parser.add_argument("--repeats", type=int, default=3, help="Timing repeats per phase (default: 3)")
    parser.add_argument("-o", "--output", type=str, help="Save results to JSON file")
    parser.add_argument("--label", type=str, default="", help="Label for this run")
    parser.add_argument(
        "--compare", nargs=2, metavar=("BEFORE", "AFTER"),
        help="Compare two JSON result files instead of running benchmarks",
    )
    parser.add_argument(
        "--include-solve", action="store_true",
        help="Also benchmark solution unpacking (PR #619); requires HiGHS solver",
    )
    args = parser.parse_args()

    # Comparison mode: no benchmarks are run, just a before/after table.
    if args.compare:
        before = json.loads(Path(args.compare[0]).read_text())["results"]
        after = json.loads(Path(args.compare[1]).read_text())["results"]
        print(format_comparison(before, after))
        return

    # FIX: was an f-string with no placeholders.
    print("linopy matrix generation benchmark")
    print(f"Python {sys.version.split()[0]}, numpy {np.__version__}, "
          f"{platform.machine()}, {platform.system()}")

    results = run_benchmarks(args.model, args.quick, args.repeats, args.include_solve)

    if args.output:
        out = {
            "label": args.label,
            "python": sys.version.split()[0],
            "numpy": np.__version__,
            "platform": f"{platform.system()} {platform.machine()}",
            "results": results,
        }
        # default=str keeps non-JSON-native values (e.g. numpy scalars) serialisable.
        Path(args.output).write_text(json.dumps(out, indent=2, default=str))
        print(f"\nResults saved to {args.output}")

    # FIX: guard the summary — groupby on an empty DataFrame raised KeyError
    # (the grouping columns do not exist when no results were collected).
    if not results:
        print("\nNo benchmark results collected.")
        return

    # Summary table
    print(f"\n{'='*60}")
    print("Summary (best times)")
    print(f"{'='*60}")
    df = pd.DataFrame(results)
    for (mtype, size), group in df.groupby(["model_type", "size"]):
        n_vars = group.iloc[0]["n_vars"]
        n_cons = group.iloc[0]["n_cons"]
        print(f"\n {mtype} size={size} ({n_vars:,} vars, {n_cons:,} cons)")
        for _, row in group.iterrows():
            print(f" {row['phase']:20s} {row['best_s']:.4f}s")
383+
384+
385+
# Standard script entry guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()

0 commit comments

Comments (0)