From 80ec38e26a71b47fbb03623be5a2adc87f4d5857 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Wed, 8 Apr 2026 12:10:57 -0700 Subject: [PATCH 1/5] accommodate non-list metrics in baselines --- sklbench/utils/measurement.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py index 82177337..a80da7fc 100644 --- a/sklbench/utils/measurement.py +++ b/sklbench/utils/measurement.py @@ -65,22 +65,31 @@ def enrich_metrics( """Transforms raw performance and other results into aggregated metrics""" # time metrics res = bench_result.copy() - mean, std = box_filter(res["time[ms]"]) - if include_performance_stability_metrics: + if isinstance(res["time[ms]"], list): + mean, std = box_filter(res["time[ms]"]) + if include_performance_stability_metrics: + res.update( + { + "1st run time[ms]": res["time[ms]"][0], + "1st-mean run ratio": res["time[ms]"][0] / mean, + } + ) res.update( { - "1st run time[ms]": res["time[ms]"][0], - "1st-mean run ratio": res["time[ms]"][0] / mean, + "time[ms]": mean, + "time CV": std / mean, # Coefficient of Variation } ) - res.update( - { - "time[ms]": mean, - "time CV": std / mean, # Coefficient of Variation - } - ) + else: + # already aggregated (e.g. from a baseline file) + mean = res["time[ms]"] + std = res.get("time std[ms]", 0.0) + if mean != 0: + res["time CV"] = std / mean + else: + res["time CV"] = 0.0 cost = res.get("cost[microdollar]", None) - if cost: + if cost and isinstance(cost, list): res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0] batch_size = res.get("batch_size", None) if batch_size: From 0ce10ea667e1d01ae4f7742edfed338fcc059edb Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Fri, 10 Apr 2026 11:43:44 -0700 Subject: [PATCH 2/5] remove year_prediction_msd from regular runs --- configs/regular/svm.json | 8 -------- 1 file changed, 8 deletions(-) diff --git a/configs/regular/svm.json b/configs/regular/svm.json index 4a1bb915..babfdb9a 100644 --- a/configs/regular/svm.json +++ b/configs/regular/svm.json @@ -36,10 +36,6 @@ } ], "svr datasets": [ - { - "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } }, - "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } } - }, { "data": { "dataset": "fried", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } }, "algorithm": { "estimator_params": { "C": 2.0, "kernel": "rbf" } } @@ -84,10 +80,6 @@ } ], "nusvr datasets": [ - { - "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } }, - "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } } - }, { "data": { "dataset": "twodplanes", "split_kwargs": { "train_size": 25000, "test_size": null } }, "algorithm": { "estimator_params": { "C": 1.0, "kernel": ["linear", "poly", "rbf"] } } From 1e7ae9ed822ab902495631f5e9d406a5226037f1 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Fri, 10 Apr 2026 11:49:14 -0700 Subject: [PATCH 3/5] address future warning --- sklbench/datasets/downloaders.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklbench/datasets/downloaders.py b/sklbench/datasets/downloaders.py index d75f5ea3..2aa076d4 100644 --- a/sklbench/datasets/downloaders.py +++ b/sklbench/datasets/downloaders.py @@ -97,7 +97,7 @@ def fetch_and_correct_openml( # Get the data with target column specified x, y, _, _ = dataset.get_data( - dataset_format="dataframe" if as_frame is True else "array", + dataset_format="dataframe", target=dataset.default_target_attribute, ) @@ -109,6 +109,8 @@ def fetch_and_correct_openml( if isinstance(x, pd.DataFrame): if any(pd.api.types.is_sparse(x[col]) for col in x.columns): x = x.sparse.to_dense() + if not as_frame: + x = x.to_numpy() # Convert y to numpy array if needed if isinstance(y, pd.Series): From bcf452a9995b0f8c7d395e5c123782ad71473d48 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Mon, 13 Apr 2026 12:32:10 -0700 Subject: [PATCH 4/5] minor revision --- sklbench/datasets/downloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklbench/datasets/downloaders.py b/sklbench/datasets/downloaders.py index 2aa076d4..5fb21832 100644 --- a/sklbench/datasets/downloaders.py +++ b/sklbench/datasets/downloaders.py @@ -109,7 +109,7 @@ def fetch_and_correct_openml( if isinstance(x, pd.DataFrame): if any(pd.api.types.is_sparse(x[col]) for col in x.columns): x = x.sparse.to_dense() - if not as_frame: + if as_frame is not True: x = x.to_numpy() # Convert y to numpy array if needed From 9f1742e95ad07dc394a3e52cc3db887cb59bdf43 Mon Sep 17 00:00:00 2001 From: ethanglaser Date: Mon, 13 Apr 2026 13:32:19 -0700 Subject: [PATCH 5/5] only call todense if sparse --- sklbench/datasets/loaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklbench/datasets/loaders.py b/sklbench/datasets/loaders.py index b4ba6cef..94adfee9 100644 --- a/sklbench/datasets/loaders.py +++ b/sklbench/datasets/loaders.py @@ -450,7 +450,7 @@ def load_codrnanorm( data_name: str, data_cache: str, raw_data_cache: str, dataset_params: Dict ) -> Tuple[Dict, Dict]: def transform_x_y(x, y): - x = pd.DataFrame(x.todense()) + x = pd.DataFrame(x.todense() if hasattr(x, "todense") else x) y = y.astype("int") y[y == -1] = 0 return x, y