From 80ec38e26a71b47fbb03623be5a2adc87f4d5857 Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Wed, 8 Apr 2026 12:10:57 -0700
Subject: [PATCH 1/5] accommodate non-list metrics in baselines

---
 sklbench/utils/measurement.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/sklbench/utils/measurement.py b/sklbench/utils/measurement.py
index 82177337..a80da7fc 100644
--- a/sklbench/utils/measurement.py
+++ b/sklbench/utils/measurement.py
@@ -65,22 +65,31 @@ def enrich_metrics(
     """Transforms raw performance and other results into aggregated metrics"""
     # time metrics
     res = bench_result.copy()
-    mean, std = box_filter(res["time[ms]"])
-    if include_performance_stability_metrics:
+    if isinstance(res["time[ms]"], list):
+        mean, std = box_filter(res["time[ms]"])
+        if include_performance_stability_metrics:
+            res.update(
+                {
+                    "1st run time[ms]": res["time[ms]"][0],
+                    "1st-mean run ratio": res["time[ms]"][0] / mean,
+                }
+            )
         res.update(
             {
-                "1st run time[ms]": res["time[ms]"][0],
-                "1st-mean run ratio": res["time[ms]"][0] / mean,
+                "time[ms]": mean,
+                "time CV": std / mean,  # Coefficient of Variation
             }
         )
-    res.update(
-        {
-            "time[ms]": mean,
-            "time CV": std / mean,  # Coefficient of Variation
-        }
-    )
+    else:
+        # already aggregated (e.g. from a baseline file)
+        mean = res["time[ms]"]
+        std = res.get("time std[ms]", 0.0)
+        if mean != 0:
+            res["time CV"] = std / mean
+        else:
+            res["time CV"] = 0.0
     cost = res.get("cost[microdollar]", None)
-    if cost:
+    if cost and isinstance(cost, list):
         res["cost[microdollar]"] = box_filter(res["cost[microdollar]"])[0]
     batch_size = res.get("batch_size", None)
     if batch_size:

From 0ce10ea667e1d01ae4f7742edfed338fcc059edb Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Fri, 10 Apr 2026 11:43:44 -0700
Subject: [PATCH 2/5] remove year_prediction_msd from regular runs

---
 configs/regular/svm.json | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/configs/regular/svm.json b/configs/regular/svm.json
index 4a1bb915..babfdb9a 100644
--- a/configs/regular/svm.json
+++ b/configs/regular/svm.json
@@ -36,10 +36,6 @@
             }
         ],
         "svr datasets": [
-            {
-                "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
-                "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
-            },
             {
                 "data": { "dataset": "fried", "split_kwargs": { "train_size": 0.5, "test_size": 0.5 } },
                 "algorithm": { "estimator_params": { "C": 2.0, "kernel": "rbf" } }
@@ -84,10 +80,6 @@
             }
         ],
         "nusvr datasets": [
-            {
-                "data": { "dataset": "year_prediction_msd", "split_kwargs": { "train_size": 20000, "test_size": null } },
-                "algorithm": { "estimator_params": { "C": 1.0, "kernel": "rbf" } }
-            },
             {
                 "data": { "dataset": "twodplanes", "split_kwargs": { "train_size": 25000, "test_size": null } },
                 "algorithm": { "estimator_params": { "C": 1.0, "kernel": ["linear", "poly", "rbf"] } }

From 1e7ae9ed822ab902495631f5e9d406a5226037f1 Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Fri, 10 Apr 2026 11:49:14 -0700
Subject: [PATCH 3/5] address FutureWarning: always request dataframe format

---
 sklbench/datasets/downloaders.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sklbench/datasets/downloaders.py b/sklbench/datasets/downloaders.py
index d75f5ea3..2aa076d4 100644
--- a/sklbench/datasets/downloaders.py
+++ b/sklbench/datasets/downloaders.py
@@ -97,7 +97,7 @@ def fetch_and_correct_openml(
 
     # Get the data with target column specified
     x, y, _, _ = dataset.get_data(
-        dataset_format="dataframe" if as_frame is True else "array",
+        dataset_format="dataframe",
         target=dataset.default_target_attribute,
     )
 
@@ -109,6 +109,8 @@ def fetch_and_correct_openml(
     if isinstance(x, pd.DataFrame):
         if any(pd.api.types.is_sparse(x[col]) for col in x.columns):
             x = x.sparse.to_dense()
+        if not as_frame:
+            x = x.to_numpy()
 
     # Convert y to numpy array if needed
     if isinstance(y, pd.Series):

From bcf452a9995b0f8c7d395e5c123782ad71473d48 Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Mon, 13 Apr 2026 12:32:10 -0700
Subject: [PATCH 4/5] convert x to numpy unless as_frame is exactly True

---
 sklbench/datasets/downloaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklbench/datasets/downloaders.py b/sklbench/datasets/downloaders.py
index 2aa076d4..5fb21832 100644
--- a/sklbench/datasets/downloaders.py
+++ b/sklbench/datasets/downloaders.py
@@ -109,7 +109,7 @@ def fetch_and_correct_openml(
     if isinstance(x, pd.DataFrame):
         if any(pd.api.types.is_sparse(x[col]) for col in x.columns):
             x = x.sparse.to_dense()
-        if not as_frame:
+        if as_frame is not True:
             x = x.to_numpy()
 
     # Convert y to numpy array if needed

From 9f1742e95ad07dc394a3e52cc3db887cb59bdf43 Mon Sep 17 00:00:00 2001
From: ethanglaser <ethan.glaser@intel.com>
Date: Mon, 13 Apr 2026 13:32:19 -0700
Subject: [PATCH 5/5] only call todense if sparse

---
 sklbench/datasets/loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklbench/datasets/loaders.py b/sklbench/datasets/loaders.py
index b4ba6cef..94adfee9 100644
--- a/sklbench/datasets/loaders.py
+++ b/sklbench/datasets/loaders.py
@@ -450,7 +450,7 @@ def load_codrnanorm(
     data_name: str, data_cache: str, raw_data_cache: str, dataset_params: Dict
 ) -> Tuple[Dict, Dict]:
     def transform_x_y(x, y):
-        x = pd.DataFrame(x.todense())
+        x = pd.DataFrame(x.todense() if hasattr(x, "todense") else x)
         y = y.astype("int")
         y[y == -1] = 0
         return x, y