Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3f3c18d
Fix BIDS conversion issues for multiple datasets
bruAristimunha Mar 26, 2026
3a133e1
Fix Chang2025 and GuttmannFlury2025 BIDS conversion failures
bruAristimunha Mar 26, 2026
f4ea371
Include all available data in NEMAR deposits
bruAristimunha Mar 26, 2026
0125d8c
Allow Shin2017A/B to load both MI and MA conditions
bruAristimunha Mar 26, 2026
135c6ed
Fix Lee2019 resting state annotations and add BDF format
bruAristimunha Mar 26, 2026
000fa19
Improve Lee2019 resting state runs with EMG and annotations
bruAristimunha Mar 26, 2026
4b83da9
updating dataset
bruAristimunha Mar 27, 2026
2e84401
updating the script
bruAristimunha Mar 27, 2026
11eb439
Fix numpy.void.get() error in Lee2019 resting state EMG
bruAristimunha Mar 27, 2026
76248d9
Disable resting state for NEMAR (BIDS event_id conflict), fix numpy e…
bruAristimunha Mar 27, 2026
28f4292
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2026
f51bf43
Revert TrianaGuzman2024 skip fix — corrupt files were truncated downl…
bruAristimunha Mar 27, 2026
d8a950b
Keep completed BIDS output after upload failure on compute nodes
bruAristimunha Mar 28, 2026
6845f23
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 28, 2026
a9f3e35
removing the script
bruAristimunha Mar 28, 2026
1e36cfb
Skip zip extraction if already extracted, /scratch fallback for NFS
bruAristimunha Mar 28, 2026
8e81304
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 28, 2026
0046129
Update whats_new for v1.6 BIDS conversion fixes
bruAristimunha Mar 28, 2026
dc89ff2
Merge branch 'develop' into fix/bids-conversion-fixes
bruAristimunha Mar 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/source/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Version 1.6 (Source - GitHub)
Enhancements
~~~~~~~~~~~~
- Add unified interactive macro table for dataset summary page with 58 metadata columns, SearchPanes filtering, paradigm distribution bar, and CSV export (:gh:`1043`).
- Expose ``motor_imagery`` and ``mental_arithmetic`` keyword-only parameters on :class:`moabb.datasets.Shin2017A` (default: MI=True, MA=False) and :class:`moabb.datasets.Shin2017B` (default: MI=False, MA=True), allowing users to load both conditions simultaneously while preserving backward compatibility (by `Bruno Aristimunha`_).
- Add resting state annotations and EMG channel support to :class:`moabb.datasets.Lee2019` resting state runs for BIDS export compatibility (by `Bruno Aristimunha`_).
- Skip zip extraction in :class:`moabb.datasets.GuttmannFlury2025` when files are already extracted, with ``/scratch`` fallback for NFS filesystems on compute nodes (by `Bruno Aristimunha`_).

API changes
~~~~~~~~~~~
Expand All @@ -32,6 +35,10 @@ Requirements
Bugs
~~~~
- Fix session key off-by-one in :class:`moabb.datasets.Lee2019` that caused silent data loss when filtering sessions, and improve session filtering in :class:`moabb.datasets.base.BaseDataset` to match compound session keys (e.g., ``"0train"``) by integer prefix (:gh:`1046` by `Benedetto Leto`_ and `Bruno Aristimunha`_).
- Fix BIDS conversion failures across multiple datasets: crop BDF/EDF signals to exact data records in ``bids_interface``, add standard montage fiducials when missing, fix :class:`moabb.datasets.BNCI2016_002` ``KeyError`` in event mapping, handle lowercase ``trigger`` attribute in :class:`moabb.datasets.BNCI2022_001` ``.mat`` files, detect and re-download truncated files in :class:`moabb.datasets.Kaneshiro2015`, add stim-channel annotations in :class:`moabb.datasets.Lee2024` for BIDS compatibility, convert µV to V in :class:`moabb.datasets.MartinezCagigal2023Checker` and :class:`moabb.datasets.MartinezCagigal2023Pary` to fix BDF physical range overflow, and handle alternate ``data`` key in :class:`moabb.datasets.Zuo2025` ``.mat`` files (by `Bruno Aristimunha`_).
- Fix :class:`moabb.datasets.Chang2025` BIDS conversion crash by gracefully skipping subjects with missing directories or ``.set`` files (by `Bruno Aristimunha`_).
- Fix :class:`moabb.datasets.GuttmannFlury2025` BIDS export ``OSError`` by correcting channel types (``Trig`` → stim, ``HEO``/``VEO`` → eog, ``M1``/``M2`` → misc) so trigger channel values no longer exceed EEG physical range limits (by `Bruno Aristimunha`_).
- Fix ``numpy.void.get()`` error in :class:`moabb.datasets.Lee2019` resting state EMG channel handling (by `Bruno Aristimunha`_).
- Fix data path lookup in :class:`moabb.datasets.Forenzo2023` that made MOABB unable to find the downloaded data (:gh:`1048` by `Ethan Davis`_).

Code health
Expand Down
23 changes: 19 additions & 4 deletions moabb/datasets/Lee2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import partialmethod

import numpy as np
from mne import create_info
from mne import Annotations, create_info
from mne.channels import make_standard_montage
from mne.io import RawArray
from scipy.io import loadmat
Expand Down Expand Up @@ -195,11 +195,26 @@ def _get_single_run(self, data):

def _get_single_rest_run(self, data, prefix):
    """Build an MNE Raw for one Lee2019 resting-state run.

    The pasted diff left both the removed and the added assignment to
    ``raw`` in place; this is the reconstructed post-change code.

    Parameters
    ----------
    data : numpy record (structured array element)
        One run record from the Lee2019 ``.mat`` file; exposes ``fs``,
        ``chan`` and ``{prefix}_rest``, and optionally ``EMG`` /
        ``EMG_index``.
    prefix : str
        ``"pre"`` or ``"post"`` — selects which rest segment to load.

    Returns
    -------
    mne.io.RawArray
        EEG (plus optional EMG) raw carrying a single "rest" annotation
        spanning the run, needed for BIDS export.
    """
    sfreq = data["fs"].item()
    rest_key = f"{prefix}_rest"
    raw = self._make_raw_array(data[rest_key], data["chan"], "eeg", sfreq)
    raw.set_montage(make_standard_montage("standard_1005"))

    # Membership is tested on ``dtype.names`` because ``data`` is a numpy
    # void record, which has no ``.get()`` method (the original bug).
    if "EMG" in data.dtype.names and "EMG_index" in data.dtype.names:
        rest_samples = data[rest_key].shape[0]
        # Slicing assumes the EMG trace spans the whole session, with the
        # pre-rest segment at its start and the post-rest segment at its
        # end — TODO confirm against the upstream .mat layout.
        if prefix == "pre":
            emg_slice = data["EMG"][:rest_samples]
        else:
            emg_slice = data["EMG"][-rest_samples:]
        # Attach EMG only when the slice fully covers the rest run; a
        # shorter EMG recording cannot be aligned sample-for-sample.
        if emg_slice.shape[0] == rest_samples:
            emg_raw = self._make_raw_array(
                emg_slice, data["EMG_index"], "emg", sfreq
            )
            raw = raw.add_channels([emg_raw])

    # BIDS export needs at least one event: annotate the full run as
    # "rest".  NOTE(review): duration uses ``raw.times[-1]``, i.e. one
    # sample short of ``n_times / sfreq`` — confirm this is intentional.
    raw.set_annotations(
        Annotations(onset=[0], duration=[raw.times[-1]], description=["rest"])
    )
    return raw

def _get_single_subject_data(self, subject):
Expand Down
12 changes: 8 additions & 4 deletions moabb/datasets/bbci_eeg_fnirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,8 @@ def __init__(
subjects=None,
sessions=None,
*,
motor_imagery=True,
mental_arithmetic=False,
return_all_modalities=False,
**kwargs,
):
Expand All @@ -550,8 +552,8 @@ def __init__(
super().__init__(
suffix="A",
fnirs=False,
motor_imagery=True,
mental_arithmetic=False,
motor_imagery=motor_imagery,
mental_arithmetic=mental_arithmetic,
accept=accept,
subjects=subjects,
sessions=sessions,
Expand Down Expand Up @@ -863,6 +865,8 @@ def __init__(
subjects=None,
sessions=None,
*,
motor_imagery=False,
mental_arithmetic=True,
return_all_modalities=False,
**kwargs,
):
Expand All @@ -872,8 +876,8 @@ def __init__(
super().__init__(
suffix="B",
fnirs=False,
motor_imagery=False,
mental_arithmetic=True,
motor_imagery=motor_imagery,
mental_arithmetic=mental_arithmetic,
subjects=subjects,
sessions=sessions,
accept=accept,
Expand Down
69 changes: 69 additions & 0 deletions moabb/datasets/bids_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,9 @@ def _build_sidecar_enrichment(metadata):
# EEGReference is REQUIRED by BIDS — ensure always present
entries.setdefault("EEGReference", "n/a")

# SoftwareVersions is RECOMMENDED — ensure always present
entries.setdefault("SoftwareVersions", "n/a")

# HardwareFilters and SoftwareFilters
if prep and any(
v is not None
Expand Down Expand Up @@ -2537,6 +2540,7 @@ def _suffix(self):
"EDF": ".edf",
"BrainVision": ".vhdr",
"EEGLAB": ".set",
"BDF": ".bdf",
}


Expand Down Expand Up @@ -2634,6 +2638,41 @@ def _write_file(self, bids_path, raw):
'Encountered data in "double" format',
RuntimeWarning,
)
# Crop to exact number of data records for BDF/EDF export — edfio
# requires signal duration exactly divisible by data_record_duration.
if self._format in ("BDF", "EDF"):
sfreq = raw.info["sfreq"]
n_samples = raw.n_times
# edfio uses data_record_duration = round(sfreq) / sfreq
# for non-integer sfreq, or 1.0 for integer sfreq.
samples_per_record = round(sfreq)
n_records = n_samples // samples_per_record
target_samples = n_records * samples_per_record
if 0 < target_samples < n_samples:
raw = raw.copy().crop(tmax=(target_samples - 1) / sfreq)

# Fix montage: if 'head' frame but missing fiducials (NAS/LPA/RPA),
# re-set a standard montage with fiducials so mne_bids doesn't crash.
montage = raw.get_montage()
if montage is not None:
FIFF = mne.io.constants.FIFF
has_nas = any(
p["kind"] == FIFF.FIFFV_POINT_CARDINAL
and p.get("ident") == FIFF.FIFFV_POINT_NASION
for p in montage.dig or []
)
coord_frame = montage.get_positions().get("coord_frame", "")
if coord_frame == "head" and not has_nas:
try:
std = mne.channels.make_standard_montage("standard_1005")
raw.set_montage(std, on_missing="ignore")
except Exception:
log.warning(
"Could not set standard montage, dropping "
"electrode positions."
)
raw.set_montage(None)

# Save annotation extras before write_raw_bids (which may
# strip them). We patch events.tsv afterwards.
ann_extras = getattr(raw.annotations, "extras", None)
Expand Down Expand Up @@ -2695,6 +2734,36 @@ def _write_file(self, bids_path, raw):
# SpatialReference in electrodes.json sidecars is handled by the
# monkey-patched _write_dig_bids function at module level.

# FiducialsCoordinates is RECOMMENDED in coordsystem.json — add if missing
FIFF = mne.io.constants.FIFF
coordsystem_files = list(bids_path.root.rglob("*_coordsystem.json"))
for cs_path in coordsystem_files:
with open(cs_path) as f:
cs = json.load(f)
if "FiducialsCoordinates" not in cs:
montage = raw.get_montage() if raw is not None else None
fids = {}
if montage is not None:
for dig_point in montage.dig or []:
kind = dig_point["kind"]
ident = dig_point.get("ident", 0)
pos = list(dig_point["r"])
if kind == FIFF.FIFFV_POINT_CARDINAL:
if ident == FIFF.FIFFV_POINT_NASION:
fids["NAS"] = pos
elif ident == FIFF.FIFFV_POINT_LPA:
fids["LPA"] = pos
elif ident == FIFF.FIFFV_POINT_RPA:
fids["RPA"] = pos
if fids:
cs["FiducialsCoordinates"] = fids
cs.setdefault(
"FiducialsCoordinateSystem",
cs.get("EEGCoordinateSystem", "CapTrak"),
)
with open(cs_path, "w") as f:
json.dump(cs, f, indent="\t")

# Enrich events.json sidecar with HED annotations and stimulus info
hed_tags = _build_hed_sidecar_annotations(self.dataset)
_update_events_json_sidecar(bids_path, hed_tags, metadata)
Expand Down
6 changes: 4 additions & 2 deletions moabb/datasets/bnci/bnci_2016_002.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,12 @@ def _load_data_002_2016(
onset_times = []
descriptions = []

# marker_labels has shape (n_classes, n_events); each column is an event
# and the row index with a positive value indicates the class.
for i, time_ms in enumerate(marker_times):
# Find which class this event belongs to
event_row = marker_labels[i, :]
for class_idx, value in enumerate(event_row):
event_col = marker_labels[:, i]
for class_idx, value in enumerate(event_col):
if value > 0:
# Marker times are in milliseconds, convert to seconds
onset_times.append(time_ms / 1000.0)
Expand Down
4 changes: 4 additions & 0 deletions moabb/datasets/bnci/bnci_2022_001.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,12 @@ def _convert_run_001_2022(

if hasattr(run_data, "Trigger"):
trigger = run_data.Trigger
elif hasattr(run_data, "trigger"):
trigger = run_data.trigger
elif "Trigger" in data:
trigger = data["Trigger"]
elif "trigger" in data:
trigger = data["trigger"]

# Try to get sampling rate from header
if hasattr(run_data, "Header"):
Expand Down
36 changes: 30 additions & 6 deletions moabb/datasets/chang2025.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,33 @@ def _get_single_subject_data(self, subject):

if not subj_dir.exists():
# Try alternative directory structures.
for candidate in base.iterdir():
if candidate.is_dir() and orig_id in candidate.name:
subj_dir = candidate
break
found = False
if base.exists():
for candidate in base.iterdir():
if candidate.is_dir() and orig_id in candidate.name:
subj_dir = candidate
found = True
break
if not found:
log.warning(
"Subject directory for %s not found under %s. "
"The upstream data may be missing for this subject. Skipping.",
orig_id,
base,
)
return {}

# Find .set files for the selected paradigm type.
set_files = sorted(subj_dir.rglob("*.set"))
if not set_files:
raise FileNotFoundError(f"No .set files for {orig_id} in {subj_dir}")
log.warning(
"No .set files found for %s in %s. "
"The download may be incomplete or the upstream data may be "
"missing for this subject. Skipping.",
orig_id,
subj_dir,
)
return {}

# Filter files by paradigm type.
# File naming: {orig_id}_{prefix}{session_num}.set
Expand Down Expand Up @@ -309,7 +327,13 @@ def _get_single_subject_data(self, subject):
log.warning("Failed to load %s: %s", sf.name, e)

if not sessions:
raise FileNotFoundError(f"No loadable {pt} data for {orig_id}")
log.warning(
"No loadable %s session data for %s. "
"All .set files failed to load. Skipping.",
pt,
orig_id,
)
return {}
return sessions

def data_path(
Expand Down
57 changes: 53 additions & 4 deletions moabb/datasets/guttmann_flury2025.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,33 @@ def _data_path_for_paradigm(
f"Downloaded {zip_name} but could not locate ZIP in {dl_path}"
)

# Extract ZIP to subject directory.
# Extract ZIP to subject directory (skip if already extracted).
subj_dir.mkdir(parents=True, exist_ok=True)
log.info("Extracting %s (%s) to %s", zip_name, paradigm, subj_dir)
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, subj_dir)
if any(subj_dir.rglob("*.bdf")):
log.info("Already extracted %s (%s), skipping.", zip_name, paradigm)
else:
log.info("Extracting %s (%s) to %s", zip_name, paradigm, subj_dir)
try:
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, subj_dir)
except OSError:
# NFS may fail with EINVAL; try extracting via /scratch as fallback
import shutil
import tempfile

log.warning("NFS extraction failed, using /scratch fallback")
with tempfile.TemporaryDirectory(
dir="/scratch/baristim" if Path("/scratch/baristim").exists() else None
) as tmp:
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, Path(tmp))
# Copy extracted files to destination
for item in Path(tmp).rglob("*"):
if item.is_file():
rel = item.relative_to(tmp)
dest = subj_dir / rel
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(item, dest)

return str(subj_dir)

Expand Down Expand Up @@ -385,12 +407,37 @@ def _decode_p300_from_sync_csv(bdf_path):
return records if records else None


def _fix_channel_types(raw):
"""Fix channel types for non-EEG channels in Guttmann-Flury BDF files.

The BDF files have malformed headers so MNE defaults all channels to EEG.
The ``Trig`` channel contains trigger codes with values up to 40 V,
which exceeds BDF physical range limits and causes ``OSError`` during
BIDS export. Re-type ``Trig`` as ``stim``, ``HEO`` as ``eog``, and
``M1``/``M2`` as ``misc`` so that downstream writers handle them
correctly.
"""
type_mapping = {}
for name in raw.ch_names:
upper = name.upper()
if upper == "TRIG":
type_mapping[name] = "stim"
elif upper in ("HEO", "VEO", "HEOG", "VEOG"):
type_mapping[name] = "eog"
elif upper in ("M1", "M2"):
type_mapping[name] = "misc"
if type_mapping:
raw.set_channel_types(type_mapping)
return raw


def _load_raw_with_stim_events(bdf_path, event_id):
"""Load BDF file, decode Trig channel events, and set annotations.

Used by MI/ME where Trig channel codes directly map to event types.
"""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

stim_ch = "Trig"
if stim_ch not in raw.ch_names:
Expand Down Expand Up @@ -757,6 +804,7 @@ def __init__(self, subjects=None, sessions=None, *, return_all_modalities=False)
def _load_ssvep_raw(self, bdf_path):
"""Load SSVEP BDF and decode frequency events from sync CSV."""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

annot_onset = []
annot_dur = []
Expand Down Expand Up @@ -946,6 +994,7 @@ def _load_p300_raw(self, bdf_path):
falls back to decoding flash events from the sync CSV.
"""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

# Try annotations JSON first.
ann_records = _load_annotations_json(bdf_path)
Expand Down
Loading
Loading