Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3f3c18d
Fix BIDS conversion issues for multiple datasets
bruAristimunha Mar 26, 2026
3a133e1
Fix Chang2025 and GuttmannFlury2025 BIDS conversion failures
bruAristimunha Mar 26, 2026
f4ea371
Include all available data in NEMAR deposits
bruAristimunha Mar 26, 2026
0125d8c
Allow Shin2017A/B to load both MI and MA conditions
bruAristimunha Mar 26, 2026
135c6ed
Fix Lee2019 resting state annotations and add BDF format
bruAristimunha Mar 26, 2026
000fa19
Improve Lee2019 resting state runs with EMG and annotations
bruAristimunha Mar 26, 2026
4b83da9
updating dataset
bruAristimunha Mar 27, 2026
2e84401
updating the script
bruAristimunha Mar 27, 2026
11eb439
Fix numpy.void.get() error in Lee2019 resting state EMG
bruAristimunha Mar 27, 2026
76248d9
Disable resting state for NEMAR (BIDS event_id conflict), fix numpy e…
bruAristimunha Mar 27, 2026
28f4292
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 27, 2026
f51bf43
Revert TrianaGuzman2024 skip fix — corrupt files were truncated downl…
bruAristimunha Mar 27, 2026
d8a950b
Keep completed BIDS output after upload failure on compute nodes
bruAristimunha Mar 28, 2026
6845f23
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 28, 2026
a9f3e35
removing the script
bruAristimunha Mar 28, 2026
1e36cfb
Skip zip extraction if already extracted, /scratch fallback for NFS
bruAristimunha Mar 28, 2026
8e81304
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 28, 2026
0046129
Update whats_new for v1.6 BIDS conversion fixes
bruAristimunha Mar 28, 2026
dc89ff2
Merge branch 'develop' into fix/bids-conversion-fixes
bruAristimunha Mar 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/source/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Version 1.6 (Source - GitHub)
Enhancements
~~~~~~~~~~~~
- Add unified interactive macro table for dataset summary page with 58 metadata columns, SearchPanes filtering, paradigm distribution bar, and CSV export (:gh:`1043`).
- Expose ``motor_imagery`` and ``mental_arithmetic`` keyword-only parameters on :class:`moabb.datasets.Shin2017A` (default: MI=True, MA=False) and :class:`moabb.datasets.Shin2017B` (default: MI=False, MA=True), allowing users to load both conditions simultaneously while preserving backward compatibility (by `Bruno Aristimunha`_).
- Add resting state annotations and EMG channel support to :class:`moabb.datasets.Lee2019` resting state runs for BIDS export compatibility (by `Bruno Aristimunha`_).
- Skip zip extraction in :class:`moabb.datasets.GuttmannFlury2025` when files are already extracted, with ``/scratch`` fallback for NFS filesystems on compute nodes (by `Bruno Aristimunha`_).

API changes
~~~~~~~~~~~
Expand All @@ -32,6 +35,10 @@ Requirements
Bugs
~~~~
- Fix session key off-by-one in :class:`moabb.datasets.Lee2019` that caused silent data loss when filtering sessions, and improve session filtering in :class:`moabb.datasets.base.BaseDataset` to match compound session keys (e.g., ``"0train"``) by integer prefix (:gh:`1046` by `Benedetto Leto`_ and `Bruno Aristimunha`_).
- Fix BIDS conversion failures across multiple datasets: crop BDF/EDF signals to exact data records in ``bids_interface``, add standard montage fiducials when missing, fix :class:`moabb.datasets.BNCI2016_002` ``KeyError`` in event mapping, handle lowercase ``trigger`` attribute in :class:`moabb.datasets.BNCI2022_001` ``.mat`` files, detect and re-download truncated files in :class:`moabb.datasets.Kaneshiro2015`, add stim-channel annotations in :class:`moabb.datasets.Lee2024` for BIDS compatibility, convert µV to V in :class:`moabb.datasets.MartinezCagigal2023Checker` and :class:`moabb.datasets.MartinezCagigal2023Pary` to fix BDF physical range overflow, and handle alternate ``data`` key in :class:`moabb.datasets.Zuo2025` ``.mat`` files (by `Bruno Aristimunha`_).
- Fix :class:`moabb.datasets.Chang2025` BIDS conversion crash by gracefully skipping subjects with missing directories or ``.set`` files (by `Bruno Aristimunha`_).
- Fix :class:`moabb.datasets.GuttmannFlury2025` BIDS export ``OSError`` by correcting channel types (``Trig`` → stim, ``HEO``/``VEO`` → eog, ``M1``/``M2`` → misc) so trigger channel values no longer exceed EEG physical range limits (by `Bruno Aristimunha`_).
- Fix ``numpy.void.get()`` error in :class:`moabb.datasets.Lee2019` resting state EMG channel handling (by `Bruno Aristimunha`_).
- Fix data path lookup in :class:`moabb.datasets.Forenzo2023` that made MOABB unable to find the downloaded data (:gh:`1048` by `Ethan Davis`_).

Code health
Expand Down
23 changes: 19 additions & 4 deletions moabb/datasets/Lee2019.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import partialmethod

import numpy as np
from mne import create_info
from mne import Annotations, create_info
from mne.channels import make_standard_montage
from mne.io import RawArray
from scipy.io import loadmat
Expand Down Expand Up @@ -195,11 +195,26 @@ def _get_single_run(self, data):

def _get_single_rest_run(self, data, prefix):
    """Build an MNE Raw for one Lee2019 resting-state run.

    The pasted diff left both the removed and the added assignment to
    ``raw`` in place; this is the reconstructed post-change code.

    Parameters
    ----------
    data : numpy record (structured array element)
        One run record from the Lee2019 ``.mat`` file; exposes ``fs``,
        ``chan`` and ``{prefix}_rest``, and optionally ``EMG`` /
        ``EMG_index``.
    prefix : str
        ``"pre"`` or ``"post"`` — selects which rest segment to load.

    Returns
    -------
    mne.io.RawArray
        EEG (plus optional EMG) raw carrying a single "rest" annotation
        spanning the run, needed for BIDS export.
    """
    sfreq = data["fs"].item()
    rest_key = f"{prefix}_rest"
    raw = self._make_raw_array(data[rest_key], data["chan"], "eeg", sfreq)
    raw.set_montage(make_standard_montage("standard_1005"))

    # Membership is tested on ``dtype.names`` because ``data`` is a numpy
    # void record, which has no ``.get()`` method (the original bug).
    if "EMG" in data.dtype.names and "EMG_index" in data.dtype.names:
        rest_samples = data[rest_key].shape[0]
        # Slicing assumes the EMG trace spans the whole session, with the
        # pre-rest segment at its start and the post-rest segment at its
        # end — TODO confirm against the upstream .mat layout.
        if prefix == "pre":
            emg_slice = data["EMG"][:rest_samples]
        else:
            emg_slice = data["EMG"][-rest_samples:]
        # Attach EMG only when the slice fully covers the rest run; a
        # shorter EMG recording cannot be aligned sample-for-sample.
        if emg_slice.shape[0] == rest_samples:
            emg_raw = self._make_raw_array(
                emg_slice, data["EMG_index"], "emg", sfreq
            )
            raw = raw.add_channels([emg_raw])

    # BIDS export needs at least one event: annotate the full run as
    # "rest".  NOTE(review): duration uses ``raw.times[-1]``, i.e. one
    # sample short of ``n_times / sfreq`` — confirm this is intentional.
    raw.set_annotations(
        Annotations(onset=[0], duration=[raw.times[-1]], description=["rest"])
    )
    return raw

def _get_single_subject_data(self, subject):
Expand Down
12 changes: 8 additions & 4 deletions moabb/datasets/bbci_eeg_fnirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,8 @@ def __init__(
subjects=None,
sessions=None,
*,
motor_imagery=True,
mental_arithmetic=False,
return_all_modalities=False,
**kwargs,
):
Expand All @@ -550,8 +552,8 @@ def __init__(
super().__init__(
suffix="A",
fnirs=False,
motor_imagery=True,
mental_arithmetic=False,
motor_imagery=motor_imagery,
mental_arithmetic=mental_arithmetic,
accept=accept,
subjects=subjects,
sessions=sessions,
Expand Down Expand Up @@ -863,6 +865,8 @@ def __init__(
subjects=None,
sessions=None,
*,
motor_imagery=False,
mental_arithmetic=True,
return_all_modalities=False,
**kwargs,
):
Expand All @@ -872,8 +876,8 @@ def __init__(
super().__init__(
suffix="B",
fnirs=False,
motor_imagery=False,
mental_arithmetic=True,
motor_imagery=motor_imagery,
mental_arithmetic=mental_arithmetic,
subjects=subjects,
sessions=sessions,
accept=accept,
Expand Down
69 changes: 69 additions & 0 deletions moabb/datasets/bids_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,9 @@ def _build_sidecar_enrichment(metadata):
# EEGReference is REQUIRED by BIDS — ensure always present
entries.setdefault("EEGReference", "n/a")

# SoftwareVersions is RECOMMENDED — ensure always present
entries.setdefault("SoftwareVersions", "n/a")

# HardwareFilters and SoftwareFilters
if prep and any(
v is not None
Expand Down Expand Up @@ -2537,6 +2540,7 @@ def _suffix(self):
"EDF": ".edf",
"BrainVision": ".vhdr",
"EEGLAB": ".set",
"BDF": ".bdf",
}


Expand Down Expand Up @@ -2634,6 +2638,41 @@ def _write_file(self, bids_path, raw):
'Encountered data in "double" format',
RuntimeWarning,
)
# Crop to exact number of data records for BDF/EDF export — edfio
# requires signal duration exactly divisible by data_record_duration.
if self._format in ("BDF", "EDF"):
sfreq = raw.info["sfreq"]
n_samples = raw.n_times
# edfio uses data_record_duration = round(sfreq) / sfreq
# for non-integer sfreq, or 1.0 for integer sfreq.
samples_per_record = round(sfreq)
n_records = n_samples // samples_per_record
target_samples = n_records * samples_per_record
if 0 < target_samples < n_samples:
raw = raw.copy().crop(tmax=(target_samples - 1) / sfreq)

# Fix montage: if 'head' frame but missing fiducials (NAS/LPA/RPA),
# re-set a standard montage with fiducials so mne_bids doesn't crash.
montage = raw.get_montage()
if montage is not None:
FIFF = mne.io.constants.FIFF
has_nas = any(
p["kind"] == FIFF.FIFFV_POINT_CARDINAL
and p.get("ident") == FIFF.FIFFV_POINT_NASION
for p in montage.dig or []
)
coord_frame = montage.get_positions().get("coord_frame", "")
if coord_frame == "head" and not has_nas:
try:
std = mne.channels.make_standard_montage("standard_1005")
raw.set_montage(std, on_missing="ignore")
except Exception:
log.warning(
"Could not set standard montage, dropping "
"electrode positions."
)
raw.set_montage(None)

# Save annotation extras before write_raw_bids (which may
# strip them). We patch events.tsv afterwards.
ann_extras = getattr(raw.annotations, "extras", None)
Expand Down Expand Up @@ -2695,6 +2734,36 @@ def _write_file(self, bids_path, raw):
# SpatialReference in electrodes.json sidecars is handled by the
# monkey-patched _write_dig_bids function at module level.

# FiducialsCoordinates is RECOMMENDED in coordsystem.json — add if missing
FIFF = mne.io.constants.FIFF
coordsystem_files = list(bids_path.root.rglob("*_coordsystem.json"))
for cs_path in coordsystem_files:
with open(cs_path) as f:
cs = json.load(f)
if "FiducialsCoordinates" not in cs:
montage = raw.get_montage() if raw is not None else None
fids = {}
if montage is not None:
for dig_point in montage.dig or []:
kind = dig_point["kind"]
ident = dig_point.get("ident", 0)
pos = list(dig_point["r"])
if kind == FIFF.FIFFV_POINT_CARDINAL:
if ident == FIFF.FIFFV_POINT_NASION:
fids["NAS"] = pos
elif ident == FIFF.FIFFV_POINT_LPA:
fids["LPA"] = pos
elif ident == FIFF.FIFFV_POINT_RPA:
fids["RPA"] = pos
if fids:
cs["FiducialsCoordinates"] = fids
cs.setdefault(
"FiducialsCoordinateSystem",
cs.get("EEGCoordinateSystem", "CapTrak"),
)
with open(cs_path, "w") as f:
json.dump(cs, f, indent="\t")

# Enrich events.json sidecar with HED annotations and stimulus info
hed_tags = _build_hed_sidecar_annotations(self.dataset)
_update_events_json_sidecar(bids_path, hed_tags, metadata)
Expand Down
6 changes: 4 additions & 2 deletions moabb/datasets/bnci/bnci_2016_002.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,12 @@ def _load_data_002_2016(
onset_times = []
descriptions = []

# marker_labels has shape (n_classes, n_events); each column is an event
# and the row index with a positive value indicates the class.
for i, time_ms in enumerate(marker_times):
# Find which class this event belongs to
event_row = marker_labels[i, :]
for class_idx, value in enumerate(event_row):
event_col = marker_labels[:, i]
for class_idx, value in enumerate(event_col):
if value > 0:
# Marker times are in milliseconds, convert to seconds
onset_times.append(time_ms / 1000.0)
Expand Down
4 changes: 4 additions & 0 deletions moabb/datasets/bnci/bnci_2022_001.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,12 @@ def _convert_run_001_2022(

if hasattr(run_data, "Trigger"):
trigger = run_data.Trigger
elif hasattr(run_data, "trigger"):
trigger = run_data.trigger
elif "Trigger" in data:
trigger = data["Trigger"]
elif "trigger" in data:
trigger = data["trigger"]

# Try to get sampling rate from header
if hasattr(run_data, "Header"):
Expand Down
36 changes: 30 additions & 6 deletions moabb/datasets/chang2025.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,15 +249,33 @@ def _get_single_subject_data(self, subject):

if not subj_dir.exists():
# Try alternative directory structures.
for candidate in base.iterdir():
if candidate.is_dir() and orig_id in candidate.name:
subj_dir = candidate
break
found = False
if base.exists():
for candidate in base.iterdir():
if candidate.is_dir() and orig_id in candidate.name:
subj_dir = candidate
found = True
break
if not found:
log.warning(
"Subject directory for %s not found under %s. "
"The upstream data may be missing for this subject. Skipping.",
orig_id,
base,
)
return {}

# Find .set files for the selected paradigm type.
set_files = sorted(subj_dir.rglob("*.set"))
if not set_files:
raise FileNotFoundError(f"No .set files for {orig_id} in {subj_dir}")
log.warning(
"No .set files found for %s in %s. "
"The download may be incomplete or the upstream data may be "
"missing for this subject. Skipping.",
orig_id,
subj_dir,
)
return {}

# Filter files by paradigm type.
# File naming: {orig_id}_{prefix}{session_num}.set
Expand Down Expand Up @@ -309,7 +327,13 @@ def _get_single_subject_data(self, subject):
log.warning("Failed to load %s: %s", sf.name, e)

if not sessions:
raise FileNotFoundError(f"No loadable {pt} data for {orig_id}")
log.warning(
"No loadable %s session data for %s. "
"All .set files failed to load. Skipping.",
pt,
orig_id,
)
return {}
return sessions

def data_path(
Expand Down
57 changes: 53 additions & 4 deletions moabb/datasets/guttmann_flury2025.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,33 @@ def _data_path_for_paradigm(
f"Downloaded {zip_name} but could not locate ZIP in {dl_path}"
)

# Extract ZIP to subject directory.
# Extract ZIP to subject directory (skip if already extracted).
subj_dir.mkdir(parents=True, exist_ok=True)
log.info("Extracting %s (%s) to %s", zip_name, paradigm, subj_dir)
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, subj_dir)
if any(subj_dir.rglob("*.bdf")):
log.info("Already extracted %s (%s), skipping.", zip_name, paradigm)
else:
log.info("Extracting %s (%s) to %s", zip_name, paradigm, subj_dir)
try:
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, subj_dir)
except OSError:
# NFS may fail with EINVAL; try extracting via /scratch as fallback
import shutil
import tempfile

log.warning("NFS extraction failed, using /scratch fallback")
with tempfile.TemporaryDirectory(
dir="/scratch/baristim" if Path("/scratch/baristim").exists() else None
) as tmp:
with zipfile.ZipFile(str(dl_path)) as zf:
safe_extract_zip(zf, Path(tmp))
# Copy extracted files to destination
for item in Path(tmp).rglob("*"):
if item.is_file():
rel = item.relative_to(tmp)
dest = subj_dir / rel
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(item, dest)

return str(subj_dir)

Expand Down Expand Up @@ -385,12 +407,37 @@ def _decode_p300_from_sync_csv(bdf_path):
return records if records else None


def _fix_channel_types(raw):
"""Fix channel types for non-EEG channels in Guttmann-Flury BDF files.

The BDF files have malformed headers so MNE defaults all channels to EEG.
The ``Trig`` channel contains trigger codes with values up to 40 V,
which exceeds BDF physical range limits and causes ``OSError`` during
BIDS export. Re-type ``Trig`` as ``stim``, ``HEO`` as ``eog``, and
``M1``/``M2`` as ``misc`` so that downstream writers handle them
correctly.
"""
type_mapping = {}
for name in raw.ch_names:
upper = name.upper()
if upper == "TRIG":
type_mapping[name] = "stim"
elif upper in ("HEO", "VEO", "HEOG", "VEOG"):
type_mapping[name] = "eog"
elif upper in ("M1", "M2"):
type_mapping[name] = "misc"
if type_mapping:
raw.set_channel_types(type_mapping)
return raw


def _load_raw_with_stim_events(bdf_path, event_id):
"""Load BDF file, decode Trig channel events, and set annotations.

Used by MI/ME where Trig channel codes directly map to event types.
"""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

stim_ch = "Trig"
if stim_ch not in raw.ch_names:
Expand Down Expand Up @@ -757,6 +804,7 @@ def __init__(self, subjects=None, sessions=None, *, return_all_modalities=False)
def _load_ssvep_raw(self, bdf_path):
"""Load SSVEP BDF and decode frequency events from sync CSV."""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

annot_onset = []
annot_dur = []
Expand Down Expand Up @@ -946,6 +994,7 @@ def _load_p300_raw(self, bdf_path):
falls back to decoding flash events from the sync CSV.
"""
raw = mne.io.read_raw_bdf(str(bdf_path), preload=True, verbose="ERROR")
_fix_channel_types(raw)

# Try annotations JSON first.
ann_records = _load_annotations_json(bdf_path)
Expand Down
Loading
Loading