mne-tools · hoechenberger · Jan 27, 2024 · Jan 24, 2024 · Jan 26, 2024 · Jan 26, 2024
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -297,6 +297,26 @@ jobs:
           paths:
             - ~/mne_data/eeg_matchingpennies
 
+  cache_MNE-phantom-KIT-data:
+    <<: *imageconfig
+    steps:
+      - attach_workspace:
+          at: ~/
+      - restore_cache:
+          keys:
+            - data-cache-MNE-phantom-KIT-data-1
+      - bash_env
+      - gitconfig  # email address is needed for datalad; NOTE(review): confirm this dataset is actually fetched via datalad
+      - run:
+          name: Get MNE-phantom-KIT-data
+          command: |
+            $DOWNLOAD_DATA MNE-phantom-KIT-data
+      - codecov/upload
+      - save_cache:
+          key: data-cache-MNE-phantom-KIT-data-1
+          paths:
+            - ~/mne_data/MNE-phantom-KIT-data
+
   cache_ERP_CORE:
     <<: *imageconfig
     steps:
@@ -765,6 +785,32 @@ jobs:
           paths:
             - mne_data/derivatives/mne-bids-pipeline/eeg_matchingpennies/*/*/*.html
 
+  test_MNE-phantom-KIT-data:
+    <<: *imageconfig
+    steps:
+      - attach_workspace:
+          at: ~/
+      - bash_env
+      - restore_cache:
+          keys:
+            - data-cache-MNE-phantom-KIT-data-1
+      - run:
+          name: test MNE-phantom-KIT-data
+          command: $RUN_TESTS MNE-phantom-KIT-data
+      - codecov/upload
+      - store_test_results:
+          path: ./test-results
+      - store_artifacts:
+          path: ./test-results
+          destination: test-results
+      - store_artifacts:
+          path: /home/circleci/reports/MNE-phantom-KIT-data
+          destination: reports/MNE-phantom-KIT-data
+      - persist_to_workspace:
+          root: ~/
+          paths:
+            - mne_data/derivatives/mne-bids-pipeline/MNE-phantom-KIT-data/*/*/*.html
+
   test_ERP_CORE_N400:
     <<: *imageconfig
     resource_class: large
@@ -1191,6 +1237,15 @@ workflows:
             - cache_eeg_matchingpennies
           <<: *filter_tags
 
+      - cache_MNE-phantom-KIT-data:
+          requires:
+            - setup_env
+          <<: *filter_tags
+      - test_MNE-phantom-KIT-data:
+          requires:
+            - cache_MNE-phantom-KIT-data
+          <<: *filter_tags
+
       - cache_ERP_CORE:
           requires:
             - setup_env
@@ -1242,6 +1297,7 @@ workflows:
             - test_ds003392
             - test_ds004229
             - test_eeg_matchingpennies
+            - test_MNE-phantom-KIT-data
             - test_ERP_CORE_N400
             - test_ERP_CORE_ERN
             - test_ERP_CORE_LRP

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -90,7 +90,7 @@ nav:
             - Epoching: settings/preprocessing/epochs.md
             - Artifact removal:
                 - Stimulation artifact: settings/preprocessing/stim_artifact.md
-                - SSP & ICA: settings/preprocessing/ssp_ica.md
+                - SSP, ICA, and artifact regression: settings/preprocessing/ssp_ica.md
                 - Amplitude-based artifact rejection: settings/preprocessing/artifacts.md
         - Sensor-level analysis:
             - Condition contrasts: settings/sensor/contrasts.md
@@ -116,6 +116,7 @@ nav:
         - examples/ds000248_no_mri.md
         - examples/ds003104.md
         - examples/eeg_matchingpennies.md
+        - examples/MNE-phantom-KIT-data.md
         - examples/ds001810.md
         - examples/ds000117.md
         - examples/ds003775.md

diff --git a/docs/source/examples/gen_examples.py b/docs/source/examples/gen_examples.py
@@ -63,6 +63,8 @@ def _gen_demonstrated_funcs(example_config_path: Path) -> dict:
         key = "Maxwell filter"
         funcs[key] = funcs[key] or config.use_maxwell_filter
         funcs["Frequency filter"] = config.l_freq or config.h_freq
+        key = "Artifact regression"
+        funcs[key] = funcs[key] or (config.regress_artifact is not None)
         key = "SSP"
         funcs[key] = funcs[key] or (config.spatial_filter == "ssp")
         key = "ICA"
@@ -144,6 +146,7 @@ def _gen_demonstrated_funcs(example_config_path: Path) -> dict:
         logger.warning(f"Dataset {dataset_name} has no HTML report.")
         continue
 
+    assert dataset_options_key in DATASET_OPTIONS, dataset_options_key
     options = DATASET_OPTIONS[dataset_options_key].copy()  # we modify locally
 
     report_str = "\n## Generated output\n\n"
@@ -200,13 +203,18 @@ def _gen_demonstrated_funcs(example_config_path: Path) -> dict:
             f"{fname.name} :fontawesome-solid-square-poll-vertical:</a>\n\n"
         )
 
-    assert sum(key in options for key in ("openneuro", "git", "web", "datalad")) == 1
+    assert (
+        sum(key in options for key in ("openneuro", "git", "web", "datalad", "mne"))
+        == 1
+    )
     if "openneuro" in options:
         url = f'https://openneuro.org/datasets/{options["openneuro"]}'
     elif "git" in options:
         url = options["git"]
     elif "web" in options:
         url = options["web"]
+    elif "mne" in options:
+        url = f"https://mne.tools/dev/generated/mne.datasets.{options['mne']}.data_path.html"  # noqa: E501
     else:
         assert "datalad" in options  # guaranteed above
         url = ""
@@ -246,7 +254,9 @@ def _gen_demonstrated_funcs(example_config_path: Path) -> dict:
 
     # TODO: For things like ERP_CORE_ERN, decoding_csp are not populated
     # properly by the root config
-    config_path = root / "tests" / "configs" / f"config_{dataset_name}.py"
+    config_path = (
+        root / "tests" / "configs" / f"config_{dataset_name.replace('-', '_')}.py"
+    )
     config = config_path.read_text(encoding="utf-8-sig").strip()
     descr_end_idx = config[2:].find('"""')
     config_descr = "# " + config[: descr_end_idx + 1].replace('"""', "").strip()

diff --git a/docs/source/settings/preprocessing/ssp_ica.md b/docs/source/settings/preprocessing/ssp_ica.md
@@ -11,6 +11,7 @@ tags:
 ::: mne_bids_pipeline._config
     options:
       members:
+        - regress_artifact
         - spatial_filter
         - min_ecg_epochs
         - min_eog_epochs

diff --git a/docs/source/v1.6.md.inc b/docs/source/v1.6.md.inc
@@ -2,9 +2,9 @@
 
 ## vX.Y.0 (unreleased)
 
-[//]: # (### :new: New features & enhancements)
+### :new: New features & enhancements
 
-[//]: # (- Whatever (#000 by @whoever))
+- Added [`regress_artifact`][mne_bids_pipeline._config.regress_artifact] to allow artifact regression (e.g., of MEG reference sensors in KIT systems) (#837 by @larsoner)
 
 [//]: # (### :warning: Behavior changes)
 

diff --git a/mne_bids_pipeline/_config.py b/mne_bids_pipeline/_config.py
@@ -1,7 +1,8 @@
 # Default settings for data processing and analysis.
 
-from typing import Callable, Iterable, Literal, Optional, Union
+from typing import Annotated, Any, Callable, Literal, Optional, Sequence, Union
 
+from annotated_types import Ge, Interval, Len
 from mne import Covariance
 from mne_bids import BIDSPath
 
@@ -94,7 +95,7 @@
 The task to process.
 """
 
-runs: Union[Iterable, Literal["all"]] = "all"
+runs: Union[Sequence, Literal["all"]] = "all"
 """
 The runs to process. If `'all'`, will process all runs found in the
 BIDS dataset.
@@ -143,15 +144,15 @@
 The BIDS `space` entity.
 """
 
-plot_psd_for_runs: Union[Literal["all"], Iterable[str]] = "all"
+plot_psd_for_runs: Union[Literal["all"], Sequence[str]] = "all"
 """
 For which runs to add a power spectral density (PSD) plot to the generated
 report. This can take a considerable amount of time if you have many long
 runs. In this case, specify the runs, or pass an empty list to disable raw PSD
 plotting.
 """
 
-subjects: Union[Iterable[str], Literal["all"]] = "all"
+subjects: Union[Sequence[str], Literal["all"]] = "all"
 """
 Subjects to analyze. If `'all'`, include all subjects. To only
 include a subset of subjects, pass a list of their identifiers. Even
@@ -171,7 +172,7 @@
     ```
 """
 
-exclude_subjects: Iterable[str] = []
+exclude_subjects: Sequence[str] = []
 """
 Specify subjects to exclude from analysis. The MEG empty-room mock-subject
 is automatically excluded from regular analysis.
@@ -201,7 +202,7 @@
 covariance (via `noise_cov='rest'`).
 """
 
-ch_types: Iterable[Literal["meg", "mag", "grad", "eeg"]] = []
+ch_types: Annotated[Sequence[Literal["meg", "mag", "grad", "eeg"]], Len(1, 4)] = []
 """
 The channel types to consider.
 
@@ -252,7 +253,7 @@
     ```
 """
 
-eog_channels: Optional[Iterable[str]] = None
+eog_channels: Optional[Sequence[str]] = None
 """
 Specify EOG channels to use, or create virtual EOG channels.
 
@@ -320,7 +321,7 @@
     ```
 """
 
-eeg_reference: Union[Literal["average"], str, Iterable["str"]] = "average"
+eeg_reference: Union[Literal["average"], str, Sequence["str"]] = "average"
 """
 The EEG reference to use. If `average`, will use the average reference,
 i.e. the average across all channels. If a string, must be the name of a single
@@ -371,7 +372,7 @@
     ```
 """
 
-drop_channels: Iterable[str] = []
+drop_channels: Sequence[str] = []
 """
 Names of channels to remove from the data. This can be useful, for example,
 if you have added a new bipolar channel via `eeg_bipolar_channels` and now wish
@@ -385,7 +386,7 @@
 """
 
 analyze_channels: Union[
-    Literal["all"], Literal["ch_types"], Iterable["str"]
+    Literal["all"], Literal["ch_types"], Sequence["str"]
 ] = "ch_types"
 """
 The names of the channels to analyze during ERP/ERF and time-frequency analysis
@@ -789,7 +790,7 @@
 Keep it `None` if no lowpass filtering should be applied.
 """
 
-notch_freq: Optional[Union[float, Iterable[float]]] = None
+notch_freq: Optional[Union[float, Sequence[float]]] = None
 """
 Notch filter frequency. More than one frequency can be supplied, e.g. to remove
 harmonics. Keep it `None` if no notch filter should be applied.
@@ -827,7 +828,7 @@
 Specifies the transition bandwidth of the notch filter. The default is `1.`.
 """
 
-notch_widths: Optional[Union[float, Iterable[float]]] = None
+notch_widths: Optional[Union[float, Sequence[float]]] = None
 """
 Specifies the width of each stop band. `None` uses the MNE default.
 """
@@ -931,7 +932,7 @@
 window for metadata generation.
 """
 
-epochs_metadata_keep_first: Optional[Iterable[str]] = None
+epochs_metadata_keep_first: Optional[Sequence[str]] = None
 """
 Event groupings using hierarchical event descriptors (HEDs) for which to store
 the time of the **first** occurrence of any event of this group in a new column
@@ -959,7 +960,7 @@
     and `first_stimulus`.
 """
 
-epochs_metadata_keep_last: Optional[Iterable[str]] = None
+epochs_metadata_keep_last: Optional[Sequence[str]] = None
 """
 Same as `epochs_metadata_keep_first`, but for keeping the **last**
 occurrence of matching event types. The columns indicating the event types
@@ -979,7 +980,7 @@
     ```
 """  # noqa: E501
 
-conditions: Optional[Union[Iterable[str], dict[str, str]]] = None
+conditions: Optional[Union[Sequence[str], dict[str, str]]] = None
 """
 The time-locked events based on which to create evoked responses.
 This can either be name of the experimental condition as specified in the
@@ -1058,7 +1059,7 @@
     ```
 """
 
-contrasts: Iterable[Union[tuple[str, str], ArbitraryContrast]] = []
+contrasts: Sequence[Union[tuple[str, str], ArbitraryContrast]] = []
 """
 The conditions to contrast via a subtraction of ERPs / ERFs. The list elements
 can either be tuples or dictionaries (or a mix of both). Each element in the
@@ -1125,6 +1126,24 @@
 #
 # Currently you cannot use both.
 
+regress_artifact: Optional[dict[str, Any]] = None
+"""
+Keyword arguments to pass to the `mne.preprocessing.EOGRegression` model used
+in `mne.preprocessing.regress_artifact`. If `None`, no time-domain regression will
+be applied. Note that any channels picked in `regress_artifact["picks_artifact"]` will
+have the same time-domain filters applied to them as the experimental data.
+
+Artifact regression is applied before SSP or ICA.
+
+???+ example "Example"
+    For example, if you have MEG reference channel data recorded in three
+    miscellaneous channels, you could do:
+
+    ```python
+    regress_artifact = {"picks": "meg", "picks_artifact": ["MISC 001", "MISC 002", "MISC 003"]}
+    ```
+"""  # noqa: E501
+
 spatial_filter: Optional[Literal["ssp", "ica"]] = None
 """
 Whether to use a spatial filter to detect and remove artifacts. The BIDS
@@ -1516,7 +1535,7 @@
 you don't need to be worried about **exactly** balancing class sizes.
 """
 
-decoding_n_splits: int = 5
+decoding_n_splits: Annotated[int, Ge(2)] = 5
 """
 The number of folds (also called "splits") to use in the K-fold cross-validation
 scheme.
@@ -1577,7 +1596,7 @@
 test to determine the significance of the decoding scores across participants.
 """
 
-cluster_permutation_p_threshold: float = 0.05
+cluster_permutation_p_threshold: Annotated[float, Interval(gt=0, lt=1)] = 0.05
 """
 The alpha level (p-value, p threshold) to use for rejecting the null hypothesis
 that the clusters show no significant difference between conditions. This is
@@ -1609,7 +1628,7 @@
 # TIME-FREQUENCY
 # --------------
 
-time_frequency_conditions: Iterable[str] = []
+time_frequency_conditions: Sequence[str] = []
 """
 The conditions to compute time-frequency decomposition on.