1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
## [0.5.9] - 2025-02-01
### Added
- Add support for storing the NSRR token in an environment variable or the user config ([#243](https://github.com/cbrnr/sleepecg/pull/243) by [Simon Pusterhofer](https://github.com/simon-p-2000))
- Add support for downloading and storing activity counts for the MESA dataset ([#249](https://github.com/cbrnr/sleepecg/pull/249) by [Simon Pusterhofer](https://github.com/simon-p-2000))
- Add Python 3.13+ support by building wheels with the stable ABI (abi3) ([#251](https://github.com/cbrnr/sleepecg/pull/251) by [Eric Larson](https://github.com/larsoner))

### Changed
4 changes: 3 additions & 1 deletion docs/datasets.md
@@ -44,7 +44,7 @@ Instead of always using [`set_nsrr_token()`](sleepecg.set_nsrr_token), you can s

SleepECG checks for the NSRR token in the following order:

1. Token set via [`set_nsrr_token()`][sleepecg.set_nsrr_token]
1. Token set via [`set_nsrr_token()`](sleepecg.set_nsrr_token)
2. Token set via environment variable `NSRR_TOKEN`
3. Token set in the user configuration
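For illustration, a minimal sketch of option 2 (not taken from the documentation above; it assumes the variable is set before SleepECG needs the token):

```python
import os

# Provide the token via the NSRR_TOKEN environment variable instead of
# calling set_nsrr_token() explicitly.
os.environ["NSRR_TOKEN"] = "<your-download-token-here>"

from sleepecg import read_mesa

mesa = read_mesa(records_pattern="00*")  # token is picked up from the environment
```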

@@ -59,6 +59,8 @@ set_nsrr_token("<your-download-token-here>")
mesa = read_mesa(records_pattern="00*") # note that this is a generator
```

SleepECG supports downloading and storing activity counts for the MESA dataset. These counts quantify a subject's movement and are derived from accelerometer recordings processed with a proprietary algorithm in Philips Actiware. To access activity counts, call [`read_mesa()`](sleepecg.read_mesa) with `activity_source='actigraphy'` to download the data, or with `activity_source='cached'` to use previously downloaded counts.
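As an illustration (a hedged sketch, not part of the documentation text above; it assumes a valid NSRR token), downloading and reusing the activity counts could look like this:

```python
from sleepecg import read_mesa, set_nsrr_token

set_nsrr_token("<your-download-token-here>")

# Download activity counts alongside the polysomnography data.
mesa = read_mesa(records_pattern="00*", activity_source="actigraphy")
rec = next(mesa)  # reader functions are generators, so consume them explicitly
print(rec.activity_counts)  # NumPy array of activity counts for this record

# On later runs, reuse the locally stored counts instead of downloading again.
mesa = read_mesa(records_pattern="00*", activity_source="cached")
```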

!!! note
    Reader functions are generators, so they do not return the data directly. To access the data, you need to consume the generator, either by iterating over it or by calling `next()` repeatedly.

5 changes: 2 additions & 3 deletions examples/classifiers/ws_gru_mesa.py
@@ -15,7 +15,7 @@
    set_nsrr_token,
)

set_nsrr_token("25042-5JxoCwc8KQ3uV3ubyK-D")
set_nsrr_token("your-token-here")

TRAIN = True # set to False to skip training and load classifier from disk

@@ -27,7 +27,7 @@
if TRAIN:
    print("‣ Starting training...")
    print("‣‣ Extracting features...")
    records = list(read_mesa(offline=False, activity_source="actigraphy"))
    records = list(read_mesa(offline=False))

    feature_extraction_params = {
        "lookback": 240,
@@ -38,7 +38,6 @@
"recording_start_time",
"age",
"gender",
"activity_counts"
],
"min_rri": 0.3,
"max_rri": 2,
75 changes: 0 additions & 75 deletions examples/classifiers/ws_lda_mesa.py

This file was deleted.

1 change: 0 additions & 1 deletion src/sleepecg/__init__.py
@@ -6,7 +6,6 @@
    list_classifiers,
    load_classifier,
    prepare_data_keras,
    prepare_data_sklearn,
    print_class_balance,
    save_classifier,
    stage,
49 changes: 0 additions & 49 deletions src/sleepecg/classification.py
@@ -21,55 +21,6 @@
from sleepecg.io.sleep_readers import SleepRecord, SleepStage
from sleepecg.utils import _STAGE_NAMES, _merge_sleep_stages

def prepare_data_sklearn(
    features: list[np.ndarray],
    stages: list[np.ndarray],
    feature_ids: list[str],
    stages_mode: str,
    remove_nan: str = 'none',
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Prepare sleep records for a sklearn model.

    The following steps are performed:

    - Merge sleep stages in `stages` according to `stages_mode`.
    - Stack the features and stages of all records into single arrays.
    - Remove all samples whose stage is `SleepStage.UNDEFINED` (i.e. `0`).

    Parameters
    ----------
    features : list[np.ndarray]
        Each 2D array in this list is a feature matrix of shape `(n_samples, n_features)`
        corresponding to a single record as returned by `extract_features()`.
    stages : list[np.ndarray]
        Each 1D array in this list contains the sleep stages of a single record as
        returned by `extract_features()`.
    feature_ids : list[str]
        A list containing the identifiers of the extracted features. Feature groups passed
        in `feature_selection` are expanded to all individual features they contain. The
        order matches the column order of the feature matrix.
    stages_mode : str
        Identifier of the grouping mode. Can be any of `'wake-sleep'`, `'wake-rem-nrem'`,
        `'wake-rem-light-n3'`, `'wake-rem-n1-n2-n3'`.

    Returns
    -------
    features_stacked : np.ndarray
        A 2D array of shape `(total_samples, n_features)`.
    stages_stacked : np.ndarray
        A 1D array containing the annotated sleep stage for each sample. The sleep stages
        are merged based on the `stages_mode` parameter.
    record_ids : np.ndarray
        A 1D array containing, for each valid sample, the index of the record it belongs
        to.
    """
    record_ids = np.hstack([i * np.ones(len(X)) for i, X in enumerate(features)])
    features_stacked = np.vstack(features)
    stages_stacked = np.hstack(_merge_sleep_stages(stages, stages_mode))
    valid = stages_stacked != SleepStage.UNDEFINED

    return features_stacked[valid], stages_stacked[valid], record_ids[valid]
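For context, a hypothetical usage sketch (not part of this diff) of how the removed helper would have been combined with `extract_features()` and a scikit-learn estimator:

```python
# Hypothetical sketch only: names and arguments are illustrative.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sleepecg import extract_features, read_mesa

records = list(read_mesa(records_pattern="00*"))
features, stages, feature_ids = extract_features(records)
X, y, record_ids = prepare_data_sklearn(features, stages, feature_ids, "wake-sleep")
clf = LinearDiscriminantAnalysis().fit(X, y)
```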

def prepare_data_keras(
    features: list[np.ndarray],
13 changes: 5 additions & 8 deletions src/sleepecg/feature_extraction.py
@@ -11,6 +11,7 @@

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
from scipy.integrate import trapezoid
from scipy.interpolate import interp1d
from scipy.signal import periodogram

@@ -56,7 +57,6 @@
"LF_HF_ratio",
),
"metadata": ("recording_start_time", "age", "gender", "weight"),
"actigraphy": ("activity_counts", "dummy_feature"),
}
_FEATURE_ID_TO_GROUP = {id: group for group, ids in _FEATURE_GROUPS.items() for id in ids}

@@ -368,10 +368,10 @@ def _hrv_frequencydomain_features(
    lf_mask = (0.04 < freq) & (freq <= 0.15)
    hf_mask = (0.15 < freq) & (freq <= 0.4)

    total_power = np.trapz(psd[:, total_power_mask], freq[total_power_mask])
    vlf = np.trapz(psd[:, vlf_mask], freq[vlf_mask])
    lf = np.trapz(psd[:, lf_mask], freq[lf_mask])
    hf = np.trapz(psd[:, hf_mask], freq[hf_mask])
    total_power = trapezoid(psd[:, total_power_mask], freq[total_power_mask])
    vlf = trapezoid(psd[:, vlf_mask], freq[vlf_mask])
    lf = trapezoid(psd[:, lf_mask], freq[lf_mask])
    hf = trapezoid(psd[:, hf_mask], freq[hf_mask])

    lf_norm = lf / (lf + hf) * 100
    hf_norm = hf / (lf + hf) * 100
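As a standalone illustration of the hunk above (not code from the module — the PSD here is 1D and synthetic, whereas the module integrates a 2D per-window PSD), `scipy.integrate.trapezoid` is a drop-in replacement for `np.trapz`, which is deprecated in NumPy 2.0:

```python
import numpy as np
from scipy.integrate import trapezoid
from scipy.signal import periodogram

rng = np.random.default_rng(0)
x = rng.standard_normal(1024)     # synthetic signal
freq, psd = periodogram(x, fs=4)  # PSD with an arbitrary 4 Hz sampling rate for illustration

lf_mask = (0.04 < freq) & (freq <= 0.15)
hf_mask = (0.15 < freq) & (freq <= 0.4)

lf = trapezoid(psd[lf_mask], freq[lf_mask])  # power in the LF band
hf = trapezoid(psd[hf_mask], freq[hf_mask])  # power in the HF band
print(lf / hf)                               # LF/HF ratio
```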
@@ -657,9 +657,6 @@ def _extract_features_single(
            )
        elif feature_group == "metadata":
            X.append(_metadata_features(record, num_stages))
        elif feature_group == "actigraphy":
            if record.activity_counts is not None:
                X.append(record.activity_counts.reshape(-1, 1))
    features = np.hstack(X)[:, col_indices]

    if record.sleep_stages is None or sleep_stage_duration == record.sleep_stage_duration:
25 changes: 15 additions & 10 deletions src/sleepecg/io/sleep_readers.py
@@ -8,6 +8,7 @@

import csv
import datetime
import os
from collections.abc import Iterator
from dataclasses import dataclass
from enum import IntEnum
@@ -442,6 +443,10 @@ def read_mesa(
    checksums[activity_filename],
)

if not os.path.exists(activity_filepath):
    print(f"Skipping {record_id} due to missing activity data.")
    continue

activity_data = []

with open(activity_filepath) as csv_file:
@@ -470,19 +475,19 @@

start_line = overlap_data[mesaid] + 1

end_line = (
    int(
        next(
            row["line"]
            for row in activity_data
            if row.get("linetime") == recording_end_time_str
        )
    )
    - 1
)
for item in activity_data:
    if item.get("linetime") == recording_end_time_str:
        end_line = int(item["line"]) - 1
        break
else:
    print(
        f"Skipping {record_id} due to missing line matching "
        f"{recording_end_time_str}."
    )
    continue

activity_counts = [
    row["activity"] for row in activity_data[start_line - 1 : end_line]
    item["activity"] for item in activity_data[start_line - 1 : end_line]
]

activity_counts = np.array(activity_counts)
2 changes: 2 additions & 0 deletions tests/test_sleep_readers.py
@@ -25,6 +25,7 @@ def _dummy_nsrr_overlap(filename: str, mesa_ids: list[int]):


def _dummy_nsrr_actigraphy(filename: str, mesa_id: str):
"""Create dummy actigraphy file with four usable activity counts."""
base_time = datetime.datetime(2024, 1, 1, 20, 30, 0)

linetimes = [
@@ -39,6 +40,7 @@ def _dummy_nsrr_actigraphy(filename: str, mesa_id: str):


def _dummy_nsrr_actigraphy_cached(filename: str):
"""Create dummy npy file that resembles cached activity counts."""
activity_counts = np.array([10, 10, 10, 10, 10, 10])
np.save(filename, activity_counts)

Expand Down