Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] convert sample sizes #919

Merged
merged 3 commits into from
Mar 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions nimare/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,45 @@ def _analysis_to_dict(study, analysis):
"z": [p.z for p in analysis.points] or [None],
},
}
sample_size = study.metadata.get("sample_size")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (complexity): Consider extracting the inline sample size and annotation validations into helper functions to improve readability and reduce nesting.

Consider extracting the inline sample size and annotation validations into small helper functions. This will reduce nesting and improve readability. For example, you could do the following:

```python
def validate_sample_sizes(analysis_metadata, study_metadata):
    sample_sizes = analysis_metadata.get("sample_sizes")
    if sample_sizes is not None and not isinstance(sample_sizes, (list, tuple)):
        raise TypeError(
            f"Expected sample_sizes to be list or tuple, but got {type(sample_sizes)}"
        )
    if not sample_sizes:
        sample_size = analysis_metadata.get("sample_size") or study_metadata.get("sample_size")
        if sample_size is not None and not isinstance(sample_size, (int, float)):
            raise TypeError(
                f"Expected sample_size to be numeric, but got {type(sample_size)}"
            )
        return [sample_size] if sample_size is not None else None
    return sample_sizes

def validate_annotations(annotations):
    labels = {}
    for annotation in annotations.values():
        if not isinstance(annotation, dict):
            raise TypeError(
                f"Expected annotation to be dict, but got {type(annotation)}"
            )
        labels.update(annotation)
    return labels

Then simplify your main code as follows:

sample_sizes_validated = validate_sample_sizes(analysis.metadata, study.metadata)
if sample_sizes_validated:
    try:
        result["metadata"]["sample_sizes"] = sample_sizes_validated
    except TypeError as e:
        raise TypeError(f"Error converting sample size data to list: {str(e)}") from e

if analysis.annotations:
    try:
        result["labels"] = validate_annotations(analysis.annotations)
    except (TypeError, AttributeError) as e:
        raise ValueError(f"Invalid annotation format: {str(e)}") from e

These changes isolate the validation logic into focused units, reducing complexity while preserving functionality.

if sample_size:
result["metadata"]["sample_sizes"] = [sample_size]

sample_sizes = analysis.metadata.get("sample_sizes")
sample_size = None

# Validate sample sizes if present
if sample_sizes is not None and not isinstance(sample_sizes, (list, tuple)):
raise TypeError(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Review broad exception handling for annotations.

The try/except block around updating the 'labels' dictionary catches TypeError and AttributeError broadly and re-raises them as ValueError. This might mask issues that are not related to annotation formatting. Consider narrowing the exception scope to only the operations you expect might fail.

f"Expected sample_sizes to be list or tuple, but got {type(sample_sizes)}"
)

if not sample_sizes:
# Try to get single sample size from analysis or study metadata
sample_size = analysis.metadata.get("sample_size")
if sample_size is None:
sample_size = study.metadata.get("sample_size")

# Validate single sample size if present
if sample_size is not None and not isinstance(sample_size, (int, float)):
raise TypeError(f"Expected sample_size to be numeric, but got {type(sample_size)}")

# Add sample size info to result if available
if sample_sizes or sample_size is not None:
try:
result["metadata"]["sample_sizes"] = sample_sizes or [sample_size]
except TypeError as e:
raise TypeError(f"Error converting sample size data to list: {str(e)}") from e

# Handle annotations if present
if analysis.annotations:
result["labels"] = {}
for annotation in analysis.annotations.values():
result["labels"].update(annotation)
try:
for annotation in analysis.annotations.values():
if not isinstance(annotation, dict):
raise TypeError(
f"Expected annotation to be dict, but got {type(annotation)}"
)
result["labels"].update(annotation)
except (TypeError, AttributeError) as e:
raise ValueError(f"Invalid annotation format: {str(e)}") from e

return result

Expand Down
54 changes: 54 additions & 0 deletions nimare/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,60 @@ def test_convert_nimads_to_dataset(example_nimads_studyset, example_nimads_annot
assert isinstance(dset2, nimare.dataset.Dataset)


def test_convert_nimads_to_dataset_sample_sizes(
example_nimads_studyset, example_nimads_annotation
):
"""Conversion of nimads JSON to nimare dataset."""
studyset = Studyset(example_nimads_studyset)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = [2, 20]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add test for single sample size.

Consider adding a test case where the sample_size is a single value instead of a list to ensure both code paths are tested.

Suggested implementation:

def test_convert_nimads_to_dataset_single_sample_size(
    example_nimads_studyset, example_nimads_annotation
):
    """Test conversion of nimads JSON to nimare dataset with a single sample size value."""
    studyset = Studyset(example_nimads_studyset)
    for study in studyset.studies:
        for analysis in study.analyses:
            analysis.metadata["sample_sizes"] = 20

    dset = io.convert_nimads_to_dataset(studyset)

    assert isinstance(dset, nimare.dataset.Dataset)
    assert "sample_sizes" in dset.metadata.columns

def test_convert_sleuth_to_dataset_smoke():

Make sure your test runner picks up the new test function. You may need to adjust the placement of the new test according to your project's test suite conventions.


dset = io.convert_nimads_to_dataset(studyset)

assert isinstance(dset, nimare.dataset.Dataset)
assert "sample_sizes" in dset.metadata.columns
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add tests for edge cases and error handling.

It would be beneficial to include tests that cover edge cases and error conditions within the _analysis_to_dict function, such as when sample_sizes is not a list/tuple, sample_size is not numeric, or when annotations have an invalid format. This ensures that the error handling logic is robust and functions as expected.

Suggested implementation:

def test_convert_nimads_to_dataset_sample_sizes(
    example_nimads_studyset, example_nimads_annotation
):
    """Conversion of nimads JSON to nimare dataset."""
    studyset = Studyset(example_nimads_studyset)
    for study in studyset.studies:
        for analysis in study.analyses:
            analysis.metadata["sample_sizes"] = [2, 20]
import pytest
from nimare import io, dataset  # adjust imports as needed
from nimare.dataset import Dataset
from nimare.io import _analysis_to_dict  # assuming _analysis_to_dict is accessible for testing
def test_analysis_to_dict_invalid_sample_sizes_type(example_nimads_studyset):
    """Test _analysis_to_dict raises ValueError when sample_sizes is not a list/tuple."""
    studyset = Studyset(example_nimads_studyset)
    # Set sample_sizes to an int rather than list/tuple
    for study in studyset.studies:
        for analysis in study.analyses:
            analysis.metadata["sample_sizes"] = 5
    with pytest.raises(ValueError):
        # Trigger conversion which internally calls _analysis_to_dict
        io.convert_nimads_to_dataset(studyset)


def test_analysis_to_dict_non_numeric_sample_sizes(example_nimads_studyset):
    """Test _analysis_to_dict raises ValueError when sample_sizes contains non-numeric values."""
    studyset = Studyset(example_nimads_studyset)
    # Set sample_sizes to a list with non-numeric value
    for study in studyset.studies:
        for analysis in study.analyses:
            analysis.metadata["sample_sizes"] = [2, "twenty"]
    with pytest.raises(ValueError):
        io.convert_nimads_to_dataset(studyset)


def test_analysis_to_dict_invalid_annotations_format(example_nimads_studyset):
    """Test _analysis_to_dict raises ValueError when annotations are in an invalid format."""
    studyset = Studyset(example_nimads_studyset)
    # Here we assume that the annotation is expected to be a dict
    # Set annotation to an invalid format (e.g., a string)
    for study in studyset.studies:
        for analysis in study.analyses:
            analysis.metadata["annotations"] = "invalid_format"
    with pytest.raises(ValueError):
        io.convert_nimads_to_dataset(studyset)

These tests assume that the underlying _analysis_to_dict function (or the conversion process in io.convert_nimads_to_dataset) performs robust type checking and raises a ValueError when encountering:

  1. sample_sizes that is not a list/tuple,
  2. sample_sizes with non-numeric values, and
  3. annotations with an unexpected format.
    You may need to adjust the exception type or the input modifications to match your actual implementation of error handling.
    Also, ensure that the test file has access to the Studyset class and the necessary fixture definitions (example_nimads_studyset, example_nimads_annotation) or import them as needed.



def test_convert_nimads_to_dataset_single_sample_size(
example_nimads_studyset, example_nimads_annotation
):
"""Test conversion of nimads JSON to nimare dataset with a single sample size value."""
studyset = Studyset(example_nimads_studyset)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_size"] = 20

dset = io.convert_nimads_to_dataset(studyset)

assert isinstance(dset, nimare.dataset.Dataset)
assert "sample_sizes" in dset.metadata.columns


def test_analysis_to_dict_invalid_sample_sizes_type(example_nimads_studyset):
"""Test _analysis_to_dict raises ValueError when sample_sizes is not a list/tuple."""
studyset = Studyset(example_nimads_studyset)
# Set sample_sizes to an int rather than list/tuple
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = 5
with pytest.raises(TypeError):
# Trigger conversion which internally calls _analysis_to_dict
io.convert_nimads_to_dataset(studyset)


def test_analysis_to_dict_invalid_annotations_format(example_nimads_studyset):
"""Test _analysis_to_dict raises ValueError when annotations are in an invalid format."""
studyset = Studyset(example_nimads_studyset)
# Here we assume that the annotation is expected to be a dict
# Set annotation to an invalid format (e.g., a string)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["annotations"] = "invalid_format"
with pytest.raises(TypeError):
io.convert_nimads_to_dataset(studyset)


def test_convert_sleuth_to_dataset_smoke():
"""Smoke test for Sleuth text file conversion."""
sleuth_file = os.path.join(get_test_data_path(), "test_sleuth_file.txt")
Expand Down
Loading