-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ENH] convert sample sizes #919
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,13 +62,45 @@ def _analysis_to_dict(study, analysis): | |
"z": [p.z for p in analysis.points] or [None], | ||
}, | ||
} | ||
sample_size = study.metadata.get("sample_size") | ||
if sample_size: | ||
result["metadata"]["sample_sizes"] = [sample_size] | ||
|
||
sample_sizes = analysis.metadata.get("sample_sizes") | ||
sample_size = None | ||
|
||
# Validate sample sizes if present | ||
if sample_sizes is not None and not isinstance(sample_sizes, (list, tuple)): | ||
raise TypeError( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (bug_risk): Review broad exception handling for annotations. The try/except block around updating the 'labels' dictionary catches TypeError and AttributeError broadly and re-raises them as ValueError. This might mask issues that are not related to annotation formatting. Consider narrowing the exception scope to only the operations you expect might fail. |
||
f"Expected sample_sizes to be list or tuple, but got {type(sample_sizes)}" | ||
) | ||
|
||
if not sample_sizes: | ||
# Try to get single sample size from analysis or study metadata | ||
sample_size = analysis.metadata.get("sample_size") | ||
if sample_size is None: | ||
sample_size = study.metadata.get("sample_size") | ||
|
||
# Validate single sample size if present | ||
if sample_size is not None and not isinstance(sample_size, (int, float)): | ||
raise TypeError(f"Expected sample_size to be numeric, but got {type(sample_size)}") | ||
|
||
# Add sample size info to result if available | ||
if sample_sizes or sample_size is not None: | ||
try: | ||
result["metadata"]["sample_sizes"] = sample_sizes or [sample_size] | ||
except TypeError as e: | ||
raise TypeError(f"Error converting sample size data to list: {str(e)}") from e | ||
|
||
# Handle annotations if present | ||
if analysis.annotations: | ||
result["labels"] = {} | ||
for annotation in analysis.annotations.values(): | ||
result["labels"].update(annotation) | ||
try: | ||
for annotation in analysis.annotations.values(): | ||
if not isinstance(annotation, dict): | ||
raise TypeError( | ||
f"Expected annotation to be dict, but got {type(annotation)}" | ||
) | ||
result["labels"].update(annotation) | ||
except (TypeError, AttributeError) as e: | ||
raise ValueError(f"Invalid annotation format: {str(e)}") from e | ||
|
||
return result | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,60 @@ def test_convert_nimads_to_dataset(example_nimads_studyset, example_nimads_annot | |
assert isinstance(dset2, nimare.dataset.Dataset) | ||
|
||
|
||
def test_convert_nimads_to_dataset_sample_sizes( | ||
example_nimads_studyset, example_nimads_annotation | ||
): | ||
"""Conversion of nimads JSON to nimare dataset.""" | ||
studyset = Studyset(example_nimads_studyset) | ||
for study in studyset.studies: | ||
for analysis in study.analyses: | ||
analysis.metadata["sample_sizes"] = [2, 20] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Add test for single sample size. Consider adding a test case where the Suggested implementation: def test_convert_nimads_to_dataset_single_sample_size(
example_nimads_studyset, example_nimads_annotation
):
"""Test conversion of nimads JSON to nimare dataset with a single sample size value."""
studyset = Studyset(example_nimads_studyset)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = 20
dset = io.convert_nimads_to_dataset(studyset)
assert isinstance(dset, nimare.dataset.Dataset)
assert "sample_sizes" in dset.metadata.columns
def test_convert_sleuth_to_dataset_smoke(): Make sure your test runner picks up the new test function. You may need to adjust the placement of the new test according to your project's test suite conventions. |
||
|
||
dset = io.convert_nimads_to_dataset(studyset) | ||
|
||
assert isinstance(dset, nimare.dataset.Dataset) | ||
assert "sample_sizes" in dset.metadata.columns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Add tests for edge cases and error handling. It would be beneficial to include tests that cover edge cases and error conditions within the Suggested implementation: def test_convert_nimads_to_dataset_sample_sizes(
example_nimads_studyset, example_nimads_annotation
):
"""Conversion of nimads JSON to nimare dataset."""
studyset = Studyset(example_nimads_studyset)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = [2, 20] import pytest
from nimare import io, dataset # adjust imports as needed
from nimare.dataset import Dataset
from nimare.io import _analysis_to_dict # assuming _analysis_to_dict is accessible for testing
def test_analysis_to_dict_invalid_sample_sizes_type(example_nimads_studyset):
"""Test _analysis_to_dict raises ValueError when sample_sizes is not a list/tuple."""
studyset = Studyset(example_nimads_studyset)
# Set sample_sizes to an int rather than list/tuple
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = 5
with pytest.raises(ValueError):
# Trigger conversion which internally calls _analysis_to_dict
io.convert_nimads_to_dataset(studyset)
def test_analysis_to_dict_non_numeric_sample_sizes(example_nimads_studyset):
"""Test _analysis_to_dict raises ValueError when sample_sizes contains non-numeric values."""
studyset = Studyset(example_nimads_studyset)
# Set sample_sizes to a list with non-numeric value
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["sample_sizes"] = [2, "twenty"]
with pytest.raises(ValueError):
io.convert_nimads_to_dataset(studyset)
def test_analysis_to_dict_invalid_annotations_format(example_nimads_studyset):
"""Test _analysis_to_dict raises ValueError when annotations are in an invalid format."""
studyset = Studyset(example_nimads_studyset)
# Here we assume that the annotation is expected to be a dict
# Set annotation to an invalid format (e.g., a string)
for study in studyset.studies:
for analysis in study.analyses:
analysis.metadata["annotations"] = "invalid_format"
with pytest.raises(ValueError):
io.convert_nimads_to_dataset(studyset) These tests assume that the underlying _analysis_to_dict function (or the conversion process in io.convert_nimads_to_dataset) performs robust type checking and raises a ValueError when encountering:
|
||
|
||
|
||
def test_convert_nimads_to_dataset_single_sample_size( | ||
example_nimads_studyset, example_nimads_annotation | ||
): | ||
"""Test conversion of nimads JSON to nimare dataset with a single sample size value.""" | ||
studyset = Studyset(example_nimads_studyset) | ||
for study in studyset.studies: | ||
for analysis in study.analyses: | ||
analysis.metadata["sample_size"] = 20 | ||
|
||
dset = io.convert_nimads_to_dataset(studyset) | ||
|
||
assert isinstance(dset, nimare.dataset.Dataset) | ||
assert "sample_sizes" in dset.metadata.columns | ||
|
||
|
||
def test_analysis_to_dict_invalid_sample_sizes_type(example_nimads_studyset): | ||
"""Test _analysis_to_dict raises ValueError when sample_sizes is not a list/tuple.""" | ||
studyset = Studyset(example_nimads_studyset) | ||
# Set sample_sizes to an int rather than list/tuple | ||
for study in studyset.studies: | ||
for analysis in study.analyses: | ||
analysis.metadata["sample_sizes"] = 5 | ||
with pytest.raises(TypeError): | ||
# Trigger conversion which internally calls _analysis_to_dict | ||
io.convert_nimads_to_dataset(studyset) | ||
|
||
|
||
def test_analysis_to_dict_invalid_annotations_format(example_nimads_studyset): | ||
"""Test _analysis_to_dict raises ValueError when annotations are in an invalid format.""" | ||
studyset = Studyset(example_nimads_studyset) | ||
# Here we assume that the annotation is expected to be a dict | ||
# Set annotation to an invalid format (e.g., a string) | ||
for study in studyset.studies: | ||
for analysis in study.analyses: | ||
analysis.metadata["annotations"] = "invalid_format" | ||
with pytest.raises(TypeError): | ||
io.convert_nimads_to_dataset(studyset) | ||
|
||
|
||
def test_convert_sleuth_to_dataset_smoke(): | ||
"""Smoke test for Sleuth text file conversion.""" | ||
sleuth_file = os.path.join(get_test_data_path(), "test_sleuth_file.txt") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (complexity): Consider extracting the inline sample size and annotation validations into helper functions to improve readability and reduce nesting.
Then simplify your main code as follows:
These changes isolate the validation logic into focused units, reducing complexity while preserving functionality.