fixed bug with BIDS sidecar files when supplied by user and updated tests #394

Merged 5 commits · Dec 19, 2023

40 changes: 32 additions & 8 deletions src/nidm/experiment/Utils.py
@@ -1409,17 +1409,28 @@ def map_variables_to_terms(
dataset_identifier=None,
):
"""

:param df: data frame with first row containing variable names
:param assessment_name: name for the assessment, used when storing JSON mapping dictionary keys
:param json_source: optional JSON document, either a file or an in-memory structure,
with variable names as keys and minimal fields "definition", "label", "url"
:param output_file: output filename to save variable->term mappings
:param directory: if the output_file parameter is set to None, use this directory to store the default
JSON mapping file when doing variable->term mappings
:param bids: if set to True, a BIDS-compliant sidecar file will be written if annotations are made
:param owl_file: if a Web Ontology Language (OWL) file is supplied, it will be used to look for terms
while annotating; otherwise the default NIDM terminology will be used
:param associate_concepts: if set to True, concept association will be performed for each variable;
otherwise it will not
:param dataset_identifier: unique identifier for a dataset (e.g., a project in OpenNeuro), used in the
NIDM records as a namespace along with the unique ID generated for the NIDM RDF graphs
:return: dictionary mapping variable names (i.e., columns) to terms
"""

# 12/15/23: indicator variable tracking whether annotations were made with the pynidm tools. If not, and this
# is a bids-nidm conversion where the user supplied a bids-compliant json sidecar file, keep the original
# file unchanged.
annot_made = False

# dictionary mapping column name to preferred term
column_to_terms = {}

@@ -1549,11 +1560,13 @@ def map_variables_to_terms(
column_to_terms[current_tuple]["label"] = json_map[json_key][
"sourceVariable"
]
# this is probably a BIDS json file so use the json_key as the label
else:
column_to_terms[current_tuple]["label"] = ""
column_to_terms[current_tuple]["label"] = json_key
print(
"No label or source_variable or sourceVariable keys found in json mapping file for variable "
f"{json_key}. Consider adding these to the json file as they are important"
"No label or source_variable/SourceVariable key found in json mapping file for variable "
f"{json_key}. This is ok if this is a BIDS json sidecar file."
"Otherwise, consider adding a label to the json file."
)
else:
column_to_terms[current_tuple]["label"] = json_map[json_key][
@@ -1812,6 +1825,7 @@ def map_variables_to_terms(
"maxValue:",
column_to_terms[current_tuple]["responseOptions"]["maxValue"],
)

if "hasUnit" in json_map[json_key]:
# upgrade 'hasUnit' to 'responseOptions'->'unitCode'
if "responseOptions" not in column_to_terms[current_tuple].keys():
@@ -1849,6 +1863,7 @@ def map_variables_to_terms(
ilx_obj,
nidm_owl_graph=nidm_owl_graph,
)
annot_made = True
# write annotations to json file so user can start up again if not doing whole file
write_json_mapping_file(
column_to_terms, output_file, bids
@@ -1917,7 +1932,8 @@ def map_variables_to_terms(
# if user ran in mode where they want to associate concepts and this isn't the participant
# id field then associate concepts.
if match_participant_id_field(
json_map[json_key]["source_variable"]
# json_map[json_key]["source_variable"]
column_to_terms[current_tuple]["source_variable"]
):
column_to_terms[current_tuple]["isAbout"] = []
column_to_terms[current_tuple]["isAbout"].append(
@@ -1936,6 +1952,7 @@ def map_variables_to_terms(
ilx_obj,
nidm_owl_graph=nidm_owl_graph,
)
annot_made = True
# write annotations to json file so user can start up again if not doing whole file
write_json_mapping_file(column_to_terms, output_file, bids)

@@ -1990,6 +2007,8 @@ def map_variables_to_terms(
column_to_terms[current_tuple] = {}
# enter user interaction function to get data dictionary annotations from user
annotate_data_element(column, current_tuple, column_to_terms)
# 12/15/23: interactive annotation counts as an annotation being made
annot_made = True
# then ask user to find a concept if they selected to do so
if associate_concepts:
# provide user with opportunity to associate a concept with this annotation
@@ -2000,6 +2019,7 @@ def map_variables_to_terms(
ilx_obj,
nidm_owl_graph=nidm_owl_graph,
)
annot_made = True
# write annotations to json file so user can start up again if not doing whole file
write_json_mapping_file(column_to_terms, output_file, bids)

@@ -2062,8 +2082,12 @@ def map_variables_to_terms(
column_to_terms[current_tuple]["url"] = ilx_output.iri
except Exception:
print("WARNING: WIP: Data element not submitted to InterLex. ")
# write annotations to json file since data element annotations are complete
write_json_mapping_file(column_to_terms, output_file, bids)

# 12/15/23: If doing a BIDS-NIDM conversion and the user supplied a BIDS-compliant json sidecar file
# and no annotations were made, leave original BIDS json file as it is...
if annot_made:
# write annotations to json file since data element annotations are complete
write_json_mapping_file(column_to_terms, output_file, bids)

# get CDEs for data dictionary and NIDM graph entity of data
cde = DD_to_nidm(column_to_terms, dataset_identifier=dataset_identifier)
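The substance of this change is the `annot_made` flag guarding the final `write_json_mapping_file` call: the mapping/sidecar file is rewritten only when at least one annotation was actually made, so a user-supplied BIDS sidecar passes through a BIDS-NIDM conversion untouched. A condensed, self-contained sketch of that pattern follows; the writer function below is a simplified stand-in, not the real NIDM helper.

```python
import json
from typing import Optional


def write_sidecar(column_to_terms: dict, output_file: str) -> None:
    # Simplified stand-in for nidm.experiment.Utils.write_json_mapping_file.
    with open(output_file, "w", encoding="utf-8") as fp:
        json.dump(column_to_terms, fp, indent=2)


def map_columns(sidecar: dict, output_file: str, new_annotations: Optional[dict] = None) -> dict:
    """Mimic the guard added in this PR: write the file only when annotations were made."""
    column_to_terms = dict(sidecar)  # start from the user-supplied BIDS sidecar
    annot_made = False
    for column, annotation in (new_annotations or {}).items():
        column_to_terms[column] = annotation
        annot_made = True  # an annotation was actually made
    if annot_made:  # previously the file was rewritten unconditionally
        write_sidecar(column_to_terms, output_file)
    return column_to_terms


# With no new annotations, the sidecar on disk is left exactly as the user supplied it.
map_columns({"age": {"description": "age of participant"}}, "participants.json")
```

The new test `test_map_vars_to_terms_BIDS_simple` below exercises this path with a description-only sidecar and no concept association, so `annot_made` stays False.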
2 changes: 1 addition & 1 deletion src/nidm/experiment/tools/nidm_affinity_propagation.py
@@ -239,7 +239,7 @@ def data_aggregation(reporter): # all data from all the files is collected
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
)
for i, nf in enumerate(not_found_list):
reporter.print(f"{i+1}. {nf}")
reporter.print(f"{i + 1}. {nf}")
not_found_list.clear()
not_found_count += 1
print()
2 changes: 1 addition & 1 deletion src/nidm/experiment/tools/nidm_agglomerative_clustering.py
@@ -239,7 +239,7 @@ def data_aggregation(reporter): # all data from all the files is collected
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
)
for i, nf in enumerate(not_found_list):
reporter.print(f"{i+1}. {nf}")
reporter.print(f"{i + 1}. {nf}")
not_found_list.clear()
not_found_count += 1
print()
2 changes: 1 addition & 1 deletion src/nidm/experiment/tools/nidm_gmm.py
@@ -272,7 +272,7 @@ def data_aggregation(reporter): # all data from all the files is collected
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
)
for i, nf in enumerate(not_found_list):
reporter.print(f"{i+1}. {nf}")
reporter.print(f"{i + 1}. {nf}")
not_found_list.clear()
not_found_count += 1
print()
2 changes: 1 addition & 1 deletion src/nidm/experiment/tools/nidm_kmeans.py
@@ -278,7 +278,7 @@ def data_aggregation(reporter): # all data from all the files is collected
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
)
for i, nf in enumerate(not_found_list):
reporter.print(f"{i+1}. {nf}")
reporter.print(f"{i + 1}. {nf}")
not_found_list.clear()
not_found_count += 1
print()
2 changes: 1 addition & 1 deletion src/nidm/experiment/tools/nidm_linreg.py
@@ -328,7 +328,7 @@ def data_aggregation(reporter): # all data from all the files is collected
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
)
for i, nf in enumerate(not_found_list):
reporter.print(f"{i+1}. {nf}")
reporter.print(f"{i + 1}. {nf}")
not_found_list.clear()
not_found_count += 1
print()
103 changes: 102 additions & 1 deletion tests/experiment/test_map_vars_to_terms.py
@@ -1,16 +1,18 @@
from dataclasses import dataclass
import json
from os.path import join
from pathlib import Path
import pandas as pd
import pytest
from nidm.experiment.Utils import map_variables_to_terms
from nidm.experiment.Utils import map_variables_to_terms, write_json_mapping_file


@dataclass
class Setup:
data: pd.DataFrame
reproschema_json_map: dict
bids_sidecar: dict
bids_sidecar_simple: dict


@pytest.fixture(scope="module")
@@ -135,11 +137,24 @@ def setup() -> Setup:
}
"""
)
bids_sidecar_simple = json.loads(
"""
{
"age": {
"description": "age of participant"
},
"sex": {
"description": "biological sex of participant"
}
}
"""
)

return Setup(
data=data,
reproschema_json_map=reproschema_json_map,
bids_sidecar=bids_sidecar,
bids_sidecar_simple=bids_sidecar_simple,
)


@@ -149,6 +164,7 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
JSON sidecar file
"""

# test BIDS sidecar json file with all pynidm annotations
column_to_terms, cde = map_variables_to_terms(
df=setup.data,
json_source=setup.bids_sidecar,
@@ -204,6 +220,17 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
]["Male"]
)

# Explicitly write out the column_to_terms structure: a sidecar file was supplied and no annotations
# were made, so map_variables_to_terms leaves the original file untouched. Writing the structure here
# lets us verify that it still matches the original sidecar.
write_json_mapping_file(
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), True
)

# now check the JSON sidecar file written above, which should match the BIDS format
with open(tmp_path / "nidm_annotations.json", encoding="utf-8") as fp:
bids_sidecar = json.load(fp)
@@ -245,6 +272,69 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
assert len(results) == 20


def test_map_vars_to_terms_BIDS_simple(setup: Setup, tmp_path: Path) -> None:
"""
This function will test the Utils.py "map_variables_to_terms" function with a simple BIDS-formatted
JSON sidecar file containing only "description" fields
"""

# test BIDS sidecar json file with only simple descriptions and no pynidm annotations
column_to_terms, cde = map_variables_to_terms(
df=setup.data,
json_source=setup.bids_sidecar_simple,
directory=str(tmp_path),
assessment_name="test",
associate_concepts=False,
bids=True,
)

# check whether the JSON mapping structure returned from map_variables_to_terms contains the
# expected variables and descriptions
assert "DD(source='test', variable='age')" in column_to_terms
assert "DD(source='test', variable='sex')" in column_to_terms
assert "description" in column_to_terms["DD(source='test', variable='age')"]
assert "description" in column_to_terms["DD(source='test', variable='sex')"]

# Explicitly write out the column_to_terms structure: a sidecar file was supplied and no annotations
# were made, so map_variables_to_terms leaves the original file untouched. Writing the structure here
# lets us verify that it still matches the original sidecar.
write_json_mapping_file(
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), True
)

# now check the JSON sidecar file written above, which should match the BIDS format
with open(tmp_path / "nidm_annotations.json", encoding="utf-8") as fp:
bids_sidecar = json.load(fp)

assert "age" in bids_sidecar.keys()
assert "sex" in bids_sidecar.keys()
assert "description" in bids_sidecar["age"]
assert "description" in bids_sidecar["sex"]

# check the CDE dataelement graph for correct information
query = """
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>

select distinct ?uuid ?DataElements ?property ?value
where {

?uuid a/rdfs:subClassOf* nidm:DataElement ;
?property ?value .

}"""
qres = cde.query(query)

results = []
for row in qres:
results.append(list(row))

assert len(results) == 16


def test_map_vars_to_terms_reproschema(setup: Setup, tmp_path: Path) -> None:
"""
This function will test the Utils.py "map_variables_to_terms" function with a reproschema-formatted
@@ -305,6 +395,17 @@ def test_map_vars_to_terms_reproschema(setup: Setup, tmp_path: Path) -> None:
]["Male"]
)

# Explicitly write out the column_to_terms structure: a JSON mapping was supplied and no annotations
# were made, so map_variables_to_terms leaves the original mapping file untouched. Writing the structure
# here lets us verify that it still matches the original.
write_json_mapping_file(
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), False
)

# now check the JSON mapping file written above, which should match the Reproschema format
with open(tmp_path / "nidm_annotations_annotations.json", encoding="utf-8") as fp:
json.load(fp)