Skip to content

Commit 40b6199

Browse files
authored
Merge pull request #394 from dbkeator/master
fixed bug with BIDS sidecar files when supplied by user and updated tests
2 parents 0237ab3 + fe42841 commit 40b6199

File tree

7 files changed

+139
-14
lines changed

7 files changed

+139
-14
lines changed

src/nidm/experiment/Utils.py

+32-8
Original file line numberDiff line numberDiff line change
@@ -1409,17 +1409,28 @@ def map_variables_to_terms(
14091409
dataset_identifier=None,
14101410
):
14111411
"""
1412-
14131412
:param df: data frame with first row containing variable names
14141413
:param assessment_name: Name for the assessment to use in storing JSON mapping dictionary keys
14151414
:param json_source: optional json document either in file or structure
14161415
with variable names as keys and minimal fields "definition","label","url"
14171416
:param output_file: output filename to save variable-> term mappings
14181417
:param directory: if output_file parameter is set to None then use this directory to store default JSON mapping file
1419-
if doing variable->term mappings
1418+
if doing variable->term mappings
1419+
:param bids: if bids is set to True then a BIDS-compliant sidecar file will be written if annotations are made
1420+
:param owl_file: if a web-ontology language (OWL) file is supplied then it will be used to look for terms while
1421+
annotating otherwise the default NIDM terminology will be used.
1422+
:param associate_concepts: if this is set to True then concept association will be performed for each variable
1423+
otherwise it will not.
1424+
:param dataset_identifier: unique identifier to identify a dataset such as a project in OpenNeuro
1425+
which is used in the NIDM records as a namespace to go along with a unique ID generated for the NIDM RDF graphs
14201426
:return: dictionary mapping variable names (i.e. columns) to terms
14211427
"""
14221428

1429+
# 12/15/23: indicator variable to identify if annotations were made with the pynidm tools. If not, and this
1430+
# is a bids-nidm conversion, and the user supplied a bids-compliant json sidecar file, save the original
1431+
# file.
1432+
annot_made = False
1433+
14231434
# dictionary mapping column name to preferred term
14241435
column_to_terms = {}
14251436

@@ -1549,11 +1560,13 @@ def map_variables_to_terms(
15491560
column_to_terms[current_tuple]["label"] = json_map[json_key][
15501561
"sourceVariable"
15511562
]
1563+
# this is probably a BIDS json file so use the json_key as the label
15521564
else:
1553-
column_to_terms[current_tuple]["label"] = ""
1565+
column_to_terms[current_tuple]["label"] = json_key
15541566
print(
1555-
"No label or source_variable or sourceVariable keys found in json mapping file for variable "
1556-
f"{json_key}. Consider adding these to the json file as they are important"
1567+
"No label or source_variable/SourceVariable key found in json mapping file for variable "
1568+
f"{json_key}. This is ok if this is a BIDS json sidecar file."
1569+
"Otherwise, consider adding a label to the json file."
15571570
)
15581571
else:
15591572
column_to_terms[current_tuple]["label"] = json_map[json_key][
@@ -1812,6 +1825,7 @@ def map_variables_to_terms(
18121825
"maxValue:",
18131826
column_to_terms[current_tuple]["responseOptions"]["maxValue"],
18141827
)
1828+
18151829
if "hasUnit" in json_map[json_key]:
18161830
# upgrade 'hasUnit' to 'responseOptions'->'unitCode
18171831
if "responseOptions" not in column_to_terms[current_tuple].keys():
@@ -1849,6 +1863,7 @@ def map_variables_to_terms(
18491863
ilx_obj,
18501864
nidm_owl_graph=nidm_owl_graph,
18511865
)
1866+
annot_made = True
18521867
# write annotations to json file so user can start up again if not doing whole file
18531868
write_json_mapping_file(
18541869
column_to_terms, output_file, bids
@@ -1917,7 +1932,8 @@ def map_variables_to_terms(
19171932
# if user ran in mode where they want to associate concepts and this isn't the participant
19181933
# id field then associate concepts.
19191934
if match_participant_id_field(
1920-
json_map[json_key]["source_variable"]
1935+
# json_map[json_key]["source_variable"]
1936+
column_to_terms[current_tuple]["source_variable"]
19211937
):
19221938
column_to_terms[current_tuple]["isAbout"] = []
19231939
column_to_terms[current_tuple]["isAbout"].append(
@@ -1936,6 +1952,7 @@ def map_variables_to_terms(
19361952
ilx_obj,
19371953
nidm_owl_graph=nidm_owl_graph,
19381954
)
1955+
annot_made = True
19391956
# write annotations to json file so user can start up again if not doing whole file
19401957
write_json_mapping_file(column_to_terms, output_file, bids)
19411958

@@ -1990,6 +2007,8 @@ def map_variables_to_terms(
19902007
column_to_terms[current_tuple] = {}
19912008
# enter user interaction function to get data dictionary annotations from user
19922009
annotate_data_element(column, current_tuple, column_to_terms)
2010+
# 12/15/23
2011+
annot_made = True
19932012
# then ask user to find a concept if they selected to do so
19942013
if associate_concepts:
19952014
# provide user with opportunity to associate a concept with this annotation
@@ -2000,6 +2019,7 @@ def map_variables_to_terms(
20002019
ilx_obj,
20012020
nidm_owl_graph=nidm_owl_graph,
20022021
)
2022+
annot_made = True
20032023
# write annotations to json file so user can start up again if not doing whole file
20042024
write_json_mapping_file(column_to_terms, output_file, bids)
20052025

@@ -2062,8 +2082,12 @@ def map_variables_to_terms(
20622082
column_to_terms[current_tuple]["url"] = ilx_output.iri
20632083
except Exception:
20642084
print("WARNING: WIP: Data element not submitted to InterLex. ")
2065-
# write annotations to json file since data element annotations are complete
2066-
write_json_mapping_file(column_to_terms, output_file, bids)
2085+
2086+
# 12/15/23: If doing a BIDS-NIDM conversion and the user supplied a BIDS-compliant json sidecar file
2087+
# and no annotations were made, leave original BIDS json file as it is...
2088+
if annot_made:
2089+
# write annotations to json file since data element annotations are complete
2090+
write_json_mapping_file(column_to_terms, output_file, bids)
20672091

20682092
# get CDEs for data dictionary and NIDM graph entity of data
20692093
cde = DD_to_nidm(column_to_terms, dataset_identifier=dataset_identifier)

src/nidm/experiment/tools/nidm_affinity_propagation.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def data_aggregation(reporter): # all data from all the files is collected
239239
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
240240
)
241241
for i, nf in enumerate(not_found_list):
242-
reporter.print(f"{i+1}. {nf}")
242+
reporter.print(f"{i + 1}. {nf}")
243243
not_found_list.clear()
244244
not_found_count += 1
245245
print()

src/nidm/experiment/tools/nidm_agglomerative_clustering.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def data_aggregation(reporter): # all data from all the files is collected
239239
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
240240
)
241241
for i, nf in enumerate(not_found_list):
242-
reporter.print(f"{i+1}. {nf}")
242+
reporter.print(f"{i + 1}. {nf}")
243243
not_found_list.clear()
244244
not_found_count += 1
245245
print()

src/nidm/experiment/tools/nidm_gmm.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def data_aggregation(reporter): # all data from all the files is collected
272272
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
273273
)
274274
for i, nf in enumerate(not_found_list):
275-
reporter.print(f"{i+1}. {nf}")
275+
reporter.print(f"{i + 1}. {nf}")
276276
not_found_list.clear()
277277
not_found_count += 1
278278
print()

src/nidm/experiment/tools/nidm_kmeans.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def data_aggregation(reporter): # all data from all the files is collected
278278
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
279279
)
280280
for i, nf in enumerate(not_found_list):
281-
reporter.print(f"{i+1}. {nf}")
281+
reporter.print(f"{i + 1}. {nf}")
282282
not_found_list.clear()
283283
not_found_count += 1
284284
print()

src/nidm/experiment/tools/nidm_linreg.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def data_aggregation(reporter): # all data from all the files is collected
328328
+ ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
329329
)
330330
for i, nf in enumerate(not_found_list):
331-
reporter.print(f"{i+1}. {nf}")
331+
reporter.print(f"{i + 1}. {nf}")
332332
not_found_list.clear()
333333
not_found_count += 1
334334
print()

tests/experiment/test_map_vars_to_terms.py

+102-1
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
from dataclasses import dataclass
22
import json
3+
from os.path import join
34
from pathlib import Path
45
import pandas as pd
56
import pytest
6-
from nidm.experiment.Utils import map_variables_to_terms
7+
from nidm.experiment.Utils import map_variables_to_terms, write_json_mapping_file
78

89

910
@dataclass
1011
class Setup:
1112
data: pd.DataFrame
1213
reproschema_json_map: dict
1314
bids_sidecar: dict
15+
bids_sidecar_simple: dict
1416

1517

1618
@pytest.fixture(scope="module")
@@ -135,11 +137,24 @@ def setup() -> Setup:
135137
}
136138
"""
137139
)
140+
bids_sidecar_simple = json.loads(
141+
"""
142+
{
143+
"age": {
144+
"description": "age of participant"
145+
},
146+
"sex": {
147+
"description": "biological sex of participant"
148+
}
149+
}
150+
"""
151+
)
138152

139153
return Setup(
140154
data=data,
141155
reproschema_json_map=reproschema_json_map,
142156
bids_sidecar=bids_sidecar,
157+
bids_sidecar_simple=bids_sidecar_simple,
143158
)
144159

145160

@@ -149,6 +164,7 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
149164
JSON sidecar file
150165
"""
151166

167+
# test BIDS sidecar json file with all pynidm annotations
152168
column_to_terms, cde = map_variables_to_terms(
153169
df=setup.data,
154170
json_source=setup.bids_sidecar,
@@ -204,6 +220,17 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
204220
]["Male"]
205221
)
206222

223+
# force writing of column_to_terms structure because here we're not doing annotations and so
224+
# map_variables_to_terms won't write it out since we supplied one for it to open...thus it already exists
225+
# and no annotations were made so it should exist in its original form.
226+
# By explicitly writing it out here, after running map_variables_to_terms, we can ensure it's the same as the
227+
# original.
228+
229+
# write annotations to json file since data element annotations are complete
230+
write_json_mapping_file(
231+
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), True
232+
)
233+
207234
# now check the JSON sidecar file created by map_variables_to_terms which should match BIDS format
208235
with open(tmp_path / "nidm_annotations.json", encoding="utf-8") as fp:
209236
bids_sidecar = json.load(fp)
@@ -245,6 +272,69 @@ def test_map_vars_to_terms_BIDS(setup: Setup, tmp_path: Path) -> None:
245272
assert len(results) == 20
246273

247274

275+
def test_map_vars_to_terms_BIDS_simple(setup: Setup, tmp_path: Path) -> None:
276+
"""
277+
This function will test the Utils.py "map_vars_to_terms" function with a BIDS-formatted
278+
JSON sidecar file
279+
"""
280+
281+
# test a minimal BIDS sidecar json file that contains only a description for each variable
282+
column_to_terms, cde = map_variables_to_terms(
283+
df=setup.data,
284+
json_source=setup.bids_sidecar_simple,
285+
directory=str(tmp_path),
286+
assessment_name="test",
287+
associate_concepts=False,
288+
bids=True,
289+
)
290+
291+
# check whether JSON mapping structure returned from map_variables_to_terms matches the
292+
# reproschema structure
293+
assert "DD(source='test', variable='age')" in column_to_terms
294+
assert "DD(source='test', variable='sex')" in column_to_terms
295+
assert "description" in column_to_terms["DD(source='test', variable='age')"]
296+
assert "description" in column_to_terms["DD(source='test', variable='sex')"]
297+
298+
# force writing of column_to_terms structure because here we're not doing annotations and so
299+
# map_variables_to_terms won't write it out since we supplied one for it to open...thus it already exists
300+
# and no annotations were made so it should exist in its original form.
301+
# By explicitly writing it out here, after running map_variables_to_terms, we can ensure it's the same as the
302+
# original.
303+
304+
# write annotations to json file since data element annotations are complete
305+
write_json_mapping_file(
306+
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), True
307+
)
308+
309+
# now check the JSON sidecar file created by map_variables_to_terms which should match BIDS format
310+
with open(tmp_path / "nidm_annotations.json", encoding="utf-8") as fp:
311+
bids_sidecar = json.load(fp)
312+
313+
assert "age" in bids_sidecar.keys()
314+
assert "sex" in bids_sidecar.keys()
315+
assert "description" in bids_sidecar["age"]
316+
assert "description" in bids_sidecar["sex"]
317+
318+
# check the CDE dataelement graph for correct information
319+
query = """
320+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
321+
322+
select distinct ?uuid ?DataElements ?property ?value
323+
where {
324+
325+
?uuid a/rdfs:subClassOf* nidm:DataElement ;
326+
?property ?value .
327+
328+
}"""
329+
qres = cde.query(query)
330+
331+
results = []
332+
for row in qres:
333+
results.append(list(row))
334+
335+
assert len(results) == 16
336+
337+
248338
def test_map_vars_to_terms_reproschema(setup: Setup, tmp_path: Path) -> None:
249339
"""
250340
This function will test the Utils.py "map_vars_to_terms" function with a reproschema-formatted
@@ -305,6 +395,17 @@ def test_map_vars_to_terms_reproschema(setup: Setup, tmp_path: Path) -> None:
305395
]["Male"]
306396
)
307397

398+
# force writing of column_to_terms structure because here we're not doing annotations and so
399+
# map_variables_to_terms won't write it out since we supplied one for it to open...thus it already exists
400+
# and no annotations were made so it should exist in its original form.
401+
# By explicitly writing it out here, after running map_variables_to_terms, we can ensure it's the same as the
402+
# original.
403+
404+
# write annotations to json file since data element annotations are complete
405+
write_json_mapping_file(
406+
column_to_terms, join(str(tmp_path), "nidm_annotations.json"), False
407+
)
408+
308409
# now check the JSON mapping file created by map_variables_to_terms which should match Reproschema format
309410
with open(tmp_path / "nidm_annotations_annotations.json", encoding="utf-8") as fp:
310411
json.load(fp)

0 commit comments

Comments
 (0)