Skip to content

Commit 046431e

Browse files
committed
Updated read_nidm to re-add the rdfs:label tuples of isAbout concept associations, which were missing after reading an existing NIDM document. Also updated nidm2bids to save a participants.json file containing the annotations from the source NIDM document.
1 parent 39b6df1 commit 046431e

File tree

3 files changed

+216
-21
lines changed

3 files changed

+216
-21
lines changed

nidm/experiment/Utils.py

+34-15
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,6 @@ def read_nidm(nidmDoc):
101101
rdf_graph_parse = rdf_graph.parse(nidmDoc,format=util.guess_format(nidmDoc))
102102

103103

104-
105-
# add known CDE graphs
106-
#rdf_graph_parse = rdf_graph.parse
107-
108-
109104
# Query graph for project metadata and create project level objects
110105
# Get subject URI for project
111106
proj_id=None
@@ -144,7 +139,6 @@ def read_nidm(nidmDoc):
144139
add_metadata_for_subject (rdf_graph_parse,proj_id,project.graph.namespaces,project)
145140

146141

147-
148142
#Query graph for sessions, instantiate session objects, and add to project._session list
149143
#Get subject URI for sessions
150144
for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_SESSION.uri)):
@@ -288,22 +282,47 @@ def read_nidm(nidmDoc):
288282

289283
# Query graph for nidm:DataElements and instantiate a nidm:DataElement class and add them to the project
290284
query = '''
291-
prefix nidm: <http://purl.org/nidash/nidm#>
292-
select distinct ?uuid
293-
where {
294-
?uuid a/rdfs:subClassOf* nidm:DataElement .
295-
296-
}
297-
'''
285+
prefix nidm: <http://purl.org/nidash/nidm#>
286+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
287+
select distinct ?uuid
288+
where {
289+
?uuid a/rdfs:subClassOf* nidm:DataElement .
290+
291+
}
292+
'''
298293

299294
# add all nidm:DataElements in graph
300295
qres = rdf_graph_parse.query(query)
301296
for row in qres:
297+
print(row)
302298
# instantiate a data element class assigning it the existing uuid
303-
de = DataElement(project=project, uuid=row['uuid'],add_default_type=False)
299+
de = DataElement(project=project, uuid=row['uuid'], add_default_type=False)
304300
# get the rest of the attributes for this data element and store
305301
add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, de)
306302

303+
# now we need to check if there are labels for data element isAbout entries, if so add them.
304+
query2 = '''
305+
306+
prefix nidm: <http://purl.org/nidash/nidm#>
307+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
308+
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
309+
prefix prov: <http://www.w3.org/ns/prov#>
310+
311+
select distinct ?id ?label
312+
where {
313+
<%s> nidm:isAbout ?id .
314+
315+
?id rdf:type prov:Entity ;
316+
rdfs:label ?label .
317+
}
318+
319+
''' % row['uuid']
320+
# print(query2)
321+
qres2 = rdf_graph_parse.query(query2)
322+
323+
# add this tuple to graph
324+
for row2 in qres2:
325+
project.graph.entity(row2[0], {'rdfs:label': row2[1]})
307326

308327
# check for Derivatives.
309328
# WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
@@ -486,7 +505,7 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj):
486505
# add rest of meatadata about the agent
487506
add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier,
488507
namespaces=namespaces, nidm_obj=generic_agent)
489-
# try and split uri into namespacea and local parts, if fails just use entire URI
508+
# try and split uri into namespace and local parts, if fails just use entire URI
490509
try:
491510
# create qualified names for objects
492511
obj_nm, obj_term = split_uri(r_obj.identifier)

nidm/experiment/tools/nidm2bids.py

+181-5
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@
4242
from nidm.experiment import Project,Session,Acquisition,AcquisitionObject,DemographicsObject,AssessmentObject, MRObject
4343
from nidm.core import BIDS_Constants,Constants
4444
from prov.model import PROV_LABEL,PROV_TYPE
45-
from nidm.experiment.Utils import read_nidm
45+
from nidm.experiment.Utils import read_nidm, write_json_mapping_file
4646
from nidm.experiment.Query import GetProjectsUUID, GetProjectLocation, GetParticipantIDFromAcquisition
47+
from nidm.core.Constants import DD
4748

4849
import json
4950
from pprint import pprint
@@ -152,6 +153,122 @@ def GetImageFromURL(url):
152153
print("ERROR! Can't open url: %s" % url)
153154
return -1
154155

156+
def _GetIsAboutLabels(nidm_graph, concept_url):
    '''
    Return every rdfs:label stored in nidm_graph for the prov:Entity identified by concept_url.

    :param nidm_graph: graph object exposing a SPARQL query() method
    :param concept_url: URL of the concept referenced by a nidm:isAbout statement
    :return: list of labels (as returned by the query); empty if the concept has no labelled prov:Entity
    '''
    query = '''

        prefix prov: <http://www.w3.org/ns/prov#>
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        select distinct ?label
        where {
            <%s> rdf:type prov:Entity ;
                rdfs:label ?label .
        }
    ''' % concept_url
    return [row[0] for row in nidm_graph.query(query)]


def GetDataElementMetadata(nidm_graph,de_uuid):
    '''
    This function will query the nidm_graph for the DataElement de_uuid and return all the metadata as a BIDS-compliant
    participants sidecar file dictionary

    :param nidm_graph: graph object exposing a SPARQL query() method
    :param de_uuid: local identifier of the data element; it is resolved with Constants.NIIRI[de_uuid]
    :return: dictionary with a single key, str(DD(source="participants.tsv", variable=<sourceVariable>)), whose
        value holds the recognised metadata ('source_variable', 'description', 'isAbout', 'label',
        'responseOptions', 'associatedWith', 'url'). An empty dictionary is returned when the graph holds no
        nidm:PersonalDataElement with a nidm:sourceVariable for de_uuid, so callers can safely update() with it.
    '''

    # query nidm_graph for Constants.NIIRI[de_uuid] rdf:type PersonalDataElement
    query = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX prov: <http://www.w3.org/ns/prov#>
        PREFIX niiri: <http://iri.nidash.org/>
        PREFIX nidm: <http://purl.org/nidash/nidm#>

        select distinct ?p ?o
        where {

            <%s> rdf:type nidm:PersonalDataElement ;
                ?p ?o .
        }
    """ % Constants.NIIRI[de_uuid]

    qres = nidm_graph.query(query)

    # Collect every object per predicate. A predicate such as nidm:isAbout may occur several times,
    # and a plain predicate -> value mapping would silently keep only one of them.
    metadata = {}
    for row in qres:
        metadata.setdefault(str(row[0]), []).append(str(row[1]))

    source_variable_key = 'http://purl.org/nidash/nidm#sourceVariable'
    if source_variable_key not in metadata:
        # not a PersonalDataElement, or no source variable recorded: nothing to describe
        return {}

    # set up a dictionary entry for this column
    current_tuple = str(DD(source="participants.tsv", variable=metadata[source_variable_key][-1]))

    entry = {}
    de = {current_tuple: entry}

    # Compare plain strings with plain strings: the predicates were converted with str() above.
    # NOTE(review): assumes Constants.NIDM[...] stringifies to the full term URI (as Constants.NIIRI[...]
    # does in the query above) -- rdflib identifiers never compare equal to a plain str, so the
    # conversion is required for these branches to match.
    min_value_key = str(Constants.NIDM['minValue'])
    max_value_key = str(Constants.NIDM['maxValue'])
    url_key = str(Constants.NIDM['url'])

    # now look for label entry in metadata and set up a proper NIDM-style JSON data structure
    # see Utils.py function map_variables_to_terms for example (column_to_terms[current_tuple])
    for key, values in metadata.items():
        # single-valued attributes use the last value seen for the predicate
        value = values[-1]
        if key == source_variable_key:
            entry['source_variable'] = value
        elif key == 'http://purl.org/dc/terms/description':
            entry['description'] = value
        elif key == 'http://purl.org/nidash/nidm#isAbout':
            # an additional query per concept finds the label(s) associated with each isAbout URL;
            # as before, concepts without a label contribute no entry
            entry['isAbout'] = []
            for concept_url in values:
                for label in _GetIsAboutLabels(nidm_graph, concept_url):
                    entry['isAbout'].append({'@id': concept_url, 'label': label})
        elif key == 'http://www.w3.org/2000/01/rdf-schema#label':
            entry['label'] = value
        elif key == 'http://purl.org/nidash/nidm#valueType':
            entry.setdefault('responseOptions', {})['valueType'] = value
        elif key == 'http://purl.org/nidash/nidm#levels':
            entry.setdefault('responseOptions', {})['levels'] = value
        elif key == 'http://uri.interlex.org/ilx_0739289':
            entry['associatedWith'] = value
        elif key == min_value_key:
            # responseOptions may not exist yet, depending on the order the predicates are visited
            entry.setdefault('responseOptions', {})['minValue'] = value
        elif key == max_value_key:
            entry.setdefault('responseOptions', {})['maxValue'] = value
        elif key == url_key:
            entry['url'] = value

    return de
155272

156273

157274
def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
@@ -186,6 +303,9 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
186303
#add row to the pandas data frame
187304
#data.append(obj)
188305
participants.loc[row_index,BIDS_Constants.participants[fields].uri] = obj
306+
307+
# find Data Element and add metadata to participants_json dictionary
308+
189309
else:
190310
#text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string
191311
#to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the
@@ -219,7 +339,7 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
219339
?pred ?value .
220340
FILTER (regex(str(?pred) ,"%s","i" ))
221341
}""" % (subj_uri,fields)
222-
# print(query)
342+
#print(query)
223343
qres = nidm_graph.query(query)
224344

225345
for row in qres:
@@ -233,8 +353,12 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
233353
short_name = path_parts[2]
234354
else:
235355
short_name = url_parts.fragment
236-
participants_json[short_name] = {}
237-
participants_json[short_name]['TermURL'] = row[0]
356+
357+
# find Data Element and add metadata to participants_json dictionary
358+
if 'de' not in locals():
359+
de = GetDataElementMetadata(nidm_graph, short_name)
360+
else:
361+
de.update(GetDataElementMetadata(nidm_graph, short_name))
238362

239363
participants.loc[row_index,str(short_name)] = str(row[1])
240364
#data.append(str(row[1]))
@@ -251,6 +375,10 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
251375
with open(output_file + ".json",'w') as f:
252376
json.dump(participants_json,f,sort_keys=True,indent=2)
253377

378+
379+
# save participant sidecar file
380+
write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)
381+
254382
return participants, participants_json
255383

256384

@@ -280,7 +408,11 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
280408

281409
for key,value in BIDS_Constants.dataset_description.items():
282410
if BIDS_Constants.dataset_description[key]._uri == proj_key:
283-
project_metadata[key] = project_metadata[proj_key]
411+
# added since BIDS validator validates values of certain keys
412+
if (key == "Authors") or (key == "Funding") or (key == "ReferencesAndLinks"):
413+
project_metadata[key] = [project_metadata[proj_key]]
414+
else:
415+
project_metadata[key] = project_metadata[proj_key]
284416
del project_metadata[proj_key]
285417
key_found=1
286418
continue
@@ -293,6 +425,34 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
293425

294426
##############################################################################
295427

428+
def AddMetadataToImageSidecar(graph_entity,graph, output_directory, image_filename):
    '''
    Create the JSON sidecar file that accompanies image_filename in output_directory.

    Every predicate/object pair stored in graph for graph_entity is compared against the values of
    BIDS_Constants.json_keys (core/BIDS_Constants.py). When a predicate equals one of those values, the
    object is saved under the matching json_keys key (the BIDS key). The collected dictionary is written
    to <output_directory>/<image_filename>.json.

    :param graph_entity: identifier of the entity whose metadata is looked up (substituted as <...> in SPARQL)
    :param graph: graph object exposing a SPARQL query() method
    :param output_directory: directory in which the sidecar file is created
    :param image_filename: image file name without extension; ".json" is appended to it
    '''

    # all statements that have graph_entity as their subject
    sparql = '''
        Select DISTINCT ?p ?o
        WHERE {
            <%s> ?p ?o .
        }
    ''' %graph_entity

    # BIDS key -> value found in the graph
    sidecar = {}
    for row in graph.query(sparql):
        # the first BIDS key whose mapped NIDM term equals this predicate wins
        for bids_key in BIDS_Constants.json_keys:
            if BIDS_Constants.json_keys[bids_key] == row[0]:
                sidecar[bids_key] = row[1]
                break

    # write the collected metadata next to the image
    sidecar_path = join(output_directory, image_filename + ".json")
    with open(sidecar_path, "w") as fp:
        json.dump(sidecar, fp, indent=2)
454+
455+
296456
def ProcessFiles(graph,scan_type,output_directory,project_location,args):
297457
'''
298458
This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph
@@ -354,6 +514,7 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
354514
print("Trying to copy file from %s" % (location))
355515
try:
356516
copyfile(location, join(output_directory, sub_dir, bids_ext, basename(filename)))
517+
357518
except:
358519
print("ERROR! Failed to find file %s on filesystem..." % location)
359520
if not args.no_downloads:
@@ -367,10 +528,22 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
367528
sys.exc_info()[0], location))
368529
GetImageFromAWS(location=location, output_file=
369530
join(output_directory, sub_dir, bids_ext, basename(filename)),args=args)
531+
370532
else:
371533
# copy temporary file to BIDS directory
372534
copyfile(ret, join(output_directory, sub_dir, bids_ext, basename(filename)))
373535

536+
# if we were able to copy the image file then add the json sidecar file with additional metadata
537+
# available in the NIDM file
538+
if isfile(join(output_directory, sub_dir, bids_ext, basename(filename))):
539+
# get rest of metadata for this acquisition and store in sidecar file
540+
if "gz" in basename(filename):
541+
image_filename = splitext(splitext(basename(filename))[0])[0]
542+
else:
543+
image_filename = splitext(basename(filename))[0]
544+
AddMetadataToImageSidecar(graph_entity=acq,graph=graph,output_directory=join(output_directory,
545+
sub_dir,bids_ext),image_filename=image_filename)
546+
374547
# if this is a DWI scan then we should copy over the b-value and b-vector files
375548
if bids_ext == 'dwi':
376549
# search for entity uuid with rdf:type nidm:b-value that was generated by activity
@@ -529,6 +702,9 @@ def main(argv):
529702
print("Reading RDF file as %s..." % format)
530703
#load NIDM graph into NIDM-Exp API objects
531704
nidm_project = read_nidm(rdf_file)
705+
# temporary save nidm_project
706+
with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
707+
print(nidm_project.serializeTurtle(), file=f)
532708
print("RDF file sucessfully read")
533709
format_found=True
534710
break

nidm/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
55
_version_major = 3
66
_version_minor = 9
7-
_version_micro = '4' # use '' for first of series, number for 1 and above
7+
_version_micro = '5' # use '' for first of series, number for 1 and above
88
_version_extra = ''
99
# _version_extra = '' # Uncomment this for full releases
1010

0 commit comments

Comments
 (0)