Skip to content

Commit f896e7e

Browse files
authored
Merge pull request #307 from dbkeator/master
updated read_nidm to account for missing tuple with label of isAbout …
2 parents 924372a + 046431e commit f896e7e

File tree

3 files changed

+216
-21
lines changed

3 files changed

+216
-21
lines changed

nidm/experiment/Utils.py

+34-15
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,6 @@ def read_nidm(nidmDoc):
101101
rdf_graph_parse = rdf_graph.parse(nidmDoc,format=util.guess_format(nidmDoc))
102102

103103

104-
105-
# add known CDE graphs
106-
#rdf_graph_parse = rdf_graph.parse
107-
108-
109104
# Query graph for project metadata and create project level objects
110105
# Get subject URI for project
111106
proj_id=None
@@ -144,7 +139,6 @@ def read_nidm(nidmDoc):
144139
add_metadata_for_subject (rdf_graph_parse,proj_id,project.graph.namespaces,project)
145140

146141

147-
148142
#Query graph for sessions, instantiate session objects, and add to project._session list
149143
#Get subject URI for sessions
150144
for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_SESSION.uri)):
@@ -288,22 +282,47 @@ def read_nidm(nidmDoc):
288282

289283
# Query graph for nidm:DataElements and instantiate a nidm:DataElement class and add them to the project
290284
query = '''
291-
prefix nidm: <http://purl.org/nidash/nidm#>
292-
select distinct ?uuid
293-
where {
294-
?uuid a/rdfs:subClassOf* nidm:DataElement .
295-
296-
}
297-
'''
285+
prefix nidm: <http://purl.org/nidash/nidm#>
286+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
287+
select distinct ?uuid
288+
where {
289+
?uuid a/rdfs:subClassOf* nidm:DataElement .
290+
291+
}
292+
'''
298293

299294
# add all nidm:DataElements in graph
300295
qres = rdf_graph_parse.query(query)
301296
for row in qres:
297+
print(row)
302298
# instantiate a data element class assigning it the existing uuid
303-
de = DataElement(project=project, uuid=row['uuid'],add_default_type=False)
299+
de = DataElement(project=project, uuid=row['uuid'], add_default_type=False)
304300
# get the rest of the attributes for this data element and store
305301
add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, de)
306302

303+
# now we need to check if there are labels for data element isAbout entries, if so add them.
304+
query2 = '''
305+
306+
prefix nidm: <http://purl.org/nidash/nidm#>
307+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
308+
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
309+
prefix prov: <http://www.w3.org/ns/prov#>
310+
311+
select distinct ?id ?label
312+
where {
313+
<%s> nidm:isAbout ?id .
314+
315+
?id rdf:type prov:Entity ;
316+
rdfs:label ?label .
317+
}
318+
319+
''' % row['uuid']
320+
# print(query2)
321+
qres2 = rdf_graph_parse.query(query2)
322+
323+
# add this tuple to graph
324+
for row2 in qres2:
325+
project.graph.entity(row2[0], {'rdfs:label': row2[1]})
307326

308327
# check for Derivatives.
309328
# WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
@@ -486,7 +505,7 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj):
486505
# add rest of metadata about the agent
487506
add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier,
488507
namespaces=namespaces, nidm_obj=generic_agent)
489-
# try and split uri into namespacea and local parts, if fails just use entire URI
508+
# try and split uri into namespace and local parts, if fails just use entire URI
490509
try:
491510
# create qualified names for objects
492511
obj_nm, obj_term = split_uri(r_obj.identifier)

nidm/experiment/tools/nidm2bids.py

+181-5
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@
4242
from nidm.experiment import Project,Session,Acquisition,AcquisitionObject,DemographicsObject,AssessmentObject, MRObject
4343
from nidm.core import BIDS_Constants,Constants
4444
from prov.model import PROV_LABEL,PROV_TYPE
45-
from nidm.experiment.Utils import read_nidm
45+
from nidm.experiment.Utils import read_nidm, write_json_mapping_file
4646
from nidm.experiment.Query import GetProjectsUUID, GetProjectLocation, GetParticipantIDFromAcquisition
47+
from nidm.core.Constants import DD
4748

4849
import json
4950
from pprint import pprint
@@ -152,6 +153,122 @@ def GetImageFromURL(url):
152153
print("ERROR! Can't open url: %s" % url)
153154
return -1
154155

156+
def GetDataElementMetadata(nidm_graph,de_uuid):
    '''
    Query nidm_graph for the nidm:PersonalDataElement identified by de_uuid and return its metadata
    as a BIDS-compliant participants sidecar file dictionary.

    :param nidm_graph: graph object exposing a SPARQL ``query`` method (the NIDM document)
    :param de_uuid: local identifier of the data element; it is expanded with Constants.NIIRI
    :return: dictionary with a single entry keyed by str(DD(source="participants.tsv", variable=...))
             whose value holds the sidecar metadata for that variable.  An empty dictionary is
             returned when the query yields no nidm:sourceVariable for de_uuid (e.g. the identifier
             is not a nidm:PersonalDataElement), so callers can safely ``update`` with the result.
    '''

    source_variable_uri = 'http://purl.org/nidash/nidm#sourceVariable'
    is_about_uri = 'http://purl.org/nidash/nidm#isAbout'

    # query nidm_graph for Constants.NIIRI[de_uuid] rdf:type PersonalDataElement
    query = """
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX prov: <http://www.w3.org/ns/prov#>
        PREFIX niiri: <http://iri.nidash.org/>
        PREFIX nidm: <http://purl.org/nidash/nidm#>

        select distinct ?p ?o
        where {

            <%s> rdf:type nidm:PersonalDataElement ;
                ?p ?o .
        }
    """ % Constants.NIIRI[de_uuid]

    qres = nidm_graph.query(query)

    # temporary dictionary of single-valued metadata, keyed by predicate URI
    temp_dict = {}
    # a data element may carry several nidm:isAbout objects; keep every one of them (in first-seen
    # order) instead of letting later rows overwrite earlier ones in temp_dict
    is_about_urls = []
    for row in qres:
        predicate = str(row[0])
        value = str(row[1])
        if predicate == is_about_uri:
            if value not in is_about_urls:
                is_about_urls.append(value)
        else:
            temp_dict[predicate] = value

    # without a source variable there is no column to describe
    if source_variable_uri not in temp_dict:
        return {}

    # set up a dictionary entry for this column
    current_tuple = str(DD(source="participants.tsv", variable=temp_dict[source_variable_uri]))

    de = {}
    de[current_tuple] = {}
    entry = de[current_tuple]

    # predicates copied straight to the top level of the entry
    # NOTE(review): str() is applied to the Constants.NIDM terms because the keys of temp_dict are
    # plain strings; this assumes str(Constants.NIDM[...]) yields the full URI -- confirm.
    top_level_keys = {
        source_variable_uri: 'source_variable',
        'http://purl.org/dc/terms/description': 'description',
        'http://www.w3.org/2000/01/rdf-schema#label': 'label',
        'http://uri.interlex.org/ilx_0739289': 'associatedWith',
        str(Constants.NIDM['url']): 'url',
    }
    # predicates nested under 'responseOptions'
    response_option_keys = {
        'http://purl.org/nidash/nidm#valueType': 'valueType',
        'http://purl.org/nidash/nidm#levels': 'levels',
        str(Constants.NIDM['minValue']): 'minValue',
        str(Constants.NIDM['maxValue']): 'maxValue',
    }

    # now set up a proper NIDM-style JSON data structure
    # see Utils.py function map_variables_to_terms for example (column_to_terms[current_tuple])
    for key, value in temp_dict.items():
        if key in top_level_keys:
            entry[top_level_keys[key]] = value
        elif key in response_option_keys:
            # create 'responseOptions' on first use so min/max values never hit a missing key
            entry.setdefault('responseOptions', {})[response_option_keys[key]] = value

    if is_about_urls:
        # an additional query per concept URL to see if there's a label associated with it
        entry['isAbout'] = []
        for concept_url in is_about_urls:
            entry['isAbout'].extend(_get_is_about_entries(nidm_graph, concept_url))

    return de


def _get_is_about_entries(nidm_graph, concept_url):
    '''
    Return one {'@id': concept_url, 'label': label} dictionary for every rdfs:label that nidm_graph
    holds for the prov:Entity identified by concept_url.
    '''
    query = '''
        prefix prov: <http://www.w3.org/ns/prov#>
        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

        select distinct ?label
        where {
            <%s> rdf:type prov:Entity ;
                rdfs:label ?label .
        }
    ''' % concept_url

    return [{'@id': concept_url, 'label': row[0]} for row in nidm_graph.query(query)]
155272

156273

157274
def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
@@ -186,6 +303,9 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
186303
#add row to the pandas data frame
187304
#data.append(obj)
188305
participants.loc[row_index,BIDS_Constants.participants[fields].uri] = obj
306+
307+
# find Data Element and add metadata to participants_json dictionary
308+
189309
else:
190310
#text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string
191311
#to the "term" part of a qname URI...this part lets a user simply ask for "age" for example without knowing the
@@ -219,7 +339,7 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
219339
?pred ?value .
220340
FILTER (regex(str(?pred) ,"%s","i" ))
221341
}""" % (subj_uri,fields)
222-
# print(query)
342+
#print(query)
223343
qres = nidm_graph.query(query)
224344

225345
for row in qres:
@@ -233,8 +353,12 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
233353
short_name = path_parts[2]
234354
else:
235355
short_name = url_parts.fragment
236-
participants_json[short_name] = {}
237-
participants_json[short_name]['TermURL'] = row[0]
356+
357+
# find Data Element and add metadata to participants_json dictionary
358+
if 'de' not in locals():
359+
de = GetDataElementMetadata(nidm_graph, short_name)
360+
else:
361+
de.update(GetDataElementMetadata(nidm_graph, short_name))
238362

239363
participants.loc[row_index,str(short_name)] = str(row[1])
240364
#data.append(str(row[1]))
@@ -251,6 +375,10 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
251375
with open(output_file + ".json",'w') as f:
252376
json.dump(participants_json,f,sort_keys=True,indent=2)
253377

378+
379+
# save participant sidecar file
380+
write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)
381+
254382
return participants, participants_json
255383

256384

@@ -280,7 +408,11 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
280408

281409
for key,value in BIDS_Constants.dataset_description.items():
282410
if BIDS_Constants.dataset_description[key]._uri == proj_key:
283-
project_metadata[key] = project_metadata[proj_key]
411+
# added since BIDS validator validates values of certain keys
412+
if (key == "Authors") or (key == "Funding") or (key == "ReferencesAndLinks"):
413+
project_metadata[key] = [project_metadata[proj_key]]
414+
else:
415+
project_metadata[key] = project_metadata[proj_key]
284416
del project_metadata[proj_key]
285417
key_found=1
286418
continue
@@ -293,6 +425,34 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
293425

294426
##############################################################################
295427

428+
def AddMetadataToImageSidecar(graph_entity,graph, output_directory, image_filename):
    '''
    Write a JSON sidecar file for an image from the metadata stored about graph_entity.

    Every predicate/object pair attached to graph_entity in graph is looked up in
    core/BIDS_Constants.py json_keys, whose values (NIDM entries) are matched against the predicate
    and whose keys are the corresponding BIDS keys.  Matching pairs are written to
    <output_directory>/<image_filename>.json.

    :param graph_entity: URI of the entity whose metadata is queried
    :param graph: graph object exposing a SPARQL ``query`` method
    :param output_directory: directory in which the sidecar file is created
    :param image_filename: image file name without extension; ".json" is appended
    '''

    # collect all predicate/object pairs stored for graph_entity
    sparql = '''
        Select DISTINCT ?p ?o
        WHERE {
            <%s> ?p ?o .
        }
        ''' %graph_entity

    # BIDS key -> value found in the graph
    sidecar = {}
    for result in graph.query(sparql):
        predicate = result[0]
        # the first BIDS key whose NIDM term equals this predicate wins
        for bids_key, nidm_term in BIDS_Constants.json_keys.items():
            if nidm_term == predicate:
                if bids_key is not None:
                    sidecar[bids_key] = result[1]
                break

    # write the collected metadata out to the appropriate sidecar filename
    sidecar_path = join(output_directory,image_filename + ".json")
    with open(sidecar_path,"w") as fp:
        json.dump(sidecar,fp,indent=2)
454+
455+
296456
def ProcessFiles(graph,scan_type,output_directory,project_location,args):
297457
'''
298458
This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph
@@ -354,6 +514,7 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
354514
print("Trying to copy file from %s" % (location))
355515
try:
356516
copyfile(location, join(output_directory, sub_dir, bids_ext, basename(filename)))
517+
357518
except:
358519
print("ERROR! Failed to find file %s on filesystem..." % location)
359520
if not args.no_downloads:
@@ -367,10 +528,22 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
367528
sys.exc_info()[0], location))
368529
GetImageFromAWS(location=location, output_file=
369530
join(output_directory, sub_dir, bids_ext, basename(filename)),args=args)
531+
370532
else:
371533
# copy temporary file to BIDS directory
372534
copyfile(ret, join(output_directory, sub_dir, bids_ext, basename(filename)))
373535

536+
# if we were able to copy the image file then add the json sidecar file with additional metadata
537+
# available in the NIDM file
538+
if isfile(join(output_directory, sub_dir, bids_ext, basename(filename))):
539+
# get rest of metadata for this acquisition and store in sidecar file
540+
if "gz" in basename(filename):
541+
image_filename = splitext(splitext(basename(filename))[0])[0]
542+
else:
543+
image_filename = splitext(basename(filename))[0]
544+
AddMetadataToImageSidecar(graph_entity=acq,graph=graph,output_directory=join(output_directory,
545+
sub_dir,bids_ext),image_filename=image_filename)
546+
374547
# if this is a DWI scan then we should copy over the b-value and b-vector files
375548
if bids_ext == 'dwi':
376549
# search for entity uuid with rdf:type nidm:b-value that was generated by activity
@@ -529,6 +702,9 @@ def main(argv):
529702
print("Reading RDF file as %s..." % format)
530703
#load NIDM graph into NIDM-Exp API objects
531704
nidm_project = read_nidm(rdf_file)
705+
# temporary save nidm_project
706+
with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
707+
print(nidm_project.serializeTurtle(), file=f)
532708
print("RDF file sucessfully read")
533709
format_found=True
534710
break

nidm/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
55
_version_major = 3
66
_version_minor = 9
7-
_version_micro = '4' # use '' for first of series, number for 1 and above
7+
_version_micro = '5' # use '' for first of series, number for 1 and above
88
_version_extra = ''
99
# _version_extra = '' # Uncomment this for full releases
1010

0 commit comments

Comments
 (0)