Merge pull request #307 from dbkeator/master

dbkeator · web-flow · commit f896e7e0c123 · 2021-12-07T15:11:56.000-08:00
updated read_nidm to account for missing tuple with label of isAbout …
diff --git a/nidm/experiment/Utils.py b/nidm/experiment/Utils.py
@@ -101,11 +101,6 @@ def read_nidm(nidmDoc):
     rdf_graph_parse = rdf_graph.parse(nidmDoc,format=util.guess_format(nidmDoc))
 
 
-
-    # add known CDE graphs
-    #rdf_graph_parse = rdf_graph.parse
-
-
     # Query graph for project metadata and create project level objects
     # Get subject URI for project
     proj_id=None
@@ -144,7 +139,6 @@ def read_nidm(nidmDoc):
         add_metadata_for_subject (rdf_graph_parse,proj_id,project.graph.namespaces,project)
 
 
-
     #Query graph for sessions, instantiate session objects, and add to project._session list
     #Get subject URI for sessions
     for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_SESSION.uri)):
@@ -288,22 +282,47 @@ def read_nidm(nidmDoc):
 
     # Query graph for nidm:DataElements and instantiate a nidm:DataElement class and add them to the project
     query = '''
-            prefix nidm: <http://purl.org/nidash/nidm#>  
-            select distinct ?uuid
-            where {
-                ?uuid a/rdfs:subClassOf* nidm:DataElement .
-     			
-            }
-            '''
+                prefix nidm: <http://purl.org/nidash/nidm#>
+                prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
+                select distinct ?uuid
+                where {
+                    ?uuid a/rdfs:subClassOf* nidm:DataElement .
+
+                }
+                '''
 
     # add all nidm:DataElements in graph
     qres = rdf_graph_parse.query(query)
     for row in qres:
+        print(row)
         # instantiate a data element class assigning it the existing uuid
-        de = DataElement(project=project, uuid=row['uuid'],add_default_type=False)
+        de = DataElement(project=project, uuid=row['uuid'], add_default_type=False)
         # get the rest of the attributes for this data element and store
         add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, de)
 
+        # now we need to check if there are labels for data element isAbout entries, if so add them.
+        query2 = '''
+
+                prefix nidm: <http://purl.org/nidash/nidm#>
+                prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                prefix prov: <http://www.w3.org/ns/prov#>
+
+                select distinct ?id ?label
+                where {
+                    <%s> nidm:isAbout ?id .
+
+                    ?id rdf:type prov:Entity ;
+                        rdfs:label ?label .  
+                }
+
+            ''' % row['uuid']
+        # print(query2)
+        qres2 = rdf_graph_parse.query(query2)
+
+        # add this tuple to graph
+        for row2 in qres2:
+            project.graph.entity(row2[0], {'rdfs:label': row2[1]})
 
     # check for Derivatives.
     # WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
@@ -486,7 +505,7 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj):
                         # add rest of meatadata about the agent
                         add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier,
                                                  namespaces=namespaces, nidm_obj=generic_agent)
-                    # try and split uri into namespacea and local parts, if fails just use entire URI
+                    # try and split uri into namespace and local parts, if fails just use entire URI
                     try:
                         # create qualified names for objects
                         obj_nm, obj_term = split_uri(r_obj.identifier)
diff --git a/nidm/experiment/tools/nidm2bids.py b/nidm/experiment/tools/nidm2bids.py
@@ -42,8 +42,9 @@
 from nidm.experiment import Project,Session,Acquisition,AcquisitionObject,DemographicsObject,AssessmentObject, MRObject
 from nidm.core import BIDS_Constants,Constants
 from prov.model import PROV_LABEL,PROV_TYPE
-from nidm.experiment.Utils import read_nidm
+from nidm.experiment.Utils import read_nidm, write_json_mapping_file
 from nidm.experiment.Query import GetProjectsUUID, GetProjectLocation, GetParticipantIDFromAcquisition
+from nidm.core.Constants import DD
 
 import json
 from pprint import pprint
@@ -152,6 +153,122 @@ def GetImageFromURL(url):
         print("ERROR! Can't open url: %s" % url)
         return -1
 
+def GetDataElementMetadata(nidm_graph,de_uuid):
+    '''
+    Query nidm_graph for the nidm:PersonalDataElement de_uuid and return all of its metadata as a
+    BIDS-compliant participants sidecar file dictionary.
+
+    The returned dictionary has a single entry shaped like this (a key is only present when the
+    corresponding predicate is found in the graph):
+
+        {str(DD(source="participants.tsv", variable=<source variable>)): {
+            "source_variable": ..., "label": ..., "description": ..., "url": ...,
+            "associatedWith": ...,
+            "isAbout": [{"@id": ..., "label": ...}, ...],
+            "responseOptions": {"valueType": ..., "levels": ..., "minValue": ..., "maxValue": ...}
+        }}
+
+    :param nidm_graph: graph object whose query() method accepts a SPARQL string
+    :param de_uuid: identifier of the data element; expanded to a URI with Constants.NIIRI[de_uuid]
+    :return: the dictionary described above, or an empty dictionary when no nidm:sourceVariable is
+        found for de_uuid, so that callers can safely update() another dictionary with the result
+    '''
+
+    source_variable_uri = 'http://purl.org/nidash/nidm#sourceVariable'
+    is_about_uri = 'http://purl.org/nidash/nidm#isAbout'
+
+    # Query results are converted with str() before they are compared, so every predicate in the
+    # tables below must be a plain string too. The Constants.NIDM[...] terms are wrapped in str()
+    # because a str compared with a graph term object may not be equal even when both hold the
+    # same URI. NOTE(review): assumes str(Constants.NIDM[x]) yields the full URI - confirm.
+
+    # predicates copied to the top level of the sidecar entry -> JSON key
+    top_level_keys = {
+        source_variable_uri: 'source_variable',
+        'http://purl.org/dc/terms/description': 'description',
+        'http://www.w3.org/2000/01/rdf-schema#label': 'label',
+        'http://uri.interlex.org/ilx_0739289': 'associatedWith',
+        str(Constants.NIDM['url']): 'url',
+    }
+
+    # predicates stored inside the nested 'responseOptions' dictionary -> JSON key
+    response_option_keys = {
+        'http://purl.org/nidash/nidm#valueType': 'valueType',
+        'http://purl.org/nidash/nidm#levels': 'levels',
+        str(Constants.NIDM['minValue']): 'minValue',
+        str(Constants.NIDM['maxValue']): 'maxValue',
+    }
+
+    # query nidm_graph for Constants.NIIRI[de_uuid] rdf:type PersonalDataElement
+    query = """
+        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX prov: <http://www.w3.org/ns/prov#>
+        PREFIX niiri: <http://iri.nidash.org/>
+        PREFIX nidm: <http://purl.org/nidash/nidm#>
+        
+        select distinct ?p ?o
+        where {
+            
+            <%s> rdf:type nidm:PersonalDataElement ;
+                ?p ?o .
+        }  
+    """ % Constants.NIIRI[de_uuid]
+
+    # Single-valued metadata goes in temp_dict. isAbout is collected separately because a data
+    # element may be about several concepts and a dictionary keyed by predicate keeps only one.
+    temp_dict = {}
+    is_about_urls = []
+    for row in nidm_graph.query(query):
+        # str() turns the graph terms into plain strings that can be written to JSON
+        predicate, value = str(row[0]), str(row[1])
+        if predicate == is_about_uri:
+            is_about_urls.append(value)
+        else:
+            temp_dict[predicate] = value
+
+    # without a source variable there is no participants.tsv column to key the entry on
+    if source_variable_uri not in temp_dict:
+        return {}
+
+    # set up a dictionary entry for this column
+    current_tuple = str(DD(source="participants.tsv", variable=
+        temp_dict[source_variable_uri]))
+
+    # now set up a proper NIDM-style JSON data structure
+    # see Utils.py function map_variables_to_terms for example (column_to_terms[current_tuple])
+    entry = {}
+    for key,value in temp_dict.items():
+        if key in top_level_keys:
+            entry[top_level_keys[key]] = value
+        elif key in response_option_keys:
+            # create 'responseOptions' on first use, whichever of its members is seen first
+            entry.setdefault('responseOptions', {})[response_option_keys[key]] = value
+        # predicates in neither table (e.g. rdf:type) are not part of the sidecar
+
+    # 'isAbout' is only present when the data element has at least one isAbout statement
+    if is_about_urls:
+        entry['isAbout'] = []
+
+    for url in is_about_urls:
+        # do an additional query to see if there's a label associated with this isAbout URL
+        label_query = '''
+
+                        prefix prov: <http://www.w3.org/ns/prov#>
+                        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+                        select distinct ?label
+                        where {
+                            <%s> rdf:type prov:Entity ;
+                                rdfs:label ?label .    
+                        }      
+                    ''' % url
+        # one {'@id', 'label'} pair is stored per label found for this URL; a URL that has no
+        # prov:Entity label in the graph contributes nothing to the list
+        for label_row in nidm_graph.query(label_query):
+            entry['isAbout'].append({'@id': url, 'label': str(label_row[0])})
+
+    return {current_tuple: entry}
 
 
 def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
@@ -186,6 +303,9 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                     #add row to the pandas data frame
                     #data.append(obj)
                     participants.loc[row_index,BIDS_Constants.participants[fields].uri] = obj
+
+                    # find Data Element and add metadata to participants_json dictionary
+
             else:
                 #text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string
                 #to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the
@@ -219,7 +339,7 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                             ?pred ?value .
                         FILTER (regex(str(?pred) ,"%s","i" ))
                     }""" % (subj_uri,fields)
-                # print(query)
+                #print(query)
                 qres = nidm_graph.query(query)
 
                 for row in qres:
@@ -233,8 +353,12 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                         short_name = path_parts[2]
                     else:
                         short_name = url_parts.fragment
-                    participants_json[short_name] = {}
-                    participants_json[short_name]['TermURL'] = row[0]
+
+                    # find Data Element and add metadata to participants_json dictionary
+                    if 'de' not in locals():
+                        de = GetDataElementMetadata(nidm_graph, short_name)
+                    else:
+                        de.update(GetDataElementMetadata(nidm_graph, short_name))
 
                     participants.loc[row_index,str(short_name)] = str(row[1])
                     #data.append(str(row[1]))
@@ -251,6 +375,10 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
     with open(output_file + ".json",'w') as f:
         json.dump(participants_json,f,sort_keys=True,indent=2)
 
+
+    # save participant sidecar file
+    write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)
+
     return participants, participants_json
 
 
@@ -280,7 +408,11 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
 
         for key,value in BIDS_Constants.dataset_description.items():
             if BIDS_Constants.dataset_description[key]._uri == proj_key:
-                project_metadata[key] = project_metadata[proj_key]
+                # added since BIDS validator validates values of certain keys
+                if (key == "Authors") or (key == "Funding") or (key == "ReferencesAndLinks"):
+                    project_metadata[key] = [project_metadata[proj_key]]
+                else:
+                    project_metadata[key] = project_metadata[proj_key]
                 del project_metadata[proj_key]
                 key_found=1
                 continue
@@ -293,6 +425,34 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
 
     ##############################################################################
 
+def AddMetadataToImageSidecar(graph_entity,graph, output_directory, image_filename):
+    '''
+    Write the json sidecar file that accompanies image_filename in output_directory.
+    All predicate/object pairs stored in graph for graph_entity are compared with the mappings in
+    core/BIDS_Constants.py json_keys (BIDS key -> NIDM entry). The object of every pair whose
+    predicate equals a mapped NIDM entry is stored under the BIDS key; other pairs are skipped.
+    '''
+
+    statements = graph.query('''
+        Select DISTINCT ?p ?o
+        WHERE {
+            <%s> ?p ?o .
+        }
+    ''' %graph_entity)
+
+    # BIDS key -> value, filled from the statements whose predicate has a BIDS mapping
+    sidecar = {}
+    for statement in statements:
+        for bids_key in BIDS_Constants.json_keys:
+            if BIDS_Constants.json_keys[bids_key] == statement[0]:
+                sidecar[bids_key] = statement[1]
+                break
+
+    # the sidecar shares the image's base name, with a .json extension
+    with open(join(output_directory,image_filename + ".json"),"w") as fp:
+        json.dump(sidecar,fp,indent=2)
+
+
 def ProcessFiles(graph,scan_type,output_directory,project_location,args):
     '''
     This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph
@@ -354,6 +514,7 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
                         print("Trying to copy file from %s" % (location))
                         try:
                             copyfile(location, join(output_directory, sub_dir, bids_ext, basename(filename)))
+
                         except:
                             print("ERROR! Failed to find file %s on filesystem..." % location)
                             if not args.no_downloads:
@@ -367,10 +528,22 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
                                     sys.exc_info()[0], location))
                                     GetImageFromAWS(location=location, output_file=
                                         join(output_directory, sub_dir, bids_ext, basename(filename)),args=args)
+
                 else:
                     # copy temporary file to BIDS directory
                     copyfile(ret, join(output_directory, sub_dir, bids_ext, basename(filename)))
 
+                # if we were able to copy the image file then add the json sidecar file with additional metadata
+                # available in the NIDM file
+                if isfile(join(output_directory, sub_dir, bids_ext, basename(filename))):
+                    # get rest of metadata for this acquisition and store in sidecar file
+                    if "gz" in basename(filename):
+                        image_filename = splitext(splitext(basename(filename))[0])[0]
+                    else:
+                        image_filename = splitext(basename(filename))[0]
+                    AddMetadataToImageSidecar(graph_entity=acq,graph=graph,output_directory=join(output_directory,
+                            sub_dir,bids_ext),image_filename=image_filename)
+
             # if this is a DWI scan then we should copy over the b-value and b-vector files
             if bids_ext == 'dwi':
                 # search for entity uuid with rdf:type nidm:b-value that was generated by activity
@@ -529,6 +702,9 @@ def main(argv):
             print("Reading RDF file as %s..." % format)
             #load NIDM graph into NIDM-Exp API objects
             nidm_project = read_nidm(rdf_file)
+            # temporary save nidm_project
+            with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
+                print(nidm_project.serializeTurtle(), file=f)
             print("RDF file sucessfully read")
             format_found=True
             break
diff --git a/nidm/version.py b/nidm/version.py
@@ -4,7 +4,7 @@
 # Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
 _version_major = 3
 _version_minor = 9 
-_version_micro = '4'  # use '' for first of series, number for 1 and above
+_version_micro = '5'  # use '' for first of series, number for 1 and above
 _version_extra = ''
 # _version_extra = ''  # Uncomment this for full releases