Updated read_nidm to restore the (entity, label) tuples of isAbout concept associations, which were missing after reading an existing NIDM document. Also updated nidm2bids to save a participants.json file with the annotations from the NIDM source document.

dbkeator · dbkeator · commit 046431e93101 · 2021-12-07T15:11:08.000-08:00
diff --git a/nidm/experiment/Utils.py b/nidm/experiment/Utils.py
@@ -101,11 +101,6 @@ def read_nidm(nidmDoc):
     rdf_graph_parse = rdf_graph.parse(nidmDoc,format=util.guess_format(nidmDoc))
 
 
-
-    # add known CDE graphs
-    #rdf_graph_parse = rdf_graph.parse
-
-
     # Query graph for project metadata and create project level objects
     # Get subject URI for project
     proj_id=None
@@ -144,7 +139,6 @@ def read_nidm(nidmDoc):
         add_metadata_for_subject (rdf_graph_parse,proj_id,project.graph.namespaces,project)
 
 
-
     #Query graph for sessions, instantiate session objects, and add to project._session list
     #Get subject URI for sessions
     for s in rdf_graph_parse.subjects(predicate=RDF.type,object=URIRef(Constants.NIDM_SESSION.uri)):
@@ -288,22 +282,47 @@ def read_nidm(nidmDoc):
 
     # Query graph for nidm:DataElements and instantiate a nidm:DataElement class and add them to the project
     query = '''
-            prefix nidm: <http://purl.org/nidash/nidm#>  
-            select distinct ?uuid
-            where {
-                ?uuid a/rdfs:subClassOf* nidm:DataElement .
-     			
-            }
-            '''
+                prefix nidm: <http://purl.org/nidash/nidm#>
+                prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
+                select distinct ?uuid
+                where {
+                    ?uuid a/rdfs:subClassOf* nidm:DataElement .
+
+                }
+                '''
 
     # add all nidm:DataElements in graph
     qres = rdf_graph_parse.query(query)
     for row in qres:
+        print(row)
         # instantiate a data element class assigning it the existing uuid
-        de = DataElement(project=project, uuid=row['uuid'],add_default_type=False)
+        de = DataElement(project=project, uuid=row['uuid'], add_default_type=False)
         # get the rest of the attributes for this data element and store
         add_metadata_for_subject(rdf_graph_parse, row['uuid'], project.graph.namespaces, de)
 
+        # now we need to check if there are labels for data element isAbout entries, if so add them.
+        query2 = '''
+
+                prefix nidm: <http://purl.org/nidash/nidm#>
+                prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                prefix prov: <http://www.w3.org/ns/prov#>
+
+                select distinct ?id ?label
+                where {
+                    <%s> nidm:isAbout ?id .
+
+                    ?id rdf:type prov:Entity ;
+                        rdfs:label ?label .  
+                }
+
+            ''' % row['uuid']
+        # print(query2)
+        qres2 = rdf_graph_parse.query(query2)
+
+        # add this tuple to graph
+        for row2 in qres2:
+            project.graph.entity(row2[0], {'rdfs:label': row2[1]})
 
     # check for Derivatives.
     # WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
@@ -486,7 +505,7 @@ def add_metadata_for_subject (rdf_graph,subject_uri,namespaces,nidm_obj):
                         # add rest of meatadata about the agent
                         add_metadata_for_subject(rdf_graph=rdf_graph, subject_uri=agent_obj.identifier,
                                                  namespaces=namespaces, nidm_obj=generic_agent)
-                    # try and split uri into namespacea and local parts, if fails just use entire URI
+                    # try and split uri into namespace and local parts, if fails just use entire URI
                     try:
                         # create qualified names for objects
                         obj_nm, obj_term = split_uri(r_obj.identifier)
diff --git a/nidm/experiment/tools/nidm2bids.py b/nidm/experiment/tools/nidm2bids.py
@@ -42,8 +42,9 @@
 from nidm.experiment import Project,Session,Acquisition,AcquisitionObject,DemographicsObject,AssessmentObject, MRObject
 from nidm.core import BIDS_Constants,Constants
 from prov.model import PROV_LABEL,PROV_TYPE
-from nidm.experiment.Utils import read_nidm
+from nidm.experiment.Utils import read_nidm, write_json_mapping_file
 from nidm.experiment.Query import GetProjectsUUID, GetProjectLocation, GetParticipantIDFromAcquisition
+from nidm.core.Constants import DD
 
 import json
 from pprint import pprint
@@ -152,6 +153,122 @@ def GetImageFromURL(url):
         print("ERROR! Can't open url: %s" % url)
         return -1
 
+def GetDataElementMetadata(nidm_graph,de_uuid):
+    '''
+    Query nidm_graph for the nidm:PersonalDataElement identified by de_uuid and return its metadata as a
+    BIDS-compliant participants sidecar file dictionary.
+
+    :param nidm_graph: graph to search; must provide a query() method that accepts a SPARQL string and
+        yields rows indexable by position
+    :param de_uuid: identifier of the data element, expanded to a full URI with Constants.NIIRI[de_uuid]
+    :return: dictionary with the single key str(DD(source="participants.tsv", variable=<sourceVariable>))
+        mapped to the NIDM-style JSON annotations found for the data element: source_variable,
+        description, label, associatedWith, url, responseOptions (valueType, levels, minValue,
+        maxValue) and isAbout, a list of {'@id': <concept URL>, 'label': <rdfs:label>} entries.
+        An empty dictionary is returned when no nidm:sourceVariable is found for de_uuid, for
+        example because de_uuid is not typed as a nidm:PersonalDataElement in nidm_graph.
+    '''
+
+    # namespace of the NIDM predicates matched below, same URI as the nidm: prefix of the query
+    nidm_ns = 'http://purl.org/nidash/nidm#'
+    source_variable_key = nidm_ns + 'sourceVariable'
+    is_about_key = nidm_ns + 'isAbout'
+
+    # predicates whose value is copied to a top-level annotation key
+    top_level_keys = {
+        source_variable_key: 'source_variable',
+        'http://purl.org/dc/terms/description': 'description',
+        'http://www.w3.org/2000/01/rdf-schema#label': 'label',
+        'http://uri.interlex.org/ilx_0739289': 'associatedWith',
+        # str() so these compare string-to-string with the str() predicates collected below
+        str(Constants.NIDM['url']): 'url',
+    }
+
+    # predicates whose value is nested under the 'responseOptions' annotation key
+    response_option_keys = {
+        nidm_ns + 'valueType': 'valueType',
+        nidm_ns + 'levels': 'levels',
+        str(Constants.NIDM['minValue']): 'minValue',
+        str(Constants.NIDM['maxValue']): 'maxValue',
+    }
+
+    # query nidm_graph for Constants.NIIRI[de_uuid] rdf:type PersonalDataElement
+    query = """
+        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX prov: <http://www.w3.org/ns/prov#>
+        PREFIX niiri: <http://iri.nidash.org/>
+        PREFIX nidm: <http://purl.org/nidash/nidm#>
+        
+        select distinct ?p ?o
+        where {
+            
+            <%s> rdf:type nidm:PersonalDataElement ;
+                ?p ?o .
+        }  
+    """ % Constants.NIIRI[de_uuid]
+
+    qres = nidm_graph.query(query)
+
+    # plain-string copy of the single-valued predicates of this data element
+    metadata = {}
+    # a data element may be about several concepts, so every isAbout URL is kept in a list
+    # instead of letting a later one overwrite an earlier one in metadata
+    is_about_urls = []
+    for row in qres:
+        predicate = str(row[0])
+        value = str(row[1])
+        if predicate == is_about_key:
+            is_about_urls.append(value)
+        else:
+            metadata[predicate] = value
+
+    # without a source variable there is no participants.tsv column to annotate
+    if source_variable_key not in metadata:
+        return {}
+
+    # set up a dictionary entry for this column
+    current_tuple = str(DD(source="participants.tsv", variable=
+        metadata[source_variable_key]))
+
+    # NIDM-style JSON data structure for this column, see Utils.py function map_variables_to_terms
+    # for an example (column_to_terms[current_tuple])
+    annotations = {}
+    de = {current_tuple: annotations}
+
+    for key, value in metadata.items():
+        if key in top_level_keys:
+            annotations[top_level_keys[key]] = value
+        elif key in response_option_keys:
+            # create responseOptions on first use so that minValue/maxValue can be stored even
+            # when neither valueType nor levels is present
+            response_options = annotations.setdefault('responseOptions', {})
+            response_options[response_option_keys[key]] = value
+
+    # the isAbout key is only present when the data element has at least one isAbout entry
+    if is_about_urls:
+        annotations['isAbout'] = []
+
+    for url in is_about_urls:
+        # query for label for this isAbout URL
+        label_query = '''
+
+                        prefix prov: <http://www.w3.org/ns/prov#>
+                        prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+                        select distinct ?label
+                        where {
+                            <%s> rdf:type prov:Entity ;
+                                rdfs:label ?label .    
+                        }      
+                    ''' % url
+
+        # only concepts with a label in nidm_graph are reported, each paired with its own URL
+        for row in nidm_graph.query(label_query):
+            concept = {'@id': url, 'label': str(row[0])}
+            annotations['isAbout'].append(concept)
+
+    return de
 
 
 def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
@@ -186,6 +303,9 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                     #add row to the pandas data frame
                     #data.append(obj)
                     participants.loc[row_index,BIDS_Constants.participants[fields].uri] = obj
+
+                    # find Data Element and add metadata to participants_json dictionary
+
             else:
                 #text matching task, remove basepart of URIs and try to fuzzy match the field in the part_fields parameter string
                 #to the "term" part of a qname URI...this part let's a user simply ask for "age" for example without knowing the
@@ -219,7 +339,7 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                             ?pred ?value .
                         FILTER (regex(str(?pred) ,"%s","i" ))
                     }""" % (subj_uri,fields)
-                # print(query)
+                #print(query)
                 qres = nidm_graph.query(query)
 
                 for row in qres:
@@ -233,8 +353,12 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
                         short_name = path_parts[2]
                     else:
                         short_name = url_parts.fragment
-                    participants_json[short_name] = {}
-                    participants_json[short_name]['TermURL'] = row[0]
+
+                    # find Data Element and add metadata to participants_json dictionary
+                    if 'de' not in locals():
+                        de = GetDataElementMetadata(nidm_graph, short_name)
+                    else:
+                        de.update(GetDataElementMetadata(nidm_graph, short_name))
 
                     participants.loc[row_index,str(short_name)] = str(row[1])
                     #data.append(str(row[1]))
@@ -251,6 +375,10 @@ def CreateBIDSParticipantFile(nidm_graph,output_file,participant_fields):
     with open(output_file + ".json",'w') as f:
         json.dump(participants_json,f,sort_keys=True,indent=2)
 
+
+    # save participant sidecar file
+    write_json_mapping_file(de, join(splitext(output_file)[0] + ".json"), True)
+
     return participants, participants_json
 
 
@@ -280,7 +408,11 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
 
         for key,value in BIDS_Constants.dataset_description.items():
             if BIDS_Constants.dataset_description[key]._uri == proj_key:
-                project_metadata[key] = project_metadata[proj_key]
+                # added since BIDS validator validates values of certain keys
+                if (key == "Authors") or (key == "Funding") or (key == "ReferencesAndLinks"):
+                    project_metadata[key] = [project_metadata[proj_key]]
+                else:
+                    project_metadata[key] = project_metadata[proj_key]
                 del project_metadata[proj_key]
                 key_found=1
                 continue
@@ -293,6 +425,34 @@ def NIDMProject2BIDSDatasetDescriptor(nidm_graph,output_directory):
 
     ##############################################################################
 
+def AddMetadataToImageSidecar(graph_entity,graph, output_directory, image_filename):
+    '''
+    Create the json sidecar file that accompanies image_filename in output_directory. The predicates
+    stored for graph_entity in graph are compared with the values (NIDM entries) of json_keys in
+    core/BIDS_Constants.py and the objects of matching predicates are saved under the BIDS keys.
+    '''
+    # select every predicate/object pair stored for graph_entity
+    sparql = '''
+        Select DISTINCT ?p ?o
+        WHERE {
+            <%s> ?p ?o .
+        }
+    ''' %graph_entity
+
+    # BIDS key -> object, using the first json_keys entry whose NIDM value equals the predicate
+    sidecar = {}
+    for row in graph.query(sparql):
+        for bids_key in BIDS_Constants.json_keys:
+            if BIDS_Constants.json_keys[bids_key] == row[0]:
+                if bids_key is not None:
+                    sidecar[bids_key] = row[1]
+                break
+
+    # write the collected metadata out to the appropriate sidecar filename
+    with open(join(output_directory,image_filename + ".json"),"w") as fp:
+        json.dump(sidecar,fp,indent=2)
+
+
 def ProcessFiles(graph,scan_type,output_directory,project_location,args):
     '''
     This function will essentially cycle through the acquisition objects in the NIDM file loaded into graph
@@ -354,6 +514,7 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
                         print("Trying to copy file from %s" % (location))
                         try:
                             copyfile(location, join(output_directory, sub_dir, bids_ext, basename(filename)))
+
                         except:
                             print("ERROR! Failed to find file %s on filesystem..." % location)
                             if not args.no_downloads:
@@ -367,10 +528,22 @@ def ProcessFiles(graph,scan_type,output_directory,project_location,args):
                                     sys.exc_info()[0], location))
                                     GetImageFromAWS(location=location, output_file=
                                         join(output_directory, sub_dir, bids_ext, basename(filename)),args=args)
+
                 else:
                     # copy temporary file to BIDS directory
                     copyfile(ret, join(output_directory, sub_dir, bids_ext, basename(filename)))
 
+                # if we were able to copy the image file then add the json sidecar file with additional metadata
+                # available in the NIDM file
+                if isfile(join(output_directory, sub_dir, bids_ext, basename(filename))):
+                    # get rest of metadata for this acquisition and store in sidecar file
+                    if "gz" in basename(filename):
+                        image_filename = splitext(splitext(basename(filename))[0])[0]
+                    else:
+                        image_filename = splitext(basename(filename))[0]
+                    AddMetadataToImageSidecar(graph_entity=acq,graph=graph,output_directory=join(output_directory,
+                            sub_dir,bids_ext),image_filename=image_filename)
+
             # if this is a DWI scan then we should copy over the b-value and b-vector files
             if bids_ext == 'dwi':
                 # search for entity uuid with rdf:type nidm:b-value that was generated by activity
@@ -529,6 +702,9 @@ def main(argv):
             print("Reading RDF file as %s..." % format)
             #load NIDM graph into NIDM-Exp API objects
             nidm_project = read_nidm(rdf_file)
+            # temporary save nidm_project
+            with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
+                print(nidm_project.serializeTurtle(), file=f)
             print("RDF file sucessfully read")
             format_found=True
             break
diff --git a/nidm/version.py b/nidm/version.py
@@ -4,7 +4,7 @@
 # Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
 _version_major = 3
 _version_minor = 9 
-_version_micro = '4'  # use '' for first of series, number for 1 and above
+_version_micro = '5'  # use '' for first of series, number for 1 and above
 _version_extra = ''
 # _version_extra = ''  # Uncomment this for full releases