Skip to content

Commit 0b35cb8

Browse files
authored
Merge pull request #410 from dbkeator/master
fixed bugs when working with nidm files containing only segmentation data
2 parents ede6549 + b9e37cb commit 0b35cb8

File tree

9 files changed

+324
-82
lines changed

9 files changed

+324
-82
lines changed

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ install_requires =
4141
patsy
4242
prov
4343
pybids >= 0.12.4
44-
pydot
44+
pydot ~= 1.4.2
4545
pygithub
4646
pyld >= 1.0.5, <3.0
4747
python-dateutil ~= 2.0

src/nidm/core/Constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
5151
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
5252
VC = Namespace("http://www.w3.org/2006/vcard/ns#")
53-
DICOM = Namespace("http://neurolex.org/wiki/Category:DICOM_term/")
53+
DICOM = Namespace("http://neurolex.org/wiki/Category/DICOM_term/")
5454
DCTYPES = Namespace("http://purl.org/dc/dcmitype/")
5555
NCIT = Namespace("http://ncitt.ncit.nih.gov/")
5656
DCAT = Namespace("http://www.w3.org/ns/dcat#")

src/nidm/experiment/Core.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,19 @@ def getUUID():
2525
return uid
2626

2727

28+
def find_in_namespaces(search_uri, namespaces):
    """
    Locate the namespace whose URI matches ``search_uri``.

    The comparison is done on the ``str()`` form of both the candidate's
    ``uri`` attribute and ``search_uri``, so the two do not need to be of
    the same type to match.

    :param search_uri: URI to look for
    :param namespaces: iterable of objects exposing a ``uri`` attribute
    :return: the first matching namespace object, or False if none matches
    """
    return next(
        (
            candidate
            for candidate in namespaces
            if str(candidate.uri) == str(search_uri)
        ),
        False,
    )
39+
40+
2841
class Core:
2942
"""Base-class for NIDM-Experimenent
3043

src/nidm/experiment/Project.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import prov.model as pm
2-
from .Core import Core, getUUID
2+
from .Core import Core, find_in_namespaces, getUUID
33
from ..core import Constants
44

55

@@ -109,9 +109,56 @@ def add_derivatives(self, derivative):
109109
self._derivatives.extend([derivative])
110110
# create links in graph
111111
# session.add_attributes({str("dct:isPartOf"):self})
112-
derivative.add_attributes(
113-
{pm.QualifiedName(pm.Namespace("dct", Constants.DCT), "isPartOf"): self}
112+
113+
# check if dct namespace is already in graph and use it otherwise create it...
114+
found_dct = find_in_namespaces(
115+
search_uri=str(Constants.DCT), namespaces=self.graph.namespaces
116+
)
117+
found_niiri = find_in_namespaces(
118+
search_uri=str(Constants.NIIRI), namespaces=self.graph.namespaces
114119
)
120+
if found_dct and found_niiri:
121+
derivative.add_attributes(
122+
{found_dct["isPartOf"]: found_niiri[self.get_uuid()]}
123+
)
124+
elif found_dct and not found_niiri:
125+
derivative.add_attributes(
126+
{
127+
found_dct["isPartOf"]: pm.QualifiedName(
128+
namespace=pm.Namespace(
129+
uri=str(Constants.NIIRI), prefix="niiri"
130+
),
131+
localpart=self.get_uuid(),
132+
)
133+
}
134+
)
135+
elif not found_dct and found_niiri:
136+
derivative.add_attributes(
137+
{
138+
pm.QualifiedName(
139+
namespace=pm.Namespace(
140+
uri=str(Constants.DCT), prefix="dct"
141+
),
142+
localpart="isPartOf",
143+
): found_niiri[self.get_uuid()]
144+
}
145+
)
146+
else:
147+
derivative.add_attributes(
148+
{
149+
pm.QualifiedName(
150+
namespace=pm.Namespace(
151+
uri=str(Constants.DCT), prefix="dct"
152+
),
153+
localpart="isPartOf",
154+
): pm.QualifiedName(
155+
namespace=pm.Namespace(
156+
uri=str(Constants.NIIRI), prefix="niiri"
157+
),
158+
localpart=self.get_uuid(),
159+
)
160+
}
161+
)
115162
return True
116163

117164
def add_dataelements(self, dataelement):

src/nidm/experiment/Query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ def getProjectAcquisitionObjects(nidm_file_list, project_id):
660660
# find all the projects
661661
for project, _, _ in rdf_graph.triples((None, None, Constants.NIDM["Project"])):
662662
# check if it is our project
663-
if str(project) == project_uuid:
663+
if (str(project) == project_uuid) or (str(project) == str(project_uuid)):
664664
for session, _, _ in rdf_graph.triples(
665665
(None, isa, Constants.NIDM["Session"])
666666
):

src/nidm/experiment/Utils.py

Lines changed: 103 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from binascii import crc32
22
import getpass
3+
import io
34
import json
45
import logging
56
import os
@@ -14,7 +15,7 @@
1415
import prov.model as pm
1516
from prov.model import Identifier
1617
from prov.model import Namespace as provNamespace
17-
from prov.model import QualifiedName
18+
from prov.model import ProvDocument, QualifiedName
1819
from rapidfuzz import fuzz
1920
from rdflib import RDF, RDFS, Graph, Literal, Namespace, URIRef, util
2021
from rdflib.namespace import XSD, split_uri
@@ -26,7 +27,7 @@
2627
from .AcquisitionObject import AcquisitionObject
2728
from .AssessmentAcquisition import AssessmentAcquisition
2829
from .AssessmentObject import AssessmentObject
29-
from .Core import getUUID
30+
from .Core import find_in_namespaces, getUUID
3031
from .DataElement import DataElement
3132
from .Derivative import Derivative
3233
from .DerivativeObject import DerivativeObject
@@ -390,41 +391,6 @@ def read_nidm(nidmDoc):
390391
}
391392
"""
392393

393-
# add all nidm:DataElements in graph
394-
qres = rdf_graph_parse.query(query)
395-
for row in qres:
396-
print(row)
397-
# instantiate a data element class assigning it the existing uuid
398-
de = DataElement(project=project, uuid=row["uuid"], add_default_type=False)
399-
# get the rest of the attributes for this data element and store
400-
add_metadata_for_subject(
401-
rdf_graph_parse, row["uuid"], project.graph.namespaces, de
402-
)
403-
404-
# now we need to check if there are labels for data element isAbout entries, if so add them.
405-
query2 = f"""
406-
407-
prefix nidm: <http://purl.org/nidash/nidm#>
408-
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
409-
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
410-
prefix prov: <http://www.w3.org/ns/prov#>
411-
412-
select distinct ?id ?label
413-
where {{
414-
<{row["uuid"]}> nidm:isAbout ?id .
415-
416-
?id rdf:type prov:Entity ;
417-
rdfs:label ?label .
418-
}}
419-
420-
"""
421-
# print(query2)
422-
qres2 = rdf_graph_parse.query(query2)
423-
424-
# add this tuple to graph
425-
for row2 in qres2:
426-
project.graph.entity(row2[0], {"rdfs:label": row2[1]})
427-
428394
# check for Derivatives.
429395
# WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
430396
# nidm:FSLStatsCollection, or nidm:ANTSStatsCollection which are subclasses of nidm:Derivatives
@@ -474,6 +440,76 @@ def read_nidm(nidmDoc):
474440
rdf_graph_parse, row["uuid"], project.graph.namespaces, derivobj
475441
)
476442

443+
# add all nidm:DataElements in graph
444+
qres = rdf_graph_parse.query(query)
445+
for row in qres:
446+
print(row)
447+
# instantiate a data element class assigning it the existing uuid
448+
de = DataElement(project=project, uuid=row["uuid"], add_default_type=False)
449+
# get the rest of the attributes for this data element and store
450+
add_metadata_for_subject(
451+
rdf_graph_parse, row["uuid"], project.graph.namespaces, de
452+
)
453+
454+
# now we need to check if there are labels for data element isAbout entries, if so add them.
455+
query2 = f"""
456+
457+
prefix nidm: <http://purl.org/nidash/nidm#>
458+
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
459+
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
460+
prefix prov: <http://www.w3.org/ns/prov#>
461+
462+
select distinct ?id ?label
463+
where {{
464+
<{row["uuid"]}> nidm:isAbout ?id .
465+
466+
?id rdf:type prov:Entity ;
467+
rdfs:label ?label .
468+
}}
469+
470+
"""
471+
# print(query2)
472+
qres2 = rdf_graph_parse.query(query2)
473+
474+
# check qres2 length and if zero then skip all this converting from prov to rdf and back to add
475+
# data element isAbout definitions that are identified with a url and not a qualified name as required by prov
476+
# but not rdf in general
477+
if len(qres2) == 0:
478+
continue
479+
480+
# Step 1: Convert `project.graph` (ProvDocument) to an RDFLib Graph
481+
rdf_graph = Graph()
482+
rdf_graph.parse(
483+
data=project.serializeTurtle(), format="turtle"
484+
) # Proper RDF export
485+
486+
# Step 2: Modify RDF Graph (Keeping Full URIs)
487+
for row2 in qres2:
488+
entity_uri = URIRef(str(row2[0])) # Preserve full URI
489+
label = Literal(str(row2[1])) # Convert label to Literal
490+
491+
# Add triples directly to RDF graph
492+
rdf_graph.add((entity_uri, RDF.type, Constants.PROV.Entity))
493+
rdf_graph.add((entity_uri, RDFS.label, label))
494+
495+
# Step 3: Serialize modified RDF graph to an in-memory bytes buffer
496+
rdf_bytes = io.BytesIO()
497+
rdf_graph.serialize(
498+
destination=rdf_bytes, format="turtle"
499+
) # Use "turtle" correctly
500+
501+
# Step 4: Convert RDF bytes to a string for `prov` to read
502+
rdf_string = rdf_bytes.getvalue().decode("utf-8")
503+
504+
# Step 5: Create a new ProvDocument and read RDF data from the string
505+
project.graph = ProvDocument()
506+
project.graph = project.graph.deserialize(
507+
source=io.StringIO(rdf_string), format="rdf"
508+
)
509+
510+
# Close the BytesIO buffer (good practice)
511+
rdf_bytes.close()
512+
477513
return project
478514

479515

@@ -489,19 +525,6 @@ def get_RDFliteral_type(rdf_literal):
489525
return pm.Literal(rdf_literal, datatype=pm.XSD["string"])
490526

491527

492-
def find_in_namespaces(search_uri, namespaces):
    """
    Scan ``namespaces`` for an entry whose ``uri`` equals ``search_uri``.

    The two values are compared directly with ``==``; no string conversion
    is applied to either side.

    :param search_uri: URI to look for
    :param namespaces: iterable of objects exposing a ``uri`` attribute
    :return: the first matching namespace object, or False if none matches
    """
    hits = filter(lambda entry: entry.uri == search_uri, namespaces)
    return next(hits, False)
503-
504-
505528
def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
506529
"""
507530
Cycles through triples for a particular subject and adds them to the nidm_obj
@@ -553,11 +576,25 @@ def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
553576
# so we check explicitly here
554577
if obj_nm == str(Constants.PROV):
555578
nidm_obj.add_attributes(
556-
{predicate: QualifiedName(Constants.PROV[obj_term])}
579+
{
580+
predicate: pm.QualifiedName(
581+
namespace=pm.Namespace(
582+
uri=Constants.PROV, prefix="prov"
583+
),
584+
localpart=str(obj_term),
585+
)
586+
}
557587
)
558588
elif obj_nm == str(Constants.NIDM):
559589
nidm_obj.add_attributes(
560-
{predicate: QualifiedName(Constants.NIDM[obj_term])}
590+
{
591+
predicate: pm.QualifiedName(
592+
namespace=pm.Namespace(
593+
uri=Constants.NIDM, prefix="prov"
594+
),
595+
localpart=str(obj_term),
596+
)
597+
}
561598
)
562599
else:
563600
found_uri = find_in_namespaces(
@@ -566,20 +603,27 @@ def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
566603
# if obj_nm is not in namespaces then it must just be part of some URI in the triple
567604
# so just add it as a plain rdflib URIRef
568605
if not found_uri:
569-
nidm_obj.add_attributes({predicate: Identifier(objects)})
606+
nidm_obj.add_attributes({predicate: URIRef(objects)})
570607
# else add as explicit prov.QualifiedName because it's easier to read
571608
else:
572609
nidm_obj.add_attributes(
573610
{predicate: pm.QualifiedName(found_uri, obj_term)}
574611
)
575612
except Exception:
576-
nidm_obj.add_attributes(
577-
{
578-
predicate: pm.QualifiedName(
579-
namespace=Namespace(str(objects)), localpart=""
580-
)
581-
}
613+
# here we likely have a uri without a localpart so we'll search and see if we have a namespace for
614+
# it.
615+
found_uri = find_in_namespaces(
616+
search_uri=URIRef(str(objects)), namespaces=namespaces
582617
)
618+
619+
# if objects is not in namespaces just add as a generic url
620+
if not found_uri:
621+
nidm_obj.add_attributes({predicate: URIRef(objects)})
622+
# else add as explicit prov.QualifiedName because it's easier to read
623+
else:
624+
nidm_obj.add_attributes(
625+
{predicate: pm.QualifiedName(found_uri, "")}
626+
)
583627
else:
584628
# check if this is a qname and if so expand it
585629
# added to handle when a value is a qname. this should expand it....

0 commit comments

Comments
 (0)