1
1
from binascii import crc32
2
2
import getpass
3
+ import io
3
4
import json
4
5
import logging
5
6
import os
14
15
import prov .model as pm
15
16
from prov .model import Identifier
16
17
from prov .model import Namespace as provNamespace
17
- from prov .model import QualifiedName
18
+ from prov .model import ProvDocument , QualifiedName
18
19
from rapidfuzz import fuzz
19
20
from rdflib import RDF , RDFS , Graph , Literal , Namespace , URIRef , util
20
21
from rdflib .namespace import XSD , split_uri
26
27
from .AcquisitionObject import AcquisitionObject
27
28
from .AssessmentAcquisition import AssessmentAcquisition
28
29
from .AssessmentObject import AssessmentObject
29
- from .Core import getUUID
30
+ from .Core import find_in_namespaces , getUUID
30
31
from .DataElement import DataElement
31
32
from .Derivative import Derivative
32
33
from .DerivativeObject import DerivativeObject
@@ -390,41 +391,6 @@ def read_nidm(nidmDoc):
390
391
}
391
392
"""
392
393
393
- # add all nidm:DataElements in graph
394
- qres = rdf_graph_parse .query (query )
395
- for row in qres :
396
- print (row )
397
- # instantiate a data element class assigning it the existing uuid
398
- de = DataElement (project = project , uuid = row ["uuid" ], add_default_type = False )
399
- # get the rest of the attributes for this data element and store
400
- add_metadata_for_subject (
401
- rdf_graph_parse , row ["uuid" ], project .graph .namespaces , de
402
- )
403
-
404
- # now we need to check if there are labels for data element isAbout entries, if so add them.
405
- query2 = f"""
406
-
407
- prefix nidm: <http://purl.org/nidash/nidm#>
408
- prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
409
- prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
410
- prefix prov: <http://www.w3.org/ns/prov#>
411
-
412
- select distinct ?id ?label
413
- where {{
414
- <{ row ["uuid" ]} > nidm:isAbout ?id .
415
-
416
- ?id rdf:type prov:Entity ;
417
- rdfs:label ?label .
418
- }}
419
-
420
- """
421
- # print(query2)
422
- qres2 = rdf_graph_parse .query (query2 )
423
-
424
- # add this tuple to graph
425
- for row2 in qres2 :
426
- project .graph .entity (row2 [0 ], {"rdfs:label" : row2 [1 ]})
427
-
428
394
# check for Derivatives.
429
395
# WIP: Currently FSL, Freesurfer, and ANTS tools add these derivatives as nidm:FSStatsCollection,
430
396
# nidm:FSLStatsCollection, or nidm:ANTSStatsCollection which are subclasses of nidm:Derivatives
@@ -474,6 +440,76 @@ def read_nidm(nidmDoc):
474
440
rdf_graph_parse , row ["uuid" ], project .graph .namespaces , derivobj
475
441
)
476
442
443
+ # add all nidm:DataElements in graph
444
+ qres = rdf_graph_parse .query (query )
445
+ for row in qres :
446
+ print (row )
447
+ # instantiate a data element class assigning it the existing uuid
448
+ de = DataElement (project = project , uuid = row ["uuid" ], add_default_type = False )
449
+ # get the rest of the attributes for this data element and store
450
+ add_metadata_for_subject (
451
+ rdf_graph_parse , row ["uuid" ], project .graph .namespaces , de
452
+ )
453
+
454
+ # now we need to check if there are labels for data element isAbout entries, if so add them.
455
+ query2 = f"""
456
+
457
+ prefix nidm: <http://purl.org/nidash/nidm#>
458
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
459
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
460
+ prefix prov: <http://www.w3.org/ns/prov#>
461
+
462
+ select distinct ?id ?label
463
+ where {{
464
+ <{ row ["uuid" ]} > nidm:isAbout ?id .
465
+
466
+ ?id rdf:type prov:Entity ;
467
+ rdfs:label ?label .
468
+ }}
469
+
470
+ """
471
+ # print(query2)
472
+ qres2 = rdf_graph_parse .query (query2 )
473
+
474
+ # check qres2 length and if zero then skip all this converting from prov to rdf and back to add
475
+ # data element isAbout definitions that are identified with a url and not a qualified name as required by prov
476
+ # but not rdf in general
477
+ if len (qres2 ) == 0 :
478
+ continue
479
+
480
+ # Step 1: Convert `project.graph` (ProvDocument) to an RDFLib Graph
481
+ rdf_graph = Graph ()
482
+ rdf_graph .parse (
483
+ data = project .serializeTurtle (), format = "turtle"
484
+ ) # Proper RDF export
485
+
486
+ # Step 2: Modify RDF Graph (Keeping Full URIs)
487
+ for row2 in qres2 :
488
+ entity_uri = URIRef (str (row2 [0 ])) # Preserve full URI
489
+ label = Literal (str (row2 [1 ])) # Convert label to Literal
490
+
491
+ # Add triples directly to RDF graph
492
+ rdf_graph .add ((entity_uri , RDF .type , Constants .PROV .Entity ))
493
+ rdf_graph .add ((entity_uri , RDFS .label , label ))
494
+
495
+ # Step 3: Serialize modified RDF graph to an in-memory bytes buffer
496
+ rdf_bytes = io .BytesIO ()
497
+ rdf_graph .serialize (
498
+ destination = rdf_bytes , format = "turtle"
499
+ ) # Use "turtle" correctly
500
+
501
+ # Step 4: Convert RDF bytes to a string for `prov` to read
502
+ rdf_string = rdf_bytes .getvalue ().decode ("utf-8" )
503
+
504
+ # Step 5: Create a new ProvDocument and read RDF data from the string
505
+ project .graph = ProvDocument ()
506
+ project .graph = project .graph .deserialize (
507
+ source = io .StringIO (rdf_string ), format = "rdf"
508
+ )
509
+
510
+ # Close the BytesIO buffer (good practice)
511
+ rdf_bytes .close ()
512
+
477
513
return project
478
514
479
515
@@ -489,19 +525,6 @@ def get_RDFliteral_type(rdf_literal):
489
525
return pm .Literal (rdf_literal , datatype = pm .XSD ["string" ])
490
526
491
527
492
def find_in_namespaces(search_uri, namespaces):
    """
    Look through namespaces for the one whose URI equals search_uri.

    :param search_uri: URI to look for; may be a plain str or a str-like
        object (e.g. an rdflib URIRef)
    :param namespaces: iterable of namespace objects exposing a ``uri``
        attribute
    :return: the first matching namespace object (not just its URI) if
        found, else False
    """

    # Compare on the plain-string form of both sides: some str subclasses
    # (rdflib identifiers, for instance) only compare equal to objects of
    # the exact same type, which would make a str-vs-URIRef lookup silently
    # fail even though the URIs are textually identical.
    target = str(search_uri)

    for namespace in namespaces:
        if str(namespace.uri) == target:
            return namespace

    return False
503
-
504
-
505
528
def add_metadata_for_subject (rdf_graph , subject_uri , namespaces , nidm_obj ):
506
529
"""
507
530
Cycles through triples for a particular subject and adds them to the nidm_obj
@@ -553,11 +576,25 @@ def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
553
576
# so we check explicitly here
554
577
if obj_nm == str (Constants .PROV ):
555
578
nidm_obj .add_attributes (
556
- {predicate : QualifiedName (Constants .PROV [obj_term ])}
579
+ {
580
+ predicate : pm .QualifiedName (
581
+ namespace = pm .Namespace (
582
+ uri = Constants .PROV , prefix = "prov"
583
+ ),
584
+ localpart = str (obj_term ),
585
+ )
586
+ }
557
587
)
558
588
elif obj_nm == str (Constants .NIDM ):
559
589
nidm_obj .add_attributes (
560
- {predicate : QualifiedName (Constants .NIDM [obj_term ])}
590
+ {
591
+ predicate : pm .QualifiedName (
592
+ namespace = pm .Namespace (
593
+ uri = Constants .NIDM , prefix = "prov"
594
+ ),
595
+ localpart = str (obj_term ),
596
+ )
597
+ }
561
598
)
562
599
else :
563
600
found_uri = find_in_namespaces (
@@ -566,20 +603,27 @@ def add_metadata_for_subject(rdf_graph, subject_uri, namespaces, nidm_obj):
566
603
# if obj_nm is not in namespaces then it must just be part of some URI in the triple
567
604
# so just add it as a plain URI reference (rdflib URIRef)
568
605
if not found_uri :
569
- nidm_obj .add_attributes ({predicate : Identifier (objects )})
606
+ nidm_obj .add_attributes ({predicate : URIRef (objects )})
570
607
# else add as explicit prov.QualifiedName because it's easier to read
571
608
else :
572
609
nidm_obj .add_attributes (
573
610
{predicate : pm .QualifiedName (found_uri , obj_term )}
574
611
)
575
612
except Exception :
576
- nidm_obj .add_attributes (
577
- {
578
- predicate : pm .QualifiedName (
579
- namespace = Namespace (str (objects )), localpart = ""
580
- )
581
- }
613
+ # here we likely have a uri without a localpart so we'll search and see if we have a namespace for
614
+ # it.
615
+ found_uri = find_in_namespaces (
616
+ search_uri = URIRef (str (objects )), namespaces = namespaces
582
617
)
618
+
619
+ # if objects is not in namespaces just add as a generic url
620
+ if not found_uri :
621
+ nidm_obj .add_attributes ({predicate : URIRef (objects )})
622
+ # else add as explicit prov.QualifiedName because it's easier to read
623
+ else :
624
+ nidm_obj .add_attributes (
625
+ {predicate : pm .QualifiedName (found_uri , "" )}
626
+ )
583
627
else :
584
628
# check if this is a qname and if so expand it
585
629
# added to handle when a value is a qname. this should expand it....
0 commit comments