Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions examples/generate_rdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import os
from uuid import UUID, uuid5

import osw.model.entity as model
from osw.core import OSW
from osw.express import OswExpress
from osw.utils.wiki import get_full_title
from osw.wtsite import WtSite

# Create/update the password file under examples/accounts.pwd.yaml.
# The path is derived from this script's own location (the examples/ directory),
# so it does not depend on the current working directory.
pwd_file_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "accounts.pwd.yaml"
)
# Alternative: resolve the file relative to the current working directory instead
# pwd_file_path = "./accounts.pwd.yaml"
osw_obj = OswExpress(
    domain="wiki-dev.open-semantic-lab.org", cred_filepath=pwd_file_path
)

# load dependencies: the schema pages the data model used below is generated from
DEPENDENCIES = {
    "Tool": "Category:OSWe427aafafbac4262955b9f690a83405d",
}
# only schemas that are not yet present locally are fetched ("if-missing")
osw_obj.install_dependencies(DEPENDENCIES, mode="append", policy="if-missing")

# Fixed namespace for the example: every UUID below is derived from it by name,
# so re-running the script always yields the same identifiers.
EXAMPLE_UUID_NAMESPACE = UUID("bd65611d-8669-4903-8a14-af88203acc38")


def _example_uuid(key):
    """Return the name-based (version 5) UUID of ``key`` in the example namespace."""
    return uuid5(EXAMPLE_UUID_NAMESPACE, key)


# two standalone devices that are later referenced as parts of the machine
sensor1 = model.Device(
    uuid=_example_uuid("Sensor1"),
    name="Sensor1",
    label=[model.Label(text="Sensor 1")],
    description=[model.Description(text="A sensor")],
)
sensor2 = model.Device(
    uuid=_example_uuid("Sensor2"),
    name="Sensor2",
    label=[model.Label(text="Sensor 2")],
    description=[model.Description(text="Another sensor")],
)

# note: components are modelled as statements to define
# the relationship between the machine and the components
component_at_position_1 = model.Component(
    # note: uuid format needs to be fixed in schema (hence the str() conversion)
    uuid=str(_example_uuid("sensor_at_position_1")),
    component_id="sensor_at_position_1",
    label=[model.Label(text="Sensor at position 1")],
    # the component points to the sensor by its full page title
    component_instance=get_full_title(sensor1),
)
component_at_position_2 = model.Component(
    uuid=str(_example_uuid("sensor_at_position_2")),
    component_id="sensor_at_position_2",
    label=[model.Label(text="Sensor at position 2")],
    component_instance=get_full_title(sensor2),
)

example_machine = model.Device(
    uuid=_example_uuid("ExampleMachine"),
    name="ExampleMachine",
    label=[model.Label(text="Example Machine")],
    components=[component_at_position_1, component_at_position_2],
)

# Export all three entities using the JSON-LD context of their wiki schemas
# and collect the resulting triples in an RDF graph.
wiki_context_params = OSW.ExportJsonLdParams(
    entities=[example_machine, sensor1, sensor2],
    mode=OSW.JsonLdMode.expand,
    build_rdf_graph=True,
    context_loader_config=WtSite.JsonLdContextLoaderParams(
        prefer_external_vocal=False
    ),
)
result = osw_obj.export_jsonld(params=wiki_context_params)
graph = result.graph

# print all triples in the graph
# (the query text is kept flush-left so the string passed to rdflib is unchanged)
all_triples = graph.query(
    """
SELECT ?s ?p ?o
WHERE {
?s ?p ?o .
}
"""
)

print("\nAll triples in the graph:")
# each result row is a tuple in SELECT order: subject, predicate, object
for subj, pred, obj in all_triples:
    print(subj, pred, obj)

# query the labels of all components that are attached via a HasPart statement
# NOTE(review): the "Property:" prefix is presumably resolved through the namespace
# bindings export_jsonld sets on the graph - confirm against the site's prefix dict
component_labels = graph.query(
    """
SELECT ?component ?clabel
WHERE {
?s Property:HasStatement ?statement .
?statement Property:HasProperty Property:HasPart .
?statement Property:HasObject ?component .
?component Property:HasLabel ?clabel .
}
"""
)
print("\nComponents of example_machine:")
# rows are (component, clabel) pairs; only the label is shown
for _component, clabel in component_labels:
    print(clabel)

print("\n\nDefine a custom context:")

# A hand-written context that is used instead of the schema contexts:
# "components" is declared as a nesting container, the component reference becomes
# an ex:hasPart link and "name" is mapped to ex:name.
custom_context = {
    "ex": "http://example.org/",
    "components": "@nest",
    "component_instance": {"@id": "ex:hasPart", "@type": "@id"},
    "name": {"@id": "ex:name"},
}
custom_context_params = OSW.ExportJsonLdParams(
    entities=[example_machine, sensor1, sensor2],
    mode=OSW.JsonLdMode.expand,
    build_rdf_graph=True,
    context=custom_context,
)
result = osw_obj.export_jsonld(params=custom_context_params)
graph = result.graph

# print all triples in the graph
# (the query text is kept flush-left so the string passed to rdflib is unchanged)
all_triples = graph.query(
    """
SELECT ?s ?p ?o
WHERE {
?s ?p ?o .
}
"""
)
print("\nAll triples in the graph:")
# each result row is a tuple in SELECT order: subject, predicate, object
for subj, pred, obj in all_triples:
    print(subj, pred, obj)

# query the names of everything that is linked via ex:hasPart
part_names = graph.query(
    """
SELECT ?clabel
WHERE {
?s <http://example.org/hasPart> ?component .
?component <http://example.org/name> ?clabel
}
"""
)
print("\nComponents of example_machine:")
# single-column rows: unpack the one selected variable
for (clabel,) in part_names:
    print(clabel)


print(
    "\n\ncreate a custom class with an additional property mapped in a context extension"
)


class MyCustomDeviceClass(model.Device):
    # extra attribute that the Device schema does not define; its JSON-LD mapping
    # is supplied at export time through `additional_context`
    my_property: str


# turn the existing machine into an instance of the extended class
my_custom_device = example_machine.cast(MyCustomDeviceClass, my_property="test")

# the schema contexts stay in place; this mapping is applied on top of them
context_extension = {"my_property": "http://example.org/my_property"}
custom_class_params = OSW.ExportJsonLdParams(
    entities=[my_custom_device, sensor1, sensor2],
    mode=OSW.JsonLdMode.expand,
    build_rdf_graph=True,
    context_loader_config=WtSite.JsonLdContextLoaderParams(
        prefer_external_vocal=False
    ),
    additional_context=context_extension,
)
result = osw_obj.export_jsonld(params=custom_class_params)

graph = result.graph
# print all triples in the graph
# (the query text is kept flush-left so the string passed to rdflib is unchanged)
all_triples = graph.query(
    """
SELECT ?s ?p ?o
WHERE {
?s ?p ?o .
}
"""
)
print("\nAll triples in the graph:")
# each result row is a tuple in SELECT order: subject, predicate, object
for subj, pred, obj in all_triples:
    print(subj, pred, obj)
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ testing =
sqlalchemy
psycopg2-binary # see: https://www.psycopg.org/docs/install.html#psycopg-vs-psycopg-binary
mwparserfromhell
jsondiff

[options.entry_points]
# Add here console scripts like:
Expand Down
125 changes: 124 additions & 1 deletion src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
import sys
from copy import deepcopy
from enum import Enum
from typing import Dict, List, Optional, Type, Union
from typing import Any, Dict, List, Optional, Type, Union
from uuid import UUID
from warnings import warn

import rdflib
from jsonpath_ng.ext import parse
from pydantic.v1 import BaseModel, PrivateAttr, create_model, validator
from pyld import jsonld

import osw.model.entity as model
from osw.model.static import OswBaseModel
Expand All @@ -23,6 +25,7 @@
)
from osw.utils.util import parallelize
from osw.utils.wiki import (
get_full_title,
get_namespace,
get_title,
get_uuid,
Expand Down Expand Up @@ -1300,6 +1303,126 @@ def query_instances(
full_page_titles = self.site.semantic_search(search_param)
return full_page_titles

class JsonLdMode(str, Enum):
    """Enum for the JSON-LD processing mode.

    Selects which JSON-LD operation export_jsonld() applies to each document.
    The members are also plain strings (str mixin), so they compare equal to
    their literal values, e.g. ``JsonLdMode.expand == "expand"``.
    """

    # apply jsonld.expand and keep the first expanded node
    expand = "expand"
    # apply jsonld.flatten
    flatten = "flatten"
    # apply jsonld.compact against the document's (or the given) context
    compact = "compact"
    # apply jsonld.frame with the given frame (or the document's context)
    frame = "frame"

class ExportJsonLdParams(OswBaseModel):
    # Parameter set for export_jsonld(): which entities to export, how their
    # JSON-LD context is assembled and which processing step is applied.
    context_loader_config: Optional[WtSite.JsonLdContextLoaderParams] = None
    """The configuration for the JSON-LD context loader."""
    entities: Union[OswBaseModel, List[OswBaseModel]]
    """The entities to convert to JSON-LD. Can be a single entity or a list of
    entities."""
    resolve_context: Optional[bool] = True
    """If True, remote context URLs are resolved."""
    mode: Optional[OSW.JsonLdMode] = "expand"
    """The JSON-LD processing mode to apply if resolve_context is True."""
    context: Optional[Union[str, list, Dict[str, Any]]] = None
    """The JSON-LD context to apply. Replaces any existing context."""
    additional_context: Optional[Union[str, list, Dict[str, Any]]] = None
    """The JSON-LD context to apply on top of the existing context."""
    frame: Optional[Dict[str, Any]] = None
    """The JSON-LD frame to use for framed mode. If not set, the existing context is used"""
    build_rdf_graph: Optional[bool] = False
    """If True, the output is a graph."""
    # NOTE(review): not read by export_jsonld in the visible code - confirm usage
    debug: Optional[bool] = False

    def __init__(self, **data):
        super().__init__(**data)
        # Consumers iterate over `entities`, so a single entity is wrapped
        # into a one-element list; a list is left as it is.
        given = self.entities
        if isinstance(given, list):
            return
        self.entities = [given]

class ExportJsonLdResult(OswBaseModel):
    # Return value of export_jsonld(): the per-entity documents, a combined
    # @graph document and, on request, the RDF graph built from the documents.
    documents: List[Union[List[Any], Dict[str, Any]]]
    """A single JSON-LD document per entity. Usually a dict; a list of node objects
    in flatten mode, because pyld's flatten() returns a list when it is called
    without a context. List is tried first so that such a list is never coerced
    into a dict during validation."""
    graph_document: Optional[Dict[str, Any]] = None
    """A single JSON-LD document with a @graph element containing all entities"""
    graph: Optional[rdflib.Graph] = None
    """RDF graph containing all entities. Build only if build_rdf_graph is True"""

    class Config:
        # rdflib.Graph is not a pydantic type and must be allowed explicitly
        arbitrary_types_allowed = True

def export_jsonld(self, params: ExportJsonLdParams) -> ExportJsonLdResult:
    """Exports the given entity/entities as JSON-LD.

    Every entity is serialized to JSON, gets an ``@context`` and an ``@id``
    (its full page title) and is then processed according to ``params.mode``.

    Parameters
    ----------
    params
        The entities to export and the export options. ``params.context``
        replaces the schema contexts of the entity (the site's prefixes are
        kept), ``params.additional_context`` is applied on top of whatever
        context is in effect.

    Returns
    -------
    The processed document of every entity, a combined document holding the
    expanded form of every entity under ``@graph`` (only filled if
    ``params.resolve_context`` is set) and, if ``params.build_rdf_graph`` is set,
    an RDF graph parsed from the processed documents.
    """

    if params.resolve_context:
        # NOTE: pyld's document loader is a module-wide setting, so this also
        # affects JSON-LD processing outside of this call
        jsonld.set_document_loader(
            self.site.get_jsonld_context_loader(params.context_loader_config)
        )

    documents = []
    graph_document = {"@graph": []}
    graph = None
    if params.build_rdf_graph:
        graph = rdflib.Graph()
        # make the site's prefixes usable in serializations and SPARQL queries
        for prefix, namespace in self.site.get_prefix_dict().items():
            graph.bind(prefix, namespace)

    for e in params.entities:
        data = json.loads(e.json(exclude_none=True, indent=4, ensure_ascii=False))

        if params.context is None:
            # default: the contexts defined by the schemas (types) of the entity,
            # addressed by their relative wiki URL
            context = ["/wiki/" + t for t in e.type]
        elif isinstance(params.context, dict):
            # a custom term map replaces the schema contexts but keeps the prefixes
            context = {
                **self.site.get_jsonld_context_prefixes(),
                **params.context,
            }
        elif isinstance(params.context, list):
            # a context array: put the prefixes first so later entries can override
            context = [dict(self.site.get_jsonld_context_prefixes()), *params.context]
        else:
            # a single context reference (URL string)
            context = [dict(self.site.get_jsonld_context_prefixes()), params.context]

        if params.additional_context is not None:
            if not isinstance(context, list):
                context = [context]
            # a context array must not contain nested arrays, so a list is
            # spliced in instead of being appended as one element
            if isinstance(params.additional_context, list):
                context.extend(params.additional_context)
            else:
                context.append(params.additional_context)
        data["@context"] = context

        data["@id"] = get_full_title(e)

        if params.resolve_context:
            # expand once: needed for the combined document in every mode
            expanded = jsonld.expand(data)
            graph_document["@graph"].append(expanded)
            # mode is a str-based enum (or its plain string default), so comparing
            # against the literal values covers both
            if params.mode == "expand":
                # reuse the expansion; the copy keeps `documents` independent of
                # `graph_document`. An entity without any mapped property
                # expands to an empty list and yields an empty document.
                data = deepcopy(expanded[0]) if expanded else {}
            elif params.mode == "flatten":
                data = jsonld.flatten(data)
            elif params.mode == "compact":
                data = jsonld.compact(
                    data,
                    data["@context"] if params.context is None else params.context,
                )
            elif params.mode == "frame":
                data = jsonld.frame(
                    data,
                    (
                        {"@context": data["@context"]}
                        if params.frame is None
                        else params.frame
                    ),
                )

        if params.build_rdf_graph:
            graph.parse(data=json.dumps(data), format="json-ld")

        documents.append(data)

    result = OSW.ExportJsonLdResult(
        documents=documents, graph_document=graph_document, graph=graph
    )
    return result


OSW._ApplyOverwriteParam.update_forward_refs()
OSW.StoreEntityParam.update_forward_refs()
OSW.ExportJsonLdParams.update_forward_refs()
32 changes: 1 addition & 31 deletions src/osw/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import uuid
from typing import Dict, List, Literal, Optional, Type

import pyld
from pydantic.v1 import PrivateAttr
from pyld import jsonld
from rdflib import Graph
Expand Down Expand Up @@ -214,36 +213,7 @@ def import_ontology(self, config: ImportConfig):
"""

# overwrite the default document loader to load relative context from the wiki
def myloader(*args, **kwargs):
    """Create a JSON-LD document loader that serves wiki pages from the OSW site.

    All arguments are forwarded to pyld's requests-based loader factory; the
    loader built by that factory handles every URL that is not a wiki page.
    """
    fallback_loader = pyld.documentloader.requests.requests_document_loader(
        *args, **kwargs
    )

    def loader(url, options=None):
        """Load ``url``: from the wiki if it contains "/wiki/", else via HTTP."""
        if options is None:
            options = {}
        if "/wiki/" not in url:
            return fallback_loader(url, options)

        # page title = URL without the "/wiki/" part and without a query string
        page_title = url.replace("/wiki/", "").split("?")[0]
        wiki_page = self.osw.site.get_page(
            WtSite.GetPageParam(titles=[page_title])
        ).pages[0]
        # the context is taken from the page's "jsonschema" slot
        context_schema = wiki_page.get_slot_content("jsonschema")
        if isinstance(context_schema, str):
            context_schema = json.loads(context_schema)
        # remote document structure as expected by pyld
        return {
            "contentType": "application/json",
            "contextUrl": None,
            "documentUrl": url,
            "document": context_schema,
        }

    return loader

jsonld.set_document_loader(myloader())
jsonld.set_document_loader(self.osw.site.get_jsonld_context_loader())

self.import_config = config

Expand Down
Loading
Loading