Merged
43 commits
00e045d  Added some domain-specific contexts and requirements  (jesper-friis, Jan 7, 2025)
92868a8  Fixed typos  (jesper-friis, Jan 7, 2025)
03c0ea9  Added default_standard  (jesper-friis, Jan 7, 2025)
ca86af6  Merge branch 'master' into 294-validator-for-schema  (jesper-friis, Mar 11, 2025)
47aa9d4  Added a context.yaml file  (jesper-friis, Mar 12, 2025)
6776a9e  Updated entry_points  (jesper-friis, Mar 12, 2025)
966f625  [pre-commit.ci] auto fixes from pre-commit hooks  (pre-commit-ci[bot], Mar 12, 2025)
8dbf171  Update  (jesper-friis, Mar 13, 2025)
47f4953  Updated parsing keywords yaml file  (jesper-friis, Mar 13, 2025)
90134c8  Cleaned up keywords and added test  (jesper-friis, Mar 13, 2025)
c40601a  adding write_doc_keywords()  (jesper-friis, Mar 13, 2025)
07e7e2c  Updated keywords  (jesper-friis, Mar 13, 2025)
7750c0e  Added Keywords.write_doc_prefixes() method  (jesper-friis, Mar 14, 2025)
6520cda  Added command-line tool.  (jesper-friis, Mar 14, 2025)
c6d6a83  Added keywords to pre-commit hooks  (jesper-friis, Mar 14, 2025)
bf54f0d  Fixed running keywords as part of pre-commit  (jesper-friis, Mar 14, 2025)
e19f259  Switch order of name and value for entry points  (jesper-friis, Mar 14, 2025)
44ffb0d  Cleaned up generated keywords documentation  (jesper-friis, Mar 14, 2025)
cab7e4b  Skip test_parse_default() if rdflib isn't installed  (jesper-friis, Mar 14, 2025)
8703414  fixed typo  (jesper-friis, Mar 14, 2025)
36db3f7  disabled annoying pylint failure  (jesper-friis, Mar 14, 2025)
7e599e6  Try to see if we can avoid to run the generate-context-and-doc pre-co…  (jesper-friis, Mar 14, 2025)
8b5cbb7  Merge branch 'master' into 294-validator-for-schema  (jesper-friis, Mar 14, 2025)
881bbfe  Skip generate-context-and-doc  (jesper-friis, Mar 14, 2025)
7aed862  Skipping generating documentation with pre-commit...  (jesper-friis, Mar 14, 2025)
a5e9a52  cleanup  (jesper-friis, Mar 14, 2025)
f7a385f  Merge branch 'master' into 294-validator-for-schema  (jesper-friis, Mar 15, 2025)
1fe97b5  Updated keywords  (jesper-friis, Mar 15, 2025)
6639fdb  Merge branch '294-validator-for-schema' of github.com:EMMC-ASBL/tripp…  (jesper-friis, Mar 15, 2025)
dd6b18a  Reenabled pre-commit action  (jesper-friis, Mar 15, 2025)
c2b761f  Cleaned up repository tree structure  (jesper-friis, Mar 15, 2025)
a2cac07  Updated context  (jesper-friis, Mar 15, 2025)
886f79a  Removed multiple types from context  (jesper-friis, Mar 15, 2025)
66541fe  Corrected misspelled rdf:langString  (jesper-friis, Mar 16, 2025)
9950dfb  updated context  (jesper-friis, Mar 16, 2025)
411bb7e  Updated tests  (jesper-friis, Mar 16, 2025)
ea6d9fc  Updated tests to comply with DCAT-AP  (jesper-friis, Mar 16, 2025)
267828a  Ensure that test_keywords is skipped if yaml is not available.  (jesper-friis, Mar 16, 2025)
bf946f4  Fixed misspelled subClassOf keywords  (jesper-friis, Mar 16, 2025)
778a48a  Made search_iris() more robust  (jesper-friis, Mar 16, 2025)
5a5485a  Skipping test_expand_iri() if rdflib is not installed  (jesper-friis, Mar 16, 2025)
5d9766d  Replaced dcterms:format with dcat:mediaType and changed the values to…  (jesper-friis, Mar 17, 2025)
744e3c6  Updated tests and got rid of warnings  (jesper-friis, Mar 17, 2025)
1 change: 1 addition & 0 deletions .github/workflows/ci_tests.yml
@@ -17,6 +17,7 @@ jobs:

## pre-commit
run_pre-commit: false
skip_pre-commit_hooks: "generate-context-and-doc"

## pylint & safety
python_version_pylint_safety: "3.9"
14 changes: 14 additions & 0 deletions .hooks/generate-context-and-doc.sh
@@ -0,0 +1,14 @@
#!/bin/bash

# Enter repository root directory
HERE="$(cd "$(dirname "$0")" && pwd)"
cd "$HERE"/..

# Generate JSON-LD context and keyword documentation
python tripper/datadoc/keywords.py \
--context=tripper/context/0.3/context.json \
--keywords=docs/datadoc/keywords.md \
--prefixes=docs/datadoc/prefixes.md

# Don't crash pre-commit in case the above fails on GitHub
exit 0
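The hook above shells out to `tripper/datadoc/keywords.py` with `--context`, `--keywords` and `--prefixes` options. The implementation of that CLI is not part of this diff, so the following is only a minimal argparse sketch of the interface the hook assumes; the option names are taken from the hook, everything else is illustrative.

```python
import argparse


def make_parser():
    """Build a parser mirroring the options the hook passes (assumed)."""
    parser = argparse.ArgumentParser(
        description="Generate JSON-LD context and keyword documentation."
    )
    parser.add_argument("--context", help="Output JSON-LD context file.")
    parser.add_argument("--keywords", help="Output keywords markdown file.")
    parser.add_argument("--prefixes", help="Output prefixes markdown file.")
    return parser


# Parse the same style of arguments the hook supplies
args = make_parser().parse_args(
    ["--context=context.json", "--keywords=keywords.md"]
)
assert args.context == "context.json"
assert args.prefixes is None  # options not given default to None
```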
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
@@ -60,6 +60,7 @@ repos:
exclude: ^tests/.*$
additional_dependencies:
- "types-requests"
- "types-pyyaml"
- "pydantic"

- repo: https://github.com/SINTEF/ci-cd
@@ -72,6 +73,13 @@

- repo: local
hooks:
- id: generate-context-and-doc
name: Generate JSON-LD context and documentation
entry: .hooks/generate-context-and-doc.sh
language: script
pass_filenames: false
stages: ["pre-commit"]

- id: pylint
name: pylint
entry: pylint
@@ -82,6 +90,7 @@
require_serial: true
files: ^.*$
exclude: ^tests/.*$

- id: pylint-tests
name: pylint - tests
entry: pylint
3 changes: 3 additions & 0 deletions docs/api_reference/datadoc/errors.md
@@ -0,0 +1,3 @@
# errors

::: tripper.datadoc.errors
3 changes: 3 additions & 0 deletions docs/api_reference/datadoc/keywords.md
@@ -0,0 +1,3 @@
# keywords

::: tripper.datadoc.keywords
12 changes: 10 additions & 2 deletions docs/datadoc/documenting-a-resource.md
@@ -18,7 +18,9 @@ Below is a simple example of how to document a SEM image dataset as a Python dic
>>> dataset = {
... "@id": "kb:image1",
... "@type": "sem:SEMImage",
... "creator": "Sigurd Wenner",
... "creator": {
... "name": "Sigurd Wenner",
... },
... "description": "Back-scattered SEM image of cement, polished with 1 µm diamond compound.",
... "distribution": {
... "downloadURL": "https://github.com/EMMC-ASBL/tripper/raw/refs/heads/master/tests/input/77600-23-001_5kV_400x_m001.tif",
@@ -60,7 +62,13 @@ We therefore have to define them explicitly
"https://w3id.com/emmo/domain/sem/0.1#SEMImage"
],
"@id": "http://example.com/kb/image1",
"creator": "Sigurd Wenner",
"creator": {
"@type": [
"http://xmlns.com/foaf/0.1/Agent",
"https://w3id.org/emmo#EMMO_2480b72b_db8d_460f_9a5f_c2912f979046"
],
"name": "Sigurd Wenner"
},
"description": "Back-scattered SEM image of cement, polished with 1 \u00b5m diamond compound.",
"distribution": {
"@type": "http://www.w3.org/ns/dcat#Distribution",
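The documentation change above reflects a schema change in this PR: `creator` is no longer a bare string but an agent object with a `name` key (typed as `foaf:Agent` in the expanded form). A minimal sketch of the kind of normalisation this implies; `normalise_creator` is a hypothetical helper, not tripper's API:

```python
def normalise_creator(creator):
    """Wrap a plain-string creator in an agent dict (illustrative sketch).

    A bare string like "Sigurd Wenner" becomes {"name": "Sigurd Wenner"};
    an already-structured dict is passed through unchanged.
    """
    if isinstance(creator, str):
        return {"name": creator}
    return creator


assert normalise_creator("Sigurd Wenner") == {"name": "Sigurd Wenner"}
assert normalise_creator({"name": "Sigurd Wenner"}) == {
    "name": "Sigurd Wenner"
}
```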
698 changes: 483 additions & 215 deletions docs/datadoc/keywords.md

Large diffs are not rendered by default.

45 changes: 24 additions & 21 deletions docs/datadoc/prefixes.md
@@ -1,27 +1,30 @@
Predefined prefixes
===================
# Predefined prefixes
All namespace prefixes listed on this page are defined in the [default JSON-LD context].
See [User-defined prefixes] for how to extend this list with additional namespace prefixes.

* adms: http://www.w3.org/ns/adms#
* dcat: http://www.w3.org/ns/dcat#
* dcterms: http://purl.org/dc/terms/
* dctype: http://purl.org/dc/dcmitype/
* foaf: http://xmlns.com/foaf/0.1/
* odrl: http://www.w3.org/ns/odrl/2/
* owl: http://www.w3.org/2002/07/owl#
* prov: http://www.w3.org/ns/prov#
* rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
* rdfs: http://www.w3.org/2000/01/rdf-schema#
* schema: http://schema.org/
* skos: http://www.w3.org/2004/02/skos/core#
* spdx: http://spdx.org/rdf/terms#
* vcard: http://www.w3.org/2006/vcard/ns#
* xsd: http://www.w3.org/2001/XMLSchema#

* emmo: https://w3id.org/emmo#
* oteio: https://w3id.org/emmo/domain/oteio#
* chameo: https://w3id.org/emmo/domain/characterisation-methodology/chameo#
| Prefix | Namespace |
| ------- | ----------------------------------------------------------------- |
| adms | http://www.w3.org/ns/adms# |
| dcat | http://www.w3.org/ns/dcat# |
| dcatap | http://data.europa.eu/r5r/ |
| dcterms | http://purl.org/dc/terms/ |
| dctype | http://purl.org/dc/dcmitype/ |
| eli | http://data.europa.eu/eli/ontology# |
| foaf | http://xmlns.com/foaf/0.1/ |
| locn | http://www.w3.org/ns/locn# |
| odrl | http://www.w3.org/ns/odrl/2/ |
| owl | http://www.w3.org/2002/07/owl# |
| prov | http://www.w3.org/ns/prov# |
| rdf | http://www.w3.org/1999/02/22-rdf-syntax-ns# |
| rdfs | http://www.w3.org/2000/01/rdf-schema# |
| schema | http://schema.org/ |
| skos | http://www.w3.org/2004/02/skos/core# |
| spdx | http://spdx.org/rdf/terms# |
| vcard | http://www.w3.org/2006/vcard/ns# |
| xsd | http://www.w3.org/2001/XMLSchema# |
| emmo | https://w3id.org/emmo# |
| oteio | https://w3id.org/emmo/domain/oteio# |
| chameo | https://w3id.org/emmo/domain/characterisation-methodology/chameo# |


[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
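The prefixes in the table above serve one purpose: expanding a compact `prefix:name` form into a full IRI. A minimal sketch of that expansion, assuming a plain dict of the prefixes listed (tripper's own `expand_iri()` may differ in details):

```python
# A small subset of the prefix table above
PREFIXES = {
    "dcat": "http://www.w3.org/ns/dcat#",
    "dcterms": "http://purl.org/dc/terms/",
    "emmo": "https://w3id.org/emmo#",
}


def expand(curie, prefixes=PREFIXES):
    """Expand 'prefix:name' using the prefix table; pass through otherwise."""
    prefix, sep, name = curie.partition(":")
    if sep and prefix in prefixes:
        return prefixes[prefix] + name
    return curie  # full IRIs and unknown prefixes are left untouched


assert expand("dcat:Dataset") == "http://www.w3.org/ns/dcat#Dataset"
assert expand("http://example.com/x") == "http://example.com/x"
```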
7 changes: 7 additions & 0 deletions pyproject.toml
@@ -85,6 +85,7 @@ Package = "https://pypi.org/project/tripper"

[project.scripts]
datadoc = "tripper.datadoc.clitool:main"
keywords = "tripper.datadoc.keywords:main"

[tool.isort]
line_length = 79 # PEP8
@@ -108,6 +109,7 @@ max-locals = 20
disable = [
"fixme",
"invalid-name",
"too-many-positional-arguments",
]
good-names = [
# Default
@@ -132,3 +134,8 @@ filterwarnings = [

[tool.setuptools.package-data]
"tripper.context" = ["*.json", "*.yaml"]


# Note the quotes around "tripper.keywords" to escape the embedded dot
[project.entry-points."tripper.keywords"]
"tripper/context/0.3" = "default"
8 changes: 5 additions & 3 deletions tests/backends/datadocumentation_sample.yaml
@@ -1,10 +1,12 @@
datasets:
Dataset:
- "@id": https://onto-ns.com/datasets#our_nice_dataset
"@type": http://domain-onto.org/FancyDataset
title: This is a title of a completely invented dataset
description: "This is a dataset description. I include:some strange character? to check that it does nåt crash anything."
creator: Tripper-team
contactPoint: Tripper-team
creator:
name: Tripper-team
contactPoint:
hasName: Tripper-team
distribution.accessURL: https://onto-ns.com/datasets/our_nice_dataset
distribution.mediaType: application/hdf5
distribution.format: HDF5
8 changes: 5 additions & 3 deletions tests/backends/datadocumentation_sample2.yaml
@@ -1,10 +1,12 @@
datasets:
Dataset:
- "@id": https://onto-ns.com/datasets#our_nice_dataset2
"@type": http://domain-onto.org/FancyDataset2
title: This is a title of a completely invented dataset 2
description: "This is a dataset description. I include:some strange characters to check that it does nåt crash anything."
creator: Tripper-team
contactPoint: Tripper-team
creator:
name: Tripper-team
contactPoint:
hasName: Tripper-team
distribution.accessURL: https://onto-ns.com/datasets/our_nice_dataset2
distribution.mediaType: application/hdf5
distribution.format: HDF5
16 changes: 9 additions & 7 deletions tests/backends/test_sparqlwrapper_graphdb_fuseki.py
@@ -82,6 +82,8 @@ def get_triplestore(tsname: str) -> "Triplestore":
return ts


# if True:
# tsname = "Fuseki"
def populate_and_search(tsname): # pylint: disable=too-many-statements
"""Do the test on the desired backend."""
# Test adding triples
@@ -176,7 +178,7 @@ def populate_and_search(tsname): # pylint: disable=too-many-statements
save_datadoc(ts, datasetinput2)

# search for datasets in triplestore
datasets = search_iris(ts, type="dataset")
datasets = search_iris(ts, type="Dataset")

print("Found datasets:")
print(datasets)
@@ -188,18 +190,18 @@ def populate_and_search(tsname): # pylint: disable=too-many-statements
)

retreived_info = load_dict(ts, datasets[0])
print("Info on one dataset")
print(retreived_info)
assert retreived_info.creator == "Tripper-team"
# print("Info on one dataset")
# print(retreived_info)
assert retreived_info.creator.name == "Tripper-team"
assert (
retreived_info.title
== "This is a title of a completely invented dataset"
)

ts.bind("dataset", "https://onto-ns.com/datasets#")
retreived_info_2 = load_dict(ts, f"dataset:{datasets[0].split('#')[-1]}")
print(retreived_info_2)
assert retreived_info_2.creator == "Tripper-team"
# print(retreived_info_2)
assert retreived_info_2.creator.name == "Tripper-team"
assert (
retreived_info_2.title
== "This is a title of a completely invented dataset"
@@ -209,7 +211,7 @@ def populate_and_search(tsname): # pylint: disable=too-many-statements

ts.remove(subject="https://onto-ns.com/datasets#our_nice_dataset2")

datasets3 = search_iris(ts, type="dataset")
datasets3 = search_iris(ts, type="Dataset")

print("Found datasets after deletion:")
print(datasets3)
3 changes: 2 additions & 1 deletion tests/datadoc/dataset_paths.py
@@ -6,7 +6,8 @@

from pathlib import Path

testdir = Path(__file__).resolve().parent.parent
testdir = Path(__file__).absolute().parent.parent.resolve()
rootdir = testdir.parent.resolve()
ontodir = testdir / "ontologies"
indir = testdir / "input"
outdir = testdir / "output"
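The change above swaps a single `resolve()` for `absolute()...resolve()`. The two methods differ: `absolute()` merely prefixes the current working directory and keeps `..` components and symlinks verbatim, while `resolve()` collapses `..` and follows symlinks. A small self-contained illustration of that difference (unrelated to tripper's paths):

```python
from pathlib import Path

p = Path("x/../y")
abs_p = p.absolute()  # cwd-prefixed, ".." kept verbatim
res_p = p.resolve()   # ".." collapsed, symlinks followed

assert ".." in abs_p.parts
assert ".." not in res_p.parts
```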
27 changes: 18 additions & 9 deletions tests/datadoc/test_dataaccess.py
@@ -8,7 +8,6 @@
pytest.importorskip("requests")


# if True:
def test_save_and_load():
"""Test save() and load()."""
# pylint: disable=too-many-statements
@@ -40,15 +39,20 @@ def test_save_and_load():
"https://github.com/EMMC-ASBL/tripper/raw/refs/heads/"
"master/tests/input/77600-23-001_5kV_400x_m001.tif"
),
"format": "tiff",
"mediaType": (
"http://www.iana.org/assignments/media-types/image/tiff"
),
},
},
type="dataset",
type="Dataset",
)
newdistr = load_dict(ts, SEMDATA.img1)
assert newdistr["@type"] == [DCAT.Dataset, EMMO.Dataset]
assert newdistr.distribution["@type"] == DCAT.Distribution
assert newdistr.distribution.format == "tiff"
assert (
newdistr.distribution.mediaType
== "http://www.iana.org/assignments/media-types/image/tiff"
)

save_dict(
ts,
@@ -57,7 +61,7 @@ def test_save_and_load():
"generatorType": "application/vnd.dlite-generate",
"configuration": {"driver": "hitachi"},
},
type="generator",
type="Generator",
)

# Test load dataset (this downloads an actual image from github)
@@ -98,7 +102,9 @@ def test_save_and_load():
distribution={
"@id": SEMDATA.newdistr2,
"downloadURL": f"file:{newfile2}",
"mediaType": "image/png",
"mediaType": (
"http://www.iana.org/assignments/media-types/image/png"
),
"generator": GEN.sem_hitachi,
"parser": PARSER.sem_hitachi,
},
@@ -108,9 +114,12 @@ def test_save_and_load():
newimage2 = load_dict(ts, SEMDATA.newimage2)
assert newimage2["@id"] == SEMDATA.newimage2
assert newimage2["@type"] == [DCAT.Dataset, EMMO.Dataset]
assert newimage2.distribution["@id"] == SEMDATA.newdistr2
assert newimage2.distribution["@type"] == DCAT.Distribution
assert newimage2.distribution.downloadURL == f"file:{newfile2}"
assert newimage2.distribution == SEMDATA.newdistr2

newdist2 = load_dict(ts, newimage2.distribution)
assert newdist2["@id"] == newimage2.distribution
assert newdist2["@type"] == DCAT.Distribution
assert newdist2.downloadURL == f"file:{newfile2}"

# Test save anonymous dataset with existing distribution
newfile2.unlink(missing_ok=True)
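Throughout the updated tests, short media-type strings like `"tiff"` or `"image/png"` are replaced by full IANA media-type IRIs, matching the switch from `dcterms:format` to `dcat:mediaType`. A sketch of the implied conversion; `media_type_iri` is a hypothetical helper, not part of tripper's API:

```python
# Base of the IANA media-types registry used by the updated tests
IANA = "http://www.iana.org/assignments/media-types/"


def media_type_iri(mime):
    """Return the IANA IRI form of a MIME string like 'image/png'."""
    return mime if mime.startswith(IANA) else IANA + mime


assert media_type_iri("image/png") == IANA + "image/png"
assert media_type_iri(IANA + "image/tiff") == IANA + "image/tiff"
```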