Changes from all commits

22 commits
ea2c36b
Started solving issues with keywords
francescalb Apr 3, 2025
351e0bd
[pre-commit.ci] auto fixes from pre-commit hooks
pre-commit-ci[bot] Apr 3, 2025
9760246
Merge branch 'master' into keyword_bugs
francescalb Apr 3, 2025
e9ba8fc
Merge branch 'master' into keyword_bugs
jesper-friis Apr 3, 2025
dc1e4f6
Merge branch 'keyword_bugs' of github.com:EMMC-ASBL/tripper into keyw…
jesper-friis Apr 3, 2025
683c012
Allow to use the same keyword in several resource types
jesper-friis Apr 3, 2025
c0b744b
Defined missing prefix - needed by fuseki and GraphDB
jesper-friis Apr 3, 2025
54ea327
Added iana prefix
francescalb Apr 3, 2025
6d71895
Added --debug option to command-line tool
jesper-friis Apr 3, 2025
691b101
Updated clitool and added custom CSV sniffer
jesper-friis Apr 4, 2025
78c827e
Added test for new keyword in recursive_update()
jesper-friis Apr 4, 2025
416d391
Added test for csvsniff()
jesper-friis Apr 4, 2025
2f5f8b4
Added more tests for recursive_update()
jesper-friis Apr 4, 2025
77d57aa
Updated docstring
jesper-friis Apr 4, 2025
e67d07f
Updated test for recursive_update()
jesper-friis Apr 4, 2025
f9cb794
Merge branch 'master' into keyword_bugs
francescalb Apr 22, 2025
e8f655b
try safety directly
francescalb Apr 22, 2025
eab590e
Typo in ci-tests
francescalb Apr 22, 2025
8f9e18d
typo
francescalb Apr 22, 2025
71b0b9d
try adding .safety-config
francescalb Apr 22, 2025
ca055b7
removed safety-config.yml
francescalb Apr 22, 2025
eca957c
Removed safety info that is no longer relevant
francescalb Apr 22, 2025
27 changes: 11 additions & 16 deletions .github/workflows/ci_tests.yml
@@ -29,22 +29,7 @@ jobs:
--rcfile=pyproject.toml --disable=import-outside-toplevel,redefined-outer-name tests

# safety-specific settings
run_safety: true
# 48547: RDFLib vulnerability: https://pyup.io/vulnerabilities/PVE-2022-48547/48547/
# 44715-44717: NumPy vulnerabilities:
# https://pyup.io/vulnerabilities/CVE-2021-41495/44715/
# https://pyup.io/vulnerabilities/CVE-2021-41496/44716/
# https://pyup.io/vulnerabilities/CVE-2021-34141/44717/
# 70612: Jinja2 vulnerability. Only used as subdependency for mkdocs++ in tripper.
# https://data.safetycli.com/v/70612/97c/
# https://data.safetycli.com/v/72715/97c/ # update to mkdocs>=9.5.32
safety_options: |
--ignore=48547
--ignore=44715
--ignore=44716
--ignore=44717
--ignore=70612
--ignore=72715
run_safety: false

## Build package
run_build_package: true
@@ -63,6 +48,16 @@ jobs:
update_docs_landing_page: true
package_dirs: tripper

safety:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@main
- name: Run Safety CLI to check for vulnerabilities
uses: pyupio/safety-action@v1
with:
api-key: ${{ secrets.SAFETY_API_KEY }}
args: --detailed-output # To always see detailed output from this action

pytest:
runs-on: ubuntu-latest

10 changes: 9 additions & 1 deletion docs/datadoc/keywords.md
@@ -1,6 +1,6 @@
<!-- Do not edit! This file is generated with Tripper. Edit the keywords.yaml file instead. -->

# Keywords for default
# Keywords for domain: default
The tables below list the keywords for the domain default.

The meaning of the columns are as follows:
@@ -169,6 +169,7 @@ A collection of operations that provides access to one or more datasets or data
| [endpointURL] | [rdfs:Literal]<br>(xsd:anyURI) | mandatory | The root location or primary endpoint of the service (an IRI). | |
| [endpointDescription] | [rdfs:Resource] | recommended | A description of the services available via the end-points, including their operations, parameters etc. | |
| [servesDataset] | [dcat:Dataset] | recommended | This property refers to a collection of data that this data service can distribute. | |
| [parser] | [oteio:Parser] | | A parser that can parse the distribution. | |


## Properties on [DatasetSeries]
@@ -257,6 +258,10 @@ A standard or other specification to which a resource conforms.
A media type, e.g. the format of a computer file.


## Properties on [GenericResource]
A generic resource.




[Resource]: http://www.w3.org/ns/dcat#Resource
@@ -431,6 +436,8 @@ A media type, e.g. the format of a computer file.
[rdfs:Literal]: http://www.w3.org/2000/01/rdf-schema#Literal
[servesDataset]: http://www.w3.org/ns/dcat#servesDataset
[dcat:Dataset]: http://www.w3.org/ns/dcat#Dataset
[parser]: https://w3id.org/emmo/domain/oteio#parser
[oteio:Parser]: https://w3id.org/emmo/domain/oteio#Parser
[dcat:Dataset]: http://www.w3.org/ns/dcat#Dataset
[DatasetSeries]: http://www.w3.org/ns/dcat#DatasetSeries
[Geometry]: http://www.w3.org/ns/locn#Geometry
@@ -506,3 +513,4 @@ A media type, e.g. the format of a computer file.
[LegalResource]: http://data.europa.eu/eli/ontology#LegalResource
[Standard]: http://purl.org/dc/terms/Standard
[MediaType]: http://purl.org/dc/terms/MediaType
[GenericResource]: http://www.w3.org/2000/01/rdf-schema#Resource
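
Aside: the new [parser] keyword documented above maps to the oteio:parser property with range oteio:Parser. As a purely illustrative sketch (the `ex:` resources below are hypothetical and not part of this PR), the underlying triple can be written with tripper like this:

```python
# Illustrative only: hypothetical ex: resources showing the triple
# behind the new "parser" keyword (property oteio:parser, range
# oteio:Parser).
from tripper import Triplestore

ts = Triplestore(backend="rdflib")
OTEIO = ts.bind("oteio", "https://w3id.org/emmo/domain/oteio#")
EX = ts.bind("ex", "http://example.com/ex#")

# State that the (hypothetical) distribution has a parser.
ts.add_triples([(EX.mydistribution, OTEIO.parser, EX.mycsvparser)])
```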
1 change: 1 addition & 0 deletions docs/datadoc/prefixes.md
@@ -25,6 +25,7 @@ See [User-defined prefixes] for how to extend this list with additional namespac
| emmo | https://w3id.org/emmo# |
| oteio | https://w3id.org/emmo/domain/oteio# |
| chameo | https://w3id.org/emmo/domain/characterisation-methodology/chameo# |
| iana | https://www.iana.org/assignments/media-types/ |


[default JSON-LD context]: https://raw.githubusercontent.com/EMMC-ASBL/tripper/refs/heads/master/tripper/context/0.2/context.json
2 changes: 1 addition & 1 deletion mkdocs.yml
@@ -100,8 +100,8 @@ nav:
- Units and quantities: units/units.md
- Session: session.md
- ... | api_reference/**
- Known issues: known-issues.md
- For developers: developers.md
- Known issues: known-issues.md
- Changelog: CHANGELOG.md
- License: LICENSE.md

8 changes: 8 additions & 0 deletions tests/datadoc/test_datadoc_cli.py
@@ -18,6 +18,7 @@ def test_delete():
iri = "semdata:SEM_cement_batch2/77600-23-001/77600-23-001_5kV_400x_m001"

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"delete",
@@ -27,6 +28,7 @@

# Ensure that KB doesn't contain the removed dataset
findcmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"find",
@@ -44,6 +46,7 @@ def test_delete_regex():
iri_regexp = "https://he-matchmaker.eu/data/sem/.*"

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"delete",
@@ -53,6 +56,7 @@

# Ensure that KB doesn't contain the removed dataset
findcmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"find",
@@ -68,6 +72,7 @@ def test_add():
from dataset_paths import indir, outdir # pylint: disable=import-error

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"add",
@@ -103,6 +108,7 @@ def test_find():
from dataset_paths import indir # pylint: disable=import-error

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"find",
@@ -129,6 +135,7 @@ def test_find_json():
from dataset_paths import indir # pylint: disable=import-error

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"find",
@@ -155,6 +162,7 @@ def test_fetch():
)

cmd = [
"--debug",
"--triplestore=FusekiTest",
f"--config={indir/'session.yaml'}",
"fetch",
20 changes: 20 additions & 0 deletions tests/datadoc/test_tabledoc.py
@@ -218,3 +218,23 @@ def test_csv_duplicated_columns():
"distribution.downloadURL",
]
td2.write_csv(outdir / "tem.csv", prefixes=prefixes)


def test_csvsniff():
"""Test csvsniff()."""
pytest.importorskip("yaml")
from tripper.datadoc.tabledoc import csvsniff

lines = [
"A,B,C,D",
"a,'b,bb','c1;c2;c3;c4',d",
]
dialect = csvsniff("\r\n".join(lines))
assert dialect.delimiter == ","
assert dialect.lineterminator == "\r\n"
assert dialect.quotechar == "'"

dialect = csvsniff("\n".join(lines))
assert dialect.delimiter == ","
assert dialect.lineterminator == "\n"
assert dialect.quotechar == "'"
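
Aside: csvsniff() is added by this PR, but its implementation is not part of this diff. A rough standalone sketch of the technique (an assumption, not tripper's actual code) builds on Python's csv.Sniffer, which detects the delimiter and quote character but always reports "\r\n" as line terminator, so the terminator has to be inferred separately, which is what the asserts above check:

```python
# Hypothetical sketch, not the tripper implementation.
import csv


def sniff_dialect(sample: str):
    """Guess a CSV dialect from the text in `sample`."""
    dialect = csv.Sniffer().sniff(sample)
    # csv.Sniffer hardcodes "\r\n"; infer the real line terminator.
    dialect.lineterminator = "\r\n" if "\r\n" in sample else "\n"
    return dialect


sample = "A,B,C,D\na,'b,bb','c1;c2;c3;c4',d"
dialect = sniff_dialect(sample)
assert dialect.delimiter == ","
assert dialect.quotechar == "'"
assert dialect.lineterminator == "\n"
```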
31 changes: 31 additions & 0 deletions tests/test_utils.py
@@ -37,15 +37,46 @@ def test_recursive_update():
d = {"a": []}
recursive_update(d, other)
assert d == other
assert isinstance(d["a"][1], dict)

d = {"a": []}
recursive_update(d, other, cls=AttrDict)
assert d == other
assert isinstance(d["a"][1], AttrDict)

d = AttrDict()
recursive_update(d, other)
assert d == other
assert isinstance(d.a[1], AttrDict)

d = {"d": 1}
recursive_update(d, other)
assert d == {"a": [1, {"b": 2, "c": [3, 4]}, 5], "d": [1, 6]}

d = {"d": 1}
recursive_update(d, other, append=False)
assert d == {"a": [1, {"b": 2, "c": [3, 4]}, 5], "d": 6}

d = {"a": {"b": 2}}
recursive_update(d, {"a": [1, {"b": 2}]})
assert d == {"a": [1, {"b": 2}]}

d = {"a": {"b": 2}}
recursive_update(d, {"a": [1, {"b": 2}]}, append=False)
assert d == {"a": [1, {"b": 2}]}

d = {"a": [1, {"b": 2}]}
recursive_update(d, {"a": [1, {"b": 2}]})
assert d == {"a": [1, {"b": 2}]}

d = {"a": {"b": 2}}
recursive_update(d, {"a": [1, {"b": 3}]})
assert d == {"a": [1, {"b": [2, 3]}]}

d = {"a": {"b": 2}}
recursive_update(d, {"a": [1, {"b": 3}]}, append=False)
assert d == {"a": [1, {"b": 3}]}


def test_openfile():
"""Test openfile()."""
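Aside: the new tests above pin down how recursive_update() treats conflicting values via its append argument. A deliberately simplified sketch of just that flag (not tripper's recursive_update(), which also merges nested dicts and lists) for flat dicts:

```python
# Simplified illustration of the append semantics only; hypothetical
# helper, not tripper.utils.recursive_update().
def simple_update(d: dict, other: dict, append: bool = True) -> None:
    for key, value in other.items():
        if key in d and d[key] != value and append:
            # Accumulate conflicting values into a list.
            old = d[key] if isinstance(d[key], list) else [d[key]]
            d[key] = old + [value]
        else:
            # New key, equal value, or append=False: just (over)write.
            d[key] = value


d = {"d": 1}
simple_update(d, {"d": 6})
assert d == {"d": [1, 6]}

d = {"d": 1}
simple_update(d, {"d": 6}, append=False)
assert d == {"d": 6}
```
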
4 changes: 3 additions & 1 deletion tripper/context/0.3/context.json
@@ -22,6 +22,7 @@
"emmo": "https://w3id.org/emmo#",
"oteio": "https://w3id.org/emmo/domain/oteio#",
"chameo": "https://w3id.org/emmo/domain/characterisation-methodology/chameo#",
"iana": "https://www.iana.org/assignments/media-types/",
"accessRights": {
"@id": "dcterms:accessRights",
"@type": "@id"
@@ -353,6 +354,7 @@
"RightsStatement": "dcterms:RightsStatement",
"LegalResource": "eli:LegalResource",
"Standard": "dcterms:Standard",
"MediaType": "dcterms:MediaType"
"MediaType": "dcterms:MediaType",
"GenericResource": "rdfs:Resource"
}
}
10 changes: 10 additions & 0 deletions tripper/context/0.3/keywords.yaml
@@ -25,6 +25,7 @@ prefixes:
emmo: "https://w3id.org/emmo#"
oteio: "https://w3id.org/emmo/domain/oteio#"
chameo: "https://w3id.org/emmo/domain/characterisation-methodology/chameo#"
iana: "https://www.iana.org/assignments/media-types/"


resources:
@@ -577,6 +578,11 @@ resources:
conformance: recommended
description: This property refers to a collection of data that this data service can distribute.

parser:
iri: oteio:parser
range: oteio:Parser
description: A parser that can parse the distribution.


DatasetSeries:
iri: dcat:DatasetSeries
@@ -742,3 +748,7 @@ resources:
iri: dcterms:MediaType
description: A media type, e.g. the format of a computer file.
usageNote: Media type instances follow the [IANA](https://www.w3.org/TR/vocab-dcat-3/#bib-iana-media-types) vocabulary using the <http://www.iana.org/assignments/media-types/> namespace. For example, the IRI of the media type `text/turtle` is <http://www.iana.org/assignments/media-types/text/turtle>.

GenericResource:
iri: rdfs:Resource
description: A generic resource.
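
Aside: together with the iana prefix added in this PR, the MediaType usage note above means a media-type IRI is simply the IANA namespace followed by the media type. A minimal illustration (a hypothetical helper, not tripper's API):

```python
# Hypothetical helper, not part of tripper: expand an "iana:" CURIE
# with the prefix defined above.
PREFIXES = {"iana": "https://www.iana.org/assignments/media-types/"}


def expand_curie(curie: str) -> str:
    prefix, _, local = curie.partition(":")
    return PREFIXES[prefix] + local


assert expand_curie("iana:text/turtle") == (
    "https://www.iana.org/assignments/media-types/text/turtle"
)
```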
33 changes: 20 additions & 13 deletions tripper/datadoc/clitool.py
@@ -31,8 +31,8 @@
save_datadoc(ts, infile)
elif fmt in ("csv",):
kw = {}
if args.csv_options:
for token in args.csv_options:
if args.csv_option:
for token in args.csv_option:

Codecov warning: added line #L35 in tripper/datadoc/clitool.py was not covered by tests.
option, value = token.split("=", 1)
kw[option] = value
td = TableDoc.parse_csv(
@@ -131,6 +131,8 @@

def maincommand(argv=None):
"""Main command."""
# pylint: disable=too-many-statements

parser = argparse.ArgumentParser(
description=(
"Tool for data documentation.\n\n"
@@ -164,13 +166,13 @@
),
)
parser_add.add_argument(
"--csv-options",
action="extend",
nargs="+",
"--csv-option",
action="append",
metavar="OPTION=VALUE",
help=(
"Options describing the CSV dialect for --input-format=csv. "
"Common options are 'dialect', 'delimiter' and 'quotechar'."
"Common options are 'dialect', 'delimiter' and 'quotechar'. "
"This option may be provided multiple times."
),
)
parser_add.add_argument(
@@ -291,6 +293,9 @@
"-c",
help="Session configuration file.",
)
parser.add_argument(
"--debug", action="store_true", help="Show Python traceback on error."
)
parser.add_argument(
"--triplestore",
"-t",
@@ -367,17 +372,19 @@
ts.bind(prefix, ns)

# Call subcommand handler
return args.func(ts, args)
try:
return args.func(ts, args)
except Exception as exc: # pylint: disable=broad-exception-caught
if args.debug:
raise
print(f"{exc.__class__.__name__}: {exc}")
return exc

Codecov warning: added lines #L377-L381 in tripper/datadoc/clitool.py were not covered by tests.


def main(argv=None):
"""Main function."""
try:
maincommand(argv)
except Exception as exc: # pylint: disable=broad-exception-caught
print(exc)
return 1
return 0
retval = maincommand(argv)
return 1 if isinstance(retval, Exception) else 0

Codecov warning: added lines #L386-L387 in tripper/datadoc/clitool.py were not covered by tests.


if __name__ == "__main__":
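
Aside: the --csv-option flag changed above (from action="extend" with nargs="+" to action="append") follows the common argparse pattern where each occurrence of the flag contributes one OPTION=VALUE token that is later split into a keyword dict, as done in the csv branch near the top of this file. A small standalone illustration (hypothetical, not the clitool code itself):

```python
# Standalone illustration of the --csv-option argparse pattern;
# hypothetical example, not the tripper CLI.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--csv-option", action="append", metavar="OPTION=VALUE")
args = parser.parse_args(
    ["--csv-option", "delimiter=;", "--csv-option", "quotechar='"]
)

kw = {}
for token in args.csv_option or []:
    option, value = token.split("=", 1)
    kw[option] = value

assert kw == {"delimiter": ";", "quotechar": "'"}
```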