Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,50 @@ def test_prefix_iri():
prefix_iri("xxx", prefixes, require_prefixed=True)


def test_substitute_query():
    """Exercise substitute_query(): substitution, escaping and quoting."""
    from tripper import FOAF
    from tripper.utils import substitute_query

    template = "SELECT ?s WHERE { ?s $name $obj }"

    # A prefixed IRI is expanded and quoted; a plain string becomes a
    # quoted literal.
    plain = substitute_query(
        query=template,
        iris={"name": "foaf:name"},
        literals={"obj": "John Dow"},
        prefixes={"foaf": str(FOAF)},
    )
    assert plain == f'SELECT ?s WHERE {{ ?s <{FOAF.name}> "John Dow" }}'

    # Values that try to break out of their token: the expected result
    # shows the IRI percent-encoded and the literal's quotes
    # backslash-escaped.
    nasty_iri = (
        'http://xmlns.com/foaf/0.1/name> "x" . '
        "<something nasty> <"
    )
    nasty_literal = 'John Dow" . <something nasty> "'
    escaped = substitute_query(
        query=template,
        iris={"name": nasty_iri},
        literals={"obj": nasty_literal},
    )
    expected = (
        "SELECT ?s WHERE { ?s <http://xmlns.com/foaf/0.1/name"
        "%3E%20%22x%22%20.%20%3Csomething%20nasty%3E%20%3C>"
        r' "John Dow\" . <something nasty> \"" }'
    )
    assert escaped == expected

    # Variables without a substitution are left in place.
    assert substitute_query("$x $y", iris={"x": "X"}) == "<X> $y"

    # Accepted forms of `iriquote`: two characters, one character, None.
    for quotes, wrapped in (("[]", "[X]"), (" ", " X "), (None, "X")):
        result = substitute_query("$x", iris={"x": "X"}, iriquote=quotes)
        assert result == wrapped

    # More than two quote characters is rejected.
    with pytest.raises(ValueError):
        substitute_query("$x", iris={"x": "X"}, iriquote="xxx")

    # An end quote that is not percent-encoded in IRIs triggers a warning.
    with pytest.warns(UserWarning):
        substitute_query("$x", iris={"x": "X"}, iriquote="--")


def test_get_entry_points():
"""Test get_entry_points()"""
from tripper.utils import get_entry_points
Expand Down
73 changes: 67 additions & 6 deletions tripper/triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
infer_iri,
prefix_iri,
split_iri,
substitute_query,
)

if TYPE_CHECKING: # pragma: no cover
Expand Down Expand Up @@ -412,11 +413,32 @@ def serialize(
ts.bind(prefix, iri)
return ts.serialize(destination=destination, format=format, **kwargs)

def query(self, query_object, **kwargs) -> "Any":
def query(
self,
query: str,
iris: "Optional[dict]" = None,
literals: "Optional[dict]" = None,
**kwargs,
) -> "Any":
"""SPARQL query.

The `query` argument may contain variables for IRIs and literals,
to be substituted using the `iris` and `literals` arguments. These
variables are prefixed `$`. This makes them easy to distinguish from
query variables, that are typically prefixed with `?`.

The query substitutions may be useful when the query is constructed
from user input, since they are properly escaped and will be inserted
in the query as a single token. This may prevent sparql injection
attacks.

Arguments:
query_object: String with the SPARQL query.
query: String with the SPARQL query.
iris: Dict used for query substitutions that maps IRI variables
to IRIs. The IRIs may be provided as fully expanded or
prefixed with a prefix registered in the triplestore namespace.
literals: Dict used for query substitutions that maps literal
variables to literals.
kwargs: Keyword arguments passed to the backend query() method.

Returns:
Expand All @@ -432,24 +454,63 @@ def query(self, query_object, **kwargs) -> "Any":

Not all backends may support all types of queries.

Examples:
Query for everyone with the name "John Dow":

>>> from tripper import FOAF, Literal, Triplestore
>>> ts = Triplestore(backend="rdflib")
>>> ts.bind("foaf", FOAF)
Namespace('http://xmlns.com/foaf/0.1/')

>>> ts.add_triples([
... (":john", FOAF.name, Literal("John Dow")),
... (":jack", FOAF.name, Literal("Jack Hudson")),
... ])
>>> ts.query(
... "SELECT ?s WHERE { ?s $name $obj .}",
... iris={"name": "foaf:name"},
... literals={"obj": "John Dow"},
... )
[(':john',)]

"""
self._check_method("query")
return self.backend.query(query_object=query_object, **kwargs)
new_query = substitute_query(
query, iris=iris, literals=literals, prefixes=self.namespaces
)
return self.backend.query(new_query, **kwargs)

def update(
    self,
    query: str,
    iris: "Optional[dict]" = None,
    literals: "Optional[dict]" = None,
    **kwargs,
) -> None:
    """Update triplestore with SPARQL.

    Arguments:
        query: String with the SPARQL query.
        iris: Dict used for query substitutions that maps IRI variables
            to IRIs. The IRIs may be provided as fully expanded or
            prefixed with a prefix registered in the triplestore namespace.
        literals: Dict used for query substitutions that maps literal
            variables to literals.
        kwargs: Keyword arguments passed to the backend update() method.

    Note:
        See `query()` for how to use the query substitution arguments
        `iris` and `literals`.

        This method is intended for INSERT and DELETE queries. Use
        the query() method for SELECT, ASK, CONSTRUCT and DESCRIBE queries.

    """
    self._check_method("update")
    # Substitute `$`-prefixed variables before handing the query to the
    # backend, using the prefixes bound in this triplestore.
    new_query = substitute_query(
        query, iris=iris, literals=literals, prefixes=self.namespaces
    )
    return self.backend.update(new_query, **kwargs)

@overload
def bind(
Expand Down
68 changes: 68 additions & 0 deletions tripper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import tempfile
import urllib
import warnings
from contextlib import contextmanager
from pathlib import Path
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -64,7 +65,9 @@
"extend_namespace",
"expand_iri",
"prefix_iri",
"substitute_query",
"get_entry_points",
"check_service_availability",
)

MATCH_PREFIXED_IRI = re.compile(
Expand Down Expand Up @@ -732,6 +735,71 @@ def prefix_iri(
return iri


def substitute_query(
    query: str,
    iris: "Optional[dict]" = None,
    literals: "Optional[dict]" = None,
    prefixes: "Optional[dict]" = None,
    iriquote: "Optional[str]" = "<>",
) -> str:
    """Substitute IRI and literal variables in a SPARQL query.

    Arguments:
        query: String with the SPARQL query.
        iris: Dict used for query substitutions that maps IRI variables
            to IRIs. The IRIs may be provided as fully expanded or
            prefixed with the prefix defined in `prefixes`.
        literals: Dict used for query substitutions that maps literal
            variables to literals. For common datatypes, like strings
            and numbers, the values can just be normal Python objects.
            For special cases or more control, provide the values as
            instances of `tripper.Literal`.
        prefixes: Dict mapping prefixes to namespace URLs.
        iriquote: Quote characters to use for IRIs. Normally a string of
            length 2, with the start and end quote. A single character
            is used as both start and end quote. If None or empty, IRIs
            are inserted without quotes.

    Returns:
        The query with all `$`-prefixed variables that are keys in `iris`
        or `literals` substituted. Variables that are in neither are left
        unchanged. If a variable is a key in both, the value from
        `literals` is used.

    Raises:
        ValueError: If `iriquote` has more than 2 characters.

    Warns:
        UserWarning: If the end quote is alphanumeric or one of the
            characters that are not percent-encoded in IRIs. Such an end
            quote may also occur inside the encoded IRI.

    Notes:
        The `query` argument may contain variables for IRIs and literals,
        to be substituted using the `iris` and `literals` arguments. These
        variables are prefixed `$`. This makes them easy to distinguish from
        query variables, that are typically prefixed with `?`.

        The query substitutions may be useful when the query is constructed
        from user input, since they are properly escaped and will be inserted
        in the query as a single token. This may prevent sparql injection
        attacks.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    from urllib.parse import quote

    safe = "-._~:/?#@+&;="  # special IRI characters that are not escaped

    # Resolve start/end quote once, before looping over the IRIs.
    start, end = "", ""
    if iriquote:
        if len(iriquote) > 2:
            raise ValueError(
                f"`iriquote` cannot be more than 2 characters: '{iriquote}'"
            )
        # For a single character, index 0 and -1 are the same character.
        start, end = iriquote[0], iriquote[-1]
        if end.isalnum() or end in safe:
            warnings.warn(
                f"End quote '{end}' is alphanumeric or in '{safe}'",
                stacklevel=2,  # attribute the warning to the caller
            )

    mapping = {}

    if iris:
        if prefixes is None:
            prefixes = {}
        for name, iri in iris.items():
            expanded = expand_iri(iri, prefixes=prefixes)
            mapping[name] = f"{start}{quote(expanded, safe=safe)}{end}"

    if literals:
        for name, value in literals.items():
            mapping[name] = Literal(value).n3()

    # safe_substitute() leaves unknown `$` variables untouched instead of
    # raising KeyError.
    return string.Template(query).safe_substitute(mapping)


def get_entry_points(group: str):
"""Consistent interface to entry points for the given group.

Expand Down