
Commit f014e98

Merge pull request #1810 from aucampia/iwana-20220410T1009-sparql_result_serdes
test: Add tests for SPARQL parsing and serialization
2 parents 4b30926 + 4464b73 commit f014e98


test/test_sparql/test_result.py

Lines changed: 339 additions & 6 deletions
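The new tests exercise rdflib's Result serialization and parsing round trip. As orientation before the diff, here is a minimal sketch of the API under test (the graph data and query are illustrative and not taken from the commit):

from io import BytesIO

from rdflib import Graph
from rdflib.query import Result

graph = Graph()
graph.parse(
    data="<http://example.com/s> <http://example.com/p> <http://example.com/o> .",
    format="turtle",
)
result = graph.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }")

data = result.serialize(format="json")  # returns bytes when no destination is given
parsed = Result.parse(BytesIO(data), format="json")
assert parsed == result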
@@ -1,13 +1,35 @@
+import enum
 import inspect
+import itertools
 import logging
-from io import StringIO
-from typing import Mapping, Sequence, Type, Union
+import re
+import sys
+from contextlib import contextmanager
+from dataclasses import dataclass
+from io import BytesIO, StringIO
+from pathlib import Path
+from typing import (
+    IO,
+    BinaryIO,
+    Dict,
+    Iterator,
+    Mapping,
+    Optional,
+    Sequence,
+    Set,
+    TextIO,
+    Tuple,
+    Type,
+    Union,
+)

 import pytest
+from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
 from pyparsing import ParseException

-from rdflib.query import Result
-from rdflib.term import Identifier, Literal, Variable
+from rdflib.graph import Graph
+from rdflib.query import Result, ResultRow
+from rdflib.term import Identifier, Literal, Node, Variable

 BindingsType = Sequence[Mapping[Variable, Identifier]]
 ParseOutcomeType = Union[BindingsType, Type[Exception]]
@@ -40,8 +62,7 @@ def test_select_result_parse(
     data: str, format: str, parse_outcome: ParseOutcomeType
 ) -> None:
     """
-    Round tripping of a select query through the serializer and parser of a
-    specific format results in an equivalent result object.
+    Parsing a serialized SPARQL result produces the expected bindings.
     """
     logging.debug("data = %s", data)

@@ -51,3 +72,315 @@ def test_select_result_parse(
     else:
         parsed_result = Result.parse(StringIO(data), format=format)
         assert parse_outcome == parsed_result.bindings
+
+
+@pytest.fixture(scope="module")
+def select_result(rdfs_graph: Graph) -> Result:
+    query = """
+    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+    SELECT ?subject ?predicate ?object WHERE {
+        VALUES ?subject { rdfs:Resource }
+        ?subject ?predicate ?object
+    }
+    ORDER BY ?subject ?predicate ?object
+    """
+    result = rdfs_graph.query(query)
+    return result
+
+
+def check_serialized(format: str, result: Result, data: str) -> None:
+    if format == "txt":
+        # This does a rough smoke test that data is the txt serialization of
+        # the given result. It is by no means perfect, but better than
+        # nothing.
+        txt_lines = data.splitlines()
+        assert (len(txt_lines) - 2) == len(result)
+        assert re.match(r"^[-]+$", txt_lines[1])
+        header = txt_lines[0]
+        assert result.vars is not None
+        for var in result.vars:
+            assert var in header
+        for row_index, row in enumerate(result):
+            txt_row = txt_lines[row_index + 2]
+            value: Node
+            assert isinstance(row, ResultRow)
+            for key, value in row.asdict().items():
+                assert f"{value}" in txt_row
+    else:
+        parsed_result = Result.parse(StringIO(data), format=format)
+        assert result == parsed_result
+
+
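The txt branch of check_serialized above is only a rough consistency check: it assumes a header line, an all-dashes separator line, and then one line per result row, without asserting an exact column layout. A minimal sketch of that assumption, reusing the select_result fixture (illustrative only, not part of the commit):

# Slice the txt serialization the way check_serialized does.
data = select_result.serialize(format="txt", encoding="utf-8")
assert isinstance(data, bytes)
lines = data.decode("utf-8").splitlines()
header, separator, rows = lines[0], lines[1], lines[2:]
assert set(separator) == {"-"}          # second line is all dashes
assert len(rows) == len(select_result)  # one line per result row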
+class ResultType(str, enum.Enum):
+    CONSTRUCT = "CONSTRUCT"
+    DESCRIBE = "DESCRIBE"
+    SELECT = "SELECT"
+    ASK = "ASK"
+
+
+class ResultFormatTrait(enum.Enum):
+    HAS_SERIALIZER = enum.auto()
+    HAS_PARSER = enum.auto()
+
+
+@dataclass(frozen=True)
+class ResultFormat:
+    name: str
+    supported_types: Set[ResultType]
+    traits: Set[ResultFormatTrait]
+    encodings: Set[str]
+
+
+class ResultFormats(Dict[str, ResultFormat]):
+    @classmethod
+    def make(cls, *result_format: ResultFormat) -> "ResultFormats":
+        result = cls()
+        for item in result_format:
+            result[item.name] = item
+        return result
+
+
+result_formats = ResultFormats.make(
+    ResultFormat(
+        "csv",
+        {ResultType.SELECT},
+        {
+            ResultFormatTrait.HAS_PARSER,
+            ResultFormatTrait.HAS_SERIALIZER,
+        },
+        {"utf-8", "utf-16"},
+    ),
+    ResultFormat(
+        "txt",
+        {ResultType.SELECT},
+        {
+            ResultFormatTrait.HAS_SERIALIZER,
+        },
+        {"utf-8"},
+    ),
+    ResultFormat(
+        "json",
+        {ResultType.SELECT},
+        {
+            ResultFormatTrait.HAS_PARSER,
+            ResultFormatTrait.HAS_SERIALIZER,
+        },
+        {"utf-8", "utf-16"},
+    ),
+    ResultFormat(
+        "xml",
+        {ResultType.SELECT},
+        {
+            ResultFormatTrait.HAS_PARSER,
+            ResultFormatTrait.HAS_SERIALIZER,
+        },
+        {"utf-8"},
+    ),
+    ResultFormat(
+        "tsv",
+        {ResultType.SELECT},
+        {
+            ResultFormatTrait.HAS_PARSER,
+        },
+        {"utf-8", "utf-16"},
+    ),
+)
+
+
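The result_formats registry above drives the parametrized tests that follow; the generators filter it by trait and result type. For example, the serializer-capable SELECT formats can be selected as below (the assertion simply restates the entries defined above):

# Keep only formats that can serialize SELECT results; tsv is parser-only.
serializable = [
    fmt
    for fmt in result_formats.values()
    if ResultFormatTrait.HAS_SERIALIZER in fmt.traits
    and ResultType.SELECT in fmt.supported_types
]
assert {fmt.name for fmt in serializable} == {"csv", "txt", "json", "xml"}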
+class DestinationType(enum.Enum):
+    TEXT_IO = enum.auto()
+    BINARY_IO = enum.auto()
+    STR_PATH = enum.auto()
+
+
+class SourceType(enum.Enum):
+    TEXT_IO = enum.auto()
+    BINARY_IO = enum.auto()
+
+
+@dataclass(frozen=True)
+class DestRef:
+    param: Union[str, IO[bytes], TextIO]
+    path: Path
+
+
+@contextmanager
+def make_dest(
+    tmp_path: Path, type: Optional[DestinationType]
+) -> Iterator[Optional[DestRef]]:
+    if type is None:
+        yield None
+        return
+    path = tmp_path / f"file-{type}"
+    if type is DestinationType.STR_PATH:
+        yield DestRef(f"{path}", path)
+    elif type is DestinationType.BINARY_IO:
+        with path.open("wb") as bfh:
+            yield DestRef(bfh, path)
+    elif type is DestinationType.TEXT_IO:
+        with path.open("w") as fh:
+            yield DestRef(fh, path)
+    else:
+        raise ValueError(f"unsupported type {type}")
+
+
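A minimal usage sketch for make_dest (illustrative; it mirrors what test_select_result_serialize_parse below does, with tmp_path being pytest's tmp_path fixture):

# Serialize to a string path inside tmp_path, then read the bytes back.
with make_dest(tmp_path, DestinationType.STR_PATH) as dest_ref:
    assert dest_ref is not None
    select_result.serialize(destination=dest_ref.param, format="csv", encoding="utf-8")
data = dest_ref.path.read_bytes().decode("utf-8")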
+def make_select_result_serialize_parse_tests() -> Iterator[ParameterSet]:
+    xfails: Dict[
+        Tuple[str, Optional[DestinationType], str], Union[MarkDecorator, Mark]
+    ] = {
+        ("csv", DestinationType.TEXT_IO, "utf-8"): pytest.mark.xfail(raises=TypeError),
+        ("csv", DestinationType.TEXT_IO, "utf-16"): pytest.mark.xfail(raises=TypeError),
+        ("json", DestinationType.TEXT_IO, "utf-8"): pytest.mark.xfail(raises=TypeError),
+        ("json", DestinationType.TEXT_IO, "utf-16"): pytest.mark.xfail(
+            raises=TypeError
+        ),
+        ("txt", DestinationType.BINARY_IO, "utf-8"): pytest.mark.xfail(
+            raises=TypeError
+        ),
+        ("txt", DestinationType.BINARY_IO, "utf-16"): pytest.mark.xfail(
+            raises=TypeError
+        ),
+        ("txt", DestinationType.STR_PATH, "utf-8"): pytest.mark.xfail(raises=TypeError),
+        ("txt", DestinationType.STR_PATH, "utf-16"): pytest.mark.xfail(
+            raises=TypeError
+        ),
+    }
+    if sys.platform == "win32":
+        xfails[("csv", DestinationType.STR_PATH, "utf-8")] = pytest.mark.xfail(
+            raises=FileNotFoundError,
+            reason="string path handling does not work on windows",
+        )
+        xfails[("csv", DestinationType.STR_PATH, "utf-16")] = pytest.mark.xfail(
+            raises=FileNotFoundError,
+            reason="string path handling does not work on windows",
+        )
+        xfails[("json", DestinationType.STR_PATH, "utf-8")] = pytest.mark.xfail(
+            raises=FileNotFoundError,
+            reason="string path handling does not work on windows",
+        )
+        xfails[("json", DestinationType.STR_PATH, "utf-16")] = pytest.mark.xfail(
+            raises=FileNotFoundError,
+            reason="string path handling does not work on windows",
+        )
+        xfails[("xml", DestinationType.STR_PATH, "utf-8")] = pytest.mark.xfail(
+            raises=FileNotFoundError,
+            reason="string path handling does not work on windows",
+        )
+    formats = [
+        format
+        for format in result_formats.values()
+        if ResultFormatTrait.HAS_SERIALIZER in format.traits
+        and ResultType.SELECT in format.supported_types
+    ]
+    destination_types: Set[Optional[DestinationType]] = {None}
+    destination_types.update(set(DestinationType))
+    for format, destination_type in itertools.product(formats, destination_types):
+        for encoding in format.encodings:
+            xfail = xfails.get((format.name, destination_type, encoding))
+            marks = (xfail,) if xfail is not None else ()
+            yield pytest.param(
+                (format, destination_type, encoding),
+                id=f"{format.name}-{None if destination_type is None else destination_type.name}-{encoding}",
+                marks=marks,
+            )
+
+
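For reference, one parameter set that make_select_result_serialize_parse_tests yields is roughly equivalent to writing the case by hand; the txt serializer presumably only accepts text destinations here, hence the expected TypeError:

# Approximately the generated txt/BINARY_IO/utf-8 case.
pytest.param(
    (result_formats["txt"], DestinationType.BINARY_IO, "utf-8"),
    id="txt-BINARY_IO-utf-8",
    marks=(pytest.mark.xfail(raises=TypeError),),
)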
+@pytest.mark.parametrize(
+    ["args"],
+    make_select_result_serialize_parse_tests(),
+)
+def test_select_result_serialize_parse(
+    tmp_path: Path,
+    select_result: Result,
+    args: Tuple[ResultFormat, Optional[DestinationType], str],
+) -> None:
+    """
+    Round tripping of a select query through the serializer and parser of a
+    specific format results in an equivalent result object.
+    """
+    format, destination_type, encoding = args
+    with make_dest(tmp_path, destination_type) as dest_ref:
+        destination = None if dest_ref is None else dest_ref.param
+        serialize_result = select_result.serialize(
+            destination=destination,
+            format=format.name,
+            encoding=encoding,
+        )
+
+    if dest_ref is None:
+        assert isinstance(serialize_result, bytes)
+        serialized_data = serialize_result.decode(encoding)
+    else:
+        assert serialize_result is None
+        serialized_data = dest_ref.path.read_bytes().decode(encoding)
+
+    logging.debug("serialized_data = %s", serialized_data)
+    check_serialized(format.name, select_result, serialized_data)
+
+
+def serialize_select(select_result: Result, format: str, encoding: str) -> bytes:
+    if format == "tsv":
+        # This is hardcoded as it is particularly difficult to generate. If the result changes this will have to be adjusted by hand.
+        return '''\
+?subject	?predicate	?object
+<http://www.w3.org/2000/01/rdf-schema#Resource>	<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>	<http://www.w3.org/2000/01/rdf-schema#Class>
+<http://www.w3.org/2000/01/rdf-schema#Resource>	<http://www.w3.org/2000/01/rdf-schema#comment>	"The class resource, everything."
+<http://www.w3.org/2000/01/rdf-schema#Resource>	<http://www.w3.org/2000/01/rdf-schema#isDefinedBy>	<http://www.w3.org/2000/01/rdf-schema#>
+<http://www.w3.org/2000/01/rdf-schema#Resource>	<http://www.w3.org/2000/01/rdf-schema#label>	"Resource"'''.encode(
+            encoding
+        )
+    else:
+        result = select_result.serialize(format=format)
+        assert result is not None
+        return result
+
+
+def make_select_result_parse_serialized_tests() -> Iterator[ParameterSet]:
+    xfails: Dict[Tuple[str, Optional[SourceType], str], Union[MarkDecorator, Mark]] = {}
+    formats = [
+        format
+        for format in result_formats.values()
+        if ResultFormatTrait.HAS_PARSER in format.traits
+        and ResultType.SELECT in format.supported_types
+    ]
+    source_types = set(SourceType)
+    for format, destination_type in itertools.product(formats, source_types):
+        for encoding in {"utf-8"}:
+            xfail = xfails.get((format.name, destination_type, encoding))
+            marks = (xfail,) if xfail is not None else ()
+            yield pytest.param(
+                (format, destination_type, encoding),
+                id=f"{format.name}-{None if destination_type is None else destination_type.name}-{encoding}",
+                marks=marks,
+            )
+
+
+@pytest.mark.parametrize(
+    ["args"],
+    make_select_result_parse_serialized_tests(),
+)
+def test_select_result_parse_serialized(
+    tmp_path: Path,
+    select_result: Result,
+    args: Tuple[ResultFormat, SourceType, str],
+) -> None:
+    """
+    Parsing a serialized result produces the expected result object.
+    """
+    format, source_type, encoding = args
+
+    serialized_data = serialize_select(select_result, format.name, encoding)
+
+    logging.debug("serialized_data = %s", serialized_data.decode(encoding))
+
+    source: Union[BinaryIO, TextIO]
+    if source_type is SourceType.TEXT_IO:
+        source = StringIO(serialized_data.decode(encoding))
+    elif source_type is SourceType.BINARY_IO:
+        source = BytesIO(serialized_data)
+    else:
+        raise ValueError(f"Invalid source_type {source_type}")
+
+    parsed_result = Result.parse(source, format=format.name)
+
+    assert select_result == parsed_result
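Taken together with serialize_select, the parser test above boils down to the following sketch, reusing the select_result fixture with csv chosen arbitrarily (illustrative, not part of the commit):

# Parse the same serialized data from both a binary and a text source.
csv_bytes = select_result.serialize(format="csv", encoding="utf-8")
assert isinstance(csv_bytes, bytes)
from_binary = Result.parse(BytesIO(csv_bytes), format="csv")
from_text = Result.parse(StringIO(csv_bytes.decode("utf-8")), format="csv")
assert from_binary == select_result == from_text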
