1+ import enum
12import inspect
3+ import itertools
24import logging
3- from io import StringIO
4- from typing import Mapping , Sequence , Type , Union
5+ import re
6+ import sys
7+ from contextlib import contextmanager
8+ from dataclasses import dataclass
9+ from io import BytesIO , StringIO
10+ from pathlib import Path
11+ from typing import (
12+ IO ,
13+ BinaryIO ,
14+ Dict ,
15+ Iterator ,
16+ Mapping ,
17+ Optional ,
18+ Sequence ,
19+ Set ,
20+ TextIO ,
21+ Tuple ,
22+ Type ,
23+ Union ,
24+ )
525
626import pytest
27+ from _pytest .mark .structures import Mark , MarkDecorator , ParameterSet
728from pyparsing import ParseException
829
9- from rdflib .query import Result
10- from rdflib .term import Identifier , Literal , Variable
30+ from rdflib .graph import Graph
31+ from rdflib .query import Result , ResultRow
32+ from rdflib .term import Identifier , Literal , Node , Variable
1133
# Bindings of a result: a sequence with one variable -> term mapping per row.
BindingsType = Sequence[Mapping[Variable, Identifier]]
# Expected outcome of a parse test: either the bindings that should be
# produced, or an exception type (presumably the one parsing should raise).
ParseOutcomeType = Union[BindingsType, Type[Exception]]
@@ -40,8 +62,7 @@ def test_select_result_parse(
4062 data : str , format : str , parse_outcome : ParseOutcomeType
4163) -> None :
4264 """
43- Round tripping of a select query through the serializer and parser of a
44- specific format results in an equivalent result object.
65+ Parsing serialized SPARQL result produces expected bindings.
4566 """
4667 logging .debug ("data = %s" , data )
4768
@@ -51,3 +72,315 @@ def test_select_result_parse(
5172 else :
5273 parsed_result = Result .parse (StringIO (data ), format = format )
5374 assert parse_outcome == parsed_result .bindings
75+
76+
@pytest.fixture(scope="module")
def select_result(rdfs_graph: Graph) -> Result:
    """
    Run a fixed SELECT query against ``rdfs_graph`` and return its result.

    The query binds ``?subject`` to ``rdfs:Resource`` and selects every
    matching ``?subject ?predicate ?object`` triple, ordered by all three
    variables. The fixture is module scoped, so the query runs once per
    module.
    """
    sparql = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?subject ?predicate ?object WHERE {
        VALUES ?subject { rdfs:Resource }
        ?subject ?predicate ?object
    }
    ORDER BY ?subject ?predicate ?object
    """
    return rdfs_graph.query(sparql)
89+
90+
def check_serialized(format: str, result: Result, data: str) -> None:
    """
    Assert that ``data`` is a serialization of ``result`` in ``format``.

    :param format: Name of the result format ``data`` is written in.
    :param result: The result that was serialized.
    :param data: The serialized text to verify.

    For every format except ``"txt"`` the data is parsed back and compared
    with ``result``. For ``"txt"`` only a smoke test is done: it is by no
    means perfect, but better than nothing.
    """
    if format != "txt":
        reparsed = Result.parse(StringIO(data), format=format)
        assert result == reparsed
        return

    lines = data.splitlines()
    # Expected layout: a header line, a dashed separator, one line per row.
    assert (len(lines) - 2) == len(result)
    assert re.match(r"^[-]+$", lines[1])
    assert result.vars is not None
    header = lines[0]
    for variable in result.vars:
        assert variable in header
    value: Node
    for line_number, row in enumerate(result, start=2):
        rendered_row = lines[line_number]
        assert isinstance(row, ResultRow)
        # Every bound value must show up somewhere in the row's text line.
        for value in row.asdict().values():
            assert f"{value}" in rendered_row
112+
113+
class ResultType(str, enum.Enum):
    """
    Kinds of result used to describe what a result format supports.

    Members are also ``str`` instances whose value equals their name.
    """

    CONSTRUCT = "CONSTRUCT"
    DESCRIBE = "DESCRIBE"
    SELECT = "SELECT"
    ASK = "ASK"
119+
120+
class ResultFormatTrait(enum.Enum):
    """Capabilities that a result format may have."""

    # The format can be written (used to select serializer test cases).
    HAS_SERIALIZER = enum.auto()
    # The format can be read back (used to select parser test cases).
    HAS_PARSER = enum.auto()
124+
125+
@dataclass(frozen=True)
class ResultFormat:
    """Description of one result format used to generate test cases."""

    # Format name, as passed to ``format=`` when serializing or parsing.
    name: str
    # Result types this format is declared to handle.
    supported_types: Set[ResultType]
    # Whether the format has a serializer, a parser, or both.
    traits: Set[ResultFormatTrait]
    # Encodings to exercise for this format.
    encodings: Set[str]
132+
133+
class ResultFormats(Dict[str, ResultFormat]):
    """Dictionary of :class:`ResultFormat` objects keyed by their name."""

    @classmethod
    def make(cls, *result_format: ResultFormat) -> "ResultFormats":
        """
        Build a mapping from the given formats.

        :param result_format: Formats to register, each stored under its
            ``name``. A later format replaces an earlier one with the same
            name.
        :return: A new instance containing all given formats.
        """
        formats = cls()
        formats.update((entry.name, entry) for entry in result_format)
        return formats
141+
142+
# Result formats covered by the tests in this module, keyed by name. The
# traits decide which test generators pick a format up, and the encodings
# decide which encodings are exercised when serializing it.
result_formats = ResultFormats.make(
    ResultFormat(
        name="csv",
        supported_types={ResultType.SELECT},
        traits={ResultFormatTrait.HAS_PARSER, ResultFormatTrait.HAS_SERIALIZER},
        encodings={"utf-8", "utf-16"},
    ),
    # Serializer only: there is no parser for the txt format here.
    ResultFormat(
        name="txt",
        supported_types={ResultType.SELECT},
        traits={ResultFormatTrait.HAS_SERIALIZER},
        encodings={"utf-8"},
    ),
    ResultFormat(
        name="json",
        supported_types={ResultType.SELECT},
        traits={ResultFormatTrait.HAS_PARSER, ResultFormatTrait.HAS_SERIALIZER},
        encodings={"utf-8", "utf-16"},
    ),
    ResultFormat(
        name="xml",
        supported_types={ResultType.SELECT},
        traits={ResultFormatTrait.HAS_PARSER, ResultFormatTrait.HAS_SERIALIZER},
        encodings={"utf-8"},
    ),
    # Parser only: there is no serializer for the tsv format here.
    ResultFormat(
        name="tsv",
        supported_types={ResultType.SELECT},
        traits={ResultFormatTrait.HAS_PARSER},
        encodings={"utf-8", "utf-16"},
    ),
)
188+
189+
class DestinationType(enum.Enum):
    """Kinds of destination a result can be serialized to in these tests."""

    # A file object opened in text mode.
    TEXT_IO = enum.auto()
    # A file object opened in binary mode.
    BINARY_IO = enum.auto()
    # A file system path given as a plain string.
    STR_PATH = enum.auto()
194+
195+
class SourceType(enum.Enum):
    """Kinds of source a serialized result can be parsed from in these tests."""

    # A text stream.
    TEXT_IO = enum.auto()
    # A binary stream.
    BINARY_IO = enum.auto()
199+
200+
@dataclass(frozen=True)
class DestRef:
    """A serialization destination together with the file that backs it."""

    # The object handed to the serializer as its destination: a string
    # path, a binary file object or a text file object.
    param: Union[str, IO[bytes], TextIO]
    # Where the written data ends up on disk, for reading it back.
    path: Path
205+
206+
@contextmanager
def make_dest(
    tmp_path: Path, type: Optional[DestinationType]
) -> Iterator[Optional[DestRef]]:
    """
    Context manager providing a serialization destination of the given type.

    :param tmp_path: Directory in which the backing file is created.
    :param type: Kind of destination wanted, or ``None`` for no destination.
    :return: Yields ``None`` when ``type`` is ``None``, otherwise a
        :class:`DestRef` whose ``param`` is the destination and whose
        ``path`` is the backing file. File objects are closed when the
        context exits.
    :raises ValueError: If ``type`` is not a supported destination type.
    """
    if type is None:
        yield None
        return

    path = tmp_path / f"file-{type}"
    if type is DestinationType.STR_PATH:
        # Nothing to open: the destination is just the path as a string.
        yield DestRef(f"{path}", path)
        return

    if type is DestinationType.BINARY_IO:
        mode = "wb"
    elif type is DestinationType.TEXT_IO:
        mode = "w"
    else:
        raise ValueError(f"unsupported type {type}")
    with path.open(mode) as handle:
        yield DestRef(handle, path)
225+
226+
def make_select_result_serialize_parse_tests() -> Iterator[ParameterSet]:
    """
    Generate the parameters for ``test_select_result_serialize_parse``.

    One parameter set is produced for every combination of a result format
    that has a serializer and supports SELECT results, a destination type
    (including ``None`` for "no destination"), and one of the format's
    encodings. Combinations listed in ``xfails`` carry the matching
    ``xfail`` mark.

    Parameters are yielded in a fixed order. Iterating sets of enum members
    or strings directly would make the order depend on hash randomization,
    so it could differ between processes.

    :return: Iterator of ``pytest.param`` values, each wrapping a
        ``(format, destination_type, encoding)`` tuple.
    """
    xfails: Dict[
        Tuple[str, Optional[DestinationType], str], Union[MarkDecorator, Mark]
    ] = {}
    # Combinations expected to fail with a TypeError.
    for encoding in ("utf-8", "utf-16"):
        for format_name, destination_type in (
            ("csv", DestinationType.TEXT_IO),
            ("json", DestinationType.TEXT_IO),
            ("txt", DestinationType.BINARY_IO),
            ("txt", DestinationType.STR_PATH),
        ):
            xfails[(format_name, destination_type, encoding)] = pytest.mark.xfail(
                raises=TypeError
            )
    if sys.platform == "win32":
        for format_name, encoding in (
            ("csv", "utf-8"),
            ("csv", "utf-16"),
            ("json", "utf-8"),
            ("json", "utf-16"),
            ("xml", "utf-8"),
        ):
            xfails[
                (format_name, DestinationType.STR_PATH, encoding)
            ] = pytest.mark.xfail(
                raises=FileNotFoundError,
                reason="string path handling does not work on windows",
            )

    serializable_formats = [
        result_format
        for result_format in result_formats.values()
        if ResultFormatTrait.HAS_SERIALIZER in result_format.traits
        and ResultType.SELECT in result_format.supported_types
    ]
    # A tuple in definition order instead of a set keeps the order stable.
    destination_types: Tuple[Optional[DestinationType], ...] = (
        None,
        *DestinationType,
    )
    for result_format, destination_type in itertools.product(
        serializable_formats, destination_types
    ):
        destination_name = (
            None if destination_type is None else destination_type.name
        )
        # sorted() for the same reason: set iteration order is not stable.
        for encoding in sorted(result_format.encodings):
            xfail = xfails.get((result_format.name, destination_type, encoding))
            yield pytest.param(
                (result_format, destination_type, encoding),
                id=f"{result_format.name}-{destination_name}-{encoding}",
                marks=(xfail,) if xfail is not None else (),
            )
286+
287+
@pytest.mark.parametrize(
    ["args"],
    make_select_result_serialize_parse_tests(),
)
def test_select_result_serialize_parse(
    tmp_path: Path,
    select_result: Result,
    args: Tuple[ResultFormat, Optional[DestinationType], str],
) -> None:
    """
    Round tripping of a select query through the serializer and parser of a
    specific format results in an equivalent result object.
    """
    result_format, destination_type, encoding = args
    with make_dest(tmp_path, destination_type) as dest_ref:
        serialize_result = select_result.serialize(
            destination=dest_ref.param if dest_ref is not None else None,
            format=result_format.name,
            encoding=encoding,
        )

    # The destination is read back only after the context has exited, so
    # any file object created by make_dest has been closed.
    if dest_ref is not None:
        assert serialize_result is None
        raw_data = dest_ref.path.read_bytes()
    else:
        # Without a destination the serialized bytes are returned directly.
        assert isinstance(serialize_result, bytes)
        raw_data = serialize_result
    serialized_data = raw_data.decode(encoding)

    logging.debug("serialized_data = %s", serialized_data)
    check_serialized(result_format.name, select_result, serialized_data)
319+
320+
def serialize_select(select_result: Result, format: str, encoding: str) -> bytes:
    """
    Return ``select_result`` serialized in ``format`` as encoded bytes.

    :param select_result: The result to serialize. Not used for ``"tsv"``,
        whose data is hardcoded.
    :param format: Name of the result format to produce.
    :param encoding: Encoding of the returned bytes.
    :return: The serialized result.
    """
    if format == "tsv":
        # This is hardcoded as it is particularly difficult to generate. If
        # the result changes this will have to be adjusted by hand.
        #
        # Columns are joined with an explicit "\t" so the separators stay
        # visible and cannot be turned into spaces by an editor.
        rows = (
            ("?subject", "?predicate", "?object"),
            (
                "<http://www.w3.org/2000/01/rdf-schema#Resource>",
                "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
                "<http://www.w3.org/2000/01/rdf-schema#Class>",
            ),
            (
                "<http://www.w3.org/2000/01/rdf-schema#Resource>",
                "<http://www.w3.org/2000/01/rdf-schema#comment>",
                '"The class resource, everything."',
            ),
            (
                "<http://www.w3.org/2000/01/rdf-schema#Resource>",
                "<http://www.w3.org/2000/01/rdf-schema#isDefinedBy>",
                "<http://www.w3.org/2000/01/rdf-schema#>",
            ),
            (
                "<http://www.w3.org/2000/01/rdf-schema#Resource>",
                "<http://www.w3.org/2000/01/rdf-schema#label>",
                '"Resource"',
            ),
        )
        # No trailing newline after the last row.
        return "\n".join("\t".join(row) for row in rows).encode(encoding)

    # Pass the requested encoding on, so the bytes match what the caller
    # will decode them with.
    result = select_result.serialize(format=format, encoding=encoding)
    assert result is not None
    return result
336+
337+
def make_select_result_parse_serialized_tests() -> Iterator[ParameterSet]:
    """
    Generate the parameters for ``test_select_result_parse_serialized``.

    One parameter set is produced for every combination of a result format
    that has a parser and supports SELECT results, and a source type. Only
    the ``utf-8`` encoding is exercised. Combinations listed in ``xfails``
    (currently none) carry the matching ``xfail`` mark.

    Source types are iterated in definition order rather than through a
    set, so the generated order does not depend on hash randomization.

    :return: Iterator of ``pytest.param`` values, each wrapping a
        ``(format, source_type, encoding)`` tuple.
    """
    xfails: Dict[Tuple[str, Optional[SourceType], str], Union[MarkDecorator, Mark]] = {}
    parsable_formats = [
        result_format
        for result_format in result_formats.values()
        if ResultFormatTrait.HAS_PARSER in result_format.traits
        and ResultType.SELECT in result_format.supported_types
    ]
    encodings = ("utf-8",)
    for result_format, source_type in itertools.product(parsable_formats, SourceType):
        for encoding in encodings:
            xfail = xfails.get((result_format.name, source_type, encoding))
            yield pytest.param(
                (result_format, source_type, encoding),
                id=f"{result_format.name}-{source_type.name}-{encoding}",
                marks=(xfail,) if xfail is not None else (),
            )
356+
357+
@pytest.mark.parametrize(
    ["args"],
    make_select_result_parse_serialized_tests(),
)
def test_select_result_parse_serialized(
    tmp_path: Path,
    select_result: Result,
    args: Tuple[ResultFormat, SourceType, str],
) -> None:
    """
    Parsing a serialized result produces the expected result object.
    """
    result_format, source_type, encoding = args

    serialized_data = serialize_select(select_result, result_format.name, encoding)
    decoded_data = serialized_data.decode(encoding)
    logging.debug("serialized_data = %s", decoded_data)

    # Hand the parser the same data as either a binary or a text stream.
    source: Union[BinaryIO, TextIO]
    if source_type is SourceType.BINARY_IO:
        source = BytesIO(serialized_data)
    elif source_type is SourceType.TEXT_IO:
        source = StringIO(decoded_data)
    else:
        raise ValueError(f"Invalid source_type {source_type}")

    parsed_result = Result.parse(source, format=result_format.name)

    assert select_result == parsed_result
0 commit comments