Skip to content

Commit 72287e4

Browse files
fix: RecursiveSerializer outputs undeclared prefix for predicates that contain the base as a substring (#3267)
* fix: turtle, longturtle, and n3 serializers output an undeclared prefix when a base is set and a predicate contains the base as a substring. Fixes: #3160
* chore: remove print
* chore: formatting
---------
Co-authored-by: Nicholas Car <nick@kurrawong.net>
1 parent 2a902e5 commit 72287e4

File tree

5 files changed

+81
-2
lines changed

5 files changed

+81
-2
lines changed

rdflib/plugins/serializers/longturtle.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ def preprocessTriple(self, triple: _TripleType) -> None:
160160
self.base is not None
161161
and isinstance(node, URIRef)
162162
and node.startswith(self.base)
163+
and "#" not in node.replace(self.base, "")
164+
and "/" not in node.replace(self.base, "")
163165
):
164166
# predicate corresponds to base namespace
165167
continue

rdflib/plugins/serializers/turtle.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
Optional,
1818
Sequence,
1919
Tuple,
20+
TypeVar,
21+
Union,
2022
)
2123

2224
from rdflib.exceptions import Error
@@ -25,6 +27,8 @@
2527
from rdflib.serializer import Serializer
2628
from rdflib.term import BNode, Literal, Node, URIRef
2729

30+
_StrT = TypeVar("_StrT", bound=str)
31+
2832
if TYPE_CHECKING:
2933
from rdflib.graph import _PredicateType, _SubjectType, _TripleType
3034

@@ -169,6 +173,18 @@ def write(self, text: str) -> None:
169173
# type error: Item "None" of "Optional[IO[bytes]]" has no attribute "write"
170174
self.stream.write(text.encode(self.encoding, "replace")) # type: ignore[union-attr]
171175

176+
def relativize(self, uri: _StrT) -> Union[_StrT, URIRef]:
    """Return ``uri`` relative to ``self.base`` when it sits directly under it.

    ``uri`` is shortened only when it starts with ``self.base`` and the part
    after that prefix contains neither ``#`` nor ``/``. In every other case
    (no base configured, different prefix, or a remainder with a further
    path segment or fragment) ``uri`` is returned unchanged.

    Args:
        uri: The IRI string to shorten.

    Returns:
        A ``URIRef`` holding the remainder after the base, or ``uri`` itself
        when it is not relativized.
    """
    base = self.base
    if base is None or not uri.startswith(base):
        return uri
    # Strip the base from the front only. ``str.replace`` without a count
    # would also delete later occurrences of the base inside the IRI, so the
    # separator check could pass on text that differs from what is emitted.
    remainder = uri[len(base) :]
    if "#" in remainder or "/" in remainder:
        return uri
    return URIRef(remainder)
187+
172188

173189
SUBJECT = 0
174190
VERB = 1
@@ -271,6 +287,8 @@ def preprocessTriple(self, triple: _TripleType) -> None:
271287
self.base is not None
272288
and isinstance(node, URIRef)
273289
and node.startswith(self.base)
290+
and "#" not in node.replace(self.base, "")
291+
and "/" not in node.replace(self.base, "")
274292
):
275293
# predicate corresponds to base namespace
276294
continue

test/test_n3.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,10 @@ def test_base_serialize(self):
123123
URIRef("http://example.com/people/Linda"),
124124
)
125125
)
126-
s = g.serialize(base="http://example.com/", format="n3", encoding="latin-1")
127-
assert b"<people/Bob>" in s
126+
s = g.serialize(
127+
base="http://example.com/people/", format="n3", encoding="latin-1"
128+
)
129+
assert b"<Bob>" in s
128130
g2 = Dataset()
129131
g2.parse(data=s, format="n3")
130132
assert list(g) == list(g2.triples((None, None, None)))

test/test_serializers/test_serializer_longturtle.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import difflib
22
from pathlib import Path
3+
from textwrap import dedent
34

45
from rdflib import Graph, Namespace
56
from rdflib.namespace import GEO, SDO
@@ -181,3 +182,31 @@ def test_longturtle():
181182
diff = "\n".join(list(difflib.unified_diff(target.split("\n"), output.split("\n"))))
182183

183184
assert not diff, diff
185+
186+
187+
def test_longturtle_undeclared_prefix_when_using_base():
    """
    Check that longturtle declares a prefix for a predicate below the base.

    The predicate IRI starts with the base but continues with a further
    path segment, so the expected output carries a ``PREFIX`` declaration
    for it alongside ``BASE``.

    See https://github.com/RDFLib/rdflib/issues/3160
    """
    # Graph is already imported at module level; only the term classes are
    # needed locally.
    from rdflib import Literal, URIRef

    g = Graph()
    g.add(
        (
            URIRef("https://example.com/subject"),
            URIRef("https://example.com/p/predicate"),
            Literal("object"),
        )
    )
    output = g.serialize(format="longturtle", base="https://example.com/")
    # NOTE(review): the predicate line is assumed to be indented four spaces
    # relative to the subject -- confirm against the serializer's output.
    expected = dedent(
        """
        BASE <https://example.com/>
        PREFIX ns1: <https://example.com/p/>

        <subject>
            ns1:predicate "object" ;
        .
        """
    )
    assert output.strip() == expected.strip()

test/test_serializers/test_serializer_turtle.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from textwrap import dedent
2+
13
from rdflib import RDF, RDFS, BNode, Graph, Literal, Namespace, URIRef
24
from rdflib.collection import Collection
35
from rdflib.plugins.serializers.turtle import TurtleSerializer
@@ -113,3 +115,29 @@ def test_turtle_namespace():
113115
assert "GENO:0000385" in output
114116
assert "SERIAL:0167-6423" in output
115117
assert r"EX:name_with_\(parenthesis\)" in output
118+
119+
120+
def test_turtle_undeclared_prefix_when_using_base():
    """
    Check that turtle declares a prefix for a predicate below the base.

    The predicate IRI starts with the base but continues with a further
    path segment, so the expected output carries an ``@prefix`` declaration
    for it alongside ``@base``.

    See https://github.com/RDFLib/rdflib/issues/3160
    """
    # Graph, Literal and URIRef come from this module's top-level rdflib
    # import, so no function-local import is needed.
    g = Graph()
    g.add(
        (
            URIRef("https://example.com/subject"),
            URIRef("https://example.com/p/predicate"),
            Literal("object"),
        )
    )
    output = g.serialize(format="turtle", base="https://example.com/")
    expected = dedent(
        """
        @base <https://example.com/> .
        @prefix ns1: <https://example.com/p/> .

        <subject> ns1:predicate "object" .
        """
    )
    assert output.strip() == expected.strip()

0 commit comments

Comments
 (0)