Skip to content

Commit 5e983be

Browse files
committed
Fix escaping of long strings in Turtle/TriG encoder
1 parent 2c85372 commit 5e983be

File tree

5 files changed

+103
-8
lines changed

5 files changed

+103
-8
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@ This project adheres to [Semantic Versioning](http://semver.org/) and
66

77

88

9+
## Unreleased
10+
11+
### Fixed
12+
13+
- The Turtle/TriG encoder didn't escape strings properly when using the long
14+
literal form, i.e. when the encoded string contains newlines, which could
15+
result in invalid output in edge cases.
16+
17+
18+
[Compare v2.0.0...HEAD](https://github.com/rdf-elixir/rdf-ex/compare/v2.0.0...HEAD)
19+
20+
21+
922
## 2.0.0 - 2024-08-07
1023

1124
Elixir versions < 1.13 and OTP versions < 23 are no longer supported

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.0.0
1+
2.0.1-pre

lib/rdf/serializations/ntriples/encoder.ex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,11 @@ defmodule RDF.NTriples.Encoder do
125125
def escape_string(string) do
126126
string
127127
|> String.replace("\\", "\\\\")
128-
|> String.replace("\b", "\\b")
129-
|> String.replace("\f", "\\f")
130128
|> String.replace("\t", "\\t")
129+
|> String.replace("\b", "\\b")
131130
|> String.replace("\n", "\\n")
132131
|> String.replace("\r", "\\r")
132+
|> String.replace("\f", "\\f")
133133
|> String.replace("\"", ~S[\"])
134134
end
135135
end

lib/rdf/serializations/turtle_trig/encoder.ex

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ defmodule RDF.TurtleTriG.Encoder do
44
alias RDF.TurtleTriG.Encoder.{State, Sequencer, CompactStarGraph}
55
alias RDF.{BlankNode, Description, Graph, Dataset, IRI, XSD, Literal, LangString, PrefixMap}
66

7-
import RDF.NTriples.Encoder, only: [escape_string: 1]
8-
97
import State,
108
only: [
119
line_prefixed: 4,
@@ -483,9 +481,54 @@ defmodule RDF.TurtleTriG.Encoder do
483481

484482
defp quoted(string) do
485483
if String.contains?(string, ["\n", "\r"]) do
486-
[~s["""], string, ~s["""]]
484+
[~s["""], escape_string(string, :double, true), ~s["""]]
485+
else
486+
[~s["], escape_string(string, :double), ~s["]]
487+
end
488+
end
489+
490+
@doc false
491+
def escape_string(string, quotes \\ :double, long \\ false) do
492+
string
493+
|> String.replace("\\", "\\\\")
494+
|> String.replace("\b", "\\b")
495+
|> String.replace("\f", "\\f")
496+
|> escape_tabs(long)
497+
|> escape_newlines(long)
498+
|> escape_quotes(quotes, long)
499+
end
500+
501+
defp escape_tabs(string, true), do: string
502+
defp escape_tabs(string, false), do: String.replace(string, "\t", "\\t")
503+
504+
defp escape_newlines(string, true), do: string
505+
506+
defp escape_newlines(string, false) do
507+
string
508+
|> String.replace("\n", "\\n")
509+
|> String.replace("\r", "\\r")
510+
end
511+
512+
defp escape_quotes(string, :double, false), do: String.replace(string, ~S["], ~S[\"])
513+
defp escape_quotes(string, :single, false), do: String.replace(string, ~S['], ~S[\'])
514+
515+
defp escape_quotes(string, :double, true) do
516+
string
517+
|> String.replace(~S["""], ~S[\"""])
518+
|> escape_ending_quotes(~S["])
519+
end
520+
521+
defp escape_quotes(string, :single, true) do
522+
string
523+
|> String.replace(~S['''], ~S[\'''])
524+
|> escape_ending_quotes(~S['])
525+
end
526+
527+
defp escape_ending_quotes(string, quote) do
528+
if String.ends_with?(string, quote) do
529+
String.slice(string, 0..-2//1) <> "\\" <> quote
487530
else
488-
[~s["], escape_string(string), ~s["]]
531+
string
489532
end
490533
end
491534
end

test/unit/serializations/turtle_encoder_test.exs

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ defmodule RDF.Turtle.EncoderTest do
11381138
)
11391139
end
11401140

1141-
test "backslash-escaping" do
1141+
test "backslash escaping" do
11421142
EX.S
11431143
|> EX.p("\\")
11441144
|> assert_serialization(matches: [~s["\\\\"]])
@@ -1148,6 +1148,45 @@ defmodule RDF.Turtle.EncoderTest do
11481148
|> assert_serialization(matches: [~s["\\\\\\\\"]])
11491149
end
11501150

1151+
test "double quote escaping" do
1152+
EX.S
1153+
|> EX.p(~s["foo"bar"])
1154+
|> assert_serialization(matches: [~s["\\"foo\\"bar\\""]])
1155+
1156+
# EX.S
1157+
# |> EX.p(~s[foo\n"])
1158+
# |> assert_serialization(matches: [~s["""foo\n\\""""]])
1159+
EX.S
1160+
|> EX.p(
1161+
"""
1162+
"foo"
1163+
\"""
1164+
"bar""
1165+
"""
1166+
|> String.trim_trailing()
1167+
)
1168+
|> assert_serialization(
1169+
matches: [
1170+
"""
1171+
\""""foo"
1172+
\\\"""
1173+
"bar"\\\""""
1174+
"""
1175+
|> String.trim_trailing()
1176+
]
1177+
)
1178+
end
1179+
1180+
test "tab escaping" do
1181+
EX.S
1182+
|> EX.p(~s[\tbar])
1183+
|> assert_serialization(matches: [~s["\\tbar"]])
1184+
1185+
EX.S
1186+
|> EX.p(~s[foo\n\tbar])
1187+
|> assert_serialization(matches: [~s["""foo\n\tbar"""]])
1188+
end
1189+
11511190
test "language-tagged literals with newlines embedded are encoded with long quotes" do
11521191
Turtle.read_string!(~s[<http://a> <http:/b> """testing string parsing in Turtle.
11531192
"""@en .])

0 commit comments

Comments
 (0)