Skip to content

Commit 05c906d

Browse files
committed
Fix handling of invalid bytes in stream decoders
1 parent 26e9cee commit 05c906d

File tree

2 files changed

+16
-5
lines changed

2 files changed

+16
-5
lines changed

src/Data/Text/Internal/Lazy/Encoding/Fusion.hs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ streamUtf8 onErr bs0 = Stream next (T bs0 S0 0) unknownSize
100100
S2 a b -> next (T bs (S3 a b x) (i+1))
101101
S3 a b c -> next (T bs (S4 a b c x) (i+1))
102102
S4 a b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
103-
(T bs (S3 b c d) (i+1))
103+
(T bs (S4 b c d x) (i+1))
104104
where x = B.unsafeIndex ps i
105105
consume (T Empty S0 _) = Done
106106
consume (T Empty _ i) = decodeError "streamUtf8" "UTF-8" onErr Nothing (T Empty S0 i)
@@ -140,7 +140,7 @@ streamUtf16LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
140140
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
141141
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
142142
S4 w1 w2 w3 w4 -> decodeError "streamUtf16LE" "UTF-16LE" onErr (Just w1)
143-
(T bs (S3 w2 w3 w4) (i+1))
143+
(T bs (S4 w2 w3 w4 x) (i+1))
144144
where x = B.unsafeIndex ps i
145145
consume (T Empty S0 _) = Done
146146
consume (T Empty _ i) = decodeError "streamUtf16LE" "UTF-16LE" onErr Nothing (T Empty S0 i)
@@ -180,7 +180,7 @@ streamUtf16BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
180180
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
181181
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
182182
S4 w1 w2 w3 w4 -> decodeError "streamUtf16BE" "UTF-16BE" onErr (Just w1)
183-
(T bs (S3 w2 w3 w4) (i+1))
183+
(T bs (S4 w2 w3 w4 x) (i+1))
184184
where x = B.unsafeIndex ps i
185185
consume (T Empty S0 _) = Done
186186
consume (T Empty _ i) = decodeError "streamUtf16BE" "UTF-16BE" onErr Nothing (T Empty S0 i)
@@ -224,7 +224,7 @@ streamUtf32BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
224224
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
225225
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
226226
S4 w1 w2 w3 w4 -> decodeError "streamUtf32BE" "UTF-32BE" onErr (Just w1)
227-
(T bs (S3 w2 w3 w4) (i+1))
227+
(T bs (S4 w2 w3 w4 x) (i+1))
228228
where x = B.unsafeIndex ps i
229229
consume (T Empty S0 _) = Done
230230
consume (T Empty _ i) = decodeError "streamUtf32BE" "UTF-32BE" onErr Nothing (T Empty S0 i)
@@ -268,7 +268,7 @@ streamUtf32LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
268268
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
269269
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
270270
S4 w1 w2 w3 w4 -> decodeError "streamUtf32LE" "UTF-32LE" onErr (Just w1)
271-
(T bs (S3 w2 w3 w4) (i+1))
271+
(T bs (S4 w2 w3 w4 x) (i+1))
272272
where x = B.unsafeIndex ps i
273273
consume (T Empty S0 _) = Done
274274
consume (T Empty _ i) = decodeError "streamUtf32LE" "UTF-32LE" onErr Nothing (T Empty S0 i)

tests/Tests/Regressions.hs

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,16 @@ t525 = do
158158
LE.decodeUtf32BEWith E.lenientDecode "\0" @?= "\65533"
159159
LE.decodeUtf32LEWith E.lenientDecode "\0" @?= "\65533"
160160

161+
-- Stream decoders skip one invalid byte at a time
162+
t528 :: IO ()
163+
t528 = do
164+
let decodeUtf8With onErr bs = LF.unstream (E.streamUtf8 onErr bs)
165+
decodeUtf8With E.lenientDecode "\xC0\xF0\x90\x80\x80" @?= "\65533\65536"
166+
LE.decodeUtf16BEWith E.lenientDecode "\xD8\xD8\x00\xDC\x00" @?= "\65533\65536"
167+
LE.decodeUtf16LEWith E.lenientDecode "\xD8\xD8\x00\xD8\x00\xDC" @?= "\65533\65533\65536"
168+
LE.decodeUtf32BEWith E.lenientDecode "\xFF\x00\x00\x00\x00" @?= "\65533\0"
169+
LE.decodeUtf32LEWith E.lenientDecode "\x00\x00\xFF\x00\x00" @?= "\65533\65280"
170+
161171
tests :: F.TestTree
162172
tests = F.testGroup "Regressions"
163173
[ F.testCase "hGetContents_crash" hGetContents_crash
@@ -173,4 +183,5 @@ tests = F.testGroup "Regressions"
173183
, F.testCase "t301" t301
174184
, F.testCase "t330" t330
175185
, F.testCase "t525" t525
186+
, F.testCase "t528" t528
176187
]

0 commit comments

Comments
 (0)