Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of invalid bytes in stream decoders #528

Merged
merged 1 commit into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/Data/Text/Internal/Lazy/Encoding/Fusion.hs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ streamUtf8 onErr bs0 = Stream next (T bs0 S0 0) unknownSize
S2 a b -> next (T bs (S3 a b x) (i+1))
S3 a b c -> next (T bs (S4 a b c x) (i+1))
S4 a b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
(T bs (S3 b c d) (i+1))
(T bs (S4 b c d x) (i+1))
where x = B.unsafeIndex ps i
consume (T Empty S0 _) = Done
consume (T Empty _ i) = decodeError "streamUtf8" "UTF-8" onErr Nothing (T Empty S0 i)
Expand Down Expand Up @@ -140,7 +140,7 @@ streamUtf16LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
S4 w1 w2 w3 w4 -> decodeError "streamUtf16LE" "UTF-16LE" onErr (Just w1)
(T bs (S3 w2 w3 w4) (i+1))
(T bs (S4 w2 w3 w4 x) (i+1))
where x = B.unsafeIndex ps i
consume (T Empty S0 _) = Done
consume (T Empty _ i) = decodeError "streamUtf16LE" "UTF-16LE" onErr Nothing (T Empty S0 i)
Expand Down Expand Up @@ -180,7 +180,7 @@ streamUtf16BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
S4 w1 w2 w3 w4 -> decodeError "streamUtf16BE" "UTF-16BE" onErr (Just w1)
(T bs (S3 w2 w3 w4) (i+1))
(T bs (S4 w2 w3 w4 x) (i+1))
where x = B.unsafeIndex ps i
consume (T Empty S0 _) = Done
consume (T Empty _ i) = decodeError "streamUtf16BE" "UTF-16BE" onErr Nothing (T Empty S0 i)
Expand Down Expand Up @@ -224,7 +224,7 @@ streamUtf32BE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
S4 w1 w2 w3 w4 -> decodeError "streamUtf32BE" "UTF-32BE" onErr (Just w1)
(T bs (S3 w2 w3 w4) (i+1))
(T bs (S4 w2 w3 w4 x) (i+1))
where x = B.unsafeIndex ps i
consume (T Empty S0 _) = Done
consume (T Empty _ i) = decodeError "streamUtf32BE" "UTF-32BE" onErr Nothing (T Empty S0 i)
Expand Down Expand Up @@ -268,7 +268,7 @@ streamUtf32LE onErr bs0 = Stream next (T bs0 S0 0) unknownSize
S2 w1 w2 -> next (T bs (S3 w1 w2 x) (i+1))
S3 w1 w2 w3 -> next (T bs (S4 w1 w2 w3 x) (i+1))
S4 w1 w2 w3 w4 -> decodeError "streamUtf32LE" "UTF-32LE" onErr (Just w1)
(T bs (S3 w2 w3 w4) (i+1))
(T bs (S4 w2 w3 w4 x) (i+1))
where x = B.unsafeIndex ps i
consume (T Empty S0 _) = Done
consume (T Empty _ i) = decodeError "streamUtf32LE" "UTF-32LE" onErr Nothing (T Empty S0 i)
Expand Down
11 changes: 11 additions & 0 deletions tests/Tests/Regressions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,16 @@ t525 = do
LE.decodeUtf32BEWith E.lenientDecode "\0" @?= "\65533"
LE.decodeUtf32LEWith E.lenientDecode "\0" @?= "\65533"

-- Regression test for issue #528: stream decoders skip one invalid byte
-- at a time.
--
-- Every case decodes with 'E.lenientDecode' and pins the exact output:
-- one replacement character U+FFFD (written \65533 below) for each
-- rejected byte, followed by whatever the remaining bytes decode to.
t528 :: IO ()
t528 = do
  -- UTF-8 is exercised through the stream decoder directly.
  lenientUtf8 "\xC0\xF0\x90\x80\x80" @?= "\65533\65536"
  -- UTF-16, both byte orders; the little-endian input is expected to
  -- produce two replacement characters before the final code point.
  LE.decodeUtf16BEWith E.lenientDecode "\xD8\xD8\x00\xDC\x00" @?= "\65533\65536"
  LE.decodeUtf16LEWith E.lenientDecode "\xD8\xD8\x00\xD8\x00\xDC" @?= "\65533\65533\65536"
  -- UTF-32, both byte orders.
  LE.decodeUtf32BEWith E.lenientDecode "\xFF\x00\x00\x00\x00" @?= "\65533\0"
  LE.decodeUtf32LEWith E.lenientDecode "\x00\x00\xFF\x00\x00" @?= "\65533\65280"
  where
    -- Run the UTF-8 stream decoder leniently and materialise the result.
    lenientUtf8 bytes = LF.unstream (E.streamUtf8 E.lenientDecode bytes)

t529 :: IO ()
t529 = do
let decode = TE.decodeUtf8With E.lenientDecode
Expand All @@ -181,5 +191,6 @@ tests = F.testGroup "Regressions"
, F.testCase "t301" t301
, F.testCase "t330" t330
, F.testCase "t525" t525
, F.testCase "t528" t528
, F.testCase "t529" t529
]
Loading