Skip to content

Commit ffe1a07

Browse files
read(io, Char): fix read with too many leading ones (#50552)
Fixes #50532. The `read(io, Char)` method didn't correctly handle the case where the lead byte starts with too many leading ones; this fix makes it handle that case correctly, which makes `read(io, Char)` match `collect(s)` in its interpretation of what a character is in all invalid cases. Also fix and test `read(::File, Char)` which has the same bug.
1 parent 7141e73 commit ffe1a07

File tree

3 files changed

+34
-5
lines changed

3 files changed

+34
-5
lines changed

base/filesystem.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,12 @@ end
200200

201201
function read(f::File, ::Type{Char})
202202
b0 = read(f, UInt8)
203-
l = 8 * (4 - leading_ones(b0))
203+
l = 0x08 * (0x04 - UInt8(leading_ones(b0)))
204204
c = UInt32(b0) << 24
205-
if l < 24
205+
if l ≤ 0x10
206206
s = 16
207207
while s ≥ l && !eof(f)
208+
# this works around lack of peek(::File)
208209
p = position(f)
209210
b = read(f, UInt8)
210211
if b & 0xc0 != 0x80

base/io.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -884,9 +884,9 @@ end
884884

885885
function read(io::IO, ::Type{Char})
886886
b0 = read(io, UInt8)::UInt8
887-
l = 8(4-leading_ones(b0))
887+
l = 0x08 * (0x04 - UInt8(leading_ones(b0)))
888888
c = UInt32(b0) << 24
889-
if l < 24
889+
if l ≤ 0x10
890890
s = 16
891891
while s ≥ l && !eof(io)::Bool
892892
peek(io) & 0xc0 == 0x80 || break

test/char.jl

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
@testset "basic properties" begin
4-
54
@test typemax(Char) == reinterpret(Char, typemax(UInt32))
65
@test typemin(Char) == Char(0)
76
@test typemax(Char) == reinterpret(Char, 0xffffffff)
@@ -214,6 +213,35 @@ end
214213
end
215214
end
216215

216+
# issue #50532
217+
@testset "invalid read(io, Char)" begin
218+
# byte values with different numbers of leading bits
219+
B = UInt8[
220+
0x3f, 0x4d, 0x52, 0x63, 0x81, 0x83, 0x89, 0xb6,
221+
0xc0, 0xc8, 0xd3, 0xe3, 0xea, 0xeb, 0xf0, 0xf2,
222+
0xf4, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
223+
]
224+
f = tempname()
225+
for b1 in B, b2 in B, t = 0:3
226+
bytes = [b1, b2]
227+
append!(bytes, rand(B, t))
228+
s = String(bytes)
229+
write(f, s)
230+
@test s == read(f, String)
231+
chars = collect(s)
232+
ios = [IOBuffer(s), open(f), Base.Filesystem.open(f, 0)]
233+
for io in ios
234+
chars′ = Char[]
235+
while !eof(io)
236+
push!(chars′, read(io, Char))
237+
end
238+
@test chars == chars′
239+
close(io)
240+
end
241+
end
242+
rm(f)
243+
end
244+
217245
@testset "overlong codes" begin
218246
function test_overlong(c::Char, n::Integer, rep::String)
219247
if isvalid(c)

0 commit comments

Comments
 (0)