Skip to content

Commit fbad020

Browse files
committed
Merge pull request #1470 from grahame/grahame
add new read_chars method, fix bug in read_char
2 parents 6b20e8c + ba69477 commit fbad020

File tree

2 files changed

+111
-20
lines changed

2 files changed

+111
-20
lines changed

src/libstd/io.rs

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type reader =
5050
fn unread_byte(int);
5151
fn read_bytes(uint) -> [u8];
5252
fn read_char() -> char;
53+
fn read_chars(uint) -> [char];
5354
fn eof() -> bool;
5455
fn read_line() -> str;
5556
fn read_c_str() -> str;
@@ -101,29 +102,71 @@ obj new_reader(rdr: buf_reader) {
101102
// Reads one byte by forwarding to the wrapped buf_reader `rdr`.
// NOTE(review): the result is an int; callers in this file compare it
// against -1, presumably the end-of-input marker - confirm in buf_reader.
fn read_byte() -> int { ret rdr.read_byte(); }
102103
// Forwards `byte` to the wrapped buf_reader's unread_byte.
fn unread_byte(byte: int) { ret rdr.unread_byte(byte); }
103104
// Requests `len` bytes from the wrapped buf_reader and returns whatever
// byte vector it hands back (read_chars treats an empty result as eof).
fn read_bytes(len: uint) -> [u8] { ret rdr.read(len); }
105+
// Reads up to `n` UTF-8 encoded characters and returns them as a vector.
// Bytes are pulled with self.read_bytes and decoded in rounds; fewer than
// `n` characters come back when read_bytes returns no data before `n`
// characters have been decoded.
fn read_chars(n: uint) -> [char] {
106+
// returns the (consumed offset, n_req), appends characters to &chars
// The offset is the index of the first byte of `buf` that was not turned
// into a character. n_req is the number of extra bytes needed to finish a
// character cut off at the end of `buf`, or 0 when `buf` ended on a
// character boundary.
107+
fn chars_from_buf(buf: [u8], &chars: [char]) -> (uint, uint) {
108+
let i = 0u;
109+
while i < vec::len(buf) {
110+
let b0 = buf[i];
111+
// w is the total byte length of the character introduced by b0
112+
let w = str::utf8_char_width(b0);
113+
// end is one past the last byte of this character
114+
let end = i + w;
115+
i += 1u;
116+
assert (w > 0u);
117+
// single-byte character: the byte value is the character itself
118+
if w == 1u {
119+
chars += [ b0 as char ];
120+
cont;
121+
}
122+
// can't satisfy this char with the existing data
123+
if end > vec::len(buf) {
124+
// i was already advanced past b0, so i - 1u points back at the lead byte
125+
ret (i - 1u, end - vec::len(buf));
126+
}
127+
let val = 0u;
128+
while i < end {
129+
let next = buf[i] as int;
130+
i += 1u;
131+
// NOTE(review): next is a u8 widened to int, so this check looks like it
132+
// can never fail here - confirm; the removed read_char had the same line
133+
// for a read_byte result.
134+
assert (next > -1);
135+
// intended to check the continuation-byte marker (top two bits are 10)
136+
assert (next & 192 == 128);
137+
// shift in the low six payload bits of the continuation byte
138+
val <<= 6u;
139+
val += next & 63 as uint;
140+
}
141+
// See str::char_at
142+
val += (b0 << (w + 1u as u8) as uint)
143+
<< (w - 1u) * 6u - w - 1u;
144+
chars += [ val as char ];
145+
}
146+
ret (i, 0u);
147+
}
148+
let buf: [u8] = [];
149+
let chars: [char] = [];
150+
// might need more bytes, but reading n will never over-read
151+
let nbread = n;
152+
while nbread > 0u {
153+
let data = self.read_bytes(nbread);
154+
if vec::len(data) == 0u {
155+
// eof - FIXME should we do something if
156+
// we're split in a unicode char?
157+
// As written, bytes already held in buf for an unfinished character are
158+
// not decoded and nothing in this function unreads them.
159+
break;
160+
}
161+
buf += data;
162+
let (offset, nbreq) = chars_from_buf(buf, chars);
163+
// ncreq: how many characters are still missing from the request
164+
let ncreq = n - vec::len(chars);
165+
// again we either know we need a certain number of bytes
166+
// to complete a character, or we make sure we don't
167+
// over-read by reading 1-byte per char needed
168+
nbread = if ncreq > nbreq { ncreq } else { nbreq };
169+
if nbread > 0u {
170+
// keep only the undecoded tail of buf for the next round
171+
buf = vec::slice(buf, offset, vec::len(buf));
172+
}
173+
}
174+
ret chars;
175+
}
104163
// Reads a single character. After this change the work is delegated to
// self.read_chars(1u): an empty result yields `-1 as char`, otherwise the
// one decoded character is returned. The statements that call
// rdr.read_byte() directly belong to the previous implementation being
// removed by this change.
fn read_char() -> char {
105-
let c0 = rdr.read_byte();
106-
if c0 == -1 {
164+
let c = self.read_chars(1u);
165+
if vec::len(c) == 0u {
107166
ret -1 as char; // FIXME will this stay valid?
108-
109167
}
110-
let b0 = c0 as u8;
111-
let w = str::utf8_char_width(b0);
112-
assert (w > 0u);
113-
if w == 1u { ret b0 as char; }
114-
let val = 0u;
115-
while w > 1u {
116-
w -= 1u;
117-
let next = rdr.read_byte();
118-
assert (next > -1);
119-
assert (next & 192 == 128);
120-
val <<= 6u;
121-
val += next & 63 as uint;
122-
}
123-
// See str::char_at
// NOTE(review): in the removed code the loop above has counted w down to
// 1u before the next statement uses it - presumably the read_char bug
// this change fixes; confirm against str::char_at.
124-
125-
val += (b0 << (w + 1u as u8) as uint) << (w - 1u) * 6u - w - 1u;
126-
ret val as char;
168+
// read_chars(1u) was asked for one character, so exactly one is expected
assert(vec::len(c) == 1u);
169+
ret c[0];
127170
}
128171
// Reports end-of-input by forwarding to the wrapped buf_reader `rdr`.
fn eof() -> bool { ret rdr.eof(); }
129172
fn read_line() -> str {

src/test/stdtest/io.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,54 @@ fn test_simple() {
2323
assert (str::eq(frood, frood2));
2424
}
2525

26+
// read_chars on an empty string reader must return an empty vector, even
// when many characters (128) are requested.
#[test]
27+
fn test_readchars_empty() {
28+
let inp : io::reader = io::string_reader("");
29+
let res : [char] = inp.read_chars(128u);
30+
assert(vec::len(res) == 0u);
31+
}
32+
33+
// Exercises read_chars on a string that mixes multi-byte characters with
// the single-byte run "hello", for request sizes 0 through 7 and for one
// request (128) larger than the whole string.
#[test]
34+
fn test_readchars_wide() {
35+
let wide_test = "生锈的汤匙切肉汤hello生锈的汤匙切肉汤";
36+
// integer values the test expects for the characters of wide_test, in order
let ivals : [int] = [
37+
29983, 38152, 30340, 27748,
38+
21273, 20999, 32905, 27748,
39+
104, 101, 108, 108, 111,
40+
29983, 38152, 30340, 27748,
41+
21273, 20999, 32905, 27748];
42+
// Reads `len` characters from a fresh string reader over `s` and checks
// that the result is a prefix of `ivals`; when `len` does not exceed the
// number of expected values, exactly `len` characters must come back.
fn check_read_ln(len : uint, s: str, ivals: [int]) {
43+
let inp : io::reader = io::string_reader(s);
44+
let res : [char] = inp.read_chars(len);
45+
if (len <= vec::len(ivals)) {
46+
assert(vec::len(res) == len);
47+
}
48+
assert(vec::slice(ivals, 0u, vec::len(res)) ==
49+
vec::map(res, {|x| x as int}));
50+
}
51+
let i = 0u;
52+
while i < 8u {
53+
check_read_ln(i, wide_test, ivals);
54+
i += 1u;
55+
}
56+
// check a long read for good measure
57+
check_read_ln(128u, wide_test, ivals);
58+
}
59+
60+
// read_char must decode a single multi-byte character to its full integer
// value (29983 for the character used here).
#[test]
61+
fn test_readchar() {
62+
let inp : io::reader = io::string_reader("生");
63+
let res : char = inp.read_char();
64+
assert(res as int == 29983);
65+
}
66+
67+
// read_char on an empty string reader must return the `-1 as char`
// sentinel, observed here as -1 after casting back to int.
#[test]
68+
fn test_readchar_empty() {
69+
let inp : io::reader = io::string_reader("");
70+
let res : char = inp.read_char();
71+
assert(res as int == -1);
72+
}
73+
2674
#[test]
2775
fn file_reader_not_exist() {
2876
alt io::file_reader("not a file") {

0 commit comments

Comments
 (0)