@@ -50,6 +50,7 @@ type reader =
50
50
fn unread_byte ( int ) ;
51
51
fn read_bytes ( uint ) -> [ u8 ] ;
52
52
fn read_char ( ) -> char ;
53
+ fn read_chars ( uint ) -> [ char ] ;
53
54
fn eof ( ) -> bool ;
54
55
fn read_line ( ) -> str ;
55
56
fn read_c_str ( ) -> str ;
@@ -101,29 +102,71 @@ obj new_reader(rdr: buf_reader) {
101
102
fn read_byte ( ) -> int { ret rdr. read_byte ( ) ; }
102
103
fn unread_byte( byte: int) { ret rdr. unread_byte ( byte) ; }
103
104
fn read_bytes ( len : uint ) -> [ u8 ] { ret rdr. read ( len) ; }
105
+ fn read_chars ( n : uint ) -> [ char ] {
106
+ // returns the (consumed offset, n_req), appends characters to &chars
107
+ fn chars_from_buf ( buf : [ u8 ] , & chars: [ char ] ) -> ( uint , uint ) {
108
+ let i = 0 u;
109
+ while i < vec:: len ( buf) {
110
+ let b0 = buf[ i] ;
111
+ let w = str:: utf8_char_width ( b0) ;
112
+ let end = i + w;
113
+ i += 1 u;
114
+ assert ( w > 0 u) ;
115
+ if w == 1 u {
116
+ chars += [ b0 as char ] ;
117
+ cont;
118
+ }
119
+ // can't satisfy this char with the existing data
120
+ if end > vec:: len ( buf) {
121
+ ret ( i - 1 u, end - vec:: len ( buf) ) ;
122
+ }
123
+ let val = 0 u;
124
+ while i < end {
125
+ let next = buf[ i] as int ;
126
+ i += 1 u;
127
+ assert ( next > -1 ) ;
128
+ assert ( next & 192 == 128 ) ;
129
+ val <<= 6 u;
130
+ val += next & 63 as uint ;
131
+ }
132
+ // See str::char_at
133
+ val += ( b0 << ( w + 1 u as u8 ) as uint )
134
+ << ( w - 1 u) * 6 u - w - 1 u;
135
+ chars += [ val as char ] ;
136
+ }
137
+ ret ( i, 0 u) ;
138
+ }
139
+ let buf: [ u8 ] = [ ] ;
140
+ let chars: [ char ] = [ ] ;
141
+ // might need more bytes, but reading n will never over-read
142
+ let nbread = n;
143
+ while nbread > 0 u {
144
+ let data = self . read_bytes ( nbread) ;
145
+ if vec:: len ( data) == 0 u {
146
+ // eof - FIXME should we do something if
147
+ // we're split in a unicode char?
148
+ break ;
149
+ }
150
+ buf += data;
151
+ let ( offset, nbreq) = chars_from_buf ( buf, chars) ;
152
+ let ncreq = n - vec:: len ( chars) ;
153
+ // again we either know we need a certain number of bytes
154
+ // to complete a character, or we make sure we don't
155
+ // over-read by reading 1-byte per char needed
156
+ nbread = if ncreq > nbreq { ncreq } else { nbreq } ;
157
+ if nbread > 0 u {
158
+ buf = vec:: slice ( buf, offset, vec:: len ( buf) ) ;
159
+ }
160
+ }
161
+ ret chars;
162
+ }
104
163
// read_char, shown here as a diff: lines prefixed `-` are the removed
// implementation, which decoded one character byte-by-byte from
// rdr.read_byte; lines prefixed `+` are its replacement, which asks
// self.read_chars for a single character.
// Both versions return `-1 as char` when nothing could be read.
fn read_char ( ) -> char {
105
- let c0 = rdr . read_byte ( ) ;
106
- if c0 == - 1 {
164
// new version: request exactly one character; an empty result is
// handled by the `ret -1 as char` branch below
+ let c = self . read_chars ( 1 u ) ;
165
+ if vec :: len ( c ) == 0 u {
107
166
ret -1 as char ; // FIXME will this stay valid?
108
-
109
167
}
110
- let b0 = c0 as u8 ;
111
- let w = str:: utf8_char_width ( b0) ;
112
- assert ( w > 0 u) ;
113
- if w == 1 u { ret b0 as char ; }
114
- let val = 0 u;
115
- while w > 1 u {
116
- w -= 1 u;
117
- let next = rdr. read_byte ( ) ;
118
- assert ( next > -1 ) ;
119
- assert ( next & 192 == 128 ) ;
120
- val <<= 6 u;
121
- val += next & 63 as uint ;
122
- }
123
- // See str::char_at
124
-
125
- val += ( b0 << ( w + 1 u as u8 ) as uint ) << ( w - 1 u) * 6 u - w - 1 u;
126
- ret val as char ;
168
// new version: a non-empty result must hold exactly the one character
// that was requested
+ assert ( vec:: len ( c) == 1 u) ;
169
+ ret c[ 0 ] ;
127
170
}
128
171
fn eof ( ) -> bool { ret rdr. eof ( ) ; }
129
172
fn read_line ( ) -> str {
0 commit comments