@@ -4,7 +4,7 @@ import std::map;
4
4
import map:: hashmap;
5
5
import result;
6
6
7
- export rowreader, rowaccess , rowiter,
7
+ export rowreader, rowiter,
8
8
new_reader, new_reader_readlen,
9
9
hashmap_iter, hashmap_iter_full;
10
10
@@ -21,7 +21,7 @@ type rowreader = {
21
21
quote : char ,
22
22
f : io:: reader ,
23
23
mutable offset : uint ,
24
- mutable buffers : [ @ [ char ] ] ,
24
+ mutable buffers : [ [ char ] ] ,
25
25
mutable state : state ,
26
26
mutable trailing_nl : bool ,
27
27
mutable terminating : bool
@@ -33,7 +33,8 @@ type row = {
33
33
34
34
type bufferdescr = {
35
35
escaped : bool ,
36
- buffers : [ @[ char ] ] ,
36
+ sb : uint ,
37
+ eb : uint ,
37
38
start : uint ,
38
39
end : uint
39
40
} ;
@@ -44,26 +45,12 @@ enum fieldtype {
44
45
}
45
46
46
47
iface rowiter {
47
- fn readrow( ) -> result:: result<row, str >;
48
- }
49
-
50
- iface rowaccess {
51
- fn len( ) -> uint;
52
- fn getchars ( uint ) -> [ char ] ;
53
- fn getstr ( uint ) -> str ;
48
+ fn readrow( & row: [ str] ) -> bool;
54
49
}
55
50
56
51
fn new_reader ( +f : io:: reader , +delim : char , +quote : char ) -> rowreader {
57
52
{
58
- readlen: 1024 u,
59
- delim: delim,
60
- quote: quote,
61
- f: f,
62
- mutable offset : 0 u,
63
- mutable buffers : [ ] ,
64
- mutable state : fieldstart ( false ) ,
65
- mutable trailing_nl : false ,
66
- mutable terminating: false
53
+ new_reader_readlen ( f, delim, quote, 1024 u)
67
54
}
68
55
}
69
56
@@ -81,79 +68,8 @@ fn new_reader_readlen(+f: io::reader, +delim: char, +quote: char, rl: uint) -> r
81
68
}
82
69
}
83
70
84
- impl of rowaccess for row {
85
- fn len ( ) -> uint {
86
- vec:: len ( self . fields )
87
- }
88
- fn getchars ( field : uint ) -> [ char ] {
89
- fn unescape ( escaped : [ char ] ) -> [ char ] {
90
- let mut r : [ char ] = [ ] ;
91
- vec:: reserve ( r, vec:: len ( escaped) ) ;
92
- let mut in_q = false ;
93
- for c in escaped {
94
- if in_q {
95
- assert ( c == '"' ) ;
96
- in_q = false ;
97
- } else {
98
- in_q = c == '"' ;
99
- r += [ c] ;
100
- }
101
- }
102
- ret r;
103
- }
104
- alt self. fields [ field] {
105
- emptyfield ( ) { ret [ ] ; }
106
- bufferfield ( desc) {
107
- let mut buf = [ ] ;
108
- {
109
- let mut i = 0 u;
110
- while i < vec:: len ( desc. buffers ) {
111
- let from = if ( i == 0 u)
112
- { desc. start } else { 0 u } ;
113
- let to = if ( i == vec:: len ( desc. buffers ) - 1 u)
114
- { desc. end } else { vec:: len ( * desc. buffers [ i] ) } ;
115
- buf += vec:: slice ( * desc. buffers [ i] , from, to) ;
116
- i = i + 1 u;
117
- }
118
- }
119
- if field == self . len ( ) - 1 u {
120
- // there may be a trailing \r on the last field; we should strip it
121
- // if so. bodgy here but seems the most efficient place to deal with this
122
- if vec:: len ( buf) > 0 u {
123
- if buf[ vec:: len ( buf) -1 u] == '\r' {
124
- buf = vec:: slice ( buf, 0 u, vec:: len ( buf) -1 u) ;
125
- }
126
- }
127
- }
128
- if desc. escaped {
129
- buf = unescape ( buf) ;
130
- }
131
- ret buf;
132
- }
133
- } ;
134
- }
135
- fn getstr ( field : uint ) -> str {
136
- ret str:: from_chars ( self . getchars ( field) ) ;
137
- }
138
- fn getall ( ) -> [ str ] {
139
- let mut a = [ ] ;
140
- self . map ( ) { |s|
141
- a += [ s] ;
142
- }
143
- ret a;
144
- }
145
- fn map ( f : fn ( s : str ) ) {
146
- let mut i = 0 u;
147
- let len = self . len ( ) ;
148
- while i < len {
149
- f ( self . getstr ( i) ) ;
150
- i += 1 u;
151
- }
152
- }
153
- }
154
-
155
71
impl of rowiter for rowreader {
156
- fn readrow ( ) -> result :: result < row , str > {
72
+ fn readrow ( & row : [ str ] ) -> bool {
157
73
fn statestr ( state : state ) -> str {
158
74
alt state {
159
75
fieldstart( after_delim) {
@@ -174,28 +90,24 @@ impl of rowiter for rowreader {
174
90
fn new_bufferfield ( self : rowreader , escaped : bool , sb : uint , so : uint , eo : uint ) -> fieldtype {
175
91
let mut eb = vec:: len ( self . buffers ) - 1 u;
176
92
let mut sb = sb, so = so, eo = eo;
177
- //#debug("sb %u so %u eb %u eo %u", sb, so, eb, eo);
178
- //log(debug, vec::map(self.buffers) { |t| str::from_chars(*t) } );
179
- //log(debug, vec::map(self.buffers) { |t| vec::len(*t) });
180
93
if escaped {
181
94
so += 1 u;
182
- if so > vec:: len ( * self . buffers [ sb] ) {
95
+ if so > vec:: len ( self . buffers [ sb] ) {
183
96
sb += 1 u;
184
- so = vec:: len ( * self . buffers [ sb] ) - 1 u;
97
+ so = vec:: len ( self . buffers [ sb] ) - 1 u;
185
98
}
186
99
if eo > 0 u {
187
100
eo -= 1 u;
188
101
} else {
189
102
eb -= 1 u;
190
- eo = vec:: len ( * self . buffers [ eb] ) - 1 u;
103
+ eo = vec:: len ( self . buffers [ eb] ) - 1 u;
191
104
}
192
105
}
193
- //#debug("sb %u so %u eb %u eo %u", sb, so, eb, eo);
194
- bufferfield ( { escaped: escaped, buffers: vec:: slice ( self . buffers , sb, eb+1 u) , start: so, end: eo } )
106
+ bufferfield ( { escaped: escaped, sb: sb, eb: eb, start: so, end: eo } )
195
107
}
196
108
let cbuffer = vec:: len ( self . buffers ) - 1 u;
197
- let buf: @ [ char ] = self . buffers [ cbuffer] ;
198
- while self . offset < vec:: len ( * buf) {
109
+ let buf = self . buffers [ cbuffer] ;
110
+ while self . offset < vec:: len ( buf) {
199
111
let coffset = self . offset ;
200
112
let c : char = buf[ coffset] ;
201
113
#debug ( "got '%c' | %s" , c, statestr ( self . state ) ) ;
@@ -251,55 +163,97 @@ impl of rowiter for rowreader {
251
163
}
252
164
ret false;
253
165
}
254
-
255
166
self . state = fieldstart( false) ;
256
167
let mut do_read = vec:: len ( self . buffers ) == 0 u;
257
168
let mut fields = [ ] ;
258
169
259
170
while ! self. terminating {
260
171
if do_read {
261
- let mut data: @[ char ] = @self . f. read_chars( self . readlen) ;
262
- if vec:: len( * data) == 0 u {
172
+ let mut data = self . f. read_chars( self . readlen) ;
173
+ //log(error, ("aa", str::from_chars(data)));
174
+ if vec:: len( data) == 0 u {
263
175
if !self . trailing_nl {
264
176
self . terminating = true ;
265
- data = @ [ '\n' ] ;
177
+ data = [ '\n' ] ;
266
178
} else {
267
- ret result :: err ( "EOF" ) ;
179
+ ret false ;
268
180
}
269
- } else {
270
- self . trailing_nl = data[ vec:: len( * data) - 1 u] == '\n' ;
271
181
}
182
+ // this is horrible, but it avoids the whole parser needing
183
+ // to know about \r.
184
+ data = vec:: filter( data) { |c| c != '\r' } ;
185
+ let data_len = vec:: len( data) ;
186
+ if data_len == 0 u {
187
+ cont;
188
+ }
189
+ //log(error, ("here", str::from_chars(data)));
190
+ self . trailing_nl = data[ data_len - 1 u] == '\n' ;
272
191
self . buffers += [ data] ;
273
192
self . offset = 0 u;
274
193
}
275
194
276
195
if row_from_buf( self , fields) {
277
- let r: row = { fields: fields } ;
278
- fields = [ ] ;
196
+ let l = vec:: len( fields) ;
197
+ vec:: reserve( row, l) ;
198
+ row = vec:: map( fields) { |field|
199
+ fn unescape( escaped: [ char ] ) -> [ char ] {
200
+ let mut r : [ char ] = [ ] ;
201
+ vec:: reserve( r, vec:: len( escaped) ) ;
202
+ let mut in_q = false ;
203
+ for c in escaped {
204
+ if in_q {
205
+ assert( c == '"' ) ;
206
+ in_q = false ;
207
+ } else {
208
+ in_q = c == '"' ;
209
+ r += [ c] ;
210
+ }
211
+ }
212
+ ret r;
213
+ }
214
+ alt field {
215
+ emptyfield( ) { ret "" ; }
216
+ bufferfield( desc) {
217
+ let mut buf = [ ] ;
218
+ {
219
+ let mut i = desc. sb;
220
+ while i <= desc. eb {
221
+ let from = if ( i == desc. sb)
222
+ { desc. start } else { 0 u } ;
223
+ let to = if ( i == desc. eb)
224
+ { desc. end } else { vec:: len( self . buffers[ i] ) } ;
225
+ buf += vec:: slice( self . buffers[ i] , from, to) ;
226
+ i = i + 1 u;
227
+ }
228
+ }
229
+ if desc. escaped {
230
+ buf = unescape( buf) ;
231
+ }
232
+ ret str :: from_chars( buf) ;
233
+ }
234
+ } ;
235
+ } ;
279
236
if vec:: len( self . buffers) > 1 u {
280
237
self . buffers = vec:: slice( self . buffers, vec:: len( self . buffers) - 1 u, vec:: len( self . buffers) ) ;
281
238
}
282
- ret result:: ok( r) ;
239
+ fields = [ ] ;
240
+ ret true ;
283
241
}
284
242
do_read = true ;
285
243
}
286
- ret result :: err ( "unreachable ") ;
244
+ ret false ;
287
245
}
288
246
}
289
247
290
248
fn hashmap_iter_cols ( r : rowreader , cols : [ str ] , f : fn ( map:: hashmap < str , str > ) ) {
291
- loop {
292
- let res = r.readrow();
293
- if result::failure(res) {
294
- break;
295
- }
249
+ let mut fields : [ str ] = [ ] ;
250
+ while r. readrow ( fields) {
296
251
let m : map:: hashmap < str , str > = map:: str_hash ( ) ;
297
252
let mut col = 0 u;
298
- let row = result::get(res);
299
- if row.len() != vec::len(cols) {
253
+ if vec:: len ( fields) != vec:: len ( cols) {
300
254
cont; // FIXME: how to flag that we dropped a crazy row?
301
255
}
302
- result::get(res).map( ) { |s|
256
+ vec :: iter ( fields ) { |s|
303
257
m. insert ( cols[ col] , s) ;
304
258
col += 1 u;
305
259
} ;
@@ -310,70 +264,62 @@ fn hashmap_iter_cols(r: rowreader, cols: [str], f: fn(map::hashmap<str, str>)) {
310
264
// reads the first row as a header, to derive keys for a hashmap
311
265
// emitted for each subsequent row
312
266
fn hashmap_iter ( r : rowreader , f : fn ( map:: hashmap < str , str > ) ) {
313
- let res = r.readrow();
314
- alt res {
315
- result::ok(row) {
316
- hashmap_iter_cols(r, result::get(res).getall(), f);
317
- }
318
- result::err(_) { }
267
+ let mut row: [ str ] = [ ] ;
268
+ if r. readrow ( row) {
269
+ hashmap_iter_cols ( r, row, f) ;
319
270
}
320
271
}
321
272
322
273
// as hashmap_iter, but first apply 'hc' to each header; allows
323
274
// cleaning up headers; also allows verification that heads are
324
275
// satisfactory
325
276
fn hashmap_iter_full ( r : rowreader , hmap : fn ( & & h: str ) -> str , hver : fn ( cols : [ str ] ) -> bool , f : fn ( map:: hashmap < str , str > ) ) {
326
- let res = r.readrow();
327
- alt res {
328
- result::ok(row) {
329
- let cols : [str] = vec::map(result::get(res).getall(), hmap);
330
- if !hver(cols) {
331
- ret;
332
- }
333
- hashmap_iter_cols(r, cols, f);
277
+ let mut row: [ str ] = [ ] ;
278
+ if r. readrow ( row) {
279
+ let cols : [ str ] = vec:: map ( row, hmap) ;
280
+ if !hver ( cols) {
281
+ ret;
334
282
}
335
- result::err(_) { }
283
+ hashmap_iter_cols ( r , cols , f ) ;
336
284
}
337
285
}
338
286
339
287
#[ cfg( test) ]
340
288
mod test {
341
289
fn rowmatch ( testdata : str , expected : [ [ str ] ] ) {
342
- let chk = fn@(mk: fn(io::reader) -> rowreader) {
343
- let f = io::str_reader(testdata );
290
+ let chk = fn @( s : str , mk : fn ( io:: reader) -> rowreader) {
291
+ let f = io:: str_reader ( s ) ;
344
292
let r = mk ( f) ;
345
293
let mut i = 0 u;
346
294
loop {
347
- let res = r.readrow() ;
348
- if result::failure(res ) {
295
+ let mut row : [ str ] = [ ] ;
296
+ if !r . readrow ( row ) {
349
297
break ;
350
298
}
351
- let row = result::get(res);
352
299
let expect = expected[ i] ;
353
-
354
- assert(row.len() == vec::len(expect));
300
+ assert ( vec:: len ( row) == vec:: len ( expect) ) ;
355
301
let mut j = 0 u;
356
302
while j < row. len ( ) {
357
- assert(row.getstr(j) == expect[j]);
303
+ assert ( row[ j ] == expect[ j] ) ;
358
304
j += 1 u;
359
305
}
360
306
i += 1 u;
361
307
}
362
308
assert ( i == vec:: len ( expected) ) ;
363
309
} ;
364
- let runchecks = fn@(testdata : str) {
310
+ let runchecks = fn @( s : str) {
365
311
// test default reader params
366
- chk() { |inp|
312
+ chk ( s ) { |inp|
367
313
new_reader_readlen ( inp, ',' , '"' , 2 u)
368
314
} ;
369
315
// test default constructor
370
- chk ( ) { |inp|
316
+ chk ( s ) { |inp|
371
317
new_reader ( inp, ',' , '"' )
372
318
} ;
373
319
// test continuations over read buffers
374
320
let mut j = 1 u;
375
- while j < str:: len ( testdata ) {
376
- chk ( ) { |inp|
321
+ while j < str :: len( s ) {
322
+ chk( s ) { |inp|
377
323
new_reader_readlen( inp, ',' , '"' , j)
378
324
} ;
379
325
j += 1 u;
@@ -383,8 +329,11 @@ mod test {
383
329
// so we can test trailing newline case, testdata
384
330
// must not end in \n - leave off the last newline
385
331
runchecks ( testdata) ;
386
- runchecks ( testdata+"\n " ) ;
387
332
runchecks ( str:: replace ( testdata, "\n " , "\r \n " ) ) ;
333
+ if !str:: ends_with ( testdata, "\n " ) {
334
+ runchecks ( testdata+"\n " ) ;
335
+ runchecks ( str:: replace ( testdata+"\n " , "\n " , "\r \n " ) ) ;
336
+ }
388
337
}
389
338
390
339
#[ test]
0 commit comments