Skip to content

Commit eea5be3

Browse files
author
Grahame Bowland
committed
fix test, change interface significantly
1 parent 645824e commit eea5be3

File tree

1 file changed

+98
-149
lines changed

1 file changed

+98
-149
lines changed

csv.rs

Lines changed: 98 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import std::map;
44
import map::hashmap;
55
import result;
66

7-
export rowreader, rowaccess, rowiter,
7+
export rowreader, rowiter,
88
new_reader, new_reader_readlen,
99
hashmap_iter, hashmap_iter_full;
1010

@@ -21,7 +21,7 @@ type rowreader = {
2121
quote: char,
2222
f : io::reader,
2323
mutable offset : uint,
24-
mutable buffers : [@[char]],
24+
mutable buffers : [[char]],
2525
mutable state : state,
2626
mutable trailing_nl : bool,
2727
mutable terminating : bool
@@ -33,7 +33,8 @@ type row = {
3333

3434
type bufferdescr = {
3535
escaped: bool,
36-
buffers: [@[char]],
36+
sb: uint,
37+
eb: uint,
3738
start: uint,
3839
end: uint
3940
};
@@ -44,26 +45,12 @@ enum fieldtype {
4445
}
4546

4647
iface rowiter {
47-
fn readrow() -> result::result<row, str>;
48-
}
49-
50-
iface rowaccess {
51-
fn len() -> uint;
52-
fn getchars(uint) -> [char];
53-
fn getstr(uint) -> str;
48+
fn readrow(&row: [str]) -> bool;
5449
}
5550

5651
fn new_reader(+f: io::reader, +delim: char, +quote: char) -> rowreader {
5752
{
58-
readlen: 1024u,
59-
delim: delim,
60-
quote: quote,
61-
f: f,
62-
mutable offset : 0u,
63-
mutable buffers : [],
64-
mutable state : fieldstart(false),
65-
mutable trailing_nl : false,
66-
mutable terminating: false
53+
new_reader_readlen(f, delim, quote, 1024u)
6754
}
6855
}
6956

@@ -81,79 +68,8 @@ fn new_reader_readlen(+f: io::reader, +delim: char, +quote: char, rl: uint) -> r
8168
}
8269
}
8370

84-
impl of rowaccess for row {
85-
fn len() -> uint {
86-
vec::len(self.fields)
87-
}
88-
fn getchars(field: uint) -> [char] {
89-
fn unescape(escaped: [char]) -> [char] {
90-
let mut r : [char] = [];
91-
vec::reserve(r, vec::len(escaped));
92-
let mut in_q = false;
93-
for c in escaped {
94-
if in_q {
95-
assert(c == '"');
96-
in_q = false;
97-
} else {
98-
in_q = c == '"';
99-
r += [c];
100-
}
101-
}
102-
ret r;
103-
}
104-
alt self.fields[field] {
105-
emptyfield() { ret []; }
106-
bufferfield(desc) {
107-
let mut buf = [];
108-
{
109-
let mut i = 0u;
110-
while i < vec::len(desc.buffers) {
111-
let from = if (i == 0u)
112-
{ desc.start } else { 0u };
113-
let to = if (i == vec::len(desc.buffers) - 1u)
114-
{ desc.end } else { vec::len(*desc.buffers[i]) };
115-
buf += vec::slice(*desc.buffers[i], from, to);
116-
i = i + 1u;
117-
}
118-
}
119-
if field == self.len() - 1u {
120-
// there may be a trailing \r on the last field; we should strip it
121-
// if so. bodgy here but seems the most efficient place to deal with this
122-
if vec::len(buf) > 0u {
123-
if buf[vec::len(buf)-1u] == '\r' {
124-
buf = vec::slice(buf, 0u, vec::len(buf)-1u);
125-
}
126-
}
127-
}
128-
if desc.escaped {
129-
buf = unescape(buf);
130-
}
131-
ret buf;
132-
}
133-
};
134-
}
135-
fn getstr(field: uint) -> str {
136-
ret str::from_chars(self.getchars(field));
137-
}
138-
fn getall() -> [str] {
139-
let mut a = [];
140-
self.map() { |s|
141-
a += [s];
142-
}
143-
ret a;
144-
}
145-
fn map(f: fn(s: str)) {
146-
let mut i = 0u;
147-
let len = self.len();
148-
while i < len {
149-
f(self.getstr(i));
150-
i += 1u;
151-
}
152-
}
153-
}
154-
15571
impl of rowiter for rowreader {
156-
fn readrow() -> result::result<row, str> {
72+
fn readrow(&row: [str]) -> bool {
15773
fn statestr(state: state) -> str {
15874
alt state {
15975
fieldstart(after_delim) {
@@ -174,28 +90,24 @@ impl of rowiter for rowreader {
17490
fn new_bufferfield(self: rowreader, escaped: bool, sb: uint, so: uint, eo: uint) -> fieldtype {
17591
let mut eb = vec::len(self.buffers) - 1u;
17692
let mut sb = sb, so = so, eo = eo;
177-
//#debug("sb %u so %u eb %u eo %u", sb, so, eb, eo);
178-
//log(debug, vec::map(self.buffers) { |t| str::from_chars(*t) } );
179-
//log(debug, vec::map(self.buffers) { |t| vec::len(*t) });
18093
if escaped {
18194
so += 1u;
182-
if so > vec::len(*self.buffers[sb]) {
95+
if so > vec::len(self.buffers[sb]) {
18396
sb += 1u;
184-
so = vec::len(*self.buffers[sb]) - 1u;
97+
so = vec::len(self.buffers[sb]) - 1u;
18598
}
18699
if eo > 0u {
187100
eo -= 1u;
188101
} else {
189102
eb -= 1u;
190-
eo = vec::len(*self.buffers[eb]) - 1u;
103+
eo = vec::len(self.buffers[eb]) - 1u;
191104
}
192105
}
193-
//#debug("sb %u so %u eb %u eo %u", sb, so, eb, eo);
194-
bufferfield({ escaped: escaped, buffers: vec::slice(self.buffers, sb, eb+1u), start: so, end: eo })
106+
bufferfield({ escaped: escaped, sb: sb, eb: eb, start: so, end: eo })
195107
}
196108
let cbuffer = vec::len(self.buffers) - 1u;
197-
let buf: @[char] = self.buffers[cbuffer];
198-
while self.offset < vec::len(*buf) {
109+
let buf = self.buffers[cbuffer];
110+
while self.offset < vec::len(buf) {
199111
let coffset = self.offset;
200112
let c : char = buf[coffset];
201113
#debug("got '%c' | %s", c, statestr(self.state));
@@ -251,55 +163,97 @@ impl of rowiter for rowreader {
251163
}
252164
ret false;
253165
}
254-
255166
self.state = fieldstart(false);
256167
let mut do_read = vec::len(self.buffers) == 0u;
257168
let mut fields = [];
258169

259170
while !self.terminating {
260171
if do_read {
261-
let mut data: @[char] = @self.f.read_chars(self.readlen);
262-
if vec::len(*data) == 0u {
172+
let mut data = self.f.read_chars(self.readlen);
173+
//log(error, ("aa", str::from_chars(data)));
174+
if vec::len(data) == 0u {
263175
if !self.trailing_nl {
264176
self.terminating = true;
265-
data = @['\n'];
177+
data = ['\n'];
266178
} else {
267-
ret result::err("EOF");
179+
ret false;
268180
}
269-
} else {
270-
self.trailing_nl = data[vec::len(*data) - 1u] == '\n';
271181
}
182+
// this is horrible, but it avoids the whole parser needing
183+
// to know about \r.
184+
data = vec::filter(data) { |c| c != '\r' };
185+
let data_len = vec::len(data);
186+
if data_len == 0u {
187+
cont;
188+
}
189+
//log(error, ("here", str::from_chars(data)));
190+
self.trailing_nl = data[data_len - 1u] == '\n';
272191
self.buffers += [data];
273192
self.offset = 0u;
274193
}
275194

276195
if row_from_buf(self, fields) {
277-
let r: row = { fields: fields };
278-
fields = [];
196+
let l = vec::len(fields);
197+
vec::reserve(row, l);
198+
row = vec::map(fields) { |field|
199+
fn unescape(escaped: [char]) -> [char] {
200+
let mut r : [char] = [];
201+
vec::reserve(r, vec::len(escaped));
202+
let mut in_q = false;
203+
for c in escaped {
204+
if in_q {
205+
assert(c == '"');
206+
in_q = false;
207+
} else {
208+
in_q = c == '"';
209+
r += [c];
210+
}
211+
}
212+
ret r;
213+
}
214+
alt field {
215+
emptyfield() { ret ""; }
216+
bufferfield(desc) {
217+
let mut buf = [];
218+
{
219+
let mut i = desc.sb;
220+
while i <= desc.eb {
221+
let from = if (i == desc.sb)
222+
{ desc.start } else { 0u };
223+
let to = if (i == desc.eb)
224+
{ desc.end } else { vec::len(self.buffers[i]) };
225+
buf += vec::slice(self.buffers[i], from, to);
226+
i = i + 1u;
227+
}
228+
}
229+
if desc.escaped {
230+
buf = unescape(buf);
231+
}
232+
ret str::from_chars(buf);
233+
}
234+
};
235+
};
279236
if vec::len(self.buffers) > 1u {
280237
self.buffers = vec::slice(self.buffers, vec::len(self.buffers) - 1u, vec::len(self.buffers));
281238
}
282-
ret result::ok(r);
239+
fields = [];
240+
ret true;
283241
}
284242
do_read = true;
285243
}
286-
ret result::err("unreachable");
244+
ret false;
287245
}
288246
}
289247

290248
fn hashmap_iter_cols(r: rowreader, cols: [str], f: fn(map::hashmap<str, str>)) {
291-
loop {
292-
let res = r.readrow();
293-
if result::failure(res) {
294-
break;
295-
}
249+
let mut fields : [str] = [];
250+
while r.readrow(fields) {
296251
let m : map::hashmap<str, str> = map::str_hash();
297252
let mut col = 0u;
298-
let row = result::get(res);
299-
if row.len() != vec::len(cols) {
253+
if vec::len(fields) != vec::len(cols) {
300254
cont; // FIXME: how to flag that we dropped a crazy row?
301255
}
302-
result::get(res).map() { |s|
256+
vec::iter(fields) { |s|
303257
m.insert(cols[col], s);
304258
col += 1u;
305259
};
@@ -310,70 +264,62 @@ fn hashmap_iter_cols(r: rowreader, cols: [str], f: fn(map::hashmap<str, str>)) {
310264
// reads the first row as a header, to derive keys for a hashmap
311265
// emitted for each subsequent row
312266
fn hashmap_iter(r: rowreader, f: fn(map::hashmap<str, str>)) {
313-
let res = r.readrow();
314-
alt res {
315-
result::ok(row) {
316-
hashmap_iter_cols(r, result::get(res).getall(), f);
317-
}
318-
result::err(_) { }
267+
let mut row: [str] = [];
268+
if r.readrow(row) {
269+
hashmap_iter_cols(r, row, f);
319270
}
320271
}
321272

322273
// as hashmap_iter, but first apply 'hc' to each header; allows
323274
// cleaning up headers; also allows verification that heads are
324275
// satisfactory
325276
fn hashmap_iter_full(r: rowreader, hmap: fn(&&h: str) -> str, hver: fn(cols: [str]) -> bool, f: fn(map::hashmap<str, str>)) {
326-
let res = r.readrow();
327-
alt res {
328-
result::ok(row) {
329-
let cols : [str] = vec::map(result::get(res).getall(), hmap);
330-
if !hver(cols) {
331-
ret;
332-
}
333-
hashmap_iter_cols(r, cols, f);
277+
let mut row: [str] = [];
278+
if r.readrow(row) {
279+
let cols : [str] = vec::map(row, hmap);
280+
if !hver(cols) {
281+
ret;
334282
}
335-
result::err(_) { }
283+
hashmap_iter_cols(r, cols, f);
336284
}
337285
}
338286

339287
#[cfg(test)]
340288
mod test {
341289
fn rowmatch(testdata: str, expected: [[str]]) {
342-
let chk = fn@(mk: fn(io::reader) -> rowreader) {
343-
let f = io::str_reader(testdata);
290+
let chk = fn@(s: str, mk: fn(io::reader) -> rowreader) {
291+
let f = io::str_reader(s);
344292
let r = mk(f);
345293
let mut i = 0u;
346294
loop {
347-
let res = r.readrow();
348-
if result::failure(res) {
295+
let mut row: [str] = [];
296+
if !r.readrow(row) {
349297
break;
350298
}
351-
let row = result::get(res);
352299
let expect = expected[i];
353-
354-
assert(row.len() == vec::len(expect));
300+
assert(vec::len(row) == vec::len(expect));
355301
let mut j = 0u;
356302
while j < row.len() {
357-
assert(row.getstr(j) == expect[j]);
303+
assert(row[j] == expect[j]);
358304
j += 1u;
359305
}
360306
i += 1u;
361307
}
362308
assert(i == vec::len(expected));
363309
};
364-
let runchecks = fn@(testdata: str) {
310+
let runchecks = fn@(s: str) {
365311
// test default reader params
366-
chk() { |inp|
312+
chk(s) { |inp|
367313
new_reader_readlen(inp, ',', '"', 2u)
368314
};
369315
// test default constructor
370-
chk() { |inp|
316+
chk(s) { |inp|
371317
new_reader(inp, ',', '"')
372318
};
373319
// test continuations over read buffers
374320
let mut j = 1u;
375-
while j < str::len(testdata) {
376-
chk() { |inp|
321+
while j < str::len(s) {
322+
chk(s) { |inp|
377323
new_reader_readlen(inp, ',', '"', j)
378324
};
379325
j += 1u;
@@ -383,8 +329,11 @@ mod test {
383329
// so we can test trailing newline case, testdata
384330
// must not end in \n - leave off the last newline
385331
runchecks(testdata);
386-
runchecks(testdata+"\n");
387332
runchecks(str::replace(testdata, "\n", "\r\n"));
333+
if !str::ends_with(testdata, "\n") {
334+
runchecks(testdata+"\n");
335+
runchecks(str::replace(testdata+"\n", "\n", "\r\n"));
336+
}
388337
}
389338

390339
#[test]

0 commit comments

Comments
 (0)