Skip to content

Commit fbefa03

Browse files
committed
input/csv: improve reliability of text line isolation
Slightly unobfuscate the "end of current input chunk" marker in the data processing loop. Make the variable's identifier reflect that it's not a temporary, but instead something worth keeping around until needed again. Unbreak the calculation of line numbers in those situations where input chunks (including previously accumulated unprocessed data) happen to start with a line termination. This covers input files which start with empty lines, as well as environments with multi-byte line termination sequences (CR/LF) and arbitrary distribution of bytes across chunks. This fixes bug #968. Accept when there is no line termination in the current input chunk. We cannot assume that calling applications always provide file content in large enough chunks to span complete lines. And any arbitrary chunk size which applications happen to use can get exceeded by input files (e.g. for generated files with wide data or long comments).
1 parent cb3b805 commit fbefa03

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

src/input/csv.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,7 +1502,8 @@ static int process_buffer(struct sr_input *in, gboolean is_eof)
15021502
const struct column_details *details;
15031503
col_parse_cb parse_func;
15041504
int ret;
1505-
char *p, **lines, *line, **columns, *column;
1505+
char *processed_up_to;
1506+
char **lines, *line, **columns, *column;
15061507

15071508
inc = in->priv;
15081509
if (!inc->started) {
@@ -1526,16 +1527,17 @@ static int process_buffer(struct sr_input *in, gboolean is_eof)
15261527
if (!in->buf->len)
15271528
return SR_OK;
15281529
if (is_eof) {
1529-
p = in->buf->str + in->buf->len;
1530+
processed_up_to = in->buf->str + in->buf->len;
15301531
} else {
1531-
p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1532-
if (!p)
1533-
return SR_ERR;
1534-
*p = '\0';
1535-
p += strlen(inc->termination);
1532+
processed_up_to = g_strrstr_len(in->buf->str, in->buf->len,
1533+
inc->termination);
1534+
if (!processed_up_to)
1535+
return SR_OK;
1536+
*processed_up_to = '\0';
1537+
processed_up_to += strlen(inc->termination);
15361538
}
1537-
g_strstrip(in->buf->str);
15381539

1540+
/* Split input text lines and process their columns. */
15391541
ret = SR_OK;
15401542
lines = g_strsplit(in->buf->str, inc->termination, 0);
15411543
for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
@@ -1612,7 +1614,7 @@ static int process_buffer(struct sr_input *in, gboolean is_eof)
16121614
g_strfreev(columns);
16131615
}
16141616
g_strfreev(lines);
1615-
g_string_erase(in->buf, 0, p - in->buf->str);
1617+
g_string_erase(in->buf, 0, processed_up_to - in->buf->str);
16161618

16171619
return ret;
16181620
}

0 commit comments

Comments
 (0)