Skip to content

Commit fdd2a9c

Browse files
authored
Fix whitespace as a separator in CSV processor (#67045) (#67050)
This change fixes a problem when using a space or tab as the separator in the CSV processor: the parser now checks whether the current character is the separator before checking whether it is whitespace. It also improves the tests so that they always check all combinations of separators and quotes. Closes #67013
1 parent 6175e18 commit fdd2a9c

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,15 @@ private boolean processUnquoted() {
127127
char c = currentChar();
128128
if (c == LF || c == CR || c == quote) {
129129
throw new IllegalArgumentException("Illegal character inside unquoted field at " + currentIndex);
130-
} else if (trim && isWhitespace(c)) {
131-
spaceCount++;
132130
} else if (c == separator) {
133131
state = State.START;
134132
if (setField(currentIndex - spaceCount)) {
135133
return true;
136134
}
137135
startIndex = currentIndex + 1;
138136
return false;
137+
} else if (trim && isWhitespace(c)) {
138+
spaceCount++;
139139
} else {
140140
spaceCount = 0;
141141
}
@@ -163,20 +163,20 @@ private boolean processQuotedEnd() {
163163
boolean shouldSetField = true;
164164
for (; currentIndex < length; currentIndex++) {
165165
c = currentChar();
166-
if (isWhitespace(c)) {
167-
if (shouldSetField) {
168-
if (setField(currentIndex - 1)) {
169-
return true;
170-
}
171-
shouldSetField = false;
172-
}
173-
} else if (c == separator) {
166+
if (c == separator) {
174167
if (shouldSetField && setField(currentIndex - 1)) {
175168
return true;
176169
}
177170
startIndex = currentIndex + 1;
178171
state = State.START;
179172
return false;
173+
} else if (isWhitespace(c)) {
174+
if (shouldSetField) {
175+
if (setField(currentIndex - 1)) {
176+
return true;
177+
}
178+
shouldSetField = false;
179+
}
180180
} else {
181181
throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex);
182182
}

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,33 +24,36 @@
2424
import org.elasticsearch.ingest.IngestDocument;
2525
import org.elasticsearch.ingest.RandomDocumentPicks;
2626
import org.elasticsearch.test.ESTestCase;
27-
import org.junit.Before;
2827

2928
import java.util.Arrays;
3029
import java.util.HashMap;
3130
import java.util.LinkedHashMap;
31+
import java.util.LinkedList;
3232
import java.util.Map;
3333
import java.util.stream.Collectors;
3434

3535
public class CsvProcessorTests extends ESTestCase {
3636

37-
private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.'};
37+
private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.', '\t'};
38+
private static final String[] QUOTES = new String[]{"'", "\"", ""};
3839
private final String quote;
39-
private char separator;
40+
private final char separator;
4041

4142

42-
public CsvProcessorTests(@Name("quote") String quote) {
43+
public CsvProcessorTests(@Name("quote") String quote, @Name("separator") char separator) {
4344
this.quote = quote;
45+
this.separator = separator;
4446
}
4547

4648
@ParametersFactory
4749
public static Iterable<Object[]> parameters() {
48-
return Arrays.asList(new Object[]{"'"}, new Object[]{"\""}, new Object[]{""});
49-
}
50-
51-
@Before
52-
public void setup() {
53-
separator = randomFrom(SEPARATORS);
50+
LinkedList<Object[]> list = new LinkedList<>();
51+
for (Character separator : SEPARATORS) {
52+
for (String quote : QUOTES) {
53+
list.add(new Object[]{quote, separator});
54+
}
55+
}
56+
return list;
5457
}
5558

5659
public void testExactNumberOfFields() {

0 commit comments

Comments
 (0)