Skip to content

Commit

Permalink
Should not quote with strict quoting when line starts with #, but com…
Browse files Browse the repository at this point in the history
…ments are disabled. Quote only the first column, fix #270 (#271)

Co-authored-by: Krzysztof Debski <Krzysztof.Debski@IGT.com>
  • Loading branch information
kdebski85 and Krzysztof Debski authored May 28, 2021
1 parent 00f7cc0 commit 3f33e43
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ public class CsvEncoder
* @since 2.5
*/
final protected char[] _cfgNullValue;


final protected boolean _cfgAllowsComments;

final protected int _cfgLineSeparatorLength;

protected int _cfgMaxQuoteCheckChars;
Expand All @@ -89,7 +91,7 @@ public class CsvEncoder
/**
* Marker flag used to determine whether to do optimal (aka "strict") quoting
* checks or not (looser conservative check)
*
*
* @since 2.4
*/
protected boolean _cfgOptimalQuoting;
Expand Down Expand Up @@ -146,7 +148,7 @@ public class CsvEncoder
* Index of the last buffered value
*/
protected int _lastBuffered = -1;

/*
/**********************************************************
/* Output buffering, low-level
Expand Down Expand Up @@ -212,7 +214,7 @@ public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema)
_cfgLineSeparator = schema.getLineSeparator();
_cfgLineSeparatorLength = (_cfgLineSeparator == null) ? 0 : _cfgLineSeparator.length;
_cfgNullValue = schema.getNullValueOrEmpty();

_cfgAllowsComments = schema.allowsComments();
_columnCount = schema.size();

_cfgMinSafeChar = _calcSafeChar();
Expand Down Expand Up @@ -252,6 +254,7 @@ public CsvEncoder(CsvEncoder base, CsvSchema newSchema)
_cfgLineSeparator = newSchema.getLineSeparator();
_cfgLineSeparatorLength = _cfgLineSeparator.length;
_cfgNullValue = newSchema.getNullValueOrEmpty();
_cfgAllowsComments = newSchema.allowsComments();
_cfgMinSafeChar = _calcSafeChar();
_columnCount = newSchema.size();
_cfgQuoteCharEscapeChar = _getQuoteCharEscapeChar(
Expand Down Expand Up @@ -359,7 +362,7 @@ public final void write(int columnIndex, String value) throws IOException
appendColumnSeparator();
}
final int len = value.length();
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len)) {
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, columnIndex)) {
if (_cfgEscapeCharacter > 0) {
_writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
} else {
Expand Down Expand Up @@ -516,7 +519,7 @@ public void endRow() throws IOException
System.arraycopy(_cfgLineSeparator, 0, _outputBuffer, _outputTail, _cfgLineSeparatorLength);
_outputTail += _cfgLineSeparatorLength;
}

/*
/**********************************************************
/* Writer API, writes via buffered values
Expand All @@ -535,7 +538,7 @@ protected void appendValue(String value) throws IOException
* only check for short Strings, stop if something found
*/
final int len = value.length();
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len)) {
if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, _nextColumnToWrite)) {
if (_cfgEscapeCharacter > 0) {
_writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
} else {
Expand Down Expand Up @@ -996,7 +999,7 @@ public void close(boolean autoClose, boolean flushStream) throws IOException
// Internal buffer(s) the generator has can now be released as well
_releaseBuffers();
}

/*
/**********************************************************
/* Internal methods
Expand All @@ -1007,7 +1010,7 @@ public void close(boolean autoClose, boolean flushStream) throws IOException
* Helper method that determines whether given String is likely
* to require quoting; check tries to optimize for speed.
*/
protected boolean _mayNeedQuotes(String value, int length)
protected boolean _mayNeedQuotes(String value, int length, int columnIndex)
{
// 21-Mar-2014, tatu: If quoting disabled, don't quote
if (_cfgQuoteCharacter < 0) {
Expand All @@ -1016,9 +1019,9 @@ protected boolean _mayNeedQuotes(String value, int length)
// may skip checks unless we want exact checking
if (_cfgOptimalQuoting) {
if (_cfgEscapeCharacter > 0) {
return _needsQuotingStrict(value, _cfgEscapeCharacter);
return _needsQuotingStrict(value, columnIndex, _cfgEscapeCharacter);
}
return _needsQuotingStrict(value);
return _needsQuotingStrict(value, columnIndex);
}
if (length > _cfgMaxQuoteCheckChars) {
return true;
Expand Down Expand Up @@ -1069,7 +1072,7 @@ protected final boolean _needsQuotingLoose(String value, int esc)
/**
* @since 2.4
*/
protected boolean _needsQuotingStrict(String value)
protected boolean _needsQuotingStrict(String value, int columnIndex)
{
final int minSafe = _cfgMinSafeChar;

Expand All @@ -1086,7 +1089,7 @@ protected boolean _needsQuotingStrict(String value)
|| (c < escLen && escCodes[c] != 0)
|| (c == lfFirst)
// 31-Dec-2014, tatu: Comment lines start with '#', so quote a first-column value that starts with '#' when comments are enabled
|| (c == '#' && i == 0)) {
|| (columnIndex == 0 && _cfgAllowsComments && c == '#' && i == 0)) {
return true;
}
}
Expand All @@ -1097,7 +1100,7 @@ protected boolean _needsQuotingStrict(String value)
/**
* @since 2.7
*/
protected boolean _needsQuotingStrict(String value, int esc)
protected boolean _needsQuotingStrict(String value, int columnIndex, int esc)
{
final int minSafe = _cfgMinSafeChar;
final int[] escCodes = _outputEscapes;
Expand All @@ -1113,7 +1116,7 @@ protected boolean _needsQuotingStrict(String value, int esc)
|| (c < escLen && escCodes[c] != 0)
|| (c == lfFirst)
// 31-Dec-2014, tatu: Comment lines start with '#', so quote a first-column value that starts with '#' when comments are enabled
|| (c == '#' && i == 0)) {
|| (columnIndex == 0 && _cfgAllowsComments && c == '#' && i == 0)) {
return true;
}
} else if (c == esc) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,11 @@ public void testForcedQuotingEmptyStrings() throws Exception
assertEquals(",2.5\n", result);
}

// Must comment '#', at least if it starts the line
public void testQuotingOfCommentChar() throws Exception
// Must quote '#' when it starts the line
public void testQuotingOfCommentCharForFirstColumn() throws Exception
{
// First, with default quoting
final CsvSchema schema = MAPPER.schemaFor(IdDesc.class);
final CsvSchema schema = MAPPER.schemaFor(IdDesc.class).withComments();
String csv = MAPPER.writer(schema)
.writeValueAsString(new IdDesc("#123", "Foo"));
assertEquals("\"#123\",Foo\n", csv);
Expand All @@ -245,6 +245,42 @@ public void testQuotingOfCommentChar() throws Exception
assertEquals("\"#123\",Foo\n", csv);
}

// In strict mode when the second column starts with '#', does not have to quote it
public void testQuotingOfCommentCharForSecondColumn() throws Exception
{
    // The schema is built once, with comment handling enabled, and reused for both writes
    final CsvMapper looseMapper = mapperForCsv();
    final CsvSchema commentSchema = looseMapper.schemaFor(IdDesc.class).withComments();

    // Default quoting: the second-column value starting with '#' is expected to be quoted
    final String looseOutput = looseMapper.writer(commentSchema)
            .writeValueAsString(new IdDesc("123", "#Foo"));
    assertEquals("123,\"#Foo\"\n", looseOutput);

    // Strict/optimal quoting, using a fresh mapper: the same value is expected unquoted,
    // since the '#' is not in the first column
    final CsvMapper strictMapper = mapperForCsv();
    final String strictOutput = strictMapper.writer(commentSchema)
            .with(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING)
            .writeValueAsString(new IdDesc("123", "#Foo"));
    assertEquals("123,#Foo\n", strictOutput);
}

// In strict mode when comments are disabled, does not have to quote '#'
public void testQuotingOfCommentCharWhenCommentsAreDisabled() throws Exception
{
    // The schema is built once, with comment handling explicitly disabled, and reused for both writes
    final CsvMapper looseMapper = mapperForCsv();
    final CsvSchema noCommentSchema = looseMapper.schemaFor(IdDesc.class).withoutComments();

    // Default quoting: the first-column value starting with '#' is still expected to be quoted
    final String looseOutput = looseMapper.writer(noCommentSchema)
            .writeValueAsString(new IdDesc("#123", "Foo"));
    assertEquals("\"#123\",Foo\n", looseOutput);

    // Strict/optimal quoting, using a fresh mapper: with comments disabled the leading '#'
    // is expected to be written without quotes
    final CsvMapper strictMapper = mapperForCsv();
    final String strictOutput = strictMapper.writer(noCommentSchema)
            .with(CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING)
            .writeValueAsString(new IdDesc("#123", "Foo"));
    assertEquals("#123,Foo\n", strictOutput);
}

// for [dataformat-csv#98]
public void testBackslashEscape() throws Exception
{
Expand Down

0 comments on commit 3f33e43

Please sign in to comment.