Skip to content

Commit

Permalink
Add ignoreZeroDecimal to ReadOptions (jtablesaw#748)
Browse files Browse the repository at this point in the history
  • Loading branch information
larshelge authored Apr 13, 2020
1 parent 5caac35 commit 4658e63
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 16 deletions.
13 changes: 9 additions & 4 deletions core/src/main/java/tech/tablesaw/columns/numbers/IntParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.io.ReadOptions;
import tech.tablesaw.util.StringUtils;

public class IntParser extends AbstractColumnParser<Integer> {

private final boolean ignoreZeroDecimal;

/**
 * Creates a parser for the given column type that falls back to {@link
 * ReadOptions#DEFAULT_IGNORE_ZERO_DECIMAL} when deciding whether zero decimals are stripped
 * before a value is parsed.
 *
 * @param columnType the column type passed on to the superclass constructor
 */
public IntParser(ColumnType columnType) {
  super(columnType);
  this.ignoreZeroDecimal = ReadOptions.DEFAULT_IGNORE_ZERO_DECIMAL;
}

/**
 * Creates a parser configured from the given read options.
 *
 * <p>The zero-decimal setting is copied from the options. When the options carry a non-null
 * missing-value indicator, it replaces this parser's missing-value strings.
 *
 * @param columnType the column type passed on to the superclass constructor
 * @param readOptions the options to read the parser settings from
 */
public IntParser(IntColumnType columnType, ReadOptions readOptions) {
  super(columnType);
  this.ignoreZeroDecimal = readOptions.ignoreZeroDecimal();
  if (readOptions.missingValueIndicator() != null) {
    missingValueStrings = Lists.newArrayList(readOptions.missingValueIndicator());
  }
}

@Override
Expand All @@ -25,8 +30,8 @@ public boolean canParse(String str) {
}
String s = str;
try {
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
Integer.parseInt(AbstractColumnParser.remove(s, ','));
return true;
Expand All @@ -52,8 +57,8 @@ public int parseInt(String str) {
return IntColumnType.missingValueIndicator();
}
String s = str;
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
return Integer.parseInt(AbstractColumnParser.remove(s, ','));
}
Expand Down
13 changes: 9 additions & 4 deletions core/src/main/java/tech/tablesaw/columns/numbers/LongParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.io.ReadOptions;
import tech.tablesaw.util.StringUtils;

public class LongParser extends AbstractColumnParser<Long> {

private final boolean ignoreZeroDecimal;

/**
 * Creates a parser for the given column type that falls back to {@link
 * ReadOptions#DEFAULT_IGNORE_ZERO_DECIMAL} when deciding whether zero decimals are stripped
 * before a value is parsed.
 *
 * @param columnType the column type passed on to the superclass constructor
 */
public LongParser(ColumnType columnType) {
  super(columnType);
  this.ignoreZeroDecimal = ReadOptions.DEFAULT_IGNORE_ZERO_DECIMAL;
}

/**
 * Creates a parser configured from the given read options.
 *
 * <p>The zero-decimal setting is copied from the options. When the options carry a non-null
 * missing-value indicator, it replaces this parser's missing-value strings.
 *
 * @param columnType the column type passed on to the superclass constructor
 * @param readOptions the options to read the parser settings from
 */
public LongParser(LongColumnType columnType, ReadOptions readOptions) {
  super(columnType);
  this.ignoreZeroDecimal = readOptions.ignoreZeroDecimal();
  if (readOptions.missingValueIndicator() != null) {
    missingValueStrings = Lists.newArrayList(readOptions.missingValueIndicator());
  }
}

@Override
Expand All @@ -25,8 +30,8 @@ public boolean canParse(String str) {
}
String s = str;
try {
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
Long.parseLong(AbstractColumnParser.remove(s, ','));
return true;
Expand All @@ -52,8 +57,8 @@ public long parseLong(String str) {
return LongColumnType.missingValueIndicator();
}
String s = str;
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
return Long.parseLong(AbstractColumnParser.remove(s, ','));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,23 @@
import com.google.common.collect.Lists;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.io.ReadOptions;
import tech.tablesaw.util.StringUtils;

public class ShortParser extends AbstractColumnParser<Short> {

private final boolean ignoreZeroDecimal;

/**
 * Creates a parser for the given column type that falls back to {@link
 * ReadOptions#DEFAULT_IGNORE_ZERO_DECIMAL} when deciding whether zero decimals are stripped
 * before a value is parsed.
 *
 * @param columnType the column type passed on to the superclass constructor
 */
public ShortParser(ShortColumnType columnType) {
  super(columnType);
  this.ignoreZeroDecimal = ReadOptions.DEFAULT_IGNORE_ZERO_DECIMAL;
}

/**
 * Creates a parser configured from the given read options.
 *
 * <p>The zero-decimal setting is copied from the options. When the options carry a non-null
 * missing-value indicator, it replaces this parser's missing-value strings.
 *
 * @param columnType the column type passed on to the superclass constructor
 * @param readOptions the options to read the parser settings from
 */
public ShortParser(ShortColumnType columnType, ReadOptions readOptions) {
  super(columnType);
  this.ignoreZeroDecimal = readOptions.ignoreZeroDecimal();
  if (readOptions.missingValueIndicator() != null) {
    missingValueStrings = Lists.newArrayList(readOptions.missingValueIndicator());
  }
}

@Override
Expand All @@ -24,8 +29,8 @@ public boolean canParse(String str) {
}
String s = str;
try {
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
Short.parseShort(AbstractColumnParser.remove(s, ','));
return true;
Expand All @@ -51,8 +56,8 @@ public short parseShort(String str) {
return ShortColumnType.missingValueIndicator();
}
String s = str;
if (s.endsWith(".0")) {
s = s.substring(0, s.length() - 2);
if (ignoreZeroDecimal) {
s = StringUtils.removeZeroDecimal(s);
}
return Short.parseShort(AbstractColumnParser.remove(s, ','));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ default DoubleColumn distance(Column<String> column2) {
* @param columns the column to append
* @return the new column
*/
default StringColumn join(String separator, Column... columns) {
default StringColumn join(String separator, Column<?>... columns) {
StringColumn newColumn = StringColumn.create(name() + "[column appended]", this.size());
for (int r = 0; r < size(); r++) {
StringBuilder result = new StringBuilder(getString(r));
Expand Down Expand Up @@ -347,11 +347,11 @@ default StringColumn concatenate(Object... stringsToAppend) {
* @param stringColumns the string columns to append
* @return the new column
*/
default StringColumn concatenate(Column... stringColumns) {
default StringColumn concatenate(Column<?>... stringColumns) {
StringColumn newColumn = StringColumn.create(name() + "[append]", this.size());
for (int r = 0; r < size(); r++) {
StringBuilder s = new StringBuilder(getString(r));
for (Column stringColumn : stringColumns) s.append(stringColumn.getString(r));
for (Column<?> stringColumn : stringColumns) s.append(stringColumn.getString(r));
newColumn.set(r, s.toString());
}
return newColumn;
Expand Down
15 changes: 15 additions & 0 deletions core/src/main/java/tech/tablesaw/io/ReadOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@

public class ReadOptions {

public static final boolean DEFAULT_IGNORE_ZERO_DECIMAL = true;

private static final List<ColumnType> DEFAULT_TYPES =
Lists.newArrayList(
LOCAL_DATE_TIME, LOCAL_TIME, LOCAL_DATE, BOOLEAN, INTEGER, LONG, DOUBLE, STRING);
Expand Down Expand Up @@ -75,6 +77,7 @@ public class ReadOptions {
protected final String missingValueIndicator;
protected final boolean minimizeColumnSizes;
protected final int maxCharsPerColumn;
protected final boolean ignoreZeroDecimal;

protected final DateTimeFormatter dateFormatter;
protected final DateTimeFormatter dateTimeFormatter;
Expand All @@ -94,6 +97,7 @@ protected ReadOptions(ReadOptions.Builder builder) {
minimizeColumnSizes = builder.minimizeColumnSizes;
header = builder.header;
maxCharsPerColumn = builder.maxCharsPerColumn;
ignoreZeroDecimal = builder.ignoreZeroDecimal;

dateFormatter = builder.dateFormatter;
timeFormatter = builder.timeFormatter;
Expand Down Expand Up @@ -138,6 +142,10 @@ public boolean header() {
return header;
}

/**
 * Returns whether zero decimals in data values are ignored when reading.
 *
 * @return the setting taken from the builder these options were created from
 */
public boolean ignoreZeroDecimal() {
  return this.ignoreZeroDecimal;
}

public DateTimeFormatter dateTimeFormatter() {
if (dateTimeFormatter != null) {
return dateTimeFormatter;
Expand Down Expand Up @@ -186,6 +194,7 @@ protected static class Builder {
protected boolean minimizeColumnSizes = false;
protected boolean header = true;
protected int maxCharsPerColumn = 4096;
protected boolean ignoreZeroDecimal = DEFAULT_IGNORE_ZERO_DECIMAL;

protected Builder() {
source = null;
Expand Down Expand Up @@ -269,6 +278,12 @@ public Builder maxCharsPerColumn(int maxCharsPerColumn) {
return this;
}

/**
 * Sets whether zero decimals in data values are ignored, e.g. whether a value such as {@code
 * 12.00} may be read as an integer. Defaults to {@link ReadOptions#DEFAULT_IGNORE_ZERO_DECIMAL}
 * ({@code true}).
 *
 * @param ignoreZeroDecimal {@code true} to ignore zero decimals, {@code false} to retain them
 * @return this builder, so that calls can be chained
 */
public Builder ignoreZeroDecimal(boolean ignoreZeroDecimal) {
this.ignoreZeroDecimal = ignoreZeroDecimal;
return this;
}

public Builder sample(boolean sample) {
this.sample = sample;
return this;
Expand Down
6 changes: 6 additions & 0 deletions core/src/main/java/tech/tablesaw/io/csv/CsvReadOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -314,5 +314,11 @@ public Builder minimizeColumnSizes() {
super.minimizeColumnSizes();
return this;
}

/**
 * Delegates to the superclass setter and returns this builder, so that further calls can be
 * chained on it.
 *
 * @param ignoreZeroDecimal the value forwarded to the superclass setter
 * @return this builder
 */
@Override
public Builder ignoreZeroDecimal(boolean ignoreZeroDecimal) {
super.ignoreZeroDecimal(ignoreZeroDecimal);
return this;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -294,5 +294,11 @@ public Builder minimizeColumnSizes() {
super.minimizeColumnSizes();
return this;
}

/**
 * Delegates to the superclass setter and returns this builder, so that further calls can be
 * chained on it.
 *
 * @param ignoreZeroDecimal the value forwarded to the superclass setter
 * @return this builder
 */
@Override
public Builder ignoreZeroDecimal(boolean ignoreZeroDecimal) {
super.ignoreZeroDecimal(ignoreZeroDecimal);
return this;
}
}
}
19 changes: 19 additions & 0 deletions core/src/main/java/tech/tablesaw/util/StringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.google.common.base.Strings;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
* Operations on {@link java.lang.String} that are {@code null} safe.
Expand Down Expand Up @@ -66,6 +67,8 @@ public class StringUtils {
/** The maximum size to which the padding constant(s) can expand. */
private static final int PAD_LIMIT = 8192;

private static final Pattern ZERO_DECIMAL_PATTERN = Pattern.compile("\\.0+$");

private StringUtils() {}

// Empty checks
Expand Down Expand Up @@ -520,6 +523,22 @@ public static boolean isAllUpperCase(final String cs) {
return true;
}

/**
 * Strips a trailing zero-only decimal part from the given String, i.e. a {@code '.'} followed by
 * one or more {@code '0'} characters at the end of the input. A String without such a suffix,
 * for instance one whose decimal part contains a non-zero digit, is returned unchanged.
 *
 * <pre>
 * StringUtils.removeZeroDecimal(null)       = null
 * StringUtils.removeZeroDecimal("")         = ""
 * StringUtils.removeZeroDecimal("246.0")    = "246"
 * StringUtils.removeZeroDecimal("357.000")  = "357"
 * StringUtils.removeZeroDecimal("468")      = "468"
 * StringUtils.removeZeroDecimal("246.4000") = "246.4000"
 * </pre>
 *
 * @param str the String to handle, may be null
 * @return the String without its trailing zero decimals; a {@code null} or empty input is
 *     returned as is
 */
public static String removeZeroDecimal(final String str) {
  final boolean nothingToStrip = Strings.isNullOrEmpty(str);
  return nothingToStrip ? str : ZERO_DECIMAL_PATTERN.matcher(str).replaceFirst(EMPTY);
}

// Abbreviating
// -----------------------------------------------------------------------
/**
Expand Down
26 changes: 26 additions & 0 deletions core/src/test/java/tech/tablesaw/io/csv/CsvReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,32 @@ public void testDataTypeDetection() throws IOException {
assertArrayEquals(bus_types, columnTypes);
}

/**
 * Verifies that numeric columns whose values only carry zero decimals are detected as INTEGER
 * when {@code ignoreZeroDecimal} is enabled.
 */
@Test
public void testNumberTypeDetectionIgnoreZeroDecimal() throws IOException {
  // try-with-resources releases the file handle even when an assertion fails
  try (Reader reader = new FileReader("../data/immunization.csv")) {
    CsvReadOptions options =
        CsvReadOptions.builder(reader).header(true).sample(false).ignoreZeroDecimal(true).build();

    // Columns at index 3 and 7 contain values with zero to three trailing zero decimals,
    // so they should map to type INTEGER when ignoreZeroDecimal = true
    ColumnType[] columnTypes = new CsvReader().detectColumnTypes(reader, options);
    assertEquals(INTEGER, columnTypes[3]);
    assertEquals(INTEGER, columnTypes[7]);
  }
}

/**
 * Verifies that numeric columns whose values carry zero decimals are detected as DOUBLE when
 * {@code ignoreZeroDecimal} is disabled.
 */
@Test
public void testNumberTypeDetectionRetainZeroDecimal() throws IOException {
  // try-with-resources releases the file handle even when an assertion fails
  try (Reader reader = new FileReader("../data/immunization.csv")) {
    CsvReadOptions options =
        CsvReadOptions.builder(reader).header(true).sample(false).ignoreZeroDecimal(false).build();

    // Columns at index 3 and 7 contain values with zero to three trailing zero decimals,
    // so they should map to type DOUBLE when ignoreZeroDecimal = false
    ColumnType[] columnTypes = new CsvReader().detectColumnTypes(reader, options);
    assertEquals(DOUBLE, columnTypes[3]);
    assertEquals(DOUBLE, columnTypes[7]);
  }
}

@Test
public void testMillis() {
long[] times = {1530486314124L, 1530488214124L};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
public class FixedWidthReaderTest {

private final FixedWidthFields car_fields_specs = new FixedWidthFields(4, 5, 40, 40, 8);
private final ColumnType[] car_types = {SHORT, STRING, STRING, STRING, FLOAT};
private final ColumnType[] car_types = {SHORT, STRING, STRING, STRING, SHORT};
private final ColumnType[] car_types_with_SKIP = {SHORT, STRING, STRING, SKIP, FLOAT};

@Test
Expand Down
43 changes: 43 additions & 0 deletions core/src/test/java/tech/tablesaw/util/StringUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package tech.tablesaw.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import org.junit.jupiter.api.Test;

/** Tests for {@link StringUtils}. */
public class StringUtilsTest {

  /** Covers the documented cases: zero decimals removed, other values untouched, null/empty. */
  @Test
  public void testRemoveZeroDecimal() {
    // Assert zero decimals being removed
    assertEquals("246", StringUtils.removeZeroDecimal("246.0"));
    assertEquals("146", StringUtils.removeZeroDecimal("146.00"));
    assertEquals("357", StringUtils.removeZeroDecimal("357.000"));
    assertEquals("347", StringUtils.removeZeroDecimal("347.0000"));

    // Assert no change to input value
    assertEquals("468", StringUtils.removeZeroDecimal("468"));
    assertEquals("24", StringUtils.removeZeroDecimal("24"));
    assertEquals("468.02", StringUtils.removeZeroDecimal("468.02"));
    assertEquals("246.004", StringUtils.removeZeroDecimal("246.004"));
    assertEquals("246.4000", StringUtils.removeZeroDecimal("246.4000"));

    // Assert empty string and null handling
    assertEquals("", StringUtils.removeZeroDecimal(""));
    assertNull(StringUtils.removeZeroDecimal(null));
  }

  /** Guards the boundaries of the pattern: only a '.' followed solely by zeros is stripped. */
  @Test
  public void testRemoveZeroDecimalEdgeCases() {
    // Trailing zeros that are not decimals must be preserved
    assertEquals("0", StringUtils.removeZeroDecimal("0"));
    assertEquals("10", StringUtils.removeZeroDecimal("10"));
    assertEquals("100", StringUtils.removeZeroDecimal("100"));

    // Sign and thousands separators are left in place
    assertEquals("0", StringUtils.removeZeroDecimal("0.0"));
    assertEquals("-12", StringUtils.removeZeroDecimal("-12.0"));
    assertEquals("1,000", StringUtils.removeZeroDecimal("1,000.00"));

    // A decimal point without any zero after it is not a zero decimal
    assertEquals("12.", StringUtils.removeZeroDecimal("12."));
  }
}
Loading

0 comments on commit 4658e63

Please sign in to comment.