diff --git a/core-plugins/pom.xml b/core-plugins/pom.xml
index fcbf95697..70644a501 100644
--- a/core-plugins/pom.xml
+++ b/core-plugins/pom.xml
@@ -279,12 +279,6 @@
org.mockito
mockito-core
-
- io.cdap.plugin
- format-xls
- ${project.version}
- test
-
io.cdap.plugin
format-xls
diff --git a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormat.java b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormat.java
index 155f75bec..0b79787f6 100644
--- a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormat.java
+++ b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormat.java
@@ -16,7 +16,6 @@
package io.cdap.plugin.format.xls.input;
-import com.google.common.base.Preconditions;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.plugin.format.input.PathTrackingInputFormat;
@@ -48,7 +47,7 @@
*/
public class XlsInputFormat extends PathTrackingInputFormat {
- public static final String SHEET_NO = "Sheet Number";
+ public static final String SHEET_NUM = "Sheet Number";
public static final String SHEET_VALUE = "sheetValue";
public static final String NAME_SKIP_HEADER = "skipHeader";
public static final String TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";
@@ -58,10 +57,10 @@ protected RecordReader createRecordReade
FileSplit split, TaskAttemptContext context, @Nullable String pathField,
@Nullable Schema schema) throws IOException {
Configuration jobConf = context.getConfiguration();
- boolean skipFirstRow = jobConf.getBoolean(NAME_SKIP_HEADER, true);
+ boolean skipFirstRow = jobConf.getBoolean(NAME_SKIP_HEADER, false);
boolean terminateIfEmptyRow = jobConf.getBoolean(TERMINATE_IF_EMPTY_ROW, false);
- Schema outputSchema = schema != null ? Schema.parseJson(context.getConfiguration().get("schema")) : null;
- String sheet = jobConf.get(SHEET_NO);
+ Schema outputSchema = schema != null ? Schema.parseJson(context.getConfiguration().get("schema")) : null;
+ String sheet = jobConf.get(SHEET_NUM);
String sheetValue = jobConf.get(SHEET_VALUE, "0");
return new XlsRecordReader(sheet, sheetValue, outputSchema, terminateIfEmptyRow, skipFirstRow);
}
diff --git a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormatProvider.java b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormatProvider.java
index 4b98cf74f..bedcb48a3 100644
--- a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormatProvider.java
+++ b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsInputFormatProvider.java
@@ -43,7 +43,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import javax.annotation.Nullable;
@@ -84,12 +83,7 @@ public void validate(FormatContext context) {
if (!conf.containsMacro(XlsInputFormatConfig.NAME_SHEET_VALUE) &&
conf.getSheet().equals(XlsInputFormatConfig.SHEET_NUMBER) &&
!Strings.isNullOrEmpty(conf.getSheetValue())) {
- try {
- Integer.parseInt(conf.getSheetValue());
- } catch (NumberFormatException e) {
- collector.addFailure("Sheet number must be a number.", null)
- .withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
- }
+ getSheetAsNumber(collector);
}
if (!conf.containsMacro(PathTrackingConfig.NAME_SCHEMA) && schema == null && context.getInputSchema() == null) {
collector.addFailure("XLS format cannot be used without specifying a schema.", "Schema must be specified.")
@@ -99,7 +93,7 @@ public void validate(FormatContext context) {
@Override
protected void addFormatProperties(Map properties) {
- properties.put(XlsInputFormat.SHEET_NO, conf.getSheet());
+ properties.put(XlsInputFormat.SHEET_NUM, conf.getSheet());
if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
properties.put(XlsInputFormat.SHEET_VALUE, conf.getSheetValue());
}
@@ -122,15 +116,9 @@ public Schema detectSchema(FormatContext context, InputFiles inputFiles) throws
Sheet workSheet;
// Check if user wants to access with name or number
if (conf.getSheet() != null && conf.getSheet().equals(XlsInputFormatConfig.SHEET_NUMBER)) {
- int sheetValue = 0;
- if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
- try {
- sheetValue = Integer.parseInt(conf.getSheetValue());
- } catch (NumberFormatException e) {
- failureCollector.addFailure("Sheet number must be a number.", null)
- .withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
- return null;
- }
+ Integer sheetValue = getSheetAsNumber(failureCollector);
+ if (sheetValue == null) {
+ return null;
}
workSheet = workbook.getSheetAt(sheetValue);
} else {
@@ -204,4 +192,20 @@ public Schema detectSchema(FormatContext context, InputFiles inputFiles) throws
return null;
}
+ private Integer getSheetAsNumber(FailureCollector failureCollector) {
+ if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
+ try {
+ int sheetValue = Integer.parseInt(conf.getSheetValue());
+ if (sheetValue >= 0) {
+ return sheetValue;
+ }
+ failureCollector.addFailure("Sheet number must be a positive number.", null)
+ .withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
+ } catch (NumberFormatException e) {
+ failureCollector.addFailure("Sheet number must be a number.", null)
+ .withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
+ }
+ }
+ return null;
+ }
}
diff --git a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsRowConverter.java b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsRowConverter.java
index 2d0ee046f..c7e8a8d8b 100644
--- a/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsRowConverter.java
+++ b/format-xls/src/main/java/io/cdap/plugin/format/xls/input/XlsRowConverter.java
@@ -26,6 +26,7 @@
import org.apache.poi.ss.usermodel.Row;
import java.util.List;
+import javax.annotation.Nullable;
/**
* Converts a row of XLS cells to a StructuredRecord.
@@ -42,6 +43,7 @@ public class XlsRowConverter {
* Converts a row of XLS cells to a StructuredRecord.
* Returns null if the row is null or empty.
*/
+ @Nullable
public StructuredRecord.Builder convert(Row row, Schema outputSchema) {
if (row == null) {
return null;
@@ -49,12 +51,7 @@ public StructuredRecord.Builder convert(Row row, Schema outputSchema) {
boolean isRowEmpty = true;
StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
List fields = outputSchema.getFields();
- for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
- if (cellIndex >= fields.size()) {
- throw new IllegalArgumentException(
- String.format("Schema contains fewer fields than the number of columns in the excel file. " +
- "Schema fields: %s, Excel columns: %s", fields.size(), row.getLastCellNum()));
- }
+ for (int cellIndex = 0; cellIndex < row.getLastCellNum() && cellIndex < fields.size(); cellIndex++) {
Cell cell = row.getCell(cellIndex, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
if (cell == null) {
// Blank cells are skipped, builder will set null for the field, no processing needed.
@@ -75,6 +72,7 @@ public StructuredRecord.Builder convert(Row row, Schema outputSchema) {
cellValue = getCellAsBoolean(cell);
break;
default:
+ // As we only support string, double and boolean, this should never happen.
throw new IllegalArgumentException(
String.format("Field '%s' is of unsupported type '%s'. Supported types are: %s",
field.getName(), type, "string, double, boolean"));