diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java index 25fcc449c9a1..24d47f3a7514 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelReader.java @@ -21,6 +21,7 @@ import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnEnabled; import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.DescribedValue; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.context.PropertyContext; import org.apache.nifi.controller.ConfigurationContext; @@ -61,6 +62,34 @@ + "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.") public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory { + public enum ProtectionType implements DescribedValue { + UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"), + PASSWORD("Password Protected", "An Excel spreadsheet protected by a password"); + + ProtectionType(String displayName, String description) { + this.displayName = displayName; + this.description = description; + } + + private final String displayName; + private final String description; + + @Override + public String getValue() { + return name(); + } + + @Override + public String getDisplayName() { + return displayName; + } + + @Override + public String getDescription() { + return description; + } + } + public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor .Builder().name("Required Sheets") .displayName("Required Sheets") @@ -83,6 +112,25 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) .build(); + public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor + .Builder().name("Protection Type") + .displayName("Protection Type") + .description("Specifies whether an Excel spreadsheet is protected by a password or not.") + .required(true) + .allowableValues(ProtectionType.class) + .defaultValue(ProtectionType.UNPROTECTED) + .build(); + + public static final PropertyDescriptor PASSWORD = new PropertyDescriptor + .Builder().name("Password") + .displayName("Password") + .description("The password for a password protected Excel spreadsheet") + .required(true) + .sensitive(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD) + .build(); + private volatile ConfigurationContext configurationContext; private volatile String dateFormat; private volatile String timeFormat; @@ -106,6 +154,7 @@ public RecordReader createRecordReader(final Map variables, fina final List requiredSheets = getRequiredSheets(variables); final int firstRow = getStartingRow(variables); + final String password = configurationContext.getProperty(PASSWORD).getValue(); final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() .withDateFormat(dateFormat) .withRequiredSheets(requiredSheets) @@ -113,6 +162,7 @@ public RecordReader createRecordReader(final Map variables, fina .withSchema(schema) .withTimeFormat(timeFormat) .withTimestampFormat(timestampFormat) + .withPassword(password) .build(); return new ExcelRecordReader(configuration, in, logger); @@ -123,6 +173,8 @@ protected List getSupportedPropertyDescriptors() { final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); properties.add(STARTING_ROW); properties.add(REQUIRED_SHEETS); + properties.add(PROTECTION_TYPE); + properties.add(PASSWORD); properties.add(DateTimeUtils.DATE_FORMAT); properties.add(DateTimeUtils.TIME_FORMAT); properties.add(DateTimeUtils.TIMESTAMP_FORMAT); diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java index 78a776867930..f9df73a38c2b 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReader.java @@ -68,7 +68,7 @@ public ExcelRecordReader(ExcelRecordReaderConfiguration configuration, InputStre } try { - this.rowIterator = new RowIterator(inputStream, configuration.getRequiredSheets(), configuration.getFirstRow(), logger); + this.rowIterator = new RowIterator(inputStream, configuration, logger); } catch (RuntimeException e) { throw new MalformedRecordException("Read initial Record from Excel XLSX failed", e); } diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java index 2275b9133364..6faa826ad53d 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordReaderConfiguration.java @@ -28,6 +28,8 @@ public class ExcelRecordReaderConfiguration { private String dateFormat; private String timeFormat; private String timestampFormat; + private String password; + private boolean avoidTempFiles; private ExcelRecordReaderConfiguration() { } @@ -56,6 +58,14 @@ public String getTimestampFormat() { return timestampFormat; } + public String getPassword() { + return password; + } + + public boolean isAvoidTempFiles() { + return avoidTempFiles; + } + public static final class Builder { private RecordSchema schema; private List requiredSheets; @@ -63,6 +73,8 @@ public static final class Builder { private String dateFormat; private String timeFormat; private String timestampFormat; + private String password; + private boolean avoidTempFiles; public Builder withSchema(RecordSchema schema) { this.schema = schema; @@ -94,6 +106,16 @@ public Builder withTimestampFormat(String timestampFormat) { return this; } + public Builder withPassword(String password) { + this.password = password; + return this; + } + + public Builder withAvoidTempFiles(boolean avoidTempFiles) { + this.avoidTempFiles = avoidTempFiles; + return this; + } + public ExcelRecordReaderConfiguration build() { ExcelRecordReaderConfiguration excelRecordReaderConfiguration = new ExcelRecordReaderConfiguration(); excelRecordReaderConfiguration.schema = this.schema; @@ -102,6 +124,9 @@ public ExcelRecordReaderConfiguration build() { excelRecordReaderConfiguration.requiredSheets = this.requiredSheets == null ? Collections.emptyList() : this.requiredSheets; excelRecordReaderConfiguration.dateFormat = this.dateFormat; excelRecordReaderConfiguration.firstRow = this.firstRow; + excelRecordReaderConfiguration.password = password; + excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles; + return excelRecordReaderConfiguration; } } diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java index 9ad00882a430..24f4a9fcff2a 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/ExcelRecordSource.java @@ -35,7 +35,13 @@ public ExcelRecordSource(final InputStream in, final PropertyContext context, fi final Integer rawFirstRow = context.getProperty(ExcelReader.STARTING_ROW).evaluateAttributeExpressions(variables).asInteger(); final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow; final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow); - this.rowIterator = new RowIterator(in, requiredSheets, zeroBasedFirstRow, logger); + final String password = context.getProperty(ExcelReader.PASSWORD).getValue(); + final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + .withRequiredSheets(requiredSheets) + .withFirstRow(zeroBasedFirstRow) + .withPassword(password) + .build(); + this.rowIterator = new RowIterator(in, configuration, logger); } @Override diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java index 2b5f4e987a01..733697efd1d4 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/excel/RowIterator.java @@ -41,12 +41,15 @@ class RowIterator implements Iterator, Closeable { private Iterator currentRows; private Row currentRow; - RowIterator(final InputStream in, final List requiredSheets, final int firstRow, final ComponentLog logger) { + RowIterator(final InputStream in, final ExcelRecordReaderConfiguration configuration, final ComponentLog logger) { this.workbook = StreamingReader.builder() .rowCacheSize(100) .bufferSize(4096) + .password(configuration.getPassword()) + .setAvoidTempFiles(configuration.isAvoidTempFiles()) .open(in); + final List requiredSheets = configuration.getRequiredSheets(); if (requiredSheets == null || requiredSheets.isEmpty()) { this.sheets = this.workbook.iterator(); } else { @@ -65,7 +68,7 @@ class RowIterator implements Iterator, Closeable { .collect(Collectors.toList()).iterator(); } - this.firstRow = firstRow; + this.firstRow = configuration.getFirstRow(); this.logger = logger; setCurrent(); } diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java index 6fde3d07f115..6f5a0afeedd1 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-services/src/test/java/org/apache/nifi/excel/TestExcelRecordReader.java @@ -26,6 +26,16 @@ import org.apache.nifi.serialization.record.RecordFieldType; import org.apache.nifi.serialization.record.RecordSchema; import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import org.apache.poi.poifs.crypt.EncryptionMode; +import org.apache.poi.poifs.crypt.Encryptor; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; @@ -33,7 +43,10 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.InputStream; +import java.io.OutputStream; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; @@ -50,10 +63,60 @@ public class TestExcelRecordReader { private static final String DATA_FORMATTING_FILE = "dataformatting.xlsx"; private static final String MULTI_SHEET_FILE = "twoSheets.xlsx"; + private static final String PASSWORD = "nifi"; + private static final ByteArrayOutputStream PASSWORD_PROTECTED = new ByteArrayOutputStream(); + private static final Object[][] DATA = { + {"ID", "Name"}, + {1, "Manny"}, + {2, "Moe"}, + {3, "Jack"}, + }; @Mock ComponentLog logger; + @BeforeAll + static void setUpBeforeAll() throws Exception { + //Generate an Excel file and populate it with data + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try (XSSFWorkbook workbook = new XSSFWorkbook()) { + final XSSFSheet sheet = workbook.createSheet("User Info"); + populateSheet(sheet); + workbook.write(outputStream); + } + + //Protect the Excel file with a password + try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) { + EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile); + Encryptor encryptor = encryptionInfo.getEncryptor(); + encryptor.confirmPassword(PASSWORD); + + try (OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(outputStream.toByteArray())); + OutputStream os = encryptor.getDataStream(poifsFileSystem)) { + opc.save(os); + } + poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED); + } + } + + private static void populateSheet(XSSFSheet sheet) { + //Adding the data to the Excel worksheet + int rowCount = 0; + for (Object[] dataRow : DATA) { + Row row = sheet.createRow(rowCount++); + int columnCount = 0; + + for (Object field : dataRow) { + Cell cell = row.createCell(columnCount++); + if (field instanceof String) { + cell.setCellValue((String) field); + } else if (field instanceof Integer) { + cell.setCellValue((Integer) field); + } + } + } + } + @Test public void testNonExcelFile() { ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() @@ -220,4 +283,36 @@ public void testSelectAllSheets() throws MalformedRecordException { assertEquals(7, records.size()); } + + @Test + void testPasswordProtected() throws Exception { + RecordSchema schema = getPasswordProtectedSchema(); + ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + .withSchema(schema) + .withPassword(PASSWORD) + .withAvoidTempFiles(true) + .build(); + + InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray()); + ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger); + List records = getRecords(recordReader, false, false); + + assertEquals(DATA.length, records.size()); + } + + @Test + void testPasswordProtectedWithoutPassword() { + RecordSchema schema = getPasswordProtectedSchema(); + ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder() + .withSchema(schema) + .build(); + + InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray()); + assertThrows(Exception.class, () -> new ExcelRecordReader(configuration, inputStream, logger)); + } + + private RecordSchema getPasswordProtectedSchema() { + return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()), + new RecordField("name", RecordFieldType.STRING.getDataType()))); + } }