From 06630386a062770e7cb23beb31326d5a2cd4611c Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 5 May 2022 16:08:30 +0100 Subject: [PATCH] PR comments set 1 --- .../Standard/Table/0.0.0-dev/src/Error.enso | 2 +- .../Table/0.0.0-dev/src/Io/Excel.enso | 41 +++++++----- .../org/enso/table/format/xlsx/Range.java | 40 ++++++++---- .../org/enso/table/format/xlsx/Reader.java | 62 +++++++++++++++++++ 4 files changed, 117 insertions(+), 28 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso index dc7f8b2ea4dc0..4b8ecbc571d33 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso @@ -117,4 +117,4 @@ type Invalid_Location (location:Text) Invalid_Location.to_display_text : Text Invalid_Location.to_display_text = - "The location '"+this.location+"' is not valid." \ No newline at end of file + "The location '"+this.location+"' is not valid." diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso index cbe42e7bfc63f..45f03760f8bf7 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso @@ -66,10 +66,18 @@ type Excel_Range to_text = "Excel_Range " + this.address ## Validates if a column index (1-based) is within the valid range for Excel. + + Arguments: + - column: 1-based index to check + - limit: maximum valid index, defaults to Excel 2007+ limit of 16,384 is_valid_column : Integer -> Integer -> Boolean is_valid_column column (limit=16384) = (column > 0) && (column <= limit) ## Validates if a row index (1-based) is within the valid range for Excel. 
+ + Arguments: + - row: 1-based index to check + - limit: maximum valid index, defaults to Excel 2007+ limit of 1,048,576 is_valid_row : Integer -> Integer -> Boolean is_valid_row row (limit=1048576) = (row > 0) && (row <= limit) @@ -89,10 +97,11 @@ type Excel_Range for_cell sheet column row = col_index = Excel_Range.column_index column - col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") (col_valid _) + col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") + row_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet col_index row col_index row) + col_valid <| row_valid <| + Excel_Range (Java_Range.new sheet col_index row col_index row) ## Create a Range for a range of cells. 
for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range @@ -100,12 +109,13 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ - right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") (right_valid _) - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") + bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet left_index top right_index bottom) + left_valid <| right_valid <| top_valid <| bottom_valid <| + Excel_Range (Java_Range.new sheet left_index top right_index bottom) ## Create an Excel_Range for a set of columns. 
for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range @@ -113,18 +123,20 @@ type Excel_Range left_index = Excel_Range.column_index left right_index = Excel_Range.column_index right - left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _) + left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") + right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet left_index 0 right_index 0) + left_valid <| right_valid <| + Excel_Range (Java_Range.new sheet left_index 0 right_index 0) ## Create an Excel_Range for a set of rows. for_rows : Text -> Integer -> Integer -> Excel_Range for_rows sheet top (bottom=top) = - top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") _ - all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _) + top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") + bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") - all_valid <| Excel_Range (Java_Range.new sheet 0 top 0 bottom) + top_valid <| bottom_valid <| + Excel_Range (Java_Range.new sheet 0 top 0 bottom) ## PRIVATE @@ -133,6 +145,7 @@ validate : Boolean -> Text -> Any validate validation ~error_message ~wrapped = if validation then wrapped else Error.throw (Illegal_Argument_Error error_message) + read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector) read_excel file section _ xls_format=False = reader 
stream = case section of diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java index 2071bba4adb7d..e076623b04c47 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java @@ -23,6 +23,10 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum return new String[] {matcher.group(1), matcher.group(2)}; } + private static String unescapeSheetName(String sheetName) { + return sheetName.replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); + } + private static final String ADDRESS_A1 = "\\$?[A-Z]{1,3}\\$?\\d+"; private static final String ADDRESS_COL = "\\$?[A-Z]{1,3}"; private static final String ADDRESS_ROW = "\\$?\\d+"; @@ -40,7 +44,7 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum private static int[] parseRange(String range) throws IllegalArgumentException { for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) { Optional parsed = - parseRange(range, pattern, pattern == RANGE_RC ? Range::parseRC : Range::parseA1); + parseRange(range, pattern, pattern == RANGE_RC ? 
Range::parseR1C1StyleAddress : Range::parseA1StyleAddress); if (parsed.isPresent()) { return parsed.get(); @@ -87,13 +91,13 @@ private static int skipDollar(CharSequence address, int index) { return index; } - private static int[] parseA1(CharSequence address) { - ParsedInteger col = parseColumn(address); + private static int[] parseA1StyleAddress(CharSequence address) { + ParsedInteger col = parseColumn(address, skipDollar(address, 0)); ParsedInteger row = parseInteger(address, skipDollar(address, col.index)); return new int[] {row.value, col.value}; } - private static int[] parseRC(CharSequence address) throws IllegalArgumentException { + private static int[] parseR1C1StyleAddress(CharSequence address) throws IllegalArgumentException { int index = 0; int row = 0; @@ -127,7 +131,7 @@ private static int[] parseRC(CharSequence address) throws IllegalArgumentExcepti * @return Column index (A=1 ...) */ public static int parseA1Column(CharSequence column) throws IllegalArgumentException { - ParsedInteger parsed = parseColumn(column); + ParsedInteger parsed = parseColumn(column, skipDollar(column, 0)); if (parsed.index != column.length() || parsed.value == 0) { throw new IllegalArgumentException(column + " is not a valid Excel Column Name."); } @@ -136,7 +140,13 @@ public static int parseA1Column(CharSequence column) throws IllegalArgumentExcep } private static class ParsedInteger { + /** + * Index to the next character after the parsed value + */ public final int index; + /** + * Parsed integer value or 0 if not valid + */ public final int value; public ParsedInteger(int index, int value) { @@ -154,11 +164,10 @@ private static ParsedInteger parseInteger(CharSequence address, int index) { endIndex, endIndex == index ? 
0 : Integer.parseInt(address, index, endIndex, 10)); } - private static ParsedInteger parseColumn(CharSequence column) { + private static ParsedInteger parseColumn(CharSequence column, int startIndex) { int col = 0; - int index = skipDollar(column, 0); - + int index = startIndex; while (index < column.length() && isLetter(column.charAt(index))) { col = 26 * col + (column.charAt(index) - 'A' + 1); index++; @@ -175,7 +184,7 @@ private static ParsedInteger parseColumn(CharSequence column) { public Range(String fullAddress) throws IllegalArgumentException { String[] sheetAndRange = parseFullAddress(fullAddress); - this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'"); + this.sheetName = unescapeSheetName(sheetAndRange[0]); int[] range = parseRange(sheetAndRange[1]); this.leftColumn = range[1]; @@ -196,6 +205,14 @@ public String getSheetName() { return sheetName; } + public String getEscapedSheetName() { + String sheetNameEscaped = sheetName; + if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) { + sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'"; + } + return sheetNameEscaped; + } + public boolean isWholeRow() { return leftColumn == 0; } @@ -221,10 +238,7 @@ public int getBottomRow() { } public String getAddress() { - String sheetNameEscaped = getSheetName(); - if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) { - sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'"; - } + String sheetNameEscaped = getEscapedSheetName(); String range = (isWholeRow() ? 
"" : CellReference.convertNumToColString(getLeftColumn() - 1)) diff --git a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java index 3d73ca1c404d0..81c32dec5aa5b 100644 --- a/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java +++ b/std-bits/table/src/main/java/org/enso/table/format/xlsx/Reader.java @@ -304,6 +304,14 @@ private static Object getCellValue(Cell cell) { return null; } + /** + * Reads a list of sheet names for the specified XLSX/XLS file into an array. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a String[] containing the sheet names. + * @throws IOException when the input stream cannot be read. + */ public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException { Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); int sheetCount = workbook.getNumberOfSheets(); @@ -314,11 +322,30 @@ public static String[] readSheetNames(InputStream stream, boolean xls_format) th return output; } + /** + * Reads a list of range names for the specified XLSX/XLS file into an array. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a String[] containing the range names. + * @throws IOException when the input stream cannot be read. + */ public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException { Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new); } + /** + * Reads a sheet by name for the specified XLSX/XLS file into a table. 
+ * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param sheetName the name of the sheet to read. + * @param skip_rows skip rows from the top of the sheet. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readSheetByName( InputStream stream, String sheetName, @@ -341,6 +368,17 @@ public static Table readSheetByName( row_limit == null ? Integer.MAX_VALUE : row_limit); } + /** + * Reads a sheet by index for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param index the 1-based index to the sheet. + * @param skip_rows skip rows from the top of the sheet. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readSheetByIndex( InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException, IllegalArgumentException { @@ -360,6 +398,18 @@ public static Table readSheetByIndex( row_limit == null ? Integer.MAX_VALUE : row_limit); } + + /** + * Reads a range by name or address for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param rangeNameOrAddress name or address of the range to read. + * @param skip_rows skip rows from the top of the range. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. 
+ * @throws IOException when the input stream cannot be read. + */ public static Table readRangeByName( InputStream stream, String rangeNameOrAddress, @@ -374,6 +424,18 @@ public static Table readRangeByName( return readRange(workbook, range, skip_rows, row_limit); } + + /** + * Reads a range for the specified XLSX/XLS file into a table. + * + * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param range the range to read. + * @param skip_rows skip rows from the top of the range. + * @param row_limit maximum number of rows to read. + * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @return a {@link Table} containing the specified data. + * @throws IOException when the input stream cannot be read. + */ public static Table readRange( InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format) throws IOException {