Skip to content

Commit

Permalink
PR comments set 1
Browse files Browse the repository at this point in the history
  • Loading branch information
jdunkerley committed May 5, 2022
1 parent f4f1719 commit 0663038
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 28 deletions.
2 changes: 1 addition & 1 deletion distribution/lib/Standard/Table/0.0.0-dev/src/Error.enso
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,4 @@ type Invalid_Location (location:Text)

Invalid_Location.to_display_text : Text
Invalid_Location.to_display_text =
"The location '"+this.location+"' is not valid."
"The location '"+this.location+"' is not valid."
41 changes: 27 additions & 14 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Io/Excel.enso
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,18 @@ type Excel_Range
to_text = "Excel_Range " + this.address

## Validates if a column index (1-based) is within the valid range for Excel.

Arguments:
- column: 1-based index to check
- limit: maximum valid index, defaults to Excel 2007+ limit of 16,384
is_valid_column : Integer -> Integer -> Boolean
is_valid_column column (limit=16384) = (column > 0) && (column <= limit)

## Validates if a row index (1-based) is within the valid range for Excel.

Arguments:
- row: 1-based index to check
- limit: maximum valid index, defaults to Excel 2007+ limit of 1,048,576
is_valid_row : Integer -> Integer -> Boolean
is_valid_row row (limit=1048576) = (row > 0) && (row <= limit)

Expand All @@ -89,42 +97,46 @@ type Excel_Range
for_cell sheet column row =
col_index = Excel_Range.column_index column

col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".") _
all_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".") (col_valid _)
col_valid = here.validate (Excel_Range.is_valid_column col_index) ("Invalid column for Excel: " + column.to_text + ".")
row_valid = here.validate (Excel_Range.is_valid_row row) ("Invalid row for Excel: " + row.to_text + ".")

all_valid <| Excel_Range (Java_Range.new sheet col_index row col_index row)
col_valid <| row_valid <|
Excel_Range (Java_Range.new sheet col_index row col_index row)

## Create a Range for a range of cells.
for_range : Text -> (Text|Integer) -> Integer -> (Text|Integer) -> Integer -> Excel_Range
for_range sheet left top right bottom =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right

left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _
right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _)
top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") (right_valid _)
all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _)
left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")
top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")

all_valid <| Excel_Range (Java_Range.new sheet left_index top right_index bottom)
left_valid <| right_valid <| top_valid <| bottom_valid <|
Excel_Range (Java_Range.new sheet left_index top right_index bottom)

## Create an Excel_Range for a set of columns.
for_columns : Text -> (Text|Integer) -> (Text|Integer) -> Excel_Range
for_columns sheet left (right=left) =
left_index = Excel_Range.column_index left
right_index = Excel_Range.column_index right

left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".") _
all_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".") (left_valid _)
left_valid = here.validate (Excel_Range.is_valid_column left_index) ("Invalid left column for Excel: " + left.to_text + ".")
right_valid = here.validate (Excel_Range.is_valid_column right_index) ("Invalid right column for Excel: " + right.to_text + ".")

all_valid <| Excel_Range (Java_Range.new sheet left_index 0 right_index 0)
left_valid <| right_valid <|
Excel_Range (Java_Range.new sheet left_index 0 right_index 0)

## Create an Excel_Range for a set of rows.
for_rows : Text -> Integer -> Integer -> Excel_Range
for_rows sheet top (bottom=top) =
top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".") _
all_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".") (top_valid _)
top_valid = here.validate (Excel_Range.is_valid_row top) ("Invalid top row for Excel: " + top.to_text + ".")
bottom_valid = here.validate (Excel_Range.is_valid_row bottom) ("Invalid bottom row for Excel: " + bottom.to_text + ".")

all_valid <| Excel_Range (Java_Range.new sheet 0 top 0 bottom)
top_valid <| bottom_valid <|
Excel_Range (Java_Range.new sheet 0 top 0 bottom)


## PRIVATE
Expand All @@ -133,6 +145,7 @@ validate : Boolean -> Text -> Any
validate validation ~error_message ~wrapped =
if validation then wrapped else Error.throw (Illegal_Argument_Error error_message)


read_excel : File -> Excel_Section -> Problem_Behavior -> Boolean -> (Table | Vector)
read_excel file section _ xls_format=False =
reader stream = case section of
Expand Down
40 changes: 27 additions & 13 deletions std-bits/table/src/main/java/org/enso/table/format/xlsx/Range.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum
return new String[] {matcher.group(1), matcher.group(2)};
}

/**
 * Converts a sheet name as written in an address into its plain form.
 *
 * <p>One pair of enclosing single quotes is removed when the whole name is wrapped in them, and
 * every doubled single quote is then collapsed into a single one.
 *
 * @param sheetName the sheet name as it appears in the address, possibly quoted.
 * @return the sheet name without enclosing quotes and with doubled quotes collapsed.
 */
private static String unescapeSheetName(String sheetName) {
  // Step 1: drop the surrounding quotes, if the name both starts and ends with one.
  String withoutOuterQuotes = sheetName.replaceAll("^'(.*)'$", "$1");
  // Step 2: a doubled quote stands for a single literal quote (plain text replacement).
  return withoutOuterQuotes.replace("''", "'");
}

private static final String ADDRESS_A1 = "\\$?[A-Z]{1,3}\\$?\\d+";
private static final String ADDRESS_COL = "\\$?[A-Z]{1,3}";
private static final String ADDRESS_ROW = "\\$?\\d+";
Expand All @@ -40,7 +44,7 @@ private static String[] parseFullAddress(String fullAddress) throws IllegalArgum
private static int[] parseRange(String range) throws IllegalArgumentException {
for (Pattern pattern : new Pattern[] {RANGE_A1, RANGE_COL, RANGE_ROW, RANGE_RC}) {
Optional<int[]> parsed =
parseRange(range, pattern, pattern == RANGE_RC ? Range::parseRC : Range::parseA1);
parseRange(range, pattern, pattern == RANGE_RC ? Range::parseR1C1StyleAddress : Range::parseA1StyleAddress);

if (parsed.isPresent()) {
return parsed.get();
Expand Down Expand Up @@ -87,13 +91,13 @@ private static int skipDollar(CharSequence address, int index) {
return index;
}

private static int[] parseA1(CharSequence address) {
ParsedInteger col = parseColumn(address);
private static int[] parseA1StyleAddress(CharSequence address) {
ParsedInteger col = parseColumn(address, skipDollar(address, 0));
ParsedInteger row = parseInteger(address, skipDollar(address, col.index));
return new int[] {row.value, col.value};
}

private static int[] parseRC(CharSequence address) throws IllegalArgumentException {
private static int[] parseR1C1StyleAddress(CharSequence address) throws IllegalArgumentException {
int index = 0;

int row = 0;
Expand Down Expand Up @@ -127,7 +131,7 @@ private static int[] parseRC(CharSequence address) throws IllegalArgumentExcepti
* @return Column index (A=1 ...)
*/
public static int parseA1Column(CharSequence column) throws IllegalArgumentException {
ParsedInteger parsed = parseColumn(column);
ParsedInteger parsed = parseColumn(column, skipDollar(column, 0));
if (parsed.index != column.length() || parsed.value == 0) {
throw new IllegalArgumentException(column + " is not a valid Excel Column Name.");
}
Expand All @@ -136,7 +140,13 @@ public static int parseA1Column(CharSequence column) throws IllegalArgumentExcep
}

private static class ParsedInteger {
/**
* Index to the next character after the parsed value
*/
public final int index;
/**
* Parsed integer value or 0 if not valid
*/
public final int value;

public ParsedInteger(int index, int value) {
Expand All @@ -154,11 +164,10 @@ private static ParsedInteger parseInteger(CharSequence address, int index) {
endIndex, endIndex == index ? 0 : Integer.parseInt(address, index, endIndex, 10));
}

private static ParsedInteger parseColumn(CharSequence column) {
private static ParsedInteger parseColumn(CharSequence column, int startIndex) {
int col = 0;

int index = skipDollar(column, 0);

int index = startIndex;
while (index < column.length() && isLetter(column.charAt(index))) {
col = 26 * col + (column.charAt(index) - 'A' + 1);
index++;
Expand All @@ -175,7 +184,7 @@ private static ParsedInteger parseColumn(CharSequence column) {

public Range(String fullAddress) throws IllegalArgumentException {
String[] sheetAndRange = parseFullAddress(fullAddress);
this.sheetName = sheetAndRange[0].replaceAll("^'(.*)'$", "$1").replaceAll("''", "'");
this.sheetName = unescapeSheetName(sheetAndRange[0]);

int[] range = parseRange(sheetAndRange[1]);
this.leftColumn = range[1];
Expand All @@ -196,6 +205,14 @@ public String getSheetName() {
return sheetName;
}

/**
 * Returns the sheet name in the form used inside an address.
 *
 * <p>A name containing a space or a single quote is wrapped in single quotes, with each embedded
 * single quote doubled; any other name is returned unchanged.
 *
 * @return the sheet name, quoted and escaped when it contains a space or a single quote.
 */
public String getEscapedSheetName() {
  boolean needsQuoting = sheetName.contains(" ") || sheetName.contains("'");
  if (!needsQuoting) {
    return sheetName;
  }

  // NOTE(review): only spaces and quotes trigger quoting here - confirm whether other
  // characters in sheet names need the same treatment.
  return "'" + sheetName.replace("'", "''") + "'";
}

/**
 * Checks whether this range has no left column set.
 *
 * <p>NOTE(review): a left column of 0 appears to mark a range covering entire rows (the Enso
 * {@code for_rows} constructor passes 0 for both columns) - confirm against the constructors.
 *
 * @return true when the left column is 0.
 */
public boolean isWholeRow() {
return leftColumn == 0;
}
Expand All @@ -221,10 +238,7 @@ public int getBottomRow() {
}

public String getAddress() {
String sheetNameEscaped = getSheetName();
if (sheetNameEscaped.contains(" ") || sheetNameEscaped.contains("'")) {
sheetNameEscaped = "'" + sheetNameEscaped.replace("'", "''") + "'";
}
String sheetNameEscaped = getEscapedSheetName();

String range =
(isWholeRow() ? "" : CellReference.convertNumToColString(getLeftColumn() - 1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,14 @@ private static Object getCellValue(Cell cell) {
return null;
}

/**
* Reads a list of sheet names for the specified XLSX/XLS file into an array.
*
* @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a String[] containing the sheet names.
* @throws IOException when the input stream cannot be read.
*/
public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException {
Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream);
int sheetCount = workbook.getNumberOfSheets();
Expand All @@ -314,11 +322,30 @@ public static String[] readSheetNames(InputStream stream, boolean xls_format) th
return output;
}

/**
 * Reads the names of the named ranges defined in the specified XLSX/XLS file into an array.
 *
 * @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
 * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
 * @return a String[] containing the range names.
 * @throws IOException when the input stream cannot be read.
 */
public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException {
  // The workbook is Closeable; close it once the names have been copied out into the array so
  // its underlying resources are released even if reading the names fails.
  try (Workbook workbook = xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream)) {
    return workbook.getAllNames().stream().map(Name::getNameName).toArray(String[]::new);
  }
}

/**
* Reads a sheet by name for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
* @param sheetName the name of the sheet to read.
* @param skip_rows number of rows to skip from the top of the sheet.
* @param row_limit maximum number of rows to read; {@code null} is treated as {@link Integer#MAX_VALUE}.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
*/
public static Table readSheetByName(
InputStream stream,
String sheetName,
Expand All @@ -341,6 +368,17 @@ public static Table readSheetByName(
row_limit == null ? Integer.MAX_VALUE : row_limit);
}

/**
* Reads a sheet by index for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
* @param index the 1-based index of the sheet.
* @param skip_rows number of rows to skip from the top of the sheet.
* @param row_limit maximum number of rows to read; {@code null} is treated as {@link Integer#MAX_VALUE}.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
*/
public static Table readSheetByIndex(
InputStream stream, int index, Integer skip_rows, Integer row_limit, boolean xls_format)
throws IOException, IllegalArgumentException {
Expand All @@ -360,6 +398,18 @@ public static Table readSheetByIndex(
row_limit == null ? Integer.MAX_VALUE : row_limit);
}


/**
* Reads a range by name or address for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
* @param rangeNameOrAddress name or address of the range to read.
* @param skip_rows skip rows from the top of the range.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
*/
public static Table readRangeByName(
InputStream stream,
String rangeNameOrAddress,
Expand All @@ -374,6 +424,18 @@ public static Table readRangeByName(
return readRange(workbook, range, skip_rows, row_limit);
}


/**
* Reads a range for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} from which the XLSX/XLS file contents are read.
* @param range the range to read.
* @param skip_rows skip rows from the top of the range.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
*/
public static Table readRange(
InputStream stream, Range range, Integer skip_rows, Integer row_limit, boolean xls_format)
throws IOException {
Expand Down

0 comments on commit 0663038

Please sign in to comment.