-
-
Notifications
You must be signed in to change notification settings - Fork 17.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Add calamine engine to read_excel
#50581
Changes from 1 commit
30da9a4
a47d3fb
6c1dd87
fd06ad9
8b6200a
6a8d822
efcb2fc
e1105de
6b50e0c
0784733
5971199
655318b
cc049cf
038133e
52c2cbd
2dc5e02
2076e11
6b0a7ac
a614089
256f9f9
eee8b4e
9fc2209
cf1268a
bebfec5
9019904
08a5616
677a224
8c55e5d
d817999
12aaf19
255e8fb
500fa9f
5d94728
15874c3
89ae49e
33e5b7e
85d31ec
a0d4193
a6b6fb2
0a431c5
745cd09
942a16a
8803ca9
2f5ffba
b8b1a9a
f5ab40d
02c2e7f
74a3e70
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,4 +61,4 @@ dependencies: | |
|
||
- pip: | ||
- pyqt5==5.15.1 | ||
- python-calamine==0.0.7 | ||
- python-calamine==0.0.8 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,12 +27,7 @@ | |
ValueT = Union[int, float, str, bool, time, date, datetime] | ||
|
||
|
||
class __calamine__: | ||
pass | ||
|
||
|
||
class CalamineExcelReader(BaseExcelReader): | ||
book: str | ||
_sheet_names: list[str] | None = None | ||
|
||
def __init__( | ||
|
@@ -44,10 +39,12 @@ def __init__( | |
super().__init__(filepath_or_buffer, storage_options=storage_options) | ||
|
||
@property | ||
def _workbook_class(self) -> type[__calamine__]: | ||
return __calamine__ | ||
def _workbook_class(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The protocol must not be explicitly stated in code, but whatever is returned here is supposed to represent the concept of a Workbook. Not very familiar with calamine but the name There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
from python_calamine import CalamineReader | ||
|
||
return CalamineReader | ||
|
||
def load_workbook(self, filepath_or_buffer) -> str: | ||
def load_workbook(self, filepath_or_buffer): | ||
if hasattr(filepath_or_buffer, "read") and hasattr(filepath_or_buffer, "seek"): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you can get rid of all of this if There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
ext = inspect_excel_format(filepath_or_buffer) | ||
with NamedTemporaryFile(suffix=f".{ext}", delete=False) as tmp_file: | ||
|
@@ -59,29 +56,24 @@ def load_workbook(self, filepath_or_buffer) -> str: | |
|
||
assert isinstance(filepath_or_buffer, str) | ||
|
||
from python_calamine import get_sheet_names | ||
from python_calamine import CalamineReader | ||
|
||
self._sheet_names = get_sheet_names(filepath_or_buffer) | ||
return filepath_or_buffer | ||
return CalamineReader.from_path(filepath_or_buffer) | ||
|
||
@property | ||
def sheet_names(self) -> list[str]: | ||
from python_calamine import get_sheet_names | ||
return self.book.sheet_names | ||
|
||
if self._sheet_names is None: | ||
self._sheet_names = get_sheet_names(self.book) | ||
return self._sheet_names | ||
|
||
def get_sheet_by_name(self, name: str) -> int: | ||
def get_sheet_by_name(self, name: str): | ||
self.raise_if_bad_sheet_by_name(name) | ||
return self.sheet_names.index(name) | ||
return self.book.get_sheet_by_name(name) | ||
|
||
def get_sheet_by_index(self, index: int) -> int: | ||
def get_sheet_by_index(self, index: int): | ||
self.raise_if_bad_sheet_by_index(index) | ||
return index | ||
return self.book.get_sheet_by_index(index) | ||
|
||
def get_sheet_data( | ||
self, sheet: int, file_rows_needed: int | None = None | ||
self, sheet, file_rows_needed: int | None = None | ||
) -> list[list[Scalar]]: | ||
def _convert_cell(value: ValueT) -> Scalar: | ||
if isinstance(value, float): | ||
|
@@ -97,9 +89,7 @@ def _convert_cell(value: ValueT) -> Scalar: | |
|
||
return value | ||
|
||
from python_calamine import get_sheet_data | ||
|
||
rows = get_sheet_data(self.book, sheet, skip_empty_area=False) | ||
rows: list[list[ValueT]] = sheet.to_python(skip_empty_area=False) | ||
data: list[list[Scalar]] = [] | ||
|
||
for row in rows: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we stick with the same
Scalar
import that the other readers use? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This type is for internal use in
_convert_cell.
get_sheet_data
returns Scalar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you move this into the function it is actually used in? Sounds like it has a pretty localized use, so no need to be in the global namespace
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I see in https://peps.python.org/pep-0613/#scope-restrictions, type aliases can't be defined within function scope. But I can make
ValueT
protected at module level. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah fair point. At the very least I think a more descriptive name than
ValueT
would be better. What is this supposed to represent? Some type of cell scalar? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed to _CellValueT.