Skip to content

Commit 9e2e128

Browse files
authored
[ENG-8488] Fix inconsistent Excel rendering by enforcing binary mode for .xls/.xlsx (#401)
1 parent a54c91a commit 9e2e128

File tree

2 files changed

+22
-28
lines changed

2 files changed

+22
-28
lines changed

mfr/extensions/tabular/render.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
logger = logging.getLogger(__name__)
1313

14+
BINARY_EXCEL_EXTS = {'.xls', '.xlsx'}
1415

1516
class TabularRenderer(extension.BaseRenderer):
1617

@@ -30,8 +31,14 @@ def render(self):
3031
extension=self.metadata.ext,
3132
)
3233

33-
with open(self.file_path, errors='replace') as fp:
34-
sheets, size, nbr_rows, nbr_cols = self._render_grid(fp, self.metadata.ext)
34+
ext = (self.metadata.ext or '').lower()
35+
if ext in BINARY_EXCEL_EXTS:
36+
open_kwargs = {'mode': 'rb'}
37+
else:
38+
open_kwargs = {'errors': 'replace'}
39+
40+
with open(self.file_path, **open_kwargs) as fp:
41+
sheets, size, nbr_rows, nbr_cols = self._render_grid(fp, ext)
3542

3643
# Force GC
3744
gc.collect()

mfr/extensions/tabular/utilities.py

Lines changed: 13 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -90,41 +90,28 @@ def sav_to_csv(fp):
9090

9191
def to_bytes(fp):
9292
"""
93-
Return *exactly* the original bytes of the Excel file and rewind *fp*.
94-
Handles both binary and text wrappers that WaterButler may give us.
93+
Return exactly the original bytes and rewind fp.
94+
Requires a binary file-like object or a bytes object.
9595
"""
96-
try:
97-
fp.seek(0)
98-
except Exception:
99-
pass
100-
101-
raw = fp.read()
102-
if isinstance(raw, bytes):
103-
try:
104-
fp.seek(0)
105-
except Exception:
106-
pass
107-
return raw
96+
if isinstance(fp, (bytes, bytearray, memoryview)):
97+
return bytes(fp)
10898

109-
if hasattr(fp, "buffer"):
110-
buf = fp.buffer
99+
if hasattr(fp, "read"):
111100
try:
112-
buf.seek(0)
101+
if hasattr(fp, "seek"):
102+
fp.seek(0)
113103
except Exception:
114104
pass
115-
data = buf.read()
105+
raw = fp.read()
116106
try:
117-
buf.seek(0)
107+
if hasattr(fp, "seek"):
108+
fp.seek(0)
118109
except Exception:
119110
pass
120-
else:
121-
data = raw.encode("utf-8", "surrogateescape")
111+
if isinstance(raw, (bytes, bytearray, memoryview)):
112+
return bytes(raw)
122113

123-
try:
124-
fp.seek(0)
125-
except Exception:
126-
pass
127-
return data
114+
raise TypeError("Expected binary file-like object; got text/str")
128115

129116

130117
def _extract_rows(fields, raw_rows):

0 commit comments

Comments
 (0)