Skip to content

Commit 268ee80

Browse files
committed
ENH/TST: Support for non-ascii encodings in DataFrame.to_excel (GH3710)
1 parent a71ede3 commit 268ee80

File tree

4 files changed

+28
-5
lines changed

4 files changed

+28
-5
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ Improvements to existing features
151151
- perf improvements in single-dtyped indexing (:issue:`6484`)
152152
- ``StataWriter`` and ``DataFrame.to_stata`` accept time stamp and data labels (:issue:`6545`)
153153
- offset/freq info now in Timestamp __repr__ (:issue:`4553`)
154+
- Support passing ``encoding`` with xlwt (:issue:`3710`)
154155

155156
.. _release.bug_fixes-0.14.0:
156157

pandas/core/frame.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1151,7 +1151,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
11511151
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
11521152
float_format=None, cols=None, header=True, index=True,
11531153
index_label=None, startrow=0, startcol=0, engine=None,
1154-
merge_cells=True):
1154+
merge_cells=True, encoding=None):
11551155
"""
11561156
Write DataFrame to an excel sheet
11571157
@@ -1186,6 +1186,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
11861186
``io.excel.xlsm.writer``.
11871187
merge_cells : boolean, default True
11881188
Write MultiIndex and Hierarchical Rows as merged cells.
1189+
encoding : string, default None
1190+
Encoding of the resulting excel file. Only necessary for xlwt,
1191+
other writers support unicode natively.
11891192
11901193
Notes
11911194
-----
@@ -1200,6 +1203,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
12001203
"""
12011204
from pandas.io.excel import ExcelWriter
12021205
need_save = False
1206+
if encoding == None:
1207+
encoding = 'ascii'
1208+
12031209
if isinstance(excel_writer, compat.string_types):
12041210
excel_writer = ExcelWriter(excel_writer, engine=engine)
12051211
need_save = True

pandas/io/excel.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -597,13 +597,15 @@ class _XlwtWriter(ExcelWriter):
597597
engine = 'xlwt'
598598
supported_extensions = ('.xls',)
599599

600-
def __init__(self, path, engine=None, **engine_kwargs):
600+
def __init__(self, path, engine=None, encoding=None, **engine_kwargs):
601601
# Use the xlwt module as the Excel writer.
602602
import xlwt
603603

604604
super(_XlwtWriter, self).__init__(path, **engine_kwargs)
605605

606-
self.book = xlwt.Workbook()
606+
if encoding is None:
607+
encoding = 'ascii'
608+
self.book = xlwt.Workbook(encoding=encoding)
607609
self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format)
608610
self.fm_date = xlwt.easyxf(num_format_str=self.date_format)
609611

@@ -787,13 +789,13 @@ def _convert_to_style(self, style_dict, num_format_str=None):
787789

788790
# Create a XlsxWriter format object.
789791
xl_format = self.book.add_format()
790-
792+
791793
if num_format_str is not None:
792794
xl_format.set_num_format(num_format_str)
793795

794796
if style_dict is None:
795797
return xl_format
796-
798+
797799
# Map the cell font to XlsxWriter font properties.
798800
if style_dict.get('font'):
799801
font = style_dict['font']

pandas/io/tests/test_excel.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,20 @@ def test_to_excel_float_format(self):
796796
index=['A', 'B'], columns=['X', 'Y', 'Z'])
797797
tm.assert_frame_equal(rs, xp)
798798

799+
def test_to_excel_output_encoding(self):
800+
_skip_if_no_xlrd()
801+
ext = self.ext
802+
filename = '__tmp_to_excel_float_format__.' + ext
803+
df = DataFrame([[u('\u0192'), u('\u0193'), u('\u0194')],
804+
[u('\u0195'), u('\u0196'), u('\u0197')]],
805+
index=[u('A\u0192'), 'B'], columns=[u('X\u0193'), 'Y', 'Z'])
806+
807+
with ensure_clean(filename) as filename:
808+
df.to_excel(filename, sheet_name = 'TestSheet', encoding='utf8')
809+
result = read_excel(filename, 'TestSheet', encoding = 'utf8')
810+
tm.assert_frame_equal(result,df)
811+
812+
799813
def test_to_excel_unicode_filename(self):
800814
_skip_if_no_xlrd()
801815
with ensure_clean(u('\u0192u.') + self.ext) as filename:

0 commit comments

Comments
 (0)