Skip to content
This repository has been archived by the owner on Mar 29, 2024. It is now read-only.

Commit

Permalink
Merge pull request #23 from newsela/to-excel
Browse files (browse the repository at this point in the history)
Adds to_excel
  • Loading branch information
richiverse authored Mar 29, 2019
2 parents 9c42f89 + b2bed63 commit 1173e2c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pandas_ext/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Versioning kept here."""
# NOTE(review): this first assignment looks like the removed side of the diff
# hunk (the pre-commit value) -- confirm. If both lines are executed, it is
# immediately overridden by the assignment that follows.
__version__ = '0.4.11'
# Effective package version: the last assignment to __version__ wins.
__version__ = '0.4.12'
# License identifier string for the package.
__license__ = "MIT"

# Package title string.
__title__ = "pandas_ext"
Expand Down
24 changes: 18 additions & 6 deletions pandas_ext/excel.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
"""Save xls[x] files to s3."""
import os

from tempfile import gettempdir

import pandas as pd
import s3fs

from pandas_ext.common.utils import is_s3_path


def to_excel(
    df: pd.DataFrame,
    file_path: str,
    engine: str = '',
    **kwargs
) -> None:
    """Write ``df`` to an Excel file at a local path or an S3 location.

    When ``is_s3_path(file_path)`` is true, the workbook is first written to
    a file inside a private temporary directory and then streamed to S3
    through ``s3fs``. Otherwise the call is delegated directly to
    ``DataFrame.to_excel``.

    Args:
        df: Frame to serialise.
        file_path: Destination path. For S3 destinations the text after the
            last ``.`` selects the default engine and the text after the
            last ``/`` is used as the temporary file name.
        engine: Excel writer engine name. When empty, S3 destinations
            default to ``xlwt`` for ``xls`` and ``xlsxwriter`` for ``xlsx``;
            local destinations let pandas choose.
        **kwargs: Forwarded unchanged to ``DataFrame.to_excel``.

    Raises:
        KeyError: If ``engine`` is empty, the destination is on S3 and the
            extension is neither ``xls`` nor ``xlsx``.
    """
    # Function-scope imports keep this block self-contained without touching
    # the module's import section.
    import shutil
    from tempfile import TemporaryDirectory

    if not is_s3_path(file_path):
        # Forward the explicit engine; previously it was captured by the
        # named parameter and silently dropped on this branch.
        return df.to_excel(file_path, engine=engine or None, **kwargs)

    if not engine:
        default_engines = {'xls': 'xlwt', 'xlsx': 'xlsxwriter'}
        ext = file_path.split('.')[-1].lower()
        try:
            engine = default_engines[ext]
        except KeyError:
            raise KeyError(
                f"no default Excel engine for extension {ext!r}; "
                "pass engine= explicitly"
            ) from None

    # A private temporary directory avoids name clashes between concurrent
    # calls that share a base name, and is removed even if writing or
    # uploading fails.
    with TemporaryDirectory() as tmp_dir:
        tmp_file = os.path.join(tmp_dir, file_path.split('/')[-1])
        with pd.ExcelWriter(tmp_file, engine=engine) as writer:
            df.to_excel(writer, **kwargs)

        s3 = s3fs.S3FileSystem()
        with open(tmp_file, 'rb') as source, s3.open(file_path, 'wb') as dest:
            # Stream in chunks instead of loading the whole workbook.
            shutil.copyfileobj(source, dest)

    # Return explicitly so the S3 branch never falls through to a second,
    # local write against the S3 URL.
    return None

0 comments on commit 1173e2c

Please sign in to comment.