diff --git a/README.md b/README.md index 94f4e6f..8589def 100644 --- a/README.md +++ b/README.md @@ -3,28 +3,92 @@ Python Pandas extensions for pandas dataframes # Usage - ``` import pandas_ext as px +``` + +## CSV +By default, pandas will natively read from s3 but won't write to s3. +``` px.read_csv px.to_csv +``` +## Excel + +By default, pandas will natively read from s3 but won't write to s3. + +To write to xls: +```bash +pip install pandas_ext[xls] +``` + +To write to xlsx: +```bash +pip install pandas_ext[xlsx] +``` + +## Gdrive +By default, pandas does not read/write to Gdrive. + +### For G Suite administrators +At the organization level, one must do the necessary installation and + deployment of the [gdrive lambda service](https://github.com/richiverse/gdrive-lambda/) to get this to work. + +You will have to create a service account that shares your G Apps domain with the following APIs enabled: + +- Google Drive + +- Google Sheets + +From there you must download the p12 credentials file and reference it in your settings.yml when deploying the gdrive service. + +### For Gdrive clients +Once that is complete, you must share the folder you are interested in reading/writing with the service account email you've received from your administrator. + +Locally, for client access you must set `GDRIVE_URL` and `GDRIVE_KEY` in your project's environment variables in order to talk to +the gdrive lambda service. + + +``` px.read_gdrive px.to_gdrive +``` +## Parquet +By default, pandas ~does not read/write to Parquet~. This has been added in pandas version 0.24, and my methods will eventually be updated to use it but will still allow writing to s3. + +``` px.read_parquet px.to_parquet +``` + +## Spectrum +to_spectrum is unique to pandas_ext. +``` px.to_spectrum +``` +## Salesforce +Salesforce methods are unique to pandas_ext. 
+ + ``` px.read_sfdc px.sfdc_metadata px.patch_sfdc px.async_patch_sfdc +``` + +## SQL service +``` px.read_sql px.list_backends +``` +## XML +Pandas doesn't natively support writing to XML format. +``` px.to_xml ``` diff --git a/pandas_ext/__init__.py b/pandas_ext/__init__.py index 34cd4d0..33fac32 100644 --- a/pandas_ext/__init__.py +++ b/pandas_ext/__init__.py @@ -1,5 +1,5 @@ """Versioning kept here.""" -__version__ = '0.4.12' +__version__ = '0.4.13' __license__ = "MIT" __title__ = "pandas_ext" @@ -14,6 +14,7 @@ from .amazon_spectrum import to_spectrum from .csv import to_csv +from .excel import to_excel from .gdrive import read_gdrive, to_gdrive from .parquet import read_parquet, to_parquet from .sfdc import ( @@ -28,6 +29,7 @@ del amazon_spectrum del common del csv +del excel del gdrive del parquet del sfdc diff --git a/pandas_ext/excel.py b/pandas_ext/excel.py index c2b57b3..79fb76b 100644 --- a/pandas_ext/excel.py +++ b/pandas_ext/excel.py @@ -24,5 +24,6 @@ def to_excel(df: pd.DataFrame, file_path: str, engine='', **kwargs) -> None: with open(tmp_file, 'rb') as source, s3.open(file_path, 'wb') as dest: dest.write(source.read()) os.remove(tmp_file) + return return df.to_excel(file_path, **kwargs) diff --git a/setup.py b/setup.py index 0579167..0d0e991 100644 --- a/setup.py +++ b/setup.py @@ -60,6 +60,7 @@ def find_meta(meta): version=find_meta("version"), license=find_meta("license"), install_requires=read("requirements/requirements.in"), + extras_require=dict(xls=["xlwt"], xlsx=["openpyxl", "xlsxwriter"]), long_description=README, packages=PACKAGES, classifiers=CLASSIFIERS,