Description
Apologies if this is a pyarrow issue.
Code Sample, a copy-pastable example if possible
import pandas as pd
pd.DataFrame({'a': range(5), 'b': range(5)}).to_parquet('s3://mybucket', partition_cols=['b'])
Problem description
Calling to_parquet with partition_cols on an S3 path fails with AttributeError: 'NoneType' object has no attribute '_isfilestore'. Full traceback:
Traceback (most recent call last):
File "/python/partparqs3.py", line 8, in <module>
pd.DataFrame({'a': range(5), 'b': range(5)}).to_parquet('s3://mybucket', partition_cols=['b'])
File "/python/lib/python3.7/site-packages/pandas/core/frame.py", line 2203, in to_parquet
partition_cols=partition_cols, **kwargs)
File "/python/lib/python3.7/site-packages/pandas/io/parquet.py", line 252, in to_parquet
partition_cols=partition_cols, **kwargs)
File "/python/lib/python3.7/site-packages/pandas/io/parquet.py", line 118, in write
partition_cols=partition_cols, **kwargs)
File "/python/lib/python3.7/site-packages/pyarrow/parquet.py", line 1342, in write_to_dataset
_mkdir_if_not_exists(fs, root_path)
File "/python/lib/python3.7/site-packages/pyarrow/parquet.py", line 1292, in _mkdir_if_not_exists
if fs._isfilestore() and not fs.exists(path):
AttributeError: 'NoneType' object has no attribute '_isfilestore'
Exception ignored in: <function AbstractBufferedFile.__del__ at 0x7f529985ca60>
Traceback (most recent call last):
File "/python/lib/python3.7/site-packages/fsspec/spec.py", line 1146, in __del__
self.close()
File "/python/lib/python3.7/site-packages/fsspec/spec.py", line 1124, in close
self.flush(force=True)
File "/python/lib/python3.7/site-packages/fsspec/spec.py", line 996, in flush
self._initiate_upload()
File "/python/lib/python3.7/site-packages/s3fs/core.py", line 941, in _initiate_upload
Bucket=bucket, Key=key, ACL=self.acl)
File "/python/lib/python3.7/site-packages/s3fs/core.py", line 928, in _call_s3
**kwargs)
File "/python/lib/python3.7/site-packages/s3fs/core.py", line 182, in _call_s3
return method(**additional_kwargs)
File "/python/lib/python3.7/site-packages/botocore/client.py", line 357, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/python/lib/python3.7/site-packages/botocore/client.py", line 648, in _make_api_call
operation_model, request_dict, request_context)
File "/python/lib/python3.7/site-packages/botocore/client.py", line 667, in _make_request
return self._endpoint.make_request(operation_model, request_dict)
File "/python/lib/python3.7/site-packages/botocore/endpoint.py", line 102, in make_request
return self._send_request(request_dict, operation_model)
File "/python/lib/python3.7/site-packages/botocore/endpoint.py", line 137, in _send_request
success_response, exception):
File "/python/lib/python3.7/site-packages/botocore/endpoint.py", line 231, in _needs_retry
caught_exception=caught_exception, request_dict=request_dict)
File "/python/lib/python3.7/site-packages/botocore/hooks.py", line 356, in emit
return self._emitter.emit(aliased_event_name, **kwargs)
File "/python/lib/python3.7/site-packages/botocore/hooks.py", line 228, in emit
return self._emit(event_name, kwargs)
File "/python/lib/python3.7/site-packages/botocore/hooks.py", line 211, in _emit
response = handler(**kwargs)
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 183, in __call__
if self._checker(attempts, response, caught_exception):
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 251, in __call__
caught_exception)
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 269, in _should_retry
return self._checker(attempt_number, response, caught_exception)
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 317, in __call__
caught_exception)
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 223, in __call__
attempt_number, caught_exception)
File "/python/lib/python3.7/site-packages/botocore/retryhandler.py", line 359, in _check_caught_exception
raise caught_exception
File "/python/lib/python3.7/site-packages/botocore/endpoint.py", line 200, in _do_get_response
http_response = self._send(request)
File "/python/lib/python3.7/site-packages/botocore/endpoint.py", line 244, in _send
return self.http_session.send(request)
File "/python/lib/python3.7/site-packages/botocore/httpsession.py", line 294, in send
raise HTTPClientError(error=e)
botocore.exceptions.HTTPClientError: An HTTP Client raised and unhandled exception: 'NoneType' object is not iterable
Expected Output
Expected the partitioned Parquet data to be written to the S3 bucket without error.
Output of pd.show_versions()
INSTALLED VERSIONS
commit: None
python: 3.7.2.final.0
python-bits: 64
OS: Linux
OS-release: 3.10.0-957.21.3.el7.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.24.2
pytest: None
pip: 19.0.3
setuptools: 41.0.0
Cython: 0.29.7
numpy: 1.16.2
scipy: 1.3.0
pyarrow: 0.14.0
xarray: None
IPython: 7.5.0
sphinx: None
patsy: None
dateutil: 2.8.0
pytz: 2019.1
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 3.1.0
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml.etree: 4.3.3
bs4: None
html5lib: None
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.10.1
s3fs: 0.3.0
fastparquet: None
pandas_gbq: None
pandas_datareader: None
gcsfs: None