-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Deprecate read_feather nthreads argument + update feather-format to pyarrow.feather #23112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ dependencies: | |
- numpy | ||
- python-dateutil | ||
- nomkl | ||
- pyarrow | ||
- pytz | ||
- pytest | ||
- pytest-xdist | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,38 +3,35 @@ | |
from distutils.version import LooseVersion | ||
|
||
from pandas.compat import range | ||
from pandas.util._decorators import deprecate_kwarg | ||
|
||
from pandas import DataFrame, Int64Index, RangeIndex | ||
|
||
from pandas.io.common import _stringify_path | ||
|
||
|
||
def _try_import(): | ||
# since pandas is a dependency of feather | ||
# since pandas is a dependency of pyarrow | ||
# we need to import on first use | ||
|
||
try: | ||
import feather | ||
import pyarrow | ||
from pyarrow import feather | ||
except ImportError: | ||
|
||
# give a nice error message | ||
raise ImportError("the feather-format library is not installed\n" | ||
raise ImportError("pyarrow is not installed\n\n" | ||
"you can install via conda\n" | ||
"conda install feather-format -c conda-forge\n" | ||
"conda install pyarrow -c conda-forge\n" | ||
"or via pip\n" | ||
"pip install -U feather-format\n") | ||
"pip install -U pyarrow\n") | ||
|
||
try: | ||
LooseVersion(feather.__version__) >= LooseVersion('0.3.1') | ||
except AttributeError: | ||
raise ImportError("the feather-format library must be >= " | ||
"version 0.3.1\n" | ||
if LooseVersion(pyarrow.__version__) < LooseVersion('0.4.1'): | ||
raise ImportError("pyarrow >= 0.4.1 required for feather support\n\n" | ||
"you can install via conda\n" | ||
"conda install feather-format -c conda-forge" | ||
"conda install pyarrow -c conda-forge\n" | ||
"or via pip\n" | ||
"pip install -U feather-format\n") | ||
"pip install -U pyarrow\n") | ||
|
||
return feather | ||
return feather, pyarrow | ||
|
||
|
||
def to_feather(df, path): | ||
|
@@ -51,7 +48,7 @@ def to_feather(df, path): | |
if not isinstance(df, DataFrame): | ||
raise ValueError("feather only support IO with DataFrames") | ||
|
||
feather = _try_import() | ||
feather = _try_import()[0] | ||
valid_types = {'string', 'unicode'} | ||
|
||
# validate index | ||
|
@@ -83,10 +80,11 @@ def to_feather(df, path): | |
if df.columns.inferred_type not in valid_types: | ||
raise ValueError("feather must have string column names") | ||
|
||
feather.write_dataframe(df, path) | ||
feather.write_feather(df, path) | ||
|
||
|
||
def read_feather(path, nthreads=1): | ||
@deprecate_kwarg(old_arg_name='nthreads', new_arg_name='use_threads') | ||
def read_feather(path, use_threads=True): | ||
""" | ||
Load a feather-format object from the file path | ||
|
||
|
@@ -99,17 +97,25 @@ def read_feather(path, nthreads=1): | |
Number of CPU threads to use when reading to pandas.DataFrame | ||
|
||
.. versionadded:: 0.21.0 | ||
.. deprecated:: 0.24.0 | ||
use_threads : bool, default True | ||
Whether to parallelize reading using multiple threads | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
Returns | ||
------- | ||
type of object stored in file | ||
|
||
""" | ||
|
||
feather = _try_import() | ||
feather, pyarrow = _try_import() | ||
path = _stringify_path(path) | ||
|
||
if LooseVersion(feather.__version__) < LooseVersion('0.4.0'): | ||
ingwinlu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return feather.read_dataframe(path) | ||
if LooseVersion(pyarrow.__version__) < LooseVersion('0.11.0'): | ||
int_use_threads = int(use_threads) | ||
if int_use_threads < 1: | ||
int_use_threads = 1 | ||
return feather.read_feather(path, nthreads=int_use_threads) | ||
|
||
return feather.read_dataframe(path, nthreads=nthreads) | ||
return feather.read_feather(path, use_threads=bool(use_threads)) | ||
Review comment: I don't think this is fully correct. If someone did `nthreads=1` before (i.e. explicitly asked for a single thread), that value is now passed on as `use_threads`, and `bool(1)` is `True`, so the read becomes multi-threaded.
Follow-up comment: Although, maybe that is not really a problem, since the default in pyarrow also actually changed from `nthreads=1` to `use_threads=True`.
Uh oh!
There was an error while loading. Please reload this page.