Commit 3ad0638

Merge remote-tracking branch 'upstream/master' into arrow-string-array-dtype
2 parents 8adb08d + b117ab5 · commit 3ad0638

37 files changed: +356 −177 lines

.travis.yml

Lines changed: 0 additions & 1 deletion
@@ -45,7 +45,6 @@ before_install:
   - echo "before_install"
   # Use blocking IO on travis. Ref: https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
   - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);'
-  - source ci/travis_process_gbq_encryption.sh
   - export PATH="$HOME/miniconda3/bin:$PATH"
   - df -h
   - pwd

ci/travis_encrypt_gbq.sh

Lines changed: 0 additions & 34 deletions
This file was deleted.

ci/travis_gbq.json.enc

-2.3 KB
Binary file not shown.

ci/travis_gbq_config.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

ci/travis_process_gbq_encryption.sh

Lines changed: 0 additions & 12 deletions
This file was deleted.

doc/README.rst

Lines changed: 0 additions & 1 deletion
This file was deleted.

doc/source/whatsnew/v1.3.0.rst

Lines changed: 3 additions & 1 deletion
@@ -341,6 +341,7 @@ Other enhancements
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
+- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
 
 .. ---------------------------------------------------------------------------
 
@@ -804,11 +805,12 @@ Deprecations
 - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
 - Deprecated passing arguments as positional in :meth:`DataFrame.reset_index` (other than ``"level"``) and :meth:`Series.reset_index` (:issue:`41485`)
 - Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
+- In a future version, constructing :class:`Series` or :class:`DataFrame` with ``datetime64[ns]`` data and ``DatetimeTZDtype`` will treat the data as wall-times instead of as UTC times (matching DatetimeIndex behavior). To treat the data as UTC times, use ``pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)`` or ``pd.Series(data.view("int64"), dtype=dtype)`` (:issue:`33401`)
 - Deprecated passing arguments as positional in :meth:`DataFrame.set_axis` and :meth:`Series.set_axis` (other than ``"labels"``) (:issue:`41485`)
 - Deprecated passing arguments as positional in :meth:`DataFrame.where` and :meth:`Series.where` (other than ``"cond"`` and ``"other"``) (:issue:`41485`)
 - Deprecated passing arguments as positional (other than ``filepath_or_buffer``) in :func:`read_csv` (:issue:`41485`)
 - Deprecated passing arguments as positional in :meth:`DataFrame.drop` (other than ``"labels"``) and :meth:`Series.drop` (:issue:`41485`)
--
+- Deprecated passing arguments as positional (other than ``filepath_or_buffer``) in :func:`read_table` (:issue:`41485`)
 
 
 .. _whatsnew_130.deprecations.nuisance_columns:
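
For illustration only (not part of this commit): a minimal sketch of the newly documented positional-argument deprecation for :func:`read_table`. The file name and separator below are made-up placeholders.

    import pandas as pd

    # Deprecated: anything after filepath_or_buffer passed positionally
    # ("data.tsv" and the tab separator are placeholder values).
    df = pd.read_table("data.tsv", "\t")       # emits a FutureWarning

    # Preferred: pass everything except the path by keyword.
    df = pd.read_table("data.tsv", sep="\t")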

pandas/conftest.py

Lines changed: 7 additions & 0 deletions
@@ -66,6 +66,11 @@
     MultiIndex,
 )
 
+# Until https://github.com/numpy/numpy/issues/19078 is sorted out, just suppress
+suppress_npdev_promotion_warning = pytest.mark.filterwarnings(
+    "ignore:Promotion of numbers and bools:FutureWarning"
+)
+
 # ----------------------------------------------------------------
 # Configuration / Settings
 # ----------------------------------------------------------------
@@ -112,6 +117,8 @@ def pytest_collection_modifyitems(items):
         if "/frame/" in item.nodeid:
             item.add_marker(pytest.mark.arraymanager)
 
+        item.add_marker(suppress_npdev_promotion_warning)
+
 
 # Hypothesis
 hypothesis.settings.register_profile(
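
As a side note, here is a minimal, self-contained sketch (not part of the commit) of how a ``pytest.mark.filterwarnings`` marker like the one defined above behaves once attached to a test item; the test body and the explicitly emitted warning are invented stand-ins for the numpy-dev behavior tracked in numpy/numpy#19078.

    import warnings

    import pytest

    # Same filter as in conftest.py: ignore FutureWarnings whose message
    # starts with "Promotion of numbers and bools".
    suppress_npdev_promotion_warning = pytest.mark.filterwarnings(
        "ignore:Promotion of numbers and bools:FutureWarning"
    )

    @suppress_npdev_promotion_warning
    def test_promotion_warning_is_suppressed():
        # Stand-in for the warning numpy-dev emits; the marker keeps it from
        # surfacing (or escalating to an error) for this test only.
        warnings.warn("Promotion of numbers and bools is deprecated", FutureWarning)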

pandas/core/arrays/interval.py

Lines changed: 3 additions & 2 deletions
@@ -32,7 +32,6 @@
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender
 
-from pandas.core.dtypes.cast import maybe_convert_platform
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -1650,4 +1649,6 @@ def _maybe_convert_platform_interval(values) -> ArrayLike:
     else:
         values = extract_array(values, extract_numpy=True)
 
-    return maybe_convert_platform(values)
+    if not hasattr(values, "dtype"):
+        return np.asarray(values)
+    return values
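
For orientation, a small usage sketch (not part of the commit) of the code path touched above: ``IntervalArray.from_breaks`` routes list input through ``_maybe_convert_platform_interval``, which now falls back to ``np.asarray`` for inputs without a ``dtype`` and passes dtype-carrying inputs through unchanged. The exact integer subtype in the result may vary by platform and pandas version.

    import pandas as pd

    # A plain Python list has no dtype, so it is converted with np.asarray.
    arr = pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
    print(arr.dtype)  # an integer-subtyped IntervalDtype, e.g. interval[int64, right]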

pandas/core/arrays/string_arrow.py

Lines changed: 52 additions & 35 deletions
@@ -19,17 +19,19 @@
     Scalar,
 )
 from pandas.compat import (
+    pa_version_under1p0,
     pa_version_under2p0,
     pa_version_under3p0,
     pa_version_under4p0,
 )
-from pandas.compat.pyarrow import pa_version_under1p0
 from pandas.util._decorators import doc
 from pandas.util._validators import validate_fillna_kwargs
 
+from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import (
     is_array_like,
     is_bool_dtype,
+    is_dtype_equal,
     is_integer,
     is_integer_dtype,
     is_object_dtype,
@@ -44,38 +46,41 @@
 from pandas.core.arrays.base import ExtensionArray
 from pandas.core.arrays.boolean import BooleanDtype
 from pandas.core.arrays.integer import Int64Dtype
+from pandas.core.arrays.numeric import NumericDtype
 from pandas.core.arrays.string_ import StringDtype
 from pandas.core.indexers import (
     check_array_indexer,
     validate_indices,
 )
 from pandas.core.strings.object_array import ObjectStringArrayMixin
 
-try:
+# PyArrow backed StringArrays are available starting at 1.0.0, but this
+# file is imported from even if pyarrow is < 1.0.0, before pyarrow.compute
+# and its compute functions existed. GH38801
+if not pa_version_under1p0:
     import pyarrow as pa
-except ImportError:
-    pa = None
-else:
-    # PyArrow backed StringArrays are available starting at 1.0.0, but this
-    # file is imported from even if pyarrow is < 1.0.0, before pyarrow.compute
-    # and its compute functions existed. GH38801
-    if not pa_version_under1p0:
-        import pyarrow.compute as pc
-
-        ARROW_CMP_FUNCS = {
-            "eq": pc.equal,
-            "ne": pc.not_equal,
-            "lt": pc.less,
-            "gt": pc.greater,
-            "le": pc.less_equal,
-            "ge": pc.greater_equal,
-        }
+    import pyarrow.compute as pc
+
+    ARROW_CMP_FUNCS = {
+        "eq": pc.equal,
+        "ne": pc.not_equal,
+        "lt": pc.less,
+        "gt": pc.greater,
+        "le": pc.less_equal,
+        "ge": pc.greater_equal,
+    }
 
 
 if TYPE_CHECKING:
     from pandas import Series
 
 
+def _chk_pyarrow_available() -> None:
+    if pa_version_under1p0:
+        msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray."
+        raise ImportError(msg)
+
+
 # TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from
 # ObjectStringArrayMixin because we want to have the object-dtype based methods as
 # fallback for the ones that pyarrow doesn't yet support
@@ -125,10 +130,8 @@ class ArrowStringArray(OpsMixin, ExtensionArray, ObjectStringArrayMixin):
     Length: 4, dtype: string[pyarrow]
     """
 
-    _dtype = StringDtype(storage="pyarrow")
-
     def __init__(self, values):
-        self._chk_pyarrow_available()
+        self._dtype = StringDtype(storage="pyarrow")
         if isinstance(values, pa.Array):
             self._data = pa.chunked_array([values])
         elif isinstance(values, pa.ChunkedArray):
@@ -141,19 +144,11 @@ def __init__(self, values):
                 "ArrowStringArray requires a PyArrow (chunked) array of string type"
             )
 
-    @classmethod
-    def _chk_pyarrow_available(cls) -> None:
-        # TODO: maybe update import_optional_dependency to allow a minimum
-        # version to be specified rather than use the global minimum
-        if pa is None or pa_version_under1p0:
-            msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray."
-            raise ImportError(msg)
-
     @classmethod
     def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
         from pandas.core.arrays.masked import BaseMaskedArray
 
-        cls._chk_pyarrow_available()
+        _chk_pyarrow_available()
 
         if dtype and not (isinstance(dtype, str) and dtype == "string"):
             dtype = pandas_dtype(dtype)
@@ -206,10 +201,14 @@ def to_numpy( # type: ignore[override]
         """
         # TODO: copy argument is ignored
 
-        if na_value is lib.no_default:
-            na_value = self._dtype.na_value
-        result = self._data.__array__(dtype=dtype)
-        result[isna(result)] = na_value
+        result = np.array(self._data, dtype=dtype)
+        if self._data.null_count > 0:
+            if na_value is lib.no_default:
+                if dtype and np.issubdtype(dtype, np.floating):
+                    return result
+                na_value = self._dtype.na_value
+            mask = self.isna()
+            result[mask] = na_value
         return result
 
     def __len__(self) -> int:
@@ -653,6 +652,24 @@ def value_counts(self, dropna: bool = True) -> Series:
 
         return Series(counts, index=index).astype("Int64")
 
+    def astype(self, dtype, copy=True):
+        dtype = pandas_dtype(dtype)
+
+        if is_dtype_equal(dtype, self.dtype):
+            if copy:
+                return self.copy()
+            return self
+
+        elif isinstance(dtype, NumericDtype):
+            data = self._data.cast(pa.from_numpy_dtype(dtype.numpy_dtype))
+            return dtype.__from_arrow__(data)
+
+        elif isinstance(dtype, ExtensionDtype):
+            cls = dtype.construct_array_type()
+            return cls._from_sequence(self, dtype=dtype, copy=copy)
+
+        return super().astype(dtype, copy)
+
     # ------------------------------------------------------------------------
     # String methods interface
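
For orientation, a rough usage sketch (not part of the commit, and assuming pyarrow >= 1.0.0 is installed) of the ``astype`` method added above. The expected output is inferred from the ``NumericDtype`` branch and may differ in detail.

    import pandas as pd

    # Casting a pyarrow-backed StringArray to a nullable integer dtype should
    # take the NumericDtype branch: pyarrow casts the chunked string array to
    # int64 and Int64Dtype.__from_arrow__ wraps the result.
    arr = pd.array(["1", "2", None], dtype="string[pyarrow]")
    print(arr.astype("Int64"))  # expected: <IntegerArray> [1, 2, <NA>]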

pandas/core/dtypes/cast.py

Lines changed: 16 additions & 0 deletions
@@ -1657,6 +1657,22 @@ def maybe_cast_to_datetime(
                     # Numeric values are UTC at this point,
                     # so localize and convert
                     # equiv: Series(dta).astype(dtype)  # though deprecated
+                    if getattr(vdtype, "kind", None) == "M":
+                        # GH#24559, GH#33401 deprecate behavior inconsistent
+                        # with DatetimeArray/DatetimeIndex
+                        warnings.warn(
+                            "In a future version, constructing a Series "
+                            "from datetime64[ns] data and a "
+                            "DatetimeTZDtype will interpret the data "
+                            "as wall-times instead of "
+                            "UTC times, matching the behavior of "
+                            "DatetimeIndex. To treat the data as UTC "
+                            "times, use pd.Series(data).dt"
+                            ".tz_localize('UTC').tz_convert(dtype.tz) "
+                            "or pd.Series(data.view('int64'), dtype=dtype)",
+                            FutureWarning,
+                            stacklevel=5,
+                        )
 
                     value = dta.tz_localize("UTC").tz_convert(dtype.tz)
                 except OutOfBoundsDatetime:
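
A short sketch (not part of the commit) of the construction that the new warning targets, using the workaround quoted in the warning message itself; the timestamps and timezone are arbitrary examples.

    import numpy as np
    import pandas as pd

    data = np.array(["2021-01-01", "2021-01-02"], dtype="datetime64[ns]")
    dtype = pd.DatetimeTZDtype(tz="US/Eastern")

    # Currently interpreted as UTC times; emits the FutureWarning added above.
    ser = pd.Series(data, dtype=dtype)

    # Explicit, warning-free equivalent of the current behavior.
    utc = pd.Series(data).dt.tz_localize("UTC").dt.tz_convert(dtype.tz)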

pandas/core/internals/construction.py

Lines changed: 0 additions & 3 deletions
@@ -537,9 +537,6 @@ def _prep_ndarray(values, copy: bool = True) -> np.ndarray:
     def convert(v):
         if not is_list_like(v) or isinstance(v, ABCDataFrame):
             return v
-        elif not hasattr(v, "dtype") and not isinstance(v, (list, tuple, range)):
-            # TODO: should we cast these to list?
-            return v
 
         v = extract_array(v, extract_numpy=True)
         res = maybe_convert_platform(v)
