Skip to content

Commit f96372e

Browse files
committed
Merge remote-tracking branch 'upstream/master' into issue20825
2 parents 27480ac + c85ab08 commit f96372e

File tree

24 files changed

+717
-212
lines changed

24 files changed

+717
-212
lines changed

ci/travis-36.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@ dependencies:
1818
- numexpr
1919
- numpy
2020
- openpyxl
21-
- pandas-datareader
2221
- psycopg2
2322
- pyarrow
2423
- pymysql
2524
- pytables
26-
- python-dateutil
2725
- python-snappy
2826
- python=3.6*
2927
- pytz
@@ -45,3 +43,5 @@ dependencies:
4543
- pip:
4644
- brotlipy
4745
- coverage
46+
- pandas-datareader
47+
- python-dateutil

doc/source/whatsnew/v0.23.1.txt

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ Groupby/Resample/Rolling
5252
^^^^^^^^^^^^^^^^^^^^^^^^
5353

5454
- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`)
55+
- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill within a grouping would not always be applied as intended due to the implementations' use of a non-stable sort (:issue:`21207`)
5556

5657
Strings
5758
^^^^^^^
@@ -66,6 +67,7 @@ Categorical
6667
^^^^^^^^^^^
6768

6869
- Bug in :func:`pandas.util.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`)
70+
- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`)
6971

7072
Conversion
7173
^^^^^^^^^^
@@ -78,12 +80,14 @@ Indexing
7880

7981
- Bug in :meth:`Series.reset_index` where an appropriate error was not raised with an invalid level name (:issue:`20925`)
8082
- Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`)
83+
- Bug in :meth:`MultiIndex.set_names` where an error was raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`)
8184
-
8285

8386
I/O
8487
^^^
8588

86-
-
89+
- Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`)
90+
- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`)
8791
-
8892

8993
Plotting
@@ -97,8 +101,3 @@ Reshaping
97101

98102
- Bug in :func:`concat` where error was raised in concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`)
99103
-
100-
101-
Categorical
102-
^^^^^^^^^^^
103-
104-
-

doc/source/whatsnew/v0.24.0.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _whatsnew_0240:
22

33
v0.24.0
4-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4+
-------
55

66
.. _whatsnew_0240.enhancements:
77

@@ -12,7 +12,7 @@ New features
1212

1313
Other Enhancements
1414
^^^^^^^^^^^^^^^^^^
15-
-
15+
- :func:`to_datetime` now supports the ``%Z`` and ``%z`` directives when passed into ``format`` (:issue:`13486`)
1616
-
1717
-
1818

@@ -22,6 +22,13 @@ Other Enhancements
2222
Backwards incompatible API changes
2323
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2424

25+
.. _whatsnew_0240.api.datetimelike:
26+
27+
Datetimelike API Changes
28+
^^^^^^^^^^^^^^^^^^^^^^^^
29+
30+
- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of an integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`)
31+
2532
.. _whatsnew_0240.api.other:
2633

2734
Other API Changes

pandas/_libs/groupby.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
297297
# Make sure all arrays are the same size
298298
assert N == len(labels) == len(mask)
299299

300-
sorted_labels = np.argsort(labels).astype(np.int64, copy=False)
300+
sorted_labels = np.argsort(labels, kind='mergesort').astype(
301+
np.int64, copy=False)
301302
if direction == 'bfill':
302303
sorted_labels = sorted_labels[::-1]
303304

pandas/_libs/tslibs/strptime.pyx

Lines changed: 86 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ except:
2020
except:
2121
from _dummy_thread import allocate_lock as _thread_allocate_lock
2222

23+
import pytz
2324

2425
from cython cimport Py_ssize_t
2526
from cpython cimport PyFloat_Check
@@ -40,6 +41,27 @@ from util cimport is_string_object
4041
from nattype cimport checknull_with_nat, NPY_NAT
4142
from nattype import nat_strings
4243

44+
cdef dict _parse_code_table = {'y': 0,
45+
'Y': 1,
46+
'm': 2,
47+
'B': 3,
48+
'b': 4,
49+
'd': 5,
50+
'H': 6,
51+
'I': 7,
52+
'M': 8,
53+
'S': 9,
54+
'f': 10,
55+
'A': 11,
56+
'a': 12,
57+
'w': 13,
58+
'j': 14,
59+
'U': 15,
60+
'W': 16,
61+
'Z': 17,
62+
'p': 18, # an additional key, only with I
63+
'z': 19}
64+
4365

4466
def array_strptime(ndarray[object] values, object fmt,
4567
bint exact=True, errors='raise'):
@@ -58,15 +80,15 @@ def array_strptime(ndarray[object] values, object fmt,
5880
Py_ssize_t i, n = len(values)
5981
pandas_datetimestruct dts
6082
ndarray[int64_t] iresult
61-
int year, month, day, minute, hour, second, weekday, julian, tz
62-
int week_of_year, week_of_year_start
83+
ndarray[object] result_timezone
84+
int year, month, day, minute, hour, second, weekday, julian
85+
int week_of_year, week_of_year_start, parse_code, ordinal
6386
int64_t us, ns
64-
object val, group_key, ampm, found
87+
object val, group_key, ampm, found, timezone
6588
dict found_key
6689
bint is_raise = errors=='raise'
6790
bint is_ignore = errors=='ignore'
6891
bint is_coerce = errors=='coerce'
69-
int ordinal
7092

7193
assert is_raise or is_ignore or is_coerce
7294

@@ -79,6 +101,8 @@ def array_strptime(ndarray[object] values, object fmt,
79101
in fmt):
80102
raise ValueError("Cannot use '%W' or '%U' without "
81103
"day and year")
104+
elif '%Z' in fmt and '%z' in fmt:
105+
raise ValueError("Cannot parse both %Z and %z")
82106

83107
global _TimeRE_cache, _regex_cache
84108
with _cache_lock:
@@ -108,32 +132,10 @@ def array_strptime(ndarray[object] values, object fmt,
108132

109133
result = np.empty(n, dtype='M8[ns]')
110134
iresult = result.view('i8')
135+
result_timezone = np.empty(n, dtype='object')
111136

112137
dts.us = dts.ps = dts.as = 0
113138

114-
cdef dict _parse_code_table = {
115-
'y': 0,
116-
'Y': 1,
117-
'm': 2,
118-
'B': 3,
119-
'b': 4,
120-
'd': 5,
121-
'H': 6,
122-
'I': 7,
123-
'M': 8,
124-
'S': 9,
125-
'f': 10,
126-
'A': 11,
127-
'a': 12,
128-
'w': 13,
129-
'j': 14,
130-
'U': 15,
131-
'W': 16,
132-
'Z': 17,
133-
'p': 18 # just an additional key, works only with I
134-
}
135-
cdef int parse_code
136-
137139
for i in range(n):
138140
val = values[i]
139141
if is_string_object(val):
@@ -176,7 +178,7 @@ def array_strptime(ndarray[object] values, object fmt,
176178
year = 1900
177179
month = day = 1
178180
hour = minute = second = ns = us = 0
179-
tz = -1
181+
timezone = None
180182
# Default to -1 to signify that values not known; not critical to have,
181183
# though
182184
week_of_year = -1
@@ -266,21 +268,10 @@ def array_strptime(ndarray[object] values, object fmt,
266268
# W starts week on Monday.
267269
week_of_year_start = 0
268270
elif parse_code == 17:
269-
# Since -1 is default value only need to worry about setting tz
270-
# if it can be something other than -1.
271-
found_zone = found_dict['Z'].lower()
272-
for value, tz_values in enumerate(locale_time.timezone):
273-
if found_zone in tz_values:
274-
# Deal w/ bad locale setup where timezone names are the
275-
# same and yet time.daylight is true; too ambiguous to
276-
# be able to tell what timezone has daylight savings
277-
if (time.tzname[0] == time.tzname[1] and
278-
time.daylight and found_zone not in (
279-
"utc", "gmt")):
280-
break
281-
else:
282-
tz = value
283-
break
271+
timezone = pytz.timezone(found_dict['Z'])
272+
elif parse_code == 19:
273+
timezone = parse_timezone_directive(found_dict['z'])
274+
284275
# If we know the wk of the year and what day of that wk, we can figure
285276
# out the Julian day of the year.
286277
if julian == -1 and week_of_year != -1 and weekday != -1:
@@ -330,7 +321,9 @@ def array_strptime(ndarray[object] values, object fmt,
330321
continue
331322
raise
332323

333-
return result
324+
result_timezone[i] = timezone
325+
326+
return result, result_timezone
334327

335328

336329
"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
@@ -538,14 +531,13 @@ class TimeRE(dict):
538531
# XXX: Does 'Y' need to worry about having less or more than
539532
# 4 digits?
540533
'Y': r"(?P<Y>\d\d\d\d)",
534+
'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)",
541535
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
542536
'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
543537
'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
544538
'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
545539
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
546-
'Z': self.__seqToRE([tz for tz_names in self.locale_time.timezone
547-
for tz in tz_names],
548-
'Z'),
540+
'Z': self.__seqToRE(pytz.all_timezones, 'Z'),
549541
'%': '%'})
550542
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
551543
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
@@ -632,3 +624,50 @@ cdef _calc_julian_from_U_or_W(int year, int week_of_year,
632624
else:
633625
days_to_week = week_0_length + (7 * (week_of_year - 1))
634626
return 1 + days_to_week + day_of_week
627+
628+
cdef parse_timezone_directive(object z):
    """
    Parse the '%z' directive and return a pytz.FixedOffset

    Parameters
    ----------
    z : string of the UTC offset

    Returns
    -------
    pytz.FixedOffset

    Raises
    ------
    ValueError
        If a colon separates hours from minutes but not minutes from
        seconds.

    Notes
    -----
    This is essentially similar to the cpython implementation
    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479

    The returned offset has whole-minute resolution: any seconds or
    fractional seconds in ``z`` are floor-divided down to minutes before
    being handed to ``pytz.FixedOffset``.
    """

    cdef:
        int hours, minutes, seconds, pad_number, microseconds
        int total_minutes
        object gmtoff_remainder, gmtoff_remainder_padding

    if z == 'Z':
        return pytz.FixedOffset(0)

    # Normalise '+HH:MM[:SS[.ffffff]]' to the colon-free form so that the
    # fixed slices below line up.
    if z[3] == ':':
        z = z[:3] + z[4:]
        if len(z) > 5:
            if z[5] != ':':
                msg = "Inconsistent use of : in {0}"
                raise ValueError(msg.format(z))
            z = z[:5] + z[6:]

    hours = int(z[1:3])
    minutes = int(z[3:5])
    # Seconds are optional; an empty slice falls back to 0.
    seconds = int(z[5:7] or 0)

    # Pad to always return microseconds.
    gmtoff_remainder = z[8:]
    pad_number = 6 - len(gmtoff_remainder)
    gmtoff_remainder_padding = "0" * pad_number
    microseconds = int(gmtoff_remainder + gmtoff_remainder_padding)

    # Use explicit floor division so the result is an integer number of
    # minutes regardless of the module's division semantics.
    total_minutes = ((hours * 60) + minutes + (seconds // 60) +
                     (microseconds // 60000000))
    total_minutes = -total_minutes if z.startswith("-") else total_minutes
    return pytz.FixedOffset(total_minutes)

pandas/core/arrays/categorical.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.core.dtypes.generic import (
1313
ABCSeries, ABCIndexClass, ABCCategoricalIndex)
1414
from pandas.core.dtypes.missing import isna, notna
15+
from pandas.core.dtypes.inference import is_hashable
1516
from pandas.core.dtypes.cast import (
1617
maybe_infer_to_datetimelike,
1718
coerce_indexer_dtype)
@@ -1751,7 +1752,7 @@ def fillna(self, value=None, method=None, limit=None):
17511752
values[indexer] = values_codes[values_codes != -1]
17521753

17531754
# If value is not a dict or Series it should be a scalar
1754-
elif is_scalar(value):
1755+
elif is_hashable(value):
17551756
if not isna(value) and value not in self.categories:
17561757
raise ValueError("fill value must be in categories")
17571758

pandas/core/frame.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,8 +1774,11 @@ def to_stata(self, fname, convert_dates=None, write_index=True,
17741774
17751775
Parameters
17761776
----------
1777-
fname : str or buffer
1778-
String path of file-like object.
1777+
fname : path (string), buffer or path object
1778+
string, path object (pathlib.Path or py._path.local.LocalPath) or
1779+
object implementing a binary write() function. If using a buffer
1780+
then the buffer will not be automatically closed after the file
1781+
data has been written.
17791782
convert_dates : dict
17801783
Dictionary mapping columns containing datetime types to stata
17811784
internal format to use when writing the dates. Options are 'tc',
@@ -4174,8 +4177,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
41744177
* 0, or 'index' : Drop rows which contain missing values.
41754178
* 1, or 'columns' : Drop columns which contain missing value.
41764179
4177-
.. deprecated:: 0.23.0: Pass tuple or list to drop on multiple
4178-
axes.
4180+
.. deprecated:: 0.23.0
4181+
Pass tuple or list to drop on multiple axes.
4182+
41794183
how : {'any', 'all'}, default 'any'
41804184
Determine if row or column is removed from DataFrame, when we have
41814185
at least one NA or all NA.
@@ -7085,6 +7089,9 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
70857089
0 <= q <= 1, the quantile(s) to compute
70867090
axis : {0, 1, 'index', 'columns'} (default 0)
70877091
0 or 'index' for row-wise, 1 or 'columns' for column-wise
7092+
numeric_only : boolean, default True
7093+
If False, the quantile of datetime and timedelta data will be
7094+
computed as well
70887095
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
70897096
.. versionadded:: 0.18.0
70907097
@@ -7112,7 +7119,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
71127119
--------
71137120
71147121
>>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
7115-
columns=['a', 'b'])
7122+
columns=['a', 'b'])
71167123
>>> df.quantile(.1)
71177124
a 1.3
71187125
b 3.7
@@ -7122,6 +7129,20 @@ def quantile(self, q=0.5, axis=0, numeric_only=True,
71227129
0.1 1.3 3.7
71237130
0.5 2.5 55.0
71247131
7132+
Specifying `numeric_only=False` will also compute the quantile of
7133+
datetime and timedelta data.
7134+
7135+
>>> df = pd.DataFrame({'A': [1, 2],
7136+
'B': [pd.Timestamp('2010'),
7137+
pd.Timestamp('2011')],
7138+
'C': [pd.Timedelta('1 days'),
7139+
pd.Timedelta('2 days')]})
7140+
>>> df.quantile(0.5, numeric_only=False)
7141+
A 1.5
7142+
B 2010-07-02 12:00:00
7143+
C 1 days 12:00:00
7144+
Name: 0.5, dtype: object
7145+
71257146
See Also
71267147
--------
71277148
pandas.core.window.Rolling.quantile

0 commit comments

Comments
 (0)