-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
BUG: Fix some PeriodIndex resampling issues #16153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ca7b6f2
c27f430
390e16e
23566c2
a82879d
4b1c740
73c0990
fa6c1d3
7ea04e9
82a8275
432c623
c8814fb
486ad67
ad8519f
39fc7e2
efcad5b
41401d4
398a684
8358c41
6084e0c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1270,18 +1270,34 @@ def _get_period_bins(self, ax): | |
raise TypeError('axis must be a PeriodIndex, but got ' | ||
'an instance of %r' % type(ax).__name__) | ||
|
||
if not len(ax): | ||
memb = ax.asfreq(self.freq, how=self.convention) | ||
# NaT handling as in pandas._libs.lib.generate_bins_dt64() | ||
nat_count = 0 | ||
if memb.hasnans: | ||
import warnings | ||
with warnings.catch_warnings(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You don't need this.
|
||
warnings.filterwarnings('ignore', 'numpy equal will not check ' | ||
'object identity') | ||
nat_mask = memb.base == tslib.NaT | ||
# raises "FutureWarning: numpy equal will not check object | ||
# identity in the future. The comparison did not return the | ||
# same result as suggested by the identity (`is`)) and will | ||
# change." | ||
nat_count = np.sum(nat_mask) | ||
memb = memb[~nat_mask] | ||
|
||
# if index contains no valid (non-NaT) values, return empty index | ||
if not len(memb): | ||
binner = labels = PeriodIndex( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I left this, ok for now. |
||
data=[], freq=self.freq, name=ax.name) | ||
return binner, [], labels | ||
|
||
start = ax[0].asfreq(self.freq, how=self.convention) | ||
end = ax[-1].asfreq(self.freq, how='end') | ||
start = ax.min().asfreq(self.freq, how=self.convention) | ||
end = ax.max().asfreq(self.freq, how='end') | ||
|
||
labels = binner = PeriodIndex(start=start, end=end, | ||
freq=self.freq, name=ax.name) | ||
|
||
memb = ax.asfreq(self.freq, how=self.convention) | ||
i8 = memb.asi8 | ||
freq_mult = self.freq.n | ||
# when upsampling to subperiods, we need to generate enough bins | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. blank line There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
@@ -1291,6 +1307,14 @@ def _get_period_bins(self, ax): | |
rng += freq_mult | ||
bins = memb.searchsorted(rng, side='left') | ||
|
||
if nat_count > 0: | ||
# NaT handling as in pandas._libs.lib.generate_bins_dt64() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this path tested sufficiently, e.g. with 0, 1, and 2 NaT values? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a test case for consecutive NaTs in the index (1cad7fa). This should be sufficiently tested; cases covered:
Any ideas for more exhaustive test cases? |
||
# shift bins by the number of NaT | ||
bins += nat_count | ||
bins = np.insert(bins, 0, nat_count) | ||
binner = binner.insert(0, tslib.NaT) | ||
labels = labels.insert(0, tslib.NaT) | ||
|
||
return binner, bins, labels | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2913,6 +2913,45 @@ def test_upsampling_ohlc_freq_multiples(self): | |
result = s.resample('12H', kind='period').ohlc() | ||
assert_frame_equal(result, expected) | ||
|
||
def test_resample_with_nat(self): | ||
# GH 13224 | ||
index = PeriodIndex([pd.NaT, '1970-01-01 00:00:00', pd.NaT, | ||
'1970-01-01 00:00:01', '1970-01-01 00:00:02'], | ||
freq='S') | ||
frame = DataFrame([2, 3, 5, 7, 11], index=index) | ||
|
||
index_1s = PeriodIndex(['1970-01-01 00:00:00', '1970-01-01 00:00:01', | ||
'1970-01-01 00:00:02'], freq='S') | ||
frame_1s = DataFrame([3, 7, 11], index=index_1s) | ||
result_1s = frame.resample('1s').mean() | ||
assert_frame_equal(result_1s, frame_1s) | ||
|
||
index_2s = PeriodIndex(['1970-01-01 00:00:00', | ||
'1970-01-01 00:00:02'], freq='2S') | ||
frame_2s = DataFrame([5, 11], index=index_2s) | ||
result_2s = frame.resample('2s').mean() | ||
assert_frame_equal(result_2s, frame_2s) | ||
|
||
index_3s = PeriodIndex(['1970-01-01 00:00:00'], freq='3S') | ||
frame_3s = DataFrame([7], index=index_3s) | ||
result_3s = frame.resample('3s').mean() | ||
assert_frame_equal(result_3s, frame_3s) | ||
|
||
pi = PeriodIndex(['1970-01-01 00:00:00', pd.NaT, | ||
'1970-01-01 00:00:02'], freq='S') | ||
frame = DataFrame([2, 3, 5], index=pi) | ||
expected_index = period_range(pi[0], periods=len(pi), freq=pi.freq) | ||
expected = DataFrame([2, np.NaN, 5], index=expected_index) | ||
result = frame.resample('1s').mean() | ||
assert_frame_equal(result, expected) | ||
|
||
pi = PeriodIndex([pd.NaT] * 3, freq='S') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Issue number for this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done (this case is not mentioned explicitly in the issue; it is just an edge case). |
||
frame = DataFrame([2, 3, 5], index=pi) | ||
expected_index = PeriodIndex(data=[], freq=pi.freq) | ||
expected = DataFrame([], index=expected_index) | ||
result = frame.resample('1s').mean() | ||
assert_frame_equal(result, expected) | ||
|
||
|
||
class TestTimedeltaIndex(Base): | ||
_index_factory = lambda x: timedelta_range | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
blank line
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done