-
-
Notifications
You must be signed in to change notification settings - Fork 18.8k
Fix/time series interpolation is wrong 21351 #56515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ff6d12f
1593af0
db68c2d
dd8b8d3
a04a3a2
efbba10
0294464
537f8bf
901701c
4f78c75
a5bcd45
7d4b4ce
dbae717
05be840
0912249
49a7c4c
a5a7299
6a6fa88
4ebed74
0ee5b8d
b2bc373
6109102
d6af64a
2a86a27
9c90e23
4b2f3dc
d11c162
789c511
4f6d102
c0547b5
d6382f8
4e9a616
c655bf1
e916da9
eaa7e07
649bfa2
4cfbbf1
76794e3
6ad9b26
6555141
48850cc
8eea71c
7f957cf
51e95e0
12bdd90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,7 @@ | |
TimedeltaIndex, | ||
timedelta_range, | ||
) | ||
from pandas.core.reshape.concat import concat | ||
|
||
from pandas.tseries.frequencies import ( | ||
is_subperiod, | ||
|
@@ -1086,7 +1087,23 @@ def interpolate( | |
""" | ||
assert downcast is lib.no_default # just checking coverage | ||
result = self._upsample("asfreq") | ||
return result.interpolate( | ||
|
||
# If the original data has timestamps which are not aligned with the | ||
# target timestamps, we need to add those points back to the data frame | ||
# that is supposed to be interpolated. This does not work with | ||
# PeriodIndex, so we skip this case. | ||
cbpygit marked this conversation as resolved.
Show resolved
Hide resolved
|
||
obj = self._selected_obj | ||
is_period_index = isinstance(obj.index, PeriodIndex) | ||
|
||
if not is_period_index: | ||
final_index = result.index | ||
missing_data_points_index = obj.index.difference(final_index) | ||
if len(missing_data_points_index) > 0: | ||
Review comment: In the opposite case, where the difference is empty, I think the sort_index and ... [comment truncated in this capture]. Reply: @jbrockmendel Can you please clarify? "in the opposite case where the difference is empty" --> then ... [reply truncated in this capture] |
||
result = concat( | ||
[result, obj.loc[missing_data_points_index]] | ||
).sort_index() | ||
|
||
result_interpolated = result.interpolate( | ||
method=method, | ||
axis=axis, | ||
limit=limit, | ||
|
@@ -1097,6 +1114,12 @@ def interpolate( | |
**kwargs, | ||
) | ||
|
||
# We make sure that original data points which do not align with the | ||
# resampled index are removed | ||
if is_period_index: | ||
return result_interpolated | ||
return result_interpolated.loc[final_index] | ||
|
||
@final | ||
def asfreq(self, fill_value=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,51 @@ | |
from pandas.core.indexes.timedeltas import timedelta_range | ||
from pandas.core.resample import _asfreq_compat | ||
|
||
# a fixture value can be overridden by the test parameter value. Note that the | ||
Review comment: Could you just directly parametrize the tests instead of using this pattern? This pattern was removed in the last release or so. Reply: Done, and removed the unused parametrizations. |
||
# value of the fixture can be overridden this way even if the test doesn't use | ||
# it directly (doesn't mention it in the function prototype). | ||
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa: E501 | ||
# in this module we override the fixture values defined in conftest.py | ||
# tuples of '_index_factory,_series_name,_index_start,_index_end' | ||
# Argument names shared by every entry of the ``all_ts`` parametrization; each
# tuple below supplies one value per name, in this order.
_TS_FIXTURE_ARGNAMES = "_index_factory,_series_name,_index_start,_index_end"

# Both datetime-based index factories cover the same ten-day window.
_WINDOW_START = datetime(2005, 1, 1)
_WINDOW_END = datetime(2005, 1, 10)

DATE_RANGE = (date_range, "dti", _WINDOW_START, _WINDOW_END)
PERIOD_RANGE = (period_range, "pi", _WINDOW_START, _WINDOW_END)
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")

# Decorator that runs a test once per index flavour defined above.
all_ts = pytest.mark.parametrize(
    _TS_FIXTURE_ARGNAMES,
    [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
)

# Interpolation method names passed as the ``method`` argument to the
# interpolation tests in this module.
_NO_ARG_INTERPOLATION_METHODS = [
    "linear",
    "time",
    "index",
    "values",
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "krogh",
    "from_derivatives",
    "piecewise_polynomial",
    "pchip",
    "akima",
]

all_1d_no_arg_interpolation_methods = pytest.mark.parametrize(
    "method",
    _NO_ARG_INTERPOLATION_METHODS,
)
|
||
|
||
@pytest.fixture | ||
def create_index(_index_factory): | ||
Review comment: Don't think this will be used anymore. Reply: Removed. |
||
def _create_index(*args, **kwargs): | ||
"""return the _index_factory created using the args, kwargs""" | ||
return _index_factory(*args, **kwargs) | ||
|
||
return _create_index | ||
|
||
|
||
@pytest.mark.parametrize("freq", ["2D", "1h"]) | ||
@pytest.mark.parametrize( | ||
|
@@ -89,6 +134,53 @@ def test_resample_interpolate(index): | |
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
@all_1d_no_arg_interpolation_methods
def test_resample_interpolate_regular_sampling_off_grid(method):
    # GH#21351
    # Five samples spaced 2h apart, each starting one minute past the hour
    off_grid_index = date_range("2000-01-01 00:01:00", periods=5, freq="2h")
    source = Series(np.arange(5.0), off_grid_index)

    # Upsample to an hourly grid and fill it with the requested method
    upsampled = source.resample("1h").interpolate(method)

    # Only the very first grid point may be NaN: it lies 1 minute before
    # the first actual data point
    assert np.isnan(upsampled.iloc[0])
    remainder = upsampled.iloc[1:]
    assert not remainder.isna().any()

    if method in ["nearest", "zero"]:
        # Methods with known inaccuracies: skip the value comparison
        return

    # All remaining methods should land close to the half-step ramp
    expected_values = np.arange(0.5, 4.5, 0.5)
    assert np.allclose(upsampled.values[1:], expected_values, rtol=1.0e-1)
|
||
|
||
@all_1d_no_arg_interpolation_methods
def test_resample_interpolate_irregular_sampling(method):
    # GH#21351
    # Irregularly spaced source timestamps
    irregular_index = DatetimeIndex(
        [
            "2000-01-01 00:00:03",
            "2000-01-01 00:00:22",
            "2000-01-01 00:00:24",
            "2000-01-01 00:00:31",
            "2000-01-01 00:00:39",
        ]
    )
    source = Series(np.linspace(0.0, 1.0, 5), index=irregular_index)

    # Upsample to a 5 second grid and fill it with the requested method
    upsampled = source.resample("5s").interpolate(method)

    # Only the very first grid point may be NaN: it lies 3 seconds before
    # the first actual data point
    assert np.isnan(upsampled.iloc[0])
    remainder = upsampled.iloc[1:]
    assert not remainder.isna().any()
|
||
|
||
def test_raises_on_non_datetimelike_index(): | ||
# this is a non datetimelike index | ||
xp = DataFrame() | ||
|
Uh oh!
There was an error while loading. Please reload this page.