Skip to content

Commit bfc3a12

Browse files
committed
Merge pull request #7604 from seth-p/moments_diff_index_sets
BUG: {expanding,rolling}_{cov,corr} don't handle arguments with different index sets properly
2 parents eb7724e + c4a9484 commit bfc3a12

File tree

3 files changed: 107 additions (+107) and 12 deletions (-12)

doc/source/v0.14.1.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -271,3 +271,5 @@ Bug Fixes
271271

272272
- Bug in ``Float64Index`` assignment with a non scalar indexer (:issue:`7586`)
273273
- Bug where ``pandas.core.strings.str_contains`` does not properly match in a case-insensitive fashion when ``regex=False`` and ``case=False`` (:issue:`7505`)
274+
275+
- Bug in ``expanding_cov``, ``expanding_corr``, ``rolling_cov``, and ``rolling_corr`` when the two arguments have different indexes (:issue:`7512`)

pandas/stats/moments.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -209,11 +209,11 @@ def rolling_cov(arg1, arg2=None, window=None, min_periods=None, freq=None,
209209
pairwise = True if pairwise is None else pairwise # only default unset
210210
arg1 = _conv_timerule(arg1, freq, how)
211211
arg2 = _conv_timerule(arg2, freq, how)
212-
window = min(window, len(arg1), len(arg2))
213212

214213
def _get_cov(X, Y):
215-
mean = lambda x: rolling_mean(x, window, min_periods, center=center)
216-
count = rolling_count(X + Y, window, center=center)
214+
adj_window = min(window, len(X), len(Y))
215+
mean = lambda x: rolling_mean(x, adj_window, min_periods, center=center)
216+
count = rolling_count(X + Y, adj_window, center=center)
217217
bias_adj = count / (count - 1)
218218
return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj
219219
rs = _flex_binary_moment(arg1, arg2, _get_cov, pairwise=bool(pairwise))
@@ -234,16 +234,17 @@ def rolling_corr(arg1, arg2=None, window=None, min_periods=None, freq=None,
234234
pairwise = True if pairwise is None else pairwise # only default unset
235235
arg1 = _conv_timerule(arg1, freq, how)
236236
arg2 = _conv_timerule(arg2, freq, how)
237-
window = min(window, len(arg1), len(arg2))
238237

239238
def _get_corr(a, b):
240-
num = rolling_cov(a, b, window, min_periods, freq=freq,
239+
adj_window = min(window, len(a), len(b))
240+
num = rolling_cov(a, b, adj_window, min_periods, freq=freq,
241241
center=center)
242-
den = (rolling_std(a, window, min_periods, freq=freq,
242+
den = (rolling_std(a, adj_window, min_periods, freq=freq,
243243
center=center) *
244-
rolling_std(b, window, min_periods, freq=freq,
244+
rolling_std(b, adj_window, min_periods, freq=freq,
245245
center=center))
246246
return num / den
247+
247248
return _flex_binary_moment(arg1, arg2, _get_corr, pairwise=bool(pairwise))
248249

249250

@@ -261,9 +262,9 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):
261262
results = {}
262263
if isinstance(arg2, DataFrame):
263264
X, Y = arg1.align(arg2, join='outer')
264-
X = X + 0 * Y
265-
Y = Y + 0 * X
266265
if pairwise is False:
266+
X = X + 0 * Y
267+
Y = Y + 0 * X
267268
res_columns = arg1.columns.union(arg2.columns)
268269
for col in res_columns:
269270
if col in X and col in Y:
@@ -276,7 +277,7 @@ def _flex_binary_moment(arg1, arg2, f, pairwise=False):
276277
# Symmetric case
277278
results[k1][k2] = results[k2][k1]
278279
else:
279-
results[k1][k2] = f(arg1[k1], arg2[k2])
280+
results[k1][k2] = f(*_prep_binary(arg1[k1], arg2[k2]))
280281
return Panel.from_dict(results).swapaxes('items', 'major')
281282
else:
282283
raise ValueError("'pairwise' is not True/False")
@@ -917,7 +918,7 @@ def expanding_cov(arg1, arg2=None, min_periods=1, freq=None, center=False,
917918
min_periods = arg2
918919
arg2 = arg1
919920
pairwise = True if pairwise is None else pairwise
920-
window = max(len(arg1), len(arg2))
921+
window = len(arg1) + len(arg2)
921922
return rolling_cov(arg1, arg2, window,
922923
min_periods=min_periods, freq=freq,
923924
center=center, pairwise=pairwise)
@@ -935,7 +936,7 @@ def expanding_corr(arg1, arg2=None, min_periods=1, freq=None, center=False,
935936
min_periods = arg2
936937
arg2 = arg1
937938
pairwise = True if pairwise is None else pairwise
938-
window = max(len(arg1), len(arg2))
939+
window = len(arg1) + len(arg2)
939940
return rolling_corr(arg1, arg2, window,
940941
min_periods=min_periods,
941942
freq=freq, center=center, pairwise=pairwise)

pandas/stats/tests/test_moments.py

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -759,6 +759,98 @@ def test_expanding_corr_pairwise(self):
759759
for i in result.items:
760760
assert_almost_equal(result[i], rolling_result[i])
761761

762+
def test_expanding_cov_diff_index(self):
763+
# GH 7512
764+
s1 = Series([1, 2, 3], index=[0, 1, 2])
765+
s2 = Series([1, 3], index=[0, 2])
766+
result = mom.expanding_cov(s1, s2)
767+
expected = Series([None, None, 2.0])
768+
assert_series_equal(result, expected)
769+
770+
s2a = Series([1, None, 3], index=[0, 1, 2])
771+
result = mom.expanding_cov(s1, s2a)
772+
assert_series_equal(result, expected)
773+
774+
s1 = Series([7, 8, 10], index=[0, 1, 3])
775+
s2 = Series([7, 9, 10], index=[0, 2, 3])
776+
result = mom.expanding_cov(s1, s2)
777+
expected = Series([None, None, None, 4.5])
778+
assert_series_equal(result, expected)
779+
780+
def test_expanding_corr_diff_index(self):
781+
# GH 7512
782+
s1 = Series([1, 2, 3], index=[0, 1, 2])
783+
s2 = Series([1, 3], index=[0, 2])
784+
result = mom.expanding_corr(s1, s2)
785+
expected = Series([None, None, 1.0])
786+
assert_series_equal(result, expected)
787+
788+
s2a = Series([1, None, 3], index=[0, 1, 2])
789+
result = mom.expanding_corr(s1, s2a)
790+
assert_series_equal(result, expected)
791+
792+
s1 = Series([7, 8, 10], index=[0, 1, 3])
793+
s2 = Series([7, 9, 10], index=[0, 2, 3])
794+
result = mom.expanding_corr(s1, s2)
795+
expected = Series([None, None, None, 1.])
796+
assert_series_equal(result, expected)
797+
798+
def test_rolling_cov_diff_length(self):
799+
# GH 7512
800+
s1 = Series([1, 2, 3], index=[0, 1, 2])
801+
s2 = Series([1, 3], index=[0, 2])
802+
result = mom.rolling_cov(s1, s2, window=3, min_periods=2)
803+
expected = Series([None, None, 2.0])
804+
assert_series_equal(result, expected)
805+
806+
s2a = Series([1, None, 3], index=[0, 1, 2])
807+
result = mom.rolling_cov(s1, s2a, window=3, min_periods=2)
808+
assert_series_equal(result, expected)
809+
810+
def test_rolling_corr_diff_length(self):
811+
# GH 7512
812+
s1 = Series([1, 2, 3], index=[0, 1, 2])
813+
s2 = Series([1, 3], index=[0, 2])
814+
result = mom.rolling_corr(s1, s2, window=3, min_periods=2)
815+
expected = Series([None, None, 1.0])
816+
assert_series_equal(result, expected)
817+
818+
s2a = Series([1, None, 3], index=[0, 1, 2])
819+
result = mom.rolling_corr(s1, s2a, window=3, min_periods=2)
820+
assert_series_equal(result, expected)
821+
822+
def test_expanding_cov_pairwise_diff_length(self):
823+
# GH 7512
824+
df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])
825+
df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B'])
826+
df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
827+
df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
828+
result1 = mom.expanding_cov(df1, df2, pairwise=True)[2]
829+
result2 = mom.expanding_cov(df1, df2a, pairwise=True)[2]
830+
result3 = mom.expanding_cov(df1a, df2, pairwise=True)[2]
831+
result4 = mom.expanding_cov(df1a, df2a, pairwise=True)[2]
832+
expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A','B'], columns=['X','Y'])
833+
assert_frame_equal(result1, expected)
834+
assert_frame_equal(result2, expected)
835+
assert_frame_equal(result3, expected)
836+
assert_frame_equal(result4, expected)
837+
838+
def test_expanding_corr_pairwise_diff_length(self):
839+
# GH 7512
840+
df1 = DataFrame([[1,2], [3, 2], [3,4]], columns=['A','B'])
841+
df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B'])
842+
df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
843+
df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])
844+
result1 = mom.expanding_corr(df1, df2, pairwise=True)[2]
845+
result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2]
846+
result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2]
847+
result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2]
848+
expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y'])
849+
assert_frame_equal(result1, expected)
850+
assert_frame_equal(result2, expected)
851+
assert_frame_equal(result3, expected)
852+
assert_frame_equal(result4, expected)
853+
762854
def test_rolling_skew_edge_cases(self):
763855

764856
all_nan = Series([np.NaN] * 5)

0 commit comments

Comments
 (0)