Skip to content

Commit 5848fb8

Browse files
committed
Merge pull request scipy#5167 from aeklant/kendalltau
ENH: add nan_policy to `stats.kendalltau`.
2 parents (55b25fb + 4da2393), commit 5848fb8

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

scipy/stats/stats.py

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3399,7 +3399,7 @@ def pointbiserialr(x, y):
33993399
return PointbiserialrResult(rpb, prob)
34003400

34013401

3402-
def kendalltau(x, y, initial_lexsort=True):
3402+
def kendalltau(x, y, initial_lexsort=True, nan_policy='propagate'):
34033403
"""
34043404
Calculates Kendall's tau, a correlation measure for ordinal data.
34053405
@@ -3419,6 +3419,10 @@ def kendalltau(x, y, initial_lexsort=True):
34193419
`kendalltau` is of complexity O(n log(n)). If False, the complexity is
34203420
O(n^2), but with a smaller pre-factor (so quicksort may be faster for
34213421
small arrays).
3422+
nan_policy : {'propagate', 'raise', 'omit'}, optional
3423+
Defines how to handle when input contains nan. 'propagate' returns nan,
3424+
'raise' throws an error, 'omit' performs the calculations ignoring nan
3425+
values. Default is 'propagate'.
34223426
34233427
Returns
34243428
-------
@@ -3428,6 +3432,11 @@ def kendalltau(x, y, initial_lexsort=True):
34283432
The two-sided p-value for a hypothesis test whose null hypothesis is
34293433
an absence of association, tau = 0.
34303434
3435+
See also
3436+
--------
3437+
spearmanr : Calculates a Spearman rank-order correlation coefficient.
3438+
theilslopes : Computes the Theil-Sen estimator for a set of points (x, y).
3439+
34313440
Notes
34323441
-----
34333442
The definition of Kendall's tau that is used is::
@@ -3462,9 +3471,24 @@ def kendalltau(x, y, initial_lexsort=True):
34623471

34633472
KendalltauResult = namedtuple('KendalltauResult', ('correlation', 'pvalue'))
34643473

3465-
if not x.size or not y.size:
3474+
if x.size != y.size:
3475+
raise ValueError("All inputs to `kendalltau` must be of the same size, "
3476+
"found x-size %s and y-size %s" % (x.size, y.size))
3477+
elif not x.size or not y.size:
34663478
return KendalltauResult(np.nan, np.nan) # Return NaN if arrays are empty
34673479

3480+
# check both x and y
3481+
contains_nan, nan_policy = (_contains_nan(x, nan_policy) or
3482+
_contains_nan(y, nan_policy))
3483+
3484+
if contains_nan and nan_policy == 'propagate':
3485+
return KendalltauResult(np.nan, np.nan)
3486+
3487+
elif contains_nan and nan_policy == 'omit':
3488+
x = ma.masked_invalid(x)
3489+
y = ma.masked_invalid(y)
3490+
return mstats_basic.kendalltau(x, y)
3491+
34683492
n = np.int64(len(x))
34693493
temp = list(range(n)) # support structure used by mergesort
34703494
# this closure recursively sorts sections of perm[] by comparing

scipy/stats/tests/test_stats.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -726,6 +726,20 @@ def test_kendalltau():
726726
# and do we get a tau of 1 for identical inputs?
727727
assert_approx_equal(stats.kendalltau([1,1,2], [1,1,2])[0], 1.0)
728728

729+
# test nan_policy
730+
x = np.arange(10.)
731+
x[9] = np.nan
732+
assert_array_equal(stats.kendalltau(x, x), (np.nan, np.nan))
733+
assert_allclose(stats.kendalltau(x, x, nan_policy='omit'),
734+
(1.0, 0.00017455009626808976), rtol=1e-06)
735+
assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='raise')
736+
assert_raises(ValueError, stats.kendalltau, x, x, nan_policy='foobar')
737+
738+
# test unequal length inputs
739+
x = np.arange(10.)
740+
y = np.arange(20.)
741+
assert_raises(ValueError, stats.kendalltau, x, y)
742+
729743

730744
class TestFindRepeats(TestCase):
731745

0 commit comments

Comments
 (0)