@@ -3399,7 +3399,7 @@ def pointbiserialr(x, y):
3399
3399
return PointbiserialrResult (rpb , prob )
3400
3400
3401
3401
3402
- def kendalltau (x , y , initial_lexsort = True ):
3402
+ def kendalltau (x , y , initial_lexsort = True , nan_policy = 'propagate' ):
3403
3403
"""
3404
3404
Calculates Kendall's tau, a correlation measure for ordinal data.
3405
3405
@@ -3419,6 +3419,10 @@ def kendalltau(x, y, initial_lexsort=True):
3419
3419
`kendalltau` is of complexity O(n log(n)). If False, the complexity is
3420
3420
O(n^2), but with a smaller pre-factor (so quicksort may be faster for
3421
3421
small arrays).
3422
+ nan_policy : {'propagate', 'raise', 'omit'}, optional
3423
+ Defines how to handle when input contains nan. 'propagate' returns nan,
3424
+ 'raise' throws an error, 'omit' performs the calculations ignoring nan
3425
+ values. Default is 'propagate'.
3422
3426
3423
3427
Returns
3424
3428
-------
@@ -3428,6 +3432,11 @@ def kendalltau(x, y, initial_lexsort=True):
3428
3432
The two-sided p-value for a hypothesis test whose null hypothesis is
3429
3433
an absence of association, tau = 0.
3430
3434
3435
+ See also
3436
+ --------
3437
+ spearmanr : Calculates a Spearman rank-order correlation coefficient.
3438
+ theilslopes : Computes the Theil-Sen estimator for a set of points (x, y).
3439
+
3431
3440
Notes
3432
3441
-----
3433
3442
The definition of Kendall's tau that is used is::
@@ -3462,9 +3471,24 @@ def kendalltau(x, y, initial_lexsort=True):
3462
3471
3463
3472
KendalltauResult = namedtuple ('KendalltauResult' , ('correlation' , 'pvalue' ))
3464
3473
3465
- if not x .size or not y .size :
3474
+ if x .size != y .size :
3475
+ raise ValueError ("All inputs to `kendalltau` must be of the same size, "
3476
+ "found x-size %s and y-size %s" % (x .size , y .size ))
3477
+ elif not x .size or not y .size :
3466
3478
return KendalltauResult (np .nan , np .nan ) # Return NaN if arrays are empty
3467
3479
3480
+ # check both x and y
3481
+ contains_nan , nan_policy = (_contains_nan (x , nan_policy ) or
3482
+ _contains_nan (y , nan_policy ))
3483
+
3484
+ if contains_nan and nan_policy == 'propagate' :
3485
+ return KendalltauResult (np .nan , np .nan )
3486
+
3487
+ elif contains_nan and nan_policy == 'omit' :
3488
+ x = ma .masked_invalid (x )
3489
+ y = ma .masked_invalid (y )
3490
+ return mstats_basic .kendalltau (x , y )
3491
+
3468
3492
n = np .int64 (len (x ))
3469
3493
temp = list (range (n )) # support structure used by mergesort
3470
3494
# this closure recursively sorts sections of perm[] by comparing
0 commit comments