@@ -517,98 +517,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
517
517
518
518
return result
519
519
520
-
521
- # ----------------------------------------------------------------------
522
- # Kendall correlation
523
- # Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
524
-
525
@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray:
    """
    Perform Kendall (tau-b) rank correlation on the columns of a 2d array.

    For every pair of columns, only the rows in which both values are finite
    are used. Pairs of observations that are tied in one of the two columns
    are neither concordant nor discordant; they only enter the tau-b
    denominator. Pairs tied in both columns are ignored entirely.

    Parameters
    ----------
    mat : np.ndarray[float64_t, ndim=2]
        Array to compute kendall correlation on
    minp : int, default 1
        Minimum number of observations required per pair of columns
        to have a valid result.

    Returns
    -------
    numpy.ndarray[float64_t, ndim=2]
        Symmetric (K, K) correlation matrix, where K is the number of
        columns of ``mat``. An entry is NaN when fewer than ``minp`` paired
        finite observations exist, or when tau-b is undefined (fewer than two
        paired observations, or one of the columns is constant over them).
    """
    cdef:
        Py_ssize_t j, k, xi, yi, N, K, n_obs
        ndarray[float64_t, ndim=2] result
        ndarray[uint8_t, ndim=2] mask
        ndarray[float64_t, ndim=1] xs
        ndarray[float64_t, ndim=1] ys
        int64_t n_concordant, n_discordant, n_tied_x, n_tied_y
        float64_t xj, yj, xk, yk, denom, kendall_tau

    N, K = (<object>mat).shape

    result = np.empty((K, K), dtype=np.float64)
    mask = np.isfinite(mat).view(np.uint8)

    # Scratch buffers holding the jointly-finite observations of the two
    # columns currently being compared; only the first n_obs slots are used.
    xs = np.empty(N, dtype=np.float64)
    ys = np.empty(N, dtype=np.float64)

    for xi in range(K):
        for yi in range(xi + 1, K):
            # Gather the rows where both columns are finite.
            n_obs = 0
            for j in range(N):
                if mask[j, xi] and mask[j, yi]:
                    xs[n_obs] = mat[j, xi]
                    ys[n_obs] = mat[j, yi]
                    n_obs += 1

            if n_obs < minp:
                result[xi, yi] = NaN
                result[yi, xi] = NaN
                continue

            n_concordant = 0
            n_discordant = 0
            n_tied_x = 0
            n_tied_y = 0
            for j in range(n_obs - 1):
                xj = xs[j]
                yj = ys[j]
                for k in range(j + 1, n_obs):
                    xk = xs[k]
                    yk = ys[k]
                    if xj == xk:
                        # Tied in x only; pairs tied in both are dropped.
                        if yj != yk:
                            n_tied_x += 1
                    elif yj == yk:
                        n_tied_y += 1
                    elif (xj < xk) == (yj < yk):
                        n_concordant += 1
                    else:
                        n_discordant += 1

            # tau-b = (P - Q) / sqrt((P + Q + T) * (P + Q + U)).
            # Cast before multiplying: the product of two pair counts can
            # exceed the int64 range for long columns.
            denom = np.sqrt(
                <float64_t>(n_concordant + n_discordant + n_tied_x)
                * <float64_t>(n_concordant + n_discordant + n_tied_y)
            )
            if denom == 0:
                # No usable pairs (n_obs < 2) or a constant column.
                kendall_tau = NaN
            else:
                kendall_tau = (n_concordant - n_discordant) / denom
            result[xi, yi] = kendall_tau
            result[yi, xi] = kendall_tau

        # Diagonal: use the same "at least minp observations" criterion as
        # the off-diagonal entries.
        n_obs = 0
        for j in range(N):
            if mask[j, xi]:
                n_obs += 1
        if n_obs >= minp:
            result[xi, xi] = 1
        else:
            result[xi, xi] = NaN

    return result
610
-
611
-
612
520
# ----------------------------------------------------------------------
613
521
614
522
ctypedef fused algos_t:
0 commit comments