# R/KendallTau.R In mixedCCA: Sparse Canonical Correlation Analysis for High-Dimensional Mixed Data

#### Documented in Kendall_matrixKendallTau

#' Kendall's tau correlation
#'
#' Calculate Kendall's tau correlation.
#' \deqn{ \hat{\tau}_{jk} = \frac{2}{n(n-1)}\sum_{1\le i<i'\le n} sign(X_{ji}-X_{ji'}) sign(X_{ki}-X_{ki'}) }
#' The function \code{KendallTau} calculates Kendall's tau correlation between two variables, returning a single correlation value. The function \code{Kendall_matrix} returns a correlation matrix.
#'
#' @param x A numeric vector.
#' @param y A numeric vector.
#'
#' @rdname KendallTau
#' @return \code{KendallTau(x, y)} returns one Kendall's tau correlation value between two vectors, \code{x} and \code{y}.
#' @examples
#'
#' n <- 100 # sample size
#' r <- 0.8 # true correlation
#'
#' ### vector input
#' # Data generation (X1: truncated continuous, X2: continuous)
#' Z <- mvrnorm(n, mu = c(0, 0), Sigma = matrix(c(1, r, r, 1), nrow = 2))
#' X1 <- Z[,1]
#' X1[Z[,1] < 1] <- 0
#' X2 <- Z[,2]
#'
#' KendallTau(X1, X2)
#' Kendall_matrix(X1, X2)
#'
#' ### matrix data input
#' p1 <- 3; p2 <- 4 # dimension of X1 and X2
#' JSigma <- matrix(r, nrow = p1+p2, ncol = p1+p2); diag(JSigma) <- 1
#' Z <- mvrnorm(n, mu = rep(0, p1+p2), Sigma = JSigma)
#' X1 <- Z[,1:p1]
#' X1[Z[,1:p1] < 0] <- 0
#' X2 <- Z[,(p1+1):(p1+p2)]
#'
#' Kendall_matrix(X1, X2)
#'
#' @importFrom pcaPP cor.fk
#' @export
KendallTau <- function(x, y){ # both x and y are vectors, not matrix.
# Based on cor.fk function from pcaPP package to make the computation faster.
# It can handle ties.
if (length(x) != length(y)){ # Check of they have the same length
stop ("x and y must have same length.")
}
if (sum(is.na(x)) + sum(is.na(y)) > 0){
index_new = which(!is.na(x) & !is.na(y))
if (length(index_new) < 2){
stop("Too many missing measuments in one of the variables, filter for missing data.")
}else{
x <- x[index_new]
y <- y[index_new]
}
}

n <- length(x)
n0 <- n * (n - 1) / 2
if (length(unique(x)) != n) {
x <- as.vector(x) # sometimes input x is a matrix n by 1, which gives errors for rle function below.
x.info <- rle(sort(x))
t1 <- x.info$lengths[x.info$lengths > 1]
n1 <- sum(t1 * (t1 - 1) / 2)
} else {
n1 <- 0
}
if (length(unique(y)) != n) {
y <- as.vector(y) # sometimes input y is a matrix n by 1, which gives errors for rle function below.
y.info <- rle(sort(y))
u1 <- y.info$lengths[y.info$lengths > 1]
n2 <- sum(u1 * (u1 - 1) / 2)
} else {
n2 <- 0
}

tau <- pcaPP::cor.fk(x, y) * sqrt(n0 - n1) * sqrt(n0 - n2) / n0

return(tau)
}

#' @param X A numeric matrix (n by p1).
#' @param Y A numeric matrix (n by p2).
#'
#' @rdname KendallTau
#'
#' @return \code{Kendall_matrix(X)} returns a p1 by p1 matrix of Kendall's tau correlation coefficients. \code{Kendall_matrix(X, Y)} returns a p1 by p2 matrix of Kendall's tau correlation coefficients.
#'
#' @export
#' @importFrom pcaPP cor.fk
Kendall_matrix <- function(X, Y = NULL){ # X and Y are matrix.
if (is.null(Y)) {
X <- as.matrix(X) # In case that X is vector, this line gives you one column matrix.
p1 <- ncol(X)
tau <- matrix(1, p1, p1)
if (p1 > 1){
for (i in 1:(p1 - 1)){
for (j in (i + 1):p1){ # calculate KendallTau elementwise.
tau[i, j] <- tau[j, i] <- KendallTau(X[, i], X[, j])
}
}
}
} else {
X <- as.matrix(X); Y <- as.matrix(Y) # In case that X and Y are vectors, this line gives you one column matrix.
p1 <- ncol(X); p2 <- ncol(Y)
if (p1 == 1 & p2 == 1){
tau <- KendallTau(X, Y)
} else {
tau <- matrix(0, p1, p2)
for (i in 1:p1){
for (j in 1:p2){ # calculate KendallTau elementwise.
tau[i, j] <- KendallTau(X[, i], Y[, j])
}
}
}
}
return(tau)
}


## Try the mixedCCA package in your browser

Any scripts or data that you put into this service are public.

mixedCCA documentation built on March 21, 2021, 1:07 a.m.