#' Projection based Permutation Test (PPT) for canonical correlations (singular values)
#'
#' This function performs Projection based Permutation Test on singular values of cross-covariance matrix between data matrices X and Y.
#'
#' @param X Data matrix, each row is one sample, each column is one feature.
#' @param Y Data matrix, each row is one sample, each column is one feature.
#' @param l_range A vector of integers indicating which singular value to be tested.
#' @param penalty "Fixed" or "CV": how to choose the penalty parameter, using fixed value or through cross validation.
#' @param rho_x Penalty parameter used for PMD estimation of data X. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_x should be a vector containing candidate penalty parameters for cross validation.
#' @param rho_y Penalty parameter used for PMD estimation of data Y. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_y should be a vector containing candidate penalty parameters for cross validation.
#' @param permutation_no Integer: number of permutations for each test.
#' @import PMA
#' @importFrom stats cov
#' @importFrom MASS ginv
#' @export
#' @return A vector of p-values for singular values at position l_range.
#' @examples
#' library(TestPMD)
#' data("covid")
#' PPTvalue(X = covid$metabolite, Y = covid$protein, l_range = 1:3, penalty = "Fixed",
#' rho_x = 0.9, rho_y = 0.9, permutation_no = 100)
PPTvalue <- function(X, Y, l_range, penalty = c("Fixed","CV"), rho_x = NULL, rho_y = NULL, permutation_no){
# check input
if(!is.matrix(X) & !is.data.frame(X)){stop("X is not in form n*p matrix of data frame.")}
if(!is.matrix(Y) & !is.data.frame(Y)){stop("Y is not in form n*q matrix of data frame.")}
# if(!is.numeric(X) | !is.numeric(Y)){stop("Your data including non-numeric values.")}
if(!is.numeric(l_range)){stop("l_range can only include numeric values")}
if(!(penalty %in% c("Fixed","CV"))){stop("penalty must choose from Fixed or CV")}
if(!is.numeric(permutation_no)){stop("permutation_no must be numeric")}
if(nrow(X) != nrow(Y)){stop("Two datasets have different sample size")}
X <- standsdmu(X)
Y <- standsdmu(Y)
n <- nrow(X)
p <- ncol(X)
q <- ncol(Y)
r <- l_range[length(l_range)]
if(penalty == "CV"){
print("Using CV to choose penalty parameter for PMD")
if((length(rho_x) < 2) || (length(rho_y) < 2)){stop("Provide a vector of candidate penalty parameters for CV to rho_x and rho_y")}
cv_out <- PMA::CCA.permute(x = X, z = Y, typex = "standard", typez="standard", penaltyxs = rho_x, penaltyzs = rho_y, standardize = F, trace = F)
print(paste("The best penalty parameter for X and Y chosen by CV are: ", cv_out$bestpenaltyx, cv_out$bestpenaltyz, sep=""))
penaltyx <- cv_out$bestpenaltyx
penaltyz <- cv_out$bestpenaltyz
}
else if (penalty == "Fixed"){
print("Using provided penalty parameter for PMD")
if((length(rho_x) > 1) || (length(rho_y) > 1)){stop("Provide a single value to rho_x and rho_y")}
print(paste("The provided penalty parameters for X and Y are: ", rho_x, ", ", rho_y, sep = ""))
penaltyx <- rho_x
penaltyz <- rho_y
}
out <- PMA::CCA(x = X, z = Y, typex = "standard", typez = "standard", penaltyx = penaltyx, penaltyz = penaltyz, K = r, standardize = F, trace = F)
U <- out$u
V <- out$v
#PPT
pvalue <- c()
for (l in l_range) {
A <- U[,1:(l-1)]
B <- V[,1:(l-1)]
PA <- A%*%MASS::ginv(t(A)%*%A)%*%t(A)
PB <- B%*%MASS::ginv(t(B)%*%B)%*%t(B)
HA <- diag(p) - PA
HB <- diag(q) - PB
W <- X%*%HA
Z <- Y%*%HB
R0 <- svd(stats::cov(W,Z))$d[1]
count <- 0
for (perm in 1:permutation_no) {
W_perm <- W[sample(1:n, replace = F), ]
R_perm <- svd(stats::cov(W_perm, Z))$d[1]
if(abs(R_perm) >= abs(R0)){count = count + 1}
}
pvalue <- c(pvalue, count/permutation_no)
}
print(paste("p-values for canonical correlations at positions", l_range, "are:", pvalue, collapse = " "))
return(pvalue)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.