#' Determine the best low rank using Projection based Permutation Test (PPT)
#'
#' This function sequentially performs Projection based Permutation Test on singular values of cross-covariance matrix between data matrices X and Y, thus determine the best low rank.
#'
#' @param X Data matrix, each row is one sample, each column is one feature.
#' @param Y Data matrix, each row is one sample, each column is one feature.
#' @param max The largest index that will be considered.
#' @param penalty "Fixed" or "CV": how to choose the penalty parameter, using fixed value or through cross validation.
#' @param rho_x Penalty parameter used for PMD estimation of data X. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_x should be a vector containing candidate penalty parameters for cross validation.
#' @param rho_y Penalty parameter used for PMD estimation of data Y. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_y should be a vector containing candidate penalty parameters for cross validation.
#' @param permutation_no Integer: number of permutations for each test.
#' @param alpha Significance level, 0.05 by default
#' @import PMA
#' @importFrom stats cov
#' @importFrom MASS ginv
#' @export
#' @return A list containing best low rank and p-values
#' @examples
#' library(TestPMD)
#' data("covid")
#' PPTrank(X = covid$metabolite, Y = covid$protein, max = 2, penalty = "Fixed",
#' rho_x = 0.9, rho_y = 0.9, permutation_no = 100, alpha = 0.1)
PPTrank <- function(X, Y, max, penalty = c("Fixed","CV"), rho_x = NULL, rho_y = NULL, permutation_no, alpha = 0.05){
# check input
if(!is.matrix(X) & !is.data.frame(X)){stop("X is not in form n*p matrix of data frame.")}
if(!is.matrix(Y) & !is.data.frame(Y)){stop("Y is not in form n*q matrix of data frame.")}
# if(!is.numeric(X) | !is.numeric(Y)){stop("Your data including non-numeric values.")}
if(!is.numeric(max)){stop("max should be numeric")}
if(!(penalty %in% c("Fixed","CV"))){stop("penalty must choose from Fixed or CV")}
if(!is.numeric(permutation_no)){stop("permutation_no must be numeric")}
if(nrow(X) != nrow(Y)){stop("Two datasets have different sample size")}
if(!is.numeric(alpha)){stop("alpha must be numeric")}
if(alpha >= 1 | alpha <= 0){stop("alpha must be within (0, 1)")}
X <- standsdmu(X)
Y <- standsdmu(Y)
n <- nrow(X)
p <- ncol(X)
q <- ncol(Y)
r <- max
if(penalty == "CV"){
print("Using CV to choose penalty parameter for PMD")
if((length(rho_x) < 2) || (length(rho_y) < 2)){stop("Provide a vector of candidate penalty parameters for CV to rho_x and rho_y")}
cv_out <- PMA::CCA.permute(x = X, z = Y, typex = "standard", typez="standard", penaltyxs = rho_x, penaltyzs = rho_y, standardize = F, trace = F)
print(paste("The best penalty parameter for X and Y chosen by CV are: ", cv_out$bestpenaltyx, cv_out$bestpenaltyz, sep=""))
penaltyx <- cv_out$bestpenaltyx
penaltyz <- cv_out$bestpenaltyz
}
else if (penalty == "Fixed"){
print("Using provided penalty parameter for PMD")
if((length(rho_x) > 1) || (length(rho_y) > 1)){stop("Provide a single value to rho_x and rho_y")}
print(paste("The provided penalty parameters for X and Y are: ", rho_x, ", ", rho_y, sep = ""))
penaltyx <- rho_x
penaltyz <- rho_y
}
out <- PMA::CCA(x = X, z = Y, typex = "standard", typez = "standard", penaltyx = penaltyx, penaltyz = penaltyz, K = r, standardize = F, trace = F)
U <- out$u
V <- out$v
#PPT
K <- max
pvalue <- c()
for (l in 1:max) {
A <- U[,1:(l-1)]
B <- V[,1:(l-1)]
PA <- A%*%MASS::ginv(t(A)%*%A)%*%t(A)
PB <- B%*%MASS::ginv(t(B)%*%B)%*%t(B)
HA <- diag(p) - PA
HB <- diag(q) - PB
W <- X%*%HA
Z <- Y%*%HB
R0 <- svd(stats::cov(W,Z))$d[1]
count <- 0
for (perm in 1:permutation_no) {
W_perm <- W[sample(1:n, replace = F), ]
R_perm <- svd(stats::cov(W_perm, Z))$d[1]
if(abs(R_perm) >= abs(R0)){count = count + 1}
}
pvalue <- c(pvalue, count/permutation_no)
if(count/permutation_no > alpha){
K <- l-1
break
}
}
return(list(rank = K, pvalue = pvalue))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.