R/CPTloading.R

Defines functions CPTloading

Documented in CPTloading

#' Correlation based Permutation Test (CPT) for canonical loadings (singular vectors)
#'
#' This function performs Correlation based Permutation Test on singular vectors of cross-covariance matrix between data matrices X and Y  (or say canonical loadings in SCCA) .
#'
#' @param X Data matrix, each row is one sample, each column is one feature.
#' @param Y Data matrix, each row is one sample, each column is one feature.
#' @param side Test singular vector with respect to X or Y, choose from "X", "Y".
#' @param K  The index of singular vector to be tested.
#' @param r Number of components to be computed, r>=K.
#' @param penalty "Fixed" or "CV": how to choose the penalty parameter, using fixed value or through cross validation.
#' @param rho_x Penalty parameter used for PMD estimation of data X. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_x should be a vector containing candidate penalty parameters for cross validation.
#' @param rho_y Penalty parameter used for PMD estimation of data Y. If penalty = "Fixed", rho should be a single value, if penalty = "CV", rho_y should be a vector containing candidate penalty parameters for cross validation.
#' @param permutation_no Integer: number of permutations for each test.
#' @import PMA
#' @importFrom MASS ginv
#' @importFrom stats cor
#' @export
#' @return A vector of p-values for K th singular vector  with respect to side.
#' @examples
#' library(TestPMD)
#' data("covid")
#' CPTloading(X = covid$metabolite, Y = covid$protein, side = "X", K = 1, r = 10,
#'  penalty = "Fixed", rho_x = 0.5, rho_y = 0.5, permutation_no = 100)
CPTloading <- function(X, Y, side = c("X", "Y"), K, r, penalty = c("Fixed","CV"), rho_x, rho_y, permutation_no){
  # check input
  if(!is.matrix(X) & !is.data.frame(X)){stop("X is not in form n*p matrix of data frame.")}
  if(!is.matrix(Y) & !is.data.frame(Y)){stop("Y is not in form n*q matrix of data frame.")}
  # if(!is.numeric(X) | !is.numeric(Y)){stop("Your data including non-numeric values.")}
  if(!is.numeric(K)){stop("K should be numeric")}
  if(!is.numeric(r)){stop("r should be numeric")}
  if(r < K){stop("r must be larger than K")}
  if(!(penalty %in% c("Fixed","CV"))){stop("penalty must choose from Fixed or CV")}
  if(!(side %in% c("X","Y"))){stop("side must choose from X or Y")}
  if(!is.numeric(permutation_no)){stop("permutation_no must be numeric")}
  if(nrow(X) != nrow(Y)){stop("Two datasets have different sample size")}

  X <- standsdmu(X)
  Y <- standsdmu(Y)
  n <- nrow(X)
  p <- ncol(X)
  q <- ncol(Y)
  #PMD first
  if(penalty == "CV"){
    print("Using CV to choose penalty parameter for PMD")
    if((length(rho_x) < 2) || (length(rho_y) < 2)){stop("Provide a vector of candidate penalty parameters for CV to rho_x and rho_y")}
    cv_out <- PMA::CCA.permute(x = X, z = Y, typex = "standard", typez="standard", penaltyxs = rho_x, penaltyzs = rho_y, standardize = F, trace = F)
    print(paste("The best penalty parameter for X and Y chosen by CV are: ", cv_out$bestpenaltyx, ",", cv_out$bestpenaltyz, sep=""))
    penaltyx <- cv_out$bestpenaltyx
    penaltyz <- cv_out$bestpenaltyz
  }
  else if (penalty == "Fixed"){
    print("Using provided penalty parameter for PMD")
    if((length(rho_x) > 1) || (length(rho_y) > 1)){stop("Provide a single value to rho_x and rho_y")}
    print(paste("The provided penalty parameters for X and Y are: ", rho_x, ", ", rho_y, sep = ""))
    penaltyx <- rho_x
    penaltyz <- rho_y
  }

  out <- PMA::CCA(x = X, z = Y, typex = "standard", typez = "standard", penaltyx = penaltyx, penaltyz = penaltyz, K = r, standardize = F, trace = F)
  U <- out$u[,1:r]
  V <- out$v[,1:r]
  d <- out$d

  Viminus <- V[,-K]
  Uiminus <- U[,-K]
  P <- X%*%(diag(1,p)-Uiminus%*%MASS::ginv(t(Uiminus)%*%Uiminus)%*%t(Uiminus))%*%U[,K]
  Q <- Y%*%(diag(1,q)-Viminus%*%MASS::ginv(t(Viminus)%*%Viminus)%*%t(Viminus))%*%V[,K]
  if(side == "Y"){
    G <- as.matrix(Y)
    H <- as.matrix(P)
  }
  if(side == "X"){
    G <- as.matrix(X)
    H <- as.matrix(Q)
  }
  corr0 <- cor(G,H)
  count <- matrix(0, nrow = dim(corr0)[1], ncol = dim(corr0)[2])
  for (perm in 1:permutation_no) {
    G_perm <- G[sample(1:dim(G)[1], dim(G)[1]),]
    for (a in 1:dim(count)[1]) {
      for (b in 1:dim(count)[2]) {
        if(abs(cor(G_perm,H)[a,b]) >= abs(corr0[a,b])){count[a,b] <- count[a,b]+1}
      }
    }
  }
  pvalue <- count/permutation_no
  return(pvalue)
}
YunhuiQi/TestPMD documentation built on May 5, 2022, 8:23 p.m.