R/aSPC_dCor.R

#' An Adaptive Sum of Powered Correlation Test (aSPC) with dcor
#'
#' @param df1, first matrix
#' @param df2, second matrix
#' @param pow, power integer candidates, default c(1:8, Inf)
#' @param B, number of permutations to calculate a P-value
#' @return the P-values of SPC and aSPC tests
#' @references Xu Z., Pan W. An adaptive and powerful test for two groups of variables with high dimension
#' @examples
#' library(mvtnorm)
#' sigma = diag(0.9, 10) + 0.1
#' n = 200 # sample size
#' Z = rmvnorm(n=n, mean=rep(0,10), sigma=sigma)
#' X = rmvnorm(n=n, mean=rep(0,25), sigma=diag(1, 25))
#' Y = rmvnorm(n=n, mean=rep(0,25), sigma=diag(1, 25))
#' X = as.data.frame(cbind(Z[,1:5], X))
#' Y = as.data.frame(cbind(Z[,6:10], Y))
#' dim(X)
#' dim(Y)
#' set.seed(123) # to ensure we can replicate the permutation P-value
#' B = 10
#' a = proc.time()
#' aSPC_dcor(X, Y, pow = c(1:8, Inf), B = 50)
#' proc.time() - a
#'
#' a = proc.time()
#' aSPC(X, Y, pow = c(1:8, Inf), B = 50, method = "dcor_fast")
#' proc.time() - a
#'
#' a = proc.time()
#' aSPC(X, Y, pow = c(1:8, Inf), B = 50, method = "dcor")
#' proc.time() - a
#'
#' a = proc.time()
#' aSPC_dcor2(X, Y, pow = c(1:8, Inf), B = 2)
#' proc.time() - a
#'
#' @export
#' @importFrom energy dcor


aSPC_dcor = function(df1, df2, pow = pow, B = B){
  # test
  # df1 = X; df2 = Y; pow = c(1:8, Inf); B=2
  # X = scale(df1)
  # Y = scale(df2)

  n = nrow(X) ## number of subjects
  ### X and Y has to be standardize before input
  ls1 = lapply(1:ncol(X), function(x) dist(X[,x]))
  ls1_mat =  lapply(1:ncol(X), function(x) as.matrix(dist(X[,x])))
  ls2 = lapply(1:ncol(Y), function(x) dist(Y[,x]))
  a = proc.time()
  mat_obs = dcor_list(ls1, ls2)
  proc.time() - a

  ## obs statistics
  T_obs = rep(NA,length(pow))


  for(k in 1:length(pow)){
    if(pow[k]<Inf) T_obs[k] = sum(mat_obs^pow[k]) else T_obs[k] = max(abs(mat_obs))
  }

  pPerm0 = rep(NA,length(pow))
  T0s = numeric(B)
  s <- sample(1:10^5,1)


  T0s = matrix(nrow = B, ncol = length(pow))
  for(i in 1: B){
    index = sample(nrow(X))
    ls1_sample = lapply(1:length(ls1_mat), function(x) as.dist(ls1_mat[[x]][index, index]))
    mat0 = dcor_list(ls1_sample, ls2)
    for(j in 1:length(pow)){
      if(pow[j] < Inf) T0s[i,j] = sum(mat0^pow[j]) else T0s[i,j] = max(abs(mat0))
    }
    # print(i)
  }

  for(j in 1:length(pow)){
    pPerm0[j] = round((sum(abs(T_obs[j])<=abs(T0s[1:(B-1),j]))+1)/(B), digits=8)
    P0s = (B-rank(abs(T0s[,j]))+1)/(B)
    if (j==1) minp0=P0s else minp0[which(minp0>P0s)]=P0s[which(minp0>P0s)]

  }

  Paspu<-(sum(minp0<=min(pPerm0))+1)/(B+1)
  pvs <- c(pPerm0, Paspu)
  names(pvs) = c(paste0("SPC.",pow),"aSPC")
  return(pvs)


}
jasonzyx/aSPC documentation built on May 18, 2019, 5:55 p.m.