acPCoA: Adjustment for Confounding Factors using Principal Coordinate Analysis

Documented in acPCoA_prediction

#' Perform AC-PCoA on new data points for classification and prediction
#'
#' @param training.dist the n by n data distance matrix, where n is the number of training samples.
#' @param testing.dist the n by m data distance matrix, where n is the number of training samples and m is the number of testing samples.
#' @param training.confounder the n by q confounder matrix of training samples, where n is the number of samples, q is the number of confounding factors.
#' @param training.label Label of traning samples for classification. Optional.
#' @param testing.label Label of testing samples for classification. Optional.
#' @param nPC number of principal components to compute
#' @param kernel the kernel to use: "linear", "gaussian".
#' @param bandwidth bandwidth h for Gaussian kernel. Optional.
#' @param lambdas the tuning parameter, non-negative.
#' @param anov True or False. Whether the penalty term has the between groups sum of squares interpretation. Default is True.
#' @param perc the best lambda is defined to be the smallest lambda with R(lambda)<=perc (if anov=T), or R(lambda)<=perc*R(lambda=0) (if anov=F) in the nPC principal components.
#'
#' @return principal components and the projected data of training and testing samples
#' \item{v}{the principal components, p by nPC matrix}
#' \item{Xv_training}{the projected data of training samples, i.e. X times v}
#' \item{Xv_testing}{the projected data of testing samples}
#' \item{eigenvalueX}{eigenvalues for the PCs}
#' \item{varianceX}{variance explained by the PCs}
#' \item{PredictingAccuracy}{the accuracy of predicting labels of testing samples using Random Forest, if training and testing labels are provided.}
#' @export
#'
#' @examples
#' \dontrun{
#' training.dist=data_mbqc_groupA$DistMat.BC[1:679,1:679]
#' testing.dist=data_mbqc_groupA$DistMat.BC[1:679,680:848]
#' training.confounder=data_mbqc_groupA$ConfounderMat[1:679,]
#' label=as.factor(data_mbqc_groupA$Specimen)
#' training.label=label[1:679]
#' testing.label=label[680:848]
#' result_prediction=acPCoA_prediction(training.dist,testing.dist,training.confounder,training.label,testing.label,nPC=2,kernel="linear",bandwidth=NULL,lambdas=seq(0, 5, 0.05),anov=T,perc=0.05)
#' }

acPCoA_prediction <- function(training.dist,testing.dist,training.confounder,training.label,testing.label,nPC=2,kernel="linear",bandwidth=NULL,lambdas=seq(0, 5, 0.05),anov=T,perc=0.05){
  D_star <- as.matrix(testing.dist)
  D <- as.matrix(training.dist)
  n <- nrow(D)
  n_star <- ncol(D_star)
  centre <- function(D, n) {
    One <- matrix(1, n, n)
    mat <- diag(n) - One/n
    mat.cen <- mat %*% D %*% mat
  }
  centre_testing <- function(K,K_star,n,n_star){
    One1 <- matrix(1/n, n, n)
    One2 <- matrix(1/n, n, n_star)
    mat.cen_testing <- K_star - One1%*%K_star - K%*%One2 + One1%*%K%*%One2;
  }
  names <- rownames(D)
  D_colMean0=D-t(matrix(rep(colMeans(D),n),n,n))
  D_bothMean0=D_colMean0-matrix(rep(rowMeans(D_colMean0),n),n,n)
  epsilon <- sqrt(.Machine$double.eps)*1000000
  ## begin
  delta1 <- centre((-0.5 * D^2), n)
  Gram_testing <- centre_testing((-0.5 * D^2),(-0.5 * D_star^2), n,n_star)
  trace <- sum(diag(delta1))
  D.eig <- eigen(delta1,symmetric=TRUE)
  min.eig <- min(D.eig$values)
  zero.eig <- which(abs(D.eig$values) < epsilon)
  #zero.eig <- which(D.eig$values < epsilon)
  D.eig$values[zero.eig] <- 0
  eig <- D.eig$values
  k <- length(which(eig > epsilon))
  pcoa_vectors <- sweep(D.eig$vectors[, 1:k], 2, sqrt(eig[1:k]),
                   FUN = "*")
  vectors_nor <- sweep(D.eig$vectors[, 1:k], 2, 1/sqrt(eig[1:k]),
                       FUN = "*")
  pcoa_vector_testing = t(Gram_testing)%*%vectors_nor
  values <- data.frame(eig[1:k])
  colnames(values) <- c("Eigenvalues")
  rownames(values) <- 1:nrow(values)
  rownames(pcoa_vectors) <- names
  colnames(pcoa_vectors) <- colnames(pcoa_vectors, do.NULL = FALSE, prefix = "Axis.")
  #out <- (list(vector_testing=vector_testing,values = values, vectors = vectors, vectors_nor = vectors_nor))

  if(dim(pcoa_vectors)[2]<nPC){
    nPC=dim(pcoa_vectors)[2]
    }
  result_tune <- acPCAtuneLambda(X=pcoa_vectors, Y=training.confounder, nPC, lambdas, kernel, bandwidth,perc, centerX=T, centerY=T, scaleX=F, scaleY=F,  anov,  quiet=T)
  result <- acPCA(X=pcoa_vectors, Y=training.confounder, nPC, lambda=result_tune$best_lambda, kernel, bandwidth=NULL)
  training.input <- result$Xv
  validation.input <- pcoa_vector_testing%*%result$v
  result_prediction=list("Xv_training"=training.input,"v"=result$v,"Xv_testing"=validation.input,"eigenvalueX"=result$eigenX,"varianceX"=result$varX)
  if(is.null(training.label) || is.null(testing.label)){
    return(result_prediction)
  }else{
    rf_classifier <- randomForest(x=training.input,y=training.label)
    prediction <- predict(rf_classifier,validation.input)
    accuracy <- mean(testing.label==prediction)
    result_prediction$PredictingAccuracy=accuracy
    return(result_prediction)
  }
}

YuWang28/acPCoA documentation built on Dec. 18, 2021, 8:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

YuWang28/acPCoA
Adjustment for Confounding Factors using Principal Coordinate Analysis

R/acPCoA_prediction.R
In YuWang28/acPCoA: Adjustment for Confounding Factors using Principal Coordinate Analysis

Defines functions acPCoA_prediction

Documented in acPCoA_prediction

R Package Documentation

Browse R Packages

We want your feedback!

YuWang28/acPCoA Adjustment for Confounding Factors using Principal Coordinate Analysis

R/acPCoA_prediction.R In YuWang28/acPCoA: Adjustment for Confounding Factors using Principal Coordinate Analysis

Defines functions acPCoA_prediction

Documented in acPCoA_prediction

R Package Documentation

Browse R Packages

We want your feedback!

YuWang28/acPCoA
Adjustment for Confounding Factors using Principal Coordinate Analysis

R/acPCoA_prediction.R
In YuWang28/acPCoA: Adjustment for Confounding Factors using Principal Coordinate Analysis