R/extParaMatrix.R

Defines functions extParaMatrix

# extract distribution parameters from input data
#' @importFrom stats quantile
#' @importFrom vsn justvsn
#' @import ROTS
extParaMatrix <- function(dataMatrix, groupVec,
                          isLogTransformed=FALSE,
                          dataType=c("RNASeq","Proteomics"),
                          enableROTS=FALSE,
                          minLFC=0.5,
                          paraROTS=list(B=1000,
                                        K=NULL,
                                        paired=FALSE,
                                        a1=NULL,
                                        a2=NULL,
                                        progress=FALSE),
                          seed=123,
                          parallel=FALSE, 
                          BPPARAM=bpparam()){
  # determine dataType
  dataTypeSelect <- switch (dataType,
                            RNASeq=TRUE,
                            Proteomics=FALSE,
                            stop("Incorrect dataType."))
  set.seed(seed, kind = "L'Ecuyer-CMRG")
  if(length(groupVec) != ncol(dataMatrix))
    stop("groupVec and sample number vary in length.")
  # determine input group and replicate numbers
  groupVec <- as.character(groupVec) # unfactorize the group vector
  ###numRep <- floor(length(groupVec)/numGroup) #[!] unequal repNum
  numRep <- as.numeric(table(groupVec))
  numGroup <- length(numRep)

  if(dataTypeSelect & !enableROTS) {
      cat("[DESeq2] Estimating distribution parameters...\n")
      # RNASeq raw counts and use DESeq2
      # construct a colData for DESeq2
      colData <- data.frame(group=groupVec, row.names=colnames(dataMatrix))
      res <- estimationByDESeq2(dataMatrix, colData, parallel=parallel, BPPARAM=BPPARAM)
      dispersion <- res$dispersion
      LFCRes <- res$LFCRes
      dataMatrix <- res$normCounts
  }
  else{
    # calcualte log fold changes between each two groups
    if(!isLogTransformed) {
      cat("[vsn] Transforming data...\n")
      dataMatrix <- vsn::justvsn(dataMatrix, verbose=FALSE)
    }
    LFCRes <- calLFC(dataMatrix, groupVec)
  }

  groups <- unique(groupVec)
  comp_index <- combn(seq_len(numGroup), 2)
  comp_index <- split(comp_index, col(comp_index))
  idx <- split(seq_along(groupVec),
               rep(seq_along(groups), table(groupVec)))
  names(comp_index) <- lapply(comp_index, function(x)
                        paste0(groups[x[1]], ".vs.",groups[x[2]]))
  paraMatrices <- lapply(comp_index, function(x){
    g1 <- x[1]; g2 <- x[2]
    comp_idx <- paste0(groups[g1], ".vs.",groups[g2])
    cat(sprintf("\n  ------ <%s> ------  \n", comp_idx))
    cat("\nLog2 Fold Change Quantiles:\n")
    show(round(quantile(abs(LFCRes[,comp_idx]),
                        probs=seq(0,1,0.1),
                        type=9),2))

    # extract data from each group
    data.case.g1 <- dataMatrix[, idx[[g1]]]
    data.case.g2 <- dataMatrix[, idx[[g2]]]

    if(enableROTS){
      if(is.null(paraROTS[["B"]]))
        paraROTS[["B"]] <- 1000
      if(is.null(paraROTS[["paired"]]))
        paraROTS[["paired"]] <- FALSE
      if(is.null(paraROTS[["progress"]]))
        paraROTS[["progress"]] <- FALSE

      cat("\n[ROTS] Estimating statistics optimizing parameters...\n")
      rots.res <- suppressMessages(
                    ROTS(data=dataMatrix[,c(idx[[g1]], idx[[g2]])],
                    groups=rep(c(1,2), numRep[c(g1, g2)]),
                    log=isLogTransformed,
                    B=paraROTS[["B"]],
                    K=paraROTS[["K"]],
                    paired=paraROTS[["paired"]],
                    a1=paraROTS[["a1"]],
                    a2=paraROTS[["a2"]],
                    progress=paraROTS[["progress"]],
                    seed=seed))
      optPara <-  c(a1=rots.res$a1, a2=rots.res$a2)
      cat(sprintf("[ROTS] optimization parameters:\na1 = %s, a2 = %s \n",
                      optPara[1], optPara[2]))
    }

    if(dataTypeSelect & !isLogTransformed & !enableROTS) {
      #RNASeq-raw
      # calculate the means of all genes
      mean.g1 <- rowMeans(data.case.g1, na.rm = TRUE)
      mean.g2 <- rowMeans(data.case.g2, na.rm = TRUE)
      paraMatrix <- cbind(mean.g1, dispersion,
                          mean.g2, dispersion)
    }
    else{ #Proteomics / RNASeq-log
      meanSD.g1 <- apply(data.case.g1, 1, normDistMLE)
      meanSD.g2 <- apply(data.case.g2, 1, normDistMLE)
      paraMatrix <- t(rbind(meanSD.g1, meanSD.g2))

    }
    # record number of missing values
    numMiss.g1 <- apply(data.case.g1, 1,
                        function(x) sum(is.na(g1)))
    numMiss.g2 <- apply(data.case.g2, 1,
                        function(x) sum(is.na(g2)))
    paraMatrix <- cbind(paraMatrix, numMiss.g1, numMiss.g2)

    # LFC filter
    if(!is.na(minLFC) && minLFC > 0) {
      # filter the entrys with log fold change below minLFC
      abs.lfc <- abs(LFCRes[,comp_idx])
      filter <- names(abs.lfc[abs.lfc >= minLFC])
      # apply filter
      paraMatrix <- paraMatrix[filter, ]
      if(dataTypeSelect) {
        cat(sprintf("\n%s of %s genes are over minLFC threshold %s.\n",
                    length(filter), length(abs.lfc), minLFC))
      }
      else{
        cat(sprintf("\n%s of %s proteins are over minLFC threshold %s.\n",
                    length(filter), length(abs.lfc), minLFC))
      }
    }
    attr(paraMatrix, "Comparison") <- paste0(groups[x[1]], ".vs.",groups[x[2]])
    attr(paraMatrix, "numRep") <- numRep[c(g1, g2)]
    if(enableROTS)
      attr(paraMatrix, "optPara") <- optPara
    return(paraMatrix)
})
  attr(paraMatrices, "LFCRes") <- LFCRes
  return(paraMatrices)
}
xuqiao93/PowerExplorer documentation built on May 16, 2019, 9:13 p.m.