DExMA: Differential Expression Meta-Analysis

Documented in makeHeatmap

#' Visualization of the meta-analysis results
#'
#' It allows to see how the different significant genes are expressed in the 
#' different samples. Missing genes appear in gray
#'
#'
#' @param objectMA A list of list. Each list contains two elements. The first
#' element is the expression matrix (genes in rows and sample in columns) and
#' the second element is a vector of zeros and ones that represents the state
#' of the different samples of the expression matrix. 0 represents one group
#' (controls) and 1 represents the other group (cases).
#' The result of the CreateobjectMA can be used too.
#'
#' @param resMA Output generated by the  function that performs
#' meta-analysis (metaAnalysisDE).
#'
#'
#' @param scaling Character variable to choose between different scaling 
#' approaches. See "Details" for more information.
#' 
#' @param regulation Character variable that indicates whether we want the 
#' heatmap to show all significant genes ("all"), only the up-regulated genes
#' ("up") or only the down-regulated genes("down")
#' 
#' @param breaks Numeric vector of length 2 that contains the extreme values
#' (minimum and maximum) of the range of values in which the heatmap 
#' color scale will be distributed. Default a vector By default a 
#' vector of -2 and 2 as extreme values.
#' 
#' @param fdrSig Adjusted p-value from which a gene is considered significant.
#' Default 0.05
#' 
#' @param logFCSig In absolute value. Log Fold Change threshold from which genes 
#' are considered in the heatmap.
#' 
#' @param numSig The number of most significant genes to be represented. 
#' If numSig = "all", all significant genes that meet the selected parameters 
#' will be represented.
#' 
#' @param color vector of colors used in heatmap.
#' 
#' @param na_col color of the NA cell in the heatmap.
#' 
#' @param legend logical to determine if legend should be drawn or not.
#' 
#' @param cluster_cols boolean values determining if columns should be 
#' clustered.
#' 
#' @param cluster_rows boolean values determining if rows should be clustered.
#' 
#' @param show_rownames boolean specifying if row names are be shown.
#' 
#' @param show_colnames boolean specifying if column names are be shown.
#' 
#' @details Scaling approaches that can be used are:
#' \itemize{
#' \item "rscale": it applies rescale function of \emph{scales} package. Values
#' will be between -1 and 1)
#' \item "zscor":  It calculates a z-score value for each gene, that is, the 
#' mean gene expression from each gene is subtracted from each gene expression 
#' value and then it is divided by the standard deviation
#' \item "swr": it applys scaling relative to reference dataset approach
#' \item "none": any scaling approach it is applied.
#' }
#' 
#'
#' @return The matrix represented in the heatmap
#'
#'
#' @author Juan Antonio Villatoro Garcia,
#' \email{juanantoniovillatorogarcia@@gmail.com}
#'
#' @seealso \code{\link{createObjectMA}}, \code{\link{metaAnalysisDE}}
#'
#' @references 
#' 
#' Hadley Wickham and Dana Seidel (2020). scales: Scale Functions for 
#' Visualization. R package version 1.1.1. 
#' \url{https://CRAN.R-project.org/package=scales}
#' 
#' Lazar, C, Meganck, S, Taminau, J, and et al. 2013. “Batch Effect 
#' Removal Methods for Microarray Gene Expression Data Integration: 
#' A Survey,” 469–90.
#' 
#' Raivo Kolde 2019. pheatmap: Pretty Heatmaps. R package version 1.0.12.
#' \url{https://CRAN.R-project.org/package=pheatmap}
#' 
#'
#' @examples
#'
#' data(DExMAExampleData)
#'
#' resultsMA <- metaAnalysisDE(maObject, typeMethod="REM")
#' makeHeatmap(objectMA=maObject,
#' scaling = "zscor", regulation = "all",breaks=c(-2,2), 
#' fdrSig = 0.05, logFCSig = 1.5, numSig=40)
#'
#' @export

makeHeatmap <- function(
    objectMA,
    resMA,
    scaling=c("zscor","rscale","swr","none"),
    regulation=c("all", "up","down"),
    breaks=c(-2,2),
    fdrSig = 0.05,
    logFCSig = 1.5,
    numSig = "all",
    color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
    na_col = "darkgrey",
    legend = TRUE, 
    cluster_cols= FALSE, 
    cluster_rows= FALSE,
    show_rownames = TRUE,
    show_colnames = FALSE){
  scaling<-match.arg(scaling)
  regulation<-match.arg(regulation)
  heatmap.var="none"
  if(!is.null(breaks)){
    bks=seq(breaks[1],breaks[2],length.out = 100)
  }else{
    bks=NULL
  }
  #Select genes
  if(!is.null(logFCSig)){
  logFCSig <- abs(logFCSig)}
  sig.genes <- .genesSig(resMA, regulation, numSig, fdrSig, logFCSig)
  ## Create a matrix with all datasets
  all.cl <- NULL
  all.colnames <- NULL
  sizes <- NULL
  for(i in seq_len(length(objectMA))){
    sizes <- c(sizes,rep(names(objectMA[i]), length(objectMA[[i]][[2]])))
    all.cl <- c(all.cl,objectMA[[i]][[2]])
    colnames(objectMA[[i]][[1]]) <- paste0(colnames(objectMA[[i]][[1]]),
                                           ".", names(objectMA[i]), 
                                           sep="")
    all.colnames <- c(all.colnames,colnames(objectMA[[i]][[1]]))
  }
  exp.ALL <- matrix(data=NA, ncol=length(all.colnames),nrow=length(sig.genes))
  rownames(exp.ALL) <- sig.genes
  colnames(exp.ALL) <- all.colnames
  ## Scaling datasets
  switch(scaling,
         rscale={exp.ALL<- .rscale(objectMA, sig.genes, exp.ALL)
         bks=NULL},
         zscor={exp.ALL<- .zscor(objectMA, sig.genes, exp.ALL)},
         none={heatmap.var="row"
         exp.ALL<- .none(objectMA, sig.genes, exp.ALL)},
         swr={heatmap.var="row"
         exp.ALL<- .swr(objectMA, resMA, sig.genes, exp.ALL)}
  )
  ## Heatmap
  ann <- data.frame(State=ifelse(all.cl == 0, "Healthy","Case"),
                    Dataset=sizes, row.names=colnames(exp.ALL))
  pheatmap(exp.ALL[,order(all.cl, decreasing=TRUE)],annotation_col=ann,
           cluster_cols=cluster_cols, cluster_rows=cluster_rows, 
           scale=heatmap.var, breaks=bks,
           show_colnames=show_colnames,show_rownames=show_rownames, na_col = na_col, 
           color = color, legend = legend)
  return(exp.ALL)
}



################################################################################

##Significant genes in results
.genesSig <- function(resMA, regulation, numSig, fdrSig, logFCSig){
  if(regulation=="up"){resMA <- resMA[rownames(resMA)[resMA$AveFC>0],]}
  if(regulation=="down"){resMA <- resMA[rownames(resMA)[resMA$AveFC<0],]}
  sig.genes <- rownames(resMA)[resMA$FDR<=fdrSig]
  if(!is.null(logFCSig)){
    sig.genes2 <- rownames(resMA)[abs(resMA$AveFC)>=logFCSig]
    sig.genes <- intersect(sig.genes, sig.genes2)}
  ord <- resMA[sig.genes,]
  sig.genes <- sig.genes[order(ord$FDR,decreasing = FALSE)]
  if(numSig == "all"){numSig <- length(sig.genes)}
  if(length(sig.genes) < numSig){numSig <- length(sig.genes)}
  sig.genes <- sig.genes[seq_len(numSig)]
  ord <- resMA[sig.genes,]
  sig.genes <- sig.genes[order(ord$AveFC,decreasing = TRUE)]
}

##############################################################################

#rscale function
.rscale <- function(objectMA, sig.genes, exp.ALL){
  print("scaling using rescale function...")
  for(set in seq_len(length(objectMA))){
    temp<-objectMA[[set]][[1]][intersect(sig.genes,
                                         rownames(
                                           objectMA[[set]][[1]]
                                         )),]
    
    for(gene in seq_len(nrow(temp))){
      temp[gene,] <- rescale(x=temp[gene,],from=c(min(temp[gene,]), 
                                                  max(temp[gene,])),
                             to=c(-1,1))
    }
    exp.ALL[rownames(temp),colnames(temp)] <- temp
  }
  return(exp.ALL)
}

#scaling zscor function
.zscor <- function(objectMA, sig.genes, exp.ALL){
  print("scaling using z-score...")
  for(set in seq_len(length(objectMA))){
    temp<-objectMA[[set]][[1]][intersect(
      sig.genes,
      rownames(objectMA[[set]][[1]])),]
    
    for(gene in seq_len(nrow(temp))){
      temp[gene,] <- (temp[gene,]-mean(temp[gene,],
                                       na.rm=TRUE))/sd(temp[gene,], 
                                                       na.rm = TRUE)
    }
    exp.ALL[rownames(temp),colnames(temp)] <- temp
  }
  return(exp.ALL)
}

#scaling with reference scaling function
.swr <- function(objectMA,resMA,sig.genes, exp.ALL){
  allgenes <- rownames(resMA)
  for (study in seq_len(length(objectMA))){
    studygenes <- rownames(objectMA[[study]][[1]])
    union <- unique(allgenes %in% studygenes)
    if(all(union)){
      ref <- names(objectMA[study])
      break}
    else{ref <- NULL}
  }
  if(is.null(ref)){
    stop("scaling with reference method can not be used. Please use 
        another method")
  }
  else{
    print("scaling with reference...")
    refH<-objectMA[[ref]][[1]][intersect(sig.genes,
                                         rownames(objectMA[[ref]][[1]])),
                               objectMA[[ref]][[2]]==0]
    refC<-objectMA[[ref]][[1]][intersect(sig.genes,
                                         rownames(objectMA[[ref]][[1]])),
                               objectMA[[ref]][[2]]==1]
    exp.ALL[rownames(refH),colnames(refH)] <- refH
    exp.ALL[rownames(refC),colnames(refC)] <- refC
    for(set in names(objectMA)){
      if(set!=ref){
        tempH<-objectMA[[set]][[1]][intersect(
          sig.genes,
          rownames(objectMA[[set]][[1]])),
          objectMA[[set]][[2]]==0]
        tempC<-objectMA[[set]][[1]][intersect(
          sig.genes, rownames(objectMA[[set]][[1]])),
          objectMA[[set]][[2]]==1]
        for(gene in seq_len(nrow(tempH))){
          tempH[gene,]<-(tempH[gene,]*(sd(refH[gene,],
                                          na.rm=TRUE)/sd(tempH[gene,],
                                                         na.rm=TRUE)))-
            (((mean(tempH[gene,],na.rm=TRUE)*
                 (sd(refH[gene,],na.rm=TRUE)/sd(tempH[gene,],
                                                na.rm=TRUE)))-
                mean(refH[gene,],na.rm=TRUE)))
          tempC[gene,]<-(tempC[gene,]*
                           (sd(refC[gene,],na.rm=TRUE)/
                              sd(tempC[gene,],na.rm=TRUE)))-
            (((mean(tempC[gene,],na.rm=TRUE)*
                 (sd(refC[gene,],na.rm=TRUE)/sd(tempC[gene,],
                                                na.rm=TRUE)))-
                mean(refC[gene,],na.rm=TRUE)))
        }
        exp.ALL[rownames(tempH),colnames(tempH)] <- tempH
        exp.ALL[rownames(tempC),colnames(tempC)] <- tempC
      }
    }
  }
  return(exp.ALL)
}

#none scaling function
.none <- function(objectMA, sig.genes, exp.ALL){
  for(set in seq_len(length(objectMA))){
    temp<-objectMA[[set]][[1]][intersect(
      sig.genes,rownames(objectMA[[set]][[1]])),]
    exp.ALL[rownames(temp),colnames(temp)] <- temp
  }
  return(exp.ALL)
}