R/heatmapPaths.R
In GSEMA: Gene Set Enrichment Meta-Analysis

Documented in heatmapPaths

#' Visualization of the gene set enrichment meta-analysis results
#'
#' It allows to see how the different significant gene sets are expressed in the
#' different samples
#'
#'
#' @param objectMApath A list of list. Each list contains two elements.
#' The first element is the Gene Set matrix (gene sets in rows and samples in
#' columns) and the second element is a vector of zeros and ones that represents
#' the state of the different samples of the Gene Sets matrix.
#' 0 represents one group (controls) and 1 represents the other group (cases).
#'
#' @param resMA Output generated by the  function that performs
#' meta-analysis (metaAnalysisESpath).
#'
#' @param scaling Character variable to choose between different scaling
#' approaches. See "Details" for more information.
#'
#' @param regulation Character variable that indicates whether we want the
#' heatmap to show all significant paths ("all"), only the up-regulated paths
#' ("up") or only the down-regulated paths("down")
#'
#' @param breaks Numeric vector of length 2 that contains the extreme values
#' (minimum and maximum) of the range of values in which the heatmap
#' color scale will be distributed. Default a vector By default a
#' vector of -2 and 2 as extreme values.
#'
#' @param fdrSig Adjusted p-value from which a gene set is considered
#' significant. Default 0.05
#'
#' @param comES_Sig In absolute value. Combine effect size
#' threshold from which gene sets are considered. Default 0.5
#'
#' @param numSig The number of most significant paths to be represented.
#' If numSig = "all", all significant paths that meet the selected parameters
#' will be represented.
#'
#' @param color vector of colors used in heatmap.
#'
#' @param na_col color of the NA cell in the heatmap.
#'
#' @param legend logical to determine if legend should be drawn or not.
#'
#' @param cluster_cols boolean values determining if columns should be
#' clustered.
#'
#' @param cluster_rows boolean values determining if rows should be clustered.
#'
#' @param show_rownames boolean specifying if row names are be shown.
#'
#' @param show_colnames boolean specifying if column names are be shown.
#'
#' @param fontsize base fontsize for the plot
#'
#' @param fontsize_row fontsize for rownames (Default: fontsize)
#'
#' @param fontsize_col fontsize for colnames (Default: fontsize)
#'
#'
#'
#' @details Scaling approaches that can be used are:
#' \itemize{
#' \item "rscale": it applies rescale function of \emph{scales} package. Values
#' will be between -1 and 1)
#' \item "zscor":  It calculates a z-score value for each gene, that is, the
#' mean gene expression from each gene is subtracted from each gene expression
#' value and then it is divided by the standard deviation
#' \item "swr": it applys scaling relative to reference dataset approach
#' \item "none": any scaling approach it is applied.
#' }
#'
#'
#' @return The matrix represented in the heatmap
#'
#'
#' @author Juan Antonio Villatoro Garcia,
#' \email{juanantoniovillatorogarcia@@gmail.com}
#'
#' @seealso \code{\link{createObjectMApath}}, \code{\link{metaAnalysisESpath}}
#'
#' @references
#'
#' Hadley Wickham and Dana Seidel (2020). scales: Scale Functions for
#' Visualization. R package version 1.1.1.
#' \url{https://CRAN.R-project.org/package=scales}
#'
#' Lazar, C, Meganck, S, Taminau, J, and et al. 2013. “Batch Effect
#' Removal Methods for Microarray Gene Expression Data Integration:
#' A Survey,” 469–90.
#'
#' Raivo Kolde 2019. pheatmap: Pretty Heatmaps. R package version 1.0.12.
#' \url{https://CRAN.R-project.org/package=pheatmap}
#'
#'
#' @examples
#'
#' data("simulatedData")
#' resultsMA <- metaAnalysisESpath(objectMApathSim, typeMethod="REM")
#' heatmapPaths(objectMA=objectMApathSim, resultsMA,
#'     scaling = "zscor", regulation = "all",breaks=c(-2,2),
#'     fdrSig = 0.05, comES_Sig = 1.5, numSig=20)
#'
#' @export

heatmapPaths <- function(
        objectMApath,
    resMA,
    scaling=c("zscor","rscale","swr","none"),
    regulation=c("all", "up","down"),
    breaks=c(-2,2),
    fdrSig = 0.05,
    comES_Sig = 0.5,
    numSig = "all",
    color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
    na_col = "darkgrey",
    legend = TRUE,
    cluster_cols= FALSE,
    cluster_rows= FALSE,
    show_rownames = TRUE,
    show_colnames = FALSE,
    fontsize = 10,
    fontsize_row = fontsize,
    fontsize_col = fontsize){
    scaling<-match.arg(scaling)
    regulation<-match.arg(regulation)
    heatmap.var="none"
    if(!is.null(breaks)){
        bks=seq(breaks[1],breaks[2],length.out = 100)
    }else{
        bks=NULL
    }
    #Select paths
    if(!is.null(comES_Sig)){
        comES_Sig <- abs(comES_Sig)}
    sig.paths <- .pathsSig(resMA, regulation, numSig, fdrSig, comES_Sig)
    ## Create a matrix with all datasets
    all.cl <- NULL
    all.colnames <- NULL
    sizes <- NULL
    for(i in seq_len(length(objectMApath))){
        sizes <- c(sizes,rep(names(objectMApath[i]), length(objectMApath[[i]][[2]])))
        all.cl <- c(all.cl,objectMApath[[i]][[2]])
        colnames(objectMApath[[i]][[1]]) <- paste0(colnames(objectMApath[[i]][[1]]),
            ".", names(objectMApath[i]),
            sep="")
        all.colnames <- c(all.colnames,colnames(objectMApath[[i]][[1]]))
    }
    exp.ALL <- matrix(data=NA, ncol=length(all.colnames),nrow=length(sig.paths))
    rownames(exp.ALL) <- sig.paths
    colnames(exp.ALL) <- all.colnames
    ## Scaling datasets
    switch(scaling,
        rscale={exp.ALL<- .rscale(objectMApath, sig.paths, exp.ALL)
        bks=NULL},
        zscor={exp.ALL<- .zscor(objectMApath, sig.paths, exp.ALL)},
        none={heatmap.var="row"
        exp.ALL<- .none(objectMApath, sig.paths, exp.ALL)},
        swr={heatmap.var="row"
        exp.ALL<- .swr(objectMApath, resMA, sig.paths, exp.ALL)}
    )
    ## Heatmap
    ann <- data.frame(State=ifelse(all.cl == 0, "Healthy","Case"),
        Dataset=sizes, row.names=colnames(exp.ALL))
    if(nrow(exp.ALL) == 1){
        matrixPath <- exp.ALL[,order(all.cl, decreasing=TRUE)]
        matrixPath <- t(as.matrix(matrixPath))
        rownames(matrixPath) <- rownames(exp.ALL)
    }
    else{
        matrixPath <- exp.ALL[,order(all.cl, decreasing=TRUE)]
    }
    pheatmap(matrixPath,annotation_col=ann,
        cluster_cols=cluster_cols, cluster_rows=cluster_rows,
        scale=heatmap.var, breaks=bks,
        show_colnames=show_colnames,show_rownames=show_rownames, na_col = na_col,
        color = color, legend = legend, fontsize = fontsize,
        fontsize_row = fontsize_row, fontsize_col = fontsize_col)
    return(exp.ALL)
}



################################################################################

##Significant paths in results
.pathsSig <- function(resMA, regulation, numSig, fdrSig, comES_Sig){
    if(regulation=="up"){resMA <- resMA[rownames(resMA)[resMA$Com.ES>0],]}
    if(regulation=="down"){resMA <- resMA[rownames(resMA)[resMA$Com.ES<0],]}
    sig.paths <- rownames(resMA)[resMA$FDR<=fdrSig]
    if(!is.null(comES_Sig)){
        sig.paths2 <- rownames(resMA)[abs(resMA$Com.ES)>=comES_Sig]
        sig.paths <- intersect(sig.paths, sig.paths2)}
    ord <- resMA[sig.paths,]
    ord <- ord[order(-log10(ord$FDR+0.000000001), ord$Com.ES, decreasing = TRUE),]
    sig.paths <- ord$Pathway
    if(numSig == "all"){numSig <- length(sig.paths)}
    if(length(sig.paths) < numSig){numSig <- length(sig.paths)}
    sig.paths <- sig.paths[seq_len(numSig)]
    ord <- resMA[sig.paths,]
    sig.paths <- sig.paths[order(ord$Com.ES,decreasing = TRUE)]
}


##############################################################################

#rscale function
.rscale <- function(objectMApath, sig.paths, exp.ALL){
    message("scaling using rescale function...")
    for(set in seq_len(length(objectMApath))){
        temp<-objectMApath[[set]][[1]][intersect(sig.paths,
            rownames(
                objectMApath[[set]][[1]]
            )),]
        if(is.null(nrow(temp))){
            temp <- t(as.matrix(temp))
            rownames(temp) <- sig.paths
                }
        for(gene in seq_len(nrow(temp))){
            temp[gene,] <- rescale(x=temp[gene,],from=c(min(temp[gene,]),
                max(temp[gene,])),
                to=c(-1,1))
        }
        exp.ALL[rownames(temp),colnames(temp)] <- temp
    }
    return(exp.ALL)
}

#scaling zscor function
.zscor <- function(objectMApath, sig.paths, exp.ALL){
    message("scaling using z-score...")
    for(set in seq_len(length(objectMApath))){
        temp<-objectMApath[[set]][[1]][intersect(
            sig.paths,
            rownames(objectMApath[[set]][[1]])),]
        if(is.null(nrow(temp))){
            temp <- t(as.matrix(temp))
            rownames(temp) <- sig.paths
        }
        for(gene in seq_len(nrow(temp))){
            temp[gene,] <- (temp[gene,]-mean(temp[gene,],
                na.rm=TRUE))/sd(temp[gene,],
                    na.rm = TRUE)
        }
        exp.ALL[rownames(temp),colnames(temp)] <- temp
    }
    return(exp.ALL)
}

#scaling with reference scaling function
.swr <- function(objectMApath,resMA,sig.paths, exp.ALL){
    allpaths <- rownames(resMA)
    for (study in seq_len(length(objectMApath))){
        studypaths <- rownames(objectMApath[[study]][[1]])
        union <- unique(allpaths %in% studypaths)
        if(all(union)){
            ref <- names(objectMApath[study])
            break}
        else{ref <- NULL}
    }
    if(is.null(ref)){
        stop("scaling with reference method can not be used. Please use
        another method")
    }
    else{
        message("scaling with reference...")
        refH<-objectMApath[[ref]][[1]][intersect(sig.paths,
            rownames(objectMApath[[ref]][[1]])),
            objectMApath[[ref]][[2]]==0]
        refC<-objectMApath[[ref]][[1]][intersect(sig.paths,
            rownames(objectMApath[[ref]][[1]])),
            objectMApath[[ref]][[2]]==1]
        if(is.null(nrow(refH))){
            refH <- t(as.matrix(refH))
            rownames(refH) <- sig.paths
        }
        if(is.null(nrow(refC))){
            refC <- t(as.matrix(refC))
            rownames(refC) <- sig.paths
        }
        exp.ALL[rownames(refH),colnames(refH)] <- refH
        exp.ALL[rownames(refC),colnames(refC)] <- refC
        for(set in names(objectMApath)){
            if(set!=ref){
                tempH<-objectMApath[[set]][[1]][intersect(
                    sig.paths,
                    rownames(objectMApath[[set]][[1]])),
                    objectMApath[[set]][[2]]==0]
                tempC<-objectMApath[[set]][[1]][intersect(
                    sig.paths, rownames(objectMApath[[set]][[1]])),
                    objectMApath[[set]][[2]]==1]
                if(is.null(nrow(tempH))){
                    tempH <- t(as.matrix(tempH))
                    rownames(tempH) <- sig.paths
                }
                if(is.null(nrow(tempC))){
                    tempC <- t(as.matrix(tempC))
                    rownames(tempC) <- sig.paths
                }
                for(gene in seq_len(nrow(tempH))){
                    tempH[gene,]<-(tempH[gene,]*(sd(refH[gene,],
                        na.rm=TRUE)/sd(tempH[gene,],
                            na.rm=TRUE)))-
                        (((mean(tempH[gene,],na.rm=TRUE)*
                                (sd(refH[gene,],na.rm=TRUE)/sd(tempH[gene,],
                                    na.rm=TRUE)))-
                                mean(refH[gene,],na.rm=TRUE)))
                    tempC[gene,]<-(tempC[gene,]*
                            (sd(refC[gene,],na.rm=TRUE)/
                                    sd(tempC[gene,],na.rm=TRUE)))-
                        (((mean(tempC[gene,],na.rm=TRUE)*
                                (sd(refC[gene,],na.rm=TRUE)/sd(tempC[gene,],
                                    na.rm=TRUE)))-
                                mean(refC[gene,],na.rm=TRUE)))
                }
                exp.ALL[rownames(tempH),colnames(tempH)] <- tempH
                exp.ALL[rownames(tempC),colnames(tempC)] <- tempC
            }
        }
    }
    return(exp.ALL)
}

#none scaling function
.none <- function(objectMApath, sig.paths, exp.ALL){
    for(set in seq_len(length(objectMApath))){
        temp<-objectMApath[[set]][[1]][intersect(
            sig.paths,rownames(objectMApath[[set]][[1]])),]
        if(is.null(nrow(temp))){
            temp <- t(as.matrix(temp))
            rownames(temp) <- sig.paths
        }
        exp.ALL[rownames(temp),colnames(temp)] <- temp
    }
    return(exp.ALL)
}