R/plotHeatmap.R

#' Plot heatmap with optional clustering
#' 
#' Draw the heatmap plot from \code{\link{PlotSetArray}}, 
#' \code{\link{PlotSetList}}, \code{\link{PlotSetPair}} classes or properly 
#' formatted \code{\link[base]{list}} (see details) in active graphics window. 
#' Axes and titles, keys and other plot elements are controlled by function 
#' parameters.
#' 
#' 
#' @param plotset The dataset to plot - can be \code{\link{PlotSetArray}}, 
#'   \code{\link{PlotSetList}}, \code{\link{PlotSetPair}} or properly formatted 
#'   \code{\link[base]{list}}
#' @param clstmethod Determines the heatmap clustering algorithm "kmeans" for 
#'   k-means (default, see \code{\link[stats]{kmeans}}), "hclust" (see 
#'   \code{\link[stats]{hclust}}) for hierarchical clustering, "ssom" for 
#'   (super) self organising map (see \code{\link[kohonen]{supersom}}) with 
#'   torus topology and "none" of FALSE to turn off the clustering
#' @param clusters The number of cluster for "kmeans" and "hclust", ignored for 
#'   "ssom", defaults to 5L
#' @param ssomt1 Determines , the dimensionality of SOM - number of neurons in 
#'   1st dimension, number of resulting clusters equals ssomt1*ssomt2, defaults 
#'   to 2L
#' @param ssomt2 Determines , the dimensionality of SOM - number of neurons in 
#'   2st dimension, number of resulting clusters equals ssomt1*ssomt2, defaults 
#'   to 2L
#' @param include The logical vector indicating if given subplot should 
#'   influence clustering and sorting, if given element is FALSE the sub-heatmap
#'   will be still plotted, and the order of data rows will be determined by 
#'   clustering/sorting other sub-heatmaps, defaults to NULL, which incluses all
#'   - equivalent to \code{rep(TRUE, length(plotset))}
#' @param sortrows If \code{"increasing"} or \code{"decreasing"} the rows of 
#'  heatmap will be sorted by mean value across all heatmaps,
#'  defaults to \code{FALSE} - not sorted. For backwards compatibility \code{TRUE} 
#'  is synonymous to "increasing".
#' @param main The main title of the plot, shown in top-centre part of the 
#'   figure; defaults to NULL (not visible)
#' @param labels The character vector giving sub-titles of heatmaps (plotted 
#'   over the heatmap and below the main title). The defaults NULL value 
#'   indicates that feature/track file names will be used to generate the 
#'   sub-titles.
#' @param plotScale scale the available data before plotting, can be "linear" 
#'   (do not scale, default), "log2" or "zscore"
#' @param legend if TRUE plot the colour key
#' @param keepratio If TRUE keep 1:1 aspect ratio of the figure; defaults to 
#'   FALSE
#' @param xlab label below x-axis
#' @param ylab label below y-axis
#' @param cex.main Main title font size in points, defaults to 16
#' @param cex.axis Axis numbers font size in points, defaults to 12
#' @param cex.lab Axis labels font size in points, Defaults to 12
#' @param cex.legend Keys labels font size in points, defaults to 12
#' @param autoscale if TRUE the colour keys will be auto scaled
#' @param zmin global minimum value on colour key, ignored if \code{autoscale} 
#'   is TRUE
#' @param zmax global maximum value on colour key, ignored if \code{autoscale} 
#'   is TRUE
#' @param xlim the x limits (x1, x2) of the plot. Note that x1 > x2 is allowed 
#'   and leads to a "reversed axis". The default value, NULL, indicates that the
#'   whole range present in \code{plotset} will be plotted.
#' @param ln.v Determins if vertical guide line(s) should be plotted (TRUE) or 
#'   ommitted (FALSE). For anchored plots 2 lines indicating the start and end 
#'   of anchored distance are plotted.
#' @param s The saturation value used to auto scale colour key limits, defaults 
#'   to 0.01
#' @param indi If TRUE (defaults) the independent colour keys will be plotted 
#'   below heatmaps, if FALSE the commmon colour key is shown rightmost
#' @param o_min vector of length equal to number of sub heatmaps determining 
#'   minimum value on color key for each sub plot, if NULL (default) or NA the 
#'   global settings are used, ignored in \code{indi} is FALSE
#' @param o_max vector of length equal to number of sub heatmaps determining 
#'   maximum value on color key for each sub plot, if NULL (default) or NA the 
#'   global settings are used, ignored in \code{indi} is FALSE
#' @param colvec The vector of colours used to plot the lines and error estimate
#'   fields. If set value NULL (default) the automatically generated colour 
#'   values will be used. Accpeted values are: vector of any of the three kinds 
#'   of R colour specifications, i.e., either a color name (as listed by 
#'   colors()), a hexadecimal string of the form "#rrggbb" or "#rrggbbaa" (see 
#'   rgb), or a positive integer i meaning palette()[i]. See 
#'   \code{\link[grDevices]{col2rgb}}.
#' @param clspace The colours pace of the heatmap, see 
#'   \code{\link[grDevices]{grDevices}}
#' @param pointsize The default font point size to be used for plots. Defaults 
#'   to 12 (1/72 inch).
#' @param embed If TRUE plot single (first) heatmap without using grid system. 
#'   Useful to embed heatmap in complex layouts, see 
#'   \code{\link[graphics]{layout}} and \code{\link[graphics]{par}} for details.
#'   Defaults to FALSE.
#' @param ggplot Use ggplot2 package instead of standard  R graphics, 
#'   defaults to FALSE
#' @param raster The bitmap raster is used to plot the heatmap image, see 
#'   "useRaster" option in \code{\link[graphics]{image}} function and 
#'   \code{\link[ggplot2]{geom_raster}} function for details, defaults to FALSE
#' @param ... parameters passed to internal plotting function
#'   
#' @return The cluster report \code{data.frame}, giving cluster assignments and
#' sorting order for each feature. It contains following columns: \itemize{ 
#' \item \strong{originalOrder} - number of feature (row) in GFF/BED, can be
#' used to restore original order after sorting on cluster ID \item
#' \strong{ClusterID} - the numeric ID of the cluster. The topmost cluster on
#' the heatmap is annotated with 1, and the bottom cluster with k, where k
#' equals to number of clusters selected, exported only if clustering is enabled
#' \item \strong{SortingOrder} - the order imposed on heatmap by sorting by mean
#' row(s) values, exported only if sorting is enabled \item \strong{FinalOrder}
#' - the final order of heatmap's rows, this can be influenced by sorting and
#' clustering; 1 indicates topmost row }
#' 
#' @export
#' @family plotting functions
#'   
#' @examples
#' # Get the paths of example files                      
#' bed1 <- system.file("extdata", 
#'      "Transcripts_ce10_chrI_100Kb.bed", package="seqplots")
#' bed2 <- system.file("extdata", 
#'      "GSM1208361_chrI_100Kb_PeakCalls.bed", package="seqplots")
#' bw1 <- system.file("extdata", 
#'      "GSM1208360_chrI_100Kb_q5_sample.bw", package="seqplots")
#' 
#' #If required install C. elegans genomic package from Bioconductor
#' if(!"BSgenome.Celegans.UCSC.ce10" %in% BSgenome::installed.genomes()) {
#'     if(.Platform$OS.type != "windows" || .Machine$sizeof.pointer != 4) {
#'          source("http://bioconductor.org/biocLite.R")
#'          biocLite("BSgenome.Celegans.UCSC.ce10")
#'      }
#' }
#' 
#' #Get getPlotSetArray for track and feature files
#' if(.Platform$OS.type != "windows" || .Machine$sizeof.pointer != 4) {
#'     plotset1 <- getPlotSetArray(bw1, c(bed1, bed2), 'ce10')
#' } else {
#'     load(system.file("extdata", "precalc_plotset.Rdata", package="seqplots"))
#' }
#' 
#' # equivalent to plot(plotset1, what='h') or plotset1$plot(what='h')
#' plotHeatmap(plotset1[1]) 
#' 
setGeneric(
    "plotHeatmap",
    function(
        plotset, main="", labels=NA, legend=TRUE, keepratio=FALSE, 
        plotScale="no", sortrows=FALSE, clusters=5L,
        clstmethod="kmeans", include=NULL, ssomt1=2L, ssomt2=2L, cex.main=16,  
        cex.lab=12.0, cex.axis=12.0, cex.legend=12.0, xlab='', ylab="",
        autoscale=TRUE, zmin=0, zmax=10, xlim=NULL, ln.v=TRUE, s = 0.01, 
        indi=TRUE, o_min=NA, o_max=NA, colvec=NULL, clspace=NULL, pointsize=12, 
        embed=FALSE, ggplot=FALSE, raster=FALSE, ...
    ) standardGeneric("plotHeatmap")
)

#' @describeIn plotHeatmap Method for signature \code{\link[base]{list}} with 
#' following format: \code{list[[FEATURE]][[TRACK/MOTIF]][[KEY_VALUE]]}
setMethod(
    "plotHeatmap", signature(plotset='list'),
    function(plotset, ...) {
        opar <- par(no.readonly = TRUE)[c('pty')]
        
        if(keepratio) par(pty='s')
        
        if( is.null(plotset[[1]]$heatmap) ) 
            stop(
                'Heatmap plotting: No heatmap data avilabe!
                Re-run with "Calculate Heatmap" option selected.', call.=FALSE
            )
        if(length(unique(sapply(
            plotset, function(x) nrow(x[['heatmap']])))) != 1) 
            stop(
                'Heatmap plotting: All plots must have equal number of features.
                Do not plot heatmaps on multiple GFF/BED.', call.=FALSE
            )
        
        if(is.null(include)) { include <- rep(TRUE, length(plotset)) }
        
        #Heatmap data aquizition (as list of matrixes)
        HLST <- lapply(plotset, '[[', 'heatmap')
        
        #Optional scalling
        if ( plotScale ==  "log2" ) {
            HLST <- lapply(HLST, log2 )
        } else if ( plotScale == "zscore" ) {
            HLST <- lapply(HLST, scale )
        }
        
        #Preparing flat matrix fro sorrting an clustering
        Hclc <- do.call(cbind, HLST[ include ])
        
        finalOrd <- 1:nrow(Hclc)
        
        #Sorting
        if(sortrows == 'decreasing' | as.character(sortrows) == "TRUE") { 
            sorting_order <- order(
                rowMeans(Hclc, na.rm=TRUE), decreasing = TRUE
            ) 
            finalOrd <- finalOrd[sorting_order] 
            Hclc <- Hclc[sorting_order,]
        } else if(sortrows == 'increasing') { 
            sorting_order <- order(
                rowMeans(Hclc, na.rm=TRUE), decreasing = FALSE
            ) 
            finalOrd <- finalOrd[sorting_order] 
            Hclc <- Hclc[sorting_order,]
        } else {
            sorting_order <- 1:nrow(Hclc)
        }
        
        #Clustering
        if(clusters > 1 & clstmethod == 'kmeans') {
            
            Hcl <- Hclc; Hcl[is.na(Hcl)] <- 0
            k <- kmeans(Hcl, clusters)
            cls_order <- order(k$cluster)
            classes <- k$cluster
            
            finalOrd <- finalOrd[cls_order]
            #clusts <- k$size
            clusts <- table(classes)
            
        } else if(clstmethod == 'hclust') {
            
            Hcl <- Hclc; Hcl[is.na(Hcl)] <- 0
            cls <- hclust(dist(Hcl))
            cls_order <- cls$order
            
            #Awkward hack to rename class labels, so that they are in order
            init_cut <- cutree(cls, clusters)
            cut_map <- table(init_cut)[unique(init_cut[cls_order])]
            
            finalOrd <- finalOrd[cls_order]
            
            classes <- rep(1:clusters, cut_map)[order(finalOrd)]
            clusts <- cut_map
            
            #browser()
            
        } else if(clstmethod == 'ssom') {
            
            Hlist <- HLST[ include ]
            Hlist <- lapply(Hlist, function(x) {
                x[is.na(x)] <- 0; 
                if(sortrows == 'decreasing' | as.character(sortrows) == "TRUE" 
                    | sortrows == 'increasing') {
                    x <- x[sorting_order,]
                }
                return(x)
            })
            
            ssom <- supersom(
                Hlist, grid = class::somgrid(
                    xdim = ssomt1, ydim = ssomt2, "hexagonal"), 
                rlen = 100, toroidal=TRUE)
            
            classes <- ssom$unit.classif
            cls_order <- order(ssom$unit.classif)
            
            finalOrd <- finalOrd[cls_order]
            clusts <- table(classes)
            
        } else {
            classes <- NA
            clusts <- NULL
        }
        
        HLST <- lapply(HLST, function(x) { return(x[finalOrd, ]) } )
        
        lab <- sapply(plotset, '[[', 'desc')
        labels <- labels[1:length(plotset)]
        lab[!is.na(labels)] <- labels[!is.na(labels)]
        
        
        if( nchar(main) > 0 & !embed) par(oma=c(0,0,(cex.main/12)+1,0) )
        
        if( ggplot ) {
            ggHeatmapPlotWrapper( 
                HLST, axhline=clusts, bins=plotset[[1]]$all_ind, titles=lab, 
                e=plotset[[1]]$e, Leg=legend, cex.lab=cex.lab, cex.axis=cex.axis, 
                cex.legend=cex.legend, xlab=xlab, ylab=ylab, autoscale=autoscale, 
                zmin=zmin, zmax=zmax, xlim=xlim, ln.v=ln.v, s=s, indi=indi,
                o_min=o_min, o_max=o_max, colvec=colvec, colorspace=clspace, 
                pointsize=pointsize, embed=embed, main=main,
                ...
            )
        } else {
            heatmapPlotWrapper( 
                HLST, axhline=clusts, bins=plotset[[1]]$all_ind, titles=lab, 
                e=plotset[[1]]$e, Leg=legend, cex.lab=cex.lab, cex.axis=cex.axis, 
                cex.legend=cex.legend, xlab=xlab, ylab=ylab, autoscale=autoscale, 
                zmin=zmin, zmax=zmax, xlim=xlim, ln.v=ln.v, s=s, indi=indi,
                o_min=o_min, o_max=o_max, colvec=colvec, colorspace=clspace, 
                pointsize=pointsize, embed=embed, raster=raster, 
                dendro=if(clstmethod == 'hclust') as.dendrogram(cls) else NULL,
                ...
            )
            title(main, outer = TRUE, cex.main=cex.main/pointsize)
        }
        

        
        infile <- strsplit( plotset[[1]]$desc, '\n@')[[1]][[2]]
        # TODO: implement saving GenomicRanges in GetPlotSetArray
        #           elementMetadata(gr) <- elementMetadata(gr)[!
        #               sapply( elementMetadata(gr), function(x) all(is.na(x)))]
        #           if( length(colnames(elementMetadata(gr))) ) { 
        #               colnames(elementMetadata(gr)) <- 
        #                    paste0('metadata_', colnames(elementMetadata(gr))) 
        #           }
        #              
        #           gr$OriginalOrder <- 1:length(gr); 
        #           if( nchar(input$clusters) ) 
        #               gr$ClusterID <- fromJSON(input$clusters)
        #           if( nchar(input$sortingord) ) 
        #               gr$SortingOrder <- order(fromJSON(input$sortingord))
        #               
        #           gr$FinalOrder <- order(finalOrd)
        #               
        #           out <- as.data.frame(gr); colnames(out)[1] <- 'chromosome'
        #           out <- out[finalOrd,]
        par(opar)
        out <- data.frame(
            originalOrder=1:length(finalOrd), 
            ClusterID=classes[order(sorting_order)], 
            SortingOrder=sorting_order, 
            FinalOrder=finalOrd
        )
        return( invisible(out) )
    }
)

#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetPair}}
#' @include PlotSetPair-class.R
setMethod(
    "plotHeatmap", signature(plotset='PlotSetPair'),
    function(plotset, ...) {
        plotHeatmap(
            list(plotset), main, labels, legend, keepratio, 
            plotScale, sortrows, clusters, clstmethod, 
            include, ssomt1, ssomt2, cex.main,  cex.lab, cex.axis, 
            cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v, 
            s, indi, o_min, o_max, colvec, clspace, pointsize, 
            embed, ggplot, raster, ...
        )
    }
)

#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetList}}
#' @include PlotSetList-class.R
setMethod(
    "plotHeatmap", signature(plotset='PlotSetList'), 
    function(plotset, ...) {
        plotHeatmap(
            plotset$data, main, labels, legend, keepratio, 
            plotScale, sortrows, clusters, clstmethod, 
            include, ssomt1, ssomt2, cex.main,  cex.lab, cex.axis, 
            cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v, 
            s, indi, o_min, o_max, colvec, clspace, pointsize, 
            embed, ggplot, raster, ...
        )
    }
)

#' @describeIn plotHeatmap Method for signature \code{\link{PlotSetArray}}
#' @include PlotSetArray-class.R
setMethod(
    "plotHeatmap", signature(plotset='PlotSetArray'), 
    function(plotset, ...) {
        plotHeatmap(
            unlist(plotset)$data, main, labels, legend, keepratio, 
            plotScale, sortrows, clusters, clstmethod, 
            include, ssomt1, ssomt2, cex.main,  cex.lab, cex.axis, 
            cex.legend, xlab, ylab, autoscale, zmin, zmax, xlim, ln.v, 
            s, indi, o_min, o_max, colvec, clspace, pointsize, 
            embed, ggplot, raster, ...
        )
    }
)
Przemol/seqplots-release documentation built on May 8, 2019, 3:47 a.m.