R/p06_profile_heatmap.R

Defines functions profile_heatmap

Documented in profile_heatmap

##################################################################################
#' Generate profile heatmap
#'
#' This function generate a profile heatmap using EnrichedHeatmap function. The genes
#' are clustered into different groups based on the profile using k-means clustering
#' algorithm. Optionally, the prior clustering information can be provided through
#' argument geneGroups
#'
#' @param profileMat profile matrix of class \code{normalizedMatrix}
#' @param signalName name of the signal track
#' @param profileColor Color function for profile heatmap generated by \code{colorRamp2}
#' @param columnTitle title to be placed on the column. Default: signalName
#' @param createClusters Logical: whether or not to do kmeans clustering. If FALSE,
#' clusterStorePath must be provided which has the cluster data. Default: FALSE
#' @param numClust number of clusters to generate using k-means clustering. Default: 7
#' @param geneGroups Either a dataframe or a file path with cluster information or NULL.
#' Two columns must be present in this dataframe: \code{geneId, cluster}. In case it is NULL, heatmap
#' rows are not split into different groups.
#' @param showAnnotation Logical: to show or hide top annotation. Default: TRUE
#' @param clusterColor Color assignment object for cluster colors. If nothing provided, colors
#' are generated internally. Default: NULL
#' @param ylimFraction A numeric vector with one or two elements for deciding the \code{ylimit} of top
#' profile heatmap annotation. If the value is single number, it has to be floating point number
#' to extract the quantile and use limit \code{[0, quantile(x)]}. If the value is numeric vector of
#' length 2, the two numbers are used as lower and upper limits for ylimit of top annotation.
#' Default: NULL.
#' @param posLineGpar \code{pos_line_gp} parameter of \code{EnrichedHeatmap}. Default:
#' \code{gpar(lty = 2, alpha = 0.4, lwd = 0.5)}
#' @param rasterPar rasterization parameters as list() with two elements: use, qual. Default:
#' \code{list(use = TRUE, qual = 5)}. \code{rasterPar$use} is used for \code{use_raster} argument and
#' \code{rasterPar$qual} is used for \code{raster_quality} argument of \code{Heatmap} function.
#' @param ... Other arguments for EnrichedHeatmap function
#'
#' @return A list object with following elements:
#' \itemize{
#' \item heatmap: Enriched heatmap
#' \item rowGroupHt: row group heatmap
#' \item legend: Annotation legend for clusters
#' \item cluster: dataframe with cluster information
#' \item clusterColor: cluster color assignment information
#' \item profileColor: Heatmap profile color function generated by colorRamp2
#' \item ylim: ylimit used for the top annotation Y axis limit
#' \item kmeans: if kmeans clustering was performed, a kmeans clustering result object
#' }
#'
#' @export
#'
#' @examples NA
#'
profile_heatmap <- function(profileMat,
                            signalName,
                            profileColor,
                            columnTitle = NULL,
                            createClusters = FALSE,
                            numClust = 7,
                            geneGroups = NULL,
                            showAnnotation = TRUE,
                            clusterColor = NULL,
                            ylimFraction = NULL,
                            posLineGpar = gpar(lty = 2, alpha = 0.4, lwd = 0.5),
                            rasterPar = list(use = TRUE, qual = 5),
                            ...){

  cat("Generating profile heatmap for sample", signalName, "\n")

  if ( missing(columnTitle) || is.null(columnTitle) ) {
    columnTitle <- signalName
  }


  if (! is.null(geneGroups) ){
    if( (!is.data.frame(geneGroups)) && isFALSE(createClusters) ){
      if(! file.exists(geneGroups) ){
        stop("geneGroups argument has to be either NULL or a dataframe or a file which has cluster information")
      }
    }

  }


  ## hirerchical clustering
  # dend = hclust(dist(profileMat), method = "ward.D")
  # clusterCut = cutree(dend, numClust)
  # clusterData = data.frame(geneId = names(clusterCut), cluster = clusterCut, stringsAsFactors = F, row.names = names(clusterCut))

  clusterData <- NULL
  lgd <- NULL
  clusterHt <- NULL


  if( isTRUE(createClusters) ){
    ## do k-means clustering or read existing clustering information
    profileKm <- profile_matrix_kmeans(mat = profileMat,
                                       km = numClust,
                                       clustFile = geneGroups,
                                       name = signalName)

    clusterData <- profileKm$geneClusters

  } else if( ! is.null(geneGroups) ){

    if(is.data.frame(geneGroups)){
      ## read the clustering information from dataframe

      # cat("Using the provided clusterData...\n")
      clusterData <- as.data.frame(geneGroups)
      rownames(clusterData) <- NULL

    } else if(file.exists(geneGroups)) {
      ## read clustering information from cluster file
      # cat("Reading k-means clustering information sample", signalName, "\nNumber of clusters:", numClust, "\n")
      clusterData <- suppressMessages(readr::read_tsv(file = geneGroups))
    }

  } else if( is.null(geneGroups) ){

    cat("Heatmap wont be split as geneGroups information is NULL\n")

  }



  if( !is.null(clusterData) ){

    clusterData <- dplyr::select(clusterData, geneId, cluster) %>%
      tibble::column_to_rownames(var = "geneId") %>%
      as.data.frame()

    ## make sure that the rows of clusterdata and profile matrix are in same order
    ## if the order is different, the splitting will be changed
    if(! all(rownames(profileMat) == rownames(clusterData) ) ){
      stop("Row order for the profile matrix and clustering dataframe does not match")
    }


    ## set colors for clusters. IMP to sort as they will be arranged on heatmap in order
    if(is.null(clusterColor)){

      clusterNames <- levels(clusterData$cluster)
      # cat("Colors for clusters not provided. Generating new colors for clusters: ", clusterNames, "\n")

      # clusterColor <- structure(rainbow(length(clusterNames)), names = clusterNames)
      clusterColor <- structure(
        chipmineEnv$customColors[1:length(clusterNames)],
        names = clusterNames)

      ## if there are too many clusters, use rainbow colors
      if(length(clusterNames) > length(chipmineEnv$customColors)){
        clusterColor <- structure(rainbow(length(clusterNames)), names = clusterNames)
      }

    } else{
      ## modify the clusterColor: remove the cluster_x which is not present in the clusterData
      clusterNames <- levels(clusterData$cluster)
      clusterColor <- clusterColor[clusterNames]
    }

    # print(clusterColor)


    ## annotation legend
    lgd <- Legend(at = names(clusterColor), title = "Clusters",
                  type = "lines", legend_gp = gpar(col = clusterColor))


    ## use heatmap instead of rowAnnotation for cluster
    clusterHt <- ComplexHeatmap::Heatmap(
      matrix = as.matrix(clusterData["cluster"]),
      name = "clusterAn",
      col = clusterColor,
      width = unit(0.3, "cm"),
      show_row_names = FALSE,
      show_heatmap_legend = FALSE
    )

  } else if( is.null(clusterData)){
    ## if there is no splitting of the rows, use single color
    clusterColor <- "red"
  }


  ## ylimit for the annotation
  ylimit <- NULL

  if(!is.null(ylimFraction) && length(ylimFraction) == 1){
    ylimit <- c(0, quantile(profileMat, ylimFraction, names = FALSE))
  } else if(length(ylimFraction) == 2){
    ylimit <- ylimFraction
  }


  ## set the top annotation
  topAnn <- NULL
  if(isTRUE(showAnnotation)){
    topAnn <- HeatmapAnnotation(
      lines = anno_enriched(gp = gpar(col = clusterColor),
                            show_error = F,
                            ylim = ylimit
      ),
      height = unit(2, "cm")
    )
  }


  ## axis_names:
  axis_names <- NULL

  if(isTRUE(attributes(profileMat)$target_is_single_point)){
    ## when region length == 1
    axis_names[1] <- attributes(profileMat)$extend[1] * -1
    axis_names[2] <- attributes(profileMat)$target_name
    axis_names[3] <- attributes(profileMat)$extend[2]

  } else{
    ## when region length > 1
    axis_names[1] <- attributes(profileMat)$extend[1] * -1
    axis_names[2] <- "start"
    axis_names[3] <- "end"
    axis_names[4] <- attributes(profileMat)$extend[2]

  }

  ## generate profile heatmap
  ht <- EnrichedHeatmap(
    mat = profileMat,
    col = profileColor,
    name = signalName,
    axis_name_rot = 90,
    axis_name = axis_names,
    column_title = columnTitle,
    split = clusterData,
    top_annotation = topAnn,
    row_title_rot = 0,
    pos_line_gp = posLineGpar,
    use_raster = rasterPar$use,
    raster_quality = rasterPar$qual,
    ...
  )


  ## reconvert the factor to character
  clusterData$cluster <- as.character(clusterData$cluster)
  clusterData$geneId <- rownames(clusterData)

  returnList <- list(
    "heatmap" = ht,
    "rowGroupHt" = clusterHt,
    "legend" = lgd,
    "cluster" = clusterData,
    "clusterColor" = clusterColor,
    "profileColor" = profileColor,
    "ylim" = ylimit
  )

  if(isTRUE(createClusters)){
    returnList[["kmeans"]] <- profileKm$km
  }

  ##returns: profile heatmap, annotation legend, clusterData
  return(returnList)
}

##################################################################################
lakhanp1/chipmine documentation built on March 6, 2021, 9:06 a.m.