R/plotFindMarkerHeatmap.R

Defines functions plotMarkerDiffExp plotFindMarkerHeatmap

Documented in plotFindMarkerHeatmap plotMarkerDiffExp

#' Plot a heatmap to visualize the result of \code{\link{runFindMarker}}
#' @description This function will first reads the result saved in
#' \code{metadata} slot, named by \code{"findMarker"} and generated by
#' \code{\link{runFindMarker}}. Then it do the filtering on the statistics
#' based on the input parameters and get unique genes to plot. We choose the
#' genes that are identified as up-regulated only. As for the genes identified
#' as up-regulated for multiple clusters, we only keep the belonging towards the
#' one they have the highest Log2FC value.
#' In the heatmap, there will always be a cell annotation for the cluster
#' labeling used when finding the markers, and a feature annotation for which
#' cluster each gene belongs to. And by default we split the heatmap by these
#' two annotations. Additional legends can be added and the splitting can be
#' canceled.
#' @param inSCE \linkS4class{SingleCellExperiment} inherited object.
#' @param log2fcThreshold Only use DEGs with the absolute values of log2FC
#' larger than this value. Default \code{1}
#' @param fdrThreshold Only use DEGs with FDR value smaller than this value.
#' Default \code{0.05}
#' @param minClustExprPerc A numeric scalar. The minimum cutoff of the
#' percentage of cells in the cluster of interests that expressed the marker
#' gene. Default \code{0.7}.
#' @param maxCtrlExprPerc A numeric scalar. The maximum cutoff of the
#' percentage of cells out of the cluster (control group) that expressed the
#' marker gene. Default \code{0.4}.
#' @param minMeanExpr A numeric scalar. The minimum cutoff of the mean
#' expression value of the marker in the cluster of interests. Default \code{1}.
#' @param topN An integer. Only to plot this number of top markers for each
#' cluster in maximum, in terms of log2FC value. Use \code{NULL} to cancel the
#' top N subscription. Default \code{10}.
#' @param orderBy The ordering method of the clusters on the splitted heatmap.
#' Can be chosen from \code{"size"} or \code{"name"}, specified with vector of
#' ordered unique cluster labels, or set as \code{NULL} for unsplitted heatmap.
#' Default \code{"size"}.
#' @param decreasing Order the cluster decreasingly. Default \code{TRUE}.
#' @param rowLabel \code{TRUE} for displaying \code{rownames} of \code{inSCE}, a
#' \code{rowData} variable to use other feature identifiers, or a vector for
#' customized row labels. Use \code{FALSE} for not displaying. Default
#' \code{TRUE}.
#' @param rowDataName character. The column name(s) in \code{rowData} that need
#' to be added to the annotation. Default \code{NULL}.
#' @param colDataName character. The column name(s) in \code{colData} that need
#' to be added to the annotation. Default \code{NULL}.
#' @param featureAnnotations \code{data.frame}, with \code{rownames} containing
#' all the features going to be plotted. Character columns should be factors.
#' Default \code{NULL}.
#' @param cellAnnotations \code{data.frame}, with \code{rownames} containing
#' all the cells going to be plotted. Character columns should be factors.
#' Default \code{NULL}.
#' @param featureAnnotationColor A named list. Customized color settings for
#' feature labeling. Should match the entries in the \code{featureAnnotations}
#' or \code{rowDataName}. For each entry, there should be a list/vector of
#' colors named with categories. Default \code{NULL}.
#' @param cellAnnotationColor A named list. Customized color settings for
#' cell labeling. Should match the entries in the \code{cellAnnotations} or
#' \code{colDataName}. For each entry, there should be a list/vector of colors
#' named with categories. Default \code{NULL}.
#' @param colSplitBy character vector. Do semi-heatmap based on the grouping of
#' this(these) annotation(s). Should exist in either \code{colDataName} or
#' \code{names(cellAnnotations)}. Default is the value of \code{cluster} in
#' \code{\link{runFindMarker}} when \code{orderBy} is not \code{NULL}, or
#' \code{NULL} otherwise.
#' @param rowSplitBy character vector. Do semi-heatmap based on the grouping of
#' this(these) annotation(s). Should exist in either \code{rowDataName} or
#' \code{names(featureAnnotations)}. Default \code{"marker"}, which indicates an
#' auto generated annotation for this plot.
#' @param rowDend Whether to display row dendrogram. Default \code{FALSE}.
#' @param colDend Whether to display column dendrogram. Default \code{FALSE}.
#' @param title Text of the title, at the top of the heatmap. Default
#' \code{"Top Marker Heatmap"}.
#' @param ... Other arguments passed to \code{\link{plotSCEHeatmap}}.
#' @return A \code{\link[ComplexHeatmap]{Heatmap}} object
#' @seealso \code{\link{runFindMarker}}, \code{\link{getFindMarkerTopTable}}
#' @author Yichen Wang
#' @export
#' @examples
#' data("sceBatches")
#' logcounts(sceBatches) <- log1p(counts(sceBatches))
#' sce.w <- subsetSCECols(sceBatches, colData = "batch == 'w'")
#' sce.w <- runFindMarker(sce.w, method = "wilcox", cluster = "cell_type")
#' plotFindMarkerHeatmap(sce.w)
plotFindMarkerHeatmap <- function(inSCE, orderBy = 'size',
                              log2fcThreshold = 1, fdrThreshold = 0.05,
                              minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
                              minMeanExpr = 1, topN = 10, decreasing = TRUE,
                              rowLabel = TRUE,
                              rowDataName = NULL, colDataName = NULL,
                              featureAnnotations = NULL, cellAnnotations = NULL,
                              featureAnnotationColor = NULL,
                              cellAnnotationColor = NULL,
                              colSplitBy = NULL,
                              rowSplitBy = "marker", rowDend = FALSE,
                              colDend = FALSE,
                              title = "Top Marker Heatmap", ...) {
  if (!inherits(inSCE, 'SingleCellExperiment')) {
    stop('"inSCE" should be a SingleCellExperiment inherited Object.')
  }
  if (!'findMarker' %in% names(S4Vectors::metadata(inSCE))) {
    stop('"findMarker" result not found in metadata. ',
         'Run runFindMarker() before plotting.')
  }
  colSplitBy <-
    ifelse(is.null(orderBy), NULL, colnames(metadata(inSCE)$findMarker)[5])

  if (!is.null(orderBy)) {
    if (length(orderBy) == 1) {
      if (!orderBy %in% c('size', 'name')) {
        stop('Single charater setting for "orderBy" should be chosen',
             'from "size" or "name".')
      }
    }# else if(any(!SummarizedExperiment::colData(inSCE)[[cluster]] %in%
    #             orderBy)){
    #   stop('Invalid "orderBy", please input a vector of unique ordered ',
    #        'cluster identifiers that match all clusters in ',
    #        'colData(inSCE) specified by "cluster" to adjust the order ',
    #        'of clusters.')
    #}
  }
  # Extract and basic filter
  degFull <- S4Vectors::metadata(inSCE)$findMarker
  if (!all(c("Gene", "Pvalue", "Log2_FC", "FDR") %in%
          colnames(degFull)[seq_len(4)])) {
    stop('"findMarker" result cannot be interpreted properly')
  }
  clusterVar <- attr(degFull, "cluster")
  clusterName <- attr(degFull, "clusterName")
  #if(length(which(!degFull$Gene %in% rownames(inSCE))) > 0){
  # Remove genes happen in deg table but not in sce. Weird.
  # degFull <- degFull[-which(!degFull$Gene %in% rownames(inSCE)),]
  #}
  if (!is.null(log2fcThreshold)) {
    degFull <- degFull[degFull$Log2_FC > log2fcThreshold,]
  }
  if (!is.null(fdrThreshold)) {
    degFull <- degFull[degFull$FDR < fdrThreshold,]
  }
  if (!is.null(minClustExprPerc)) {
    degFull <- degFull[degFull$clusterExprPerc >= minClustExprPerc,]
  }
  if (!is.null(maxCtrlExprPerc)) {
    degFull <- degFull[degFull$ControlExprPerc <= maxCtrlExprPerc,]
  }
  if (!is.null(minMeanExpr)) {
    degFull <- degFull[degFull$clusterAveExpr >= minMeanExpr,]
  }
  # Remove duplicate by assigning the duplicated genes to the cluster where
  # their log2FC is the highest.
  # Done by keeping all unique genes and the rows  with highest Log2FC entry
  # for each duplicated gene.
  dup.gene <- unique(degFull$Gene[duplicated(degFull$Gene)])
  for (g in dup.gene) {
    deg.gix <- degFull$Gene == g
    deg.gtable <- degFull[deg.gix,]
    toKeep <- which.max(deg.gtable$Log2_FC)
    toRemove <- which(deg.gix)[-toKeep]
    degFull <- degFull[-toRemove,]
  }
  selected <- character()
  if (!is.null(topN)) {
    for (c in unique(degFull[[clusterName]])) {
      deg.cluster <- degFull[degFull[[clusterName]] == c,]
      deg.cluster <- deg.cluster[order(deg.cluster$Log2_FC,
                                       decreasing = TRUE),]
      if (dim(deg.cluster)[1] > topN) {
        deg.cluster <- deg.cluster[seq_len(topN),]
      }
      selected <- c(selected, deg.cluster$Gene)
    }
  } else {
    selected <- degFull$Gene
  }
  degFull <- degFull[degFull$Gene %in% selected,]

  if (!is.null(attr(degFull, "useReducedDim"))) {
    mat <- t(expData(inSCE, attr(degFull, "useReducedDim")))
    assayList <- list(mat)
    names(assayList) <- attr(degFull, "useReducedDim")
    tmpSCE <- SingleCellExperiment::SingleCellExperiment(assays = assayList)
    SummarizedExperiment::colData(tmpSCE) <-
      SummarizedExperiment::colData(inSCE)
    assayName <- attr(degFull, "useReducedDim")

  } else {
    tmpSCE <- inSCE
    assayName <- attr(degFull, "useAssay")

  }

  inSCE <- tmpSCE[degFull$Gene,]
  z <- clusterVar
  if (is.factor(z)) {
    z.order <- levels(z)
    # When 'z' is a subset factor, there would be redundant levels.
    z.order <- z.order[z.order %in% as.vector(unique(z))]
    z <- factor(z, levels = z.order)
    SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
      factor(degFull[[clusterName]], levels = z.order)
  } else {
    SummarizedExperiment::rowData(inSCE)[[clusterName]] <-
      degFull[[clusterName]]
  }
  if (!is.null(orderBy)) {
    if (length(orderBy) == 1 && orderBy == 'size') {
      z.order <- names(table(z)[order(table(z), decreasing = decreasing)])
    } else if (length(orderBy) == 1 && orderBy == 'name') {
      if (is.factor(z)) {
        z.order <- sort(levels(z), decreasing = decreasing)
      } else {
        z.order <- sort(unique(z), decreasing = decreasing)
      }
    } else {
      z.order <- orderBy[-which(!orderBy %in% z)]
    }
  }
  SummarizedExperiment::colData(inSCE)[[clusterName]] <-
    factor(z, levels = z.order)


  y <- SummarizedExperiment::rowData(inSCE)[[clusterName]]
  SummarizedExperiment::rowData(inSCE)[["marker"]] <-
    factor(y, levels = z.order)

  #markerConsistColor <-
  #  list(marker = dataAnnotationColor(inSCE, 'col')[[clusterName]])

  # Organize plotSCEHeatmap arguments
  colDataName <- c(colDataName, clusterName)
  rowDataName <- c(rowDataName, "marker")

  #featureAnnotationColor <- c(featureAnnotationColor, markerConsistColor)

  hm <- plotSCEHeatmap(inSCE, useAssay = assayName, colDataName = colDataName,
                       rowDataName = rowDataName, colSplitBy = colSplitBy,
                       rowSplitBy = rowSplitBy,
                       featureAnnotations = featureAnnotations,
                       cellAnnotations = cellAnnotations,
                       featureAnnotationColor = featureAnnotationColor,
                       cellAnnotationColor = cellAnnotationColor,
                       cluster_row_slices = FALSE, rowLabel = rowLabel,
                       cluster_column_slices = FALSE,
                       rowDend = rowDend, colDend = colDend, title = title, ...)
  return(hm)
}

#' @rdname plotFindMarkerHeatmap
#' @export
plotMarkerDiffExp <- function(inSCE, orderBy = 'size',
                              log2fcThreshold = 1, fdrThreshold = 0.05,
                              minClustExprPerc = 0.7, maxCtrlExprPerc = 0.4,
                              minMeanExpr = 1, topN = 10, decreasing = TRUE,
                              rowDataName = NULL, colDataName = NULL,
                              featureAnnotations = NULL, cellAnnotations = NULL,
                              featureAnnotationColor = NULL,
                              cellAnnotationColor = NULL,
                              colSplitBy = NULL,
                              rowSplitBy = "marker", rowDend = FALSE,
                              colDend = FALSE,
                              title = "Top Marker Heatmap", ...) {
  .Deprecated("plotFindMarkerHeatmap")
  plotFindMarkerHeatmap(inSCE = inSCE, orderBy = orderBy,
                        log2fcThreshold = log2fcThreshold,
                        fdrThreshold = fdrThreshold,
                        minClustExprPerc = minClustExprPerc,
                        maxCtrlExprPerc = maxCtrlExprPerc,
                        minMeanExpr = minMeanExpr, topN = topN,
                        decreasing = decreasing,
                        rowDataName = rowDataName, colDataName = colDataName,
                        featureAnnotations = featureAnnotations,
                        cellAnnotations = cellAnnotations,
                        featureAnnotationColor = featureAnnotationColor,
                        cellAnnotationColor = cellAnnotationColor,
                        colSplitBy = colSplitBy,
                        rowSplitBy = rowSplitBy, rowDend = rowDend,
                        colDend = colDend, title = title, ...)
}
compbiomed/singleCellTK documentation built on May 8, 2024, 6:58 p.m.