R/dge_PlotGOHeatmap.R

Defines functions plotGOHeatmap goToTermList clustToGO collapseSigGenes dge_PlotGOHeatmap

Documented in dge_PlotGOHeatmap

#' Plot all significant genes in a heatmap, with GO-annotated clusters
#'
#' This function takes the output from dge_OneFactor, finds all significant
#' genes in all comparisons, and then runs GO term enrichment and annotates a
#' heatmap with cluster-specific GO enrichments.
#'
#' This strategy is designed to reduce the implicit bias GO analysis has if you
#' use "the whole genome" as your gene universe. Since the cells you are
#' comparing come from the same tissue, any false positive genes (i.e. 5-10% of
#' your significant list) are likely to be cell type/tissue relevant, and so are
#' likely to enrich for "relevant" looking GO terms. This approach instead uses
#' GO terms only to annotate clusters of similarly-expressed genes, using only
#' all significant genes as the gene universe.
#'
#' @param dgeRes List, output of dge_OneFactor. Note that this must contain a
#'   DGEList object!
#' @param genePVal PValue cutoff for identifying a gene as significant. Advise
#'   0.1 since the GO term enrichment is robust to false positives in this
#'   approach, and works better with a larger pool of genes to draw on.
#' @param geneID The column in dgeRes that contains the gene IDs. Usually Newid
#'   if name conversion was done and GeneID if not.
#' @param nClusters Number of clusters to use for unsupervised k-medoid
#'   clustering. Needs to be data-driven, so try something in the 5-10 range and
#'   then re-run if necessary to get the number that best fits your data. Can
#'   also map this if you like but the k-medoids algorithm is slow so it'll take
#'   a while.
#' @param convertMatrix Boolean, should the matrix gene IDs be converted? See
#'   dge_GetMatrix for more.
#' @param returnFullGO Boolean. If FALSE (default), only plots the heatmap. If
#'   true, returns a list containing the ggplot (which you can plot yourself),
#'   plus the intermediate data that shows the clusters and the genes
#'   contributing to the enriched GO terms.
#' @param ... Additional arguments to pass to convertBiomart (i.e. convert_from
#'   and convert_to). See convertBiomart for more.
#'
#'
#' @return A ggplot heatmap of all significant genes, clustered, and annotated
#'   with cluster-specific GO enrichments.
#'
#' @examples
#' dgeRes_full<- system.file("extdata", "dgeRes_full.Rds",
#'                       package = "seqHelpers") |>
#'     readRDS()
#'
#' \dontrun{
#' dge_PlotGOHeatmap(dgeRes_full, genePVal = 0.1, geneID = Newid, nClusters = 2, convertMatrix = TRUE)
#' }
#'
#' @importFrom rlang .data
#' @import topGO
#' @export
dge_PlotGOHeatmap <- function(dgeRes, genePVal, geneID, nClusters, convertMatrix = FALSE, returnFullGO = FALSE, ...){
  if(is.null(dgeRes$DGEList)){
    stop("dgeRes must contain a DGEList entry. Please run dge_OneFactor with retainDGEList == TRUE")
  }

  geneID <- rlang::enquo(geneID)

  resList <- dgeRes[!names(dgeRes) == "DGEList"]

  sigMat <- collapseSigGenes(resList, genePVal, !!geneID) |>
    dge_GetMatrix(dgeRes$DGEList, scale = "rows", genes = _, convertMatrix = convertMatrix, ...)

  clust <- cluster::pam(sigMat, k = nClusters)

  GOAnnot <- clustToGO(clust, sigMat)
  Terms <- goToTermList(GOAnnot)

  gg <- plotGOHeatmap(sigMat, clust, Terms)

  if(returnFullGO == TRUE){
    ret <- list("Plot" = gg,
                "GO_Table" = GOAnnot,
                "Clustering" = clust)
    return(ret)
  } else{
    return(gg)
  }
}

# Helper functions -------------------------------------------------------------

collapseSigGenes <- function(dgeList, p, genes = .data$Newid){
  genes <- rlang::enquo(genes)
  sigG <- purrr::map(dgeList, \(df) dplyr::filter(df, .data$PValue < p)) |>
    purrr::map(\(df) dplyr::select(df, !!genes)) |>
    dplyr::bind_rows() |>
    dplyr::distinct() |>
    unlist()

  return(sigG)
}

clustToGO <- function(clustering, sigMat, species = "Mouse", geneID = "symbol"){
  if(species != "Mouse"){
    stop("Currently, only Mouse is supported. Please submit an issue to add other species")
  }

  TableList <- clustering$clustering |>
    tibble::enframe(name = "Gene", value = "Cluster") |>
    dplyr::group_by(.data$Cluster) |>
    tidyr::nest() |>
    dplyr::mutate(geneUniverse = purrr::map(.data$data, \(data) factor(as.integer(rownames(sigMat) %in% data$Gene))),
           geneUniverse = purrr::map(.data$geneUniverse, \(univ) stats::setNames(univ, rownames(sigMat)))) |>
    dplyr::mutate(GoData = purrr::map(.data$geneUniverse, \(univ) methods::new("topGOdata",
                                                  ontology = "BP",
                                                  allGenes = univ,
                                                  annot = annFUN.org,
                                                  mapping = "org.Mm.eg.db",
                                                  ID = geneID))) |>
    dplyr::mutate(GoTest = purrr::map(.data$GoData, runTest, algorithm = "classic", statistic = "fisher")) |>
    dplyr::mutate(GoTable = purrr::map2(.data$GoData, .data$GoTest, \(data, test) GenTable(data, PValue = test, orderBy = "PValue", ranksOf = "PValue", topNodes = 20))) |>
    dplyr::select(.data$Cluster, .data$GoTable) |>
    tidyr::unnest(.data$GoTable)

  return(TableList)
}

goToTermList <- function(GOTable, maxTerms = 5){
  terms <- GOTable |>
    dplyr::select(.data$Cluster, .data$GO.ID, .data$Term, .data$PValue) |>
    dplyr::group_by(.data$Cluster, .data$PValue) |>
    dplyr::slice_head(n = 1) |>
    dplyr::ungroup() |>
    dplyr::mutate(TermString = paste0(.data$GO.ID, ": ", .data$Term, " (P = ", .data$PValue, ")")) |>
    dplyr::group_by(.data$Cluster) |>
    dplyr::mutate(TermString = dplyr::case_when(min(.data$PValue) >= 0.05 ~ "No terms significantly enriched",
                                  TRUE ~ .data$TermString)) |>
    dplyr::ungroup() |>
    dplyr::filter(.data$TermString == "No terms significantly enriched" | .data$PValue < 0.05) |>
    dplyr::select(.data$Cluster, .data$TermString) |>
    dplyr::distinct() |>
    dplyr::group_by(.data$Cluster) |>
    dplyr::slice_head(n = maxTerms) |>
    dplyr::summarise(TermString = paste(.data$TermString, collapse = "\n")) |>
    dplyr::ungroup() |>
    tidyr::pivot_wider(names_from = .data$Cluster, values_from = .data$TermString) |>
    unlist()

  return(terms)
}

plotGOHeatmap <- function(matrix, clustering, GOAnnot){
  GOAnnot <- GOAnnot

  ht <- ComplexHeatmap::Heatmap(matrix,
                name = "zScore",
                cluster_columns = FALSE,
                show_row_dend = FALSE,
                show_row_names = FALSE,
                split = clustering$clustering,
                row_title = "@{GOAnnot[ x[1] ]}",
                row_title_rot = 0,
                gap = ggplot2::unit(2.5, "mm"),
                row_title_gp = grid::gpar(fontsize = 8))

  gg <- grid::grid.grabExpr(ComplexHeatmap::draw(ht)) |>
    ggpubr::as_ggplot()

  return(gg)
}
NKInstinct/seqHelpers documentation built on Aug. 20, 2022, 4:30 a.m.