R/gs_dendro.R

Defines functions gs_dendro

Documented in gs_dendro

#' Dendrogram of the gene set enrichment results
#'
#' Calculate (and plot) the dendrogram of the gene set enrichment results
#'
#' @param res_enrich A `data.frame` object, storing the result of the functional
#' enrichment analysis. See more in the main function, [GeneTonic()], to see the
#' formatting requirements.
#' @param n_gs Integer value, corresponding to the maximal number of gene sets to
#' be included (from the top ranked ones). Defaults to the number of rows of
#' `res_enrich`
#' @param gs_ids Character vector, containing a subset of `gs_id` as they are
#' available in `res_enrich`. Lists the gene sets to be included, additionally to
#' the ones specified via `n_gs`. Defaults to NULL.
#' @param gs_dist_type Character string, specifying which type of similarity (and
#' therefore distance measure) will be used. Defaults to `kappa`, which uses
#' [create_kappa_matrix()]
#' @param clust_method Character string defining the agglomeration method to be
#' used for the hierarchical clustering. See [stats::hclust()] for details, defaults
#' to `ward.D2`
#' @param color_leaves_by Character string, which columns of `res_enrich` will
#' define the color of the leaves. Defaults to `z_score`
#' @param size_leaves_by Character string, which columns of `res_enrich` will
#' define the size of the leaves. Defaults to the `gs_pvalue`
#' @param color_branches_by Character string, which columns of `res_enrich` will
#' define the color of the branches. Defaults to `clusters`, which calls
#' [dynamicTreeCut::cutreeDynamic()] to define the clusters
#' @param create_plot Logical, whether to create the plot as well.
#'
#' @return A dendrogram object is returned invisibly, and a plot can be generated
#' as well on that object.
#' @export
#'
#' @examples
#'
#' library("macrophage")
#' library("DESeq2")
#' library("org.Hs.eg.db")
#' library("AnnotationDbi")
#'
#' # dds object
#' data("gse", package = "macrophage")
#' dds_macrophage <- DESeqDataSet(gse, design = ~line + condition)
#' rownames(dds_macrophage) <- substr(rownames(dds_macrophage), 1, 15)
#' dds_macrophage <- estimateSizeFactors(dds_macrophage)
#'
#' # annotation object
#' anno_df <- data.frame(
#'   gene_id = rownames(dds_macrophage),
#'   gene_name = mapIds(org.Hs.eg.db,
#'                      keys = rownames(dds_macrophage),
#'                      column = "SYMBOL",
#'                      keytype = "ENSEMBL"),
#'   stringsAsFactors = FALSE,
#'   row.names = rownames(dds_macrophage)
#' )
#'
#' # res object
#' data(res_de_macrophage, package = "GeneTonic")
#' res_de <- res_macrophage_IFNg_vs_naive
#'
#' # res_enrich object
#' data(res_enrich_macrophage, package = "GeneTonic")
#' res_enrich <- shake_topGOtableResult(topgoDE_macrophage_IFNg_vs_naive)
#' res_enrich <- get_aggrscores(res_enrich, res_de, anno_df)
#'
#' gs_dendro(res_enrich,
#'           n_gs = 100)
gs_dendro <- function(res_enrich,
                      n_gs = nrow(res_enrich),
                      gs_ids = NULL,
                      gs_dist_type = "kappa", # alternatives
                      clust_method = "ward.D2",
                      color_leaves_by = "z_score",
                      size_leaves_by = "gs_pvalue",
                      color_branches_by = "clusters",
                      create_plot = TRUE) {

  if (!is.null(color_leaves_by)) {
    if (!color_leaves_by %in% colnames(res_enrich))
      stop("Your res_enrich object does not contain the ",
           color_leaves_by,
           " column.\n",
           "Compute this first or select another column to use for the leaves color.")
  }
  if (!is.null(size_leaves_by)) {
    if (!size_leaves_by %in% colnames(res_enrich))
      stop("Your res_enrich object does not contain the ",
           size_leaves_by,
           " column.\n",
           "Compute this first or select another column to use for the leaves size.")
  }

  n_gs <- min(n_gs, nrow(res_enrich))
  gs_to_use <- unique(
    c(
      res_enrich$gs_id[seq_len(n_gs)],  # the ones from the top
      gs_ids[gs_ids %in% res_enrich$gs_id]  # the ones specified from the custom list
    )
  )

  if (gs_dist_type == "kappa") {
    dmat <- create_kappa_matrix(res_enrich, n_gs, gs_ids)
  } else if (gs_dist_type == "jaccard") {
    dmat <- create_jaccard_matrix(res_enrich, n_gs, gs_ids, return_sym = TRUE)
  }

  rownames(dmat) <- colnames(dmat) <- res_enrich[gs_to_use, "gs_description"]

  my_hclust <- as.dist(1 - dmat) %>%
    hclust(method = clust_method)
  my_dend <- my_hclust %>%
    as.dendrogram(hang = -1) %>%
    set("leaves_pch", 19)

  dend_idx <- order.dendrogram(my_dend) # keep the sorted index vector for the leaves

  # leaves: colored by Z score
  # size: colored by squared size?
  # branches: indicating the treecut clusters

  if (!is.null(color_leaves_by)) {
    # setup color
    mypal <- rev(scales::alpha(colorRampPalette(RColorBrewer::brewer.pal(name = "RdYlBu", 11))(50), 1))
    col_var <- res_enrich[gs_to_use, color_leaves_by]
    leaves_col <- map2color(col_var, mypal, limits = range(col_var))[dend_idx]

    my_dend <- set(my_dend, "leaves_col", leaves_col)
  }

  if (!is.null(size_leaves_by)) {
    # setup size
    size_var <- -log10(as.numeric(res_enrich[gs_to_use, "gs_pvalue"]))[dend_idx]
    leaves_size <- 2 * (size_var - min(size_var)) / (max(size_var) - min(size_var) + 1e-10) + 0.3

    my_dend <- set(my_dend, "leaves_cex", leaves_size)     # or to use gs_size?
  }

  if (!is.null(color_branches_by)) {
    my.clusters <- unname(dynamicTreeCut::cutreeDynamic(my_hclust,
                                                        distM = as.matrix(dmat),
                                                        minClusterSize = 4,
                                                        verbose = 0))
    # or use rainbow_hcl from colorspace
    clust_pal <- RColorBrewer::brewer.pal(max(my.clusters), "Set1")
    clust_cols <- (clust_pal[my.clusters])[dend_idx]

    my_dend <-  branches_attr_by_clusters(my_dend, my.clusters[dend_idx], values = clust_pal)
  }

  if (create_plot) {
    par(mar = c(0, 0, 1, 25))
    plot(my_dend, horiz = TRUE)
  }

  return(invisible(my_dend))
  # to be plotted with plot(my_dend, horiz = TRUE)
  # as.ggdend() %>% ggplot() %>% plotly::ggplotly()
}

Try the GeneTonic package in your browser

Any scripts or data that you put into this service are public.

GeneTonic documentation built on Nov. 8, 2020, 5:27 p.m.