R/ame_methods.R

Defines functions ame_compare_heatmap_methods plot_ame_heatmap ame_order_by_cluster

Documented in ame_compare_heatmap_methods plot_ame_heatmap

#' Order AME results by clusters
#'
#' Reorders ame results from one or more runs for plotting heatmap showing
#' unique/shared motifs across groups.
#'
#' @param ame ame results data.frame
#' @param id column of motif ID's to use. (default: `motif_id`)
#' @param group column to group samples by. To control order, this column must
#'   be a factor.
#'
#' @return
#'
#' @importFrom magrittr %>%
#' @importFrom rlang enquo
#' @importFrom rlang !!
#' @importFrom rlang .data
#' @importFrom tibble rowid_to_column
#'
#' @noRd
ame_order_by_cluster <- function(ame, id = motif_id, group = NULL, name = NULL){
  # orders data in "order" column first by TFs unique to each type,
  # then by motifs shared between types such that tfs are shown by unique,
  # pairwise, 3-wise, etc. starting from the first type upwards.

  # consider a factor: F with 3 levels (j), and 6 rows (i)
  # for a heatmap of the following:
  #
  # i6 | .   2   3
  # i5 | 1   .   3
  # i4 | 1   2   .
  # i3 | .   .   3
  # i2 | .   2   .
  # i1 | 1   .   .
  #    |___________
  #      j1  j2  j3
  #
  # We can compute the rank
  # by counting the number of entries for each i (ex i1 = 1, i5 = 2)
  # we can rank order by j-pairwise clusters by ranking by
  # count(F), min(F), max(F) in that order.
  # (ex i4 = c(2,1,2), while i5 = c(2,1,3))
  #
  # where F is implemented below as type_rank
  #
  # NOTE: I'm pretty sure this problem could be better solved using a binary
  # matrix & heirarchical clustering, but this works fine in my tests. If it's
  # not performant, consider revising.

  group <- enquo(group)
  id <- enquo(id)

  if (rlang::quo_is_null(group)){
    # thank you:
    # https://rpubs.com/tjmahr/quo_is_missing
    if (is.null(name)){name <- "All Regions"}

    res <- ame %>%
      dplyr::mutate(type = factor(name)) %>%
      tibble::rowid_to_column("order")

    return(res)
  }

  ame %>%
    dplyr::mutate(type = factor(!!group),
                  type_rank = as.integer(.data$type)) %>%
    dplyr::group_by(!!id) %>%
    dplyr::mutate(nType = dplyr::n(),
                  minType = min(.data$type_rank),
                  maxType = max(.data$type_rank)) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(!!sym("nType"), !!sym("type_rank"), 
                   !!sym("minType"), !!sym("maxType")) %>% 
    tibble::rowid_to_column(var = "order")

}

#' Plot AME heatmap clustered by similarity in detected motifs
#'
#' @param ame ame results data.frame
#' @param id column of motif ids to use (default: motif_id).
#' @param group grouping column if comparing across multiple ame runs (optional,
#'   default: NULL).
#' @param value value to display as heatmap intensity. Default:
#'   -log10(adj.pvalue). Takes function or column name as input. If set to
#'   "normalize", will use normalized rank within `group` as the heatmap values.
#'   **If in doubt**, prefer the -log10(adj.pvalue) plot potentially in
#'   conjunction with adjusting `scale_max`. (See "Normalized rank
#'   visualization" section below for more details on how to interpret these
#'   data)
#'
#' @details Normalized rank visualization
#' **NOTE:** The normalized rank visualization eliminates all real values
#' related to statistical significance! Instead, this visualization represents
#' the relative ranks of hits within an AME run, which already pass a
#' significance threshold set during `runAME()`. This means that even if several
#' motifs have similar or even identical pvalues, their heatmap representation
#' will be a different color value based on their ranked order in the results
#' list. This also means that using the normalized rank visualization will
#' be misleading if there are only a few AME hits; it is only worth using if the
#' number of hits is very large (>100). Both visualizations can be useful and
#' reveal different properties of the data to the user during data
#' exploration. Use `ame_compare_heatmap_methods()` to help assess
#' differences in the two visualizations. **If in doubt**, prefer the
#' `-log10(adj.pvalue)` representation.
#'
#' @param group_name when group = NULL, name to use for input regions. Ignored
#'   if group is set.
#' @param scale_max max heatmap value to limit upper-value of scale. Useful if
#'   distribution of `value`s vary greatly between groups. Usually a better to
#'   tweak this option than to use value = "normalize". The cumulative
#'   distribution plot generated by `ame_compare_heatmap_methods()` can be
#'   useful for selecting this value, try to pick a value which captures the
#'   largest fraction of hits across all groups while excluding outliers.
#'
#' @details
#' Common mistake: if `value` is set to a string that is not "normalize", it
#' will return: "Error: Discrete value supplied to continuous scale". To use a
#' column by name, do not quote the column name.
#'
#' @return `ggplot` object
#' @export
#'
#' @importFrom ggplot2 ggplot geom_tile theme aes theme_bw labs
#'   scale_fill_gradient2 element_text .pt
#' @importFrom ggplot2 ggplot scale_fill_continuous
#' @importFrom magrittr %>%
#' @importFrom rlang enquo .data
#' @importFrom rlang !!
#' @importFrom stats reorder
#'
#' @examples
#' data("example_ame", package = "memes")
#' 
#' # Plot a single category heatmap
#' plot_ame_heatmap(example_ame$Decreasing)
#' 
#' # Plot a multi category heatmap
#' grouped_ame <- dplyr::bind_rows(example_ame, .id = "category")
#' plot_ame_heatmap(grouped_ame, group = category)
plot_ame_heatmap <- function(ame, id = motif_id, group = NULL, value = -log10(adj.pvalue), group_name = NULL, scale_max = NA){
  id <- enquo(id)
  group <- enquo(group)
  value <- enquo(value)

  # Only order by group if group is set
  if (rlang::quo_is_null(group)){
    res <- ame %>%
      ame_order_by_cluster(id = id, group = NULL, name = group_name)
  } else {
    res <- ame %>%
      ame_order_by_cluster(id = !!id, group = !!group)
  }

  # ggplot theme for ame heatmap
  heatmap_theme <- theme_bw() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
         axis.text = element_text(color = "black",
                                  size = 34 / .pt))

  # Check whether value is set to "normalize" (quotes important)
  # have to do as_label because as_name errors w/ expressions
  value_eval <- rlang::as_label(value) != "\"normalize\""

  if (value_eval){
    # Default behavior is to do tidyeval on value
    plot <-  res %>%
      ggplot(aes(reorder(!!id, order), as.factor(.data$type))) +
        geom_tile(aes(fill = !!value), color = 'black', size = 0.3) +
        heatmap_theme +
        labs(x = substitute(id),
             y = substitute(group),
             fill = substitute(value)) +
        scale_fill_gradient2(low = "white",
                             high = "firebrick")

  } else {
    # Otherwise use normalized rank
    plot <-  res %>%
      dplyr::group_by(!!group) %>%
      dplyr::mutate(norm_rank = rank_normalize(rank)) %>%
      ggplot(aes(stats::reorder(!!id, order), as.factor(.data$type))) +
        geom_tile(aes(fill = .data$norm_rank), color = 'black', size = 0.3) +
        heatmap_theme +
        labs(x = substitute(id),
             y = substitute(group),
             fill = "Normalized Rank") +
        scale_fill_continuous(low = "firebrick",
                              high = "white",
                              breaks = c(0, 1),
                              labels = c("High", "Low"))

  }

  # Couldn't come up with a clever way to do this in conjunction with above logic,
  # so just handle scale_max plotting separately
  if (!is.na(scale_max) & value_eval) {

    plot <- res %>%
      dplyr::mutate("scale_data" = ifelse(!!value > scale_max, scale_max, !!value)) %>%
      ggplot(aes(reorder(!!id, order), as.factor(.data$type))) +
        geom_tile(aes(fill = .data$scale_data), color = 'black', size = 0.3) +
        heatmap_theme +
        labs(x = substitute(id),
             y = substitute(group),
             fill = substitute(value)) +
        scale_fill_gradient2(low = "white", high = "firebrick",
                             limits = c(0, scale_max),
                             breaks = c(0, scale_max/4, scale_max/2, 0.75 * scale_max, scale_max),
                             labels = c(0, scale_max/4, scale_max/2, 0.75 * scale_max, paste0(scale_max, "+")))

  }

  if (!value_eval & !is.na(scale_max)) {
    stop("scale_max is not valid when using value = normalize")
  }

  return(plot)

}

#' Compare AME heatmap methods
#'
#' This helper function allows the user to visualize the distribution of their
#' AME results data on different scales to help understand the implications of
#' using different values in `plot_ame_heatmap()`
#'
#' @param ame ame results data.frame
#' @param group optional name of group to split results by
#' @param value value to compare to "normalize" method (default:
#'   -log10(adj.pvalue))
#'
#' @return a cowplot 2 panel plot comparing the distribution of `value` to
#'   normalized rank values
#' @export
#'
#' @importFrom rlang enquo
#' @importFrom ggplot2 ggplot stat_ecdf scale_x_continuous labs theme_bw aes
#'   theme
#'
#' @examples
#' data("example_ame", package = "memes")
#' ame_compare_heatmap_methods(example_ame$Decreasing)
#' 
#' ame_compare_heatmap_methods(dplyr::bind_rows(example_ame, .id = "type"), type)
ame_compare_heatmap_methods <- function(ame, group, value = -log10(adj.pvalue)){

  group <- rlang::enquo(group)
  value <- rlang::enquo(value)

  if (rlang::quo_name(group) == ""){
    stat_plot <- stat_ecdf(size = 1, pad = FALSE)
    rel_widths <- c(1, 1)
  } else {
    stat_plot <- stat_ecdf(aes(color = !!group), size = 1, pad = FALSE)
    # Add extra padding for the legend
    rel_widths <- c(1, 1.25)
  }

  value_dist <- ame %>%
    ggplot(aes(!!value)) +
      stat_plot +
      labs(y = "Fraction of Motifs") +
      theme_bw() +
      theme(legend.position = "none") +
      labs(title = paste0("value = ", rlang::as_label(value)))

  normrank_dist <- ame %>%
    ggplot(aes(rank_normalize(rank))) +
      stat_plot +
      scale_x_continuous(
                         breaks = c(0, 0.25, 0.5, 0.75, 1),
                         labels = c("0\n(High)", 0.25, 0.5, 0.75, "1\n(Low)"),
                         ) +
      labs(title = "value = \"normalize\"",
           y = NULL,
           x = "Normalized Rank")  +
      theme_bw()

  cowplot::plot_grid(value_dist,
                     normrank_dist,
                     rel_widths = rel_widths,
                     labels = "AUTO")

}
snystrom/memes documentation built on Feb. 6, 2023, 2:57 a.m.