#' Order AME results by clusters
#'
#' Reorders ame results from one or more runs for plotting heatmap showing
#' unique/shared motifs across groups.
#'
#' @param ame ame results data.frame
#' @param id column of motif ID's to use. (default: `motif_id`)
#' @param group column to group samples by. To control order, this column must
#' be a factor.
#'
#' @return
#'
#' @importFrom magrittr %>%
#' @importFrom rlang enquo
#' @importFrom rlang !!
#' @importFrom rlang .data
#' @importFrom tibble rowid_to_column
#'
#' @noRd
ame_order_by_cluster <- function(ame, id = motif_id, group = NULL, name = NULL){
# orders data in "order" column first by TFs unique to each type,
# then by motifs shared between types such that tfs are shown by unique,
# pairwise, 3-wise, etc. starting from the first type upwards.
# consider a factor: F with 3 levels (j), and 6 rows (i)
# for a heatmap of the following:
#
# i6 | . 2 3
# i5 | 1 . 3
# i4 | 1 2 .
# i3 | . . 3
# i2 | . 2 .
# i1 | 1 . .
# |___________
# j1 j2 j3
#
# We can compute the rank
# by counting the number of entries for each i (ex i1 = 1, i5 = 2)
# we can rank order by j-pairwise clusters by ranking by
# count(F), min(F), max(F) in that order.
# (ex i4 = c(2,1,2), while i5 = c(2,1,3))
#
# where F is implemented below as type_rank
#
# NOTE: I'm pretty sure this problem could be better solved using a binary
# matrix & heirarchical clustering, but this works fine in my tests. If it's
# not performant, consider revising.
group <- enquo(group)
id <- enquo(id)
if (rlang::quo_is_null(group)){
# thank you:
# https://rpubs.com/tjmahr/quo_is_missing
if (is.null(name)){name <- "All Regions"}
res <- ame %>%
dplyr::mutate(type = factor(name)) %>%
tibble::rowid_to_column("order")
return(res)
}
ame %>%
dplyr::mutate(type = factor(!!group),
type_rank = as.integer(.data$type)) %>%
dplyr::group_by(!!id) %>%
dplyr::mutate(nType = dplyr::n(),
minType = min(.data$type_rank),
maxType = max(.data$type_rank)) %>%
dplyr::ungroup() %>%
dplyr::arrange(!!sym("nType"), !!sym("type_rank"),
!!sym("minType"), !!sym("maxType")) %>%
tibble::rowid_to_column(var = "order")
}
#' Plot AME heatmap clustered by similarity in detected motifs
#'
#' @param ame ame results data.frame
#' @param id column of motif ids to use (default: motif_id).
#' @param group grouping column if comparing across multiple ame runs (optional,
#' default: NULL).
#' @param value value to display as heatmap intensity. Default:
#' -log10(adj.pvalue). Takes function or column name as input. If set to
#' "normalize", will use normalized rank within `group` as the heatmap values.
#' **If in doubt**, prefer the -log10(adj.pvalue) plot potentially in
#' conjunction with adjusting `scale_max`. (See "Normalized rank
#' visualization" section below for more details on how to interpret these
#' data)
#'
#' @details Normalized rank visualization
#' **NOTE:** The normalized rank visualization eliminates all real values
#' related to statistical significance! Instead, this visualization represents
#' the relative ranks of hits within an AME run, which already pass a
#' significance threshold set during `runAME()`. This means that even if several
#' motifs have similar or even identical pvalues, their heatmap representation
#' will be a different color value based on their ranked order in the results
#' list. This also means that using the normalized rank visualization will
#' be misleading if there are only a few AME hits; it is only worth using if the
#' number of hits is very large (>100). Both visualizations can be useful and
#' reveal different properties of the data to the user during data
#' exploration. Use `ame_compare_heatmap_methods()` to help assess
#' differences in the two visualizations. **If in doubt**, prefer the
#' `-log10(adj.pvalue)` representation.
#'
#' @param group_name when group = NULL, name to use for input regions. Ignored
#' if group is set.
#' @param scale_max max heatmap value to limit upper-value of scale. Useful if
#' distribution of `value`s vary greatly between groups. Usually a better to
#' tweak this option than to use value = "normalize". The cumulative
#' distribution plot generated by `ame_compare_heatmap_methods()` can be
#' useful for selecting this value, try to pick a value which captures the
#' largest fraction of hits across all groups while excluding outliers.
#'
#' @details
#' Common mistake: if `value` is set to a string that is not "normalize", it
#' will return: "Error: Discrete value supplied to continuous scale". To use a
#' column by name, do not quote the column name.
#'
#' @return `ggplot` object
#' @export
#'
#' @importFrom ggplot2 ggplot geom_tile theme aes theme_bw labs
#' scale_fill_gradient2 element_text .pt
#' @importFrom ggplot2 ggplot scale_fill_continuous
#' @importFrom magrittr %>%
#' @importFrom rlang enquo .data
#' @importFrom rlang !!
#' @importFrom stats reorder
#'
#' @examples
#' data("example_ame", package = "memes")
#'
#' # Plot a single category heatmap
#' plot_ame_heatmap(example_ame$Decreasing)
#'
#' # Plot a multi category heatmap
#' grouped_ame <- dplyr::bind_rows(example_ame, .id = "category")
#' plot_ame_heatmap(grouped_ame, group = category)
plot_ame_heatmap <- function(ame, id = motif_id, group = NULL, value = -log10(adj.pvalue), group_name = NULL, scale_max = NA){
id <- enquo(id)
group <- enquo(group)
value <- enquo(value)
# Only order by group if group is set
if (rlang::quo_is_null(group)){
res <- ame %>%
ame_order_by_cluster(id = id, group = NULL, name = group_name)
} else {
res <- ame %>%
ame_order_by_cluster(id = !!id, group = !!group)
}
# ggplot theme for ame heatmap
heatmap_theme <- theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.text = element_text(color = "black",
size = 34 / .pt))
# Check whether value is set to "normalize" (quotes important)
# have to do as_label because as_name errors w/ expressions
value_eval <- rlang::as_label(value) != "\"normalize\""
if (value_eval){
# Default behavior is to do tidyeval on value
plot <- res %>%
ggplot(aes(reorder(!!id, order), as.factor(.data$type))) +
geom_tile(aes(fill = !!value), color = 'black', size = 0.3) +
heatmap_theme +
labs(x = substitute(id),
y = substitute(group),
fill = substitute(value)) +
scale_fill_gradient2(low = "white",
high = "firebrick")
} else {
# Otherwise use normalized rank
plot <- res %>%
dplyr::group_by(!!group) %>%
dplyr::mutate(norm_rank = rank_normalize(rank)) %>%
ggplot(aes(stats::reorder(!!id, order), as.factor(.data$type))) +
geom_tile(aes(fill = .data$norm_rank), color = 'black', size = 0.3) +
heatmap_theme +
labs(x = substitute(id),
y = substitute(group),
fill = "Normalized Rank") +
scale_fill_continuous(low = "firebrick",
high = "white",
breaks = c(0, 1),
labels = c("High", "Low"))
}
# Couldn't come up with a clever way to do this in conjunction with above logic,
# so just handle scale_max plotting separately
if (!is.na(scale_max) & value_eval) {
plot <- res %>%
dplyr::mutate("scale_data" = ifelse(!!value > scale_max, scale_max, !!value)) %>%
ggplot(aes(reorder(!!id, order), as.factor(.data$type))) +
geom_tile(aes(fill = .data$scale_data), color = 'black', size = 0.3) +
heatmap_theme +
labs(x = substitute(id),
y = substitute(group),
fill = substitute(value)) +
scale_fill_gradient2(low = "white", high = "firebrick",
limits = c(0, scale_max),
breaks = c(0, scale_max/4, scale_max/2, 0.75 * scale_max, scale_max),
labels = c(0, scale_max/4, scale_max/2, 0.75 * scale_max, paste0(scale_max, "+")))
}
if (!value_eval & !is.na(scale_max)) {
stop("scale_max is not valid when using value = normalize")
}
return(plot)
}
#' Compare AME heatmap methods
#'
#' This helper function allows the user to visualize the distribution of their
#' AME results data on different scales to help understand the implications of
#' using different values in `plot_ame_heatmap()`
#'
#' @param ame ame results data.frame
#' @param group optional name of group to split results by
#' @param value value to compare to "normalize" method (default:
#' -log10(adj.pvalue))
#'
#' @return a cowplot 2 panel plot comparing the distribution of `value` to
#' normalized rank values
#' @export
#'
#' @importFrom rlang enquo
#' @importFrom ggplot2 ggplot stat_ecdf scale_x_continuous labs theme_bw aes
#' theme
#'
#' @examples
#' data("example_ame", package = "memes")
#' ame_compare_heatmap_methods(example_ame$Decreasing)
#'
#' ame_compare_heatmap_methods(dplyr::bind_rows(example_ame, .id = "type"), type)
ame_compare_heatmap_methods <- function(ame, group, value = -log10(adj.pvalue)){
group <- rlang::enquo(group)
value <- rlang::enquo(value)
if (rlang::quo_name(group) == ""){
stat_plot <- stat_ecdf(size = 1, pad = FALSE)
rel_widths <- c(1, 1)
} else {
stat_plot <- stat_ecdf(aes(color = !!group), size = 1, pad = FALSE)
# Add extra padding for the legend
rel_widths <- c(1, 1.25)
}
value_dist <- ame %>%
ggplot(aes(!!value)) +
stat_plot +
labs(y = "Fraction of Motifs") +
theme_bw() +
theme(legend.position = "none") +
labs(title = paste0("value = ", rlang::as_label(value)))
normrank_dist <- ame %>%
ggplot(aes(rank_normalize(rank))) +
stat_plot +
scale_x_continuous(
breaks = c(0, 0.25, 0.5, 0.75, 1),
labels = c("0\n(High)", 0.25, 0.5, 0.75, "1\n(Low)"),
) +
labs(title = "value = \"normalize\"",
y = NULL,
x = "Normalized Rank") +
theme_bw()
cowplot::plot_grid(value_dist,
normrank_dist,
rel_widths = rel_widths,
labels = "AUTO")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.