# Combines two matrices
coalesce <- function(...) {
x <- lapply(list(...), function(z) {
if (is.factor(z)) as.character(z) else z
})
m <- is.na(x[[1]])
i <- 2
while (any(m) && i <= length(x)) {
if (length(x[[i]]) == length(x[[1]])) {
x[[1]][m] <- x[[i]][m]
} else if (length(x[[i]]) == 1) {
x[[1]][m] <- x[[i]]
} else {
stop(paste("length mismatch in argument", i, " - found:", length(x[[i]]), "expected:", length(x[[1]])))
}
m <- is.na(x[[1]])
i <- i + 1
}
return(x[[1]])
}
#' Draw a heatmap of effects between variables, such as correlations
#'
#' Draws a heatmap of e.g. correlations between variables (see perform_correlation_tests).
#' It is possible to draw only the lower triangular of the heatmap, order rows and columns
#' with hierarchical clustering, and add circles for p-values.
#'
#' @param data a data frame with x and y variables and the effect (used to fill the tiles)
#' @param x,y the column names of data with the x and y variables
#' @param effect the column name of the effect, e.g. correlation
#' @param p optional, the column name with p-values. If provided, points that scale by p-value are drawn
#' on top of the heatmap tiles
#' @param p_limit numeric, only p-values below the limit are plotted as points
#' @param point_size_range a numeric vector of length 2. The upper and lower limits for the point sizes.
#' This needs to be adjusted to make the point size look good when compared to the tiles
#' @param log2_effect logical, whether the effect should be plotted on a logarithmic scale (in case of fold change etc.)
#' @param discretize_effect logical, whether the effect range should be divided into discrete levels instead of using
#' a continuous scale. Can sometimes make patterns more visible, but the hard limits can blur the big picture as well.
#' @param breaks if \code{discretize_effect = TRUE}, either the number of breaks or
#' the points where to cut for the levels, see \code{\link{cut}}
#' @param clustering logical, whether the order of rows and columns should be ordered by hierarchical clustering?
#' @param dist_method distance method used in clustering, see \code{\link{dist}}
#' @param clust_method clustering method used in clustering, see \code{\link{hclust}}
#' @param lower_tri logical, should only the lower triangular be plotted?
#' @param reverse_y logical, if \code{clustering = FALSE, lower_tri = FALSE}, should the order of the y-axis
#' be reversed so that the diagonal is from top left to bottom right?
#' @param use_coord_fixed logical, should the heatmap tiles be squares?
#' If yes, this uses \code{\link[ggplot2]{coord_fixed}}
#' @param symmetric_aspect_ratio logical, should the plot panel be a square?
#' If yes, uses ggplot2::theme(aspect.ratio = 1).
#' @param title,subtitle the title and subtitle of the plot
#' @param fill_scale fill scale for the heatmap as returned by a ggplot function.
#' Set to NA to choose the appropriate scale based on the class of the effect variable.
#'
#' @return a ggplot object
#'
#' @details All missing effects between variables are replaced by 0 before clustering,
#' since \code{hclust} can't deal with missing values.
#'
#' @seealso \code{\link{cut}} for discretizing the effect, \code{\link{dist}} for distance calculation for clustering,
#' \code{\link{hclust}} for hierarchical clustering.
#'
#' @examples
#' # Compute correlations between variables
#' correlations <- perform_correlation_tests(example_set,
#' x = featureNames(example_set),
#' duplicates = TRUE
#' )
#'
#' # Minimal example
#' plot_effect_heatmap(correlations, x = "X", y = "Y", effect = "Correlation_coefficient")
#'
#' # Lower triangular with discrete effect and p-value dots
#' plot_effect_heatmap(correlations,
#' x = "X", y = "Y", effect = "Correlation_coefficient",
#' p = "Correlation_P", point_size_range = c(2, 8),
#' discretize_effect = TRUE, breaks = 7, lower_tri = TRUE
#' )
#'
#' @export
plot_effect_heatmap <- function(data, x, y, effect, p = NULL, p_limit = 0.1, point_size_range = c(1, 6),
log2_effect = FALSE, discretize_effect = FALSE, breaks = 5,
clustering = TRUE, dist_method = "euclidean", clust_method = "ward.D2",
lower_tri = FALSE, reverse_y = TRUE,
use_coord_fixed = TRUE, symmetric_aspect_ratio = TRUE,
title = NULL, subtitle = NULL, fill_scale = NA) {
# Get default fill scales
if (!is.null(fill_scale)) {
if (is.na(fill_scale)) {
if (discretize_effect || class(data[, effect]) %in% c("factor", "character")) {
fill_scale <- getOption("notame.fill_scale_div_dis")
} else {
fill_scale <- getOption("notame.fill_scale_div_con")
}
}
}
# Possible log-transform effect, should show on legend
legend_label <- effect
if (log2_effect) {
data[, effect] <- log2(data[, effect])
legend_label <- paste0("log2(", effect, ")")
}
if (lower_tri) {
# Clustering is handled inside to_lowertri
data <- to_lowertri(data, x, y, effect, clustering, clust_method, dist_method)
} else if (clustering) {
data <- hclust_effects(data, x, y, effect, clust_method, dist_method)
} else if (reverse_y) {
# Reverse order of y axis so diagonal is from top left to bottom right
data[y] <- factor(data[, y], levels = rev(levels(factor(data[, y]))))
}
if (discretize_effect) {
data[effect] <- cut(data[, effect],
breaks = breaks,
dig.lab = 2
)
data[effect] <- factor(data[, effect], levels = rev(levels(data[, effect])))
}
ggp <- ggplot(data, aes(x = .data[[x]], y = .data[[y]], fill = .data[[effect]])) +
geom_tile() +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90)
) +
labs(
x = "", y = "", fill = legend_label,
title = title, subtitle = subtitle
) +
fill_scale
if (use_coord_fixed) {
ggp <- ggp +
coord_fixed()
}
if (symmetric_aspect_ratio) {
ggp <- ggp +
theme(aspect.ratio = 1)
}
if (!is.null(p)) {
small_p <- data[data[, p] < p_limit, ]
small_p[p] <- -log10(small_p[, p])
ggp <- ggp +
geom_point(aes(size = .data[[p]]),
data = small_p,
colour = "grey10", fill = "grey30", alpha = 0.3, shape = 21
) +
scale_size(name = "-log10(p-value)", range = point_size_range)
}
ggp
}
# Helper function to cluster effects without setting to lower triangular
hclust_effects <- function(data, x, y, effect, clust_method, dist_method) {
# Convert to wide format matrix for clustering
data_wide <- data %>%
dplyr::select(x, y, effect) %>%
tidyr::spread(y, effect) %>%
dplyr::filter(!is.na(x)) %>%
tibble::column_to_rownames(x) %>%
as.matrix()
data_wide[is.na(data_wide)] <- 0
# Separate clustering for y and x
x_order <- hclust(dist(data_wide, method = dist_method), method = clust_method)$order
y_order <- hclust(dist(t(data_wide), method = dist_method), method = clust_method)$order
data_wide <- data_wide[x_order, y_order]
data[x] <- factor(data[, x], levels = rev(rownames(data_wide)))
data[y] <- factor(data[, y], levels = colnames(data_wide))
data
}
# Converts data to only include the lower triangular
to_lowertri <- function(data, x, y, effect, clustering, clust_method, dist_method) {
# Rename columns for simplicity
data <- data %>% dplyr::rename("x" = x, "y" = y, "effect" = effect)
dat <- data[c("x", "y", "effect")]
dat$x <- as.character(dat$x)
dat$y <- as.character(dat$y)
# This makes sure all the variables will be included in both axes
vars <- unique(c(dat$x, dat$y))
x_missing <- setdiff(vars, dat$x)
y_missing <- setdiff(vars, dat$y)
append_len <- max(length(x_missing), length(y_missing))
if (append_len) {
append_df <- data.frame(
x = c(x_missing, rep(NA, append_len - length(x_missing))),
y = c(y_missing, rep(NA, append_len - length(y_missing))),
effect = rep(NA, append_len)
)
colnames(append_df) <- colnames(dat)
dat <- rbind(dat, append_df)
}
# Converting data into wide format and tidying data
dat_w <- dat %>%
tidyr::spread(y, effect) %>%
dplyr::filter(!is.na(x))
# Move column x to rownames
dat_w <- tibble::column_to_rownames(dat_w, "x")
dat_w <- dat_w[rownames(dat_w) != "NA", !colnames(dat_w) %in% c("NA", "<NA>")] # one of the columns or rows is named NA # nolint: line_length_linter.
dat_w <- dat_w[rev(order(names(dat_w))), rev(order(names(dat_w)))] # this makes the image lie on the lower triangular
# dat_w only has one-directional interactions
# for example, metformine vs morphine = 1.09, but morphine vs metformine = NA
# transpose values are added to make the matrix symmetrical
dat_w_t <- data.frame(t(dat_w))
dat_w_whole <- coalesce(dat_w, dat_w_t)
if (clustering) {
data_w_zeros <- dat_w_whole
data_w_zeros[is.na(data_w_zeros)] <- 0
hc <- hclust(dist(data_w_zeros, dist_method), method = clust_method)
dat_w_whole <- dat_w_whole[hc$order, hc$order]
dat_w_whole
}
# Only half of the associations are needed for plotting
dat_w_whole[upper.tri(dat_w_whole)] <- NA
# Diagonal should be included in the plot
# Melt back to long format for ggplot2
dat_w_whole$x <- rownames(dat_w_whole)
dat_l <- tidyr::gather(dat_w_whole, y, effect, -x)
dat_l$y <- as.character(dat_l$y)
# The order of Variable1 and 2 has changed for some associations, so two joins are required
combined1 <- dplyr::inner_join(dat_l, data, by = c("x", "y", "effect"))
combined2 <- dplyr::inner_join(dat_l, data, by = c("x" = "y", "y" = "x", "effect"))
dat_l <- rbind(combined1, combined2) %>%
dplyr::distinct() # Remove duplicated associations with same x and y
# Setting the factor levels to correctly draw the heatmap
# This ensures the tiles are plotted in correct order to make a lower triangular heat map
dat_l$x <- dat_l$x %>%
factor(levels = rev(rownames(dat_w_whole)))
dat_l$y <- dat_l$y %>%
factor(levels = rownames(dat_w_whole))
colnames(dat_l)[colnames(dat_l) == "x"] <- x
colnames(dat_l)[colnames(dat_l) == "y"] <- y
colnames(dat_l)[colnames(dat_l) == "effect"] <- effect
dat_l
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.