#' Plot multiple events or genes as a heatmap (experimental)
#'
#' Visualize multiple PSI or cRPKM values in a single plot.
#'
#' @details
#' By default, \code{\link[ggplot2]{geom_tile}} is used to generate a heatmap
#' and a \code{ggplot2} object is returned. Alternatively, other heatmap
#' packages are supported including: \code{\link[pheatmap]{pheatmap}}
#' (use \code{usepkgs = "pheatmap"}) and
#' \code{\link[gplots]{heatmap.2}} (use \code{usepkgs = "gplots"}).
#'
#' Input is similar to \code{\link{plot_event}}, \code{\link{plot_expr}}, or
#' \code{\link{plot_multievent}}. Subgrouping of samples is also supported (see
#' \code{\link{plot_event}} and \code{\link{preprocess_sample_colors}} for details
#' on subgrouping). If subgroups are activated, the PSI for each subgroup is taken
#' as the average of all the samples in the subgroup.
#'
#' If \code{cluster_rows = TRUE}, then a hierarchical clustering using \code{hclust}
#' will be performed on a distance matrix computed by \code{dist}. If \code{config}
#' is not specified, then the samples will also be clustered.
#'
#' To set the colours, use \code{fill} option to specify vector of colours.
#' By default (\code{fill=NULL}), PSI uses a yellow/blue gradient, while cRPKMs
#' uses the "YlOrRd" color brewer palette.
#'
#' @param df A data frame of input values (PSI or cRPKM). If the latter, need to
#' set \code{expr = TRUE}.
#' @param config Optional configuration settings. Can be
#' a path to the \code{.config} file, or 4/5-column data frame of the \code{.config}
#' file.
#' @param subg Logical indicating whether samples should be subgrouped for plotting.
#' @param expr Logical - \code{TRUE} if plotting cRPKMs, \code{FALSE} otherwise
#' @param counts Logical indicating whether the data frame contains read counts.
#' Set to \code{TRUE} if the data frame contains two rows per sample (cRPKM and
#' counts), otherwise leave as \code{FALSE} (default).
#' @param trim_colnames String that must be searched for and trimmed at the end
#' of every sample column in x. Useful to trim the "-cRPKM" suffix from expression
#' tables. If no string must be trimmed, leave as \code{FALSE}.
#' @param qual String indicating the minimun \emph{vast-tools} quality score
#' for the PSI to be accepted. Defaults to \code{'VLOW'}. See the
#' \href{https://github.com/vastgroup/vast-tools/blob/master/README.md}{vast-tools
#' documentation} for details.
#' @param xlab The x-axis label
#' @param ylab The y-axis label
#' @param title Title of the plot
#' @param cluster_rows Logical to cluster rows using hierarchical clustering
#' @param cluster_cols Logical to cluster columns using hierarchical clustering
#' @param fill A vector of colours. e.g. from \code{colorRampPalette}.
#' Default is \code{NULL}, which will choose the palette automatically.
#' @param usepkg Default is \code{ggplot2}, which creates the heatmap using
#' \code{\link[ggplot2]{geom_tile}}. Otherwise, use \code{gplots}, which calls
#' \code{\link[gplots]{heatmap.2}}.
#' @param ... Additional parameters passed to \code{\link[gplots]{heatmap.2}} or
#' \code{\link[pheatmap]{pheatmap}}.
#' @export
#' @import ggplot2
#' @import dplyr
#' @import tidyr
#' @importFrom grDevices colorRampPalette
#' @importFrom stats dist
#' @importFrom stats hclust
#' @examples
#' # Uses ggplot2 by default
#' plot_multi(psi)
#' plot_multi(psi, config = config)
#'
#' # Use expr = TRUE for cRPKMs
#' plot_multi(crpkm, expr = TRUE)
#' plot_multi(crpkm, config = config, expr = TRUE)
#' plot_multi(crpkm, config = config, expr = TRUE, cluster_rows = TRUE)
#'
#' # To use pheatmap or gplots for plotting, set usepgk option
#' plot_multi(psi, config = config, usepkg = "pheatmap")
#'
#' # To enable sample subgroups, set subg = TRUE
#' plot_multi(psi, config = config, subg = TRUE)
#'
#' # Working with expression tables with read counts and suffixes, enabling subgroups
#' plot_multi(crpkm_counts, config = config, expr = TRUE, trim_colnames = "-cRPKM", counts = TRUE)
plot_multi <- function(df, config = NULL, subg = FALSE, expr = FALSE,
trim_colnames = FALSE, qual = c("VLOW","N","LOW","OK","SOK"),
counts = FALSE, xlab = "", ylab = "",
title = "", cluster_rows = TRUE,
cluster_cols = ifelse(is.null(config), TRUE, FALSE),
fill = NULL, usepkg = c("ggplot2","gplots","pheatmap"),
... ) {
qual = match.arg(qual)
usepkg = match.arg(usepkg)
# Format input
formatted_df <- format_table(df,
qual = qual,
expr = expr,
counts = counts,
trim_colnames = trim_colnames,
short_ids = FALSE)
if (expr == FALSE) {
formatted_df$ID <- make_title.2(df$GENE, df$EVENT)
}
reordered <- preprocess_sample_colors(formatted_df,
config = config,
subg = subg,
expr = expr)
psi <- reordered$data %>%
mutate(ID=formatted_df$ID) %>%
dplyr::select(ID,colnames(reordered$data))
mdata <- psi %>%
group_by(ID) %>%
gather(key="SampleName",
-ID,
value="value")
#Determine if samples must be subgrouped
subg <- all(c(subg==TRUE,
"SubgroupName" %in% colnames(reordered$original_config)))
#Do subgroups if subg==T, use the samples as subgroups if subg==FALSE.
#This part could be a separate function, called also from plot_multievent.
if(subg){
sm <- left_join(mdata,reordered$subgroup,by="SampleName")
smsum <- sm %>%
dplyr::group_by(ID,SubgroupName) %>%
dplyr::summarise(value=mean(value,na.rm=T)) %>%
mutate(value=replace(value,is.na(value),NA)) %>%
dplyr::select(ID,SubgroupName,value)
} else{
sm <- left_join(mdata,reordered$subgroup,by="SampleName")
smsum <- sm %>%
dplyr::select(ID,SubgroupName,value)
}
smsum <- left_join(reordered$subgroup_order,smsum,by="SubgroupName") %>%
dplyr::arrange(ID,SubgroupOrder)
smsum <- left_join(smsum,reordered$group,by="SubgroupName") %>%
left_join(reordered$group_order,by="GroupName") %>%
arrange(ID,SubgroupOrder) %>%
group_by(ID) %>%
mutate(SubgroupName=factor(SubgroupName,levels=unique(SubgroupName))) %>%
arrange(ID,GroupOrder) %>%
mutate(GroupName=factor(GroupName,levels=unique(GroupName))) %>%
dplyr::select(ID,
Order=SubgroupOrder,
Sample=SubgroupName,
colnames(smsum),
GroupName,
RColorCode)
heatmap_data <- smsum %>%
dplyr::select(ID,Sample,value) %>%
spread(key=Sample,value=value) %>%
as.data.frame()
rownames(heatmap_data) <- heatmap_data$ID
heatmap_data <- subset(heatmap_data, select=(-ID))
if (is.null(fill)) {
if (expr) {
# Use RColorBrewer::brewer.pal(6, "YlOrRd")
fill <- c("#FFFFB2", "#FED976", "#FEB24C", "#FD8D3C", "#F03B20", "#BD0026")
} else {
fill <- colorRampPalette(c("yellow", "blue"))(20)
}
}
if (usepkg == "gplots" && requireNamespace("gplots", quietly = TRUE)) {
#Make heatmap with gplots
#Preformat data for gplots::heatmap.2
col_colors <- smsum %>%
ungroup() %>%
dplyr::filter(ID==smsum$ID[1]) %>%
pull(RColorCode)
dendro <- "none"
if (cluster_cols && cluster_rows) {
dendro <- "both"
} else if (cluster_cols) {
dendro <- "column"
} else if (cluster_rows) {
dendro <- "row"
}
# Not ideal, but need two function calls if we want option to have
# ColSideColors or not. Seems like heatmap.2 can't take an empty
# ColSideColors -- you have to specify something or don't use it at all.
if (!is.null(reordered$original_config)) {
gplots::heatmap.2(as.matrix(heatmap_data),
Colv = cluster_cols, Rowv = cluster_rows,
dendrogram = dendro,
ColSideColors = col_colors,
col = fill,
margins = c(10, 35),
trace = "none",
key.xlab = ifelse(expr, "cRPKM", "PSI"),
xlab = xlab, ylab = ylab,
main = title,
...
)
} else {
gplots::heatmap.2(as.matrix(reordered$data),
Colv = cluster_cols, Rowv = cluster_rows,
dendrogram = dendro,
col = fill,
margins = c(10, 35),
trace = "none",
key.xlab = ifelse(expr, "cRPKM", "PSI"),
xlab = xlab, ylab = ylab,
main = title,
...
)
}
} else if (usepkg == "pheatmap" && requireNamespace("pheatmap", quietly = TRUE)) {
if(!is.null(reordered$original_config)){
smsum1 <- smsum %>%
ungroup() %>%
dplyr::filter(ID==smsum$ID[1])
col_colors <- smsum1 %>% pull(RColorCode)
names(col_colors) <- smsum1 %>% pull(Sample)
anno_colors <- reordered$group_order %>% pull(RColorCode)
names(anno_colors) <- reordered$group_order %>% pull(GroupName)
anno_colors <- list(Group=anno_colors)
anno_col <- smsum1 %>% select(Group=GroupName) %>% as.data.frame()
rownames(anno_col) <- smsum1 %>% pull(Sample)
} else{
col_colors <- NA
anno_colors <- NA
anno_col <- NA
}
pheatmap::pheatmap(as.matrix(heatmap_data),
cluster_rows = cluster_rows,
cluster_cols = cluster_cols,
main = title,
col = fill,
annotation_colors = anno_colors,
annotation_col = anno_col,
...
)
} else {
#We go with ggplot2
if (cluster_rows) {
# Perform hierarchical clustering of events/genes
hr <- hclust(dist(heatmap_data))
heatmap_data <- heatmap_data[hr$order,]
}
if (is.null(reordered$original_config)) {
hc <- hclust(dist(t(heatmap_data)))
heatmap_data <- heatmap_data[,hc$order]
}
heatmap_data$id <- rownames(heatmap_data)
m <- gather(heatmap_data,key="variable",value="value",-id)
m$variable <- factor(m$variable,
levels = colnames(subset(heatmap_data, select = -id)),
ordered = TRUE)
m$id <- factor(m$id, levels = unique(m$id), ordered = TRUE)
gp <- ggplot(m, aes(x = variable, y = id)) +
geom_tile(aes(fill = value)) +
theme_bw() +
theme(axis.text.x = element_text(angle= 45, hjust = 1, size = 9),
axis.text.y = element_text(size = 8),
panel.grid = element_blank(),
panel.border = element_blank()) +
xlab(xlab) + ylab(ylab) + ggtitle(title) +
coord_fixed(ratio = 1)
gp <- gp + scale_fill_gradientn(colours = fill, na.value = "white",
name = ifelse(expr == FALSE, "PSI", "cRPKM"))
return(gp)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.