#' @import ggplot2
#' @importFrom rlang .data
#' @import dplyr
#' @import tidyr
#' @import tidyverse
NULL
#' Plot distribution of sample counts
#'
#' Displays the proportion of counts for each mutation type across one
#' or more samples.
#'
#' @param musica A \code{\linkS4class{musica}} object.
#' @param sample_names Names of the samples to plot.
#' @param table_name Name of table used for plotting counts. If \code{NULL},
#' then the first table in the \code{\linkS4class{musica}} object will be used.
#' Default \code{NULL}.
#' @return Generates a ggplot object
#' @examples
#' data(musica_sbs96)
#' plot_sample_counts(musica_sbs96, sample_names =
#' sample_names(musica_sbs96)[1])
#' @export
plot_sample_counts <- function(musica, sample_names, table_name = NULL) {
if (is.null(table_name)) {
table_name <- names(tables(musica))[1]
}
# Extract counts for specific samples
tab <- .extract_count_table(musica, table_name)
ix <- match(sample_names, colnames(tab))
if (all(is.na(ix))) {
stop("The values in 'sample_names' did not match any sample IDs in table '",
table_name, "'.")
}
else if (anyNA(ix)) {
warning("The following samples in 'sample_names' were not found in ",
"table '", table_name,
"' and will ", "be exlcuded from the plot: ",
paste(sample_names[is.na(ix)], collapse = ", "))
ix <- ix[!is.na(ix)]
}
sample_counts <- tab[, ix, drop = FALSE]
result <- methods::new("musica_result",
signatures = sample_counts, exposures = matrix(),
algorithm = "sample", musica = musica,
table_name = table_name)
g <- plot_signatures(result) + ggplot2::ylab("Mutation Counts")
return(g)
}
#' Plots the mutational signatures
#'
#' After mutational signature discovery has been performed, this function
#' can be used to display the distribution of each mutational signature. The
#' \code{color_variable} and \code{color_mapping} parameters can be used
#' to change the default color scheme of the bars.
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}.
#' @param color_variable Name of the column in the variant annotation data.frame
#' to use for coloring the mutation type bars. The variant annotation data.frame
#' can be found within the count table of the \code{\linkS4class{musica}}
#' object. If \code{NULL}, then the default column specified in the count
#' table will be used. Default \code{NULL}.
#' @param color_mapping A character vector used to map items in the
#' \code{color_variable} to a color. The items in \code{color_mapping}
#' correspond to the colors. The names of the items in \code{color_mapping}
#' should correspond to the uniqeu items in \code{color_variable}. If
#' \code{NULL}, then the default \code{color_mapping} specified in the count
#' table will be used. Default \code{NULL}.
#' @param text_size Size of axis text. Default \code{10}.
#' @param show_x_labels If \code{TRUE}, the labels for the mutation types
#' on the x-axis will be shown. Default \code{TRUE}.
#' @param show_y_labels If \code{TRUE}, the y-axis ticks and labels will be
#' shown. Default \code{TRUE}.
#' @param same_scale If \code{TRUE}, the scale of the probability for each
#' signature will be the same. If \code{FALSE}, then the scale of the y-axis
#' will be adjusted for each signature. Default \code{TRUE}.
#' @param y_max Vector of maximum y-axis limits for each signature. One value
#' may also be provided to specify a constant y-axis limit for all signatures.
#' Vector length must be 1 or equivalent to the number of signatures. Default
#' \code{NULL}.
#' @param annotation Vector of annotations to be displayed in the top right
#' corner of each signature. Vector length must be equivalent to the number of
#' signatures. Default \code{NULL}.
#' @return Generates a ggplot or plotly object
#' @examples
#' data(res)
#' plot_signatures(res)
#' @export
plot_signatures <- function(result, plotly = FALSE,
color_variable = NULL, color_mapping = NULL,
text_size = 10,
show_x_labels = TRUE, show_y_labels = TRUE,
same_scale = TRUE, y_max = NULL, annotation = NULL) {
loc_num <- NULL
mutation_color <- NULL
x <- NULL
xend <- NULL
y <- NULL
yend <- NULL
signatures <- signatures(result)
sig_names <- colnames(signatures)
table_name <- table_selected(result)
tab <- tables(result)[[table_name]]
annot <- get_annot_tab(tab)
num_sigs <- length(sig_names)
if (is.null(color_mapping)) {
color_mapping <- get_color_mapping(tab)
}
plot_dat <- .pivot_signatures(signatures, tab,
color_variable = color_variable)
width <- 0.45
motif_label_locations <- plot_dat$df[plot_dat$df$signature == plot_dat$df[1,2],] %>%
ungroup() %>%
mutate(loc_num = c(1:dim(plot_dat$df[plot_dat$df$signature == plot_dat$df[1,2],])[1])) %>%
group_by(mutation_color) %>%
summarise(x = min(loc_num) - width, xend = max(loc_num) + width,
y = 0, yend = 0.01)
# Whether to re-scale y axis
scales <- ifelse(isTRUE(same_scale), "fixed", "free_y")
# If annotation supplied
if (!is.null(annotation)){
annotation_text <- data.frame(
label = annotation,
signature = names(plot_dat$names),
mutation_color = c("T>G")
)
}
# Rename signature labels
sig_name_labels <- data.frame(
label = sig_names,
signature = names(plot_dat$names),
mutation_color = c("C>A")
)
# Add potential forced y-axis max
plot_dat$df$ymax <- rep(y_max, length(unique(plot_dat$df$motif)))
# Convert exposure probabilities to percentages
plot_dat$df$exposure <- plot_dat$df$exposure * 100
# Plot signatures
plot_dat$df %>%
ggplot(aes_string(y = "exposure", x = "motif", fill = "mutation_color")) +
geom_bar(stat = "identity") +
facet_grid(factor(signature) ~ ., scales = scales) +
ggplot2::xlab("Motifs") + ggplot2::ylab("Percent of Mutations") +
ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1)) +
ggplot2::scale_fill_manual(values = color_mapping) +
ggplot2::scale_x_discrete(labels = annot$context) +
ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.2)),
limits = c(0, NA), n.breaks = 4) +
ggplot2::geom_text(data = sig_name_labels,
mapping = aes_string(x = -Inf, y = Inf, label = "label"),
hjust = -0.075, vjust = 1.5,
fontface = "bold") -> p
# Add annotations, if necessary
if (exists("annotation_text") == TRUE){
p <- p + ggplot2::geom_text(data = annotation_text,
mapping = aes_string(x = Inf, y = Inf, label = "label"),
hjust = 1.075, vjust = 1.5,
fontface = "bold")
}
# Change y-axis scale, if necessary
if (!is.null(y_max)){
p <- p + geom_blank(aes_string(y = "ymax"))
}
# Plot motif labels
plot_dat$df$exposure_null <- rep(0, dim(plot_dat$df)[1])
plot_dat$df %>%
ggplot(aes_string(y = "exposure_null", x = "motif")) +
geom_bar(stat = "identity") +
ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0)),
limits = c(0, NA), breaks = c(0, 0.01),
labels = c(" "," "), n.breaks = 4) +
ggplot2::ylab("") +
ggplot2::geom_rect(data = motif_label_locations,
aes(xmin = x, xmax = xend, ymin = max(y), ymax = max(yend)),
fill = color_mapping, color = "black",
linewidth = 0.25, inherit.aes = FALSE) +
ggplot2::geom_text(data=motif_label_locations,
aes(x=x+(xend-x)/2, y=y+(yend-y)/2,
label = stringr::str_to_title(mutation_color)),
fontface = "bold", size = 4,
color = c("black", "white", rep("black", 4))) -> p2
# Adjust theme
p <- .gg_default_theme(p, text_size = text_size) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
theme(legend.position = "none") +
theme(plot.margin = margin(0,1,0,1)) +
theme(strip.background = element_blank(), strip.text.y = element_blank())
p2 <- .gg_default_theme(p2, text_size = text_size) +
theme(plot.margin = margin(0,1,0,1)) + # see function below
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.y = element_blank(),
panel.grid.major.x = element_blank(),
legend.position = "none")
if (!isTRUE(show_x_labels)) {
p <- p + theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major.x = element_blank())
}
if (!isTRUE(show_y_labels)){
p <- p + theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank())
p2 <- p2 + theme(axis.text.y = element_blank(),
axis.title.y = element_blank())
}
figure <- ggpubr::ggarrange(p2, p, ncol = 1, nrow = 2, heights = c(1,15))
if (isTRUE(plotly)) {
figure <- plotly::ggplotly(p)
}
return(figure)
}
#' Plot reconstruction error for a sample
#'
#' Displays the observed distribution of counts for each mutation type,
#' the distribution of reconstructed counts for each mutation type using
#' the inferred mutational signatures, and the difference between the
#' two distributions.
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param sample Name of the sample within the
#' \code{\linkS4class{musica_result}} object.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}.
#' @return Generates a ggplot or plotly object
#' @examples
#' data(res)
#' plot_sample_reconstruction_error(res, "TCGA-ER-A197-06A-32D-A197-08")
#' @export
plot_sample_reconstruction_error <- function(result, sample,
plotly = FALSE) {
signatures <- .extract_count_table(get_musica(result),
table_selected(result))[, sample,
drop = FALSE]
sample_name <- colnames(signatures)
reconstructed <- reconstruct_sample(result, sample)
sigs <- cbind(signatures, reconstructed, signatures - reconstructed)
colnames(sigs) <- c("Counts", "Reconstructed", "Difference")
recontruct_result <- methods::new("musica_result",
signatures = sigs,
exposures = matrix(), algorithm = "NMF",
musica = get_musica(result),
table_name = table_selected(result))
plot_signatures(recontruct_result, same_scale = FALSE) +
ggplot2::ggtitle("Reconstruction error", subtitle = sample_name) + ylab("")
}
# Utility functions -------------------------------
.pivot_signatures <- function(signatures, tab, sig_names = NULL,
color_variable = NULL) {
if (is.null(sig_names)) {
sig_names <- colnames(signatures)
}
annot <- tab@annotation
rownames(annot) <- annot$motif
# Ensure signature colnames are unique
# They can not be unique in the sig_compare function if one signature
# is matched up against several others in the second result object
colnames(signatures) <- paste0(colnames(signatures),
"-", seq(ncol(signatures)))
names(sig_names) <- colnames(signatures)
# Rormat signature matrix into long data.frame
signatures %>%
as.data.frame %>%
tibble::rownames_to_column(var = "motif") %>%
tidyr::pivot_longer(cols = dplyr::all_of(names(sig_names)),
names_to = "signature",
values_to = "exposure",
names_repair = "minimal") -> df
# Check for mutation color variable in annot table
final_color_variable <- NULL
if (is.null(color_variable) && !is.null(tab@color_variable)) {
color_variable <- tab@color_variable
}
# Set up color variable if supplied as vector or the name of a column in
# the table annotation
if (length(color_variable) == 1 && color_variable %in% colnames(annot)) {
final_color_variable <- annot[df$motif, tab@color_variable]
} else if (length(color_variable) == nrow(signatures)) {
final_color_variable <- color_variable
} else {
warning("'color_variable' must be a column in the table annotation: ",
paste(colnames(annot), collapse = ", "), ". Or it must be the ",
"same length as the number of motifs in the signatures: ",
nrow(signatures))
}
# Save color variable to df if it was specified
if (!is.null(final_color_variable)) {
df <- cbind(df, mutation_color = final_color_variable)
}
# Make sure signature order is preserved using factor
df$signature <- factor(df$signature, levels = names(sig_names))
return(list(df = df, names = sig_names))
}
.addSmallLegend <- function(myPlot, pointSize = 2,
textSize = 10, spaceLegend = 0.5) {
myPlot +
ggplot2::guides(shape = ggplot2::guide_legend(override.aes =
list(size = pointSize)),
color = ggplot2::guide_legend(override.aes =
list(size = pointSize))) +
ggplot2::theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = ggplot2::unit(spaceLegend, "lines"),
legend.box.background = ggplot2::element_rect(colour = "black"),
legend.spacing.x = ggplot2::unit(0.25, "cm"))
}
.gg_default_theme <- function(p, text_size = 10) {
p <- p + theme_bw() + theme(
panel.grid = element_blank(),
text = element_text(size = text_size))
if("mono" %in% names(grDevices::pdfFonts())) {
p <- p + theme(text = element_text(family = "mono",
size = text_size))
}
return(p)
}
.discrete_colors <- function(n) {
hues <- seq(15, 375, length = n + 1)
colors <- grDevices::hcl(h = hues, l = 65, c = 100)[seq_len(n)]
return(colors)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.