Nothing
#' MA-plot of a differential testing result
#'
#' @param de_res An object returned by `DESeq2::results()` or `DESeq2::lfcShrink()`
#' @param dds The DESeqDataSet that was used to build the ´de_res´ object. This is needed for gene name annotation.
#' @param annotate_top_n Annotate the top n significant genes by fold change (up- and down-regulated)
#' @param highlight_genes Vector of gene names or gene IDs to highlight on the plot (overwrites top_n annotation)
#'
#' @return A ggplot object of the ggplot2 package that contains the MA-plot. The plot shows three classes of points:
#' Light gray points are genes with low counts that are removed from the analysis by independent filtering. Darker gray points
#' are not significant genes that show a density map to visualize where the majority of non-significant points are located. Finally,
#' red point show significant genes.
#'
#'
#' @examples
#' library("DESeq2")
#' set.seed(1)
#' dds <- makeExampleDESeqDataSet(n=1500, m=6, betaSD=.3, interceptMean=6)
#' rowData(dds)$gene_name <- rownames(dds)
#' dds <- DESeq(dds)
#' de_res <- results(dds)
#' plot_ma(de_res, dds)
#'
#' @export
plot_ma <- function(de_res, dds, annotate_top_n = 5, highlight_genes = NULL) {
fdr <- de_res@metadata$alpha
if(!identical(rownames(dds),rownames(de_res))){
stop("Rownames of de_res and dds do not match.")
}
# add gene names from rowdata
de_res <- de_res %>%
as_tibble(rownames = "gene_id") %>%
left_join(
rowData(dds) %>%
as_tibble(rownames = "gene_id") %>%
select(gene_id, gene_name),
by = "gene_id"
)
# treat special case if de_res resulted from lfcShrink()
# with a method that produced svalues
if ("svalue" %in% colnames(de_res)) {
# remove differential testing columns in case
# lfcShrink() was provided with a DESeqResults object
# in the 'res' argument
de_res$pvalue <- NULL
de_res$svalue <- NULL
colnames(de_res)[colnames(de_res)=="svalue"] <- "padj"
}
de_res <- de_res %>%
mutate(significant = padj < fdr)
# top n significant genes
annotate_top_n_genes <- de_res %>%
filter(significant) %>%
filter(
(rank(log2FoldChange) <= annotate_top_n) |
(rank(-log2FoldChange) <= annotate_top_n)
) %>%
pull(gene_id)
# either regular MA-plot with top n genes annotated or
# with highlighted genes
if (is.null(highlight_genes)) {
p <- de_res %>%
filter(!significant) %>%
ggplot(aes(baseMean, log2FoldChange, label = gene_name)) +
# not passing independent filtering
geom_point(data = filter(de_res, is.na(padj)), color = "gray85") +
# not significant
ggpointdensity::geom_pointdensity()
# if there are no significant genes, geom_point will throw an error,
# so this case needs to be treated separately
if(any(de_res$significant, na.rm=T)==TRUE){
# significant
p <- p +
geom_point(data = filter(de_res, significant), aes(fill = "significant"), color = "red", alpha = .7) +
# highlight top significant
ggrepel::geom_text_repel(
data = de_res %>%
filter(gene_id %in% annotate_top_n_genes),
color = "red", max.overlaps = Inf
)
}
p + scale_color_gradient(low = "gray70", high = "gray10") +
guides(
color = guide_colorbar(order = 1),
fill = guide_legend(order = 2)
) +
scale_x_log10() +
labs(x = "mean normalized count", y = "log2-fold change", fill = NULL) +
cowplot::theme_cowplot()
} else {
de_res %>%
arrange(significant) %>%
ggplot(aes(baseMean, log2FoldChange, color = significant)) +
geom_point() +
gghighlight::gghighlight(gene_name %in% highlight_genes | gene_id %in% highlight_genes,
label_key = gene_name, use_group_by = F,
label_params = list(fill = NA, label.size = NA, fontface = "bold")
) +
scale_color_manual(values = c("red", "black"), breaks = c(TRUE, FALSE)) +
scale_x_log10() +
labs(x = "mean normalized count", y = "log2-fold change") +
cowplot::theme_cowplot()
}
}
#' Plot correlations of samples within a level of a group
#'
#' For the given level, the gene-wise median over all samples is computed
#' to obtain a reference sample. Then, each sample is plotted against the
#' reference as MA-plot.
#' @param vsd An object generated by `DESeq2::vst()`
#' @param group A grouping variable, must be a column of `colData(vsd)`
#' @param level A level of the grouping variable
#' @param y_lim Y-axis limits, the axis will run from `-y_lim` to `y_lim`
#' @param rasterise Whether to rasterise the points using ggrastr.
#' @param ... Other parameters passed on to ggrastr::rasterise
#'
#' @return A list of ggplot objects of the ggplot2 package that contains for
#' each sample of the specified level the the sample vs reference MA-plot.
#'
#' @examples
#' library("DESeq2")
#' set.seed(1)
#' dds <- makeExampleDESeqDataSet(n=1000, m=4, interceptMean=10)
#' colData(dds)$type <- c("A","A","B","B")
#' vsd <- vst(dds)
#' plot_within_level_sample_MAs(vsd, group="type", level="A")
#'
#' @export
plot_within_level_sample_MAs <- function(vsd, group, level, y_lim = 4, rasterise = FALSE, ...) {
# prevent 'no visible binding for global variable' package warnings
reference <- NULL
# samples of the level
level_samples <- colnames(vsd)[vsd[[group]] == level]
# gene-wise median
median_sample <- rowMedians(assay(vsd)[, level_samples, drop = F], na.rm=T)
if(rasterise == TRUE){
raster_fun <- function(x) {ggrastr::rasterise(x, ...)}
} else {
raster_fun <- identity
}
map(level_samples, function(s) {
plot <- tibble(sample = median_sample, reference = assay(vsd)[, s]) %>%
ggplot(aes(x = (sample + reference) / 2, y = sample - reference)) +
raster_fun(ggpointdensity::geom_pointdensity()) +
scale_color_viridis_c() +
ylim(-y_lim, y_lim) +
labs(x = "mean log2(norm. count)", y = paste0("log2-fold change\nsample vs. reference"), title = paste0(s, " vs. ", level)) +
geom_hline(yintercept = 0, linetype = "55") +
cowplot::theme_cowplot()
})
}
#' MA plots of samples
#'
#' For each level of the grouping variable, the gene-wise median over all samples is computed
#' to obtain a reference sample. Then, each sample is plotted against the reference.
#' @param vsd An object generated by `DESeq2::vst()`
#' @param group A grouping variable, must be a column of `colData(vsd)`
#' @param y_lim Y-axis limits, the axis will run from `-y_lim` to `y_lim`
#' @param rasterise Whether to rasterise the points using ggrastr.
#' @param ... Other parameters passed on to ggrastr::rasterise
#'
#' @examples
#' library("DESeq2")
#' set.seed(1)
#' dds <- makeExampleDESeqDataSet(n=1000, m=4, interceptMean=10)
#' colData(dds)$type <- c("A","A","B","B")
#' vsd <- vst(dds)
#' plot_sample_MAs(vsd, group="type")
#'
#' @return A list of ggplot objects of the ggplot2 package, with each element corresponding to one MA-plot.
#' @export
plot_sample_MAs <- function(vsd, group, y_lim = 3, rasterise = FALSE, ...) {
if (!group %in% colnames(colData(vsd))) {
stop("Variable 'group' must be a column of colData(vsd).")
}
vsd[[group]] %>%
unique() %>%
sort() %>%
map(~ plot_within_level_sample_MAs(vsd, group, level = .x, y_lim = y_lim, rasterise = rasterise, ...)) %>%
purrr::flatten()
}
#' Plot a gene
#'
#' @param gene A gene ID or gene name, i.e. an element of `rownames(dds)` or of `rowData(dds)$gene_name`
#' @param dds a DESeqDataSet
#' @param x_var Variable to plot on the x-axis. If NULL, then each sample is plotted separately.
#' @param color_by Variable (column in `colData(dds)`) to color points by.
#' @param point_alpha alpha value of `geom_point()`
#' @param point_rel_size relative size of `geom_point()`
#' @param show_plot Whether to show the plot or not
#'
#' @return The function displays the plot and returns invisible the data frame of
#' expression values and colData annotation for the gene.
#'
#' @examples
#' library("DESeq2")
#' set.seed(1)
#' dds <- makeExampleDESeqDataSet()
#' colData(dds)$type <- c("A","A","A","B","B","B")
#' colData(dds)$patient <- c("1","1","2","2","3","3")
#' dds <- estimateSizeFactors(dds)
#' plot_gene("gene1", dds)
#' plot_gene("gene1", dds, x_var="patient", color_by="type")
#'
#' @export
plot_gene <- function(gene, dds, x_var = NULL, color_by = NULL, point_alpha = .7, point_rel_size = 2, show_plot = TRUE) {
if (gene %in% rownames(dds)) {
gene_id <- gene
gene_name <- rowData(dds)$gene_name[rownames(dds) == gene]
} else if (gene %in% rowData(dds)$gene_name) {
if( length(get_gene_id(gene, dds)) > 1) {
stop(str_c("Gene ", gene, "appears multiple times in the dataset. Use 'get_gene_id()' and specify a gene ID."))
}
gene_id <- rownames(dds)[rowData(dds)$gene_name == gene]
gene_name <- gene
} else {
stop("Variable 'gene' must be either an element of rownames(dds) or of rowData(dds)$gene_name.")
}
gene_data <- log2(1 + counts(dds, normalized = T)[gene_id, ]) %>%
enframe(name = "sample_id", value = "count") %>%
bind_cols(colData(dds) %>% as_tibble() %>% select(-tidyselect::any_of("sample_id")))
if (is.null(x_var)) {
x_var <- "sample_id"
}
plot <- gene_data %>%
ggplot(aes(x = .data[[x_var]], y = count, color = if (!is.null(color_by)){.data[[color_by]]} else {NULL})) +
geom_point(size = rel(point_rel_size), alpha = point_alpha) +
labs(y = "log2(1 + norm. count)", title = gene_name, color = color_by) +
cowplot::theme_cowplot()
if (x_var == "sample_id") {
plot <- plot + theme(axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1))
}
if (show_plot) {
print(plot)
}
invisible(list("plot" = plot, "data" = gene_data))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.