#' make plots for publication
#'
#' @param x Path to the xls file of DEseq2 output, eg, function
#' DESeq2_for_featureCounts, result "transcripts_deseq2.csv.fix.xls"
#'
#' @param path_pdf Directory to save the pdf file, contains
#' scatter, MA, Volcano plots
#'
#' @param save2df A logical value, whether or not save the plot to PDF file,
#' default: TRUE
#'
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @export
DESeq2_publish_plot <- function(x, path_pdf,
gene_labels = NULL, #c("nxf2", "piwi", "CG9754"),
x_name = NULL,
y_name = NULL,
save2pdf = TRUE) {
stopifnot(file.exists(x))
# read file
df <- DESeq2_xls2df(x)
# rename id FBgn0000001_te
# convert to FB00000001 and te
id_tag <- sum(grepl("FBgn.*_", df$id)) / length(df$id)
if(ceiling(id_tag) == 1) {
df <- df %>%
tidyr::separate(id, into = c("name", "id"), sep = "_")
df$name <- NULL
}
names(df) <- gsub("[^A-Za-z0-9]", "_", names(df)) # remove unsupport characters
x.name <- colnames(df)[2]
y.name <- colnames(df)[3]
# make plots
p1 <- DESeq2_de_scatter(df, x.name, y.name,
show.sig.genes = TRUE,
labels = gene_labels,
pval_cutoff = 0.05, max_labels = 5)
p2 <- DESeq2_de_ma(df, show.sig.genes = TRUE,
labels = gene_labels,
pval_cutoff = 0.05, max_labels = 5,
show_volcano = FALSE)
p3 <- DESeq2_de_ma(df, show.sig.genes = TRUE,
labels = gene_labels,
pval_cutoff = 0.05, max_labels = 5,
show_volcano = TRUE)
# add legend
figure_legend <- "Figure. Differentially expression analysis. (a-b) Comparasion of
gene expression is shown as rpm from two conditions. Dashed lines indicate
twofold change. a. scatter plot, b. MA plot. (c) Volcano plot showing
enrichment values and corresponding significance levels."
pg1 <- cowplot::plot_grid(p1, p2, p3, align = "hv",
ncol = 2, labels = "auto")
pg2 <- cowplot::add_sub(pg1, figure_legend, x = 0, hjust = 0, size = 10)
# cowplot::ggdraw(pg2)
# save plot
if (isTRUE(save2pdf)) {
if (! dir.exists(path_pdf)) {
dir.create(path_pdf, showWarnings = FALSE, recursive = TRUE, mode = "0740")
}
rpt_fname <- paste0("DESeq2.", x.name, ".vs.", y.name, ".publish.pdf")
rpt_file <- file.path(path_pdf, rpt_fname)
pdf(rpt_file, width = 6, height = 6, paper = "a4")
print(cowplot::ggdraw(pg2))
dev.off()
} else {
return(pg2)
}
}
#' generate scatter plot for DESeq2 analysis
#'
#' @param data A data frame with gene expression,
#'
#' @param x.name Name in column names of data frame, present on x axis
#'
#' @param y.name Same as x.name, but present on y axis
#'
#' @param show.sig.genes A logical or int value, whether or not to show
#' significantly changes genes, default: FALSE
#'
#' @param labels A set of gene ids to show in plot, default: NULL
#'
#' @param pval_cutoff A value to select significantly changes genes, default: 0.05
#'
#' @param max_labels A integer value, how many genes to be labeled on plot,
#' default: 3
#'
#' @import ggplot2
#' @import ggrepeal
#'
#' @export
#'
DESeq2_de_scatter <- function(data, x.name, y.name, show.sig.genes = FALSE,
labels = NULL, pval_cutoff = 0.05, max_labels = 3) {
stopifnot(is.data.frame(data))
stopifnot(all(c("id", x.name, y.name, "padj") %in% names(data)))
# rename column
data <- dplyr::rename(data, pval = padj)
# re-arrange data.frame
data_sig <- dplyr::filter(data, pval <= pval_cutoff) %>% dplyr::arrange(pval)
# pick labels
data_label <- dplyr::filter(data, id %in% labels)
if (isTRUE(show.sig.genes)) {
data_label <- rbind(data_label, data_sig)
}
if (nrow(data_label) > 0) {
max_labels <- ifelse(nrow(data_label) > max_labels, max_labels, nrow(data_label))
data_label <- data_label[1:max_labels, ]
}
p <- plot_scatter(data = data,
data_sig = data_sig,
x.name = x.name,
y.name = y.name,
data_label = data_label)
return(p)
}
#' create MA or Volcano plot
#'
#' @param data A data frame with gene expression,
#'
#' @param show.sig.genes A logical or int value, whether or not to show
#' significantly changes genes, default: FALSE
#'
#' @param labels A set of gene ids to show in plot, default: NULL
#'
#' @param pval_cutoff A value to select significantly changes genes, default: 0.05
#'
#' @param max_labels A integer value, how many genes to be labeled on plot,
#' default: 3
#'
#' @import ggplot2
#' @import ggrepeal
#'
#' @export
#'
DESeq2_de_ma <- function(data, show.sig.genes = FALSE, labels = NULL,
pval_cutoff = 0.05, max_labels = 3,
show_volcano = FALSE) {
stopifnot(is.data.frame(data))
stopifnot(all(c("id", "baseMean", "log2FoldChange", "padj") %in% names(data)))
# rename columns
data <- dplyr::rename(data, logFC = log2FoldChange, pval = padj)
# re-arrange data.frame
data_sig <- dplyr::filter(data, pval <= pval_cutoff) %>% dplyr::arrange(pval)
data_non_sig <- dplyr::filter(data, pval <= pval_cutoff)
# pick labels
data_label <- dplyr::filter(data, id %in% labels)
if (isTRUE(show.sig.genes)) {
data_label <- rbind(data_label, data_sig)
}
if (nrow(data_label) > 0) {
max_labels <- ifelse(nrow(data_label) > max_labels, max_labels, nrow(data_label))
data_label <- data_label[1:max_labels, ]
}
# volcano plot
if (isTRUE(show_volcano)) {
p <- plot_volcano(data, data_sig, data_label)
} else {
# change log2foldchange
data$logFC <- ifelse(data$logFC > 4, 4,
ifelse(data$logFC < -4, -4, data$logFC))
p <- plot_ma(data, data_sig, data_label)
}
return(p)
}
#' generate scatter plot for DE analysis
#'
#' @param data A data frame for the full version of data, required
#'
#' @param data_sig A data frame for significantly changes genes, required
#'
#' @param data_label A data frame for points to be labeled, default: NULL
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @export
#'
plot_scatter <- function(data, data_sig, x.name, y.name, x.label = NULL,
y.label = NULL, data_label = NULL) {
# data_sig is required
stopifnot(is.data.frame(data))
stopifnot(is.data.frame(data_sig))
stopifnot(is.character(x.name))
stopifnot(is.character(y.name))
stopifnot(all(c("id", x.name, y.name) %in% names(data)))
stopifnot(all(c("id", x.name, y.name) %in% names(data_sig)))
# labels on axis
x.label <- ifelse(is.null(x.label), x.name, x.label)
y.label <- ifelse(is.null(y.label), y.name, y.label)
xn <- bquote(.(x.label)~"["*log["10"]~"rpm]")
yn <- bquote(.(y.label)~"["*log["10"]~"rpm]")
# pre-process
if (nrow(data) == 0) {
return(NULL)
}
# function
to_log10 <- function(data, x, y) {
stopifnot(is.data.frame(data))
stopifnot(all(c("id", x, y) %in% names(data)))
# select columns
df <- dplyr::select_(data, .dots = c("id", x, y)) %>%
as.data.frame()
# remove old row names
rownames(df) <- NULL
df <- tibble::column_to_rownames(df, "id") %>% log10()
df <- df[complete.cases(df), ]
return(df)
}
df <- to_log10(data, x.name, y.name)
if (nrow(df) == 0) {
stop("empty dataframe")
}
p <- ggplot(df, aes_string(x.name, y.name)) +
geom_abline(slope = 1, intercept = 0, color = "grey10") +
geom_abline(slope = 1, intercept = log10(2),
color = "grey30", linetype = 2) +
geom_abline(slope = 1, intercept = -log10(2),
color = "grey30", linetype = 2) +
geom_point(color = "grey60", size = .4) +
xlab(xn) + ylab(yn) +
scale_x_continuous(limits = c(0, 6),
breaks = seq(0, 6, 1),
labels = seq(0, 6, 1),
expand = c(0, 0, 0, 0)) +
scale_y_continuous(limits = c(0, 6),
breaks = seq(0, 6, 1),
labels = seq(0, 6, 1),
expand = c(0, 0, 0, 0)) +
theme_classic() +
theme(panel.border = element_rect(color = "black", fill = NA, size = .7),
plot.title = element_text(color = "black", hjust = .5, size = 14),
panel.grid = element_blank(),
axis.line = element_blank(),
axis.ticks.length = unit(.2, "cm"),
axis.ticks = element_line(color = "black", size = .5),
axis.text = element_text(color = "black", size = 10),
axis.title = element_text(color = "black", size = 12))
if (nrow(data_sig) > 0) {
df_sig <- to_log10(data_sig, x.name, y.name)
p <- p +
geom_point(mapping = aes_string(x.name, y.name),
data = df_sig,
color = "red",
size = .6)
}
if (is.data.frame(data_label) & nrow(data_label) > 0) {
df_label <- to_log10(data_label, x.name, y.name) %>%
tibble::rownames_to_column("id")
stopifnot(all(c("id", x.name, y.name) %in% names(data)))
p <- p +
geom_text_repel(
mapping = aes_string(x.name, y.name),
data = df_label,
label = df_label$id,
# size = 10,
box.padding = 1,
segment.size = 0.4,
segment.color = "grey50",
direction = "both") +
geom_point(mapping = aes_string(x.name, y.name),
data = data_label,
color = "red",
size = 1.2)
}
return(p)
}
#' generate volcano plot for DE analysis
#'
#' @param data A data frame for the full version of data, required
#'
#' @param data_sig A data frame for significantly changes genes, required
#'
#' @param data_label A data frame for points to be labeled, default: NULL
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @export
#'
plot_ma <- function(data, data_sig, data_label = NULL) {
# data_sig is required
stopifnot(is.data.frame(data))
stopifnot(is.data.frame(data_sig))
stopifnot(all(c("id", "baseMean", "logFC", "pval") %in% names(data)))
stopifnot(all(c("id", "baseMean", "logFC", "pval") %in% names(data_sig)))
xn <- expression(paste(log["10"], "(baseMean)"))
yn <- expression(paste(log["2"], "(Fold-change)"))
data_non_sig <- dplyr::filter(data, ! id %in% data_sig$id)
# plot
p <- ggplot() +
geom_hline(yintercept = 0, color = "grey10", size = .7) +
geom_point(mapping = aes(baseMean, logFC),
data = data_non_sig,
color = "grey60",
size = .6) +
geom_point(mapping = aes(baseMean, logFC),
data = data_sig,
color = "red",
size = .6) +
scale_x_log10() +
scale_y_continuous(limits = c(-4, 4)) +
xlab(xn) + ylab(yn) +
theme_classic() +
theme(panel.border = element_rect(color = "black", fill = NA, size = .7),
plot.title = element_text(color = "black", hjust = .5, size = 14),
panel.grid = element_blank(),
axis.line = element_blank(),
axis.ticks.length = unit(.2, "cm"),
axis.ticks = element_line(color = "black", size = .5),
axis.text = element_text(color = "black", size = 10),
axis.title = element_text(color = "black", size = 12))
if (is.data.frame(data_label) & nrow(data_label) > 0) {
stopifnot(all(c("id", "baseMean", "logFC", "pval") %in% names(data_label)))
p <- p +
geom_text_repel(
mapping = aes(baseMean, logFC),
data = data_label,
label = data_label$id,
# size = 10,
box.padding = 1,
segment.size = 0.4,
segment.color = "grey50",
direction = "both"
) +
geom_point(mapping = aes(baseMean, logFC),
data = data_label,
color = "red",
size = 1.2)
}
return(p)
}
#' generate volcano plot for DE analysis
#'
#' @param data A data frame for the full version of data
#'
#' @param data_sig A data frame for significantly changed genes
#'
#' @param data_label A data frame for points to be labeled
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @export
#'
plot_volcano <- function(data, data_sig = NULL, data_label = NULL) {
# data_sig not required
stopifnot(is.data.frame(data))
stopifnot(all(c("id", "logFC", "pval") %in% names(data)))
xn <- expression(paste(log["2"], "(mean ratio of Treatment/Control)"))
yn <- expression(paste(-log["10"], "(", italic("P"), " value)"))
data$logPval <- -log10(data$pval)
p <- ggplot() +
geom_vline(xintercept = 0, size = .6, color = "black") +
geom_vline(xintercept = c(-1, 1), size = .5, linetype = 2, color = "grey60") +
geom_hline(yintercept = 2, size = .5, linetype = 2, color = "grey60") +
geom_point(mapping = aes(logFC, logPval),
data = data,
color = "grey70",
size = .6) +
xlab(xn) + ylab(yn) +
theme_bw() +
theme(panel.border = element_blank(),
panel.grid = element_blank(),
axis.line = element_line(color = "black", size = .7),
axis.ticks.length = unit(.2, "cm"),
axis.ticks = element_line(color = "black", size = .7),
axis.text = element_text(color = "black", size = 10),
axis.title = element_text(color = "black", size = 12))
if (is.data.frame(data_sig)) {
stopifnot(all(c("id", "logFC", "pval") %in% names(data)))
data_sig$logPval <- -log10(data_sig$pval)
p <- p + geom_point(mapping = aes(logFC, logPval),
data = data_sig,
# color = "chocolate2",
color = "red",
size = .6)
}
if (is.data.frame(data_label) & nrow(data_label) > 0) {
stopifnot(all(c("id", "logFC", "pval") %in% names(data_label)))
data_label$logPval <- -log10(data_label$pval)
p <- p +
geom_text_repel(
mapping = aes(logFC, logPval),
data = data_label,
label = data_label$id,
# size = 10,
nudge_x = 1.2,
nudge_y = .02,
point.padding = .4,
box.padding = .4,
segment.size = .6,
segment.color = "grey60",
direction = "both") +
geom_point(mapping = aes(logFC, logPval),
data = data_label,
# color = "chocolate2",
color = "red",
size = 1.2)
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.