Nothing
#' Create a cross plot comparing differential expression (DE) results
#' @description This function creates a cross plot visualising the differences
#' in log2(fold-change) between two DE analyses.
#' @inheritParams volcano_enhance
#' @param DEtable1,DEtable2,DEtable1Subset,DEtable2Subset tables of DE results,
#' usually generated by \code{\link{DEanalysis_edger}}; the first two should
#' contain all genes, while the second two should only contain DE genes
#' @param mask whether to hide genes that were not called DE in either
#' comparison; default is FALSE
#' @param df Optionally, pre-computed cross plot table, from cross_plot_prep
#' @param lfc.threshold the log2(fold-change)
#' threshold to determine whether a gene is DE
#' @param labnames,cols.chosen the legend labels and colours for the 4
#' categories of genes ("not DE", "DE both", "DE comparison 1", "DE comparison 2")
#' @param labels.per.region how many labels to show in each region of the plot;
#' the plot is split in 8 regions using the axes and major diagonals, and the
#' points closest to the origin in each region are labelled; default is
#' 5, set to 0 for no labels
#' @param fix.axis.ratio whether to ensure the x and y axes have the same
#' units, resulting in a square plot; default is TRUE
#' @return The cross plot as a ggplot object.
#' @export
#' @examples
#' expression.matrix.preproc <- as.matrix(read.csv(
#' system.file("extdata", "expression_matrix_preprocessed.csv", package = "bulkAnalyseR"),
#' row.names = 1
#' ))[1:500, 1:4]
#'
#' anno <- AnnotationDbi::select(
#' getExportedValue('org.Mm.eg.db', 'org.Mm.eg.db'),
#' keys = rownames(expression.matrix.preproc),
#' keytype = 'ENSEMBL',
#' columns = 'SYMBOL'
#' ) %>%
#' dplyr::distinct(ENSEMBL, .keep_all = TRUE) %>%
#' dplyr::mutate(NAME = ifelse(is.na(SYMBOL), ENSEMBL, SYMBOL))
#'
#' edger <- DEanalysis_edger(
#' expression.matrix = expression.matrix.preproc,
#' condition = rep(c("0h", "12h"), each = 2),
#' var1 = "0h",
#' var2 = "12h",
#' anno = anno
#' )
#' deseq <- DEanalysis_edger(
#' expression.matrix = expression.matrix.preproc,
#' condition = rep(c("0h", "12h"), each = 2),
#' var1 = "0h",
#' var2 = "12h",
#' anno = anno
#' )
#' cross_plot(
#' DEtable1 = edger,
#' DEtable2 = deseq,
#' DEtable1Subset = dplyr::filter(edger, abs(log2FC) > 1, pvalAdj < 0.05),
#' DEtable2Subset = dplyr::filter(deseq, abs(log2FC) > 1, pvalAdj < 0.05),
#' labels.per.region = 0
#' )
cross_plot = function(
DEtable1,
DEtable2,
DEtable1Subset,
DEtable2Subset,
df = NULL,
lfc.threshold = NULL,
raster = FALSE,
mask = FALSE,
labnames = c("not DE", "DE both", "DE comparison 1", "DE comparison 2"),
cols.chosen = c("grey", "purple", "dodgerblue", "lightcoral"),
labels.per.region = 5,
fix.axis.ratio = TRUE,
add.guide.lines = TRUE,
add.labels.custom = FALSE,
genes.to.label = NULL,
seed = 0,
label.force = 1
){
if(is.null(lfc.threshold)){
lfc.threshold <- min(abs(DEtable1Subset$log2FC), abs(DEtable2Subset$log2FC), na.rm = TRUE)
}
if (is.null(df)){
df <- cross_plot_prep(
DEtable1 = DEtable1,
DEtable2 = DEtable2,
DEtable1Subset = DEtable1Subset,
DEtable2Subset = DEtable2Subset,
lfc.threshold = lfc.threshold,
mask = mask,
labnames = labnames
)
}
names(cols.chosen) <- labnames
lfc1 = NULL
lfc2 = NULL
if(raster){
cp <- ggplot(df) +
theme_minimal() +
ggrastr::rasterise(geom_point(aes(x = lfc1, y = lfc2, colour = colours), alpha = 0.5)) +
scale_colour_manual(values = cols.chosen)
}else{
cp <- ggplot(df) +
theme_minimal() +
geom_point(aes(x = lfc1, y = lfc2, colour = colours), alpha = 0.5) +
scale_colour_manual(values = cols.chosen)
}
if(fix.axis.ratio){
max.lfc <- max(abs(df$lfc1), abs(df$lfc2), na.rm=TRUE)
cp <- cp + xlim(-max.lfc, max.lfc) + ylim(-max.lfc, max.lfc)
}
if(add.guide.lines){
cp <- cp +
geom_hline(yintercept = 0, color = "black") +
geom_vline(xintercept = 0, color = "black") +
geom_abline(slope = 1, intercept = 0, color = "black") +
geom_hline(yintercept = lfc.threshold, color = "green") +
geom_hline(yintercept = -lfc.threshold, color = "green") +
geom_vline(xintercept = lfc.threshold, color = "green") +
geom_vline(xintercept = -lfc.threshold, color = "green") +
geom_hline(yintercept = 2 * lfc.threshold, color = "blue") +
geom_hline(yintercept = -2 * lfc.threshold, color = "blue") +
geom_vline(xintercept = 2 * lfc.threshold, color = "blue") +
geom_vline(xintercept = -2 * lfc.threshold, color = "blue")
}
if(add.labels.custom){
genes.to.rename <- genes.to.label[names(genes.to.label) != ""]
# genes.to.label <- df$gene_name[(match(genes.to.label, c(df$gene_name, df$gene_id)) - 1) %% nrow(df) + 1]
genes.to.label <- unique(genes.to.label[!is.na(genes.to.label)])
genes.to.rename <- genes.to.rename[genes.to.rename %in% genes.to.label]
df.label <- dplyr::filter(df, .data$gene_name %in% genes.to.label)
df.label$name[match(genes.to.rename, df.label$gene_name)] <- names(genes.to.rename)
if(nrow(df.label) == 0){
message(paste0("add.labels.custom was TRUE but no genes specified; ",
"did you forget to supply genes.to.label or annotation?"))
}
cp <- cp +
ggrepel::geom_label_repel(data = df.label,
mapping = aes(x = lfc1, y = lfc2, label = .data$gene_name),
max.overlaps = Inf,
force = label.force,
point.size = NA)
}
if(labels.per.region > 0){
colours <- NULL
df.lab <- df %>%
dplyr::mutate(
dist = sqrt(lfc1 ^ 2 + lfc2 ^ 2),
quad = ifelse(colours == labnames[2],
ifelse(lfc1 * lfc2 > 0,
ifelse(lfc1 > 0, 1, 5),
ifelse(lfc1 > 0, 7, 3)),
ifelse(colours == labnames[3],
ifelse(lfc1 > 0, 8, 4),
ifelse(lfc2 > 0, 2, 6))))
df.top.distances <- data.frame()
for(region in 1:8){
df.lab.sub <- dplyr::filter(df.lab, .data$quad == region) %>%
dplyr::arrange(dplyr::desc(.data$dist))
df.top.dist <- utils::head(df.lab.sub, labels.per.region)
df.top.distances <- rbind(df.top.distances, df.top.dist)
}
set.seed(seed = seed)
cp <- cp +
ggrepel::geom_label_repel(data = df.top.distances,
mapping = aes(x = lfc1, y = lfc2, label = .data$gene_name),
max.overlaps = Inf,
force = label.force,
point.size = NA)
}
cp
}
cross_plot_prep <- function(
DEtable1,
DEtable2,
DEtable1Subset,
DEtable2Subset,
lfc.threshold,
mask = FALSE,
labnames = c("not DE", "DE both", "DE comparison 1", "DE comparison 2")
){
de1 <- DEtable1Subset$gene_id
de2 <- DEtable2Subset$gene_id
if(mask){
all.genes <- unique(c(de1, de2))
}else{
all.genes <- unique(c(DEtable1$gene_id, DEtable2$gene_id))
}
lfc1 <- DEtable1$log2FC[match(all.genes, DEtable1$gene_id)]
pval1 <- DEtable1$pvalAdj[match(all.genes, DEtable1$gene_id)]
lfc2 <- DEtable2$log2FC[match(all.genes, DEtable2$gene_id)]
pval2 <- DEtable2$pvalAdj[match(all.genes, DEtable2$gene_id)]
colours = vector(length=length(all.genes))
colours[] = labnames[1]
colours[all.genes %in% de1] = labnames[3]
colours[all.genes %in% de2] = labnames[4]
colours[all.genes %in% intersect(de1, de2)] = labnames[2]
df <- data.frame(
gene_id = all.genes,
gene_name = c(DEtable1$gene_name, DEtable2$gene_name)[match(all.genes, c(DEtable1$gene_id, DEtable2$gene_id))],
lfc1 = lfc1,
pval1 = pval1,
lfc2 = lfc2,
pval2 = pval2,
colours = colours
)
return(df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.