#' @title Draw Overlap Plot
#' @description ne list overlap enrichment test + volcano plot
#' @param df something
#' @author April Kim
#' @family genoppi
#' @export
### --------------------------------------------------
### gene list overlap enrichment test + volcano plot
plotOverlap <- function(df, bait, reference, title = '', subtitle = NULL, drawLabel = T,
col.genelist.sig = 'yellow'){
require(ggplot2)
require(ggrepel)
# If a df with only genes are inputted assume
# that the user would like to just overlay genes
if (ncol(reference) == 1){
reference <- data.frame(gene=reference$gene, significant=TRUE)
warn('[plotOverlap] no "significant"-column, assuming all genes significant.')
}
# generate statistics for enrichement
statistics <- enrichment(df, bait, reference)
# start volcano plot
p <- ggplot(df, aes(x=logFC, y=-log10(pvalue))) +
geom_hline(yintercept=0, color="grey") + geom_vline(xintercept=0, color="grey") +
xlab(bquote(log[2]*"(Fold change)")) + ylab(bquote(-log[10]*"("*italic(.("P"))*"-value)")) +
# plot all proteins (green = significant, blue = not significant)
geom_point(alpha=0.5, size=1.5, color=ifelse(df$significant, "springgreen3", "royalblue2")) +
# label bait (red = signficant, orange = not significant)
geom_point(subset(df, gene %in% bait & significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="red") +
geom_point(subset(df, gene %in% bait & !significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="orange") +
# label sig genes in gene list (yellow = significant, white = not significant)
geom_point(subset(df, gene %in% statistics$sigGenes & significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color=col.genelist.sig) +
geom_point(subset(df, gene %in% statistics$sigGenes & !significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="white") +
geom_point(subset(df, gene %in% bait | gene %in% statistics$sigGenes), mapping=aes(x=logFC, y=-log10(pvalue)),
size=2, color="black", shape=1) +
# title (with statistics$fisherP) and theme
labs(title = title,
subtitle = ifelse(is.null(subtitle),
paste(length(statistics$sigGenes)," detected. ",length(statistics$overlap),
" significant. p-value = ", format(statistics$fisherP,digits=3),sep=""),
subtitle))+
#ggtitle( +
theme_bw() + theme(axis.line=element_line(color="grey"), plot.title=element_text(size=10)) + ggstamp()
### only draw text labels for gene list genes if drawLabel==TRUE
if (drawLabel==TRUE) {
p <- p + geom_text_repel(subset(df, gene==bait | gene %in% statistics$sigGenes), mapping=aes(label=gene),
arrow=arrow(length=unit(0.015, 'npc')), box.padding=unit(0.15, "lines"),
point.padding=unit(0.2, "lines"), color="black", size=3)
} else {
p <- p + geom_text_repel(subset(df, gene==bait), mapping=aes(label=gene),
arrow=arrow(length=unit(0.015, 'npc')), box.padding=unit(0.15, "lines"),
point.padding=unit(0.2, "lines"), color="black", size=3)
}
print(p)
}
plotOverlap.old <- function(df,bait,listName,listDf,drawLabel, debug = F){
if (debug) browser()
# all proteins detected in experiment + in gene list (REMOVE BAIT)
#preys <- unique(intersect(df$gene, listDf$gene))
#preys <- unique(df$gene[df$gene %in% listDf$gene & df$gene != bait])
preys <- unique(df$gene[df$gene != bait])
# significant preys
sigPreys <- unique(df$gene[df$significant & df$gene %in% preys])
# significant genes in gene list, limited to detected preys
sigGenes <- listDf$gene[listDf$significant & listDf$gene %in% preys]
# overlap of significant preys and significant genes
overlap <- intersect(sigPreys,sigGenes)
# sig preys but not sig genes
sigPreysOnly <- setdiff(sigPreys,sigGenes)
# sig genes but not sig preys
sigGenesOnly <- setdiff(sigGenes,sigPreys)
# neither sig preys nor sig genes
neither <- setdiff(setdiff(preys,sigPreys),sigGenes)
# Fisher's exact test (one-tailed)
fisherP <- fisher.test(matrix(c(length(overlap),length(sigPreysOnly),
length(sigGenesOnly),length(neither)),nrow=2),alternative="greater")$p
# start volcano plot
p <- ggplot(df, aes(x=logFC, y=-log10(pvalue))) +
geom_hline(yintercept=0, color="grey") + geom_vline(xintercept=0, color="grey") +
xlab(bquote(log[2]*"(Fold change)")) + ylab(bquote(-log[10]*"("*italic(.("P"))*"-value)")) +
# plot all proteins (green = significant, blue = not significant)
geom_point(alpha=0.5, size=1.5, color=ifelse(df$significant, "springgreen3", "royalblue2")) +
# label bait (red = signficant, orange = not significant)
geom_point(subset(df, gene==bait & significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="red") +
geom_point(subset(df, gene==bait & !significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="orange") +
# label sig genes in gene list (yellow = significant, white = not significant)
geom_point(subset(df, gene %in% sigGenes & significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="yellow") +
geom_point(subset(df, gene %in% sigGenes & !significant), mapping=aes(x=logFC, y=-log10(pvalue)), size=2, color="white") +
geom_point(subset(df, gene==bait | gene %in% sigGenes), mapping=aes(x=logFC, y=-log10(pvalue)),
size=2, color="black", shape=1) +
# title (with FisherP) and theme
ggtitle(paste(listName,": ",length(sigGenes)," detected, ",length(overlap)," significant, p = ",
format(fisherP,digits=3),sep="")) +
theme_bw() + theme(axis.line=element_line(color="grey"), plot.title=element_text(size=10))
### only draw text labels for gene list genes if drawLabel==TRUE
if (drawLabel==TRUE) {
p <- p + geom_text_repel(subset(df, gene==bait | gene %in% sigGenes), mapping=aes(label=gene),
arrow=arrow(length=unit(0.015, 'npc')), box.padding=unit(0.15, "lines"),
point.padding=unit(0.2, "lines"), color="black", size=3)
} else {
p <- p + geom_text_repel(subset(df, gene==bait), mapping=aes(label=gene),
arrow=arrow(length=unit(0.015, 'npc')), box.padding=unit(0.15, "lines"),
point.padding=unit(0.2, "lines"), color="black", size=3)
}
print(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.