#' TF ChIPseq data summary plots
#'
#' This method generates various summary plots for TF ChIPseq data. \cr
#' \itemize{
#' \item A table summarizing quantils of peak enrichment, p-value and q-value
#' \item A distribution beeswarm plot for peak enrichment values.
#' \item A distribution beeswarm plot for peak p-value.
#' \item A distribution beeswarm plot for peak width.
#' \item A pie chart showing peak annotation distribution.
#' }
#' If there is no peak information, it returns blank plot with "No Data" text
#'
#' @param sampleId sampleId
#' @param peakAnnotation macs2 peak annotation file generated by \code{narrowPeak_annotate()}
#' @param peakFile narrowPeak or broadPeak file generated by macs2
#' @param peakFormat One of narrowPeak or broadPeak
#' @param pval_cutoff A cutoff for -log10(p-value) for drawing horizontal line in beeswarm
#' plot. Default: 20
#' @param enrichment_cutoff A cutoff for peak enrichment for drawing horizontal line in
#' beeswarm plot. Default: 3
#' @param markTargets A named list of target genes which needs to be annotated in the
#' scatter plot. Default: NULL
#' @param pointColor A named vector with colors for each target category in \code{markTargets}
#' list. Default: NULL
#' @param pointAlpha A named vector of alpha vlues for each target category in
#' \code{markTargets} list. Default: NULL
#'
#' @return A list with following elements is returned.
#' \itemize{
#' \item \strong{data:} plot data used for plotting
#' \item \strong{figure:} A combined figure generated by \code{ggpubr::ggarrange}
#' \item \strong{plots:} Individual ggplot list for each plot.
#' \itemize{
#' \item \strong{table:} a \code{ggtable} object for quantile stats
#' \item \strong{distribution:} A list with two beeswarm plot objects: enrichment, pval.
#' \item \strong{annoPie:} A pie chart for peak annotation distribution
#' }}
#'
#' @importFrom ggpubr annotate_figure ggarrange
#'
#' @export
#'
#' @examples NA
chip_summary <- function(sampleId, peakAnnotation, peakFile, peakFormat,
pval_cutoff = 20, enrichment_cutoff = 3,
markTargets = NULL, pointColor = NULL, pointAlpha = NULL){
peakFormat <- match.arg(peakFormat, choices = c("narrowPeak", "broadPeak"))
if(!is.null(markTargets)){
if(!is.list(markTargets)) stop("markTargets should be a named list")
if(!all(names(markTargets) %in% names(pointColor))){
stop("pointColor should be a named vector with same names as markTargets")
}
if(!all(names(markTargets) %in% names(pointAlpha))){
stop("pointAlpha should be a named vector with same names as markTargets")
}
}
pointColor["peaks"] <- "grey"
pointAlpha["peaks"] <- 0.5
markingPreference <- tibble(geneType = c(names(markTargets), "peaks"),
drawOrder = 1:(length(markTargets)+1))
peakMarkDf <- purrr::map_dfr(
.x = markTargets,
.f = function(x){tibble(geneId = x, stringsAsFactors = F)},
.id = "geneType")
peaksGr <- rtracklayer::import(con = peakFile, format = peakFormat)
if(file.exists(peakAnnotation)){
## import peak data with annotation
peakAnno <- markPeaks::import_peak_annotation(
sampleId = sampleId,
peakAnnoFile = peakAnnotation,
column_suffix = FALSE
)
## add target gene type information if points need to be colored
if(!is.null(markTargets)){
peakAnno <- dplyr::left_join(x = peakAnno, y = peakMarkDf, by = "geneId")
pointAlpha["peaks"] <- 1
} else{
peakAnno <- dplyr::mutate(peakAnno, geneType = "peaks")
}
peakAnno <- peakAnno %>%
tidyr::replace_na(replace = list(geneType = "peaks")) %>%
dplyr::left_join(y = markingPreference, by = c("geneType" = "geneType"))
## for each peak, select one gene. preference is decided by order of names in markTargets list
plotData <- dplyr::group_by(peakAnno, peakId) %>%
dplyr::arrange(drawOrder) %>%
dplyr::slice(1L) %>%
dplyr::ungroup() %>%
dplyr::distinct() %>%
dplyr::mutate(
sampleId = sampleId,
peakWidth = peakEnd - peakStart + 1,
geneType = forcats::fct_relevel(.f = geneType, !!!markingPreference$geneType)
)
# plotData$geneType <- factor(
# x = plotData$geneType, levels = markingPreference$geneType, ordered = TRUE
# )
## summary table
summaryTable <- purrr::map_dfr(
.x = structure(c("peakEnrichment", "peakPval", "peakQval"),
names = c("peakEnrichment", "peakPval", "peakQval")),
.f = function(x){
as.list(summary(plotData[[x]]))
},
.id = "metric")
gg_stable <- ggpubr::ggtexttable(summaryTable, rows = NULL,
theme = ttheme("mOrange"))
## set outliers to 99.5 quantile
if(nrow(plotData) > 20){
plotData$peakEnrichment <- pmin(plotData$peakEnrichment, quantile(plotData$peakEnrichment, 0.995))
plotData$peakPval <- pmin(plotData$peakPval, quantile(plotData$peakPval, 0.99))
plotData$peakWidth <- pmin(plotData$peakWidth, quantile(plotData$peakWidth, 0.99))
}
# quantile(plotData$peakEnrichment,
# c(seq(0, 0.9, by = 0.1), 0.95, 0.99, 0.992, 0.995, 0.999, 0.9999, 1), na.rm = T)
#
# quantile(plotData$peakPval,
# c(seq(0, 0.9, by = 0.1), 0.95, 0.99, 0.992, 0.995, 0.999, 0.9999, 1), na.rm = T)
theme_scatter <- theme_bw() +
theme(
axis.title.x = element_blank(),
axis.title = element_text(size = 12),
axis.text = element_text(size = 12),
panel.grid = element_blank()
)
## macs2 fold-enrichment density scatter plot
gg_dot_enrichment <- ggplot(
data = plotData,
mapping = aes(x = !!sampleId, y = peakEnrichment)) +
geom_hline(yintercept = enrichment_cutoff, color = "black", linetype = "dashed") +
ggbeeswarm::geom_quasirandom(mapping = aes(color = geneType, alpha = geneType)) +
geom_boxplot(width=0.1, fill = NA, outlier.colour = NA, color = alpha("black", 0.7)) +
scale_color_manual(name = "Peak annotations",
values = pointColor) +
scale_alpha_manual(values = pointAlpha) +
labs(title = "macs2 fold enrichment distribution",
y = "peak enrichment") +
guides(alpha = FALSE,
color = guide_legend(override.aes = list(size = 4))) +
theme_scatter
## macs2 p-value density scatter plot
gg_dot_pval <- ggplot(
data = plotData,
mapping = aes(x = !!sampleId, y = peakPval)) +
geom_hline(yintercept = pval_cutoff, color = "black", linetype = "dashed") +
ggbeeswarm::geom_quasirandom(mapping = aes(color = geneType, alpha = geneType)) +
geom_boxplot(width=0.1, fill = NA, outlier.colour = NA, color = alpha("black", 0.7)) +
scale_color_manual(name = "Peak annotations",
values = pointColor) +
scale_alpha_manual(values = pointAlpha) +
labs(title = "macs2 p-value distribution",
y = "-log10(p-value)") +
guides(alpha = FALSE,
color = guide_legend(override.aes = list(size = 4))) +
theme_scatter
## peak width distribution
gg_dot_width <- ggplot(
data = plotData,
mapping = aes(x = !!sampleId, y = peakWidth)) +
geom_hline(yintercept = 300, color = "black", linetype = "dashed") +
ggbeeswarm::geom_quasirandom(mapping = aes(color = geneType, alpha = geneType)) +
geom_boxplot(width=0.1, fill = NA, outlier.colour = NA, color = alpha("black", 0.7)) +
scale_color_manual(name = "Peak annotations",
values = pointColor) +
scale_alpha_manual(values = pointAlpha) +
labs(title = "macs2 peak width distribution",
y = "peak width (bp)") +
guides(alpha = FALSE,
color = guide_legend(override.aes = list(size = 4))) +
theme_scatter
## peak annotation pie chart
peakAnSummary <- dplyr::group_by(peakAnno, peakAnnotation, peakCategory) %>%
dplyr::summarise(count = n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(label = round(count / sum(count), digits = 4)) %>%
dplyr::mutate(
peakAnnotation = forcats::fct_relevel(
peakAnnotation,
"upstream", "promoter", "5UTR", "tx_start", "include_tx", "EXON",
"INTRON", "tx_end", "3UTR", "intergenic"
)
) %>%
dplyr::arrange(desc(peakAnnotation)) %>%
dplyr::mutate(
ypos = cumsum(count)- 0.5*count
)
peakAnnotationCol <- structure(
.Data = rainbow(n = length(levels(peakAnSummary$peakAnnotation))),
names = length(levels(peakAnSummary$peakAnnotation))
)
gg_pie_peakAn <- ggplot(
data = peakAnSummary,
mapping = aes(x = 1, y = count, fill = peakAnnotation, label = scales::percent(label))
) +
geom_bar(color = "black", stat = "identity") +
coord_polar(theta = "y", start = 0, direction = -1) +
geom_label_repel(
mapping = aes(x = 1.5, y = ypos),
hjust = 0.5, direction = "x",
size = 5, show.legend = FALSE) +
scale_fill_manual(
name = "Peak annotations",
values = peakAnnotationCol
) +
scale_x_continuous(limits = c(0, 1.75), expand = expansion(add = c(0, 0))) +
labs(title = "Peak annotation distribution") +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
# panel.border = element_blank(),
panel.grid=element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
plot.title=element_text(size=14, face="bold")
)
plotList <- list(
table = gg_stable,
distribution = list(enrichment = gg_dot_enrichment,
pval = gg_dot_pval,
width = gg_dot_width),
annoPie = gg_pie_peakAn
)
## combine plots + table to make a summary figure
summaryFig <- ggpubr::ggarrange(
ggpubr::ggarrange(gg_dot_enrichment, gg_dot_pval, gg_dot_width,
nrow = 1, ncol = 3, legend = "bottom", common.legend = TRUE),
ggpubr::ggarrange(gg_stable, gg_pie_peakAn,
nrow = 1, ncol = 2, legend = "right", widths = c(1, 1),
hjust = 0),
nrow = 2,
heights = c(5, 5)
) +
theme(
plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm")
)
summaryFig <- ggpubr::annotate_figure(
p = summaryFig,
top = ggpubr::text_grob(
label = paste(sampleId, "ChIPseq summary (#peaks =",length(peaksGr), ")"),
size = 16, face = "bold")
)
} else{
## blank figure
plotData <- NULL
summaryFig <- ggpubr::annotate_figure(
p = ggplot() + geom_text(mapping = aes(x = 0.5, y = 0.5), label = "No data", size = 30) +
theme_void(),
top = ggpubr::text_grob(
label = paste(sampleId, "ChIPseq summary\n#peaks =",length(peaksGr)),
size = 16, face = "bold")
)
plotList <- list(table = NULL,
distribution = list(enrichment = NULL, pval = NULL),
annoPie = NULL)
}
return(list(
data = plotData,
figure = summaryFig,
plots = plotList
))
}
##################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.