#' Diagnostic plots for the differential binding background
#'
#' To perform differential binding analysis between two conditions the
#' \code{\link{calculateBsBackground}} function groups crosslinks per gene
#' into those from binding sites and those from background regions.
#' The \code{\link{filterBsBackground}} function perfroms certain
#' filtering operations on that background to ensure that it's suitable
#' for differential testing. This function visually displays the effect
#' of these filtering operations.
#'
#'
#' @param object a \code{\link{BSFDataSet}} object with background counts filtered
#' by \code{\link{filterBsBackground}}
#' @param filter character; which filter to display in the plot (one of:
#' 'minCounts', 'balanceBackground', 'balanceCondition')
#'
#' @return a plot of type \code{\link{ggplot}}
#'
#' @seealso \code{\link{calculateBsBackground}}
#' @seealso \code{\link{filterBsBackground}}
#'
#' @import ggplot2
#'
#' @examples
#' # load clip data
#' files <- system.file("extdata", package="BindingSiteFinder")
#' load(list.files(files, pattern = ".rda$", full.names = TRUE))
#' load(list.files(files, pattern = ".rds$", full.names = TRUE)[1])
#'
#' # make testset
#' bds = makeBindingSites(bds, bsSize = 7)
#' bds = assignToGenes(bds, anno.genes = gns)
#' bds = imputeBsDifferencesForTestdata(bds)
#' bds = calculateBsBackground(bds, anno.genes = gns, use.offset = FALSE)
#'
#' # use all filters and remove binding sites that fail (default settings)
#' bds = filterBsBackground(bds)
#'
#' # display minCount filter
#' plotBsBackgroundFilter(bds, filter = "minCounts")
#'
#' # display balance background filter
#' plotBsBackgroundFilter(bds, filter = "balanceBackground")
#'
#' # display balance condition filter
#' plotBsBackgroundFilter(bds, filter = "balanceCondition")
#'
#' @export
plotBsBackgroundFilter <- function(object, filter = c("minCounts", "balanceBackground" ,"balanceCondition")) {
# bind locally used variables
s <- r <- name <- value <- ratio.ref <- ratio.comp <- NULL
# INPUT CHECKS
# --------------------------------------------------------------------------
stopifnot(is(object, "BSFDataSet"))
if (is.null(object@params$filterBsBackground)) {
msg0 = paste0("Global filter was not applied yet. Run filterBsBackground() to compute background first. \n")
stop(msg0)
}
# handle display plot options
filter = match.arg(filter, choices = c("minCounts", "balanceBackground", "balanceCondition"))
# check for plot specific data
if (is.null(object@plotData$filterBsBackground$data.minCounts) & filter == "minCounts") {
msg0 = paste0("Plot selection: '", filter, "' is not possible.")
msg1 = paste0("Make sure to run filterBsBackground with option '", filter, "' first.")
stop(c(msg0,msg1))
}
if (is.null(object@plotData$filterBsBackground$data.balanceBackground) & filter == "balanceBackground") {
msg0 = paste0("Plot selection: '", filter, "' is not possible.")
msg1 = paste0("Make sure to run filterBsBackground with option '", filter, "' first.")
stop(c(msg0,msg1))
}
if (is.null(object@plotData$filterBsBackground$data.balanceCondition) & filter == "balanceCondition") {
msg0 = paste0("Plot selection: '", filter, "' is not possible.")
msg1 = paste0("Make sure to run filterBsBackground with option '", filter, "' first.")
stop(c(msg0,msg1))
}
# MAIN
# --------------------------------------------------------------------------
# make plot for minCounts filter
if (filter == "minCounts") {
# get options used
optstr = object@params$filterBsBackground
optstrNice = paste0("Cutoff=", optstr$minCounts.cutoff)
# get data
df = object@plotData$filterBsBackground$data.minCounts %>%
mutate(r = rank(s))
p = ggplot(df, aes(x = r, y = s)) +
ggrastr::rasterise(ggpointdensity::geom_pointdensity(size = 2), dpi = 300) +
theme_bw() +
geom_hline(yintercept = optstr$minCounts.cutoff, linetype = "dashed") +
scale_y_log10() +
viridis::scale_color_viridis(option = "rocket") +
theme(legend.position = "top") +
guides(color = guide_colorbar(title.position = 'top', title.hjust = 0.5,
barwidth = unit(20, 'lines'), barheight = unit(.5, 'lines'))) +
annotate("text", x = max(df$r)/2, y = optstr$minCounts.cutoff,
label = paste0("Cutoff=", optstr$minCounts.cutoff),
vjust = -1.1) +
labs(
title = "filterBsBackground() - minCounts",
subtitle = optstrNice,
x = "Rank",
y = "#Crosslinks per gene",
color = "#Genes"
)
}
# make plot for balanceBackground filter
if (filter == "balanceBackground") {
# get options used
optstr = object@params$filterBsBackground
optstrNice = paste0("Cutoff.bs=", optstr$balanceBackground.cutoff.bs,
" cutoff.bg=", optstr$balanceBackground.cutoff.bg)
# get data
df = object@plotData$filterBsBackground$data.balanceBackground %>%
mutate(name = ifelse(name == "ratio.bg", "Background", "Binding site"))
df = df[!is.na(df$value),]
p = ggplot(df, aes(x = name, y = value)) +
ggdist::stat_halfeye(aes(fill = name), adjust = 0.5, width = .6, .width = 0, justification = -.2, point_colour = NA) +
geom_boxplot(aes(color = name), width = .12, outlier.size = 0.5) +
geom_point(data = subset(df, name == "ratio.bg" & value < optstr$balanceBackground.cutoff.bg), aes(fill = name), shape = 21, alpha = 0.3) +
geom_point(data = subset(df, name == "ratio.bs" & value > optstr$balanceBackground.cutoff.bs), aes(fill = name), shape = 21, alpha = 0.3) +
annotate("rect", xmin = 1.75, xmax = 2.25, ymin = optstr$balanceBackground.cutoff.bs, ymax = 1, alpha = .3, fill = "#435B66") +
annotate("rect", xmin = 0.75, xmax = 1.25, ymin = 0, ymax = optstr$balanceBackground.cutoff.bg, alpha = .3, fill = "#A76F6F") +
annotate("text", x = 2.25, y = 0.5, vjust = -1, label = paste0("Remove genes with bs ratio above: ", optstr$balanceBackground.cutoff.bs)) +
annotate("text", x = 1.25, y = 0.5, vjust = -1, label = paste0("Remove genes with bg ratio below: ", optstr$balanceBackground.cutoff.bg)) +
coord_flip() +
theme_bw() +
scale_fill_manual(values = c("#A76F6F", "#435B66")) +
scale_color_manual(values = c("#A76F6F", "#435B66")) +
theme(legend.position = "none") +
labs(
title = "filterBsBackground() - balanceBackground",
subtitle = optstrNice,
x = NULL,
y = "Background to binding site count ratio"
)
}
# make plot for balanceCondition filter
if (filter == "balanceCondition") {
# get options used
optstr = object@params$filterBsBackground
optstrNice = paste0("Cutoff=", optstr$balanceCondition.cutoff)
# get reference condition
this.meta = getMeta(object)
this.condition.reference = levels(this.meta$condition)[1]
this.condition.comp = levels(this.meta$condition)[2]
# get data
df = object@plotData$filterBsBackground$data.balanceCondition %>% as.data.frame()
df$ratio.ref = df[,3] / df[,2]
df$ratio.comp = df[,4] / df[,2]
df = df %>% select(ratio.ref, ratio.comp) %>% pivot_longer(everything())
p = ggplot(df, aes(x = name, y = value)) +
ggdist::stat_halfeye(aes(fill = name), adjust = 0.5, width = .6, .width = 0, justification = -.2, point_colour = NA) +
geom_boxplot(aes(color = name), width = .12, outlier.size = 0.5) +
geom_point(data = subset(df, name == "ratio.ref" & value < optstr$balanceCondition.cutoff), aes(fill = name), shape = 21, alpha = 0.3) +
geom_point(data = subset(df, name == "ratio.comp" & value < optstr$balanceCondition.cutoff), aes(fill = name), shape = 21, alpha = 0.3) +
annotate("rect", xmin = 0.05, xmax = 2.95, ymin = 0, ymax = optstr$balanceCondition.cutoff, alpha = .3, fill = "darkgrey") +
coord_flip() +
theme_bw() +
scale_fill_manual(values = c("#A76F6F", "#435B66")) +
scale_color_manual(values = c("#A76F6F", "#435B66")) +
theme(legend.position = "none") +
labs(
title = "filterBsBackground() - balanceCondition",
subtitle = optstrNice,
x = NULL,
y = "Condition count ratio"
) +
scale_x_discrete(labels = c(paste0(this.condition.comp, "\n (comparison)"),
paste0(this.condition.reference, "\n (reference)")))
}
return(p)
}
#' MA style plot
#'
#' Wrapper that plots differential binding results as MA plot. For each binding
#' site the estimated baseMean (log2) is shown on X and the fold-change (log2)
#' is shown on Y.
#'
#' @param object a \code{\link{BSFDataSet}} object with results calculated by
#' \code{\link{calculateBsFoldChange}}
#' @param what character; whether to show results for binding sites or the
#' background (one of: 'bs', 'bg')
#' @param sig.threshold numeric; what P value significance level to use
#' (default = 0.05)
#'
#' @return a plot of type \code{\link{ggplot}}
#'
#' @seealso \code{\link{calculateBsFoldChange}}
#'
#' @import ggplot2
#'
#' @examples
#' # load clip data
#' files <- system.file("extdata", package="BindingSiteFinder")
#' load(list.files(files, pattern = ".rda$", full.names = TRUE))
#' load(list.files(files, pattern = ".rds$", full.names = TRUE)[1])
#'
#' # make testset
#' bds = makeBindingSites(bds, bsSize = 7)
#' bds = assignToGenes(bds, anno.genes = gns)
#' bds = imputeBsDifferencesForTestdata(bds)
#' bds = calculateBsBackground(bds, anno.genes = gns, use.offset = FALSE)
#'
#' # use all filters and remove binding sites that fail (default settings)
#' bds = filterBsBackground(bds)
#'
#' # calculate fold-changes
#' bds = calculateBsFoldChange(bds)
#'
#' # make MA plot
#' plotBsMA(bds)
#'
#' @export
plotBsMA <- function(object,
what = c("bs", "bg"),
sig.threshold = 0.05) {
# bind locally used variables
bs.padj <- bs.log2FoldChange <- sig <- bs.baseMean <- bg.padj <- NULL
bg.log2FoldChange <- bg.baseMean <- NULL
# INPUT CHECKS
# --------------------------------------------------------------------------
stopifnot(is(object, "BSFDataSet"))
if (is.null(object@params$calculateBsFoldChange)) {
msg0 = paste0("Fold-changes were not calculated yet. Run calculateBsFoldChange() first. \n")
stop(msg0)
}
# handle display plot options
what = match.arg(what, choices = c("bs", "bg"))
this.ranges = getRanges(object)
expected.cols = c("bs.padj", "bs.log2FoldChange", "bs.baseMean",
"bg.padj", "bg.log2FoldChange", "bg.baseMean")
if (!all(expected.cols %in% colnames(as.data.frame(mcols(this.ranges))))) {
msg0 = paste0("MA plot not possible, results columns: ", paste(expected.cols, collapse = ','), " not present.\n")
msg1 = paste0("Make sure to run calculateBsFoldChange() first.\n")
stop(c(msg0, msg1))
}
# MAIN
# --------------------------------------------------------------------------
optstr = object@params$calculateBsFoldChange
optstrNice = paste0("alpha=", optstr$alpha)
bright_up_down_not = c("#999999", "#68b1a5", "#874C62")
dark_up_down_not = c("#4d4d4d", "#2b544d", "#623747")
if (what == "bs") {
df = as.data.frame(mcols(this.ranges)) %>%
mutate(sig = ifelse(bs.padj < sig.threshold & bs.log2FoldChange > 0, "Up",
ifelse(bs.padj < sig.threshold & bs.log2FoldChange < 0, "Down", "Not"))) %>%
mutate(sig = factor(sig, levels = c("Not", "Up", "Down"))) %>%
arrange(sig)
p = ggplot(df, aes(x = log2(bs.baseMean), y = bs.log2FoldChange, color = sig, fill = sig)) +
ggrastr::rasterise(geom_point(shape = 21, stroke = 0.5, size = 1.5), dpi = 300) +
geom_hline(yintercept = 0, color = "black", alpha = .5) +
theme_bw() +
scale_fill_manual(values = bright_up_down_not) +
scale_color_manual(values = dark_up_down_not) +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
labs(
title = "plotBsMA() - binding sites",
subtitle = optstrNice,
x = "Base mean",
y = "Fold-change (log2)",
color = "Regulation",
fill = "Regulation")
}
if (what == "bg") {
df = as.data.frame(mcols(this.ranges)) %>%
mutate(sig = ifelse(bg.padj < sig.threshold & bg.log2FoldChange > 0, "Up",
ifelse(bg.padj < sig.threshold & bg.log2FoldChange < 0, "Down", "Not"))) %>%
mutate(sig = factor(sig, levels = c("Not", "Up", "Down"))) %>%
arrange(sig)
p = ggplot(df, aes(x = log2(bg.baseMean), y = bg.log2FoldChange, color = sig, fill = sig)) +
ggrastr::rasterise(geom_point(shape = 21, stroke = 0.5, size = 1.5), dpi = 300) +
geom_hline(yintercept = 0, color = "black", alpha = .5) +
theme_bw() +
scale_fill_manual(values = bright_up_down_not) +
scale_color_manual(values = dark_up_down_not) +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
labs(
title = "plotBsMA() - background",
subtitle = optstrNice,
x = "Base mean",
y = "Fold-change (log2)",
color = "Regulation",
fill = "Regulation")
}
return(p)
}
#' Volcano style plot
#'
#' Wrapper that plots differential binding results as volcano plot. For each binding
#' site the estimated fold-change (log2) is shown on X and the adjusted
#' P value (-log10) is shown on Y.
#'
#' @param object a \code{\link{BSFDataSet}} object with results calculated by
#' \code{\link{calculateBsFoldChange}}
#' @param what character; whether to show results for binding sites or the
#' background (one of: 'bs', 'bg')
#' @param sig.threshold numeric; what P value significance level to use
#' (default = 0.05)
#'
#' @return a plot of type \code{\link{ggplot}}
#'
#' @seealso \code{\link{calculateBsFoldChange}}
#'
#' @import ggplot2
#'
#' @examples
#' # load clip data
#' files <- system.file("extdata", package="BindingSiteFinder")
#' load(list.files(files, pattern = ".rda$", full.names = TRUE))
#' load(list.files(files, pattern = ".rds$", full.names = TRUE)[1])
#'
#' # make testset
#' bds = makeBindingSites(bds, bsSize = 7)
#' bds = assignToGenes(bds, anno.genes = gns)
#' bds = imputeBsDifferencesForTestdata(bds)
#' bds = calculateBsBackground(bds, anno.genes = gns, use.offset = FALSE)
#'
#' # use all filters and remove binding sites that fail (default settings)
#' bds = filterBsBackground(bds)
#'
#' # calculate fold-changes
#' bds = calculateBsFoldChange(bds)
#'
#' # make volcano plot
#' plotBsVolcano(bds)
#'
#' @export
plotBsVolcano <- function(object,
what = c("bs", "bg"),
sig.threshold = 0.05) {
# bind locally used variables
bs.padj <- bs.log2FoldChange <- sig <- bg.padj <- bg.log2FoldChange <- NULL
# INPUT CHECKS
# --------------------------------------------------------------------------
stopifnot(is(object, "BSFDataSet"))
if (is.null(object@params$calculateBsFoldChange)) {
msg0 = paste0("Fold-changes were not calculated yet. Run calculateBsFoldChange() first. \n")
stop(msg0)
}
# handle display plot options
what = match.arg(what, choices = c("bs", "bg"))
this.ranges = getRanges(object)
expected.cols = c("bs.padj", "bs.log2FoldChange", "bs.baseMean",
"bg.padj", "bg.log2FoldChange", "bg.baseMean")
if (!all(expected.cols %in% colnames(as.data.frame(mcols(this.ranges))))) {
msg0 = paste0("MA plot not possible, results columns: ", paste(expected.cols, collapse = ','), " not present.\n")
msg1 = paste0("Make sure to run calculateBsFoldChange() first.\n")
stop(c(msg0, msg1))
}
# MAIN
# --------------------------------------------------------------------------
optstr = object@params$calculateBsFoldChange
optstrNice = paste0("alpha=", optstr$alpha)
bright_up_down_not = c("#999999", "#68b1a5", "#874C62")
dark_up_down_not = c("#4d4d4d", "#2b544d", "#623747")
if (what == "bs") {
df = as.data.frame(mcols(this.ranges)) %>%
mutate(sig = ifelse(bs.padj < sig.threshold & bs.log2FoldChange > 0, "Up",
ifelse(bs.padj < sig.threshold & bs.log2FoldChange < 0, "Down", "Not"))) %>%
mutate(sig = factor(sig, levels = c("Not", "Up", "Down"))) %>%
arrange(sig)
p = ggplot(df, aes(x = bs.log2FoldChange, y = -log10(bs.padj), color = sig, fill = sig)) +
ggrastr::rasterise(geom_point(shape = 21, stroke = 0.5, size = 1.5), dpi = 300) +
geom_vline(xintercept = 0, color = "black", alpha = .5) +
theme_bw() +
scale_fill_manual(values = bright_up_down_not) +
scale_color_manual(values = dark_up_down_not) +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
labs(
title = "plotBsVolcano() - binding sites",
subtitle = optstrNice,
x = "Fold-change (log2)",
y = "Adjusted P value (-log10)",
color = "Regulation",
fill = "Regulation")
}
if (what == "bg") {
df = as.data.frame(mcols(this.ranges)) %>%
mutate(sig = ifelse(bg.padj < sig.threshold & bg.log2FoldChange > 0, "Up",
ifelse(bg.padj < sig.threshold & bg.log2FoldChange < 0, "Down", "Not"))) %>%
mutate(sig = factor(sig, levels = c("Not", "Up", "Down"))) %>%
arrange(sig)
p = ggplot(df, aes(x = bg.log2FoldChange, y = -log10(bg.padj), color = sig, fill = sig)) +
ggrastr::rasterise(geom_point(shape = 21, stroke = 0.5, size = 1.5), dpi = 300) +
geom_vline(xintercept = 0, color = "black", alpha = .5) +
theme_bw() +
scale_fill_manual(values = bright_up_down_not) +
scale_color_manual(values = dark_up_down_not) +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
labs(
title = "plotBsVolcano() - background",
subtitle = optstrNice,
x = "Fold-change (log2)",
y = "Adjusted P value (-log10)",
color = "Regulation",
fill = "Regulation")
}
return(p)
}
#' Gene Regulation Plot
#'
#' Display the fold-change of all binding sites from a given gene on a relative
#' per-nucleotide scale. Binding sites are displayed as dots and with increasing
#' log2 fold-change, they deviate stronger from the center line.
#'
#' For this function to work, binding sites must be assigned to hosting genes
#' using \code{\link{assignToGenes}}. It is also recommended to assing
#' binding sites to transcript regions with \code{\link{assignToTranscriptRegions}}.
#'
#' It is also necessary to calculate the log2 fold-change of binding sites between
#' two conditions using the differential binding workflow \code{\link{calculateBsFoldChange}}.
#'
#' If in addition the transcript regions of the binding sites are given, then
#' shapes are changed accordingly. An edge case can arise from the merging of two
#' \code{\link{BSFDataSet}} objects. If binding sites are overlapping and
#' slightly offset close to the end of a particular transcript region annotation,
#' they might be assigned to different regions in both objects. This results in
#' some ambiguity after the merge, where for instance a binding site can be
#' assigned to CDS and 3'UTR. To handle how such edge cases are displayed, the
#' \code{transcript.regions.outlier.handle} exists. As default, simply the
#' region of the object that was merged first is shown. If one is interested in
#' showing all regions, then the options \code{both} displays both annotations
#' at the same time and labels them accordingly.
#'
#' @param object object; a \code{\link{BSFDataSet}} object
#' @param plot.geneID character; the gene id of the gene to display. The id must
#' match with the gene ids given in the annotation object.
#' @param anno.annoDB object; an object of class \code{OrganismDbi} that contains
#' the gene annotation (!!! Experimental !!!).
#' @param anno.genes object; an object of class \code{\link{GenomicRanges}}
#' that represents the gene ranges directly.
#' @param match.geneID character; meta column name of the gene ID
#' @param match.geneName character; meta column name of the gene name
#' @param plot.gene.n.tiles numeric; number of tiles the gene should be split in
#' @param alpha numeric; the alpha value to show significantly regulated
#' binding sites. This should match the alpha value used in \code{\link{calculateBsFoldChange}}.
#' @param lfc.cutoff numeric; log2 fold-change cutoff to show significantly
#' regulated binding sites. This should match the lfc.cutoff value used in
#' \code{\link{calculateBsFoldChange}}.
#' @param transcript.regions.outlier.handle character; the option how to handle
#' multiple transcript region annotations being present for the same binding
#' site.
#' @param quiet logical; whether to print messages
#'
#' @return an object of class \code{ggplot2}
#'
#' @seealso \code{\link{BSFind}},
#' \code{\link{calculateBsFoldChange}}
#' \code{\link{assignToGenes}}
#' \code{\link{assignToTranscriptRegions}}
#'
#' @import ggplot2
#'
#' @examples
#'
#' # load clip data
#' files <- system.file("extdata", package="BindingSiteFinder")
#' load(list.files(files, pattern = ".rda$", full.names = TRUE))
#' load(list.files(files, pattern = ".rds$", full.names = TRUE)[1])
#' load(list.files(files, pattern = ".rds$", full.names = TRUE)[2])
#'
#' # make testset
#' bds = makeBindingSites(bds, bsSize = 7)
#' bds = assignToGenes(bds, anno.genes = gns)
#' bds = assignToTranscriptRegions(object = bds, anno.transcriptRegionList = regions)
#' bds = imputeBsDifferencesForTestdata(bds)
#' bds = calculateBsBackground(bds, anno.genes = gns, use.offset = FALSE)
#'
#' # use all filters and remove binding sites that fail (default settings)
#' bds = filterBsBackground(bds)
#'
#' # calculate fold-changes
#' bds = calculateBsFoldChange(bds)
#'
#' # make example plot
#' exampleGeneId = "ENSG00000253352.10"
#' geneRegulationPlot(bds, plot.geneID = exampleGeneId, anno.genes = gns)
#'
#' @export
geneRegulationPlot <- function(object,
plot.geneID = NULL,
anno.annoDB = NULL,
anno.genes = NULL,
match.geneID = "gene_id",
match.geneName = "gene_name",
plot.gene.n.tiles = 100,
alpha = 0.05,
lfc.cutoff = 2,
transcript.regions.outlier.handle = c("first", "second", "both", "remove"),
quiet = FALSE) {
# bind locally used variables
bs.padj <- bs.log2FoldChange <- sig <- sigDir <- plot.position <- transcriptRegion <- NULL
# INPUT CHECKS
# --------------------------------------------------------------------------
# type checks
stopifnot(is(object, "BSFDataSet"))
stopifnot(is.logical(quiet))
# check presence of fold-changes -> must be present
if (is.null(object@params$calculateBsFoldChange)) {
msg0 = paste0("Fold-changes were not calculated yet. Run calculateBsFoldChange() first. \n")
stop(msg0)
}
# check presence of gene assignment -> must be present
if (is.null(getRanges(object)$geneID)) {
msg0 = paste0("Gene assignment was not calculated. Run assignToGenes() first. \n")
stop(msg0)
# TODO ideally this should test for the param present in object@params$assignToGenes,
# but this gets overwritten by the mergeing of two bds objects. Change this in the future.
}
# check presence of transcript regions -> can be missing
if (is.null(getRanges(object)$transcriptRegion)) {
msg0 = paste0("Transcript region assignment was not calculated. Run assignToTranscriptRegions() first to link binding sites to transcript regions. \n")
warning(msg0)
# TODO ideally this should test for the param present in object@params$assignToTranscriptRegions,
# but this gets overwritten by the mergeing of two bds objects. Change this in the future.
# TODO make a version of the plot that does not encode the transcript regions as shape information
}
# check presence of correct annotation resource -> one of both must be present
# Check if none is specified
if (is.null(anno.annoDB) & is.null(anno.genes)) {
msg = paste0("None of the required annotation sources anno.annoDB or anno.genes was specified. ")
stop(msg)
}
# Check if both are specified
if (!is.null(anno.annoDB) & !is.null(anno.genes)) {
msg = paste0("Both of the required annotation sources anno.annoDB or anno.genes are specified. Please provide only one of the two. ")
stop(msg)
}
# Checks if anno.annoDB should be used
if (!is.null(anno.annoDB) & is.null(anno.genes)) {
stopifnot(is(anno.annoDB, "OrganismDb"))
if (!is.null(anno.genes)) {
msg = paste0("Parameter anno.annoDB and anno.genes are specified at the same time. Use only one of them.")
stop(msg)
} else {
datasource = "anno.annoDB"
# extract relevant annotation
anno.genes = genes(anno.annoDB, columns=c("ENSEMBL", "SYMBOL", "GENEID"))
# Create matching vectors for columns from input annotation
# --------------------------------------------------------------------------
selectID = as.character(anno.genes$GENEID)
selectName = as.character(anno.genes$SYMBOL)
}
}
# Checks if anno.genes should be used
if (is.null(anno.annoDB) & !is.null(anno.genes)) {
stopifnot(is(anno.genes, "GenomicRanges"))
if (!is.null(anno.annoDB)) {
msg = paste0("Parameter anno.annoDB and anno.genes are specified at the same time. Use only one of them.")
stop(msg)
} else {
datasource = "anno.genes"
# extract relevant annotation
# check for duplicated annotation columns
if (sum(colnames(mcols(anno.genes)) == match.geneID) > 1) {
msg = paste0("The names of multiple columns of the annotation match the match.geneID parameter. Please use a unique column name for match.geneID in anno.genes.")
stop(msg)
}
if (sum(colnames(mcols(anno.genes)) == match.geneName) > 1) {
msg = paste0("The names of multiple columns of the annotation match the match.geneName parameter. Please use a unique column name for match.geneName in anno.genes.")
stop(msg)
}
# check correct annotation columns
inNames = c(match.geneID, match.geneName)
annoColNames = colnames(mcols(anno.genes))
presentNames = inNames[inNames %in% annoColNames]
# extract columns from annotation based on available names
present.cols = lapply(presentNames, function(x){
match.col = mcols(anno.genes)[match(x, colnames(mcols(anno.genes)))][[1]]
return(match.col)
})
names(present.cols) = presentNames
# extract values from annotation for all present columns
if (match.geneID %in% names(present.cols)) {
selectID = present.cols[[match(match.geneID, names(present.cols))]]
} else {
msg = paste0("No meta column for ", match.geneID, " present. Creating custom ID.\n")
if (!quiet) message(msg)
selectID = paste0("CUSTOM", seq_along(anno.genes))
}
if (match.geneName %in% names(present.cols)) {
selectName = present.cols[[match(match.geneName, names(present.cols))]]
} else {
msg = paste0("No meta column for ", match.geneName, " present.\n")
if (!quiet) message(msg)
}
}
}
# check if gene to plot is in bds object -> must be present
if (!(plot.geneID %in% getRanges(object)$geneID)) {
msg0 = stop(paste0("Selected geneID: ", plot.geneID, " is not present in the meta columns of the given bds object. \n"))
stop(msg0)
}
# check if gene to plot is in annotation object -> must be present
if (!(plot.geneID %in% selectID)) {
msg0 = stop(paste0("Selected geneID: ", plot.geneID, " is not present in the meta columns of the given gene annotation. \n"))
stop(msg0)
}
transcript.regions.outlier.handle = match.arg(transcript.regions.outlier.handle, choices = c("first", "second", "both", "remove"))
# --------------------------------------------------------------------------
# MAIN
# --------------------------------------------------------------------------
# get ranges of binding sites for selected gene
plot.bs = getRanges(object)
plot.bs = plot.bs[plot.bs$geneID == plot.geneID]
plot.bs = sort(plot.bs)
plot.bs = resize(plot.bs, width = 1, fix = "center")
# get gene ranges for selected gene
plot.gene = anno.genes[anno.genes$gene_id == plot.geneID]
plot.gene.name = plot.gene$gene_name
# claculate relative position of binding sites in gene
plot.gene.tiles = unlist(tile(plot.gene, n = plot.gene.n.tiles))
plot.gene.tiles$position = 1:length(plot.gene.tiles)
ols = findOverlaps(plot.bs, plot.gene.tiles)
gene.strand = unique(as.character(strand(plot.gene)))
if(gene.strand == "-") {
position = plot.gene.n.tiles-(subjectHits(ols))
} else {
position = subjectHits(ols)
}
plot.bs$plot.position = position
# --------------------------------------------------------------------------
# PLOTS
# --------------------------------------------------------------------------
# make plots without transcript region annotation
if (is.null(getRanges(object)$transcriptRegion)) {
df.plot = mcols(plot.bs) %>% as.data.frame() %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down")))
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1, shape = 21) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation"
)
return(p)
}
# make plots with transcript region annotation
if (!is.null(getRanges(object)$transcriptRegion)) {
expected.regions = c("INTRON", "CDS", "UTR3", "UTR5", "OTHER")
all.regions.present = unique(plot.bs$transcriptRegion)
# plot cases where no region duplication is present
if (all(all.regions.present %in% expected.regions)) {
df.plot = mcols(plot.bs) %>% as.data.frame() %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down"))) %>%
mutate(duplicated = "no")
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir, shape = transcriptRegion)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1) +
scale_shape_manual(values=c("INTRON" = 23, "CDS" = 22, "UTR3" = 25, "UTR5" = 24, "OTHER" = 21)) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation",
shape = "Transcript Region"
)
return(p)
}
# plot cases where region duplication is present
if (any(all.regions.present %in% expected.regions)) {
msg0 = paste0("At least one additional transcript region type detected compared to the currently supported expected regions: ",
paste(expected.regions, collapse = ", "),
". Using transcript.regions.outlier.handle=", transcript.regions.outlier.handle," to solve. \n")
if (!quiet) {
warning(msg0)
}
if (transcript.regions.outlier.handle == "remove") {
df.plot = mcols(plot.bs) %>% as.data.frame() %>%
filter(transcriptRegion %in% expected.regions) %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down"))) %>%
mutate(duplicated = "no")
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir, shape = transcriptRegion)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1) +
scale_shape_manual(values=c("INTRON" = 23, "CDS" = 22, "UTR3" = 25, "UTR5" = 24, "OTHER" = 21)) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation",
shape = "Transcript Region",
caption = paste0("Not all binding sites might be shown due to: transcript.regions.outlier.handle=", transcript.regions.outlier.handle)
)
return(p)
}
if (transcript.regions.outlier.handle == "first") {
df.duplicated = mcols(plot.bs) %>% as.data.frame() %>% filter(grepl(",", transcriptRegion)) %>% mutate(duplicated = "yes") %>%
mutate(transcriptRegion = sapply(strsplit(transcriptRegion,","), `[`, 1))
df.single = mcols(plot.bs) %>% as.data.frame() %>% filter(!grepl(",", transcriptRegion)) %>% mutate(duplicated = "no")
df.plot = rbind.data.frame(df.duplicated, df.single) %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down"))) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "left")) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "right"))
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir, shape = transcriptRegion)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1) +
scale_shape_manual(values=c("INTRON" = 23, "CDS" = 22, "UTR3" = 25, "UTR5" = 24, "OTHER" = 21)) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation",
shape = "Transcript Region",
caption = paste0("Not all binding sites might be shown due to: transcript.regions.outlier.handle=", transcript.regions.outlier.handle)
)
return(p)
}
if (transcript.regions.outlier.handle == "second") {
df.duplicated = mcols(plot.bs) %>% as.data.frame() %>% filter(grepl(",", transcriptRegion)) %>% mutate(duplicated = "yes") %>%
mutate(transcriptRegion = sapply(strsplit(transcriptRegion,","), `[`, 2))
df.single = mcols(plot.bs) %>% as.data.frame() %>% filter(!grepl(",", transcriptRegion)) %>% mutate(duplicated = "no")
df.plot = rbind.data.frame(df.duplicated, df.single) %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down"))) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "left")) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "right"))
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir, shape = transcriptRegion)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1) +
scale_shape_manual(values=c("INTRON" = 23, "CDS" = 22, "UTR3" = 25, "UTR5" = 24, "OTHER" = 21)) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation",
shape = "Transcript Region",
caption = paste0("Not all binding sites might be shown due to: transcript.regions.outlier.handle=", transcript.regions.outlier.handle)
)
return(p)
}
if (transcript.regions.outlier.handle == "both") {
df.duplicated = mcols(plot.bs) %>% as.data.frame() %>% filter(grepl(",", transcriptRegion)) %>% mutate(duplicated = "yes") %>%
separate_longer_delim(transcriptRegion, delim = ",")
df.single = mcols(plot.bs) %>% as.data.frame() %>% filter(!grepl(",", transcriptRegion)) %>% mutate(duplicated = "no")
df.plot = rbind.data.frame(df.duplicated, df.single) %>%
mutate(sig = (ifelse(bs.padj < alpha & abs(bs.log2FoldChange) > log2(lfc.cutoff), TRUE, FALSE))) %>%
mutate(dir = factor(ifelse(bs.log2FoldChange > 0, "Up", "Down"), levels = c("Up", "Down"))) %>%
mutate(sigDir = ifelse(sig == TRUE & dir == "Up", "Up", ifelse(sig == TRUE & dir == "Down", "Down", "Not"))) %>%
mutate(sigDir = factor(sigDir, levels = c("Not", "Up", "Down"))) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "left")) %>%
mutate(transcriptRegion = stringr::str_trim(transcriptRegion, "right"))
p = ggplot(df.plot, aes(x = plot.position, y = bs.log2FoldChange, color = sigDir, fill = sigDir, shape = transcriptRegion)) +
geom_hline(yintercept = 0, color = "#4d4d4d") +
geom_segment(aes(x=plot.position, xend=plot.position, y=0, yend=bs.log2FoldChange), size = 1, color = "#4d4d4d") +
geom_point(size = 4, stroke = 1) +
ggrepel::geom_label_repel(data = df.plot %>% filter(duplicated == "yes"), aes(label = transcriptRegion)) +
scale_shape_manual(values=c("INTRON" = 23, "CDS" = 22, "UTR3" = 25, "UTR5" = 24, "OTHER" = 21)) +
scale_fill_manual(values = c("Not" = "#999999", "Up" = "#68b1a5", "Down" = "#874C62")) +
scale_color_manual(values = c("Not" = "#4d4d4d", "Up" = "#2b544d", "Down" = "#623747")) +
theme_minimal() +
guides(color = guide_legend(override.aes = list(size = 4))) +
theme(legend.key.size = unit(1, 'cm'), legend.position = "top") +
ylim(-max(abs(df.plot$bs.log2FoldChange)), max(abs(df.plot$bs.log2FoldChange))) +
xlim(0,plot.gene.n.tiles) +
labs(
title = paste0(plot.gene.name, " (", plot.geneID, ")"),
x = "Relative position on the gene",
y = "Fold-change (log2)",
fill = "Regulation",
color = "Regulation",
shape = "Transcript Region",
size = "Duplicated Region",
caption = paste0("Some binding sites might not be visible due to overplotting: transcript.regions.outlier.handle=", transcript.regions.outlier.handle)
)
return(p)
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.