#' A gene length bin frequency bar plot generator
#'
#' A function that allows you to plot a bar plot of the number of gene hits for each bin of gene length
#' @param samples a list of data frames of annotated peaks
#' @param plot_dir the directory in which you want the plots, default = "plots"
#' @keywords frequencies
#' @import tidyverse gridExtra
#' @export
#' @examples
#' DSBs_per_gene_length(samples, plot_dir)
DSBs_per_gene_length <- function(samples, plot_dir = "plots"){
count <- 0
dir.create(file.path(plot_dir), showWarnings = FALSE)
samples_filtered_ordered <- lapply(samples, function(condition){
condition %>%
filter(str_detect(GenomicAnnotation, "Promoter.*|.*Exon.*|Downstream.*")) %>%
arrange(desc(PeakPvalue))
})
pl_gene_length <- lapply(samples_filtered_ordered, function(condition){
count <<- count + 1
gene_length <- condition %>%
select(c("GeneName", "GeneLength"))
very_short_gene_dsbs <- gene_length %>% filter(GeneLength <= 100)
short_gene_dsbs <- gene_length %>% filter(GeneLength <= 1000, GeneLength >= 100)
medium_gene_dsbs <- gene_length %>% filter(GeneLength <= 10000, GeneLength >= 1000)
long_gene_dsbs <- gene_length %>% filter(GeneLength <= 100000, GeneLength >= 10000)
very_long_gene_dsbs <- gene_length %>% filter(GeneLength <= 500000, GeneLength >= 100000)
huge_genes_dsbs <- gene_length %>% filter(GeneLength >= 500000)
length_window_x_number_of_genes <- data.frame("length_window" = c("1bp-100bp", "100bp-1'000bp", "1'000bp-10'000bp", "10'000bp-100'000bp", "100'000bp-500'000bp", "more_than_500'000bp"),
"number_of_genes" = c(nrow(very_short_gene_dsbs), nrow(short_gene_dsbs), nrow(medium_gene_dsbs), nrow(long_gene_dsbs), nrow(very_long_gene_dsbs), nrow(huge_genes_dsbs)))
length_window_x_number_of_genes$length_window <- factor(length_window_x_number_of_genes$length_window, levels = length_window_x_number_of_genes$length_window)
ggplot(length_window_x_number_of_genes, aes(x=length_window, y=number_of_genes)) +
geom_col(col = pal_npg()(length(samples))[count], fill = pal_npg("nrc", alpha = 0.8)(length(samples))[count]) +
ggtitle(names(samples_filtered_ordered)[count]) +
theme(axis.text = element_text( size = rel(0.4), angle = 25))
})
pdf(file.path(plot_dir, "peaks_x_length_window_plot.pdf"))
do.call(grid.arrange, c(pl_gene_length, nrow = round(length(samples_filtered_ordered)/2)))
dev.off()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.