#' Plots the coverage over a region of interest
#'
#' \code{plot_coverage}
#'
#' @param coverage_case Either bw path(s) or dataframe containing coverage
#' returned from \code{get_coverage}
#' @param coverage_ctrl Either bw path(s) or dataframe containing coverage
#' returned from \code{get_coverage}
#' @param region_to_plot gr. A genomic range of length 1 used to grab coverage (required if coverage is a bw_path) and to determine the xlimits to plot. If NULL, will use take
#' the min and the max position from the coverage from case (and ctrls).
#' @param binwidth int.
#'
#' @return tibble with the columns detailing chromosome, position 1 for every base in region_of_interest and normalised coverage
#'
#' @export
plot_coverage <- function(coverage_case, coverage_ctrl = NULL, region_to_plot = NULL, binwidth = 50){
# if you have inputted a bw, then load the coverage
# these will load the coverage based on the region_to_plot
if(!is.data.frame(coverage_case)){
coverage_case <- get_coverage(bw_paths = coverage_case,
region_of_interest = region_to_plot)
}
if(!is.null(coverage_ctrl)){
if(!is.data.frame(coverage_ctrl)){
coverage_ctrl <- get_coverage(bw_paths = coverage_ctrl,
region_of_interest = region_to_plot)
}
coverage_to_plot <-
coverage_case %>%
dplyr::mutate(case_control = "case") %>%
dplyr::bind_rows(coverage_ctrl %>% dplyr::mutate(case_control = "ctrl"))
}else{
coverage_to_plot <-
coverage_case %>%
dplyr::mutate(case_control = "case")
}
# take the start and end points from plotting from the region to plot or from coverage_to_plot
if(!is.null(region_to_plot)){
xmin <- region_to_plot %>% GenomicRanges::start() %>% min()
xmax <- region_to_plot %>% GenomicRanges::end() %>% max()
}else{
xmin <- coverage_to_plot$position %>% min()
xmax <- coverage_to_plot$position %>% max()
}
# if the chromosome has a "chr" remove this for plotting
chromosome <- coverage_to_plot$chromosome %>%
unique() %>%
stringr::str_replace("chr", "")
coverage_plot <-
ggplot() +
stat_summary_bin(data = coverage_to_plot,
aes(x = position,
y = coverage,
colour = case_control,
fill = case_control),
fun.y = mean,
geom = "area",
binwidth = binwidth, alpha = 0.2,
show.legend = ifelse(is.null(coverage_ctrl), F, T)) +
scale_x_continuous(name = stringr::str_c("Chromosome ", coverage_to_plot$chromosome %>%
unique() %>%
stringr::str_replace("chr", "")),
limits = c(xmin, xmax)) +
scale_y_continuous(name = "Coverage") +
scale_colour_manual(name = "", values = ggpubr::get_palette("jco", 2)) +
scale_fill_manual(name = "", values = ggpubr::get_palette("jco", 2)) +
ggpubr::theme_pubclean(flip = T) +
theme(axis.line = element_line(colour = "black"),
axis.ticks.x = element_blank())
return(coverage_plot)
}
#' Get coverage from bigwig file(s) across a region of interest
#'
#' \code{get_coverage}
#'
#' @param bw_paths chr. bigwig file path(s) from which to obtain coverage
#' @param region_of_interest gr. A genomic range of length 1. If NULL, will use
#' the gene start and end.
#' @param normalise lgl. if TRUE will normalise using the coverage over reduced exons of the gene.
#' @param sum_func func. Any function to summarise numeric values. Defaults to
#' mean.
#' @param gene_name chr. Gene symbol, will use to obtain region_of_interest and
#' normalise to reduced exons definitions.
#' @param ref gr. gtf annotation imported through \code{rtracklayer}.
#' @param change_chr lgl. Add "chr" to start of seqlevels to match the bw format?
#' @param ... additional arguments to pass into the \code{sum_func}
#'
#' @return tibble with the columns detailing chromosome, position (1 for every
#' base in region_of_interest) and normalised coverage
#'
#' @export
get_coverage <- function(bw_paths, region_of_interest = NULL, normalise = F, sum_func = mean, gene_name = NULL, ref = NULL, change_chr = F, ...){
# if user has not put in region, take the genic bounderies
if(is.null(region_of_interest)){
if(is.null(gene_name) | is.null(ref)){
stop("No region of interest inputted, so need both gene name and ref...")
}
all_exons <-
ref[ref$type == "exon" & ref$gene_name == gene_name]
chromosome_to_plot <- all_exons %>% GenomicRanges::seqnames() %>% as.vector() %>% unique()
min_start_coord <- GenomicRanges::start(all_exons)%>% min()
max_end_coord <- GenomicRanges::end(all_exons) %>% max()
region_of_interest <- GenomicRanges::GRanges(str_c(chromosome_to_plot, ":", min_start_coord, "-", max_end_coord))
}
# add chr to seqlevels to match bigwig format
if(change_chr){
region_of_interest <- .change_chr_format(region_of_interest)
}
if(normalise){
reduced_exons <-
ref[ref$type == "exon" & ref$gene_name == gene_name] %>%
GenomicRanges::reduce()
if(change_chr) reduced_exons <- .change_chr_format(reduced_exons)
}
# generate matrix of columns as samples, rows as positions
# containing the normalised coverage
cov_matrix <- matrix(nrow = GenomicRanges::width(region_of_interest),
ncol = length(bw_paths))
for(i in 1:length(bw_paths)){
print(stringr::str_c(Sys.time(), " - getting",
ifelse(normalise, " normalised ", " "),
"coverage for sample ", i, "/", length(bw_paths)))
bw_path <- bw_paths[i]
region_of_interest_cov <-
rtracklayer::import(con = bw_path,
which = region_of_interest,
as = "NumericList") %>%
unlist()
if(normalise){
reduced_exons_sum_cov <-
rtracklayer::import(con = bw_path,
which = reduced_exons,
as = "NumericList") %>%
unlist() %>%
sum()
cov_matrix[, i] <- region_of_interest_cov/reduced_exons_sum_cov
}else{
cov_matrix[, i] <- region_of_interest_cov
}
}
print(stringr::str_c(Sys.time(), " - summarising coverage..."))
cov_summarised <-
dplyr::tibble(chromosome = chromosome_to_plot,
position = GenomicRanges::start(region_of_interest):GenomicRanges::end(region_of_interest),
coverage = apply(cov_matrix, MARGIN = 1, FUN = sum_func, ...))
return(cov_summarised)
}
.change_chr_format <- function(x){
if(stringr::str_detect(GenomeInfoDb::seqlevels(x)[1], "chr")){
GenomeInfoDb::seqlevels(x) <- GenomeInfoDb::seqlevels(x) %>%
stringr::str_replace("chr", "")
}else{
GenomeInfoDb::seqlevels(x) <- GenomeInfoDb::seqlevels(x) %>%
stringr::str_c("chr", .)
}
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.