#' @title calcTracks
#'
#' @description Prepare ChIP, repeat, and gene annotation data for plotting tracks
#' for a given region of interest (ROI).
#'
#' @details calcTracks is used to prepare ChIP, repeat, and gene annotation data for plotting tracks
#' for a given region of interest (ROI).
#'
#' @param bigwigFiles Character vector containing the filenames (including the full path) of read alignment files in bigwig format.
#' @param bigwigNames Character vector containing the names to describe the \code{bigwigFiles} you are using (for example: "H3K9me3_replicate1").
#' @param bigwigGroupNames Character vector containing the group names to describe the \code{bigwigFiles} you are using (for example: "H3K9me3").
#' @param ROI A GRanges object containing your single region of interest that should be plotted. Must include seqnames (chromosomes), start, end.
#' @param CoverageSmoothing Logical scalar, indicating whether the coverage tracks should be smoothed. Default: TRUE.
#' @param smoothing_window Numeric scalar providing the window size for smoothing. Default: 20.
#' @param reps A GRanges object containing all repeats in the genome, or at least the ones overlapping the \code{ROI}.
#' @param genes A GRanges object containing all genes in the genome, or at least the ones overlapping the \code{ROI}.
#' @param exons A GRanges object containing all exons in the genome, or at least the ones overlapping the \code{ROI}.
#'
#' @return A list of data frames containing all necessary information to plot
#' the screenshots using the \code{plotTracks} function.
#'
#' @examples
#' #see vignette
#'
#' @importFrom rtracklayer import.bw
#' @importFrom dplyr group_by mutate ungroup summarise
#' @importFrom zoo rollmean
#' @importFrom GenomicRanges GRanges start end strand seqnames promoters terminators start<- end<-
#' @importFrom IRanges IRanges subsetByOverlaps
#' @importFrom methods is
#'
#' @export
calcTracks <- function(bigwigFiles, bigwigNames, bigwigGroupNames, ROI, CoverageSmoothing=TRUE, smoothing_window=20,
reps, genes, exons){
# Input validation
# Check if required packages are installed
required_packages <- c("rtracklayer", "GenomicRanges", "IRanges", "dplyr", "zoo")
missing_packages <- required_packages[!sapply(required_packages, function(p) requireNamespace(p, quietly = TRUE))]
if (length(missing_packages) > 0) {
stop(paste("Missing required packages:", paste(missing_packages, collapse=", "),
"\nPlease install them using BiocManager::install() or install.packages()"))
}
# Check bigwigFiles parameter
if (missing(bigwigFiles)) {
stop("Error: 'bigwigFiles' argument is missing. Please provide paths to your bigwig files.")
}
if (!is.character(bigwigFiles)) {
stop("Error: 'bigwigFiles' must be a character vector containing file paths.")
}
# Check file existence
non_existent_files <- bigwigFiles[!file.exists(bigwigFiles)]
if (length(non_existent_files) > 0) {
stop(paste("Error: The following bigwig files do not exist:",
paste(non_existent_files, collapse=", ")))
}
# Check bigwigNames parameter
if (missing(bigwigNames)) {
stop("Error: 'bigwigNames' argument is missing. Please provide names for your bigwig files.")
}
if (!is.character(bigwigNames)) {
stop("Error: 'bigwigNames' must be a character vector.")
}
if (length(bigwigNames) != length(bigwigFiles)) {
stop(paste("Error: The length of 'bigwigNames' (", length(bigwigNames),
") does not match the length of 'bigwigFiles' (", length(bigwigFiles), ")."))
}
# Check bigwigGroupNames parameter
if (missing(bigwigGroupNames)) {
stop("Error: 'bigwigGroupNames' argument is missing. Please provide group names for your bigwig files.")
}
if (!is.character(bigwigGroupNames)) {
stop("Error: 'bigwigGroupNames' must be a character vector.")
}
if (length(bigwigGroupNames) != length(bigwigFiles)) {
stop(paste("Error: The length of 'bigwigGroupNames' (", length(bigwigGroupNames),
") does not match the length of 'bigwigFiles' (", length(bigwigFiles), ")."))
}
# Check ROI parameter
if (missing(ROI)) {
stop("Error: 'ROI' argument is missing. Please provide a GRanges object with your region of interest.")
}
if (!methods::is(ROI, "GRanges")) {
stop("Error: 'ROI' must be a GRanges object.")
}
if (length(ROI) != 1) {
stop(paste("Error: 'ROI' must contain exactly one region, but contains", length(ROI), "regions."))
}
if (any(is.na(GenomicRanges::start(ROI))) || any(is.na(GenomicRanges::end(ROI)))) {
stop("Error: 'ROI' contains NA values in start or end positions.")
}
# Check CoverageSmoothing parameter
if (!is.logical(CoverageSmoothing)) {
stop("Error: 'CoverageSmoothing' must be a logical value (TRUE or FALSE).")
}
# Check smoothing_window parameter
if (!is.numeric(smoothing_window) || length(smoothing_window) != 1) {
stop("Error: 'smoothing_window' must be a single numeric value.")
}
if (smoothing_window < 1) {
stop("Error: 'smoothing_window' must be at least 1.")
}
if (smoothing_window %% 1 != 0) {
warning("Warning: 'smoothing_window' is not an integer and will be rounded.")
smoothing_window <- round(smoothing_window)
}
# Check reps parameter
if (missing(reps)) {
warning("Warning: 'reps' argument is missing. No repeat annotations will be included.")
reps <- GenomicRanges::GRanges()
} else if (!methods::is(reps, "GRanges")) {
stop("Error: 'reps' must be a GRanges object.")
} else {
# Check for required metadata columns in reps
if (!"repeat_name" %in% colnames(GenomicRanges::mcols(reps))) {
stop("Error: 'reps' must contain a metadata column named 'repeat_name'.")
}
if (any(is.na(GenomicRanges::mcols(reps)$repeat_name))) {
warning("Warning: NA values found in repeat_name column of 'reps'.")
}
}
# Check genes parameter
if (missing(genes)) {
warning("Warning: 'genes' argument is missing. No gene annotations will be included.")
genes <- GenomicRanges::GRanges()
} else if (!methods::is(genes, "GRanges")) {
stop("Error: 'genes' must be a GRanges object.")
} else {
# Check for required metadata columns in genes
if (!"gene_name" %in% colnames(GenomicRanges::mcols(genes))) {
stop("Error: 'genes' must contain a metadata column named 'gene_name'.")
}
if (any(is.na(GenomicRanges::mcols(genes)$gene_name))) {
warning("Warning: NA values found in gene_name column of 'genes'.")
}
}
# Check exons parameter
if (missing(exons)) {
warning("Warning: 'exons' argument is missing. No exon annotations will be included.")
exons <- GenomicRanges::GRanges()
} else if (!methods::is(exons, "GRanges")) {
stop("Error: 'exons' must be a GRanges object.")
}
# Initialize a list to store data for all BigWig files
coverage_data_list <- list()
# Loop through each BigWig file
for (i in seq_along(bigwigFiles)) {
tryCatch({
# Import the BigWig file for the given region
bwf <- rtracklayer::import.bw(bigwigFiles[i], which = ROI)
if (length(bwf) == 0) {
warning(paste("Warning: No data found in", bigwigFiles[i], "for the specified ROI."))
next
}
# Convert to data frame for processing
for_plotting <- as.data.frame(bwf)
# Expand scores and indices
expanded_scores <- unlist(mapply(rep, for_plotting$score, for_plotting$width))
expanded_seqnames <- unlist(mapply(rep, for_plotting$seqnames, for_plotting$width))
expanded_index <- unlist(mapply(function(start, width) seq(start, by = 1, length.out = width),
for_plotting$start, for_plotting$width))
# Create a new data frame for coverage
coverage_table <- data.frame(
seqnames = expanded_seqnames,
index = expanded_index,
score = expanded_scores,
file_name = bigwigNames[i], # Add a column for the BigWig file name
group = bigwigGroupNames[i] # Add a column for the sample group name
)
# Store the result in the list
coverage_data_list[[i]] <- coverage_table
}, error = function(e) {
stop(paste("Error processing", bigwigFiles[i], ":", e$message))
})
}
# Check if any coverage data was successfully processed
if (length(coverage_data_list) == 0) {
stop("Error: No coverage data could be extracted from any of the bigwig files. Please check your files and ROI.")
}
# Combine all the coverage data into a single data frame
combined_coverage <- do.call(rbind, coverage_data_list)
if (nrow(combined_coverage) == 0) {
stop("Error: No coverage data found. Please check if your ROI overlaps with data in the bigwig files.")
}
# Define a smoothing function
smooth_coverage <- function(data, smoothing_window = 20) {
data %>%
group_by(file_name) %>%
mutate(score = zoo::rollmean(score, k = smoothing_window, fill = NA, align = "center")) %>%
ungroup()
}
if (CoverageSmoothing == TRUE) {
# Apply smoothing to the combined coverage data
tryCatch({
combined_coverage <- smooth_coverage(combined_coverage, smoothing_window = smoothing_window)
}, error = function(e) {
warning(paste("Warning: Error during smoothing:", e$message,
"Continuing with unsmoothed data."))
})
}
# Filter annotations to the combined index range
### generate window GRanges
score_window <- GenomicRanges::GRanges(
seqnames = unique(combined_coverage$seqnames),
ranges = IRanges::IRanges(start = min(combined_coverage$index), end = max(combined_coverage$index))
)
# Initialize empty data frames for return values
reps_df <- data.frame(
start = numeric(0),
end = numeric(0),
repeat_name = character(0),
ypos = numeric(0)
)
genes_df <- data.frame(
start = numeric(0),
end = numeric(0),
gene_name = character(0),
ypos = numeric(0)
)
exons_df <- data.frame(
start = numeric(0),
end = numeric(0),
ypos = numeric(0)
)
# Process repeats if they exist
if (length(reps) > 0) {
#filter repeats
reps_filtered <- IRanges::subsetByOverlaps(reps, score_window, ignore.strand=TRUE)
if (length(reps_filtered) > 0) {
#truncate start and end coordinates to fit into plotting window
GenomicRanges::start(reps_filtered) <- ifelse(GenomicRanges::start(reps_filtered) >= GenomicRanges::start(score_window),
GenomicRanges::start(reps_filtered),
GenomicRanges::start(score_window))
GenomicRanges::end(reps_filtered) <- ifelse(GenomicRanges::end(reps_filtered) <= GenomicRanges::end(score_window),
GenomicRanges::end(reps_filtered),
GenomicRanges::end(score_window))
# Convert filtered reps to a data frame for plotting
reps_df <- data.frame(
start = GenomicRanges::start(GenomicRanges::promoters(reps_filtered, upstream=0, downstream=1)),
end = GenomicRanges::end(GenomicRanges::terminators(reps_filtered, upstream=0, downstream=1)),
repeat_name = GenomicRanges::mcols(reps_filtered)$repeat_name,
ypos = as.numeric(paste0(GenomicRanges::strand(reps_filtered), 5))
)
}
}
# Process genes if they exist
if (length(genes) > 0) {
#filter genes
genes_filtered <- IRanges::subsetByOverlaps(genes, score_window, ignore.strand=TRUE)
if (length(genes_filtered) > 0) {
#truncate start and end coordinates to fit into plotting window
GenomicRanges::start(genes_filtered) <- ifelse(GenomicRanges::start(genes_filtered) >= GenomicRanges::start(score_window),
GenomicRanges::start(genes_filtered),
GenomicRanges::start(score_window))
GenomicRanges::end(genes_filtered) <- ifelse(GenomicRanges::end(genes_filtered) <= GenomicRanges::end(score_window),
GenomicRanges::end(genes_filtered),
GenomicRanges::end(score_window))
# Convert filtered genes to a data frame for plotting
genes_df <- data.frame(
start = GenomicRanges::start(GenomicRanges::promoters(genes_filtered, upstream=0, downstream=1)),
end = GenomicRanges::end(GenomicRanges::terminators(genes_filtered, upstream=0, downstream=1)),
gene_name = GenomicRanges::mcols(genes_filtered)$gene_name,
ypos = as.numeric(paste0(GenomicRanges::strand(genes_filtered), 5))
)
}
}
# Process exons if they exist
if (length(exons) > 0) {
#filter exons
exons_filtered <- IRanges::subsetByOverlaps(exons, score_window, ignore.strand=TRUE)
if (length(exons_filtered) > 0) {
#truncate start and end coordinates to fit into plotting window
GenomicRanges::start(exons_filtered) <- ifelse(GenomicRanges::start(exons_filtered) >= GenomicRanges::start(score_window),
GenomicRanges::start(exons_filtered),
GenomicRanges::start(score_window))
GenomicRanges::end(exons_filtered) <- ifelse(GenomicRanges::end(exons_filtered) <= GenomicRanges::end(score_window),
GenomicRanges::end(exons_filtered),
GenomicRanges::end(score_window))
# Convert filtered exons to a data frame for plotting
exons_df <- data.frame(
start = GenomicRanges::start(GenomicRanges::promoters(exons_filtered, upstream=0, downstream=1)),
end = GenomicRanges::end(GenomicRanges::terminators(exons_filtered, upstream=0, downstream=1)),
ypos = as.numeric(paste0(GenomicRanges::strand(exons_filtered), 5))
)
}
}
# Final check that all components have the correct structure
if (!all(c("seqnames", "index", "score", "file_name", "group") %in% colnames(combined_coverage))) {
stop("Error: combined_coverage is missing required columns. This may be due to an internal error.")
}
# Return the combined data
return(list(combined_coverage = combined_coverage,
reps_df = reps_df,
genes_df = genes_df,
exons_df = exons_df))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.