#' Filter out events located in heterogeneity areas
#'
#' Events and regions are converted to \code{GRanges} objects and compared.
#' Events that overlap a region (sufficiently, see \code{min_overlap}) are
#' dropped from \code{event_data}.
#'
#' @param event_data a \code{data.frame} of events such as generated by function
#'   \code{\link{getdels}}. The columns \code{chr}, \code{start} and \code{end}
#'   are used to locate the events.
#' @param het_regions a \code{data.frame} of genomic ranges to be used for
#'   filtering out events located in these regions. The columns \code{chr},
#'   \code{start} and \code{stop} are used to locate the regions.
#' @param min_overlap a single numeric value between 0 and 1. The minimal
#'   proportion of the length of the event that must overlap with a single
#'   region listed in \code{het_regions} for this event to be filtered out. A
#'   value of 0 results in any event overlapping a region by even a single
#'   nucleotide being removed, whereas a value of 1 results in only events
#'   entirely located in a specified region being removed.
#'
#' @return a \code{data.frame} of events similar to that given as input,
#'   but with events removed according to the specified filters.
#' @export
#'
#' @examples
#' NULL
filter_out <- function(event_data, het_regions, min_overlap = 0) {
  # Validating the inputs early so that failures are easy to interpret
  if (!is.numeric(min_overlap) || length(min_overlap) != 1L ||
      is.na(min_overlap) || min_overlap < 0 || min_overlap > 1) {
    stop("min_overlap must be a single numeric value between 0 and 1",
         call. = FALSE)
  }
  missing_event_cols <- setdiff(c("chr", "start", "end"), names(event_data))
  if (length(missing_event_cols) > 0) {
    stop("event_data is missing column(s): ",
         paste(missing_event_cols, collapse = ", "), call. = FALSE)
  }
  missing_region_cols <- setdiff(c("chr", "start", "stop"), names(het_regions))
  if (length(missing_region_cols) > 0) {
    stop("het_regions is missing column(s): ",
         paste(missing_region_cols, collapse = ", "), call. = FALSE)
  }

  # Converting the event_data object to a GRanges object
  g_events <- GenomicRanges::GRanges(
    seqnames = event_data$chr,
    ranges = IRanges::IRanges(start = event_data$start, end = event_data$end)
  )
  # Converting the het_regions object to a GRanges object
  g_regions <- GenomicRanges::GRanges(
    seqnames = het_regions$chr,
    ranges = IRanges::IRanges(start = het_regions$start, end = het_regions$stop)
  )

  # Checking which of g_events overlap with any region in g_regions
  overlaps <- IRanges::overlapsAny(g_events, g_regions)
  # The event data is returned directly if no overlap is found
  if (!any(overlaps)) return(event_data)

  # With min_overlap == 0 every overlapping event is removed. Otherwise the
  # proportion of each event covered by each overlapping region is computed
  # and only events reaching min_overlap for at least one region are removed.
  if (min_overlap != 0) {
    hits <- IRanges::findOverlaps(g_events, g_regions)
    event_idx <- hits@from
    region_idx <- hits@to

    # Width of the intersection of each (event, region) pair; pairs returned
    # by findOverlaps are on the same sequence, so coordinates are comparable
    overlap_width <-
      pmin(event_data$end[event_idx], het_regions$stop[region_idx]) -
      pmax(event_data$start[event_idx], het_regions$start[region_idx]) + 1
    event_width <- event_data$end[event_idx] - event_data$start[event_idx] + 1
    overlap_proportions <- overlap_width / event_width

    # An event may appear in several hits (one per overlapping region), so the
    # hits must be mapped back to events through their query index rather
    # than by position. which() also discards any NA comparison.
    filtered_events <- unique(event_idx[which(overlap_proportions >= min_overlap)])
    overlaps <- rep(FALSE, length(overlaps))
    overlaps[filtered_events] <- TRUE
  }

  event_data[!overlaps, ]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.