### =========================================================================
### cvg()
### -------------------------------------------------------------------------
###
### A better coverage().
###
### It all started when I came across this:
### https://stackoverflow.com/questions/17138760/counting-overlaps-of-integer-ranges
###
setGeneric("cvg", signature="x",
function(x, from=NA, to=NA, weight=1L, varname="cvg", collapse=FALSE, ...)
standardGeneric("cvg")
)
### TODO: Methods for IntegerRanges and IntegerRangesList objects (defined in
### this file) need to support the 'circle.length' argument.
### TODO: Implement method for GenomicRanges objects (in GenomicRanges package).
### Should it support the 'ignore.strand' argument?
### TODO: The end user should be able to switch between collapsed and expanded
### representation by calling collapse() and expand() on the returned object.
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Restriction window helpers
###
### Use these helpers in the "restrict" and "gaps" methods.
###
### Should we turn these helpers into methods of a generic function?
### Like effectiveRestrictionWindow()? or effectiveFromTo()?
### Also maybe export and document them so the user can actually use them
### to see what effective restriction windows are being used and also see
### the invalid windows causing an error (by calling
### effectiveRestrictionWindow() with check=FALSE).
###
### Return an integer vector of length 2.
effective_restriction_window_for_IntegerRanges <-
function(x, from=NA, to=NA, check=TRUE)
{
stopifnot(is(x, "IntegerRanges"),
isSingleNumberOrNA(from),
isSingleNumberOrNA(to))
if (!is.integer(from))
from <- as.integer(from)
if (!is.integer(to))
to <- as.integer(to)
if (is.na(from) || is.na(to)) {
if (length(x) == 0L)
return(c(from=from, to=to))
x_range <- range(x)
if (is.na(from))
from <- start(x_range)
if (is.na(to))
to <- end(x_range)
}
if (check) {
width <- to - from + 1L
if (width < 0L)
stop(wmsg("invalid from-to: ", from, "-", to))
}
c(from=from, to=to)
}
### Return an N x 2 integer matrix where N is length(x).
effective_restriction_windows_for_IntegerRangesList <-
function(x, from=NA, to=NA, check=TRUE)
{
stopifnot(is(x, "IntegerRangesList"),
is.numeric(from) || is.logical(from) && all(is.na(from)),
is.numeric(to) || is.logical(to) && all(is.na(to)))
if (!is.integer(from))
from <- as.integer(from)
if (!is.integer(to))
to <- as.integer(to)
from <- S4Vectors:::V_recycle(from, x, "from", "x")
to <- S4Vectors:::V_recycle(to, x, "to", "x")
x_range <- range(x)
na_idx <- which(is.na(from))
from[na_idx] <- as.integer(start(x_range))[na_idx]
na_idx <- which(is.na(to))
to[na_idx] <- as.integer(end(x_range))[na_idx]
if (check) {
width <- to - from + 1L
if (any(width < 0L, na.rm=TRUE))
stop(wmsg("some of the restriction windows defined by the ",
"supplied 'from' and 'to' have a negative width"))
}
cbind(from=from, to=to)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### "cvg" methods
###
### Takes an IntegerRanges derivative 'x' and returns its coverage as the
### metadata column of an IPos object by default. If 'collapse' is TRUE',
### the coverage is returned in an IRanges object instead.
### The 'from' and 'to' arguments control the window of integer positions
### for which to compute and return coverage.
### Each of these arguments must be a single integer or NA. When set to NA
### (the default), 'from' is replaced internally with 'min(start(x))',
### and 'to' with 'max(end(x))'.
### All the integer positions in the from-to window are represented in the
### returned object. More precisely, the returned IPos or IRanges 'ans' is
### disjoint, strictly sorted, and with no gaps between the ranges, and its
### ranges span the from-to window (i.e. 'reduce(ans)' will return the single
### range from-to). In particular, when 'ans' is an IPos object, 'pos(ans)'
### returns the from:to sequence.
cvg_IntegerRanges <- function(x, from=NA, to=NA, weight=1L,
varname="cvg", collapse=FALSE)
{
stopifnot(isSingleString(varname), isTRUEorFALSE(collapse))
from_to <- effective_restriction_window_for_IntegerRanges(x, from, to)
shift <- 1L - from_to[[1L]]
width <- from_to[[2L]] + shift
if (length(x) == 0L) {
if (is.na(width) || width == 0L) {
ans <- IRanges()
} else {
ans <- IRanges(from_to[[1L]], width=width)
}
## 'weight' determines the type of Rle.
cvg0 <- Rle(weight * 0L, sum(width(ans)))
} else {
## Compute coverage as an Rle object.
cvg0 <- coverage(x, shift=shift, width=width, weight=weight)
ans_width <- runLength(cvg0)
ans_end <- cumsum(ans_width) - shift
ans <- IRanges(end=ans_end, width=ans_width)
}
if (collapse) {
var <- runValue(cvg0)
} else {
ans <- new_StitchedIPos(ans)
var <- cvg0
}
mcols(ans) <- S4Vectors:::new_DataFrame(setNames(list(var), varname))
ans
}
### Takes an IntegerRangesList derivative 'x' and returns its coverage as the
### inner metadata column of an IPosList object by default. If 'collapse' is
### TRUE, the coverage is returned in an IRangesList object instead.
### The 'from' and 'to' arguments control the windows of integer positions
### for which to compute and return coverage.
### Each of these arguments must be an integer vector parallel to 'x',
### possibly with NAs. If shorter than 'x', they'll be recycled to the length
### of 'x'.
### The object to return is computed with a fast implementation of
###
### mapply(cvg_IntegerRanges, x, from, to, weight,
### MoreArgs=list(varname=varname, collapse=collapse))
###
### and then returned as an IPosList or IRangesList, obeying 'collapse'.
cvg_IntegerRangesList <- function(x, from=NA, to=NA, weight=1L,
varname="cvg", collapse=FALSE)
{
stopifnot(isSingleString(varname), isTRUEorFALSE(collapse))
from_to <- effective_restriction_windows_for_IntegerRangesList(x, from, to)
shift <- 1L - unname(from_to[ , 1L])
width <- unname(from_to[ , 2L]) + shift
## Compute coverage as a SimpleRleList object.
cvg0 <- coverage(x, shift=shift, width=width, weight=weight)
ans_width <- as(runLength(cvg0), "CompressedIntegerList")
ans_end <- as(cumsum(ans_width), class(ans_width)) - shift
unlisted_ans <- IRanges(end=unlist(ans_end, use.names=FALSE),
width=unlist(ans_width, use.names=FALSE))
if (collapse) {
var <- unlist(runValue(cvg0), use.names=FALSE)
} else {
unlisted_ans <- new_StitchedIPos(unlisted_ans)
if (length(cvg0) == 0L) {
## 'weight' determines the type of Rle.
var <- Rle(weight * 0L, 0L)
} else {
var <- unlist(cvg0, use.names=FALSE)
}
}
mcols(unlisted_ans) <- S4Vectors:::new_DataFrame(
setNames(list(var), varname))
if (collapse) {
ans <- relist(unlisted_ans, ans_width)
} else {
ans <- relist(unlisted_ans, cvg0)
}
ans
}
setMethod("cvg", "IntegerRanges", cvg_IntegerRanges)
setMethod("cvg", "IntegerRangesList", cvg_IntegerRangesList)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.