R/inter-range-methods.R

Defines functions reconstructGRLfromGR deconstructGRLintoGR reconstructGRfromRGL deconstructGRintoRGL make_spaceid fix_inner_revmap_mcol translate_revmap

### =========================================================================
### Inter-range methods
### -------------------------------------------------------------------------
###

### 'revmap_unlisted' and 'revmap_partitioning': IntegerList and Partitioning
### objects representing the "revmap object", which is *conceptually* an
### IntegerListList object (of the same length as 'revmap_partitioning').
### *Conceptually* because, well, we don't actually have such container...
### 'old2new': IntegerList of the same length as the "revmap object".
.translate_revmap <- function(revmap_unlisted, revmap_partitioning, old2new)
{
    ## 'times' has the length of the "revmap object".
    times <- sum(relist(width(PartitioningByEnd(revmap_unlisted)),
                        revmap_partitioning))
    ## 'offset' has the length of '[email protected]'.
    offset <- rep.int(start(PartitioningByEnd(old2new)) - 1L, times)
    revmap_flat <- revmap_unlisted@unlistData
    revmap_unlisted@unlistData <- old2new@unlistData[revmap_flat + offset]
    revmap_unlisted
}

### 'rgl2' and 'rgl' must be List objects (typically RangesList or GRangesList)
### of the same length, both with a "revmap" inner metadata column.
.fix_inner_revmap_mcol <- function(rgl2, rgl)
{
    unlisted_rgl2 <- unlist(rgl2, use.names=FALSE)
    unlisted_revmap2 <- mcols(unlisted_rgl2)$revmap
    revmap <- relist(mcols(unlist(rgl, use.names=FALSE))$revmap, rgl)
    mcols(unlisted_rgl2)$revmap <- .translate_revmap(unlisted_revmap2, rgl2,
                                                     revmap)
    rgl2 <- relist(unlisted_rgl2, rgl2)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Deconstruction/reconstruction of a GenomicRanges object into/from an
### IRangesList object.
###
### For internal use only (not exported).
###

.make_spaceid <- function(x, ignore.strand=FALSE, drop=FALSE)
{
    spaceid <- seqnames(x)
    runValue(spaceid) <- 3L * as.integer(runValue(spaceid))
    if (!ignore.strand) {
        strandid <- strand(x)
        runValue(strandid) <- as.integer(runValue(strandid)) - 3L
        spaceid <- spaceid + strandid
    }
    if (!drop) {
        levels <- as.character(seq_len(3L * length(seqlevels(x))))
        runValue(spaceid) <- structure(runValue(spaceid),
                                       levels=levels,
                                       class="factor")
    }
    spaceid
}

### Work on any GenomicRanges derivative and return a CompressedIRangesList
### instance.
deconstructGRintoRGL <- function(x, with.revmap=FALSE,
                                    ignore.strand=FALSE, drop=FALSE)
{
    if (!isTRUEorFALSE(with.revmap))
        stop("'with.revmap' must be TRUE or FALSE")
    if (!isTRUEorFALSE(ignore.strand))
        stop("'ignore.strand' must be TRUE or FALSE")
    x_ranges <- unname(ranges(x))
    if (with.revmap)
        mcols(x_ranges) <- DataFrame(revmap=seq_along(x_ranges))
    x_spaceid <- .make_spaceid(x, ignore.strand=ignore.strand, drop=drop)
    split(x_ranges, x_spaceid)
}

### Return a GRanges instance.
reconstructGRfromRGL <- function(rgl, x)
{
    ## Prepare 'ans_ranges'.
    ans_ranges <- unlist(rgl, use.names=FALSE)

    ## Prepare 'ans_seqnames' and 'ans_strand'.
    rgl_eltNROWS <- elementNROWS(rgl)
    spaceid <- as.integer(names(rgl)) - 1L
    ans_seqnames <- Rle(structure(spaceid %/% 3L + 1L,
                                  levels=seqlevels(x),
                                  class="factor"),
                        rgl_eltNROWS)
    ans_strand <- Rle(structure(spaceid %% 3L + 1L,
                                levels=levels(strand()),
                                class="factor"),
                      rgl_eltNROWS)

    ## Prepare 'ans_mcols'.
    ans_mcols <- mcols(ans_ranges)
    if (is.null(ans_mcols)) {
        ans_mcols <- S4Vectors:::make_zero_col_DataFrame(length(ans_ranges))
    } else {
        mcols(ans_ranges) <- NULL
    }

    ## Prepare 'ans_seqinfo'.
    ans_seqinfo <- seqinfo(x)

    ## To be as fast as possible, we don't use internal low-level constructor
    ## new_GRanges() and we don't check the new object.
    new2("GRanges", seqnames=ans_seqnames,
                    ranges=ans_ranges,
                    strand=ans_strand,
                    elementMetadata=ans_mcols,
                    seqinfo=ans_seqinfo,
                    check=FALSE)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Deconstruction/reconstruction of a GRangesList object into/from a GRanges
### object.
###
### For internal use only (not exported).
###

### Unlist GRangesList object 'x' into a GRanges object but the differences
### with the "unlist" method for GRangesList objects are:
###   - The sequence names of the returned GRanges object are modified by
###     embedding the "grouping by top-level element" information in them.
###   - The seqinfo is modified accordingly.
deconstructGRLintoGR <- function(x, expand.levels=FALSE)
{
    ans <- x@unlistData
    f1 <- rep.int(seq_len(length(x)), elementNROWS(x))
    f2 <- as.integer(seqnames(ans))
    f12 <- paste(f1, f2, sep="|")

    ## Compute 'ans_seqinfo'.
    if (expand.levels) {
        x_nlev <- length(seqlevels(x))
        i1 <- rep(seq_len(length(x)), each=x_nlev)
        i2 <- rep.int(seq_len(x_nlev), length(x))
    } else {
        oo <- orderIntegerPairs(f1, f2)
        of1 <- f1[oo]
        of2 <- f2[oo]
        ## TODO: Support 'method="presorted"' in duplicatedIntegerPairs() for
        ## when the 2 input vectors are already sorted.
        notdups <- !duplicatedIntegerPairs(of1, of2)
        i1 <- of1[notdups]
        i2 <- of2[notdups]
    }
    x_seqinfo <- seqinfo(x)
    ans_seqlevels <- paste(i1, i2, sep="|")
    ans_seqlengths <- unname(seqlengths(x_seqinfo))[i2]
    ans_isCircular <- unname(isCircular(x_seqinfo))[i2]
    ans_seqinfo <- Seqinfo(ans_seqlevels, ans_seqlengths, ans_isCircular)

    ## The 2 following modifications must be seen as a single atomic
    ## operation since doing the 1st without doing the 2nd would leave 'ans'
    ## in a broken state.
    ans@seqnames <- Rle(factor(f12, ans_seqlevels))
    ans@seqinfo <- ans_seqinfo
    ans
}

### The "inverse" transform of deconstructGRLintoGR().
### More precisely, reconstructGRLfromGR() transforms GRanges object 'gr'
### with sequence names in the "f1|f2" format (as produced by
### deconstructGRLintoGR() above) back into a GRangesList object with the
### same length & names & metadata columns & seqinfo as 'x'.
### The fundamental property of this deconstruction/reconstruction mechanism
### is that, for any GRangesList object 'x':
###
###   reconstructGRLfromGR(deconstructGRLintoGR(x), x) is identical to x
###
reconstructGRLfromGR <- function(gr, x, with.revmap=FALSE)
{
    snames <- strsplit(as.character(seqnames(gr)), "|", fixed=TRUE)
    m12 <- matrix(as.integer(unlist(snames)), ncol=2, byrow=TRUE)

    ## Restore the real sequence names.
    f2 <- m12[ , 2L]
    x_seqlevels <- seqlevels(x)
    ## The 2 following modifications must be seen as a single atomic
    ## operation since doing the 1st without doing the 2nd would leave 'ans'
    ## in a broken state.
    gr@seqnames <- Rle(factor(x_seqlevels[f2], x_seqlevels))
    gr@seqinfo <- seqinfo(x)

    ## Split.
    f1 <- m12[ , 1L]
    ans <- split(gr, factor(f1, levels=seq_len(length(x))))

    ## Decorate 'ans'.
    metadata(ans) <- metadata(x)
    names(ans) <- names(x)
    if (with.revmap) {
        unlisted_ans <- unlist(ans, use.names=FALSE)
        mcols(unlisted_ans)$revmap <-
            IRanges:::global2local_revmap(mcols(unlisted_ans)$revmap, ans, x)
        ans <- relist(unlisted_ans, ans)
    }
    mcols(ans) <- mcols(x)
    ans
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### range()
###

### Always return a GRanges *instance* whatever GenomicRanges derivative the
### input is, so does NOT act like an endomorphism in general. 
setMethod("range", "GenomicRanges",
    function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
    {
        if (!identical(na.rm, FALSE))
            warning("'na.rm' argument is ignored")
        args <- unname(list(x, ...))
        args <- lapply(args, granges)
        gr <- do.call(c, args)

        rgl <- deconstructGRintoRGL(gr, with.revmap=with.revmap,
                                    ignore.strand=ignore.strand, drop=TRUE)
        rgl2 <- callGeneric(rgl, with.revmap=with.revmap)
        if (with.revmap)
            rgl2 <- .fix_inner_revmap_mcol(rgl2, rgl)
        reconstructGRfromRGL(rgl2, gr)
    }
)

### Overwrite above method with optimized method for GPos objects.
### Like the above method, return a GRanges instance.
setMethod("range", "GPos",
    function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
        callGeneric(x@pos_runs, ...,
                    with.revmap=with.revmap, ignore.strand=ignore.strand,
                    na.rm=na.rm)
)

setMethod("range", "GRangesList",
    function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
    {
        gr <- deconstructGRLintoGR(x)
        ## "range" method for GRanges objects is fast.
        gr2 <- callGeneric(gr, ..., with.revmap=with.revmap,
                           ignore.strand=ignore.strand, na.rm=na.rm)
        reconstructGRLfromGR(gr2, x, with.revmap=with.revmap)
    }
)

setMethod("range", "GenomicRangesList",
          function(x, ..., with.revmap=FALSE, ignore.strand=FALSE, na.rm=FALSE)
          {
              endoapply(x, range, ..., with.revmap=with.revmap,
                        ignore.strand=ignore.strand, na.rm=na.rm)
          })

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### reduce()
###

### Always return a GRanges *instance* whatever GenomicRanges derivative the
### input is, so does NOT act like an endomorphism in general. 
setMethod("reduce", "GenomicRanges",
    function(x, drop.empty.ranges=FALSE, min.gapwidth=1L,
                with.revmap=FALSE,
                with.inframe.attrib=FALSE, ignore.strand=FALSE)
    {
        if (!identical(with.inframe.attrib, FALSE))
            stop("'with.inframe.attrib' argument not supported ",
                 "when reducing a GenomicRanges object")
        rgl <- deconstructGRintoRGL(x, with.revmap=with.revmap,
                                       ignore.strand=ignore.strand, drop=TRUE)
        rgl2 <- callGeneric(rgl, drop.empty.ranges=drop.empty.ranges,
                                 min.gapwidth=min.gapwidth,
                                 with.revmap=with.revmap)
        if (with.revmap)
            rgl2 <- .fix_inner_revmap_mcol(rgl2, rgl)
        reconstructGRfromRGL(rgl2, x)
    }
)

setMethod("reduce", "GRangesList",
    function(x, drop.empty.ranges=FALSE, min.gapwidth=1L,
                with.revmap=FALSE,
                with.inframe.attrib=FALSE, ignore.strand=FALSE)
    {
        if (!identical(with.inframe.attrib, FALSE)) 
            stop("'with.inframe.attrib' argument is not supported ", 
                 "when reducing a GRangesList object")
        gr <- deconstructGRLintoGR(x)
        gr2 <- callGeneric(gr, drop.empty.ranges=drop.empty.ranges,
                               min.gapwidth=min.gapwidth,
                               with.revmap=with.revmap,
                               ignore.strand=ignore.strand)
        reconstructGRLfromGR(gr2, x, with.revmap=with.revmap)
    }
)

setMethod("reduce", "GenomicRangesList",
          function(x, drop.empty.ranges = FALSE, ...) {
    endoapply(x, reduce, drop.empty.ranges=drop.empty.ranges, ...)
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### gaps()
###

### Always return a GRanges *instance* whatever GenomicRanges derivative the
### input is, so does NOT act like an endomorphism in general. 
setMethod("gaps", "GenomicRanges",
    function(x, start=1L, end=seqlengths(x))
    {
        seqlevels <- seqlevels(x)
        if (!is.null(names(start)))
            start <- start[seqlevels]
        if (!is.null(names(end)))
            end <- end[seqlevels]
        start <- S4Vectors:::recycleVector(start, length(seqlevels))
        start <- rep(start, each=3L)
        end <- S4Vectors:::recycleVector(end, length(seqlevels))
        end <- rep(end, each=3L)
        rgl <- deconstructGRintoRGL(x)
        rgl2 <- callGeneric(rgl, start=start, end=end)
        reconstructGRfromRGL(rgl2, x)
    }
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### disjoin()
###

### Always return a GRanges *instance* whatever GenomicRanges derivative the
### input is, so does NOT act like an endomorphism in general. 
setMethod("disjoin", "GenomicRanges",
    function(x, with.revmap=FALSE, ignore.strand=FALSE)
    {
        rgl <- deconstructGRintoRGL(x, with.revmap=with.revmap, 
                                       ignore.strand=ignore.strand, drop=TRUE)
        rgl2 <- callGeneric(rgl, with.revmap=with.revmap)
        if (with.revmap)
            rgl2 <- .fix_inner_revmap_mcol(rgl2, rgl)
        reconstructGRfromRGL(rgl2, x)
    }
)

setMethod("disjoin", "GRangesList",
    function(x, with.revmap=FALSE, ignore.strand=FALSE)
    {
        gr <- deconstructGRLintoGR(x)
        gr2 <- callGeneric(gr, with.revmap=with.revmap,
                               ignore.strand=ignore.strand)
        reconstructGRLfromGR(gr2, x, with.revmap=with.revmap)
    }
)

setMethod("disjoin", "GenomicRangesList",
          function(x, with.revmap=FALSE, ignore.strand=FALSE)
          {
              endoapply(x, disjoin, with.revmap=with.revmap,
                        ignore.strand=ignore.strand)
          })

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### isDisjoint()
###

setMethod("isDisjoint", "GenomicRanges",
    function(x, ignore.strand=FALSE)
    {
        rgl <- deconstructGRintoRGL(x, ignore.strand=ignore.strand, drop=TRUE)
        all(callGeneric(rgl))
    }
)
### Overwrite above method with optimized method for GPos objects.
setMethod("isDisjoint", "GPos",
    function(x, ignore.strand=FALSE) callGeneric(x@pos_runs, ignore.strand)
)

setMethod("isDisjoint", "GRangesList",
    function(x, ignore.strand=FALSE)
    {
        gr <- deconstructGRLintoGR(x, expand.levels=TRUE)
        rgl <- deconstructGRintoRGL(gr, ignore.strand=ignore.strand)
        ans <- callGeneric(rgl)
        ans <- colSums(matrix(!ans, ncol=length(x))) == 0L
        names(ans) <- names(x)
        ans
    }
)

setMethod("isDisjoint", "GenomicRangesList",
          function(x, ignore.strand=FALSE) {
              endoapply(x, isDisjoint, ignore.strand=ignore.strand)
          })

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### disjointBins()
###

setMethod("disjointBins", "GenomicRanges",
    function(x, ignore.strand=FALSE)
    {
        x_spaceid <- .make_spaceid(x, ignore.strand=ignore.strand, drop=TRUE)
        rgl <- split(unname(ranges(x)), x_spaceid)
        ans <- callGeneric(rgl)
        unsplit(ans, x_spaceid)
    }
)
LiNk-NY/GenomicRanges documentation built on May 9, 2017, 5:14 a.m.