IRanges: Foundation of integer range manipulation in Bioconductor

Documented in findOverlapPairs mergeByOverlaps

### =========================================================================
### findOverlaps (and related) methods
### -------------------------------------------------------------------------
###


## internal generic
setGeneric("process_self_hits",  signature="x", # not exported
    function(x, select=c("all", "first", "last", "arbitrary"),
                drop.self=FALSE, drop.redundant=FALSE)
        standardGeneric("process_self_hits"))

setMethod("process_self_hits", "SortedByQueryHits",
    function(x, select=c("all", "first", "last", "arbitrary"),
                drop.self=FALSE, drop.redundant=FALSE)
    {
        x <- as(x, "SortedByQuerySelfHits")
        select <- match.arg(select)
        if (!isTRUEorFALSE(drop.self))
            stop("'drop.self' must be TRUE or FALSE")
        if (!isTRUEorFALSE(drop.redundant))
            stop("'drop.redundant' must be TRUE or FALSE")
        if (drop.self) {
            self_idx <- which(isSelfHit(x))
            if (length(self_idx) != 0L)
                x <- x[-self_idx]
        }
        if (drop.redundant) {
            redundant_idx <- which(isRedundantHit(x))
            if (length(redundant_idx) != 0L)
                x <- x[-redundant_idx]
        }
        selectHits(x, select=select)
    }
)

setMethod("process_self_hits", "SortedByQueryHitsList",
    function(x, select=c("all", "first", "last", "arbitrary"),
                drop.self=FALSE, drop.redundant=FALSE)
    {
        x <- as(x, "SortedByQuerySelfHitsList")
        select <- match.arg(select)
        ans <- lapply(x, process_self_hits,
                         select, drop.self,  drop.redundant)
        if (select != "all")
            return(IntegerList(ans))
        S4Vectors:::new_SimpleList_from_list("HitsList",
                        ans,
                        subjectOffsets = x@subjectOffsets)
    }
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### findOverlaps()
###
### Find objects in the query that overlap those in the subject.
###

setGeneric("findOverlaps", signature=c("query", "subject"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"),
             ...)
        standardGeneric("findOverlaps")
)

findOverlaps_IntegerRanges <- function(query, subject,
             maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"))
{
    if (is.integer(query))
        query <- IRanges(query, width=1L)
    type <- match.arg(type)
    select <- match.arg(select)
    findOverlaps_NCList(query, subject,
                        maxgap=maxgap, minoverlap=minoverlap,
                        type=type, select=select)
}

setMethod("findOverlaps", c("IntegerRanges", "IntegerRanges"),
    findOverlaps_IntegerRanges
)

setMethod("findOverlaps", c("integer", "IntegerRanges"),
    findOverlaps_IntegerRanges
)

setMethod("findOverlaps", c("Vector", "missing"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             select=c("all", "first", "last", "arbitrary"),
             ...,
             drop.self=FALSE, drop.redundant=FALSE)
    {
        select <- match.arg(select)
        result <- findOverlaps(query, query,
                               maxgap=maxgap, minoverlap=minoverlap,
                               type=match.arg(type), select="all",
                               ...)
        process_self_hits(result, select, drop.self, drop.redundant)
    }
)

# might consider making this the following:
# setMethod("findOverlaps", c("IntegerRangesList", "IntegerRangesList"),
#           function(query, subject, maxgap = -1L, minoverlap = 0L,
#                    type = c("any", "start", "end", "within", "equal"),
#                    select = c("all", "first", "last", "arbitrary"),
#                    drop = FALSE)
#           {
#             findOverlaps(query, NCLists(query),
#               maxgap = maxgap, minoverlap = minoverlap,
#               type = match.arg(type), select = match.arg(select), drop = drop)
#           }
#   )

setMethod("findOverlaps", c("IntegerRangesList", "IntegerRangesList"),
          function(query, subject, maxgap = -1L, minoverlap = 0L,
                   type = c("any", "start", "end", "within", "equal"),
                   select = c("all", "first", "last", "arbitrary"),
                   drop = FALSE)
          {
            type <- match.arg(type)
            select <- match.arg(select)

            query <- as.list(query)
            subject <- as.list(subject)
            origSubject <- subject
            if (!is.null(names(subject)) && !is.null(names(query))) {
              subject <- subject[names(query)]
              names(subject) <- names(query) # get rid of NA's in names
            } else {
              subject <- subject[seq_along(query)]
            }
            ## NULL's are introduced where they do not match
            ## We replace those with empty IRanges
            subject[S4Vectors:::sapply_isNULL(subject)] <- list(IRanges())

            ans <- lapply(seq_len(length(subject)), function(i) {
              findOverlaps(query[[i]], subject[[i]],
                           maxgap = maxgap, minoverlap = minoverlap,
                           type = type, select = select)
            })
            names(ans) <- names(subject)
            if (select == "all") {
              ans <- HitsList(ans, origSubject)
            } else if (drop) {
              off <- head(c(0L, cumsum(sapply(origSubject, length))), -1)
              names(off) <- names(origSubject)
              if (is.null(names(ans)))
                off <- off[seq_along(ans)]
              else
                off <- off[names(ans)]
              ans <-
                unlist(ans, use.names=FALSE) +
                  rep.int(unname(off), sapply(ans, length))
            } else {
              ans <- IntegerList(ans)
            }
            ans
          })

setMethod("findOverlaps", c("Pairs", "missing"),
          function (query, subject, maxgap = -1L, minoverlap = 0L,
                    type = c("any",  "start", "end", "within", "equal"),
                    select = c("all", "first", "last", "arbitrary"), ...) {
              findOverlaps(zipup(query), maxgap=maxgap,
                           minoverlap=minoverlap, type=type, select=select, ...)
          })

setMethod("findOverlaps", c("Pairs", "ANY"),
          function (query, subject, maxgap = -1L, minoverlap = 0L,
                    type = c("any",  "start", "end", "within", "equal"),
                    select = c("all", "first", "last", "arbitrary"), ...) {
              findOverlaps(zipup(query), subject, maxgap=maxgap,
                           minoverlap=minoverlap, type=type, select=select, ...)
          })

setMethod("findOverlaps", c("ANY", "Pairs"),
          function (query, subject, maxgap = -1L, minoverlap = 0L,
                    type = c("any",  "start", "end", "within", "equal"),
                    select = c("all", "first", "last", "arbitrary"), ...) {
              findOverlaps(query, zipup(subject), maxgap=maxgap,
                           minoverlap=minoverlap, type=type, select=select, ...)
          })

setMethod("findOverlaps", c("Pairs", "Pairs"),
          function (query, subject, maxgap = -1L, minoverlap = 0L,
                    type = c("any",  "start", "end", "within", "equal"),
                    select = c("all", "first", "last", "arbitrary"), ...) {
              findOverlaps(zipup(query), zipup(subject),
                           maxgap=maxgap, minoverlap=minoverlap, type=type,
                           select=select, ...)
          })

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### countOverlaps()
###

setGeneric("countOverlaps", signature=c("query", "subject"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             ...)
        standardGeneric("countOverlaps")
)

.default_countOverlaps <-
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             ...)
{
    if (is.integer(query))
        query <- IRanges(query, width=1L)
    type <- match.arg(type)
    if (missing(subject)) {
        hits <- findOverlaps(query,
                             maxgap=maxgap, minoverlap=minoverlap,
                             type=type,
                             ...)
    } else {
        hits <- findOverlaps(query, subject,
                             maxgap=maxgap, minoverlap=minoverlap,
                             type=type,
                             ...)
    }
    ans <- countQueryHits(hits)
    names(ans) <- names(query)
    ans
}

setMethod("countOverlaps", c("Vector", "Vector"), .default_countOverlaps)
setMethod("countOverlaps", c("integer", "Vector"), .default_countOverlaps)
setMethod("countOverlaps", c("Vector", "missing"), .default_countOverlaps)

countOverlaps_IntegerRanges <- function(query, subject,
              maxgap=-1L, minoverlap=0L,
              type=c("any", "start", "end", "within", "equal"))
{
    type <- match.arg(type)
    ans <- findOverlaps_NCList(query, subject,
                               maxgap=maxgap, minoverlap=minoverlap,
                               type=type, select="count")
    names(ans) <- names(query)
    ans
}

setMethod("countOverlaps", c("IntegerRanges", "IntegerRanges"),
    countOverlaps_IntegerRanges
)

setMethod("countOverlaps", c("IntegerRangesList", "IntegerRangesList"),
          function(query, subject, maxgap=-1L, minoverlap=0L,
                   type = c("any", "start", "end", "within", "equal"))
          {
              IntegerList(mapply(countOverlaps, query, subject,
                              MoreArgs = list(maxgap = maxgap,
                                      minoverlap = minoverlap,
                                      type = match.arg(type)),
                              SIMPLIFY = FALSE))
          })


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### overlapsAny()
###

### Same args and signature as countOverlaps().
setGeneric("overlapsAny", signature=c("query", "subject"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             ...)
        standardGeneric("overlapsAny")
)

### NOT exported but used in the bsseq package.
default_overlapsAny <-
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             ...)
{
    if (is.integer(query))
        query <- IRanges(query, width=1L)
    type <- match.arg(type)
    if (missing(subject)) {
        ahit <- findOverlaps(query,
                             maxgap=maxgap, minoverlap=minoverlap,
                             type=type, select="arbitrary",
                             ...)
    } else {
        ahit <- findOverlaps(query, subject,
                             maxgap=maxgap, minoverlap=minoverlap,
                             type=type, select="arbitrary",
                             ...)
    }
    !is.na(ahit)
}

setMethod("overlapsAny", c("Vector", "Vector"), default_overlapsAny)
setMethod("overlapsAny", c("integer", "Vector"), default_overlapsAny)
setMethod("overlapsAny", c("Vector", "missing"), default_overlapsAny)

setMethod("overlapsAny", c("IntegerRangesList", "IntegerRangesList"),
    function(query, subject, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             ...)
    {
        query <- as.list(query)
        subject <- as.list(subject)
        type <- match.arg(type)
        if (!is.null(names(query)) && !is.null(names(subject))) {
            subject <- subject[names(query)]
            names(subject) <- names(query) # get rid of NA's in names
        } else {
            subject <- subject[seq_along(query)]
        }
        ## NULL's are introduced where they do not match
        ## We replace those with empty IRanges
        subject[S4Vectors:::sapply_isNULL(subject)] <- list(IRanges())
        LogicalList(lapply(structure(seq_len(length(query)),
                                     names = names(query)),
                           function(i)
                             overlapsAny(query[[i]], subject[[i]],
                                         maxgap=maxgap, minoverlap=minoverlap,
                                         type=type,
                                         ...)))
    }
)

### Convenience wrappers for the 3 most common use cases.
`%over%` <- function(query, subject) overlapsAny(query, subject)
`%within%` <- function(query, subject) overlapsAny(query, subject,
                                                   type="within")
`%outside%` <- function(query, subject) !overlapsAny(query, subject)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### subsetByOverlaps()
###

### First 2 arguments are 'x' and 'ranges' like for the
### transcriptsByOverlaps(), exonsByOverlaps(), and cdsByOverlaps() functions
### from the GenomicFeatures package and the snpsByOverlaps() function from
### the BSgenome package.
setGeneric("subsetByOverlaps", signature=c("x", "ranges"),
    function(x, ranges, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             invert=FALSE, ...)
        standardGeneric("subsetByOverlaps")
)

### NOT exported but used in the bsseq package.
default_subsetByOverlaps <-
    function(x, ranges, maxgap=-1L, minoverlap=0L,
             type=c("any", "start", "end", "within", "equal"),
             invert=FALSE, ...)
{
    ov_any <- overlapsAny(x, ranges,
                          maxgap=maxgap, minoverlap=minoverlap,
                          type=match.arg(type),
                          ...)
    if (invert)
        ov_any <- !ov_any
    x[ov_any]
}

setMethod("subsetByOverlaps", c("Vector", "Vector"), default_subsetByOverlaps)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### overlapsRanges()
###
### Extracts the actual regions of intersection between the overlapping ranges.
###

setGeneric("overlapsRanges", signature=c("query", "subject"),
    function(query, subject, hits=NULL, ...) standardGeneric("overlapsRanges")
)

setMethod("overlapsRanges", c("IntegerRanges", "IntegerRanges"),
    function(query, subject, hits=NULL, ...)
    {
        if (is.null(hits)) {
            hits <- findOverlaps(query, subject, ...)
        } else {
            if (!is(hits, "Hits"))
                stop("'hits' must be a Hits object")
            if (length(list(...)) != 0L)
                stop(wmsg("Extra arguments are only accepted when the 'hits' ",
                          "argument is not supplied, in which case they are ",
                          "passed to the internal call to findOverlaps(). ",
                          "See ?overlapsRanges for more information."))
            if (queryLength(hits) != length(query) ||
                subjectLength(hits) != length(subject))
                stop("'hits' is not compatible with 'query' and 'subject'")
        }
        ### Could be replaced by 1-liner:
        ###   pintersect(query[queryHits(hits)], subject[subjectHits(hits)])
        ### but will fail if 'query' or 'subject' is a kind of IntegerRanges
        ### object that cannot be subsetted (e.g. Partitioning object).
        m <- as.matrix(hits)
        qstart <- start(query)[m[,1L]]
        qend <- end(query)[m[,1L]]
        sstart <- start(subject)[m[,2L]]
        send <- end(subject)[m[,2L]]
        IRanges(pmax.int(qstart, sstart), pmin.int(send, qend))
    }
)

setMethod("overlapsRanges", c("IntegerRangesList", "IntegerRangesList"),
    function(query, subject, hits=NULL, ...)
    {
        if (is.null(hits)) {
            hits <- findOverlaps(query, subject, ...)
        } else {
            if (!is(hits, "HitsList"))
                stop("'hits' must be a HitsList object")
            if (length(list(...)) != 0L)
                stop(wmsg("Extra arguments are only accepted when the 'hits' ",
                          "argument is not supplied, in which case they are ",
                          "passed to the internal call to findOverlaps(). ",
                          "See ?overlapsRanges for more information."))
            if (length(hits) != length(query) ||
                length(hits) != length(subject))
                stop("'query', 'subject', and 'hits' must have the same length")
        }
        queries <- as.list(query, use.names = FALSE)
        subjects <- as.list(subject, use.names = FALSE)
        els <- as.list(hits, use.names = FALSE)
        ans <- lapply(seq_len(length(hits)),
            function(i) overlapsRanges(queries[[i]], subjects[[i]], els[[i]]))
        ans <- as(ans, "SimpleIRangesList")
        names(ans) <- names(hits)
        ans
    }
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### poverlaps()
###

setGeneric("poverlaps", signature=c("query", "subject"),
           function(query, subject, maxgap=0L, minoverlap=1L,
                    type=c("any", "start", "end", "within", "equal"),
                    ...)
               standardGeneric("poverlaps")
           )

setMethod("poverlaps", c("IntegerRanges", "IntegerRanges"),
          function(query, subject, maxgap=0L, minoverlap=1L,
                   type=c("any", "start", "end", "within", "equal"))
          {
              stopifnot(isSingleNumber(maxgap))
              stopifnot(isSingleNumber(minoverlap))
              type <- match.arg(type)
              if (type == "any") {
                  query <- query + maxgap
              } else if (type == "within") {
                  if (maxgap > 0L) {
                      warning("'maxgap' is ignored when type=='within'")
                  }
                  return(start(query) >= start(subject) &
                             end(query) <= end(subject) &
                                 width(query) >= minoverlap)
              }
              amount <- pmin(end(query), end(subject)) -
                  pmax(start(query), start(subject)) + 1L
              overlaps <- amount >= minoverlap
              samePos <- function(x, y) {
                  x <= (y + maxgap) & x >= (y - maxgap)
              }
              keep <- switch(type,
                             any = TRUE,
                             start = samePos(start(query), start(subject)),
                             end = samePos(end(query), end(subject)),
                             equal = samePos(start(query), start(subject)) &
                                 samePos(end(query), end(subject)))
             overlaps & keep
          }
          )

setMethod("poverlaps", c("integer", "IntegerRanges"),
          function(query, subject, maxgap=0L, minoverlap=1L,
                   type=c("any", "start", "end", "within", "equal"))
          {
              poverlaps(IRanges(query, width=1L), subject,
                        maxgap=maxgap, minoverlap=minoverlap,
                        type=match.arg(type))
          })

setMethod("poverlaps", c("IntegerRanges", "integer"),
          function(query, subject, maxgap=0L, minoverlap=1L,
                   type=c("any", "start", "end", "within", "equal"))
          {
              poverlaps(query, IRanges(subject, width=1L),
                        maxgap=maxgap, minoverlap=minoverlap,
                        type=match.arg(type))
          })

### Convenience operators for poverlaps()
`%pover%` <- function(query, subject) poverlaps(query, subject)
`%pwithin%` <- function(query, subject) poverlaps(query, subject, type="within")
`%poutside%` <- function(query, subject) !poverlaps(query, subject)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Merge two sets of ranges by overlap into a DataFrame
###

mergeByOverlaps <- function(query, subject, ...) {
  hits <- findOverlaps(query, subject, ...)
  query_df <- as(extractROWS(query, queryHits(hits)), "DataFrame")
  colnames(query_df)[1L] <- deparse(substitute(query))
  subject_df <- as(extractROWS(subject, subjectHits(hits)), "DataFrame")
  colnames(subject_df)[1L] <- deparse(substitute(subject))
  cbind(query_df, subject_df)
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Convenience for dereferencing overlap hits to a Pairs
###

findOverlapPairs <- function(query, subject, ...) {
    if (missing(subject)) {
        hits <- findOverlaps(query, ...)
        subject <- query
    } else {
        hits <- findOverlaps(query, subject, ...)
    }
    Pairs(query, subject, hits=hits)
}
Bioconductor/IRanges documentation built on July 4, 2025, 1:16 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Bioconductor/IRanges
Foundation of integer range manipulation in Bioconductor

R/findOverlaps-methods.R
In Bioconductor/IRanges: Foundation of integer range manipulation in Bioconductor

Defines functions findOverlapPairs mergeByOverlaps `%poutside%` `%pwithin%` `%pover%` default_subsetByOverlaps `%outside%` `%within%` `%over%` default_overlapsAny countOverlaps_IntegerRanges .default_countOverlaps findOverlaps_IntegerRanges

Documented in findOverlapPairs mergeByOverlaps

R Package Documentation

Browse R Packages

We want your feedback!

Bioconductor/IRanges Foundation of integer range manipulation in Bioconductor

R/findOverlaps-methods.R In Bioconductor/IRanges: Foundation of integer range manipulation in Bioconductor

Defines functions findOverlapPairs mergeByOverlaps `%poutside%` `%pwithin%` `%pover%` default_subsetByOverlaps `%outside%` `%within%` `%over%` default_overlapsAny countOverlaps_IntegerRanges .default_countOverlaps findOverlaps_IntegerRanges

Documented in findOverlapPairs mergeByOverlaps

R Package Documentation

Browse R Packages

We want your feedback!

Bioconductor/IRanges
Foundation of integer range manipulation in Bioconductor

R/findOverlaps-methods.R
In Bioconductor/IRanges: Foundation of integer range manipulation in Bioconductor