R/FWGRanges-class.R

Defines functions .strandCollapse .findOverlaps_FWGRanges

# Internal classes -------------------------------------------------------------

# The FWGRanges class is a container for the fixed-width genomic locations and
# their associated annotations.
# NOTE: The intention is to make this a fully-fledged GenomicRanges subclass
#       that is part of the GenomicRRanges package. For now, this class is
#       internal to bsseq and only a subset of methods are properly implemented.
.FWGRanges <- setClass(
    "FWGRanges",
    contains = "GenomicRanges",
    representation(
        seqnames = "Rle",
        ranges = "FWIRanges",
        strand = "Rle",
        seqinfo = "Seqinfo"
    ),
    prototype(
        seqnames = Rle(factor()),
        strand = Rle(strand())
    )
)

# Internal methods -------------------------------------------------------------

# NOTE: Combine the new "parallel slots" with those of the parent class. Make
#       sure to put the new parallel slots **first**. See R/Vector-class.R file
#       in the S4Vectors package for what slots should or should not be
#       considered "parallel".
setMethod(
    "parallel_slot_names",
    "FWGRanges",
    function(x) {
        c(GenomicRanges:::extraColumnSlotNames(x), "seqnames", "ranges",
          "strand", callNextMethod())
    }
)

setMethod("seqnames", "FWGRanges", function(x) x@seqnames)

setMethod("strand", "FWGRanges", function(x) x@strand)

setMethod("seqinfo", "FWGRanges", function(x) x@seqinfo)

setMethod(
    "ranges",
    "FWGRanges",
    function(x, use.names = TRUE, use.mcols = FALSE) {
        if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE")
        if (!isTRUEorFALSE(use.mcols)) stop("'use.mcols' must be TRUE or FALSE")
        # TODO: This is a work-around the requirement in the GenomicRanges
        #       validity method that ranges(GenomicRanges) returns a IRanges or
        #       IPos instance.
        # ans <- x@ranges
        ans <- as(x@ranges, "IRanges")
        if (!use.names) names(ans) <- NULL
        if (use.mcols) mcols(ans) <- mcols(x)
        ans
    }
)

# TODO: These wouldn't be necessary if ranges(FWGRanges) returned a FWIRanges
#       instead of an IRanges.
setMethod("start", "FWGRanges", function(x) start(x@ranges))
setMethod("width", "FWGRanges", function(x) width(x@ranges))
setMethod("end", "FWGRanges", function(x) {
    if (x@ranges@width == 1L) {
        start(x)
    } else {
        width(x) - 1L + start(x)
    }
})

# TODO: Follow shift,GRanges-method
setMethod("shift", "FWGRanges", function(x, shift = 0L, use.names = TRUE) {
    stopifnot(use.names)
    new_ranges <- shift(x@ranges, shift, use.names)
    x@ranges <- new_ranges
    validObject(x)
    x
})

# NOTE: Needed for seqinfo<-,FWGRanges-method
setMethod("update", "FWGRanges", function(object, ...) {
    BiocGenerics:::replaceSlots(object, ...)
})

# NOTE: This is pieced together in order to get something up and running. It
#       should, however, be possible to this 'properly' via inheritance. Most
#       of this hack is to work around the fact that ranges(FWGRanges)
#       currently has to return a IRanges instance instead of a FWIRanges
#       instance.
.findOverlaps_FWGRanges <- function(query, subject, maxgap = -1L,
                                    minoverlap = 0L,
                                    type = c("any", "start", "end", "within",
                                             "equal"),
                                    select = c("all", "first", "last",
                                               "arbitrary"),
                                    ignore.strand = FALSE) {
    # findOverlaps,GenomicRanges,GenomicRanges-method ----------------------
    type <- match.arg(type)
    select <- match.arg(select)

    # GenomicRanges:::findOverlaps_GNCList() -------------------------------
    if (!(is(query, "FWGRanges") && is(subject, "FWGRanges"))) {
        stop("'query' and 'subject' must be FWGRanges objects")
    }
    if (!isTRUEorFALSE(ignore.strand)) {
        stop("'ignore.strand' must be TRUE or FALSE")
    }

    si <- merge(seqinfo(query), seqinfo(subject))
    q_seqlevels <- seqlevels(query)
    s_seqlevels <- seqlevels(subject)
    common_seqlevels <- intersect(q_seqlevels, s_seqlevels)
    NG <- length(common_seqlevels)
    q_group_idx <- match(common_seqlevels, q_seqlevels)
    s_group_idx <- match(common_seqlevels, s_seqlevels)

    # Extract 'q_groups' and 's_groups' (both of length NG).
    q_groups <- GenomicRanges:::.extract_groups_from_GenomicRanges(
        query)[q_group_idx]
    s_groups <- GenomicRanges:::.extract_groups_from_GenomicRanges(
        subject)[s_group_idx]

    # Extract 'nclists' and 'nclist_is_q' (both of length NG).
    if (is(subject, "GNCList")) {
        nclists <- subject@nclists[s_group_idx]
        nclist_is_q <- rep.int(FALSE, NG)
    } else if (is(query, "GNCList")) {
        nclists <- query@nclists[q_group_idx]
        nclist_is_q <- rep.int(TRUE, NG)
    } else {
        # We'll do "on-the-fly preprocessing".
        nclists <- vector(mode = "list", length = NG)
        nclist_is_q <- rep.int(NA, NG)
    }

    # Extract 'circle_length' (of length NG).
    circle_length <- GenomicRanges:::.get_circle_length(si)[q_group_idx]

    ##Extract 'q_space' and 's_space'.
    if (ignore.strand) {
        q_space <- s_space <- NULL
    } else {
        q_space <- as.integer(strand(query)) - 3L
        s_space <- as.integer(strand(subject)) - 3L
    }

    # IRanges:::find_overlaps_in_groups_NCList() ---------------------------
    q <- query@ranges
    s <- subject@ranges
    circle.length <- circle_length
    if (!(is(q, "IntegerRanges") && is(s, "IntegerRanges"))) {
        stop("'q' and 's' must be IntegerRanges object")
    }
    if (!is(q_groups, "CompressedIntegerList")) {
        stop("'q_groups' must be a CompressedIntegerList object")
    }
    if (!is(s_groups, "CompressedIntegerList")) {
        stop("'s_groups' must be a CompressedIntegerList object")
    }

    if (!isSingleNumber(maxgap)) {
        stop("'maxgap' must be a single integer")
    }
    if (!is.integer(maxgap)) {
        maxgap <- as.integer(maxgap)
    }

    if (!isSingleNumber(minoverlap)) {
        stop("'minoverlap' must be a single integer")
    }
    if (!is.integer(minoverlap)) {
        minoverlap <- as.integer(minoverlap)
    }

    q_circle_len <- circle.length
    q_circle_len[which(nclist_is_q)] <- NA_integer_
    q <- IRanges:::.shift_ranges_in_groups_to_first_circle(
        x = q,
        x_groups = q_groups,
        circle.length = q_circle_len)
    s_circle_len <- circle.length
    s_circle_len[which(!nclist_is_q)] <- NA_integer_
    s <- IRanges:::.shift_ranges_in_groups_to_first_circle(
        x = s,
        x_groups = s_groups,
        circle.length = s_circle_len)
    .Call2("C_find_overlaps_in_groups_NCList",
           start(q), end(q), q_space, q_groups,
           start(s), end(s), s_space, s_groups,
           nclists, nclist_is_q,
           maxgap, minoverlap, type, select, circle.length,
           PACKAGE = "IRanges")
}

setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)

# TODO: A dedicated duplicated method would be faster than
#       duplicated,GenomicRanges, because it could use end(x) instead of
#       width(x) in call to duplicatedIntegerQuads().

# Internal functions -----------------------------------------------------------

# TODO: Warn if x contains mix of + and * or - and * loci?
.strandCollapse <- function(x) {
    stopifnot(is(x, "GenomicRanges"))
    if (all(strand(x) == "*")) return(x)
    # Shift loci on negative strand by 1 to the left
    x[strand(x) == "-"] <- IRanges::shift(x[strand(x) == "-"], -1L)
    # Unstrand all loci
    x <- unstrand(x)
    # Extract unique loci
    unique(x)
}

# TODOs ------------------------------------------------------------------------

# TODO: Document internal classes, methods, and functions for my own sanity.
#       Also, some may be useful to users of bsseq (although I still won't
#       export these for the time being). Ultimately, I'd like to promote
#       FWGRanges and FWIRanges to 'official' GenomicRanges and IntegerRanges
#       subclasses.
hansenlab/bsseq documentation built on June 12, 2025, 7:42 p.m.