# Internal classes -------------------------------------------------------------
# The FWGRanges class is a container for the fixed-width genomic locations and
# their associated annotations.
# NOTE: The intention is to make this a fully-fledged GenomicRanges subclass
# that is part of the GenomicRRanges package. For now, this class is
# internal to bsseq and only a subset of methods are properly implemented.
.FWGRanges <- setClass(
"FWGRanges",
contains = "GenomicRanges",
representation(
seqnames = "Rle",
ranges = "FWIRanges",
strand = "Rle",
seqinfo = "Seqinfo"
),
prototype(
seqnames = Rle(factor()),
strand = Rle(strand())
)
)
# Internal methods -------------------------------------------------------------
# NOTE: Combine the new "parallel slots" with those of the parent class. Make
# sure to put the new parallel slots **first**. See R/Vector-class.R file
# in the S4Vectors package for what slots should or should not be
# considered "parallel".
setMethod(
"parallel_slot_names",
"FWGRanges",
function(x) {
c(GenomicRanges:::extraColumnSlotNames(x), "seqnames", "ranges",
"strand", callNextMethod())
}
)
setMethod("seqnames", "FWGRanges", function(x) x@seqnames)
setMethod("strand", "FWGRanges", function(x) x@strand)
setMethod("seqinfo", "FWGRanges", function(x) x@seqinfo)
setMethod(
"ranges",
"FWGRanges",
function(x, use.names = TRUE, use.mcols = FALSE) {
if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE")
if (!isTRUEorFALSE(use.mcols)) stop("'use.mcols' must be TRUE or FALSE")
# TODO: This is a work-around the requirement in the GenomicRanges
# validity method that ranges(GenomicRanges) returns a IRanges or
# IPos instance.
# ans <- x@ranges
ans <- as(x@ranges, "IRanges")
if (!use.names) names(ans) <- NULL
if (use.mcols) mcols(ans) <- mcols(x)
ans
}
)
# TODO: These wouldn't be necessary if ranges(FWGRanges) returned a FWIRanges
# instead of an IRanges.
setMethod("start", "FWGRanges", function(x) start(x@ranges))
setMethod("width", "FWGRanges", function(x) width(x@ranges))
setMethod("end", "FWGRanges", function(x) {
if (x@ranges@width == 1L) {
start(x)
} else {
width(x) - 1L + start(x)
}
})
# TODO: Follow shift,GRanges-method
setMethod("shift", "FWGRanges", function(x, shift = 0L, use.names = TRUE) {
stopifnot(use.names)
new_ranges <- shift(x@ranges, shift, use.names)
x@ranges <- new_ranges
validObject(x)
x
})
# NOTE: Needed for seqinfo<-,FWGRanges-method
setMethod("update", "FWGRanges", function(object, ...) {
BiocGenerics:::replaceSlots(object, ...)
})
# NOTE: This is pieced together in order to get something up and running. It
# should, however, be possible to this 'properly' via inheritance. Most
# of this hack is to work around the fact that ranges(FWGRanges)
# currently has to return a IRanges instance instead of a FWIRanges
# instance.
.findOverlaps_FWGRanges <- function(query, subject, maxgap = -1L,
minoverlap = 0L,
type = c("any", "start", "end", "within",
"equal"),
select = c("all", "first", "last",
"arbitrary"),
ignore.strand = FALSE) {
# findOverlaps,GenomicRanges,GenomicRanges-method ----------------------
type <- match.arg(type)
select <- match.arg(select)
# GenomicRanges:::findOverlaps_GNCList() -------------------------------
if (!(is(query, "FWGRanges") && is(subject, "FWGRanges"))) {
stop("'query' and 'subject' must be FWGRanges objects")
}
if (!isTRUEorFALSE(ignore.strand)) {
stop("'ignore.strand' must be TRUE or FALSE")
}
si <- merge(seqinfo(query), seqinfo(subject))
q_seqlevels <- seqlevels(query)
s_seqlevels <- seqlevels(subject)
common_seqlevels <- intersect(q_seqlevels, s_seqlevels)
NG <- length(common_seqlevels)
q_group_idx <- match(common_seqlevels, q_seqlevels)
s_group_idx <- match(common_seqlevels, s_seqlevels)
# Extract 'q_groups' and 's_groups' (both of length NG).
q_groups <- GenomicRanges:::.extract_groups_from_GenomicRanges(
query)[q_group_idx]
s_groups <- GenomicRanges:::.extract_groups_from_GenomicRanges(
subject)[s_group_idx]
# Extract 'nclists' and 'nclist_is_q' (both of length NG).
if (is(subject, "GNCList")) {
nclists <- subject@nclists[s_group_idx]
nclist_is_q <- rep.int(FALSE, NG)
} else if (is(query, "GNCList")) {
nclists <- query@nclists[q_group_idx]
nclist_is_q <- rep.int(TRUE, NG)
} else {
# We'll do "on-the-fly preprocessing".
nclists <- vector(mode = "list", length = NG)
nclist_is_q <- rep.int(NA, NG)
}
# Extract 'circle_length' (of length NG).
circle_length <- GenomicRanges:::.get_circle_length(si)[q_group_idx]
##Extract 'q_space' and 's_space'.
if (ignore.strand) {
q_space <- s_space <- NULL
} else {
q_space <- as.integer(strand(query)) - 3L
s_space <- as.integer(strand(subject)) - 3L
}
# IRanges:::find_overlaps_in_groups_NCList() ---------------------------
q <- query@ranges
s <- subject@ranges
circle.length <- circle_length
if (!(is(q, "IntegerRanges") && is(s, "IntegerRanges"))) {
stop("'q' and 's' must be IntegerRanges object")
}
if (!is(q_groups, "CompressedIntegerList")) {
stop("'q_groups' must be a CompressedIntegerList object")
}
if (!is(s_groups, "CompressedIntegerList")) {
stop("'s_groups' must be a CompressedIntegerList object")
}
if (!isSingleNumber(maxgap)) {
stop("'maxgap' must be a single integer")
}
if (!is.integer(maxgap)) {
maxgap <- as.integer(maxgap)
}
if (!isSingleNumber(minoverlap)) {
stop("'minoverlap' must be a single integer")
}
if (!is.integer(minoverlap)) {
minoverlap <- as.integer(minoverlap)
}
q_circle_len <- circle.length
q_circle_len[which(nclist_is_q)] <- NA_integer_
q <- IRanges:::.shift_ranges_in_groups_to_first_circle(
x = q,
x_groups = q_groups,
circle.length = q_circle_len)
s_circle_len <- circle.length
s_circle_len[which(!nclist_is_q)] <- NA_integer_
s <- IRanges:::.shift_ranges_in_groups_to_first_circle(
x = s,
x_groups = s_groups,
circle.length = s_circle_len)
.Call2("C_find_overlaps_in_groups_NCList",
start(q), end(q), q_space, q_groups,
start(s), end(s), s_space, s_groups,
nclists, nclist_is_q,
maxgap, minoverlap, type, select, circle.length,
PACKAGE = "IRanges")
}
setMethod("findOverlaps", c("FWGRanges", "FWGRanges"), .findOverlaps_FWGRanges)
# TODO: A dedicated duplicated method would be faster than
# duplicated,GenomicRanges, because it could use end(x) instead of
# width(x) in call to duplicatedIntegerQuads().
# Internal functions -----------------------------------------------------------
# TODO: Warn if x contains mix of + and * or - and * loci?
.strandCollapse <- function(x) {
stopifnot(is(x, "GenomicRanges"))
if (all(strand(x) == "*")) return(x)
# Shift loci on negative strand by 1 to the left
x[strand(x) == "-"] <- IRanges::shift(x[strand(x) == "-"], -1L)
# Unstrand all loci
x <- unstrand(x)
# Extract unique loci
unique(x)
}
# TODOs ------------------------------------------------------------------------
# TODO: Document internal classes, methods, and functions for my own sanity.
# Also, some may be useful to users of bsseq (although I still won't
# export these for the time being). Ultimately, I'd like to promote
# FWGRanges and FWIRanges to 'official' GenomicRanges and IntegerRanges
# subclasses.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.