squareCounts <- function(files, param, width=50000, filter=1L, restrict.regions=FALSE)
# This function collates counts across multiple experiments to get the full set of results. It takes
# a list of lists of lists of integer matrices (i.e. a list of the outputs of convertToInteractions) and
# then compiles the counts into a list object for output.
#
# written by Aaron Lun
# some time ago
# last modified 17 November 2017
{
nlibs <- length(files)
if (nlibs==0L) {
stop("number of libraries must be positive")
} else if (width < 0) {
stop("width must be a non-negative integer")
}
width <- as.integer(width)
filter <- as.integer(filter)
# Constructing bins across the genome.
is.dnase <- .isDNaseC(param)
if (is.dnase) {
retainer <- c("anchor1.pos", "anchor2.pos", "anchor1.len", "anchor2.len")
bin.out <- .createBins(param, width, restricted=restrict.regions)
} else {
retainer <- c("anchor1.id", "anchor2.id")
bin.out <- .assignBins(param, width, restricted=restrict.regions)
}
bin.region <- bin.out$region
bin.id <- bin.out$id
bin.by.chr <- .splitByChr(bin.region)
# Output vectors.
full.sizes <- integer(nlibs)
out.counts <- list(matrix(0L, 0, nlibs))
out.a <- out.t <- list(integer(0))
idex <- 1L
# Running through each pair of chromosomes.
loadfuns <- preloader(files, param=param, retain=retainer)
for (anchor1 in names(loadfuns)) {
current <- loadfuns[[anchor1]]
first.anchor1 <- bin.by.chr$first[[anchor1]]
last.anchor1 <- bin.by.chr$last[[anchor1]]
for (anchor2 in names(current)) {
curfuns <- current[[anchor2]]
first.anchor2 <- bin.by.chr$first[[anchor2]]
last.anchor2 <- bin.by.chr$last[[anchor2]]
# Extracting counts.
pairs <- vector("list", nlibs)
for (lib in seq_len(nlibs)) {
cur.pairs <- curfuns[[lib]]()
if (is.dnase) {
cur.pairs <- .binReads(cur.pairs, width, first.anchor1, first.anchor2,
last.anchor1, last.anchor2)
}
pairs[[lib]] <- cur.pairs
}
full.sizes <- .addToTotal(full.sizes, vapply(pairs, FUN=nrow, FUN.VALUE=0L))
# Aggregating them in C++ to obtain count combinations for each bin pair.
out <- .Call(cxx_count_patch, pairs, bin.id, filter, first.anchor2, last.anchor2)
if (!length(out[[1]])) {
next
}
# Storing counts and locations.
if (any(out[[1]] < out[[2]])) { stop("anchor1 ID should not be less than anchor2 ID") }
out.a[[idex]] <- out[[1]]
out.t[[idex]] <- out[[2]]
out.counts[[idex]] <- out[[3]]
idex <- idex+1L
}
}
# Collating all the other results.
out.a <- unlist(out.a)
out.t <- unlist(out.t)
out.counts <- do.call(rbind, out.counts)
return(InteractionSet(list(counts=out.counts), colData=DataFrame(totals=full.sizes),
interactions=GInteractions(anchor1=out.a, anchor2=out.t, regions=bin.region, mode="reverse"),
metadata=List(param=param, width=width)))
}
## PROOF:
# Recall the enforcement of anchor1 >= anchor2. Bin pairs could technically be
# reflected around the diagonal, to ensure that all points are counted, e.g.,
# if a bin pair overlaps a point in (anchor2, anchor1) form but not in (anchor1,
# anchor2) form. However, this is not required, as shown below:
#
# Consider a point (x, y) past the diagonal (i.e., no enforcement), where y >
# x. Assume that this point is covered by our bin pair. This implies that the
# anchor2 range of our bin pair `[te, ts]` includes 'y', and the anchor1 range
# `[ae, as]` includes 'x'. The anchor1 range must be above the anchor2 range
# (i.e., as >= ts, ae >= te). If ts <= y <= te and as <= x <= ae, then you can
# fiddle with this to obtain ts <= x <= te and as <= y <= ae (as x < y), i.e.,
# the point (y, x) is also covered. So, we're guaranteed to have already
# counted anything past the diagonal, meaning that reflection is not needed.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.