#' A wrapper function to generate a GRanges object of chromatin domain inflection points
#'
#' @name getDomainInflections
#'
#' @param gr Input GRanges object with mcols column corresponding to chromatin domains
#' @param what The name of the column containing the chromatin domain information
#' @param res What resolution the domains were called
#' @param chrs Which chromosomes to work on
#' @param genome Which genome does the input data come from
#'
#' @return A GRanges object of compartment inflection points
#'
#' @import GenomicRanges
#' @importFrom S4Vectors subjectHits queryHits
#' @importFrom GenomeInfoDb seqlengths
#' @importFrom methods as is
#' @importFrom utils data
#' @export
#'
#' @examples
#' data("k562_scrna_chr14", package = "compartmap")
#' chr14_domains <- scCompartments(k562_scrna_chr14,
#' res = 1e6, genome = "hg19",
#' group = TRUE, bootstrap = FALSE
#' )
#' chr14_domain_inflections <- getDomainInflections(chr14_domains, what = "pc")
getDomainInflections <- function(
gr,
what = "score",
res = 1e6,
chrs = c(paste0("chr", 1:22), "chrX"),
genome = c("hg19", "hg38", "mm9", "mm10")
) {
# find the compartment inflection points
stopifnot("'gr' is not a GRanges object" = is(gr, "GenomicRanges"))
stopifnot("'what' is not present in mcols(gr)" = what %in% names(mcols(gr)))
# determine which genome we are working with
genome.name <- paste0(match.arg(genome), ".gr")
genome.gr <- get(genome.name)
# we may not be able to assume continuous compartment structure here
# so somehow, we need to find continuous runs
message("Tiling genome.")
tiles <- tileGenome(
seqlengths = seqlengths(genome.gr)[chrs],
tilewidth = res,
cut.last.tile.in.chrom = TRUE
)
# reset to 0-based
start(tiles) <- suppressWarnings(start(tiles) - 1)
end(tiles) <- suppressWarnings(end(tiles) - 1)
# add a column for continuous runs!
mcols(tiles)$run <- seq(1:length(tiles))
mcols(tiles)$score <- NA
# overlap
message("Finding overlaps.")
ol <- findOverlaps(tiles, gr)
# tack on the eigenvalues
message("Injecting eigenvalues.")
mcols(tiles)$score[queryHits(ol)] <- mcols(gr)[[what]][subjectHits(ol)]
tiles.sub <- tiles[!is.na(mcols(tiles)$score), ]
# look at continuous sequence only
# this is the tricky part...
d <- diff(mcols(tiles.sub)$run)
# theoretically, continuous sequence will always be 1
contig <- which(d == 1)
# need to do this in chunks
# technically, we really only need the places that are non-contiguous
# to bookend our vectors to calculate
non_contig <- which(d != 1)
# if we have fully continuous data, short circuit
if (!length(non_contig)) {
message("Contiguous runs. Finding inflections.")
grl.new <- .getInflections(gr, what = what)
return(grl.new)
}
# loop through the non-contiguous space
grl <- lapply(1:length(non_contig), function(i) {
message("Block processing non-contiguous space for block ", i)
# get block of contiguous sequence
if (non_contig[i] == non_contig[1]) {
block <- tiles.sub[contig[1]:non_contig[i], ]
} else {
block <- tiles.sub[(non_contig[i - 1] + 1):non_contig[i], ]
}
return(block)
})
# we now need to add the final block to the grl
# but what if we have a non-contig block at the end all by itself?
fin.block <- tiles.sub[(tail(non_contig, n = 1L) + 1):length(tiles.sub), ]
grl <- c(grl, fin.block)
# run it
grl.new <- lapply(grl, function(x) {
message("Finding inflections.")
return(.getInflections(x, what = what))
})
message("Returning inflections.")
return(unlist(as(grl.new, "GRangesList")))
}
# convert to pos and negative signs
.getInflections <- function(gr, what = "score") {
gr.signs <- sign(mcols(gr)[[what]])
# find the inflected compartment
gr.signs.bool <- gr.signs < 0
# big list of things that are less than zero
# where the flip will be FALSE
gr.inflect <- gr[!gr.signs.bool, ]
if (length(gr.inflect) == 0) {
return(GRanges())
}
# merge them
end(gr.inflect) <- end(gr.inflect) + 1 # shift
gr.inflect <- reduce(gr.inflect) # reduce
end(gr.inflect) <- end(gr.inflect) - 1 # shift back
# gr.signs.diff <- diff(gr.signs) != 0
# subset the original granges
# gr.inflect <- gr[gr.signs.diff,]
if (length(gr.signs) == 2 & any(gr.signs.bool)) {
gr.inflect <- gr[2, ] # special case
gr.inflect.new <- GRanges(
seqnames = seqnames(gr.inflect),
ranges = IRanges::IRanges(
start = start(gr.inflect),
end = start(gr.inflect)
),
strand = "*"
)
return(gr.inflect.new)
}
gr.inflect.new.start <- GRanges(
seqnames = seqnames(gr.inflect),
ranges = IRanges::IRanges(
start = start(gr.inflect),
end = start(gr.inflect)
),
strand = "*"
)
gr.inflect.new.end <- GRanges(
seqnames = seqnames(gr.inflect),
ranges = IRanges::IRanges(
start = end(gr.inflect),
end = end(gr.inflect)
),
strand = "*"
)
gr.inflect.new <- sort(c(gr.inflect.new.start, gr.inflect.new.end))
# NOTE: the start of the inflected compartment is what we want
return(gr.inflect.new)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.