get_context <- function(x, bsgenome, flank_size = 10, .pos_shift = NULL) {
log_debug("In function: `get_context`")
handle_error(flank_size >= 1, "`get_context` :: Flank size must be greater or equal to 1")
get_trinucs <- function(z) substr(z, flank_size, flank_size + 2)
handle_error(length(unique(IRanges::width(x))) == 1, "`get_context` :: Some positions are intervals, need to have single positions as input")
x$position = GenomicRanges::start(x)
# Check that flanking regions doesn't violate chromosome boundaries
flank_size_adjusted = flank_size + 1
x <- GenomicRanges::resize(x, flank_size_adjusted, fix='start')
x <- GenomicRanges::resize(x, flank_size * 2 + 1, fix='end')
invalid_small = (GenomicRanges::start(x) - flank_size) < 1
if (any(invalid_small)) {
log_warning("get_context: Some positions got flanking regions starting before the first nuc of chromosome, these will be removed")
log_debug(paste("Small values:", paste(
GenomicRanges::seqnames(x)[invalid_small],
GenomicRanges::start(x)[invalid_small])))
x <- x[!invalid_small]
}
upper_limits = genome_seqlengths = bsgenome@seqinfo@seqlengths
names(upper_limits) <- as.character(bsgenome@seqinfo@seqnames)
invalid_large = GenomicRanges::end(x) > upper_limits[as.character(GenomicRanges::seqnames(x))]
if (any(invalid_large, na.rm = TRUE)) {
log_warning("get_context: Some positions got flanking regions ending after the last nuc of chromosome, these will be removed")
log_debug(paste("Large values:", paste(
GenomicRanges::seqnames(x)[invalid_large],
GenomicRanges::start(x)[invalid_large])))
x <- x[!invalid_large]
}
x_vals = GenomicRanges::values(x)
# Obtain sequence context and choose the pyrimidine based strand
x.seq <- Biostrings::getSeq(bsgenome, x)
x.seq_rc <- Biostrings::reverseComplement(x.seq)
x.ref_nuc <- substr(x.seq, flank_size_adjusted, flank_size_adjusted)
x.ref_nuc_rc <- compl_map[x.ref_nuc]
x.trinuc <- substr(x.seq, flank_size, flank_size + 2)
x.trinuc_rc <- substr(x.seq_rc, flank_size, flank_size + 2)
if (!(is.null(.pos_shift))) {
log_warning("Using shifted position for troubleshooting, results shouldn't make sense")
x = GenomicRanges::shift(x, .pos_shift)
x.seq <- Biostrings::getSeq(bsgenome, x)
x.seq_rc <- Biostrings::reverseComplement(x.seq)
}
x.pyr_strand <- toupper(x.ref_nuc) %in% c("C", "T")
BiocGenerics::strand(x) = ifelse(x.pyr_strand, '+', '-')
x.pyr_nuc <- ifelse(x.pyr_strand, x.ref_nuc, x.ref_nuc_rc)
x.pyr_seq <- ifelse(x.pyr_strand, x.seq, x.seq_rc)
x.pyr_trinuc <- ifelse(x.pyr_strand, x.trinuc, x.trinuc_rc)
x_new_vals = cbind(x_vals, data.frame(
sequence_pyrbased = x.pyr_seq,
trinuc_pyrbased = x.pyr_trinuc,
nucl_pyrbased = x.pyr_nuc,
stringsAsFactors = FALSE
))
GenomicRanges::values(x) = x_new_vals
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.