#' @title Create a MotifMatcher object
#'
#' @description
#' A MotifMatcher object is used directly by the \code{\link{HumanDHSFilter}} class to match motif
#' matrices to where they occur in the supplied genome.
#'
#' @import methods
#' @import BSgenome
#'
#' @rdname MotifMatcher-class
#' @aliases MotifMatcher
#----------------------------------------------------------------------------------------------------
.MotifMatcher <- setClass('MotifMatcher',
representation(genome="BSgenome",
pfms="list",
quiet="logical")
)
#----------------------------------------------------------------------------------------------------
setGeneric("getPfms", signature="obj",
function(obj) standardGeneric ("getPfms"))
setGeneric("getSequence", signature="obj",
function(obj,tbl.regions,variants=NA_character_)
standardGeneric ("getSequence"))
setGeneric(".parseVariantString", signature="obj",
function(obj, variantString)
standardGeneric (".parseVariantString"))
setGeneric("findMatchesByChromosomalRegion", signature="obj",
function(obj, tbl.regions, pwmMatchMinimumAsPercentage, variants=NA_character_)
standardGeneric ("findMatchesByChromosomalRegion"))
#----------------------------------------------------------------------------------------------------
#' @title Class MotifMatcher
#' @name MotifMatcher-class
#' @rdname MotifMatcher-class
#'
#' @description
#' The MotifMatcher class is used to match motif position weight matrices to places where they occur
#' in a given genome. It requries specification of a genome to search in and a list of motifs to
#' search for. Ordinarily this class is primarily used by the HumanDHSFilter, but can alternatively
#' be used to search for motifs in a given genome without any filtering functionality.
#'
#' @param genomeName A character string identifying an object of type BSgenome. The genome object
#' contains the information for a specific human genome and should be either "hg38" or "hg19". The
#' supplied genome serves as the search space for matching motifs (default = "hg38").
#' @param pfms A list of motif matrices to serve as queries for the target genome. If supplied,
#' these should be created using a MotifList object from the \code{\link{MotifDb}} package (see
#' example below). If unspecified, the motifs will default to all vertebrates in the JASPAR
#' database (default = list())
#' @param quiet A logical denoting whether or not the MotifMatcher object should print output
#'
#' @return An object of the MotifMatcher class
#'
#' @export
#'
#' @seealso \code{\link{HumanDHSFilter}}
#'
#' @examples
#' # Specify the genome, and motif list to create a MotifMatcher for only human motifs
#' library(MotifDb)
#' mm <- MotifMatcher( genomeName="hg38",
#' pfms = as.list(query(MotifDb, "sapiens")))
MotifMatcher <- function(genomeName,
pfms,
quiet=TRUE)
{
genomeName <- tolower(genomeName)
stopifnot(is.list(pfms))
stopifnot(genomeName %in% c("hg19", "hg38", "saccer3", "tair10"))
if(genomeName == "hg38"){
reference.genome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
}
else if(genomeName == "hg19"){
reference.genome <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19
}
else if(genomeName == "mm10"){
reference.genome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
}
else if(tolower(genomeName) == "saccer3"){
reference.genome <- BSgenome.Scerevisiae.UCSC.sacCer3::BSgenome.Scerevisiae.UCSC.sacCer3
}
else if(tolower(genomeName) == "tair10"){
reference.genome <- BSgenome.Athaliana.TAIR.TAIR9::BSgenome.Athaliana.TAIR.TAIR9
}
else {
stop(sprintf("MotifMatch, genomeName not in hg19, hg38: '%s'", genomeName))
}
.MotifMatcher(genome=reference.genome, pfms=pfms, quiet=quiet)
} # MotifMatcher constructor
#----------------------------------------------------------------------------------------------------
#' Show a MotifMatcher object
#'
#' @rdname show.MotifMatcher
#' @aliases show.MotifMatcher
#'
#' @param object An object of the class MotifMatcher
#'
#' @return A truncated view of the supplied object
#'
#' @examples
#' load(system.file(package="trena", "extdata/ampAD.154genes.mef2cTFs.278samples.RData"))
#' target.gene <- "MEF2C"
#' tfs <- setdiff(rownames(mtx.sub), target.gene)
#' lassopv.solver <- LassoPVSolver(mtx.sub, target.gene, tfs)
#' show(lassopv.solver)
setMethod("show", "MotifMatcher",
function(object){
s <- sprintf("MotifMatcher for genome %s", object@genome)
cat(s, sep="\n")
})
#----------------------------------------------------------------------------------------------------
#' Find Motif Matches by Chromosomal Region
#'
#' Given a MotifMatcher object, a table of chromosomal regions, and a minimum match percentage,
#' pull out a list containing a data frame of the motifs in those regions and a character vector
#' of their associated transcription factors.
#'
#' @rdname findMatchesByChromosomalRegion
#' @aliases findMatchesByChromosomalRegion
#'
#' @param obj An object of class MotifMatcher
#' @param tbl.regions A data frame where each row contains a chromosomal region with the fields
#' "chrom", "start", and "end".
#' @param pwmMatchMinimumAsPercentage A percentage (0-100) used as a cutoff for what constitutes
#' a motif match
#' @param variants A character containing variants to use for the matching (default = NA_character_).
#' The variants should either have the same number of entries as rows in the \code{tbl.regions},
#' or they should not be supplied.
#'
#' @return A list containing a data frame of the motifs in the given regions and a character
#' vector of their associated transcription factors
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Perform a simple match in the rs13384219 neighborhood
#' library(MotifDb)
#' motifMatcher <- MotifMatcher(genomeName="hg38",
#' pfms = as.list(query(query(MotifDb, "sapiens"),"jaspar2016")), quiet=TRUE)
#' tbl.regions <- data.frame(chrom="chr2", start=57907313, end=57907333, stringsAsFactors=FALSE)
#' x <- findMatchesByChromosomalRegion(motifMatcher, tbl.regions, pwmMatchMinimumAsPercentage=92)
#'
#' # Perform the same match, but now include a variant
#' x.mut <- findMatchesByChromosomalRegion(motifMatcher, tbl.regions,
#' pwmMatchMinimumAsPercentage=92, variants = "rs13384219")
#' }
setMethod("findMatchesByChromosomalRegion", "MotifMatcher",
function(obj, tbl.regions, pwmMatchMinimumAsPercentage, variants=NA_character_){
x <- lapply(1:nrow(tbl.regions),
function(r) getSequence(obj, tbl.regions[r,], variants))
tbl.regions <- do.call(rbind, x)
if(!obj@quiet){
printf("---- MotifMatcher::findMatchesByChromosomalRegion")
print(tbl.regions)
}
tbl.motifs.list <- .getScoredMotifs(tbl.regions$seq, obj@pfms,
pwmMatchMinimumAsPercentage, obj@quiet)
region.count <- nrow(tbl.regions)
tbl.out <- data.frame()
all.tfs <- c()
for(i in seq_len(region.count)){
tbl.motifs <- tbl.motifs.list[[i]]
if(nrow(tbl.motifs) > 0){
colnames(tbl.motifs) <- c("motifStart", "motifEnd", "width",
"motifScore", "maxScore",
"motifRelativeScore", "motifName",
"match", "strand")
tbl.region <- tbl.regions[i,]
colnames(tbl.region) <- c("chrom", "chromStart", "chromEnd", "seq", "status")
tbl.out <- rbind(tbl.out, cbind(tbl.motifs, tbl.region))
}
} # for i
if(nrow(tbl.out) == 0)
return(tbl=data.frame())
tbl.out$motifStart <- tbl.out$motifStart + tbl.out$chromStart - 1;
tbl.out$motifEnd <- tbl.out$motifEnd + tbl.out$chromStart - 1;
# change some column names
#colnames(tbl.out)[4] <- "motifscore"
#colnames(tbl.out)[2] <- "endpos"
#colnames(tbl.out)[7] <- "motifname"
tbl.out <- tbl.out[, -c(3,5)] # get rid of width and maxScore columns
desired.column.order <- c("motifName", "chrom", "motifStart", "motifEnd", "strand",
"motifScore", "motifRelativeScore", "match",
"chromStart", "chromEnd", "seq", "status") #, "count", "score")
tbl.out <- tbl.out[, desired.column.order]
tbl.out <- tbl.out[order(tbl.out$motifScore, decreasing=TRUE),]
# for MotifDb motif names, simplify, keeping only the final token:
# "Hsapiens-jaspar2016-FOXH1-MA0479.1" -> "MA0479.1"
tokens <- strsplit(tbl.out$motifName, "-")
short.motif.names <- unlist(lapply(tokens, function(tokenSet) tokenSet[length(tokenSet)]))
tbl.out$shortMotif <- short.motif.names
# tbl.mg will soon come from MotifDb
#tbl.mg <- tfGeneSymbolForMotif(obj, tbl.out$motifName)
#tfs.by.motif <- lapply(tbl.out$motifName, function(m) subset(tbl.mg, motif==m)$geneSymbol)
#all.tfs <- sort(unique(unlist(tfs.by.motif)))
#tfs.by.motif.joined <- unlist(lapply(tfs.by.motif, function(m) paste(m, collapse=";")))
#tbl.out$tf <- tfs.by.motif.joined
max.sequence.chars <- 20
if(nchar(tbl.out$seq[1]) > max.sequence.chars)
tbl.out$seq <- paste(substring(tbl.out$seq, 1, max.sequence.chars-3), "...", sep="")
return(tbl.out)
}) #findMatchesByChromosomalRegion
#----------------------------------------------------------------------------------------------------
#' Retrieve the motifs from the pfms slot
#'
#' Given a MotifMatcher object, return the motifs, which are stored in the pfms slot.
#'
#' @rdname getPfms
#' @aliases getPfms
#'
#' @param obj An object of class MotifMatcher
#'
#' @return The list of motif matrices stored in the pfms slot.
#'
#' @export
#'
#' @examples
#'
#' # Return the default matrix of JASPAR motifs
#' library(MotifDb)
#' motifMatcher <- MotifMatcher(genomeName="hg38", pfms = as.list(query(MotifDb, "sapiens")))
#' motifs <- getPfms(motifMatcher)
setMethod("getPfms", "MotifMatcher",
function(obj){
return(obj@pfms)
})
#----------------------------------------------------------------------------------------------------
.matchPwmForwardAndReverse <- function(sequence, pfm, motifName, min.match.percentage=95, quiet=TRUE)
{
xyz <- .matchPwmForwardAndReverse
min.match.as.string <- sprintf("%02d%%",
min.match.percentage)
hits.fwd <- Biostrings::matchPWM(pfm, sequence, with.score=TRUE, min.score=min.match.as.string)
hits.rev <- Biostrings::matchPWM(Biostrings::reverseComplement(pfm),
sequence, with.score=TRUE, min.score=min.match.as.string)
max.score <- Biostrings::maxScore(pfm)
tbl <- data.frame()
if(length(hits.fwd) > 0){
if(!quiet) printf("%d +", length(hits.fwd))
match <- substring(as.character(subject(hits.fwd)), start(ranges(hits.fwd)), end(ranges(hits.fwd)))
actual.score <- mcols(hits.fwd)$score
relative.score <- actual.score/max.score
tbl <- data.frame(ranges(hits.fwd),
score=mcols(hits.fwd)$score, maxScore=max.score,relativeScore=relative.score,
motif=motifName, match=match, strand="+",
stringsAsFactors=FALSE)
} # hits.fwd
if(length(hits.rev) > 0){
if(!quiet) printf("%d -", length(hits.rev))
match <- substring(as.character(subject(hits.rev)), start(ranges(hits.rev)), end(ranges(hits.rev)))
actual.score <- mcols(hits.rev)$score
relative.score <- actual.score/max.score
tbl.rev <- data.frame(ranges(hits.rev),
score=mcols(hits.rev)$score, maxScore=max.score, relativeScore=relative.score,
motif=motifName, match=match, strand="-",
stringsAsFactors=FALSE)
# transform the start/end so that they are forward-strand relative
#true.start <- 1 + nchar(sequence) - tbl.rev$end
#true.end <- 1 + nchar(sequence) - tbl.rev$start
#tbl.rev$start <- true.start
#tbl.rev$end <- true.end
tbl <- rbind(tbl, tbl.rev)
}
#printf("returning match fwd/bwd tbl with %d rows", nrow(tbl))
#print(table(tbl$strand))
tbl
} # .matchPwmForwardAndReverse
#----------------------------------------------------------------------------------------------------
.findMotifs <- function(sequence, pfms, min.match.percentage=95, quiet=TRUE)
{
min.match.as.string <- sprintf("%02d%%", min.match.percentage)
count <- length(pfms)
xx <- lapply(1:count, function(i) {
.matchPwmForwardAndReverse(sequence, pfms[[i]], names(pfms)[i], min.match.percentage, quiet)
})
tbl.result <- do.call("rbind", xx)
if(nrow(tbl.result) == 0){
return(data.frame())
}
else{
tbl.result$motif <- as.character(tbl.result$motif)
tbl.result$match <- as.character(tbl.result$match)
tbl.result$strand <- as.character(tbl.result$strand)
return(tbl.result[order(tbl.result$score,decreasing=TRUE),])
}
} # .findMotifs
#------------------------------------------------------------------------------------------------------------------------
.getScoredMotifs <- function(seqs, pfms, min.match.percentage=95, quiet=TRUE)
{
parseLine <- function(textOfLine) {
# first delete the leading A, C, G or T. then the square brackets. then convert
x <- substr(textOfLine, 2, nchar(textOfLine))
x2 <- sub(" *\\[ *", "", x)
x3 <- sub(" *\\] *", "", x2)
counts <- as.integer(strsplit(x3, "\\s+", perl=TRUE)[[1]])
return(counts)
} # parseLine
parseJasparPwm = function (lines) {
stopifnot(length(lines)==5) # title line, one line for each base
motif.name.raw = strsplit(lines[1], "\t")[[1]][1]
motif.name <- gsub(">", "", motif.name.raw, fixed=TRUE)
# expect 4 rows, and a number of columns we can discern from the incoming text.
a.counts <- parseLine(lines[[2]])
c.counts <- parseLine(lines[[3]])
g.counts <- parseLine(lines[[4]])
t.counts <- parseLine(lines[[5]])
stopifnot(length(a.counts) == length(c.counts))
stopifnot(length(a.counts) == length(g.counts))
stopifnot(length(a.counts) == length(t.counts))
cols <- length(a.counts)
mtx <- matrix (nrow=4, ncol=cols, dimnames=list(c('A','C','G','T'), as.character(1:cols)))
mtx[1,] <- a.counts
mtx[2,] <- c.counts
mtx[3,] <- g.counts
mtx[4,] <- t.counts
return(list(title=motif.name, matrix=mtx))
} # parsePwm
readRawJasparMatrices = function (uri) {
all.lines <- scan(uri, what=character(0), sep='\n', quiet=TRUE)
title.lines <- grep ('^>', all.lines)
title.line.count <- length (title.lines)
max <- title.line.count - 1
pwms <- list()
for(i in 1:max){
start.line <- title.lines [i]
end.line <- title.lines [i+1] - 1
x <- parseJasparPwm (all.lines [start.line:end.line])
pwms[[i]] <- list(title=x$title, matrix=x$matrix)
} # for i
invisible (pwms)
} # readRawJasparMatrices
xyz <- "MotifMatcher .getScoredMotifs"
result <- lapply(seqs, function(seq) .findMotifs(seq, pfms, min.match.percentage, quiet))
if(is.null(result))
result <- data.frame()
result
} # .getScoredMotifs
#----------------------------------------------------------------------------------------------------
.parseLine <- function(textOfLine)
{
# first delete the leading A, C, G or T. then the square brackets. then convert
x <- substr(textOfLine, 2, nchar(textOfLine))
x2 <- sub(" *\\[ *", "", x)
x3 <- sub(" *\\] *", "", x2)
counts <- as.integer(strsplit(x3, "\\s+", perl=TRUE)[[1]])
return(counts)
} # .parseLine
#------------------------------------------------------------------------------------------------------------------------
.parseJasparPwm = function (lines)
{
stopifnot(length(lines)==5) # title line, one line for each base
motif.name.raw = strsplit(lines[1], "\t")[[1]][1]
motif.name <- gsub(">", "", motif.name.raw, fixed=TRUE)
# expect 4 rows, and a number of columns we can discern from the incoming text.
a.counts <- .parseLine(lines[[2]])
c.counts <- .parseLine(lines[[3]])
g.counts <- .parseLine(lines[[4]])
t.counts <- .parseLine(lines[[5]])
stopifnot(length(a.counts) == length(c.counts))
stopifnot(length(a.counts) == length(g.counts))
stopifnot(length(a.counts) == length(t.counts))
cols <- length(a.counts)
mtx <- matrix (nrow=4, ncol=cols, dimnames=list(c('A','C','G','T'), as.character(1:cols)))
mtx[1,] <- a.counts
mtx[2,] <- c.counts
mtx[3,] <- g.counts
mtx[4,] <- t.counts
return(list(title=motif.name, matrix=mtx))
} # .parsePwm
#------------------------------------------------------------------------------------------------------------------------
.readRawJasparMatrices = function (uri)
{
all.lines <- scan(uri, what=character(0), sep='\n', quiet=TRUE)
title.lines <- grep ('^>', all.lines)
title.line.count <- length (title.lines)
max <- title.line.count - 1
pwms <- list()
for(i in 1:max){
start.line <- title.lines [i]
end.line <- title.lines [i+1] - 1
x <- .parseJasparPwm (all.lines [start.line:end.line])
pwms[[i]] <- list(title=x$title, matrix=x$matrix)
} # for i
invisible (pwms)
} # .readRawJasparMatrices
#----------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------
# return a new version of tbl.regions:
#
# 1) with a "status" column added
# 2) in which regions with a variant have their sequence altered to include the alternate base.
# 3) if there # are multpiole alternate alleles, then a separate row is created for each alternate;
# 4) in which any regions without mutants are returned as is (with the alt column left empty)
#
.injectSnp <- function(tbl.regions, tbl.variants)
{
stopifnot(nrow(tbl.regions) == 1)
stopifnot(all(c("chrom", "start", "end", "seq") %in% colnames(tbl.regions)))
gr.regions <- with(tbl.regions, GRanges(seqnames=chrom, IRanges(start=start, end=end)))
gr.variants <- with(tbl.variants, GRanges(seqnames=chrom, IRanges(start=loc, end=loc)))
suppressWarnings(
tbl.overlaps <- as.data.frame(findOverlaps(gr.variants, gr.regions))
)
colnames(tbl.overlaps) <- c("variant", "region")
if(nrow(tbl.overlaps) == 0)
return(tbl.regions)
regions.without.snps <- setdiff(1:nrow(tbl.regions), tbl.overlaps$region)
tbl.regions.out <- tbl.regions[regions.without.snps,]
tbl.regions.out$status <- rep("wt", nrow(tbl.regions.out))
new.sequence <- tbl.regions$seq
status.string <- ""
for(r in seq_len(nrow(tbl.overlaps))){
wt <- as.list(tbl.regions[tbl.overlaps$region[r],])
alt <- as.list(tbl.variants[tbl.overlaps$variant[r],])
offset <- 1 + alt$loc - wt$start
mut.loc <- wt$start + offset
wt.base <- substring(new.sequence, offset, offset)
new.sequence <- sprintf("%s%s%s", substr(new.sequence, 1, (offset-1)), alt$mut,
substr(new.sequence, offset+1, nchar(new.sequence)))
#alt.string <- sprintf("%s:%d(%s->%s)", alt$chrom, alt$loc, alt$wt, alt$mut)
}
tbl.regions.out <- tbl.regions
tbl.regions.out$seq <- new.sequence
tbl.regions.out$status <- "mut"
tbl.regions.out
#tbl.regions.out[order(tbl.regions.out$start, decreasing=FALSE),]
} # .injectSnp
#----------------------------------------------------------------------------------------------------
# going obsolete. see function .parseVariantString
setMethod(".parseVariantString", "MotifMatcher",
function(obj, variantString) {
# quick sanity check to detect obvious errors in the variant string
is.rsid <- grepl("^rs", variantString)
is.explicit.chromLoc <- grepl("^chr", variantString) &&
length(strsplit(variantString, ":")[[1]]) == 4
is.plausible.variant.string <- is.rsid || is.explicit.chromLoc
if(!is.plausible.variant.string)
stop(sprintf("variant '%s' is neither a plausible rsID nor an explicit chrom:loc:ref:var", variantString))
if(grepl("^rs", variantString)){
explicitly.specified.alternate.allele <- NA
tokens <- strsplit(variantString, ":")[[1]]
if(length(tokens) == 2){ # eg, "rs3763040:A" when the snp includes multiple alternate alleles
variantString <- tokens[1]
explicitly.specified.alternate.allele <- tokens[2]
}
snp.info <- as.data.frame(BSgenome::snpsById(
SNPlocs.Hsapiens.dbSNP150.GRCh38::SNPlocs.Hsapiens.dbSNP150.GRCh38, variantString))[1,]
chrom <- as.character(snp.info$seqnames)
if(!grepl("ch", chrom))
chrom <- sprintf("chr%s", chrom)
if(!grepl("chr", chrom))
chrom <- sub("ch", "chr", chrom)
start <- as.numeric(snp.info$pos)
ambiguity.code <- snp.info$alleles_as_ambig
elements.string <- Biostrings::IUPAC_CODE_MAP[[ambiguity.code]]
elements <- strsplit(elements.string,'')[[1]]
wt <- as.character(BSgenome::getSeq(obj@genome, chrom, start, start))
mut <- setdiff(elements, wt)
snp <- list(chrom=chrom, loc=start, wt=wt, mut=mut)
variant.count <- length(snp$mut)
result <- vector(mode="list", length=variant.count)
for(v in 1:variant.count){
result[[v]] <- snp
result[[v]]$mut <- snp$mut[v]
} # for v
tbl.out <- do.call(rbind.data.frame, result)
if(variant.count > 1){
chosen.row <- 1 # the default if alternate allele not specified explicitly
if(is.na(explicitly.specified.alternate.allele)){
warning(sprintf("alternate allele of %d options not specified, choosing first: %s",
variant.count, tbl.out[chosen.row, "mut"]))
}
else{
chosen.row <- match(explicitly.specified.alternate.allele, tbl.out$mut)
if(is.na(chosen.row)) stop(sprintf("alternate allele %s not in %s", tokens[2], variantString))
}
tbl.out <- tbl.out[chosen.row,]
} # variant.count > 1
} # if rsid
else{ # no rsid string supplied: instead, an explicit chrom:loc:wt:mut string ("chr2:57907323:A:G")
tokens <- strsplit(variantString, ":")[[1]]
stopifnot(length(tokens) == 4)
tbl.out <- data.frame(chrom=tokens[1], loc=as.numeric(tokens[2]), wt=tokens[3], mut=tokens[4])
}
tbl.out$chrom <- as.character(tbl.out$chrom)
tbl.out$wt <- as.character(tbl.out$wt)
tbl.out$mut <- as.character(tbl.out$mut)
tbl.out
})
#----------------------------------------------------------------------------------------------------
#' Retrieve the Sequence for a Set of Regions
#'
#' Given a MotifMatcher object, a table of chromosomal regions, and an optional set of variants,
#' return the sequences as a new column of the table.
#'
#' @rdname getSequence
#' @aliases getSequence
#'
#' @param obj An object of class MotifMatcher
#' @param tbl.regions A data frame where each row contains a chromosomal region with the fields
#' "chrom", "start", and "end".
#' @param variants A character containing variants to use for the matching (default = NA_character_)
#' The variants should either have the same number of entries as rows in the \code{tbl.regions},
#' or they should not be supplied.
#'
#' @return The \code{tbl.regions} data frame with an added column containing the sequence for each
#' entry
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Retrieve the sequences for the rs13384219 neighborhood
#' library(MotifDb)
#' motifMatcher <- MotifMatcher(genomeName="hg38",
#' pfms = as.list(query(query(MotifDb, "sapiens"), "jaspar2016")))
#' tbl.regions <- data.frame(chrom="chr2", start=57907313, end=57907333, stringsAsFactors=FALSE)
#' x <- findMatchesByChromosomalRegion(motifMatcher, tbl.regions, pwmMatchMinimumAsPercentage=92)
#'
#' # Retrieve the sequences, but now include a variant
#' x.mut <- findMatchesByChromosomalRegion(motifMatcher, tbl.regions,
#' pwmMatchMinimumAsPercentage=92, "rs13384219")
#' }
setMethod("getSequence", "MotifMatcher",
function(obj, tbl.regions, variants=NA_character_){
# either no variants are supplied, or a variant string is offered for each row
stopifnot(nrow(tbl.regions) == 1)
gr.regions <- with(tbl.regions, GRanges(seqnames=chrom, IRanges(start=start, end=end)))
seqs <- as.character(BSgenome::getSeq(obj@genome, gr.regions))
tbl.regions$seq <- seqs
if(!all(is.na(variants))){ # successively inject each variant
tbl.variants <- do.call(rbind, lapply(variants, function(variant) .parseVariantString(obj, variant)))
for(rr in 1:nrow(tbl.variants))
tbl.regions <- .injectSnp(tbl.regions, tbl.variants[rr,])
} # !all(is.na(variants))
else{
tbl.regions$status <- rep("wt", nrow(tbl.regions))
}
tbl.regions
}) # getSequence
#----------------------------------------------------------------------------------------------------
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.