#' \code{Find SNP Position}
#'
#' Find the physical position of a short sequence in a given genome.
#'
#' @param blastbin The path of binary code for NCBI BLAST program. [String, default=NULL]
#' @param db Path of database to BLAST against. [String]
#' @param fa A fasta file of the sequences of interest. [String]
#' @param iden Percent identity of the alignment. [interger, default=95]
#' @param len Minimum length of the alignment. [interger, default=100]
#' @return return a data.frame.
#'
#' @examples
#'
#' library("Biostrings")
#' library("findpos")
#' ### Find the help document for the function
#' ?findpos
#' ### run
#' res <- findpos(blastbin = "~/bin/ncbi-blast-2.4.0+/bin",
#' db="example/16SMicrobialDB/16SMicrobial",
#' fa="example/rna.fasta", iden=95, len=100)
#'
#' @export
findpos <- function(blastbin=NULL, db, fa, iden, len) {
### set environment in case
if(!is.null(blastbin)){
Sys.setenv(PATH = paste(Sys.getenv("PATH"), blastbin, sep=":"))
}
#setMethod("nchar", "ANY", base::nchar)
## load some test data
seq <- readBStringSet(fa)
p <- vmatchPattern(pattern = "[", subject = seq)
snp <- unlist(start(p))
df <- as.data.frame(snp)
## load a BLAST database (replace db with the location + name of the BLAST DB)
bl <- blast(db)
print(bl, info=TRUE)
## query a sequence using BLAST
cl <- getblast(bl, seq)
cl <- subset(cl, Perc.Ident >= iden & Alignment.Length >= len)
cl2 <- merge(cl, df, by.x="QueryID", by.y="row.names")
cl2$pos <- cl2$S.start + cl2$snp + 1
return(cl2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.