Nothing
#PrimerTree
#Copyright (C) 2013 Jim Hester
#' Query a pair of primers using ncbi's Primer-BLAST, if primers contain iupac
#'
#' ambiguity codes, enumerate all possible combinations and combine the
#' results.
#' @param forward forward primer to search by 5'-3' on plus strand
#' @param reverse reverse primer to search by 5'-3' on minus strand
#' @param num_aligns number of alignment results to keep
#' @param num_permutations the number of primer permutations to search, if the
#' degenerate bases cause more than this number of permutations to exist,
#' this number will be sampled from all possible permutations.
#' @param ... additional arguments passed to Primer-Blast
#' @param .parallel if 'TRUE', perform in parallel, using parallel backend
#' provided by foreach
#' @param .progress name of the progress bar to use, see 'create_progress_bar'
#' @return httr response object of the query, pass to
#' \code{\link{parse_primer_hits}} to parse the results.
#' @export
primer_search <- function(
forward,
reverse,
num_aligns = 500,
num_permutations = 25,
...,
.parallel = FALSE,
.progress = "none"
) {
if (missing(forward) || missing(reverse)) {
BLAST_primer()
}
primers <- enumerate_primers(forward, reverse)
num_primers <- nrow(primers)
if (num_primers > num_permutations) {
warning(
immediate. = TRUE,
"primers have ",
num_primers,
" possible combinations due to degenerate bases, sampling ",
num_permutations,
" primers, use 'num_permutations' to change"
)
primers <- primers[
sample.int(num_primers, num_permutations, replace = FALSE),
]
}
message("BLASTing ", nrow(primers), " primer combinations")
#enumerate all combinations to handle ambiguity codes
alply(
primers,
.margins = 1,
.expand = FALSE,
.parallel = .parallel,
.progress = .progress,
function(row) {
BLAST_primer(
row$forward,
row$reverse,
num_targets_with_primers = max(num_aligns %/% nrow(primers), 1),
...
)
}
)
}
iupac <- list(
"M" = list("A", "C"),
"R" = list("A", "G"),
"W" = list("A", "T"),
"S" = list("C", "G"),
"Y" = list("C", "T"),
"K" = list("G", "T"),
"V" = list("A", "C", "G"),
"H" = list("A", "C", "T"),
"D" = list("A", "G", "T"),
"B" = list("C", "G", "T"),
"N" = list("A", "C", "G", "T"),
"I" = list("A", "T", "C")
)
enumerate_primers <- function(forward, reverse) {
forward_primers <- enumerate_ambiguity(forward)
data.frame(
forward = forward_primers,
reverse = rep(
enumerate_ambiguity(reverse),
each = length(forward_primers)
),
stringsAsFactors = FALSE
)
}
enumerate_ambiguity <- function(sequence) {
search_regex <- paste(names(iupac), collapse = "|")
locs <- str_locate_all(sequence, search_regex)
sequences <- list()
count <- 1
for (i in seq_len(nrow(locs[[1]]))) {
loc <- locs[[1]][i, ]
ambiguity <- str_sub(sequence, loc[1], loc[2])
for (type in iupac[[ambiguity]]) {
new_seq <- sequence
str_sub(new_seq, loc[1], loc[2]) <- type
sequences[[count]] <- enumerate_ambiguity(new_seq)
count <- count + 1
}
return(unlist(sequences))
}
return(sequence)
}
print_options <- function(options) {
output <- capture.output(print(
options[
is.na(options$type) | options$type != "hidden",
c("name", "type", "defval")
]
))
message(paste(output, "\n", sep = ""))
}
BLAST_primer <- function(
forward,
reverse,
...,
organism = "",
primer_specificity_database = "nt",
exclude_env = "on"
) {
url <- "https://www.ncbi.nlm.nih.gov/tools/primer-blast/"
form <- GET_retry(url)
content <- parsable_html(form)
all_options <- get_options(content)
if (missing(forward) || missing(reverse)) {
print_options(all_options)
stop("No primers specified")
}
options <- list(
...,
primer_left_input = forward,
primer_right_input = reverse,
organism = organism,
primer_specificity_database = primer_specificity_database,
exclude_env = exclude_env,
search_specific_primer = "on"
)
names(options) <- toupper(names(options))
match_args <- pmatch(names(options), all_options$name)
bad_args <- is.na(match_args)
if (any(bad_args)) {
print_options(all_options)
stop(
paste(names(options)[bad_args], collapse = ","),
" not valid option\n"
)
}
options <- get_defaults(options, all_options)
start_time <- now()
message("Submitting Primer-BLAST query")
response <- POST_retry(
paste(url, "primertool.cgi", sep = ""),
body = options
)
values <- get_refresh_from_meta(response)
while (length(values) > 0) {
message(
"BLAST alignment processing, refreshing in ",
values[1],
" seconds..."
)
Sys.sleep(values[1])
response <- GET_retry(values[2])
values <- get_refresh_from_meta(response)
}
total_time <- start_time %--% now()
message(
"BLAST alignment completed in ",
total_time %/% seconds(1),
" seconds"
)
response
}
#modify a function to check the status and retry until success
retry <- function(fun, num_retry = 5, ...) {
function(...) {
res <- fun(...)
itr <- 0
status <- http_status(res)
while (
itr < num_retry &&
inherits(res, "response") &&
tolower(status$category) != "success"
) {
warning("request failed, retry attempt ", itr + 1)
res <- fun(...)
status <- http_status(res)
itr <- itr + 1
#sleep to avoid hitting NCBI query rate limit :'-(
Sys.sleep(0.4)
}
res
}
}
GET_retry <- retry(httr::GET)
POST_retry <- retry(httr::POST)
#' Parse the primer hits
#'
#' @param response a httr response object obtained from
#' \code{\link{primer_search}}
#' @export
parse_primer_hits <- function(response) {
content <- parsable_html(response)
rbind.fill(xpathApply(content, "//pre", parse_pre))
}
parse_a <- function(a) {
#links like entrez/viewer.fcgi?db=nucleotide&id=452085006
m <- regexpr("id=\\d+", xmlAttrs(a)["href"])
gi <- gsub("id=", "", unlist(regmatches(xmlAttrs(a)["href"], m)))
if (length(gi) == 0L) {
#links like
# nucleotide/449036831?from=1107741&to=1107929&report=gbwithparts
m <- regexpr("nucleotide/\\d+", xmlAttrs(a)["href"])
gi <- gsub(
"nucleotide/",
"",
unlist(regmatches(xmlAttrs(a)["href"], m))
)
}
if (length(gi) == 0L) {
gi <- NA
}
data.frame(gi = as.character(gi), accession = as.character(xmlValue(a)))
}
parse_pre <- function(pre) {
pre_text <- xmlValue(pre)
a <- getNodeSet(pre, "./preceding-sibling::a[1]")
if (length(a) <= 0) {
stop("Parsing failed for ", pre_text)
}
ids <- parse_a(a[[1]])
product_length_regex <- "product length = (\\d+)"
template_regex <- "Template[^\\d]+(\\d+)[^.ACGT]+([.ACGT]+)[^\\d]+(\\d+)"
full_regex <- paste(
"[\\S\\W]*",
product_length_regex,
"[\\S\\W]*?",
template_regex,
"[\\S\\W]*",
template_regex,
"[\\S\\W]*",
sep = ""
)
values <- str_split(
gsub(
full_regex,
paste("\\", 1:8, sep = "", collapse = "|"),
pre_text,
perl = TRUE
),
"[|]"
)[[1]]
data.frame(
ids,
product_length = as.numeric(values[1]),
mismatch_forward = str_count(values[3], "[ACGT]"),
mismatch_reverse = str_count(values[6], "[ACGT]"),
forward_start = as.numeric(values[2]),
forward_stop = as.numeric(values[4]),
reverse_start = as.numeric(values[5]),
reverse_stop = as.numeric(values[7]),
product_start = min(as.numeric(values[c(2, 4, 5, 7)])),
product_stop = max(as.numeric(values[c(2, 4, 5, 7)]))
)
}
get_refresh_from_meta <- function(response) {
content <- parsable_html(response)
meta <- content['//meta[@http-equiv="Refresh"]']
if (length(meta) > 0) {
values <- str_split(xmlAttrs(meta[[1]])["content"], "; URL=")[[1]]
return(values)
}
return()
}
get_defaults <- function(set_options, options) {
#only look at values with set defaults
options <- options[!is.na(options$defval), ]
unchanged_options <- setdiff(options$name, names(set_options))
default_values <- as.character(options[
match(unchanged_options, options$name),
"defval"
])
names(default_values) <- unchanged_options
c(set_options, default_values)
}
get_options <- function(content) {
options <- plyr::rbind.fill(xpathApply(
content,
"//form//input | //form//select",
parse_attributes
))
options$type <- as.character(options$type)
#add dropdown type if they are NA
options$type[is.na(options$type)] <- "dropdown"
#make default values for checkboxes on or off rather than checked/unchecked
options$defval <- as.character(options$defval)
check_map <- c("checked" = "on", "unchecked" = "")
checkboxes <- which(options$type == "checkbox")
options$defval[checkboxes] <- check_map[options$defval[checkboxes]]
options[options$type != "hidden", c("name", "type", "defval")]
}
parse_attributes <- function(x) {
as.data.frame(t(xmlAttrs(x)))
}
parsable_html <- function(response) {
txt <- content(response, as = "text", encoding = "UTF-8")
# remove any unicode characters
Encoding(txt) <- "UTF-8"
txt <- iconv(txt, "UTF-8", "ASCII", sub = "")
#this gsub regex is to remove the definition lines, some of which have
# bracketed <junk> in them, which messes up the parsing
txt <- gsub('("new_entrez".*?</a>).*?<pre>\n\n', "\\1\n<pre>", txt)
htmlParse(txt)
}
filter_duplicates <- function(hits) {
location_columns <- c(
"accession",
"forward_start",
"forward_stop",
"reverse_start",
"reverse_stop"
)
hits[!duplicated(t(apply(hits[location_columns], 1, range))), ]
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.