Nothing
#' @importFrom foreach foreach registerDoSEQ "%dopar%"
#' @importFrom iterators iter
NULL
## declare "rec" global so that "codetools" don't complain
## see "http://r.789695.n4.nabble.com/R-CMD-check-and-foreach-code-td4687660.html"
globalVariables("rec")
parse_gb_record <- function(rcd, progress = FALSE) {
## check that the gbk/embl record is not empty
if (length(rcd) == 0L) {
stop("This \"gbRecord\" is empty.")
}
## check if gbk/embl record contains multiple entries
end_of_record <- grep('^//$', rcd)
n_records <- length(end_of_record)
pb <- make_progress_bar(n_records)
if (n_records > 1L) {
start_of_record <- c(1, end_of_record[-n_records] + 1)
irec <- iter(ixsplit(rcd, start_of_record))
#rec <- nextElem(irec)
} else {
registerDoSEQ()
irec <- iter(list(rcd))
}
rcd_list <- if (is.gbk(rcd)) {
foreach(rec = irec, .inorder = FALSE) %dopar% {
if (progress) pb()
gbk_record(rec)
}
} else if (is.embl(rcd)) {
foreach(rec = irec, .inorder = FALSE) %dopar% {
if (progress) pb()
embl_record(rec)
}
} else {
stop("Unknown format <", rcd[1], ">")
}
if (length(rcd_list) == 1L) {
rcd_list[[1L]]
} else {
gbRecordList(rcd_list)
}
}
make_progress_bar <- function(n) {
now <- function() {
proc.time()[[3]]
}
tick <- function() {
if (i < n) {
i <<- i + 1L
}
}
repstr <- function(x, i) {
paste(rep.int(x, i), collapse = "")
}
width <- function() {
getOption("width") - nchar("|[100%] - 100h to go.") - 2
}
cat_line <- function(...) {
msg <- paste0(..., collapse = "")
gap <- max(c(0, getOption("width") - nchar(msg, "width")))
cat("\r", msg, rep.int(" ", gap), sep = "")
utils::flush.console()
}
show_time <- function(x) {
if (x < 60) {
paste0(round(x), "s")
} else if (x < 60*60) {
paste0(round(x/60), "m")
} else {
paste0(round(x/(60*60)), "h")
}
}
i <- 0L
init_time <- now()
function() {
tick()
avg <- (now() - init_time) / i
time_left <- (n - i)*avg
nbars <- trunc(i/n*width())
cat_line("|", repstr("=", nbars), repstr(" ", width() - nbars), "| [",
format(round(i/n*100), width = 3), "%] - ", show_time(time_left),
" to go.")
}
}
## characters permitted to occur in feature table component names:
## [A-Z], [a-z], [0-9], [_'*-] --> [[:alnum]_'*\\-]
##
## Qualifiers:
## A qualifier takes the form of a slash (/) followed by its name,
## if applicable, an equals sign ( = ) and its value.
##
## Qualifier value formats:
## Free text; Controlled vocabulary or enumerated values; Citation or
## reference numbers
##
## Free text: "text"
## enclosed in double quotation marks; composed of printable
## characters (ASCII values 32-126 decimal); internal quotation markes are
## ""escaped"" by placing a second quotation mark immediately before.
##
## Controlled vocabulary/Enumerators:
## /anticodon = (pos:<base_range>,aa:<amino_acid>)
## /codon_start = 1, 2, or 3
## /direction = left, right, or both
## /estimated_length = unknown or <integer>
## /calculated_mol_wt = <integer>
## /mod_base = name of modified base
## /number = unquoted text
## /rpt_type = <repeat_type>
## /rpt_unit_range = <base_range>
## /tag_peptide = <base_range>
## /transl_except = (pos:location,aa:<amino_acid>)
## /transl_table = <integer>
##
## Citations:
## /citation = [number] e.g. /citation = [3]
## /compare = [accession-number.sequence-version] e.g. /compare = AJ634337.1
#' @keywords internal
parse_features <- function(x, seqinfo) {
feature_start <- which(substr(x, 6, 6) != " ")
fl <- ixsplit(x, feature_start)
mc_cores <- ifelse(Sys.info()['sysname'] == "Windows",
1,
floor(parallel::detectCores()*0.75))
id <- seq_along(feature_start)
ftbl <- parallel::mcmapply(
gbFeature, feature = fl, id = id,
MoreArgs = list(accession = getAccession(seqinfo)[1]),
SIMPLIFY = FALSE, USE.NAMES = FALSE, mc.cores = mc_cores
)
S4Vectors::new2('gbFeatureTable', .Data = ftbl, .id = id, .seqinfo = seqinfo, check = FALSE)
}
#' @keywords internal
parse_sequence <- function(seq, acc, seqtype, src) {
# read.BStringSet() does not support connections and
# currently only accepts fasta format. So we write out gb_sequence as
# a temporary fasta file and read it back in as an AAStringSet or
# DNAStringSet (mRNA etc seems to be encoded with Ts rather then Us,
# so we use DNAStringSets for RNA)
if (is.null(seq)) {
return(Biostrings::BStringSet())
} else {
tmp <- tempfile()
on.exit(unlink(tmp))
writeLines(text = join_seq(seq, acc, src), tmp)
origin <- switch(seqtype,
AA = Biostrings::readAAStringSet(tmp, format = "fasta"),
Biostrings::readDNAStringSet(tmp, format = "fasta"))
origin
}
}
join_seq <- function(seq, acc, src = c("gbk", "embl")) {
src <- match.arg(src, c("gbk", "embl"))
.substr <- switch(src,
gbk = Partial('substr', start = 11, stop = 75),
embl = Partial('substr', start = 6, stop = 70)
)
mc_cores <- ifelse(Sys.info()['sysname'] == "Windows",
1,
floor(parallel::detectCores()*0.75))
s <- unlist(parallel::mclapply(seq, function(x) {
paste0(strsplit(.substr(x), ' ')[[1L]], collapse = '')
}, mc.cores = mc_cores))
s <- c(paste0(">", acc), s)
s
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.