#' @include utils.r
#' @include eutil.r
NULL
# efetch-class -----------------------------------------------------------
#' efetch
#'
#' \dQuote{efetch} is an S4 class that provides a container for data retrived
#' by calls to the NCBI EFetch utility.
#'
#' @slot url A character vector containing the query URL.
#' @slot error Any error or warning messages parsed from
#' the output of the call submitted to Entrez.
#' @slot content A character vector holding the unparsed
#' contents of a request to Entrez.
#' @slot database A character vector giving the name of the queried database.
#' @slot rettype Retrieval Mode. A character vector specifying the record
#' view returned, such as \sQuote{Abstract} or \sQuote{MEDLINE} from
#' \emph{pubmed}, or \sQuote{GenPept} or \sQuote{FASTA} from \emph{protein}.
#' @slot retmode Retrieval Mode. A character vector specifying the data format
#' of the records returned, such as plain \sQuote{text}, \sQuote{HMTL} or
#' \sQuote{XML}.
#'
#' @rdname efetch
#' @export
#' @classHierarchy
#' @classMethods
setClass("efetch",
representation(database = "character",
rettype = "character",
retmode = "character"),
prototype(database = NA_character_,
rettype = NA_character_,
retmode = NA_character_),
contains = "eutil")
# accessor-methods -------------------------------------------------------
setMethod("database", "efetch", function(x) x@database)
setMethod("retmode", "efetch", function(x) x@retmode)
setMethod("rettype", "efetch", function(x) x@rettype)
#' @autoImports
setMethod("content", "efetch",
function (x, as = NULL) {
as <- as %|null|% retmode(x)
if (as == "asn.1")
as <- "text"
if (as == "xml" && retmode(x) != "xml")
stop("Document does not contain XML", call.=FALSE)
as <- match.arg(as, c("text", "xml"))
callNextMethod(x = x, as = as)
})
# show-method ------------------------------------------------------------
setMethod("show", "efetch",
function (object) {
if (retmode(object) == "xml")
print(content(object))
else
cat(content(object))
cat(sprintf("EFetch query using the %s database.\nQuery url: %s\n",
sQuote(database(object)), sQuote(queryUrl(object))))
cat(sprintf("Retrieval type: %s, retrieval mode: %s\n",
sQuote(rettype(object)), sQuote(retmode(object))))
invisible(NULL)
})
# write-method -----------------------------------------------------------
#' Write efetch data to file
#'
#' @param x An \code{\linkS4class{efetch}} instance.
#' @param file A connection, or a character string naming the file to write to.
#' @param append Append the data \code{x} to the connection.
#' @export
setMethod("write", "efetch",
function (x, file = "data", append = FALSE) {
write(x = content(x, "text"), file = file, append = append)
})
# c-method ---------------------------------------------------------------
#' Combining efetch objects
#'
#' Only data retrieved from the same datebase in the same \code{retmode} and
#' \code{rettype} can be combined.
#'
#' @usage c(...)
#' @param ... objects to be combined.
#' @export
#' @autoImports
setMethod("c", "efetch",
function (x, ..., recursive = FALSE) {
database <- compactNA(unique(c(database(x), vapply(list(...), database, character(1)))))
assert_that(length(database) == 1L)
rettype <- compactNA(unique(c(rettype(x), vapply(list(...), rettype, character(1)))))
assert_that(length(rettype) == 1L)
retmode <- compactNA(unique(c(retmode(x), vapply(list(...), retmode, character(1)))))
assert_that(length(retmode) == 1L)
url <- c(queryUrl(x), vapply(list(...), queryUrl, character(1)))
content <- c(content(x, "text"),
unlist(lapply(list(...), content, as="text")))
new("efetch", url = url, content = content, error = list(),
database = db, retmode = rm, rettype = rt)
})
#' \code{efetch} retrieves data records in the requested format from a
#' character vector of one or more primary UIDs or from a set of UIDs stored
#' in the user's web environment.
#'
#' @details
#' See the official online documentation for NCBI's
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch}{EUtilities}
#' for additional information.
#'
#' The default retrieval mode (\code{retmode}) for the \code{pubmed},
#' \code{nuccore}, \code{protein}, and \code{gene} databases is 'text'. Default
#' \code{rettype}s are 'medline', 'gb', 'gp', and 'gene_table', respectively.
#'
#' @param id (Required)
#' List of UIDs provided either as a character vector, as an
#' \code{\linkS4class{esearch}} instance, or by reference to a web environment
#' and a query key obtained directly from previous calls to
#' \code{\linkS4class{esearch}} (if \code{usehistory = TRUE}),
#' \code{\linkS4class{epost}} or \code{\linkS4class{elink}}.
#' If UIDs are provided as a plain character vector, \code{db} must be
#' specified explicitly, and all of the UIDs must be from the database
#' specified by \code{db}.
#' @param db (Required only when \code{id} is a vector of UIDs)
#' Database from which to retrieve records. See
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25497/table/chapter2.chapter2_table1/?report=objectonly}{here}
#' for the supported databases.
#' @param rettype A character string specifying the retrieval type,
#' such as 'abstract' or 'medline' from PubMed, 'gp' or 'fasta' from
#' protein, or 'gb', 'gbwithparts, or 'fasta_cds_na' from nuccore.
#' See
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.chapter4_table1/?report=objectonly}{here}
#' for allowed values for each database.
#' @param retmode A character string specifying the data mode of the
#' records returned, such as plain text, XML, or asn.1. See
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.chapter4_table1/?report=objectonly}{here}
#' for allowed values for each database.
#' @param retstart Numeric index of the first record to be retrieved.
#' @param retmax Total number of records from the input set to be retrieved.
#' @param query_key An integer specifying which of the UID lists attached
#' to a user's Web Environment will be used as input to \code{efetch}.
#' (Usually obtained drectely from objects returned by previous
#' \code{\linkS4class{esearch}}, \code{\linkS4class{epost}} or
#' \code{\linkS4class{elink}} calls.)
#' @param WebEnv A character string specifying the Web Environment that
#' contains the UID list. (Usually obtained directely from objects returned
#' by previous \code{\linkS4class{esearch}}, \code{\linkS4class{epost}} or
#' \code{\linkS4class{elink}} calls.)
#' @param strand Strand of DNA to retrieve. (1: plus strand, 2: minus strand)
#' @param seq_start First sequence base to retrieve.
#' @param seq_stop Last sequence base to retrieve.
#' @param complexity Data content to return. (0: entire data structure,
#' 1: bioseq, 2: minimal bioseq-set, 3: minimal nuc-prot, 4: minimal pub-set)
#' @return An \code{efetch} instance.
#' @seealso \code{\link{efetch.batch}} for downloading more than about 500
#' data records.
#' \code{\link{content}} to retrieve data from \code{\linkS4class{efetch}}
#' objects.
#' @example inst/examples/efetch.r
#' @export
#' @autoImports
efetch <- function (id, db = NULL, rettype = NULL, retmode = NULL,
retstart = NULL, retmax = NULL, query_key = NULL,
WebEnv = NULL, strand = NULL, seq_start = NULL,
seq_stop = NULL, complexity = NULL) {
## extract query parameters
params <- get_params(id, db, WebEnv, query_key)
# set default rettype and retmode for a given db
r <- set_record_type(params$db, rettype, retmode)
if (is.null(retmax))
retmax <- Inf
if (retmax > 500 && (is.finite(params$count) && (params$count > 500))) {
# if record_count exceeds 500 issue a warning and recommend
# efetch.batch()
message(gettextf("You are attempting to download %s records.\nOnly the first 500 are downloaded. Use efetch.batch() instead.",
min(c(params$count, retmax))))
retmax <- 500
params$uid <- params$uid[seq_len(500)]
} else if (is.na(params$count)) {
# this takes care of the cases where we don't actually know how many UIDs
# are stored on the history server
# message("A single download request is restricted to 500 records.\nUse efetch.batch() to download more records.")
retmax <- 500
}
method <- if (length(params$uid) < 100) "GET" else "POST"
o <- .equery('efetch', method, db = params$db, id = .collapse(params$uid),
query_key = params$query_key, WebEnv = params$WebEnv,
retmode = r$retmode, rettype = r$rettype, retstart = retstart,
retmax = retmax, strand = strand, seq_start = seq_start,
seq_stop = seq_stop, complexity = complexity)
error <- error(o)
error <- if (r$retmode == "xml" && all_empty(error)) checkErrors(o, FALSE) else error
new("efetch", url = queryUrl(o), content = content(o, "text"),
error = error, database = params$db,
retmode = r$retmode %||% NA_character_,
rettype = r$rettype %||% NA_character_)
}
#' Retrieve batches of data records in the requested format from NCBI
#'
#' \code{efetch.batch} retrieves large data sets from NCBI in batches.
#'
#' @details
#' See the official online documentation for NCBI's
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch}{EUtilities}
#' for additional information.
#'
#' @param id (Required)
#' List of UIDs provided (via the Entrez History server) by an
#' \code{\linkS4class{esearch}}, \code{\linkS4class{epost}} or
#' \code{\linkS4class{elink}} object.
#' @param chunk_size Number of records downloaded as a batch (default: 200;
#' maximum: 500).
#' @param rettype A character string specifying the record view returned,
#' such as 'abstract' or 'medline' from PubMed, or 'gp' or 'fasta' from
#' protein.
#' See
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.chapter4_table1/?report=objectonly}{here}
#' for allowed values for each database.
#' @param retmode A character string specifying the data format of the
#' records returned, such as plain text, XML, or asn.1.
#' See
#' \href{http://www.ncbi.nlm.nih.gov/books/NBK25499/table/chapter4.chapter4_table1/?report=objectonly}{here}
#' for allowed values for each database.
#' @param retmax Total number of records from the input set to be retrieved.
#' @param strand Strand of DNA to retrieve. (1: plus strand, 2: minus strand)
#' @param seq_start First sequence base to retrieve.
#' @param seq_stop Last sequence base to retrieve.
#' @param complexity Data content to return. (0: entire data structure,
#' 1: bioseq, 2: minimal bioseq-set, 3: minimal nuc-prot, 4: minimal pub-set)
#' @return An \code{\linkS4class{efetch}} object.
#' @export
#' @example inst/examples/efetch.batch.r
#' @autoImports
efetch.batch <- function (id, chunk_size=200, rettype=NULL, retmode=NULL,
retmax=NULL, strand=NULL, seq_start=NULL,
seq_stop=NULL, complexity=NULL) {
if (class(id) %ni% c("esearch", "epost", "elink"))
stop("efetch.batch() expects an 'esearch', 'epost', or 'elink' object")
max_chunk <- 500
if (chunk_size > max_chunk) {
warning(sprintf("The maximum downloadable chunk size is %s."),
max_chunk, call.=FALSE)
chunk_size <- max_chunk
}
count <- count(id)
if (!is.null(retmax) && retmax < count) {
count <- retmax
}
if (count <= max_chunk) {
res <- efetch(id=id, rettype=rettype, retmode=retmode, retstart=NULL,
retmax=NULL, strand=strand, seq_start=seq_start,
seq_stop=seq_stop, complexity=complexity)
} else {
n_chunks <- count%/%chunk_size
retstart <- seq(from=1, to=n_chunks*chunk_size, by=chunk_size)
res <- new("efetch")
for (start in retstart) {
res <- c(res, efetch(id=id, rettype=rettype, retmode=retmode,
retstart=start, retmax=chunk_size, strand=strand,
seq_start=seq_start, seq_stop=seq_stop,
complexity=complexity))
Sys.sleep(time=0.33)
}
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.