Nothing
#' @inherit getBio
#' @title Retrieve a Collection: Genome, Proteome, CDS, RNA, GFF, Repeat Masker, AssemblyStats
#' @description Main collection retrieval function for an organism of interest.
#' By specifying the scientific name of an organism of interest a collection consisting of
#' the genome file, proteome file, CDS file, RNA file, GFF file, Repeat Masker file, AssemblyStats
#' file of the organism of interest
#' can be downloaded and stored locally. Collections can be retrieved from
#' several databases.
#' @param path a character string specifying the location (a folder) in which
#' the corresponding collection shall be stored. Default is
#' \code{path} = \code{file.path("_db_downloads","collections")}.
#' @examples \dontrun{
#' # download the collection of Homo sapiens from refseq
#' # and store the corresponding collection files in
#' # '_db_downloads/collections/refseq/Homo_sapiens'
#' Hsap_collection <- getCollection( db = "refseq",
#' organism = "Homo sapiens",
#' path = file.path("_db_downloads","collections"))
#' # download the collection of Homo sapiens from genbank
#' # and store the corresponding collection files in
#' # '_db_downloads/collections/genbank/Homo_sapiens'
#' Hsap_collection <- getCollection( db = "genbank",
#' organism = "Homo sapiens",
#' path = file.path("_db_downloads","collections"))
#' # download the collection of Homo sapiens from ensembl
#' # and store the corresponding collection files in
#' # '_db_downloads/collections/ensembl/Homo_sapiens'
#' Hsap_collection <- getCollection( db = "ensembl",
#' organism = "Homo sapiens",
#' path = file.path("_db_downloads","collections"))
#' }
#'
#' @family getBio
#' @family collection
#' @export
getCollection <-
  function(db = "refseq",
           organism,
           reference = TRUE,
           skip_bacteria = TRUE,
           release = NULL,
           assembly_type = "toplevel",
           analyse_genome = FALSE,
           remove_annotation_outliers = FALSE,
           gunzip = FALSE,
           path = file.path("_db_downloads", "collections"),
           mute_citation = FALSE
  ) {
    # Retrieve every biotype that `supported_biotypes(db)` lists for `organism`
    # via `getBio()`, store the files under `<path>/<db>/<organism_with_underscores>`,
    # tidy that folder (drop md5 checksum files, move "doc_" files into a
    # "doc" sub-folder) and return the collection folder path.

    # Folder-safe organism name: spaces become underscores.
    new_name <- stringr::str_replace_all(organism, " ", "_")
    all_biotypes <- supported_biotypes(db)
    message("-> Starting collection retrieval (", paste(all_biotypes, collapse = ", "),") for ", new_name, " ...")

    # Abort before creating any folder when the organism cannot be found.
    org_exists <- is.genome.available(db = db, organism, details = TRUE)
    if (isFALSE(org_exists) || length(org_exists) == 0) {
      stop("-> No entry was found for organism ",organism,". Could the name be misspelled?", call. = FALSE)
    }

    # From here on `path` is the organism-specific collection folder.
    path <- file.path(path, db, new_name)
    if (!dir.exists(path)) {
      dir.create(path, recursive = TRUE)
    }

    for (type in all_biotypes) {
      # `format` is the name attached to the current biotype entry.
      # The citation is muted for each individual download; it is emitted
      # once at the end of this function, subject to `mute_citation`.
      getBio(db, organism, type,
             reference = reference, release = release, gunzip = gunzip,
             update = FALSE, skip_bacteria = skip_bacteria,
             path = path,
             remove_annotation_outliers = remove_annotation_outliers,
             analyse_genome = analyse_genome, assembly_type = assembly_type,
             format = names(all_biotypes[all_biotypes == type]), mute_citation = TRUE)
      message("\n")
    }

    # NOTE: a leftover `browser()` debugging call used to sit here; it halted
    # every run in the interactive debugger and has been removed.
    output_files <- list.files(path)

    # Remove md5 files
    md5_files <- output_files[stringr::str_detect(output_files, "md5checksums.txt")]
    if (length(md5_files) > 0) {
      file.remove(file.path(path, md5_files))
    }

    # Move doc files
    doc_folder <- file.path(path, "doc")
    if (!dir.exists(doc_folder)) {
      dir.create(doc_folder)
    }
    doc_files <- output_files[stringr::str_detect(output_files, "doc_")]
    if (length(doc_files) > 0) {
      file.rename(file.path(path, doc_files),
                  file.path(doc_folder, doc_files))
    }

    # Verify assembly doc is now valid
    readAssemblyDoc(doc_folder, db)

    message("-> Collection retrieval finished successfully!")
    message("\n")
    please_cite_biomartr(mute_citation = mute_citation)

    return(path)
  }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.