#' Clean sequences with a PHMM (DEFUNCT)
#'
#' This function is defunct: every call stops with an error that points to
#' `taxreturn::map_to_model`. The arguments are kept only so that existing
#' calls still match the signature; none of them are used.
#'
#' @param x A DNAbin or DNAStringset object.
#' @param model A profile hidden Markov model (a "PHMM" object) generated by
#'   the aphid R package to align the sequences to.
#' @param min_score The minimum specificity (log-odds score for the optimal
#'   alignment) between the query sequence and the PHMM model for the sequence
#'   to be retained in the output object.
#' @param shave Whether bases that are outside (to the left or right) of the
#'   PHMM object should be shaved from the outputs.
#' @param maxNs The maximum number of ambiguous N bases to retain.
#' @param cores The number of CPUs to parallelize the operation over.
#' @param quiet Whether progress should be printed to the console.
#' @param progress Whether a progress bar should be printed.
#'
#' @return Nothing; the function always signals a defunct error.
#' @export
#'
clean_seqs <- function(x, model, min_score = 100, shave = TRUE, maxNs = 0,
                       cores = 1, quiet = FALSE, progress = FALSE) {
  # Unconditionally abort and direct the user to the replacement function.
  .Defunct(new = "taxreturn::map_to_model", package = "taxreturn")
}
# Propagate taxonomic assignments to species level ------------------------
#' Propagate taxonomy (DEPRECATED)
#'
#' Fills missing (`NA`) entries of a taxonomy table with the value of the
#' rank immediately to the left, prefixed with the first letter of that
#' rank's column name and `"__"` (e.g. `"F__Tephritidae"`). Values that
#' already carry such a prefix are copied unchanged, so a gap spanning several
#' ranks keeps the prefix of the last known rank.
#'
#' Calling this function emits a deprecation warning that points to
#' `seqateurs::na_to_unclassified`.
#'
#' @param tax A taxonomy table (matrix or data frame). Columns are assumed to
#'   be ordered Kingdom, Phylum, Class, Order, Family, Genus, Species, because
#'   `from` is translated to a fixed column position.
#' @param from The taxonomic rank to start propagating at; one of "Phylum",
#'   "Class", "Order", "Family", "Genus" or "Species". Columns from this rank
#'   to the last column are filled from their left-hand neighbour.
#'
#' @return `tax` with missing values at and below `from` filled in. If the
#'   table has fewer columns than the position of `from`, it is returned
#'   unchanged.
#' @export
#'
#'
propagate_tax <- function(tax, from = "Family") {
  .Deprecated(
    new = "seqateurs::na_to_unclassified",
    package = "seqateurs",
    old = "taxreturn::propagate_tax"
  )

  # Column positions are fixed by rank order; Kingdom cannot be a starting
  # point because it has no column to its left to propagate from.
  ranks <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
  valid_from <- ranks[-1L]
  if (!is.character(from) || length(from) != 1L || !from %in% valid_from) {
    stop(
      "`from` must be one of: ", paste(valid_from, collapse = ", "),
      call. = FALSE
    )
  }
  if (is.null(dim(tax))) {
    stop("`tax` must be a two-dimensional taxonomy table", call. = FALSE)
  }
  start <- match(from, ranks)

  col_prefix <- substr(colnames(tax), 1, 1) # Assumes named Kingdom, ...

  # Nothing to propagate when the table stops before the requested rank;
  # guarding here avoids start:n_col counting downward.
  n_col <- ncol(tax)
  if (start > n_col) {
    return(tax)
  }

  for (col in start:n_col) {
    # Rows where this rank is missing but the rank to the left is known.
    prop <- is.na(tax[, col]) & !is.na(tax[, col - 1])
    newtax <- tax[prop, col - 1]
    # Only prefix values that were not themselves propagated earlier.
    # [A-Za-z] rather than [A-z]: the latter also matches "[", "_", "^", etc.
    needs_prefix <- !grepl("^[A-Za-z]__", newtax)
    newtax[needs_prefix] <- paste(
      col_prefix[col - 1], newtax[needs_prefix],
      sep = "__"
    )
    tax[prop, col] <- newtax
  }
  tax
}
# Fetchseqs function ----------------------------------------------
#' Fetch sequences (DEFUNCT)
#'
#' This function is defunct: every call stops with an error that points to
#' `taxreturn::fetch_seqs`. The arguments below are kept only so that existing
#' calls still match the signature; none of them are used.
#'
#' @param x A taxon name or vector of taxon names to download sequences for.
#' @param database The database to download from. For NCBI GenBank this
#'   currently only accepts the arguments 'nuccore' or 'genbank', which is an
#'   alias for nuccore. Alternatively sequences can be downloaded from the
#'   Barcode of Life Data System (BOLD) using 'bold'.
#' @param marker The barcode marker used as a search term for the database.
#'   If you are targeting a gene, adding a suffix \[GENE\] will increase the
#'   search selectivity. The default for GenBank is
#'   'COI\[GENE\] OR COX1\[GENE\] OR COXI\[GENE\]', while the default for BOLD
#'   is 'COI-5P'. If this is set to "mitochondria" and database is 'nuccore'
#'   or 'genbank', it will download mitochondrial genomes only. If this is set
#'   to "genome" and database is 'nuccore' or 'genbank', it will download
#'   complete genome sequences only.
#' @param downstream Instead of searching for the query taxon directly, search
#'   for each taxon at a downstream taxonomic rank. This is useful for big
#'   queries where >100k sequences will be downloaded. For example, when x is
#'   'Insecta' and downstream is 'Order', this will download all Orders within
#'   Insecta and thus not overload the query. Default is FALSE.
#' @param output The output format for the taxonomy in fasta headers.
#'   Options include "h" for full hierarchical taxonomy
#'   (SeqID;Domain;Phylum;Class;Order;Family;Genus;Species),
#'   "binom" for just genus species binomials (SeqID;Genus Species),
#'   "bold" for BOLD taxonomic ID only (SeqID;BoldTaxID),
#'   "gb" for GenBank taxonomic ID (SeqID;GBTaxID),
#'   "gb-binom" which outputs Genus species binomials as well as GenBank
#'   taxonomic IDs, and translates all BOLD taxonomic IDs to GenBank taxonomic
#'   IDs in the process,
#'   or "standard" which outputs the default format for each database. For
#'   BOLD this is `sampleid|species name|markercode|genbankid`, while for
#'   GenBank this is `Accession Sequence definition`.
#' @param min_length The minimum length of the query sequence to return.
#'   Default 1.
#' @param max_length The maximum length of the query sequence to return.
#'   This can be useful for ensuring no off-target sequences are returned.
#'   Default 2000.
#' @param subsample (Numeric) return a random subsample of sequences from the
#'   search.
#' @param chunk_size Split up the queries made (for GenBank), or returned
#'   records (for BOLD), into chunks of this size to avoid overloading API
#'   servers. If left NULL, the default for GenBank searches will be 10,000
#'   for regular queries, 1,000 if marker is "mitochondria", and 1 if marker
#'   is "genome". For BOLD queries the default is 100,000 returned records.
#' @param out_dir Output directory to write fasta files to.
#' @param compress Option to compress output fasta files using gzip.
#' @param force Option to overwrite files if they already exist.
#' @param multithread Whether multithreading should be used. If TRUE the
#'   number of cores will be automatically detected, or provide a number to
#'   manually set the number of cores to use. Note: as implemented, a separate
#'   worker thread is assigned to each taxon, therefore multithreading will
#'   only work if x is a vector, or if downstream is being used.
#' @param quiet Whether progress should be printed to the console.
#' @param progress A logical, for whether or not to print a progress bar when
#'   multithread is true. Note, this will slow down processing.
#'
#'
#' @return Nothing; the function always signals a defunct error.
#' @export
#'
fetchSeqs <- function(x, database, marker = NULL, downstream = FALSE,
                      output = "h", min_length = 1, max_length = 2000,
                      subsample = FALSE, chunk_size = NULL, out_dir = NULL,
                      compress = TRUE, force = FALSE, multithread = FALSE,
                      quiet = TRUE, progress = FALSE) {
  # Unconditionally abort and direct the user to the replacement function.
  .Defunct(new = "taxreturn::fetch_seqs", package = "taxreturn")
}
# Make Blast DB -----------------------------------------------------------
#' Make BLAST database (DEPRECATED)
#'
#' Runs the `makeblastdb` executable on a fasta file. A file whose name ends
#' in `.gz` is first decompressed next to the original (the compressed file
#' is kept), and the decompressed copy is deleted again when the function
#' exits, whether or not the database build succeeded.
#'
#' Calling this function emits a deprecation warning that points to
#' `taxreturn::make_blast_db`.
#'
#' @param file (Required) A fasta file to create a database from.
#' @param dbtype (Optional) Molecule type of database, accepts "nucl" for nucleotide or "prot" for protein.
#' @param args (Optional) Extra arguments passed to BLAST
#' @param quiet (Optional) Whether progress should be printed to console, default is FALSE
#'
#' @return `NULL`, invisibly. The function is called for its side effect of
#'   creating the BLAST database files.
#' @export
#' @import stringr
#' @importFrom R.utils gunzip
#'
makeblastdb <- function (file, dbtype = "nucl", args = NULL, quiet = FALSE) {
  # Name the deprecated function explicitly: evaluating
  # sys.call(sys.parent()) from this frame would report the *caller* of
  # makeblastdb when it is invoked from inside another function.
  .Deprecated("taxreturn::make_blast_db", package = "taxreturn", old = "makeblastdb")
  time <- Sys.time() # get time

  # Locate the executable once, up front, before touching any files.
  # NOTE(review): assumes .findExecutable() (defined elsewhere) returns the
  # same path on repeated calls - confirm.
  blast_exe <- .findExecutable("makeblastdb")

  if (is.null(args)) {
    args <- ""
  }

  # Anchor and escape the extension so only a trailing ".gz" counts; an
  # unanchored ".gz" regex also matches e.g. "bigz.fasta" or a ".gz" inside a
  # directory name, and would strip the wrong part of the path.
  gz_pattern <- "\\.gz$"
  if (stringr::str_detect(file, gz_pattern)) {
    message("Unzipping file")
    R.utils::gunzip(file, remove = FALSE)
    file <- stringr::str_replace(file, gz_pattern, "")
    # Remove the temporary decompressed copy even if makeblastdb fails.
    unzipped <- file
    on.exit(file.remove(unzipped), add = TRUE)
  }

  # stdout = TRUE captures the tool's output instead of printing it.
  system2(
    command = blast_exe,
    args = c("-in", file, "-dbtype", dbtype, args),
    wait = TRUE,
    stdout = TRUE
  )

  time <- Sys.time() - time
  if (!quiet) {
    message(paste0("made BLAST DB in ", format(time, digits = 2)))
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.