Nothing
#' @name get_ntaxa
#' @title Count number of unique taxa
#' @description Count the number of unique taxa represented by cluster(s) or
#' sequences in phylota table Use rnk to specify a taxonomic level to count. If
#' NULL counts will be made to the lowest level reported on NCBI.
#' @param phylota Phylota object
#' @param cid Cluster ID(s)
#' @param sid Sequence ID(s)
#' @param rnk Taxonomic rank
#' @param keep_higher Keep higher taxonomic ranks?
#' @return vector
#' @example examples/get_ntaxa.R
#' @export
#' @family tools-public
get_ntaxa <- function(phylota, cid = NULL, sid = NULL, rnk = NULL,
keep_higher = FALSE) {
count <- function(sids) {
txids <- get_txids(phylota = phylota, sid = sids, rnk = rnk,
keep_higher = keep_higher)
length(unique(txids))
}
get_sids_and_count <- function(cid) {
clstr <- phylota@clstrs[[cid]]
count(clstr@sids)
}
if (!is.null(sid)) {
return(count(sids = sid))
}
vapply(cid, get_sids_and_count, integer(1))
}
#' @name get_txids
#' @title Get taxonomic IDs by rank
#' @description Return taxonomic IDs for a vector of sequence IDs or all
#' sequences in a cluster. User can specify what rank the IDs should be
#' returned. If NULL, the lowest level is returned.
#' @param phylota Phylota object
#' @param cid Cluster ID
#' @param sid Sequence ID(s)
#' @param txids Vector of txids
#' @param rnk Taxonomic rank
#' @param keep_higher Keep higher taxonomic IDs?
#' @details txids can either be provided by user or they can be determined for
#' a vector of sids or for a cid. If keep_higher is TRUE, any sequence that has
#' a identity that is higher than the given rank will be returned. If FALSE,
#' these sequences will return ''.
#' @return vector
#' @export
#' @example examples/get_txids.R
#' @family tools-public
get_txids <- function(phylota, cid = NULL, sid = NULL, txids = NULL, rnk = NULL,
keep_higher = FALSE) {
get <- function(txid) {
tx <- phylota@txdct@recs[[txid]]
rnks <- tx@lng[['rnks']]
ids <- tx@lng[['ids']]
mtch <- which(rnk == rnks)
if (length(mtch) == 0) {
if (keep_higher) {
# if no match, use lowest available rank
mtch <- length(rnks)
} else {
return('')
}
}
ids[[mtch[[1]]]]
}
if (is.null(txids)) {
if (!is.null(cid)) {
clstr <- phylota@clstrs[[cid]]
sid <- clstr@sids
}
txids <- get_sq_slot(phylota = phylota, sid = sid,
slt_nm = 'txid')
}
if (is.null(rnk)) {
return(txids)
}
vapply(txids, get, '')
}
#' @name get_nsqs
#' @title Count number of sequences
#' @description Count the number of sequences in a cluster(s).
#' @param phylota Phylota object
#' @param cid Cluster ID(s)
#' @return vector
#' @export
#' @family tools-public
#' @example examples/get_nsqs.R
get_nsqs <- function(phylota, cid) {
count <- function(cid) {
clstr <- phylota@clstrs[[cid]]
length(clstr@sids)
}
vapply(cid, count, 1L)
}
#' @name get_sq_slot
#' @title Get slot data for each sequence
#' @description Get slot data for either or sequences in a cluster of
#' a vector of sequence IDs. Use list_seqrec_slots() for a list of
#' available slots.
#' @param phylota Phylota object
#' @param cid Cluster ID
#' @param sid Sequence ID(s)
#' @param slt_nm Slot name
#' @return vector
#' @export
#' @family tools-public
#' @example examples/get_sq_slot.R
get_sq_slot <- function(phylota, cid = NULL, sid = NULL,
slt_nm = list_seqrec_slots()) {
get <- function(sid) {
i <- which(sid == phylota@sqs@ids)
sq <- phylota@sqs@sqs[[i]]
slot(sq, slt_nm)
}
if (!is.null(cid)) {
clstr <- phylota@clstrs[[cid]]
sid <- clstr@sids
}
slt_nm <- match.arg(slt_nm)
expctd <- new(getSlots('SeqRec')[[slt_nm]], 1)
vapply(sid, get, expctd)
}
#' @name get_clstr_slot
#' @title Get slot data for each cluster record
#' @description Get slot data for cluster(s)
#' @param phylota Phylota object
#' @param cid Cluster ID
#' @param slt_nm Slot name
#' @return vector
#' @family tools-public
#' @export
#' @example examples/get_clstr_slot.R
get_clstr_slot <- function(phylota, cid,
slt_nm = list_clstrrec_slots()) {
get <- function(cid) {
i <- which(cid == phylota@clstrs@ids)
clstr <- phylota@clstrs@clstrs[[i]]
slot(clstr, slt_nm)
}
slt_nm <- match.arg(slt_nm)
expctd <- new(getSlots('ClstrRec')[[slt_nm]], 1)
vapply(cid, get, expctd)
}
#' @name get_tx_slot
#' @title Get slot data for each taxon record
#' @description Get slot data for taxa(s)
#' @param phylota Phylota object
#' @param txid Taxonomic ID
#' @param slt_nm Slot name
#' @return vector or list
#' @export
#' @family tools-public
#' @example examples/get_tx_slot.R
get_tx_slot <- function(phylota, txid, slt_nm = list_taxrec_slots()) {
get <- function(txid) {
tx <- phylota@txdct@recs[[txid]]
slot(tx, slt_nm)
}
slt_nm <- match.arg(slt_nm)
expctd <- new(getSlots('TaxRec')[[slt_nm]], 1)
vapply(txid, get, expctd)
}
#' @name get_stage_times
#' @title Get run times for different stages
#' @description Get slot data for taxa(s)
#' @param wd Working directory
#' @return list of runtimes in minutes
#' @export
#' @family tools-public
#' @example examples/get_stage_times.R
get_stage_times <- function(wd) {
lgfl <- file.path(wd, 'log.txt')
lines <- readLines(con = lgfl)
stage_tms <- stage_ends <- stage_starts <- c('taxise' = NA,
'download' = NA,
'cluster' = NA,
'cluster\\^2' = NA)
stage_nms <- names(stage_tms)
for (ln in lines) {
for (stgnm in stage_nms) {
pttrn <- paste0('Starting stage ', stgnm, ': ')
if (grepl(pattern = pttrn, x = ln, ignore.case = TRUE)) {
ln <- sub(pattern = pttrn,
replacement = '', x = ln, ignore.case = TRUE)
ln <- gsub(pattern = '(\\[|\\])', replacement = '', x = ln)
stage_starts[[stgnm]] <- ln
}
pttrn <- paste0('Completed stage ', stgnm, ': ')
if (grepl(pttrn, ln, ignore.case = TRUE)) {
ln <- sub(pattern = pttrn,
replacement = '', x = ln, ignore.case = TRUE)
ln <- gsub(pattern = '(\\[|\\])', replacement = '', x = ln)
stage_ends[[stgnm]] <- ln
}
}
}
for (stgnm in stage_nms) {
stage_tms[[stgnm]] <- difftime(as.POSIXct(stage_ends[[stgnm]]),
as.POSIXct(stage_starts[[stgnm]]),
units = 'mins')
}
stage_tms
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.