#' get.sraSTAT
#'
#' Retrieve the STAT-taxonomy (at the rank of 'order') and the grouping labels
#' for a set of SRA run_ids
#'
#' Data from NCBI-STAT (Katz et al., 2021)
#'
#' @param run_ids character, SRA 'run_id'
#' @param con pq-connection, use SerratusConnect()
#' @param prc_threshold numeric, threshold percent of total reads for tax to retain [5]
#' @return data.frame, with 'run_id', 'order_name', 'label' columns
#' @keywords palmid Serratus STAT taxonomy
#' @examples
#' \donttest{
#' # Retrieve STAT taxonomic orders for a SRA run
#' # and show all results with prc_threshold of 0%
#' con <- SerratusConnect()
#' sra.stat <- get.sraSTAT('ERR2756788', con, prc_threshold = 0)
#'
#' #NOTE: 'pident' column will be NA, match values from palm.sra object
#'
#' }
#' @import RPostgreSQL
#' @import dplyr ggplot2
#' @export
# Retrieve date from input of sra run_ids
get.sraSTAT <- function(run_ids, con = SerratusConnect(), prc_threshold = 5) {
# Bind Local Variables
run <- name <- tax_label <- kmer <- kmer_perc <- NULL
#get STAT kmer taxonomy classifications from SRA
sra.stat <- tbl(con, "sra_stat") %>%
dplyr::filter(run %in% run_ids) %>%
select(run, name, tax_label, kmer, kmer_perc) %>%
as.data.frame()
colnames(sra.stat) <- c("run_id", "order_name", "tax_label", "kmer", "kmer_perc")
# Filter
sra.stat <- sra.stat[(sra.stat$kmer_perc >= prc_threshold), ]
# Sort output
sra.stat <- sra.stat[order(sra.stat$kmer_perc, decreasing = T), ]
sra.stat <- sra.stat[order(sra.stat$run_id), ]
# Initiate empty pident vector
sra.stat$pident <- NA
return(sra.stat)
}
# # Test cases
# con <- SerratusConnect()
# frank <- get.sraSTAT('ERR2756788', con = con)
# fng <- get.sraSTAT(c('ERR2756788', 'SRR7287110'), con = con)
# lice <- get.sraSTAT('SRR1013695', con = con)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.