#' @rdname taxa_matrix
#' @export
taxa_table <- function (
biom, rank = -1, taxa = 6, lineage = FALSE,
md = ".all", unc = "singly", other = FALSE, trans = "none" ) {
biom <- as_rbiom(biom)
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
params <- slurp_env()
cache_file <- get_cache_file('taxa_table', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
params <- list2env(params)
tbl <- with(params, {
#________________________________________________________
# Validate user's arguments.
#________________________________________________________
validate_rank(max = Inf)
validate_biom_field('md', max = Inf, null_ok = TRUE)
#________________________________________________________
# Return multiple ranks in a single table.
#________________________________________________________
tbl <- NULL
for (r in rank)
tbl %<>% dplyr::bind_rows(local({
mtx <- taxa_matrix(
biom = biom,
rank = r,
taxa = taxa,
lineage = lineage,
sparse = FALSE,
unc = unc,
other = other,
trans = trans )
#________________________________________________________
# Pivot Longer
#________________________________________________________
tibble(
'.rank' = r,
'.sample' = colnames(mtx)[col(mtx)],
'.taxa' = rownames(mtx)[row(mtx)],
'.abundance' = as.numeric(mtx) )
}))
tbl[['.rank']] %<>% factor(., levels = rank)
tbl[['.sample']] %<>% {factor(., levels = intersect(biom$samples, .))}
tbl[['.taxa']] %<>% {factor(., levels = unique(.))}
#________________________________________________________
# Add Metadata
#________________________________________________________
if (length(md) > 0)
tbl %<>% left_join(
by = '.sample',
y = biom$metadata[,unique(c('.sample', md))] )
tbl
})
#________________________________________________________
# Descriptive label for y-axis.
#________________________________________________________
resp_label <- with(params, {
if (eq(trans, 'percent')) { "Relative Abundance" }
else if (is.null(biom$depth)) { "Unrarefied Counts" }
else { "Rarefied Counts" }
})
tbl %<>% as_rbiom_tbl()
attr(tbl, 'response') <- ".abundance"
attr(tbl, 'resp_label') <- resp_label
attr(tbl, 'cmd') <- current_cmd('taxa_table')
set_cache_value(cache_file, tbl)
return (tbl)
}
#' Taxa abundances per sample.
#'
#' \itemize{
#' \item{`taxa_matrix()` - }{ Accepts a single `rank` and returns a matrix. }
#' \item{`taxa_table()` - }{ Can accept more than one `rank` and returns a tibble data.frame. }
#' }
#'
#'
#' @inherit documentation_default
#'
#' @family taxa_abundance
#'
#' @return
#' \itemize{
#' \item{`taxa_matrix()` - }{
#' A numeric matrix with taxa as rows, and samples as columns. }
#' \item{`taxa_table()` - }{
#' A tibble data frame with column names .sample, .taxa, .abundance, and any requested by `md`. }
#' }
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom$ranks
#'
#' taxa_matrix(hmp50, 'Phylum')[1:4,1:6]
#'
#' taxa_table(hmp50, 'Phylum')
taxa_matrix <- function (
biom, rank = -1, taxa = NULL, lineage = FALSE,
sparse = FALSE, unc = "singly", other = FALSE, trans = "none" ) {
biom <- as_rbiom(biom)
params <- eval_envir(environment())
cmd <- sprintf("taxa_matrix(%s)", as.args(params, fun = taxa_matrix))
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
cache_file <- get_cache_file('taxa_matrix', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
#________________________________________________________
# Validate user's arguments.
#________________________________________________________
validate_taxa(null_ok = TRUE)
validate_rank()
validate_bool("sparse")
validate_var_choices('trans', c("none", "rank", "log", "log1p", "sqrt", "percent"))
#________________________________________________________
# Fetch taxonomy map with ambiguous names corrected.
#________________________________________________________
map <- taxa_map(
biom = biom,
rank = rank,
unc = unc,
lineage = lineage )
#________________________________________________________
# Compute abundance matrix for this rank.
#________________________________________________________
mtx <- tryCatch(
error = function (e) stop("Unable to group by taxonomic level: ", e),
expr = local({
counts <- biom$counts
if (eq(trans, 'percent')) counts <- rescale_cols(counts)
mtx <- slam::rollup(counts[names(map),], 1L, map, sum)
mtx <- mtx[order(tolower(rownames(mtx))), colnames(counts), drop=FALSE]
stopifnot(is(mtx, "simple_triplet_matrix"))
return (mtx)
}))
#________________________________________________________
# Only retain taxa of interest (by abundance or name).
#________________________________________________________
if (!is_null(taxa)) {
if (is.numeric(taxa) && length(taxa) == 1) {
rel <- sort(slam::row_means(t(t(mtx) / slam::col_sums(mtx))), decreasing = TRUE)
if (taxa >= 1) { taxa <- head(names(rel), taxa)
} else { taxa <- names(rel)[rel >= taxa] }
} else if (is.character(taxa)) {
taxa <- intersect(as.character(taxa), rownames(mtx))
} else {
stop ("Invalid argument for `taxa`: ", capture.output(str(taxa)))
}
if (length(taxa) == 0)
stop("No taxa match the criteria: ", capture.output(str(taxa)))
if (isFALSE(other) || is.null(other)) {
mtx <- mtx[taxa,,drop=FALSE]
} else {
if (!is_scalar_character(other) || is_na(other))
other <- "Other"
mtx <- mtx[setdiff(rownames(mtx), taxa),,drop=FALSE] %>%
slam::col_sums() %>%
matrix(., nrow = 1, dimnames = list(other, names(.))) %>%
slam::as.simple_triplet_matrix() %>%
rbind(mtx[taxa,,drop=FALSE], .)
taxa <- c(taxa, other)
}
}
#________________________________________________________
# Optionally transform the computed abundance values.
#________________________________________________________
if (trans %in% c("rank", "log", "log1p", "sqrt"))
mtx$v <- do.call(`::`, list('base', trans))(mtx$v)
#________________________________________________________
# Convert from slam to base matrix if sparse=FALSE
#________________________________________________________
if (isFALSE(sparse))
mtx <- as.matrix(mtx)
attr(mtx, 'cmd') <- cmd
set_cache_value(cache_file, mtx)
return (mtx)
}
#' Get summary taxa abundances.
#'
#' @inherit documentation_default
#'
#' @family taxa_abundance
#'
#' @param rank The taxonomic rank to return sums or means for. The default,
#' \code{0}, returns per-OTU summaries.
#'
#' @return A named, sorted numeric vector.
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' taxa_sums(hmp50) %>% head(4)
#'
#' taxa_means(hmp50, 'Family') %>% head(5)
#'
taxa_sums <- function (biom, rank = 0) {
taxa_matrix(biom, rank, sparse = TRUE) %>%
slam::row_sums() %>%
sort(decreasing = TRUE)
}
#' @rdname taxa_sums
#' @export
taxa_means <- function (biom, rank = 0) {
taxa_matrix(biom, rank, sparse = TRUE) %>%
slam::row_means() %>%
sort(decreasing = TRUE)
}
#' Apply `p.top` constraint to data about to be plotted.
#'
#' Used by boxplot_stats() and corrplot_stats().
#'
#' @noRd
#' @keywords internal
#'
apply_p.top <- function (params) {
p.top <- params$p.top
ggdata <- params$.ggdata
stats <- params$.plot_attrs[['stats']]
vline <- attr(ggdata, 'vline', exact = TRUE)
stopifnot(all(c('.taxa', '.rank') %in% names(ggdata)))
stopifnot('.taxa' %in% names(stats))
if (!hasName(stats, '.adj.p'))
stop ("No p-values to apply `p.top` constraint on.")
#________________________________________________________
# Locate the top taxa for each rank.
#________________________________________________________
if (!hasName(stats, '.rank'))
stats[['.rank']] <- factor(as.character(ggdata[['.rank']][[1]]))
keep_taxa <- plyr::dlply(stats, ply_cols('.rank'), function (x) {
taxa_min_p <- split(x[['.adj.p']], x[['.taxa']]) %>%
sapply(base::min, 1, na.rm = TRUE) %>%
sort()
if (p.top >= 1) { return (head(names(taxa_min_p), p.top))
} else { return (names(which(taxa_min_p <= p.top))) }
})
#________________________________________________________
# Drop rows for taxa that didn't make the cut-off.
#________________________________________________________
for (obj_name in c('ggdata', 'stats', 'vline')) {
df <- get(obj_name, inherits = FALSE)
if (!is.data.frame(df)) next
attrs <- attributes(df)
if (!hasName(df, '.rank'))
df[['.rank']] <- factor(names(keep_taxa)[[1]])
df %<>% plyr::ddply(ply_cols('.rank'), function (x) {
rank <- as.character(x[['.rank']][[1]])
x[x[['.taxa']] %in% keep_taxa[[rank]],,drop=FALSE]
}) %>% as_rbiom_tbl()
df[['.taxa']] %<>% {factor(., unique(unname(unlist(keep_taxa))))}
for (i in names(attrs))
if (is.null(attr(df, i, exact = TRUE)))
attr(df, i) <- attrs[[i]]
assign(obj_name, df)
}
params$.plot_attrs[['stats']] <- stats
attr(ggdata, 'vline') <- vline
params$.ggdata <- ggdata
return (invisible(params))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.