Nothing
#' @rdname taxa_matrix
#' @export
taxa_table <- function (
biom, rank = -1, taxa = 6, lineage = FALSE,
md = ".all", unc = "singly", other = FALSE,
transform = "none", ties = "random", seed = 0 ) {
biom <- as_rbiom(biom)
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
params <- slurp_env()
cache_file <- get_cache_file('taxa_table', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
params <- list2env(params)
tbl <- with(params, {
#________________________________________________________
# Validate user's arguments.
#________________________________________________________
validate_rank(max = Inf)
validate_biom_field('md', max = Inf, null_ok = TRUE)
#________________________________________________________
# Return multiple ranks in a single table.
#________________________________________________________
tbl <- NULL
for (r in rank)
tbl %<>% dplyr::bind_rows(local({
mtx <- taxa_matrix(
biom = biom,
rank = r,
taxa = taxa,
lineage = lineage,
sparse = FALSE,
unc = unc,
other = other,
transform = transform,
ties = ties,
seed = seed )
#________________________________________________________
# Pivot Longer
#________________________________________________________
tibble(
'.rank' = r,
'.sample' = colnames(mtx)[col(mtx)],
'.taxa' = rownames(mtx)[row(mtx)],
'.abundance' = as.numeric(mtx) )
}))
tbl[['.rank']] %<>% factor(., levels = rank)
tbl[['.sample']] %<>% {factor(., levels = intersect(biom$samples, .))}
tbl[['.taxa']] %<>% {factor(., levels = unique(.))}
#________________________________________________________
# Add Metadata
#________________________________________________________
if (length(md) > 0)
tbl %<>% left_join(
by = '.sample',
y = biom$metadata[,unique(c('.sample', md))] )
tbl
})
#________________________________________________________
# Descriptive label for y-axis.
#________________________________________________________
resp_label <- with(params, {
if (eq(transform, 'percent')) { "Relative Abundance" }
else if (is.null(biom$depth)) { "Unrarefied Counts" }
else { "Rarefied Counts" }
})
if (eq(params$transform, 'rank'))
resp_label %<>% tolower() %>% paste0("Ranked Abundance\n", "(from ", ., ")")
if (!params$transform %in% c('none', 'rank', 'percent'))
resp_label %<>% paste0("\n(", params$transform, " transformed)")
tbl %<>% as_rbiom_tbl()
attr(tbl, 'response') <- ".abundance"
attr(tbl, 'resp_label') <- resp_label
attr(tbl, 'cmd') <- current_cmd('taxa_table')
set_cache_value(cache_file, tbl)
return (tbl)
}
#' Taxa abundances per sample.
#'
#' \describe{
#' \item{`taxa_matrix()` - }{ Accepts a single `rank` and returns a matrix. }
#' \item{`taxa_table()` - }{ Can accept more than one `rank` and returns a tibble data.frame. }
#' }
#'
#'
#' @inherit documentation_default
#'
#' @family taxa_abundance
#'
#' @return
#' \describe{
#' \item{`taxa_matrix()` - }{
#' A numeric matrix with taxa as rows, and samples as columns. }
#' \item{`taxa_table()` - }{
#' A tibble data frame with column names .sample, .taxa, .abundance, and any requested by `md`. }
#' }
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' hmp50$ranks
#'
#' taxa_matrix(hmp50, 'Phylum')[1:4,1:6]
#'
#' taxa_table(hmp50, 'Phylum')
taxa_matrix <- function (
biom, rank = -1, taxa = NULL, lineage = FALSE,
sparse = FALSE, unc = "singly", other = FALSE,
transform = "none", ties = "random", seed = 0 ) {
biom <- as_rbiom(biom)
params <- eval_envir(environment())
cmd <- sprintf("taxa_matrix(%s)", as.args(params, fun = taxa_matrix))
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
cache_file <- get_cache_file('taxa_matrix', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
#________________________________________________________
# Validate user's arguments.
#________________________________________________________
validate_taxa(null_ok = TRUE)
validate_rank()
validate_bool("sparse")
validate_var_choices('transform', c("none", "rank", "log", "log1p", "sqrt", "percent"))
validate_var_choices('ties', c("average", "first", "last", "random", "max", "min"))
validate_seed()
#________________________________________________________
# Fetch taxonomy map with ambiguous names corrected.
#________________________________________________________
map <- taxa_map(
biom = biom,
rank = rank,
unc = unc,
lineage = lineage )
#________________________________________________________
# Compute abundance matrix for this rank.
#________________________________________________________
mtx <- tryCatch(
error = function (e) stop("Unable to group by taxonomic level: ", e),
expr = local({
counts <- biom$counts
if (eq(transform, 'percent')) counts <- rescale_cols(counts)
mtx <- rollup(counts[names(map),], 1L, map, sum)
mtx <- mtx[order(tolower(rownames(mtx))), colnames(counts), drop=FALSE]
stopifnot(inherits(mtx, "simple_triplet_matrix"))
return (mtx)
}))
#________________________________________________________
# Only retain taxa of interest (by abundance or name).
#________________________________________________________
if (!is_null(taxa)) {
if (is.numeric(taxa) && length(taxa) == 1) {
rel <- sort(slam::row_means(t(t(mtx) / col_sums(mtx))), decreasing = TRUE)
if (taxa >= 1) { taxa <- head(names(rel), taxa)
} else { taxa <- names(rel)[rel >= taxa] }
} else if (is.character(taxa)) {
taxa <- intersect(as.character(taxa), rownames(mtx))
} else {
stop ("Invalid argument for `taxa`: ", capture.output(str(taxa)))
}
if (length(taxa) == 0)
stop("No taxa match the criteria: ", capture.output(str(taxa)))
if (isFALSE(other) || is.null(other)) {
mtx <- mtx[taxa,,drop=FALSE]
} else {
if (!is_scalar_character(other) || is_na(other))
other <- "Other"
mtx <- mtx[setdiff(rownames(mtx), taxa),,drop=FALSE] %>%
col_sums() %>%
matrix(., nrow = 1, dimnames = list(other, names(.))) %>%
as.simple_triplet_matrix() %>%
rbind(mtx[taxa,,drop=FALSE], .)
taxa <- c(taxa, other)
}
}
#________________________________________________________
# Optionally transform the computed abundance values.
#________________________________________________________
if (eq(transform, "rank")) {
mtx %<>% as.matrix() # need those zeroes
# Preserve current .Random.seed
oldseed <- if (exists(".Random.seed")) .Random.seed else NULL
set.seed(seed)
mtx[] <- base::rank(mtx, ties.method = ties)
if (!is.null(oldseed)) .Random.seed <- oldseed
mtx %<>% as.simple_triplet_matrix()
} else if (transform %in% c("log", "log1p", "sqrt")) {
mtx$v <- do.call(`::`, list('base', transform))(mtx$v)
}
#________________________________________________________
# Convert from slam to base matrix if sparse=FALSE
#________________________________________________________
if (isFALSE(sparse))
mtx <- as.matrix(mtx)
attr(mtx, 'cmd') <- cmd
set_cache_value(cache_file, mtx)
return (mtx)
}
#' Get summary taxa abundances.
#'
#' @inherit documentation_default
#'
#' @family taxa_abundance
#'
#' @param sort Sort the result. Options: `NULL`, `"asc"`, or `"desc"`, where
#' `NULL` will not sort the result. `"asc"` will sort in ascending order
#' (smallest to largest), and `"desc"` in descending order (largest to
#' smallest). Ignored when the result is not a simple numeric vector.
#' Default: `NULL`
#'
#' @param FUN The function to apply to each row of the `taxa_matrix()`.
#'
#' @param ... Optional arguments to `FUN`.
#'
#' @return For `taxa_sums` and `taxa_means`, a named numeric vector.
#' For `taxa_apply`, a named vector or list with the results of `FUN`.
#' The names are the taxa IDs.
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' taxa_sums(hmp50) %>% head(4)
#'
#' taxa_means(hmp50, 'Family') %>% head(5)
#'
#' taxa_apply(hmp50, max) %>% head(5)
#'
#' taxa_apply(hmp50, fivenum) %>% head(5)
taxa_sums <- function (biom, rank = -1, sort = NULL, lineage = FALSE, unc = "singly", transform = "none") {
taxa_apply(biom, FUN = base::sum, rank, sort, lineage, unc, transform)
}
#' @rdname taxa_sums
#' @export
taxa_means <- function (biom, rank = -1, sort = NULL, lineage = FALSE, unc = "singly", transform = "none") {
taxa_apply(biom, FUN = base::mean, rank, sort, lineage, unc, transform)
}
#' @rdname taxa_sums
#' @export
taxa_apply <- function (biom, FUN, rank = -1, sort = NULL, lineage = FALSE, unc = "singly", transform = "none", ...) {
stopifnot(is.function(FUN))
validate_var_choices('sort', choices = c('asc', 'desc'), null_ok = TRUE)
mtx <- taxa_matrix(biom = biom, rank = rank, lineage = lineage, sparse = TRUE, unc = unc, transform = transform)
res <- if (identical(FUN, base::sum)) { slam::row_sums(mtx)
} else if (identical(FUN, base::mean)) { slam::row_means(mtx)
} else { slam::rowapply_simple_triplet_matrix(mtx, FUN, ...) }
if (is.numeric(res) && !is.null(sort))
res <- base::sort(res, decreasing = (sort == 'desc'))
return (res)
}
#' @rdname subset
#' @export
subset_taxa <- function (x, subset, clone = TRUE, ...) {
biom <- if (isTRUE(clone)) x$clone() else x
df <- biom$taxonomy
keep <- eval(expr = substitute(subset), envir = df, enclos = parent.frame())
df <- biom$counts[keep,,drop = FALSE]
suppressWarnings(biom$counts <- df)
if (isTRUE(clone)) { return (biom) } else { return (invisible(biom)) }
}
#' Apply `p.top` constraint to data about to be plotted.
#'
#' Used by boxplot_stats() and corrplot_stats().
#'
#' @noRd
#' @keywords internal
#'
apply_p.top <- function (params) {
p.top <- params$p.top
ggdata <- params$.ggdata
stats <- params$.plot_attrs[['stats']]
vline <- attr(ggdata, 'vline', exact = TRUE)
if (!is.finite(p.top)) return (invisible(params))
if (!hasName(stats, '.adj.p')) return (invisible(params))
if (!hasName(ggdata, '.taxa')) return (invisible(params))
if (!hasName(ggdata, '.rank')) return (invisible(params))
#________________________________________________________
# Locate the top taxa for each rank.
#________________________________________________________
if (!hasName(stats, '.rank'))
stats[['.rank']] <- factor(as.character(ggdata[['.rank']][[1]]))
keep_taxa <- plyr::dlply(stats, ply_cols('.rank'), function (x) {
taxa_min_p <- split(x[['.adj.p']], x[['.taxa']]) %>%
sapply(base::min, 1, na.rm = TRUE) %>%
sort()
if (p.top >= 1) { return (head(names(taxa_min_p), p.top))
} else { return (names(which(taxa_min_p <= p.top))) }
})
#________________________________________________________
# Drop rows for taxa that didn't make the cut-off.
#________________________________________________________
for (obj_name in c('ggdata', 'stats', 'vline')) {
df <- get(obj_name, inherits = FALSE)
if (!is.data.frame(df)) next
attrs <- attributes(df)
if (!hasName(df, '.rank'))
df[['.rank']] <- factor(names(keep_taxa)[[1]])
df %<>% plyr::ddply(ply_cols('.rank'), function (x) {
rank <- as.character(x[['.rank']][[1]])
x[x[['.taxa']] %in% keep_taxa[[rank]],,drop=FALSE]
}) %>% as_rbiom_tbl()
df[['.taxa']] %<>% {factor(., unique(unname(unlist(keep_taxa))))}
for (i in names(attrs))
if (is.null(attr(df, i, exact = TRUE)))
attr(df, i) <- attrs[[i]]
assign(obj_name, df)
}
params$.plot_attrs[['stats']] <- stats
attr(ggdata, 'vline') <- vline
params$.ggdata <- ggdata
return (invisible(params))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.