Nothing
#' Calculate the alpha diversity of each sample.
#'
#' @inherit documentation_default
#'
#' @family alpha_diversity
#'
#' @return A data frame of alpha diversity values.
#' Each combination of sample/depth/\code{adiv} has its own row.
#' Column names are \bold{.sample}, \bold{.depth}, \bold{.adiv},
#' and \bold{.diversity}, followed by any metadata fields requested by
#' \code{md}.
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' # Subset to 10 samples.
#' biom <- slice(hmp50, 1:10)
#' adiv_table(biom)
#'
#' biom <- rarefy(biom)
#' adiv_table(biom, md = NULL)
adiv_table <- function (
biom, adiv = "Shannon", md = ".all", transform = "none", cpus = NULL ) {
biom <- as_rbiom(biom)
params <- eval_envir(environment())
cmd <- sprintf("adiv_table(%s)", as.args(params, fun = adiv_table))
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
cache_file <- get_cache_file('adiv_table', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
#________________________________________________________
# Check for valid arguments.
#________________________________________________________
validate_adiv(max = Inf)
validate_biom_field('md', null_ok = TRUE, max = Inf)
#________________________________________________________
# Compute adiv values
#________________________________________________________
mtx <- adiv_matrix(biom = biom, adiv = adiv, transform = transform)
tbl <- tibble(
'.sample' = rownames(mtx)[row(mtx)] %>% factor(levels = rownames(mtx)),
'.depth' = unname(mtx[,'Depth'][row(mtx)]),
'.adiv' = colnames(mtx)[col(mtx)] %>% factor(levels = adiv),
'.diversity' = as.numeric(mtx) ) %>%
dplyr::filter(!is.na(.data$.adiv))
#________________________________________________________
# Add Metadata
#________________________________________________________
if (!is.null(md))
tbl %<>% left_join(
y = biom$metadata[,unique(c('.sample', md))],
by = '.sample' )
#________________________________________________________
# Descriptive label for y-axis.
#________________________________________________________
if (length(adiv) == 1) {
resp_label <- switch(
EXPR = adiv,
'OTUs' = "Observed OTUs",
'Depth' = "Sequencing Depth",
paste(adiv, "Diversity") )
} else {
resp_label <- "Alpha Diversity"
}
tbl %<>% as_rbiom_tbl()
attr(tbl, 'cmd') <- cmd
attr(tbl, 'response') <- ".diversity"
attr(tbl, 'resp_label') <- resp_label
set_cache_value(cache_file, tbl)
return (tbl)
}
#' Create a matrix of samples x alpha diversity metrics.
#'
#' @inherit documentation_default
#'
#' @param adiv Alpha diversity metric(s) to use. Options are: `"OTUs"`,
#' `"Shannon"`, `"Chao1"`, `"Simpson"`, and/or
#' `"InvSimpson"`. Set `adiv=".all"` to use all metrics.
#' Multiple/abbreviated values allowed.
#' Default: `".all"`
#'
#' @return A numeric matrix with samples as rows. The first column is
#' \bold{Depth}. Remaining columns are the alpha diversity metric names
#' given by `adiv`: one or more of \bold{OTUs}, \bold{Shannon},
#' \bold{Chao1}, \bold{Simpson}, and \bold{InvSimpson}.
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom <- slice_head(hmp50, n = 5)
#'
#' adiv_matrix(biom)
adiv_matrix <- function (biom, adiv = ".all", transform = "none", cpus = NULL) {
biom <- as_rbiom(biom)
params <- eval_envir(environment())
cmd <- sprintf("adiv_matrix(%s)", as.args(params, fun = adiv_matrix))
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
cache_file <- get_cache_file('adiv_matrix', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
remove("params")
#________________________________________________________
# Check for valid arguments.
#________________________________________________________
validate_adiv(max = Inf)
validate_var_choices('transform', c("none", "rank", "log", "log1p", "sqrt"))
validate_cpus()
#________________________________________________________
# We want a numeric matrix of samples x adiv metrics
#________________________________________________________
otu_mtx <- as.matrix(biom$counts)
algorithms <- 0L
if ('Shannon' %in% adiv) algorithms = algorithms + 1L
if ('Chao1' %in% adiv) algorithms = algorithms + 2L
if ('Simpson' %in% adiv || 'InvSimpson' %in% adiv) algorithms = algorithms + 4L
storage.mode(algorithms) <- 'integer'
n_threads <- as.integer(cpus)
storage.mode(otu_mtx) <- 'double'
mtx <- matrix(
data = .Call(C_alpha_div, otu_mtx, algorithms, n_threads),
nrow = biom$n_samples,
ncol = 6,
dimnames = list(biom$samples, c('Depth', 'OTUs', 'Shannon', 'Chao1', 'Simpson', 'InvSimpson')) )
mtx <- mtx[,c('Depth', adiv), drop=FALSE]
if (transform != 'none')
for (metric in adiv)
mtx[,metric] <- switch(
EXPR = transform,
'rank' = base::rank(mtx[,metric]),
'log' = base::log(mtx[,metric]),
'log1p' = base::log1p(mtx[,metric]),
'sqrt' = base::sqrt(mtx[,metric]),
'percent' = tryCatch(
expr = mtx[,metric] / sum(mtx[,metric]),
error = function (e) { warning(e); NA_real_ } ))
attr(mtx, 'cmd') <- cmd
set_cache_value(cache_file, mtx)
return (mtx)
}
#' Summarize the taxa observations in each sample.
#'
#' @inherit documentation_default
#'
#' @family samples
#' @family rarefaction
#' @family taxa_abundance
#'
#' @param sort Sort the result. Options: `NULL` - don't sort; `"asc"` - in
#' ascending order (smallest to largest); `"desc"` - in descending order
#' (largest to smallest). Ignored when the result is not a simple
#' numeric vector. Default: `NULL`
#'
#' @param FUN The function to apply to each column of `taxa_matrix()`.
#'
#' @param ... Optional arguments to `FUN`.
#'
#' @return For `sample_sums`, A named numeric vector of the number of
#' observations in each sample. For `sample_apply`, a named vector or
#' list with the results of `FUN`. The names are the taxa IDs.
#'
#' @export
#' @examples
#' library(rbiom)
#' library(ggplot2)
#'
#' sample_sums(hmp50, sort = 'asc') %>% head()
#'
#' # Unique OTUs and "cultured" classes per sample
#' nnz <- function (x) sum(x > 0) # number of non-zeroes
#' sample_apply(hmp50, nnz, 'otu') %>% head()
#' sample_apply(hmp50, nnz, 'class', unc = 'drop') %>% head()
#'
#' # Number of reads in each sample's most abundant family
#' sample_apply(hmp50, base::max, 'f', sort = 'desc') %>% head()
#'
#' ggplot() + geom_histogram(aes(x=sample_sums(hmp50)), bins = 20)
sample_sums <- function (biom, rank = -1, sort = NULL, unc = "singly") {
sample_apply(biom, FUN = base::sum, rank, sort, unc)
}
#' @rdname sample_sums
#' @export
sample_apply <- function (biom, FUN, rank = -1, sort = NULL, unc = "singly", ...) {
stopifnot(is.function(FUN))
validate_var_choices('sort', choices = c('asc', 'desc'), null_ok = TRUE)
mtx <- taxa_matrix(biom = biom, rank = rank, sparse = TRUE, unc = unc)
res <- if (identical(FUN, base::sum)) { slam::col_sums(mtx)
} else if (identical(FUN, base::mean)) { slam::col_means(mtx)
} else { slam::colapply_simple_triplet_matrix(mtx, FUN, ...) }
if (is.numeric(res) && !is.null(sort))
res <- base::sort(res, decreasing = (sort == 'desc'))
return (res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.