#'@title Calculates summary statistics for each feature
#'@description Reshapes a wide abundance table to long format, nests the
#' abundances of each feature into a list-column, and computes per-feature
#' summaries: the dynamic range, the prevalence (proportion of nonzero
#' samples), the mean abundance, the skewness, the kurtosis and the standard
#' deviation.
#'@param df a data frame with a \code{SampleID} column; every other column is
#' treated as a feature holding that feature's abundance in each sample
#'@return a nested data frame with one row per feature: the \code{feature}
#' name, a list-column \code{data} holding the original values, and the
#' summary columns \code{dynamic_range}, \code{prevalence}, \code{mean_ab},
#' \code{skewness}, \code{kurtosis} and \code{sd}
#'@importFrom dplyr group_by filter select summarize summarize_all n mutate
#'@importFrom tidyr nest gather
#'@importFrom purrr map map_dbl
#'@importFrom moments skewness kurtosis
#'@importFrom magrittr %>%
#'@export
feat.summary.stats <- function(df) {
  df %>%
    # long format: one row per (SampleID, feature) pair
    tidyr::gather(key = 'feature', value = 'abundance', -SampleID) %>%
    dplyr::group_by(feature) %>%
    tidyr::nest() %>%
    dplyr::mutate(
      # qualified call: as_tibble is not among the names imported above
      data = purrr::map(data, ~ dplyr::as_tibble(.x)),
      # dyn.rng is defined in this file; calling it directly does not depend
      # on it being exported from the package namespace
      dynamic_range = purrr::map_dbl(data, ~ dyn.rng(.x$abundance)),
      # share of samples with a nonzero abundance; NA abundances are not
      # counted as nonzero but still count towards the denominator
      prevalence = purrr::map_dbl(
        data,
        ~ sum(.x$abundance != 0, na.rm = TRUE) / nrow(.x)
      ),
      mean_ab = purrr::map_dbl(data, ~ mean(.x$abundance)),
      skewness = purrr::map_dbl(data, ~ moments::skewness(.x$abundance)),
      kurtosis = purrr::map_dbl(data, ~ moments::kurtosis(.x$abundance)),
      sd = purrr::map_dbl(data, ~ stats::sd(.x$abundance))
    )
}
#'@title Calculates summary statistics for already-nested feature data
#'@description For every row of a nested data frame, summarises the
#' \code{abundance} column of the data frame stored in the \code{data}
#' list-column.
#'@param df.nest a nested data frame with a list-column \code{data} whose
#' elements each contain an \code{abundance} column
#'@return \code{df.nest} with the added numeric columns \code{dyn.rng}
#' (dynamic range), \code{prev} (prevalence), \code{mean.ab} (mean),
#' \code{sem} (standard error of the mean), \code{skew}, \code{kurt} and
#' \code{sd}
feat.stratified.stats <- function(df.nest) {
  df.nest %>%
    dplyr::mutate(
      dyn.rng = purrr::map_dbl(data, ~ dyn.rng(.x$abundance)),
      prev = purrr::map_dbl(data, ~ calc.single.prevalence(.x$abundance)),
      mean.ab = purrr::map_dbl(data, ~ mean(.x$abundance)),
      # standard error of the mean is sd / sqrt(n), not sd / n
      sem = purrr::map_dbl(
        data,
        ~ stats::sd(.x$abundance) / sqrt(length(.x$abundance))
      ),
      skew = purrr::map_dbl(data, ~ moments::skewness(.x$abundance)),
      kurt = purrr::map_dbl(data, ~ moments::kurtosis(.x$abundance)),
      sd = purrr::map_dbl(data, ~ stats::sd(.x$abundance))
    )
}
# Computes log10(max / min) over the nonzero values, which equals
# log10(max) - log10(min); the original note says this definition was found
# in a publication. Other possible ways to calculate it include a plain
# difference, a base-2 difference, and a raw ratio of the min and max values.
#'@title Calculates the dynamic range of a given feature
#'@description the dynamic range may be defined as the log of the ratio
#' of the highest and lowest values a feature may take; zeros are excluded
#' before the ratio is taken
#'@param x a vector of relative abundances
#'@return a single numeric value; \code{NaN} when \code{x} has no nonzero
#' values
dyn.rng <- function(x) {
  nonzero <- x[x != 0]
  # With nothing left, max()/min() would warn and yield -Inf/Inf, whose
  # log-ratio is NaN; return that value directly without the warnings.
  if (length(nonzero) == 0L) {
    return(NaN)
  }
  log10(max(nonzero) / min(nonzero))
}
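# NOTE(review): the next two lines appear to be web-page boilerplate rather
# than R code; they are kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.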