# rank_abund ---------------------------------------------------------------
#' Rank Taxa by Abundance
#'
#' `rank_abund()` generates a data frame that's ready to be used by
#' [plot_rank_ab()]. It totals abundances within each sample and taxon with
#' [df_glom()], asks [subset_order()] for the taxon ordering, and then applies
#' that ordering to the taxon column of the full (unsubsetted) data frame.
#'
#' @param phy_df A dataframe of a phyloseq object, like that generated by
#'   [phyloseq::psmelt()] or [make_phy_df()]
#' @param gvars (`NULL`) A character vector of grouping variables from which
#'   the baseline values are chosen to define the abundance ordering. If it is
#'   `NULL`, the ordering will be based on mean abundances in the whole data
#'   frame.
#' @param bases (`NULL`) A character vector of baseline values for the
#'   variables given in `gvars`. The ordering of the taxa will be given based
#'   only on the samples with these baseline values for these variables. Must
#'   be in the same order as `gvars`.
#' @param abunds (`'Abundance'`) The name of the abundance column.
#' @param rank (`'Genus'`) The rank to base the ordering on. Must be a column
#'   in `phy_df`
#' @param IDcol (`'X.SampleID'`) The column name of the sample IDs
#'
#' @return The data frame produced by [df_glom()] (all rows of `phy_df` plus a
#'   `TotalAbunds` column), with the `rank` column converted to a factor whose
#'   levels are taken from the ordering returned by [subset_order()].
rank_abund = function(phy_df, gvars = NULL, bases = NULL, abunds = 'Abundance',
                      rank = 'Genus', IDcol = 'X.SampleID'){
  # Total the abundances within each sample/taxon combination. df_glom()
  # writes those totals to a column named 'TotalAbunds' by default.
  rank_abs = df_glom(phy_df, IDcol = IDcol, rank = rank, abunds = abunds)

  # Subset to the baseline samples and derive the taxon ordering from the
  # glommed totals. The abundance column is named explicitly so the link to
  # df_glom()'s output column is visible here.
  ranked = subset_order(rank_abs, gvars, bases, rank = rank,
                        abunds = 'TotalAbunds')

  # Apply that ordering to the full data frame. `[[` always extracts the
  # column as a vector, whether `ranked` is a data.frame or a tibble.
  lev_ord = levels(ranked[[rank]])
  rank_abs[[rank]] = factor(rank_abs[[rank]], levels = lev_ord)
  rank_abs
}
# subset_order -------------------------------------------------------------
#' Subset and generate taxon ordering
#'
#' `subset_order()` optionally restricts `phy_df` to the rows that match the
#' baseline values of the grouping variables, averages the `abunds` column
#' within each taxon at `rank`, and hands the result to [order_taxa()] to be
#' ordered by decreasing mean abundance. Used internally by [rank_abund()].
#'
#' @param phy_df A phyloseq data frame, as generated by
#'   [phyloseq::psmelt()], but probably generated by [df_glom()] or
#'   [make_phy_df()].
#' @param varbs (`NULL`) A character vector of grouping variables from which
#'   the baseline values are chosen to define the abundance ordering. If it is
#'   `NULL`, the ordering will be based on mean abundances in the whole data
#'   frame.
#' @param bases (`NULL`) A character vector of baseline values for the
#'   variables given in `varbs`. The ordering of the taxa will be given based
#'   only on the samples with these baseline values for these variables. Must
#'   be in the same order as `varbs`. If it is `NULL`, the whole data frame is
#'   used.
#' @param rank (`'Genus'`) The taxonomic rank to base the ordering on.
#' @param abunds (`'TotalAbunds'`) The name of the abundance column to
#'   average.
#'
#' @return The result of [order_taxa()] applied to a data frame with one row
#'   per group and a `MetaMean` column holding the mean of `abunds`.
subset_order = function(phy_df, varbs = NULL, bases = NULL, rank = 'Genus',
                        abunds = 'TotalAbunds'){
  # Check inputs. Without both grouping variables and baseline values there is
  # nothing to subset on, so fall back to the whole data set.
  if (is.null(varbs)){
    warning('No grouping variables given. Using whole data set.',
            call. = FALSE)
  } else if (is.null(bases)){
    warning(paste('No baseline values given for grouping variables.',
                  'Using whole data set.'),
            call. = FALSE)
    # Drop the grouping variables so that neither the filter nor the grouping
    # below splits the data by them.
    varbs = NULL
  } else if (length(varbs) != length(bases)){
    stop('varbs and bases must have the same length.')
  }

  # Subset to the baseline level of each grouping variable. seq_along() gives
  # an empty loop when `varbs` is NULL (1:length(NULL) would iterate over
  # c(1, 0)).
  ranked = phy_df
  for (i in seq_along(varbs)){
    col = varbs[i]
    val = bases[i]
    # `!!` injects the values up front so they cannot be shadowed by columns
    # of the same name in the data.
    ranked = dplyr::filter(ranked, .data[[!!col]] == !!val)
  }

  # Mean abundance per taxon (the grouping variables are kept as columns),
  # then order the taxa by decreasing mean.
  ranked %>%
    dplyr::group_by_at(c(varbs, rank)) %>%
    dplyr::summarize(MetaMean = mean(.data[[!!abunds]])) %>%
    data.frame() %>%
    order_taxa(rank, 'MetaMean', decreasing = TRUE)
}
# df_glom ------------------------------------------------------------------
#' Like tax_glom, but for data frames
#'
#' `df_glom()` takes totals within sample at a given taxonomic rank. The
#' totals are added as a new column; every row of `phy_df` is kept, so rows
#' that share a sample and taxon carry the same total.
#'
#' @param phy_df A phyloseq data frame, as generated by
#'   [phyloseq::psmelt()] or [make_phy_df()]
#' @param ranks (`NULL`) A character vector with the taxon rank names.
#'   Currently unused; kept so existing positional calls keep working.
#' @param IDcol (`'X.SampleID'`) The column name of the sample IDs
#' @param rank (`'Phylum'`) The taxonomic rank to glom at
#' @param abunds (`'Abundance'`) The column name of the abundances to sum
#' @param tots (`'TotalAbunds'`) The desired column name of the summed
#'   (glommed) abundances
#'
#' @return `phy_df` as a plain data frame with the additional `tots` column.
df_glom = function(phy_df, ranks = NULL, IDcol = 'X.SampleID', rank = 'Phylum',
                   abunds = 'Abundance', tots = 'TotalAbunds'){
  # Group by sample and taxon using the column names held in `IDcol` and
  # `rank`. Passing a character vector avoids any ambiguity with columns that
  # happen to be called "IDcol" or "rank".
  phy_df %>%
    dplyr::group_by_at(c(IDcol, rank)) %>%
    # mutate() (not summarize()) so every original row is retained
    dplyr::mutate(!!tots := sum(.data[[!!abunds]])) %>%
    # data.frame() drops the grouping and returns a plain data frame
    data.frame()
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.