##### make_phy_df --------------------------------------------------------------
#' Generate a Data Frame for Taxon Bar Charts
#'
#' `make_phy_df` generates a data frame that is useful for generating taxon
#' bar charts.
#'
#' @section Details: This function takes a phyloseq object and generates a data
#' frame that is useful for plotting taxon abundance information. By default
#' it propagates taxon assignment information down the tree into unassigned
#' leves, and aggregates all taxa below 0.1 percent into a single 'Other'
#' category. This function expects the phyloseq object to be relative
#' abundance, and weird things will happen if it is not.
#'
#' @section Value: A data frame similar in structure to that generated by
#' [phyloseq::psmelt()], but with an 'Other' category added and
#' taxon levels ordered for use in plotting.
#'
#' @param physeq A phyloseq object.
#' @param rank The rank at which to glom taxa. Must be one of 'OTU', 'Genus',
#' 'Family', 'Order', 'Class', 'Phylum'. Default is 'Genus'.
#' @param cutoff The abundance cutoff below which taxa are grouped into 'Other'.
#' If you don't want anything grouped into 'Other', set this to 0. Default is
#' 0.001.
#' @param indic a flag to indicate if the taxon names have level indicators. If
#' FALSE, they are added.
#' @param prop Specifies whether taxa need to be propogated down the taxonomy
#' table (default is `TRUE`) or if this has already been done.
#' @param count If `FALSE` (default) the function will expect a relative
#' abundance table and create an 'Other' category for taxa below the cutoff
#' (and will raise an error if the table is not relative abundance). If TRUE,
#' the function will not check for relative abundance and will not create an
#' 'Other' category.
#' @export
make_phy_df = function(physeq, rank = 'Genus', cutoff = 0.001, indic = FALSE,
prop = TRUE, count = FALSE){
# Check that its relab
if (!count & any(phyloseq::otu_table(physeq) > 1)){
stop('physeq must be a relative abundance table. You have counts > 1.')
}
ranks = colnames(phyloseq::tax_table(physeq))
if ((rank == 'OTU') & !('OTU' %in% ranks)) {
ranks = c(ranks, rank)
}
# Propogate taxonomic assignments down the tree
if (prop){
physeq = prop_tax_down(physeq, indic)
}
# Glom to the correct taxonomic rank
if (rank == 'OTU'){
phyl_glommed = physeq
} else {
phyl_glommed = phyloseq::tax_glom(physeq, taxrank = rank)
}
# Set all counts < the cutoff to zero
phyloseq::otu_table(phyl_glommed)[phyloseq::otu_table(phyl_glommed) <
cutoff] = 0
# Check for sample_data() and create it with sample names if it's missing
if (is_null(physeq@sam_data)){
samdat = data.frame('Sample' = phyloseq::sample_names(physeq))
rownames(samdat) = phyloseq::sample_names(physeq)
phyloseq::sample_data(physeq) = samdat
}
# Melt and sort, then filter out taxa that are 0 in their sample
abunds = (phyl_glommed
%>% phyloseq::psmelt()
%>% data.frame()
%>% dplyr::filter(Abundance > 0))
# List all the metadata columns so that they are included in the data frame
metacols = colnames(phyloseq::sample_data(physeq))
# Make an 'Other' row for each sample
others = (abunds
%>% dplyr::group_by(across(all_of(metacols)))
%>% dplyr::summarize(Abundance=remain(Abundance), .groups = 'drop'))
# Add in the taxonomic data columns
taxcols = names(abunds)[match(ranks[1],names(abunds)):ncol(abunds)]
if (rank == 'OTU'){
taxcols = c(taxcols, 'OTU')
}
if (! count){
others[taxcols] = 'Other'
# Combine the 'Other' data frame with the original
if (length(metacols) > 1){
newdf = abunds[,metacols]
} else {
newdf = data.frame(abunds[,metacols])
colnames(newdf) = metacols
}
newdf$Abundance = abunds$Abundance
newdf[,taxcols] = abunds[,taxcols]
newdf = rbind(as.data.frame(others),
as.data.frame(newdf))
} else {
newdf = abunds
}
# Order the tax cols by mean abundance
for (r in taxcols){
newdf = order_taxa(newdf, r)
}
return(newdf)
}
##### remain -------------------------------------------------------------------
#' Calculate the difference between included taxa and 100% for the 'Other'
#' section of a phyloseq data frame.
#'
#' @section Value: a numeric vector
#'
#' @param x A numeric vector
#' @param tot The desired total. Default is 1.
remain = function(x, tot = 1){
tot-sum(x)
}
##### order_taxa ---------------------------------------------------------------
#' Order Taxon Name Factors
#'
#' `order_taxa()` reorders the taxon names in a taxon column (e.g. 'Class' or
#' 'Phylum') by the taxon's mean abundance (but always makes sure to put Other
#' first).
#'
#' @section Value: A data frame that is identical to the one given, but with the
#' specified column re-ordered by its mean abundance
#'
#' @param phy_df A data frame of a phyloseq object, as produced by
#' [phyloseq::psmelt()] or [make_phy_df()].
#' @param rank The name of the column to be re-ordered
#' @param abund The name of the abundances column. Defaults to 'Abundance'
#' @param decreasing Specifies whether the taxon order should be based on
#' decreasing or increasing abundance. Defaults to FALSE.
#' @export
order_taxa = function(phy_df, rank, abund = 'Abundance', decreasing = FALSE){
phy_df[,rank] = factor(phy_df[,rank])
total_abunds = (phy_df
%>% dplyr::filter(.data[[rank]] != 'Other')
%>% dplyr::group_by(.data[[rank]])
%>% dplyr::summarize(Tot = sum(.data[[abund]]))
%>% data.frame())
lev_ord = levels(droplevels(total_abunds[,rank]))
if (decreasing){
lev_ord = lev_ord[order(-total_abunds$Tot)]
} else{
lev_ord = lev_ord[order(total_abunds$Tot)]
}
phy_df[,rank] = factor(phy_df[,rank], levels = c('Other',lev_ord))
return(phy_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.