# Attribution: The documentation for `psmelt()` and the first 54 lines of the
# function's code are from phyloseq,
# https://github.com/joey711/phyloseq/blob/master/R/plot-methods.R
#' Melt phyloseq data object into large data.frame
#'
#' The psmelt function is a specialized melt function for melting phyloseq
#' objects (instances of the phyloseq class), usually for producing graphics
#' with [ggplot2::ggplot2]. The naming conventions used in downstream phyloseq
#' graphics functions have reserved the following variable names that should
#' not be used as the names of [sample_variables()] or taxonomic
#' [rank_names()]. These reserved names are `c("Sample", "Abundance", "OTU")`.
#' Also, you should not have identical names for sample variables and taxonomic
#' ranks. That is, the intersection of the output of the following two
#' functions [sample_variables()], [rank_names()] should be an empty vector
#' (e.g. `intersect(sample_variables(physeq), rank_names(physeq))`). All of
#' these potential name collisions are checked-for and renamed automatically
#' with a warning. However, if you (re)name your variables accordingly ahead of
#' time, it will reduce confusion and eliminate the warnings.
#'
#' The `as` argument allows choosing between three classes for tabular data:
#'
#' - [data.frame-class] is chosen by "data.frame" or "df"
#' - [data.table-class] is chosen by "data.table" or "dt"
#' - [tbl_df-class] is chosen by "tbl_df", "tbl", or "tibble"
#'
#' The default is "data.frame" and can be overridden by setting the
#' "speedyseq.psmelt_class" option.
#'
#' Note that "melted" phyloseq data is stored much less efficiently, and so RAM
#' storage issues could arise with a smaller dataset (smaller number of
#' samples/OTUs/variables) than one might otherwise expect. For common sizes
#' of graphics-ready datasets, however, this should not be a problem. Because
#' the number of OTU entries has a large effect on the RAM requirement, methods
#' to reduce the number of separate OTU entries -- for instance by
#' agglomerating OTUs based on phylogenetic distance using [tip_glom()] -- can
#' help alleviate RAM usage problems. This function is made user-accessible for
#' flexibility, but is also used extensively by plot functions in phyloseq.
#'
#' @param physeq An [otu_table-class] or [phyloseq-class]; most useful for
#' phyloseq-class.
#' @param as Class of the output table; see Details.
#'
#' @return A table of the specified class
#'
#' @seealso
#' [plot_bar()]
#'
#' @export
#'
#' @examples
#' data("GlobalPatterns")
#' gp.ch = subset_taxa(GlobalPatterns, Phylum == "Chlamydiae")
#' mdf = psmelt(gp.ch)
#' nrow(mdf)
#' ncol(mdf)
#' colnames(mdf)
#' head(rownames(mdf))
#' # Create a ggplot similar to
#' library("ggplot2")
#' p = ggplot(mdf, aes(x=SampleType, y=Abundance, fill=Genus))
#' p = p + geom_bar(color="black", stat="identity", position="stack")
#' print(p)
psmelt <- function(physeq, as = getOption("speedyseq.psmelt_class")) {
stopifnot(as %in% c("data.frame", "df", "data.table", "dt", "tbl_df", "tbl",
"tibble"))
# Access covariate names from object, if present
if(!inherits(physeq, "phyloseq")){
rankNames = NULL
sampleVars = NULL
} else {
# Still might be NULL, but attempt access
rankNames = rank_names(physeq, FALSE)
sampleVars = sample_variables(physeq, FALSE)
}
# Define reserved names
reservedVarnames = c("Sample", "Abundance", "OTU")
# type-1a conflict: between sample_data
# and reserved psmelt variable names
type1aconflict = intersect(reservedVarnames, sampleVars)
if(length(type1aconflict) > 0){
wh1a = which(sampleVars %in% type1aconflict)
new1a = paste0("sample_", sampleVars[wh1a])
# First warn about the change
warning("The sample variables: \n",
paste(sampleVars[wh1a], collapse=", "),
"\n have been renamed to: \n",
paste0(new1a, collapse=", "), "\n",
"to avoid conflicts with special phyloseq plot attribute names.")
# Rename the sample variables.
colnames(sample_data(physeq))[wh1a] <- new1a
}
# type-1b conflict: between tax_table
# and reserved psmelt variable names
type1bconflict = intersect(reservedVarnames, rankNames)
if(length(type1bconflict) > 0){
wh1b = which(rankNames %in% type1bconflict)
new1b = paste0("taxa_", rankNames[wh1b])
# First warn about the change
warning("The rank names: \n",
paste(rankNames[wh1b], collapse=", "),
"\n have been renamed to: \n",
paste0(new1b, collapse=", "), "\n",
"to avoid conflicts with special phyloseq plot attribute names.")
# Rename the conflicting taxonomic ranks
colnames(tax_table(physeq))[wh1b] <- new1b
}
# type-2 conflict: internal between tax_table and sample_data
type2conflict = intersect(sampleVars, rankNames)
if(length(type2conflict) > 0){
wh2 = which(sampleVars %in% type2conflict)
new2 = paste0("sample_", sampleVars[wh2])
# First warn about the change
warning("The sample variables: \n",
paste0(sampleVars[wh2], collapse=", "),
"\n have been renamed to: \n",
paste0(new2, collapse=", "), "\n",
"to avoid conflicts with taxonomic rank names.")
# Rename the sample variables
colnames(sample_data(physeq))[wh2] <- new2
}
# Enforce OTU table orientation. Redundant-looking step
# supports "naked" otu_table as `physeq` input.
otutab = otu_table(physeq)
if(!taxa_are_rows(otutab)){otutab <- t(otutab)}
## Speedyseq changes start here
# Convert the otu table to a tibble in tall form (one sample-taxon obsevation
# per row)
dtb <- otutab %>%
as("matrix") %>%
data.table::as.data.table(keep.rownames = "OTU") %>%
data.table::melt(id.vars = c("OTU"), variable.name = "Sample",
value.name = "Abundance", variable.factor = FALSE)
# Add the sample data if it exists
if (!is.null(sampleVars)) {
sam <- sample_data(physeq) %>%
as("data.frame") %>%
data.table::as.data.table(keep.rownames = "Sample")
dtb <- dtb[sam, on = .(Sample = Sample)]
}
# Add the tax table if it exists
if (!is.null(rankNames)) {
tax <- tax_table(physeq) %>%
as("matrix") %>%
data.table::as.data.table(keep.rownames = "OTU")
dtb <- dtb[tax, on = .(OTU = OTU)]
}
# Arrange by Abundance, then OTU names (to approx. phyloseq behavior)
dtb <- dtb %>%
data.table::setorder(-Abundance, OTU)
# Return as requested class
if (as %in% c("data.table", "dt")) {
dtb
} else if (as %in% c("data.frame", "df")) {
dtb %>% as("data.frame")
} else if (as %in% c("tbl_df", "tbl", "tibble")) {
dtb <- dtb %>% tibble::as_tibble()
attr(dtb, ".internal.selfref") <- NULL
dtb
} else {
stop("Invalid output class specified by `as`.")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.