#' Area and bar plots of sample-level relative abundance at specified taxonomic levels
#'
#' @param pseq An object of class \code{phyloseq}
#' @param top_n Integer. Number of taxonomic groups to display, sorted by relative abundance
#' @param taxrank Character. Taxonomic rank to display. Should be a column name of the \code{taxa_table} in \code{pseq}.
#' @param prop_of Character. Either "top_n" or "total". Indicates whether heights of area/bars should represent the proportion out of the displayed taxonomic groups, or the out of the total.
#' @param orderby_OTU Character (optional). The OTU name (i.e. rowname in \code{otu_table}) whose relative abundance you would like to sort by. This is what creates the visual gradient from left to right of, say, Firmicutes.
#' @param PALETTE Character of same length \code{top_n} of palette for area or bar chart.
#' @param type Character. Either area or bar, specifying which type of plot. Area displays all samples left to right, whereas bars give mean abundances.
#' @param brewerpal Character. Name of RColorBrewer palette to use.
#' @param by Character. Variable in \code{sample_data} to stratify plots by.
#' @param ttle Character. Plot title.
#' @export
plot_abundance <- function(pseq, top_n=8, taxrank="Phylum", prop_of="top_n", type="area",
orderby_OTU, PALETTE)
{
suppressPackageStartupMessages({
require(dplyr)
require(magrittr)
require(phyloseq)
require(ggplot2)
})
#glom the pseq by specified taxrank
glom = tax_glom(pseq, taxrank = taxrank)
#if applicable, calculate abundance as a proportion of total n
if(prop_of=="total") glom <- transform_sample_counts(glom, function(i) i / sum(i))
#if top_n is larger than the number of taxa at this rank, replace it with the number of taxa
if(is.infinite(top_n)) {
top_n <- nrow(tax_table(glom))
} else {
top_n = min(c(top_n, nrow(tax_table(glom))))
}
#subset the pseq object to only top taxa
top_taxa = names(sort(taxa_sums(glom), TRUE)[1:top_n])
top_subset <- prune_taxa(top_taxa, glom)
#sort by taxa abundance
taxa_to_sort <- data.frame(
id=1:top_n,
taxa = as.character(tax_table(top_subset)[,taxrank]),
otu = as.character(taxa_names(top_subset)),
row.names = as.character(taxa_names(top_subset))
)
taxa_order <- top_subset %>% otu_table %>% rowSums %>% sort(T) %>% names
taxa_to_sort <- taxa_to_sort[taxa_order, ]
#if applicable, calculate abundance as a proportion of top n
if(prop_of=="top_n") top_subset <- transform_sample_counts(top_subset, function(i) i / sum(i))
#melt for use in plotting
areadat <- psmelt(top_subset) %>% dplyr::select(OTU, Sample, Abundance) %>%
mutate(Sample = as.numeric(factor(Sample)), # geom_area only seems to work with a numeric x
OTU = factor(OTU, # sort by taxa
levels=taxa_to_sort$otu,
labels=taxa_to_sort$taxa))
#if missing, orderby_OTU is the most abundant taxa
if(missing(orderby_OTU)) orderby_OTU <- taxa_to_sort$taxa[1]
# get order by particular OTU abundance
OTU_order <- areadat %>%
filter(OTU==orderby_OTU) %>%
arrange(Abundance) %>%
dplyr::select(Sample)
# apply OTU order to data.frame
areadat %<>% mutate(Sample = as.numeric(
factor(Sample, levels=factor(OTU_order$Sample))))
#get percentages for label
mean_abundance <- apply(otu_table(top_subset)[taxa_order,], 1, function(i) round(100*mean(i), 1))
sd_abundance <- apply(otu_table(top_subset)[taxa_order,], 1, function(i) round(100*sd(i), 1))
levels(areadat$OTU) <- paste0(levels(areadat$OTU), " (", mean_abundance, " ± ", sd_abundance, "%)")
#create plot
if(type=="area") {
plt <- ggplot(arrange(areadat, desc(OTU), Abundance),
aes(x=Sample, y=Abundance, fill=OTU))+
geom_area() + theme_minimal() +
labs(fill=taxrank)
#apply palette if applicable
if(!missing(PALETTE)) plt <- plt + scale_fill_manual(values=PALETTE)
} else if(type=="bar") {
plt <- ggplot(arrange(areadat, desc(OTU), Abundance),
aes(x=OTU, y=Abundance, fill=OTU)) +
geom_col() + theme_minimal() +
theme(axis.text.x = element_text(vjust=1, hjust=1, angle=50),
legend.position = "none")
#apply palette if applicable
if(!missing(PALETTE)) plt <- plt + scale_fill_manual(values=PALETTE)
}
plt + ggtitle(paste(taxrank, "abundance"))
}
#' @export
#' @rdname plot_abundance
plot_abundance_by <- function(pseq, brewerpal="Set1", by, type=c("area", "bar"),
ttle=paste0("Relative phylum abundances by ", by)){
suppressPackageStartupMessages({
library(RColorBrewer)
library(dplyr)
library(ggplot2)
})
set.seed(14)
palet=sample(brewer.pal(9, brewerpal), size = 9, replace = FALSE)
#extract by vector and its levels
#by_vect <- eval(quote(by), sample_data(pseq))
by_vect <- eval(parse(text=paste0("sample_data(pseq)$",by)))
by_levels <- levels(factor(by_vect))
#glom by phylum
glom <- tax_glom(pseq, taxrank = "Phylum")
#subset most abundant 8 phyla for each level of factor
phyla8_subsets <- vector("list", length(by_levels))
for(j in seq_along(by_levels)) {
samp <- prune_samples(by_vect==by_levels[[j]], glom)
phyla8_subsets[[j]] <-
prune_taxa(names(sort(taxa_sums(samp), TRUE)[1:8]),
samp)
rm(samp)
}
#sort by phylum abundance
phyla_to_sort <- vector("list", length(by_levels))
for(i in seq_along(by_levels)) {
phyla_to_sort[[i]] <- data.frame(id=1:8, phyla = as.character(tax_table(phyla8_subsets[[i]])[,"Phylum"]),
otu = as.character(taxa_names(phyla8_subsets[[i]])))
rownames(phyla_to_sort[[i]]) <- phyla_to_sort[[i]]$otu
phylum_ranks <- phyla8_subsets[[i]] %>% otu_table %>% rowSums %>% sort(TRUE) %>% names
phyla_to_sort[[i]] <- phyla_to_sort[[i]][phylum_ranks, ]
}
#calculate abundance as a proportion of top 8
prop <- vector("list", length(by_levels))
for(i in seq_along(by_levels)) {
prop[[i]] <- transform_sample_counts(phyla8_subsets[[i]], function(j) j / sum(j))
}
#melt for use in plotting
areadat <- vector("list", length(by_levels))
for(i in seq_along(by_levels)) {
areadat[[i]] <- psmelt(prop[[i]]) %>% dplyr::select(OTU, Sample, Abundance) %>%
mutate(Sample = as.numeric(factor(Sample))) # geom_area only seems to work with a numeric x
areadat[[i]]$OTU = factor(areadat[[i]]$OTU, # sort by phyla
levels=phyla_to_sort[[i]]$otu,
labels=as.character( phyla_to_sort[[i]]$phyla ) )
areadat[[i]]$level <- by_levels[i]
}
if(type[1]=="bar") {
#summarize mean abundance for bars
bardat <- do.call(rbind, areadat) %>%
group_by(level, OTU) %>%
summarise(mean_abundance = mean(Abundance))
bardat$level <- factor(bardat$level, levels=by_levels)
bar_abundance <- ggplot(data=arrange(bardat, level), aes(x=level, y=mean_abundance, fill=OTU)) +
geom_bar(position="fill", stat="identity") +
labs(fill="Phylum", x="", y="Relative Abundance") +
scale_fill_manual(values=palet) +
theme(legend.position="right") +
ggtitle(ttle)
bar_abundance
} else if (type[1]=="area") {
firmicutes_orders <- vector("list", length(by_levels))
for(i in seq_along(by_levels)) {
firmicutes_orders[[i]] <- areadat[[i]] %>% # get order by firmicutes abundance
filter(OTU=="Firmicutes") %>%
arrange(Abundance) %>%
dplyr::select(Sample)
#apply order to 'sample' variable in melted abundances
areadat[[i]]$Sample = as.numeric(factor(areadat[[i]]$Sample, levels=factor(firmicutes_orders[[i]]$Sample)))
}
areadat_bind <- do.call(rbind, areadat)
#area plot
area_abundance <- ggplot(arrange(areadat_bind, desc(OTU), Abundance), aes(x=Sample, y=Abundance, fill=OTU))+
geom_area()+
labs(fill="Phylum")+
scale_fill_manual(values=palet) +
facet_grid(. ~ level, scales = "free_x")+
theme(axis.text.x = element_blank(),
legend.position="bottom") +
ggtitle(ttle)
area_abundance
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.