This vignette explores the Global Patterns microbiome data available from phyloseq, which includes water samples, land samples, and human samples.
Learn more about the phyloseq package here.
Additionally, the package speedyseq is necessary to use the function `prep_mdf()`. The package speedyseq provides faster versions of phyloseq’s plotting and taxonomic merging functions. Alternatively, the phyloseq object can be agglomerated and transformed by using the phyloseq functions `tax_glom()` and/or `transform_sample_counts()`, and melted by using `psmelt()`.
# Attach the packages used throughout this vignette. The attach order is kept
# as in the original because it determines which package masks which.
library(microshades)
library(phyloseq)
library(ggplot2)
library(dplyr)
library(cowplot)
library(patchwork)
library(forcats)
library(tidyverse)

# The dataset Global Patterns is a phyloseq object available from the
# phyloseq package
data(GlobalPatterns)
prep_mdf
Use `prep_mdf` to agglomerate and normalize the phyloseq object, and melt it to a data frame. Here we specify that NA values should be removed with the `remove_na` parameter, which can be adjusted according to the needs of your visualization and analysis.
# Agglomerate, normalize, and melt GlobalPatterns in one call;
# remove_na = TRUE asks for NA values to be removed.
mdf_prep <- GlobalPatterns %>%
  prep_mdf(remove_na = TRUE)
There is an alternative to using this function if you do not have speedyseq:
# speedyseq-free route: agglomerate at Genus, convert each sample's counts to
# proportions of that sample's total, melt to a data frame, and keep only rows
# with non-zero abundance.
mdf_prep <- GlobalPatterns %>%
  tax_glom("Genus") %>%
  phyloseq::transform_sample_counts(function(x) {
    x / sum(x)
  }) %>%
  psmelt() %>%
  filter(Abundance > 0)
Both `prep_mdf` and the above option will produce the same results. However, `prep_mdf` uses the speedyseq package to increase the speed of `tax_glom` and `psmelt`, which may be preferable when working with large datasets.
create_color_dfs
Use `create_color_dfs` to generate a color object for the specified data. Then extract the objects used to plot: `mdf` represents the object to plot; `cdf` represents the coloring.
# Generate the color object for the prepared data, highlighting the five
# selected groups (cvd = TRUE is passed through to create_color_dfs).
color_objs_GP <- create_color_dfs(
  mdf_prep,
  selected_groups = c(
    "Verrucomicrobia", "Proteobacteria", "Actinobacteria",
    "Bacteroidetes", "Firmicutes"
  ),
  cvd = TRUE
)

# Extract
mdf_GP <- color_objs_GP$mdf
cdf_GP <- color_objs_GP$cdf
Use `mdf_GP` as the object to plot and use `cdf_GP` to assign the correct colors.
# Base plot. The name `plot` is kept because later chunks build on this object.
plot <- plot_microshades(mdf_GP, cdf_GP)

# add customizations with ggplot
plot_1 <- plot +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.key.size = unit(0.2, "cm"),
    text = element_text(size = 10),
    axis.text.x = element_text(size = 6)
  )

plot_1
The `plot_microshades` function returns a ggplot object, which allows additional specifications for the plot to be declared. For example, this allows users to facet samples and add other descriptive elements.
# Same customizations as before, now faceted by SampleType in two rows with
# an independent x axis per facet.
plot_2 <- plot +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.key.size = unit(0.2, "cm"),
    text = element_text(size = 10),
    axis.text.x = element_text(size = 6),
    strip.text.x = element_text(size = 6)
  ) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2)

plot_2
To ensure that all elements of the custom legend are visible, adjust `legend_key_size` and `legend_text_size`. If using R Markdown, it may be helpful to adjust `fig.height` and `fig.width` to produce a plot with the appropriate dimensions.
Use `plot_grid` from the cowplot package to plot the custom legend with the visualization.
To follow a detailed tutorial on how to use the `custom_legend` function, see the custom legend vignette.
# Build the custom legend from the same data and color objects as the plot.
GP_legend <- custom_legend(mdf_GP, cdf_GP)

# Drop the built-in ggplot legend (the custom one replaces it) and widen the
# right margin. The original set axis.text.x twice; once is enough.
plot_diff <- plot +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 6),
    plot.margin = margin(6, 20, 6, 6)
  ) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2)

# Plot and legend side by side; the legend gets a quarter of the plot's width.
plot_grid(plot_diff, GP_legend, rel_widths = c(1, .25))
Here, we plot with extended Proteobacteria colors. Note the expansion of Proteobacteria groups in the legend.
# Extend the Proteobacteria group (Phylum -> Genus) with a second palette.
new_groups <- extend_group(
  mdf_GP, cdf_GP, "Phylum", "Genus", "Proteobacteria",
  existing_palette = "micro_cvd_orange",
  new_palette = "micro_orange",
  n_add = 5
)

GP_legend_new <- custom_legend(new_groups$mdf, new_groups$cdf)

# Re-plot from the extended objects. The original set axis.text.x twice;
# once is enough.
plot_diff <- plot_microshades(new_groups$mdf, new_groups$cdf) +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 6),
    plot.margin = margin(6, 20, 6, 6)
  ) +
  facet_wrap(~SampleType, scales = "free_x", nrow = 2)

plot_grid(plot_diff, GP_legend_new, rel_widths = c(1, .25))
Re-examine data with smaller groups by plotting subsets of the data. Here, we separate by sample type. Then, follow the prep → create → extract → plot sequence with each subset.
# Water samples: subset, then prep -> create -> reorder -> extract.
ps_water <- subset_samples(
  GlobalPatterns,
  SampleType %in% c("Freshwater", "Freshwater (creek)", "Ocean")
)

mdf_water <- prep_mdf(ps_water)

color_objs_water <- create_color_dfs(
  mdf_water,
  selected_groups = c(
    "Verrucomicrobia", "Proteobacteria", "Actinobacteria",
    "Bacteroidetes", "Firmicutes"
  ),
  cvd = TRUE
)

# reorder_samples_by is called with its defaults; its result replaces the
# color object, so the extraction below picks up the reordered versions.
color_objs_water <- reorder_samples_by(
  color_objs_water$mdf,
  color_objs_water$cdf
)

mdf_water <- color_objs_water$mdf
cdf_water <- color_objs_water$cdf
# Custom legend and faceted plot for the water samples.
water_legend <- custom_legend(mdf_water, cdf_water)

water_plot <- plot_microshades(mdf_water, cdf_water) +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8),
    strip.text.x = element_text(size = 8)
  ) +
  facet_wrap(~SampleType, scales = "free_x")

plot_grid(water_plot, water_legend, rel_widths = c(1, .25))
Use `plot_contributions` to create median and mean abundance barplots and boxplots.
# One contribution object per water sample type; each exposes $box, $mean,
# and $median plots.
freshwater_contribution <- plot_contributions(
  mdf_water, cdf_water, "SampleType", "Freshwater"
)
creek_contribution <- plot_contributions(
  mdf_water, cdf_water, "SampleType", "Freshwater (creek)"
)
ocean_contribution <- plot_contributions(
  mdf_water, cdf_water, "SampleType", "Ocean"
)

# Theme that hides the y-axis title, labels, and ticks. In each composition
# below it is added directly after the second and third panels, exactly where
# the original repeated the same theme() call inline.
hide_y_axis <- theme(
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

# Boxplots
freshwater_contribution$box +
  creek_contribution$box + hide_y_axis +
  ocean_contribution$box + hide_y_axis

# Mean abundance
freshwater_contribution$mean +
  creek_contribution$mean + hide_y_axis +
  ocean_contribution$mean + hide_y_axis

# Median abundance
freshwater_contribution$median +
  creek_contribution$median + hide_y_axis +
  ocean_contribution$median + hide_y_axis
# Land samples: subset, then prep -> create -> reorder -> extract.
ps_land <- subset_samples(
  GlobalPatterns,
  SampleType %in% c("Soil", "Sediment (estuary)")
)

mdf_land <- prep_mdf(ps_land)

color_objs_land <- create_color_dfs(
  mdf_land,
  selected_groups = c(
    "Verrucomicrobia", "Proteobacteria", "Actinobacteria",
    "Bacteroidetes", "Firmicutes"
  ),
  cvd = TRUE
)

# reorder_samples_by is called with its defaults; its result replaces the
# color object, so the extraction below picks up the reordered versions.
color_objs_land <- reorder_samples_by(
  color_objs_land$mdf,
  color_objs_land$cdf
)

mdf_land <- color_objs_land$mdf
cdf_land <- color_objs_land$cdf
# Custom legend and faceted plot for the land samples.
land_legend <- custom_legend(mdf_land, cdf_land)

land_plot <- plot_microshades(mdf_land, cdf_land) +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8),
    strip.text.x = element_text(size = 8)
  ) +
  facet_wrap(~SampleType, scales = "free_x")

plot_grid(land_plot, land_legend, rel_widths = c(1, .25))
# One contribution object per land sample type.
sediment_contribution <- plot_contributions(
  mdf_land, cdf_land, "SampleType", "Sediment (estuary)"
)
soil_contribution <- plot_contributions(
  mdf_land, cdf_land, "SampleType", "Soil"
)

# Boxplots side by side; the theme that hides the y-axis title, labels, and
# ticks is added directly after the soil panel.
sediment_contribution$box +
  soil_contribution$box +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
# Human samples: subset, then prep -> create -> reorder -> extract.
ps_human <- subset_samples(
  GlobalPatterns,
  SampleType %in% c("Skin", "Feces", "Tongue")
)

mdf_human <- prep_mdf(ps_human)

color_objs_human <- create_color_dfs(
  mdf_human,
  selected_groups = c(
    "Verrucomicrobia", "Proteobacteria", "Actinobacteria",
    "Bacteroidetes", "Firmicutes"
  ),
  cvd = TRUE
)

# reorder_samples_by is called with its defaults; its result replaces the
# color object, so the extraction below picks up the reordered versions.
color_objs_human <- reorder_samples_by(
  color_objs_human$mdf,
  color_objs_human$cdf
)

mdf_human <- color_objs_human$mdf
cdf_human <- color_objs_human$cdf
# Custom legend and faceted plot for the human samples.
human_legend <- custom_legend(mdf_human, cdf_human)

human_plot <- plot_microshades(mdf_human, cdf_human) +
  scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8),
    strip.text.x = element_text(size = 8)
  ) +
  facet_wrap(~SampleType, scales = "free_x")

plot_grid(human_plot, human_legend, rel_widths = c(1, .25))
# One contribution object per human sample type.
feces_contribution <- plot_contributions(
  mdf_human, cdf_human, "SampleType", "Feces"
)
skin_contribution <- plot_contributions(
  mdf_human, cdf_human, "SampleType", "Skin"
)
tongue_contribution <- plot_contributions(
  mdf_human, cdf_human, "SampleType", "Tongue"
)

# Theme that hides the y-axis title, labels, and ticks. It is added directly
# after the second and third panels, exactly where the original repeated the
# same theme() call inline.
hide_y_axis <- theme(
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank()
)

# Boxplots
feces_contribution$box +
  skin_contribution$box + hide_y_axis +
  tongue_contribution$box + hide_y_axis
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.