knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)

Longitudinal data presents challenges for visualization.

Microbiota plasticity

Calculate plasticity

See Grembi, J.A., Nguyen, L.H., Haggerty, T.D. et al. Gut microbiota plasticity is correlated with sustained weight loss on a low-carb or low-fat dietary intervention. Sci Rep 10, 1405 (2020).

Data from microbiome package is used here for example.

library(microbiome)
library(microbiomeutilities)
library(dplyr)
library(ggpubr)
data(peerj32)
pseq <- peerj32$phyloseq
pseq.rel <- microbiome::transform(pseq, "compositional")
pl <- plasticity(pseq.rel, dist.method = "bray", participant.col="subject")
head(pl)

Alternatively, using correlation methods for plasticity, one can check for similarity between technical replicates (2X sequenced same sample) for quality check.

Plot plasticity

ggplot(pl, aes(group_2,bray)) + 
  geom_boxplot(aes(fill=group_2), 
               alpha=0.5, 
               na.rm = TRUE, 
               width=0.5) +
  geom_jitter(aes(color=group_2), 
              alpha=0.5, 
              size=3, 
              na.rm = TRUE) +
  scale_fill_manual(values = c("#457b9d", "#e63946"))+
  scale_color_manual(values = c("#457b9d", "#e63946"))+
  stat_compare_means() + 
  theme_biome_utils()

Area plot

Area plots can be used to visualize changes in abundances in individual participants, or bioreactors sampled over time.
Here, we use a randomly chosen small subset of data from HMP2data.

library(microbiomeutilities)
library(RColorBrewer)
data("hmp2")
ps <- hmp2
ps.rel <- microbiome::transform(ps, "compositional") 
# chose specific participants to plot data.
pts <- c("Participant_1","Participant_2",
         "Participant_8","Participant_5")
#pts <- "Participant_1"
ps.rel <- subset_samples(ps.rel, subject_id %in% pts)
p <- plot_area(ps.rel, xvar="visit_number", 
               level = "Family",
               facet.by = "subject_id",
               abund.thres = 0.1,
               prev.thres=0.1,
               fill.colors=brewer.pal(12,"Paired"),
               ncol=2,
               nrow=2)
p + ylab("Relative abundance") + 
  scale_y_continuous(labels = scales::percent)

Paired abundances

We use a subset of HMP2 data

library(microbiome)
library(microbiomeutilities)
#library(gghalves)
library(tidyr)

data("hmp2")
ps <- hmp2 
# pick visit 1 and 2
ps.sub <- subset_samples(ps, visit_number %in% c(1,2))
ps.sub <- prune_taxa(taxa_sums(ps.sub)>0, ps.sub)

ps.rel <- microbiome::transform(ps.sub, "compositional")
ps.rel.f <- format_to_besthit(ps.rel)

#top_taxa(ps.rel.f, 5)
# Check how many have both time points
table(meta(ps.rel.f)$visit_number)
#table(meta(ps.rel.f)$visit_number, meta(ps.rel.f)$subject_id)
# there are two Participant_3 and Participant_11 with no time point2 sample 

select.taxa <- c("4430843:g__Prevotella", "580629:g__Bacteroides")

group.colors = c("brown3", "steelblue", "grey70")


p <- plot_paired_abundances (ps.rel.f,
                             select.taxa=select.taxa,
                             group="visit_number",
                             group.colors=group.colors,
                             dot.opacity = 0.25,
                             dot.size= 2,
                             add.violin = TRUE,
                             line = "subject_id",
                             line.down = "#7209b7",
                             line.stable = "red",
                             line.up = "#14213d", 
                             line.na.value = "red",
                             line.guide = "none",
                             line.opacity = 0.25,
                             line.size = 1)
print(p + xlab("Time point") + ylab("Relative abundance"))

The lines are colored according to their change in abundance from time 1 to time 2.

Spaghetti plots

One participant many taxa
Reference for visualization and original code Data to Viz.com

The ordering of panel on top of each other is better for comparisons. However, practical consideration can be made on number of columns and rows to distribute the panels.

library(microbiomeutilities)
data("hmp2")
pseq <- hmp2 # Ren
pseq.rel <- microbiome::transform(pseq, "compositional")
pseq.relF <- format_to_besthit(pseq.rel)

# Choose one participant
phdf.s <- subset_samples(pseq.relF, subject_id==
                           "Participant_1")
# Choose top 12 taxa to visualize
ntax <- top_taxa(phdf.s, 4)
phdf.s <- prune_taxa(ntax, phdf.s)

plot_spaghetti(phdf.s, plot.var= "by_taxa",
               select.taxa=ntax,
               xvar="visit_number",
               line.bg.color="#8d99ae",
               focus.color="brown3",
               focus.line.size = 1,
               ncol=1,
               nrow=4,
               line.size=0.2)

One taxa many participants

pseq.relF <- format_to_besthit(pseq.rel)
ntax2 <- core_members(pseq.relF, 0.001, 0.5)
# chose first for example
ntax2 # only
# check how many participants are there in "subject_id" 
length(unique(meta(pseq.relF)$subject_id))

# There are 13 participants. Choose ncol and nrow accordingly 

plot_spaghetti(pseq.relF, plot.var= "by_sample",
               select.taxa=ntax2,
               group= "subject_id",
               xvar="visit_number",
               line.bg.color="#8d99ae",
               focus.color="brown3",
               focus.line.size = 1,
               ncol=4,
               nrow=6,
               line.size=0.2)

This package is part of the microbiomeverse tools.
See also microbiome R/BioC package
Contributions are welcome:

Issue Tracker
Pull requests
Star us on the Github page

sessionInfo()


microsud/microbiomeutilities documentation built on Nov. 29, 2022, 12:18 a.m.