library(Cairo) knitr::opts_chunk$set(cache = FALSE, fig.width = 9, dpi=300, dev = "png", dev.args = list(type = "cairo-png"), message = FALSE, warning = FALSE)
library(mia) library(miaTime) library(dplyr) library(lubridate) library(knitr) library(TreeSummarizedExperiment)
Dense samples of the minimal gut microbiome. In the initial hours, MDb-MM was grown under batch condition and 24 h onwards, continuous feeding of media with pulse feeding cycles. This information is stored in the colData
.
library(miaTime) data(minimalgut) tse <- minimalgut # quick check of number of samples kable(table(colData(tse)$StudyIdentifier,colData(tse)$condition_1))
Visualize samples available for each of the bioreactors. This allows to identify if there are any missing samples for specific times.
library(ggplot2) library(tidySummarizedExperiment) tse |> ggplot(aes(as.factor(Time.hr), StudyIdentifier)) + geom_tile(aes(fill=condition_1), color="white") + scale_fill_manual("Condition Sampled", values = c("#ff006e", "#e07a5f", "#457b9d")) + theme_minimal() + theme(axis.text.x = element_text(size=8, angle = 90), legend.position = "top") + labs(x="Time (h)", y="")
The minimalgut
dataset, mucus-diet based minimal microbiome
(MDbMM-16), consists of 16 species assembled in three bioreactors. We
can investigate the succession of mdbMM16 from the start of experiment
here hour zero until the end of the experiment.
## Divergence from baseline i.e from hour zero. tse <- mia::relAbundanceCounts(minimalgut) # get relative abundance tse <- getBaselineDivergence(tse, group = "StudyIdentifier", time_field = "Time.hr", name_divergence = "divergence_from_baseline", name_timedifference = "time_from_baseline", assay.type="relabundance", FUN = vegan::vegdist, method="bray")
Visualize the divergence
# First define nice colors for bioreactors bioreac_cols <- c(`Bioreactor A`= "#b2182b", `Bioreactor B`= "#2166ac", `Bioreactor C` = "#35978f") tse |> ggplot(aes(x=Time.hr, y=divergence_from_baseline))+ geom_point(aes(color=StudyIdentifier), size=2, alpha=0.5) + geom_line(aes(color=StudyIdentifier)) + theme_minimal() + scale_color_manual(values = bioreac_cols) + labs(x="Time (h)", y="Divergence \nfrom baseline") + # highlight specific timepoints geom_vline(xintercept = 152, lty=2, color="#991720") + geom_vline(xintercept = 248, lty=2, color= "#0963bd")+ annotate("text",x=c(152, 248),y=c(0.8, 0.8), label=c("Addition of\nB.hydrogenotrophica","Acetate Discontinued"), hjust=c(1.05,-0.05))
Now visualize abundance of Blautia hydrogenotrophica using the miaViz::plotSeries
function.
library(miaViz) plotSeries(mia::relAbundanceCounts(minimalgut), x = "Time.hr", y = "Blautia_hydrogenotrophica", colour_by = "Species", assay.type = "relabundance")+ geom_vline(xintercept = 152, lty=2, color="#991720") + geom_vline(xintercept = 248, lty=2, color= "#0963bd")+ annotate("text",x=c(152, 248),y=c(0.2, 0.15), label=c("Addition of\nB.hydrogenotrophica","Acetate Discontinued"), hjust=c(1.05,-0.05))+ labs(x="Time (h)", y="B.hydrogenotrophica\nRelative Abundance") + theme(legend.position = "none")
Sample dissimilarity between consecutive time steps(step size n >= 1) within
a group(subject, age, reaction chamber, etc.) can be calculated by getStepwiseDivergence
. If we normalize this by the time interval, this gives an approximate slope for the change.
# Load libraries library(miaTime) library(dplyr) # Sort samples by time (necessary for getStepwiseDivergence) tse <- tse[, order(colData(tse)$Time_hr_num)] # Divergence between consecutive time points tse <- getStepwiseDivergence(tse, group = "StudyIdentifier", time_interval = 1, time_field = "Time_hr_num", name_divergence = "divergence_from_previous_step", name_timedifference = "time_from_previous_step", assay.type ="relabundance", FUN = vegan::vegdist, method="bray") # We have now new fields added in the colData: # time_from_previous_step, divergence_from_previous_step # print(colData(tse)) # Visualize the slope of dissimilarity between consecutive time points as a function of time (from baseline) library(ggplot2) theme_set(theme_bw(10)) p <- tse |> ggplot(aes(x=time_from_baseline, y=divergence_from_previous_step/time_from_previous_step, color=StudyIdentifier)) + geom_point() + geom_line() + labs(x="Time (hours)", y="Slope of dissimilarity (Bray-Curtis)") + geom_hline(aes(yintercept=0), linetype=2) print(p)
This shows how to calculate and plot moving average for the variable of interest (here: slope).
# Add slope explicitly in colData colData(tse)$slope <- colData(tse)$divergence_from_previous_step / colData(tse)$time_from_previous_step # Split by group and perform operation tselist <- mia::splitOn(tse, "StudyIdentifier") # colData(tse)$divergence_from_previous_step addmean <- function (x, k, field, field_name) { # Calculate rolling mean m <- zoo::rollmean(x[[field]], k = k) # Initialize empty field colData(x)[[field_name]] <- rep(NA, ncol(x)) # Fill in the rolling mean (length does not match with original data in rolling mean) colData(x)[1:length(m), field_name] <- m # Return the object with a new field added x } # Calculate sliding average for the field "divergence_from_previous_step" # and store the result in a new field with the name "sliding_average" tselist2 <- lapply(tselist, function (x) {addmean(x, k=3, field = "slope", field_name = "sliding_average")}) # Merge back tse <- mia::mergeSEs(tselist2) # Visualize theme_set(theme_bw(10)) p <- tse |> ggplot(aes(x = time_from_baseline, y = sliding_average, color=StudyIdentifier)) + geom_point() + geom_line() + labs(x="Time (hours)", y="Mean slope of dissimilarity (Bray-Curtis)") + geom_hline(aes(yintercept=0), linetype=2) print(p)
This shows how to visualize error bars on top of time series. In this example we create artificial replicates and variation for a brief example.
## Source: https://www.geeksforgeeks.org/adding-error-bars-to-a-line-graph-with-ggplot2-in-r/ ## Gives count, mean, standard deviation, standard error of the mean, ## and confidence interval (default 95%). ## data: a data frame. ## measurevar: the name of a column that contains the variable to be summariezed ## groupvars: a vector containing names of columns that contain grouping variables ## na.rm: a boolean that indicates whether to ignore NA's ## conf.interval: the percent range of the confidence interval (default is 95%) summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) { library(plyr) # New version of length which can handle NA's: if na.rm==T, don't count them length2 <- function (x, na.rm=FALSE) { if (na.rm) sum(!is.na(x)) else length(x) } # This does the summary. For each group's data frame, return a vector with # N, mean, and sd datac <- ddply(data, groupvars, .drop=.drop, .fun = function(xx, col) { c(N = length2(xx[[col]], na.rm=na.rm), mean = mean (xx[[col]], na.rm=na.rm), sd = sd (xx[[col]], na.rm=na.rm) ) }, measurevar ) # Rename the "mean" column datac <- rename(datac, c("mean" = measurevar)) datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean # Confidence interval multiplier for standard error # Calculate t-statistic for confidence interval: # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 ciMult <- qt(conf.interval/2 + .5, datac$N-1) datac$ci <- datac$se * ciMult return(datac) } # Create artificial example on replicates because we don't have any in this demo data set.seed(3422) df <- as.data.frame(colData(tse)) sdlevel <- 0.1*mean(df$sliding_average, na.rm=TRUE) df1 <- df df2 <- df; df2$sliding_average <- rnorm(mean=df1$sliding_average, sd = sdlevel, n=nrow(df1)) df3 <- df; df3$sliding_average <- rnorm(mean=df1$sliding_average, sd = sdlevel, n=nrow(df1)) df <- bind_rows(list(df1, df2, df3)) # Calculate deviations df <- summarySE(df, measurevar="sliding_average", groupvars=c("StudyIdentifier", "time_from_baseline")) # Visualize theme_set(theme_bw(10)) p <- ggplot(df, aes(x = time_from_baseline, y = sliding_average, color=StudyIdentifier)) + geom_point() + geom_line() + geom_errorbar(aes(ymin=sliding_average-sd, ymax=sliding_average+sd), width=.2, position=position_dodge(0.05)) + labs(x="Time (hours)", y="Mean slope of dissimilarity (Bray-Curtis)") + geom_hline(aes(yintercept=0), linetype=2) print(p)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.