Introduction

library("knitr")
library("plyr")
library("dplyr")
library("phyloseq")
library("cluster")
library("treelapse")

Data Preparation

## code from http://statweb.stanford.edu/~susan/papers/Pregnancy/PNAS_Vaginal_Analysis.Rmd
pregnancy_path <- "http://statweb.stanford.edu/~susan/papers/Pregnancy/PregnancyClosed15.Rdata"
tmp <- tempfile()
download.file(pregnancy_path, tmp)
load(tmp)

site <- "Vaginal_Swab"
ps <- PSPreg[[site]] %>%
  filter_taxa(function(x) sum(x > 1) > 0.01 * length(x), TRUE) %>%
  transform_sample_counts(function(OTU) OTU/sum(OTU))

braydist <- phyloseq::distance(ps, method="bray")
ord = ordinate(ps, method = "MDS", distance = braydist)

NDIM <- 7
K <- 5
x <- ord$vectors[,1:NDIM]
clust <- as.factor(pam(x, k=K, cluster.only=T))
# SWAPPING THE ASSIGNMENT OF 2 AND 3 TO MATCH RAVEL CST ENUMERATION
clust[clust==2] <- NA
clust[clust==3] <- 2
clust[is.na(clust)] <- 3

# setup dates
sample_info <- sample_data(ps) %>%
  data.frame()
sample_info$CST <- clust
taxa <- taxa_edgelist(tax_table(ps))
ps <- PSPreg[[site]] %>%
  filter_taxa(function(x) sum(x > 1) > 0.01 * length(x), TRUE)

values <- tree_fun_multi(
  taxa,
  asinh(as(otu_table(ps), "matrix")),
  tree_sum
)

values$time <- sample_info$D2Del[values$row]
values$group <- paste0("cst_", sample_info$CST[values$row])

Visualization

doi_sankey(
  values %>%
    group_by(group, unit) %>%
    dplyr::summarise(value = mean(value)),
  taxa,
  "Bacteria",
  900,
  300,
  display_opts = list(
    "focus_font_size" = 20,
    "font_size" = 15,
    "leaf_width" = 37,
    "leaf_height" = 75
  )
)
# for each group, look at the timeboxes
for (i in seq_len(5)) {
  timebox_tree(
    values %>%
      filter(group == paste0("cst_", i)) %>%
      group_by(unit, time) %>%
      dplyr::summarise(value = mean(value)),
    taxa
  ) %>%
    print()

  treebox(
    values %>%
      filter(group == paste0("cst_", i)) %>%
      group_by(unit, time) %>%
      dplyr::summarise(value = mean(value)),
    taxa
  ) %>%
    print()
}
values$group <- sample_info$Outcome[values$row]
levels(values$group) <- c("VeryPretern", "Preterm", "Marginal", "Term")
doi_sankey(
  values %>%
  group_by(group, unit) %>%
  dplyr::summarise(value = mean(value)),
  taxa,
  "Bacteria",
  900,
  300,
  display_opts = list(
    "focus_font_size" = 20,
    "font_size" = 15,
    "leaf_width" = 37,
    "leaf_height" = 75
  )
)

Interpretation



krisrs1128/treelapse documentation built on Jan. 3, 2020, 6:29 p.m.