knitr::opts_chunk$set(error=FALSE)
library(SarcoidMicrobiome)
library(ggbeeswarm)
library(forcats)
library(stringr)
library(ggplot2)
library(dplyr)
opts$data_fp <- params$data_fp
opts$figure_fp <- params$figure_fp
opts$table_fp <- params$table_fp
theme_set(
  theme_classic(base_size = 12) +
    theme(
      plot.title = element_text(hjust=0),
      axis.ticks = element_line(size=0.75), 
      axis.line.x = element_line(colour = 'black', size=0.6, linetype='solid'),
      axis.line.y = element_line(colour = 'black', size=0.6, linetype='solid'),
      strip.background = element_blank(),
      strip.text = element_text(face = "bold", hjust=0)
    )
)

# Green/brown colors
# top.barchart.fills <- c(
#   "Healthy lymph node"="#a6611a",
#   "Healthy BAL"="#a6611a",
#   "Healthy paraffin"="#C0A675",
#   "Healthy prewash"="#C0A675",
#   "Sarcoidosis lymph node"="#018571",
#   "Sarcoidosis BAL"="#018571",
#   "Sarcoidosis paraffin"="#86C3AE",
#   "Sarcoidosis prewash"="#86C3AE"
# )

# Red/blue colors
top.barchart.fills <- c(
  "Healthy lymph node"="#1f78b4",
  "Healthy BAL"="#1f78b4",
  "Healthy paraffin"="#a6cee3",
  "Healthy prewash"="#a6cee3",
  "Sarcoidosis lymph node"="#ff7f00",
  "Sarcoidosis BAL"="#ff7f00",
  "Sarcoidosis paraffin"="#fdbf6f",
  "Sarcoidosis prewash"="#fdbf6f"
)

# Virus contaminants
tech.contam <- c(
  "Enterobacteria phage M13", "Enterobacteria phage T7", 
  "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", 
  "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", 
  "Human herpesvirus 6", "Cyprinid herpesvirus 3")
a.16s <- LoadData("A", "16S")
a.its <- LoadData("A", "ITS")
b.16s <- LoadData("B", "16S")
b.its <- LoadData("B", "ITS")
c.16s <- LoadData("C", "16S")
c.its <- LoadData("C", "ITS")
c.vir <- LoadData("C", "virome")
de.16s <- LoadData("DE", "16S")
de.its <- LoadData("DE", "ITS")
de.wgs <- LoadData("DE", "WGS")

# Reconcile and anonymize all the sample names (e.g. subjects) between 
# 16S/ITS/WGS sequencings of a set

# BAL:
.c.vir.subjectid <- as.character(c.vir$s$OriginatingSampleID)
.c.vir.subjectid <- factor(str_replace(.c.vir.subjectid, "HUP", ""))
.bal.unified <- fct_unify(list(
  droplevels(c.16s$s$DerivingOriginalSampleID),
  droplevels(c.its$s$DerivingSampleID),
  .c.vir.subjectid))
c.16s$s$SubjectID <- fct_anon_deterministic(.bal.unified[[1]], prefix="C")
c.16s$agg <- left_join(c.16s$agg, c.16s$s)
c.its$s$SubjectID <- fct_anon_deterministic(.bal.unified[[2]], prefix="C")
c.its$agg <- left_join(c.its$agg, c.its$s)
c.vir$s$SubjectID <- fct_anon_deterministic(.bal.unified[[3]], prefix="C")
c.vir$agg <- left_join(c.vir$agg, c.vir$s)

# LN/A:
.a.all <- fct_unify(list(
  factor(a.16s$s$OriginatingSampleID),
  factor(a.its$s$OriginatingSampleID)
))
a.16s$s$SubjectID <- fct_anon_deterministic(.a.all[[1]], prefix="A")
a.16s$agg <- left_join(a.16s$agg, a.16s$s)
a.its$s$SubjectID <- fct_anon_deterministic(.a.all[[2]], prefix="A")
a.its$agg <- left_join(a.its$agg, a.its$s)

# LN/B:
.lnb.unified <- fct_unify(list(
  factor(make.names(as.character(b.16s$s$DerivingOriginalSampleID), allow_=FALSE)),
  factor(b.its$s$SampleID)
))
b.16s$s$SubjectID <- fct_anon_deterministic(.lnb.unified[[1]], prefix="B")
b.16s$agg <- left_join(b.16s$agg, b.16s$s)
b.its$s$SubjectID <- fct_anon_deterministic(.lnb.unified[[2]], prefix="B")
b.its$agg <- left_join(b.its$agg, b.its$s)

Fig 1: Set A Barcharts

invisible(with(a.16s, {  
  agg %>%
    filter(SampleType %in% c("lymph_node", "paraffin", "extr_blank")) %>%
    mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>%
    mutate(SampleType = relevel(SampleType, "paraffin")) %>%
    MakeBarchartData() %>%
    PlotBarchart() %>%
    SplitBarcharts() %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig1A_Set%s_%s_%%s.pdf", sampleset, datatype)))
}))
invisible(with(a.its, {
  # browser()
  agg %>%
    filter(SampleType %in% c("lymph_node", "paraffin", "extr_blank")) %>%
    mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>%
    mutate(SampleType = relevel(SampleType, "paraffin")) %>%
    ungroup() %>%
    mutate(
      MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>%
    mutate(
      MinRankOrder = forcats::fct_recode(
        MinRankOrder, 
        Other="Fungi", 
        Other="uncultured fungus (phylum)", 
        Other="Indeterminate",
        Other="uncultured Acremonium (order)")) %>%
    MakeBarchartData(taxa.level="MinRankOrder") %>%
    PlotBarchart() %>%
    SplitBarcharts() %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig1B_Set%s_%s_%%s.pdf", sampleset, datatype)))
}))
invisible(within(a.its, {
  # browser()

  .s <- s %>% select(SampleID, StudyGroup, SampleType, SubjectID)

  clado <- agg %>% 
    filter(!SampleType %in% c("extr_blank", "pcr_h2o")) %>%
    select(SampleID, Family, count) %>%
    filter(Family == "Cladosporiaceae") %>%
    count(SampleID, wt=count) %>%
    droplevels() %>%
    left_join(s) %>%
    select(SampleID, n, SampleType, SubjectID) %>%
    complete(SampleType, SubjectID, fill=list(n=0)) %>%
    filter(SampleType %in% c("paraffin", "lymph_node")) %>%
    select(-SampleID) %>%
    left_join(.s) %>%
    filter(!is.na(SampleID))

  .s2 <- .s %>% distinct(SubjectID, StudyGroup) %>% droplevels()

  clado.diff <- group_by(clado, SubjectID) %>%
    select(SubjectID, n, SampleType) %>%
    spread(SampleType, n) %>%
    mutate(delta=lymph_node-paraffin) %>%
    ungroup() %>%
    distinct(SubjectID, delta) %>%
    left_join(.s2) %>%
    mutate(delta.log10=sign(delta)*log10(abs(delta))) %>%
    mutate(delta.log10=ifelse(is.nan(delta.log10), 0, delta.log10)) %>%
    mutate(sign = delta.log10 > 0)

  p <- ggplot(clado.diff, aes(x=StudyGroup, y=delta.log10)) +
    geom_hline(yintercept=0, linetype=2) +
    geom_boxplot(fill=NA, size=0.5, fatten=4) +
    # scale_fill_manual(values=pcoa.fills, guide=FALSE) +
    # scale_color_manual(values=pcoa.colors, guide=FALSE) +
    # scale_color_manual("",
    #   values=c("TRUE"="black", "FALSE"="black"),
    #   labels=c("TRUE"="Greater in sample", "FALSE"="No greater in sample than env."),
    #   breaks=c("TRUE", "FALSE")) +
    scale_fill_manual("",
      values=c("TRUE"="#2166ac", "FALSE"="#d1e5f0"),
      labels=c("TRUE"="Greater in sample", "FALSE"="No greater in sample than env."),
      breaks=c("TRUE", "FALSE")) +
    geom_quasirandom(
      aes(fill=sign), shape=21, bandwidth = 0.1, width=0.3, size=2 ) +
    scale_y_continuous(
      expand=c(0,0),
      breaks=c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5), 
      limits=c(-5,5), 
      labels = function(x) scales::comma(10^(abs(x)) * sign(x))
      ) +
    scale_x_discrete(
      labels=c("healthy"="Healthy", "sarcoidosis"="Sarcoidosis")) +
    labs(
      y="Sample - Environment (reads)", 
      x=NULL, 
      title=NULL) +
    theme_classic(base_size = 8) +
    theme(
      axis.text=element_text(color="black"),
      axis.line=element_line(size=1, color="black", linetype=1),
      axis.line.x=element_line(size=1, color="black", linetype=1),
      axis.line.y=element_line(size=1, color="black", linetype=1),
      panel.grid.minor=element_blank(),
      legend.position="bottom",
      legend.direction="vertical")

  plot(p)

  ggsave(
    file.path(opts$figure_fp, "Fig1C_SetA_Cladosporiaceae.pdf"),
    plot=p,
    device=cairo_failsafe, family="Helvetica",
    height=3, width=2.1)
}))
## This generates Whittaker and Preston plots but we decided not to include them.

invisible(within(a.16s, {
  browser()
  rank_abund <- agg %>%  
    mutate(label=SarcoidMicrobiome:::min_rank(., end.rank="Species")) %>%
    group_by(StudyGroup, SampleType) %>%
    filter(SampleType %in% c("lymph_node", "paraffin")) %>%
    mutate(rank = row_number(desc(proportion))) %>%
    arrange(rank) %>%
    mutate(relabund = proportion/sum(proportion))

  labelling <- rank_abund %>% filter(rank <= 5)

  whit <- ggplot(rank_abund, aes(x=rank, y=relabund, color=SampleType)) +
    geom_point(shape=21) +
    facet_wrap(~ StudyGroup, scales="free") +
    scale_y_log10() +
    annotation_logticks(sides = "l") +
    ggsci::scale_color_npg(labels=c("Lymph node", "Paraffin")) +
    labs(x="Rank", y="Relative abundance", title="Whittaker plot of relative species abundance in Set A (16S)")
  plot(whit)
  ggsave(file.path(
    opts$figure_fp, 
    sprintf("FigX1_Set%s_%s_WhittakerPlot.pdf", sampleset, datatype)),
    plot=whit, height=4, width=8)


  pres <- ggplot(rank_abund, aes(x=log2(relabund), fill=SampleType)) + 
    geom_histogram(binwidth=1, position="dodge", color="black") +
    facet_wrap(~ StudyGroup) +
    scale_x_continuous(labels = function(x) sprintf("%.1e", 2^x)) +
    scale_y_continuous(expand=c(0,0)) +
    ggsci::scale_fill_npg(name="Sample type", labels=c("Lymph node", "Paraffin")) +
    labs(x="Relative abundance (log2 scaled)", y="OTUs", title="Preston plot of relative species abundance in Set A (16S)")
  plot(pres)
  ggsave(file.path(
    opts$figure_fp, 
    sprintf("FigX1_Set%s_%s_PrestonPlot.pdf", sampleset, datatype)),
    plot=pres, height=4, width=8)
}))
invisible(within(c.vir, {
  browser()
  heatmap.data <- agg %>%
    filter(SampleType %in% c("BAL", "prewash"),
           !Species %in% tech.contam) %>%
    SarcoidMicrobiome:::MakeHeatmapData(s, min.samples = 1, min.rank.1 = "Species") %>%
    mutate(proportion = ifelse(is.na(proportion), 0, proportion)) %>%
    group_by(SampleType, StudyGroup, MinRank1) %>%
    summarize(proportion=median(proportion, na.rm=TRUE)) %>%
    mutate(rank = row_number(desc(proportion))) 
  top.taxa <- (heatmap.data %>% filter(rank <= 10))$MinRank1
  heatmap.data <- heatmap.data %>% filter(MinRank1 %in% top.taxa) %>%
    ungroup %>%
    mutate(
      StudyGroup = fct_recode(
        StudyGroup, "Healthy"="healthy", "Sarcoidosis"="sarcoidosis"),
      SampleType = fct_recode(
        SampleType, "Lymph node"="lymph_node", "Paraffin"="paraffin"
      ))

  .mat <- reshape2::dcast(
    heatmap.data, MinRank1 ~ StudyGroup + SampleType, value.var="proportion", fill=0
  )
  rownames(.mat) <- .mat$MinRank1
  .mat$MinRank1 <- NULL
  minrank_order <- (dist(.mat) %>% hclust(method="centroid"))$order

  heatmap.data <- heatmap.data %>% 
    mutate(MinRank1 = factor(MinRank1, levels=rownames(.mat)[minrank_order]))

  heatmap <- SarcoidMicrobiome:::PlotHeatmap(
    heatmap.data, use.reads=FALSE, x.axis = "SampleType", threshold = .28) +
    facet_grid(. ~ StudyGroup, space="free", scales="free") +
    scale_x_discrete(expand=c(0,0)) +
    # theme_classic(base_size = 12) +
    theme(
      text = element_text(color="black"),
      axis.text.x = element_text(angle=0, hjust=0.5, vjust=0),
      axis.title = element_blank(),
      strip.text.x = element_text(angle=0),
      strip.background = element_rect(fill=NA, color="grey20")
    ) +
    ggsci::scale_fill_material("blue-grey", name="Median %", na.value="white", labels=scales::percent_format())


  plot(heatmap)

  # ggsave(file.path(
  #   opts$figure_fp, 
  #   sprintf("FigX2_Set%s_%s_SummaryHeatmap.pdf", sampleset, datatype)),
  #   plot=heatmap, height=4, width=5.5)
}))
invisible(within(a.16s, {
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("lymph_node", "paraffin"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))
invisible(within(a.its, {
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("lymph_node", "paraffin"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))

Fig 2: Set B Barcharts

invisible(within(b.16s, {
  agg %>%
    filter(SampleType %in% c("lymph_node", "water")) %>%
    mutate(StudyGroup = fct_recode(StudyGroup, "extr_ctrl"="ext_ctrl")) %>%
    # mutate(SubjectID = ifelse(SampleType == "water", "B00", as.character(SubjectID))) %>%
    MakeBarchartData() %>%
    PlotBarchart() %>%
    SplitBarcharts() %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig2A_Set%s_%s_%%s.pdf", sampleset, datatype)),
      height=opts$barchart.height/1.5)
}))
invisible(within(b.its, {
  agg %>%
    group_by(SubjectID) %>%
    filter(sum(count) > 0) %>%
    filter(SampleType %in% c("lymph_node", "extr_blank")) %>%
    mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>%
    ungroup() %>%
    mutate(
      MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>%
    mutate(
      MinRankOrder = forcats::fct_recode(
        MinRankOrder, 
        Other="Fungi", 
        Other="uncultured fungus (phylum)", 
        Other="Indeterminate")) %>%
    MakeBarchartData(taxa.level = "MinRankOrder") %>%
    PlotBarchart() %>%
    SplitBarcharts() %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig2B_Set%s_%s_%%s.pdf", sampleset, datatype)),
      height=opts$barchart.height/1.5)
}))
invisible(within(b.16s, {
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("lymph_node", "paraffin"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))
invisible(within(b.its, {
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("lymph_node", "paraffin"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      y="Median abundance", 
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))

Figure 3: Set C Barcharts

invisible(within(c.16s, {
   agg %>%
    filter(SampleType %in% c("BAL", "prewash")) %>%
    mutate(SampleType = relevel(SampleType, "prewash")) %>%
    droplevels() %>%
    MakeBarchartData() %>%
    group_by(SubjectID, StudyGroup) %>% 
    complete(SampleType, fill = list(count=0)) %>%
    PlotBarchart(fill.values = bacterial.colors, x.labs = opts$bal.labels) %>%
    SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig3A_Set%s_%s_%%s.pdf", sampleset, datatype)))
}))
invisible(within(c.its, {
   agg %>%
    filter(SampleType %in% c("BAL", "prewash")) %>%
    mutate(SampleType = relevel(SampleType, "prewash")) %>%
    droplevels() %>%
    MakeBarchartData() %>%
    group_by(SubjectID, StudyGroup) %>% 
    complete(SampleType, fill = list(count=0)) %>%
    group_by(SubjectID) %>%
    filter(sum(count) > 0) %>%
    PlotBarchart(fill.values = fungal.colors, x.labs = opts$bal.labels) %>%
    SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig3B_Set%s_%s_%%s.pdf", sampleset, datatype)))
}))
invisible(within(c.vir, {
  tech.contam <- c(
    "Enterobacteria phage M13", "Enterobacteria phage T7", 
    "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", 
    "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", 
    "Human herpesvirus 6", "Cyprinid herpesvirus 3")
   agg %>%
     filter(LibraryType == "Genomiphi") %>%
     filter(SampleType %in% c("BAL", "prewash")) %>%
     filter(!Species %in% tech.contam) %>% 
     mutate(SampleType = relevel(SampleType, "prewash")) %>%
     droplevels() %>%
     MakeBarchartData(taxa.level = "Family") %>%
     group_by(SubjectID, StudyGroup) %>% 
     complete(SampleType, fill = list(count=0)) %>%
     group_by(SubjectID) %>%
     filter(sum(count) > 0) %>%
     PlotBarchart(fill.values = NULL, x.labs = opts$bal.labels) %>%
     SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>%
     SaveBarcharts(
       file.path(opts$figure_fp, sprintf("Fig3C_Set%s_%s_%%s.pdf", sampleset, datatype)))
}))
invisible(within(c.16s, {
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("BAL", "prewash"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      y="Median abundance", 
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))
invisible(within(c.its, {
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("BAL", "prewash"), 
      StudyGroup %in% c("healthy", "sarcoidosis")) %>%
    MakeTopBarchartData(s, top=6, min.rank="Species") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))
invisible(within(c.vir, {
  # browser()
  tech.contam <- c(
    "Enterobacteria phage M13", "Enterobacteria phage T7", 
    "Enterobacteria phage phiX174 sensu lato", "Bacillus phage phi29", 
    "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", 
    "Human herpesvirus 6", "Cyprinid herpesvirus 3")
  # browser()
  p <- agg %>%
    ungroup %>%
    filter(
      SampleType %in% c("BAL", "prewash"), 
      StudyGroup %in% c("healthy", "sarcoidosis"),
      !Species %in% tech.contam,
      Genus != "Phi29virus",
      LibraryType == "Genomiphi") %>%
    MakeTopBarchartData(s, top=6, min.rank="Genus") %>%
    PlotTopBarcharts() +
    scale_fill_manual("", values=top.barchart.fills) +
    scale_color_manual("", values=top.barchart.fills) +
    scale_y_continuous(expand=c(0,0), labels=scales::percent) +
    labs(
      title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype))
  plot(p)
  ggsave(file.path(
    opts$figure_fp,
    sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)),
    plot=p, height=4, width=7)
}))

Figure 4: Sets D/E Barcharts

invisible(within(de.16s, {
  .s <- distinct(s, Sample, .keep_all = TRUE) %>% select(-SampleID)
  .agg <- agg %>% group_by(Sample, otu) %>%
    summarize(count=sum(count)) %>%
    group_by(Sample) %>%
    mutate(proportion=count/sum(count)) %>%
    left_join(.s) %>%
    left_join(md) %>%
    rename(SampleID=Sample) %>%
    mutate(SubjectID=SampleID) %>% droplevels()
  tmp <- .agg %>%
    MakeBarchartData() %>%
    PlotBarchart(
      x.col="SampleID", 
      x.labs=c("B"="BL", "Sal"="SL"),
      theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>%
    SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>%
    SaveBarcharts(
       file.path(opts$figure_fp, sprintf("Fig4A_Set%s_%s_%%s.pdf", sampleset, datatype)),
       height=opts$barchart.height/1.5)
}))
invisible(within(de.its, {
  agg %>%
    mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>%
    mutate(SubjectID=SampleID) %>%
    mutate(
      MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>%
    mutate(
      MinRankOrder = forcats::fct_recode(
        MinRankOrder, 
        Other="Fungi", 
        Other="uncultured fungus (phylum)", 
        Other="Indeterminate",
        Other="Coniochaetales")) %>%
    MakeBarchartData(taxa.level="MinRankOrder") %>%
    PlotBarchart(
      x.col="SubjectID",
      x.labs=c("Blank"="BL", "Saline"="SL", "Kveim"="KV", "Spleen1"="S1", "Spleen2"="S2", "Spleen3"="S3"),
      theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>%
    SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>%
    SaveBarcharts(
      file.path(opts$figure_fp, sprintf("Fig4B_Set%s_%s_%%s.pdf", sampleset, datatype)),
      height=opts$barchart.height/1.5)
  # browser()
}))
invisible(within(de.wgs, {
  agg %>% 
    filter(Order != "Primates") %>%
    mutate(SubjectID=SampleID) %>%
    MakeBarchartData() %>%
    PlotBarchart(
      x.col="SampleID", 
      x.labs=c("B"="BL", "Sal"="SL"),
      theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>%
    SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>%
    SaveBarcharts(
       file.path(opts$figure_fp, sprintf("Fig4C_Set%s_%s_%%s.pdf", sampleset, datatype)),
       height=opts$barchart.height/1.5)
}))

Figure S2: Set A Community Differences

invisible(within(a.16s, {
  set.seed(1)
  .agg <-  agg %>% 
    filter(
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("lymph_node")) %>%
    mutate(SampleType = fct_recode(
      SampleType, c("lymph node"="lymph_node"))) %>%
    mutate(readcount=total)
  PCoAChunk(
    .agg, s, tree, rarefy.depth=1000, 
    testing.terms=c("readcount","StudyGroup"), 
    sampleset=sampleset, dataset=datatype,
    subset="tissue", fig.num="S2A", fig.dir=opts$figure_fp)
}))
invisible(within(a.its, {
  set.seed(1)

  .agg <-  agg %>% 
    filter(
      count > 0,
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("lymph_node")) %>%
    mutate(SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>%
    mutate(readcount=total)

  PCoAChunk(
    .agg, s, tree, rarefy.depth=900, 
    testing.terms=c("readcount", "StudyGroup"), 
    sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S2B", fig.dir=opts$figure_fp)
}))
invisible(within(a.its, {
  set.seed(1)

  .agg <-  agg %>% 
    filter(
      count > 0,
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("paraffin")) %>%
    mutate(readcount=total)

  PCoAChunk(
    .agg, s, tree, rarefy.depth=900, 
    testing.terms=c("readcount", "StudyGroup"), 
    sampleset="A", dataset="ITS", subset="paraffin", fig.num="S2D", fig.dir=opts$figure_fp)
}))
invisible(within(a.16s, {
  set.seed(1)
  .agg <-  agg %>% 
    filter(
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("paraffin")) %>%
    mutate(SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>%
    mutate(readcount=total)
  PCoAChunk(
    .agg, s, tree, rarefy.depth=1000, 
    testing.terms=c("readcount", "StudyGroup"),
    sampleset=sampleset, dataset=datatype, subset="paraffin", fig.num="S2C", fig.dir=opts$figure_fp)
}))

Figure S3: Set B Community Differences

invisible(within(b.16s, {
# 
  .agg <-  agg %>% 
    filter(
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("lymph_node", "paraffin"),
      StudyGroup %in% c("sarcoidosis", "healthy")) %>%
    mutate(
      SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>%
    mutate(readcount=total)

  PCoAChunk(
    .agg, s, tree, rarefy.depth=900, 
    testing.terms=c("readcount", "StudyGroup"), 
    sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S3A", fig.dir=opts$figure_fp)
}))
invisible(within(b.its, {
# 
  .agg <-  agg %>% 
    filter(
      StudyGroup != "extr_ctrl", 
      SampleType %in% c("lymph_node", "paraffin"),
      StudyGroup %in% c("sarcoidosis", "healthy")) %>%
    mutate(
      SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>%
    mutate(readcount=total)

  PCoAChunk(
    .agg, s, tree, rarefy.depth=900, 
    testing.terms=c("readcount", "StudyGroup"), 
    sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S3B", fig.dir=opts$figure_fp)
}))

Figure S4: Set C Community Differences

invisible(within(c.16s, {
  .agg <- agg %>%
    filter(
      StudyGroup != "extr_ctrl",
      SampleType %in% c("BAL")) %>%
    mutate(readcount=total)
  PCoAChunk(
    .agg, s, tree, rarefy.depth=1000, testing.terms=c("StudyGroup"), 
    sampleset=sampleset, dataset=datatype, subset="BAL", fig.num="S4A",
    fig.dir=opts$figure_fp)
}))
invisible(within(c.16s, {
  .agg <- agg %>%
    filter(
      StudyGroup != "extr_ctrl",
      SampleType %in% c("prewash")) %>%
    mutate(readcount=total)
  PCoAChunk(
    .agg, s, tree, rarefy.depth=1000, testing.terms=c("StudyGroup"), 
    sampleset=sampleset, dataset=datatype, subset="prewash", fig.num="S4B",
    fig.dir=opts$figure_fp)
}))

Figure S5: Viral populations in prewash

invisible(within(c.vir, {
  tech.contam <- c(
    "Enterobacteria phage M13", "Enterobacteria phage T7", 
    "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", 
    "Bacillus virus phi29",
    "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", 
    "Human herpesvirus 6", "Cyprinid herpesvirus 3")
  p <- agg %>% 
    filter(SampleType == "prewash", LibraryType == "Genomiphi") %>%
    filter(!Species %in% tech.contam) %>%
    MakeHeatmapData(s, "Species") %>%
    PlotHeatmap(use.reads = FALSE) %>%
    eclectic::make_square() +
    theme(
      strip.text.x=element_text(angle=0),
      # strip.background=element_blank(),
      axis.text.x=element_blank(),
      axis.title=element_blank(),
      axis.ticks.x=element_blank()

    )
  plot(p)

  ggsave(
    file.path(opts$figure_fp, "FigS5_SetC_virome_prewash.pdf"), p,
    width=7, height=9)
}))

Table S1: Sample Metadata

combined <- do.call(rbind, lapply(list(
  a.16s, a.its, 
  b.16s, b.its, 
  c.16s, c.its, c.vir, 
  de.16s, de.its, de.wgs), 
  StandardizeMetadata))
write.table(
  combined, file=file.path(opts$table_fp, "all_samples.tsv"), sep="\t", 
  row.names = FALSE, quote = FALSE)

SRA data

WriteSRATables <- function(ds) {
  biosample <- MakeBioSample(ds)
  sra <- MakeSRAMetadata(ds)
  write.table(
    biosample, 
    file=file.path(
      opts$table_fp,
      sprintf("biosample_%s_%s.tsv", ds$sampleset, ds$datatype)), 
    sep="\t", row.names = FALSE, quote=FALSE)

  write.table(
    sra, 
    file=file.path(
      opts$table_fp, 
      sprintf("sra_%s_%s.tsv", ds$sampleset, ds$datatype)),
    sep="\t", row.names = FALSE, quote=FALSE)
}

lapply(list(
  a.16s, a.its, 
  b.16s, b.its, 
  c.16s, c.its, c.vir, 
  de.16s, de.its, de.wgs), 
  WriteSRATables)


eclarke/sarcoid-microbiome-paper documentation built on May 29, 2019, 2:52 p.m.