Note - in the orig, SoilSample combines Rhisosphere and Roots - is that OK?

library(tidyverse)
library(here)
library(vegan)
library(cowplot)
library(rstan)
library(phyloseq)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

source(here("R","sample_posterior.R"))

Tijana's code to load the data

otutable = read.delim(file = here("private_data","tijana",'Seedlings_ITS2_otutab_ALL.csv'), sep = "\t") #reading the data I want to use
otutabP = otutable[, -1]
rownames(otutabP) = otutable [, 1]
t_otutab <- t(as.matrix(otutabP))

mapping = read.delim(file = here("private_data","tijana","Seedlings_ITS2_year1_mapping.csv"), sep = ",") %>%
  mutate(Replicate_no = gsub("[A-Za-z]","", Replicate), SoilSample = interaction(Soil, SampleType))
mapping2 = mapping [,-1] #it is the new mapping file with the proper row names, same as in previous case, here we added names from the mapping
rownames(mapping2) = mapping[,1]
#?order
mapping3 = sample_data(mapping2) #making corrected mapping file a phyloseq object
resampled_otu <- sample_posterior_dm(100, t_otutab, prior = "ml") 
resampled_richness <- summarise_posterior_richness(resampled_otu)
actual_richness <- mapping %>% inner_join(data.frame(Sample = colnames(otutabP), value = specnumber(otutabP, MARGIN = 2)), by = c("Sample" = "Sample"))
plot_resampled_richness <- function(resampled_richness, mapping, actual_richness) {
resampled_richness %>% as.data.frame() %>%
  rownames_to_column("observation") %>% inner_join(mapping %>% mutate(Sample = as.character(Sample)), by =c("observation" = "Sample")) %>%
  gather("sample","value", -observation, -Soil, -SampleType, -Sampling_time, -Replicate, - SoilSample, -Replicate_no) %>%
  ggplot(aes(x = SampleType, y = value)) + 
  geom_jitter(aes(color = Replicate_no), alpha = 0.1) +
  geom_boxplot(outlier.shape = NA, fill = "transparent") + 
  geom_point(aes(color = Replicate_no), data = actual_richness, size = 3) +
  facet_wrap(~Soil) +  theme(axis.text.x = element_text(angle = -90, hjust = 0),
        axis.title.x = element_blank())
}

plot_resampled_richness(resampled_richness, mapping, actual_richness)
resampled_otu_bayes <- sample_posterior_dm(100, t_otutab, prior = "bayes") 
resampled_richness_bayes <- summarise_posterior_richness(resampled_otu_bayes)

plot_resampled_richness(resampled_richness_bayes, mapping, actual_richness )
set.seed(32555337)
resampled_otu_deseq <- sample_posterior_DESeq2(100, t_otutab,  mapping, ~SoilSample)
resampled_richness_deseq <- summarise_posterior_richness(resampled_otu_deseq)

plot_resampled_richness(resampled_richness_deseq, mapping, actual_richness )
resampled_shannon <- summarise_posterior_per_sample(resampled_otu, function(x) {diversity(x, index = "shannon", MARGIN = 1)})
actual_shannon <- mapping %>% inner_join(data.frame(Sample = colnames(otutabP), value = diversity(otutabP, index = "shannon", MARGIN = 2)), by = c("Sample" = "Sample"))



plot_resampled_richness(resampled_shannon, mapping, actual_shannon)
resampled_shannon_deseq <- summarise_posterior_per_sample(resampled_otu_deseq, function(x) {diversity(x, index = "shannon", MARGIN = 1)})
plot_resampled_richness(resampled_shannon_deseq, mapping, actual_shannon)
xx <- apply(otutabP, MARGIN = 2, FUN = function(x) { array(c(1,2,3,4,5,6), dim = c(2,3))}) %>% array(c(2,3,60))

Tijana's code to compute Shannon diversity and species richness

library(phyloseq)

t_otus = as.data.frame(t(otutabP))
min_depth = min(colSums(otutabP))
min_depth
t_otus_rarefied = as.data.frame(round(rrarefy(t_otus, min_depth)))



shannondiv = diversity(t_otus_rarefied, index = "shannon", MARGIN = 1, base = exp(1))
head(shannondiv)
shannon2 = as.data.frame(shannondiv)
head(shannon2)
shannon3 = merge(mapping3, shannon2, by = "row.names")

plot_shannon = ggplot(shannon3, aes(x = SampleType, y = shannondiv, color = Replicate_no)) +
  geom_point(stat = "identity", size = 3) + #, 
           #position = position_jitter(width = 0.3)) +
  facet_grid(.~Soil)
print(plot_shannon)

#calculating stdev and standard error and summarizing in a table
#all

summary_shannon = shannon3 %>%
  group_by(SampleType, Soil) %>%
  summarise(mean_shannon = mean(shannondiv),
            sd_shannon = sd(shannondiv),
            n_shannon = n(), 
            SE_shannon = sd(shannondiv)/sqrt(n()))
#Taxa richness = number of OTUs 
#?estimateR
#?diversity
#?specnumber

richness = specnumber(t_otus_rarefied, MARGIN = 1)
head(richness)
richness2 =  as.data.frame(richness)
richness3 = merge(mapping3, richness2, by = "row.names")
richness_summary = richness3 %>%
  group_by(SampleType, Soil) %>%
  summarise(mean_richness= mean(richness),
            sd_richness = sd(richness),
            n_richness = n(), 
            SE_richness = sd(richness)/sqrt(n()))

richness_summary
richness_summary$Tukey = c("ab", "ab", "ab", "a", "a", "a", "a", "a", "a", "b", "b", "b")
richness_summary$SampleType = factor(richness_summary$SampleType, levels = c("Control", "Bulk", "Rhizosphere", "Roots"))

plot_richness = ggplot(richness_summary, aes(SampleType, mean_richness), fill = "white") + 
  geom_col(color = "black", fill = "white") +
  geom_errorbar(aes(ymin = mean_richness - sd_richness, ymax = mean_richness + sd_richness), width = 0.2)
p2 = plot_richness + labs(y = "Taxa richness +/- sd") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = -90, hjust = 0),
        axis.title.x = element_blank()) +
  geom_text(data = richness_summary, aes(x = SampleType, y = (mean_richness + sd_richness), 
                                         label = Tukey), vjust = -0.5) +
  facet_grid(.~Soil)
plot(p2)


cas-bioinf/metagenboot documentation built on Feb. 25, 2021, 3:58 p.m.