Chao index redundancy analysis, variance partition

Hellinger Distance, Chord Distance - great except for ignoring undersampling Otherwise Jaccard, Sorensen, Ochiai

library(tidyverse)
library(here)
library(vegan)
library(cowplot)
library(phyloseq)
library(rstan)
library(doParallel)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
source(here("R","sample_posterior.R"))
source(here("R","plot_mds.R"))

registerDoParallel(parallel::detectCores())

Tijana's code to load the data

otutable = read.delim(file = here("private_data","tijana",'Seedlings_ITS2_otutab_ALL.csv'), sep = "\t") #reading the data I want to use
otutabP = otutable[, -1]
rownames(otutabP) = otutable [, 1]

t_otutab <- t(as.matrix(otutabP))

mapping = read.delim(file = here("private_data","tijana","Seedlings_ITS2_year1_mapping.csv"), sep = ",") %>%
  mutate(Replicate_no = gsub("[A-Za-z]","", Replicate), SoilSample = interaction(Soil, SampleType))
mapping2 = mapping [,-1] #it is the new mapping file with the proper row names, same as in previous case, here we added names from the mapping
rownames(mapping2) = mapping[,1]
#?order
mapping3 = sample_data(mapping2) #making corrected mapping file a phyloseq object
plot_mds_tijana <- function(mds, mapping, aligned_samples = NULL, show_paths = "all") {
  plot_mds(mds, mapping, aligned_samples = aligned_samples, color_aes = SampleType, shape_aes = Soil, show_paths = show_paths)
}
set.seed(13248455)
base_mds <- metaMDS(t_otutab, trymax = 50)

samples_rarefy <- sample_posterior_rarefy(20, t_otutab)
aligned_samples_rarefy <- metaMDS_per_sample(samples_rarefy) %>% purrr::map(align_single_MDS, base_mds = base_mds)
plot_mds_tijana(base_mds, aligned_samples = aligned_samples_rarefy, mapping)
samples_jackknife <- sample_posterior_jackknife_observations(t_otutab)
mds_jack <-  metaMDS_per_sample(samples_jackknife) 
aligned_samples_jackknife <- mds_jack %>% purrr::map(align_single_MDS, base_mds = base_mds, allow_unmatched_observations = TRUE)
plot_mds_tijana(base_mds, aligned_samples = aligned_samples_jackknife, mapping)
set.seed(4822465)
classical_mds_func <- function(x) {
  vegdist(x) %>% sqrt() %>% wisconsin() %>% cmdscale(add = TRUE, list. = TRUE)
}

base_classical_mds <- classical_mds_func(t_otutab)
mds_samples_classical_rarefy <- samples_rarefy %>% apply_per_sample(classical_mds_func)
aligned_samples_classical_rarefy <- mds_samples_classical_rarefy %>% purrr::map(vegan::procrustes, X = base_classical_mds)

plot_mds_tijana(base_classical_mds, aligned_samples = aligned_samples_classical_rarefy, mapping)
set.seed(134524223)
#resampled_otu <- sample_posterior_dm(10, t_otutab, prior = 0.5, N_draws = "original") 
resampled_otu <- sample_posterior_dm(20, t_otutab, prior = "ml", N_draws = "original")
resampled_mds <- metaMDS_per_sample(resampled_otu, trymax = 50) 
resampled_mds_aligned <- resampled_mds %>% purrr::map(vegan::procrustes, X = base_mds)
plot_mds_tijana(base_mds, aligned_samples = resampled_mds_aligned, mapping)

What happens when we take a small subset of the samples so that they are similar?

selected_observations <- mapping %>% filter(SampleType == "Control", Soil == "Spruce") %>% pull("Sample") %>% as.character()

set.seed(1328454)
t_otutab_filtered <- t_otutab[selected_observations,, drop = FALSE]
resampled_otu_filtered <- sample_posterior_dm(20, t_otutab_filtered, prior = "ml", N_draws = "original")

base_mds_filtered <- metaMDS(t_otutab_filtered, trymax = 50)
resampled_mds_filtered <- metaMDS_per_sample(resampled_otu_filtered, trymax = 50) 
resampled_mds_filtered_aligned <- resampled_mds_filtered %>% purrr::map(vegan::procrustes, X = base_mds_filtered)
plot_mds_tijana(base_mds_filtered, aligned_samples = resampled_mds_filtered_aligned, mapping)

Testing DESeq2

set.seed(321485246)
#resampled_otu <- sample_posterior_dm(10, t_otutab, prior = 0.5, N_draws = "original") 
resampled_otu_deseq <- sample_posterior_DESeq2(20, t_otutab,  mapping, ~SoilSample)


resampled_mds_aligned_deseq <- metaMDS_per_sample(resampled_otu_deseq, trymax = 50)  %>% purrr::map(vegan::procrustes, X = base_mds)

plot_mds_tijana(base_mds, aligned_samples = resampled_mds_aligned_deseq, mapping)
set.seed(8432554)
classical_mds_deseq <- resampled_otu_deseq %>% apply_per_sample(classical_mds_func) %>% purrr::map(vegan::procrustes, X = base_classical_mds)
plot_mds_tijana(base_classical_mds, aligned_samples = classical_mds_deseq, mapping)

DESEQ with each sample separate

set.seed(13584224)
#resampled_otu <- sample_posterior_dm(10, t_otutab, prior = 0.5, N_draws = "original") 
mappingV2 <- mapping %>% mutate(Cat = Replicate_no < 3, SoilSampleCat = interaction(SoilSample,Cat))
resampled_otu_deseq_rep <- sample_posterior_DESeq2(20, t_otutab,  mappingV2, ~SoilSampleCat)
resampled_otu_flat_deseq_rep <- flatten_posterior_samples(resampled_otu_deseq_rep)

resampled_mds_deseq_rep <- metaMDS(rbind(original_renamed, resampled_otu_flat_deseq_rep), trymax = 50)
plot_mds_tijana(resampled_mds_deseq_rep, mapping)
dist_deseq_rep <- vegdist(rbind(original_renamed, resampled_otu_flat_deseq_rep)) %>% sqrt() %>% wisconsin()
classical_mds_deseq_rep <- cmdscale(dist_deseq_rep, add = TRUE, list. = TRUE)
plot_mds_tijana(classical_mds_deseq_rep, mapping)
xx <- sample_posterior_DESeq2(1, t_otutab, mapping, ~SoilSample)
plot(log(as.numeric(xx) + 0.5), log(as.numeric(t_otutab)+ 0.5))
abline(a = 0, b = 1)
library(DESeq2)
dds <- DESeqDataSetFromMatrix(countData = otutabP, colData = mapping, design = ~SoilSample)
dds <- estimateSizeFactors(dds, type = "poscounts")
dds <- estimateDispersions(dds)
dds <- nbinomWaldTest(dds)
n_otus <- ncol(t_otutab)
n_observations <- nrow(t_otutab)

predicted_means <- 2^(coef(dds) %*% t(model.matrix(~SoilSample, mapping ))) * matrix(rep(sizeFactors(dds), each = n_otus), n_otus, n_observations)
colnames(predicted_means) <- rownames(t_otutab)
sName <- "TIJ019"
otherS <- paste0("TIJ0",20:23)

for(otu in sample(1:1034, 20)) {
  rand <- rnbinom(1000, mu = predicted_means[otu, sName], size = dispersions(dds)[otu])
  actual <- tibble(x = c(t_otutab[sName, otu], t_otutab[otherS, otu] / sizeFactors(dds)[otherS]))
  (tibble(x = rand) %>% ggplot(aes(x)) + geom_histogram(bins = 50) + geom_vline(data = actual, aes(xintercept = x)) )%>% print()
}
fitted_quantiles <- pnbinom(t(t_otutab), mu = predicted_means, size = matrix(rep(dispersions(dds), times = n_observations), n_otus, n_observations))

fitted_quantiles_tidy <- fitted_quantiles %>% as.data.frame() %>% rownames_to_column("otu") %>% gather("observation","quantile", -otu) 

fitted_quantiles_tidy %>% filter(otu %in% sample(colnames(t_otutab), 20)) %>%
  ggplot(aes(x = quantile)) + geom_histogram(binwidth = 0.01) 

fitted_quantiles_tidy %>% filter(otu %in% sample(colnames(t_otutab), 20)) %>%
  ggplot(aes(x = quantile)) + geom_histogram(binwidth = 0.1) + facet_wrap(~otu)

fitted_quantiles_tidy %>% filter(observation %in% sample(rownames(t_otutab), 20)) %>%
  ggplot(aes(x = quantile)) + geom_histogram(binwidth = 0.1) + facet_wrap(~observation)

Not working parts

Testing alternative priors, currently does not work

set.seed(32146588)
#resampled_otu <- sample_posterior_dm(10, t_otutab, prior = 0.5, N_draws = "original") 
resampled_otu_2 <- sample_posterior_dm(20, t_otutab, prior = 0.5, N_draws = "original")
resampled_otu_flat_2 <- flatten_posterior_samples(resampled_otu)

resampled_mds_2 <- metaMDS(rbind(original_renamed, resampled_otu_flat_2), trymax = 50)
plot_sampled_mds(resampled_mds_2, mapping)

Sampling in latent space

set.seed(8524362)
resampled_latent <- sample_latent_dm_all(10, t_otutab, prior = 0.5) 
resampled_latent_flat <- flatten_posterior_samples(resampled_otu)

resampled_mds_latent <- metaMDS(resampled_otu_flat, trymax = 60)
resampled_mds_latent$points %>% as.data.frame() %>% rownames_to_column("observation_sample") %>%
  separate("observation_sample", c("observation","sample"), sep = "_")  %>% 
  inner_join(mapping %>% mutate(Sample = as.character(Sample)), by =c("observation" = "Sample")) %>%
  ggplot(aes(MDS1, MDS2, color = SampleType, shape = Soil) + geom_point() 


cas-bioinf/metagenboot documentation built on Feb. 25, 2021, 3:58 p.m.