knitr::opts_knit$set(root.dir = "../")
library(magrittr)
library(ggplot2)
smar::sourceDir("R/")
set.seed(1)
load("../data/genera.RData")
physeq <- physeq_genera %>% 
  phyloseq::subset_samples(dataset_name == "MucosalIBD") %>% 
  smar::prune_taxaSamples(flist_taxa = genefilter::kOverA(k = 5, A = 1))
mat_X_count <- smar::otu_table2(physeq)
mat_X_p <- apply(mat_X_count, 2, function(x) x / sum(x))

Agenda

Count-based models

mat_para <- vapply(seq_len(nrow(mat_X_count)),
                function(i) {
                  x <- mat_X_count[i, ]
                  pi <- mean(x != 0)
                  mu <- mean(log(x[x > 0]))
                  sd <- sd(log(x[x > 0]))
                  c(pi, mu, sd)
                },
                c(0.0, 0.0, 0.0))
mat_X_sim <- vapply(seq_len(ncol(mat_para)),
                    function(i) {
                      round(exp(rnorm(n = ncol(mat_X_count),
                                      mean = mat_para[2, i],
                                      sd = mat_para[3, i]))) *
                        rbinom(n = ncol(mat_X_count), 
                               size = 1,
                               prob = mat_para[1, i])
                    },
                    rep(0, ncol(mat_X_count)))
rho_original <- cor2(x = t(mat_X_count))
rho_sim <- cor2(x = mat_X_sim)
data.frame(rho = c(rho_original[lower.tri(rho_original)], 
                   rho_sim[lower.tri(rho_sim)]),
           data = rep(c("original", "simulated"),
                      each = nrow(rho_original) * 
                        (nrow(rho_original) - 1) / 
                        2)) %>% 
  ggplot(aes(x = rho, fill = data)) +
  geom_density(alpha = 0.1) +
  ggtitle("Spearman rho between feature counts")

Penalized normal copula fit

s_s <- cor2(x = t(mat_X_p), random = TRUE, R = 50)
s <- iRho(s_s)
data.frame(spearman = s_s[lower.tri(s_s)],
           pearson = s[lower.tri(s)]) %>%
  ggplot(aes(x = spearman, y = pearson)) +
  geom_point() + geom_abline(slope = 1, intercept = 0, color = "red")
df_logLik <- pick_rho_pencopula(data = t(mat_X_p),
                                rholist = seq(0.05, 0.2,
                                              by = 0.05),
                                K = 5)
print(df_logLik)
fit_glasso <- glasso::glasso(s = s, rho = 0.1)
data.frame(original = solve(s)[lower.tri(s)],
           glasso = fit_glasso$wi[lower.tri(s)]) %>%
  ggplot(aes(x = original, y = glasso)) +
  geom_point() + geom_abline(slope = 1, intercept = 0, color = "red") +
  ggtitle("Shrinkage of precision")
data.frame(original = s[lower.tri(s)],
           glasso = fit_glasso$w[lower.tri(s)]) %>%
  ggplot(aes(x = original, y = glasso)) +
  geom_point() + geom_abline(slope = 1, intercept = 0, color = "red") +
  ggtitle("Shrinkage of corr")
sum(fit_glasso$w == 0)


syma-research/CRT_microbiome documentation built on July 8, 2022, 8 a.m.