knitr::opts_knit$set(root.dir = "../")
library(magrittr) library(ggplot2) set.seed(1) ncores <- 6
load("../data/genera.RData") physeq <- physeq_genera %>% phyloseq::subset_samples(dataset_name == "MucosalIBD") %>% smar::prune_taxaSamples(flist_taxa = genefilter::kOverA(k = 5, A = 1)) mat_X_count <- smar::otu_table2(physeq) rownames(mat_X_count) <- CRTmicrobiome:::simplify_feature(rownames(mat_X_count)) mat_X_p <- apply(mat_X_count, 2, function(x) x / sum(x)) df_marginal <- CRTmicrobiome:::get_marginal(t(mat_X_p))
mat_X_p_pseudocount <- apply(mat_X_count + 0.5, 2, function(x) x / sum(x)) mu_a <- CRTmicrobiome:::get_mean_logisticMVN(mat_X_p_pseudocount) fit_gcoda <- CRTmicrobiome:::gcoda( data = t(mat_X_p_pseudocount), lambda_list = 0.6)$fits[[1]] params_gcoda <- list(mu = mu_a, Omega = fit_gcoda)
Seems that penalization gives higher estimation features' variances.
tb_sd <- tibble::tibble( original = mat_X_p_pseudocount %>% log() %>% t() %>% var() %>% diag() %>% sqrt(), estimated = diag(solve(fit_gcoda)) %>% sqrt() ) tb_sd %>% ggplot(aes(x = original, y = estimated)) + geom_point() + geom_abline(intercept = 0, slope = 1, color = "red")
dir.create("../results/gcoda_marginal/", recursive = TRUE, showWarnings = FALSE) R <- 10000 df_marginal_mix <- CRTmicrobiome:::get_marginal_mix(data = t(mat_X_p_pseudocount), df_marginal = df_marginal, mean0 = log(0.5) - mean(log(apply(mat_X_count, 2, sum))), var0 = var(log(apply(mat_X_count, 2, sum)))) sds_gcoda <- sqrt(diag(solve(fit_gcoda))) sd0 <- df_marginal_mix$sigma0 / df_marginal_mix$sigma_overall * sds_gcoda sd1 <- df_marginal_mix$sigma1 / df_marginal_mix$sigma_overall * sds_gcoda samples_gcoda <- CRTmicrobiome:::rgcoda(n = R, mean0 = df_marginal_mix$mu0, mean1 = df_marginal_mix$mu1, sd0 = sd0, sd1 = sd1, pi = df_marginal_mix$pi, sigma = solve(fit_gcoda)) cov_samples <- cov(samples_gcoda) p_original <- CRTmicrobiome:::plot_cors(cov(t(log(mat_X_p_pseudocount))), solve(fit_gcoda), labels = c("Data", "Fit")) p_sample <- CRTmicrobiome:::plot_cors(cov(t(log(samples_gcoda$abd))), solve(fit_gcoda), labels = c("Sample", "Fit")) cowplot::plot_grid(p_original, p_sample, nrow = 1) %>% ggsave("../results/gcoda_marginal/cov_comparison.pdf", ., width = 10, height = 5) df_original <- mat_X_p_pseudocount %>% CRTmicrobiome:::longify_abd() %>% dplyr::left_join( (mat_X_count != 0) %>% CRTmicrobiome:::longify_abd(abundance = "is_pos"), by = c("feature", "sample") ) %>% dplyr::mutate(data = "original") dimnames(samples_gcoda$abd) <- dimnames(samples_gcoda$ind) <- list(rownames(mat_X_count), paste0("sample", seq_len(R))) df_gcoda <- samples_gcoda$abd %>% CRTmicrobiome:::longify_abd() %>% dplyr::left_join( (samples_gcoda$ind == 1) %>% CRTmicrobiome:::longify_abd(abundance = "is_pos"), by = c("feature", "sample") ) %>% dplyr::mutate(data = "gcoda") df_p_gcoda <- rbind(df_original, df_gcoda) %>% dplyr::mutate( data = factor(data, levels = c("original", "gcoda")), log_abd = log10(abd)) %>% dplyr::group_by(feature) %>% dplyr::summarise( l_p = list( data.frame(log_abd = log_abd, data = data, is_pos = is_pos) %>% ggplot(aes(x = log_abd, fill = is_pos)) + geom_histogram(bins = 50) + facet_grid(data~., scales = "free_y") + scale_fill_manual(values = c("FALSE" = "grey", "TRUE" = "black")) + ggtitle(feature[1]) )) pdf("../results/gcoda_marginal/marginal_gcoda.pdf", width = 4, height = 8) for(i in seq_len(nrow(df_p_gcoda))) print(df_p_gcoda$l_p[[i]]) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.