rm(list = ls()) library(magrittr) library(ggplot2) dir_output <- "results/DIABIMMUNE" dir.create(dir_output, showWarnings = TRUE, recursive = TRUE)
load("data/DIABIMMUNE/X_array.RData") l_fit_microTensor <- list() for(i in seq(1, 5)) { set.seed(i) l_fit_microTensor[[i]] <- microTensor::microTensor( X = X_array, R = 3, nn_t = TRUE, ortho_m = TRUE, weighted = TRUE, control = list(L_init = 2, gamma = 2, maxit = 1000, verbose = TRUE, debug_dir = paste0(dir_output, "/fit/", i))) } fit_microTensor <- l_fit_microTensor %>% purrr::map_dbl(~ min(.x$fitting$obj)) %>% {order(.)[1]} %>% {l_fit_microTensor[[.]]} save(fit_microTensor, file = paste0(dir_output, "/fit_microTensor.RData")) l_fit_ctf <- list() for(i in seq(1, 5)) { l_fit_ctf[[i]] <- microTensor::ctf( X = X_array, R = 3) } fit_ctf <- l_fit_ctf %>% purrr::map_dbl(~ min(.x$fitting$obj)) %>% {order(.)[1]} %>% {l_fit_ctf[[.]]} save(fit_ctf, file = paste0(dir_output, "/fit_ctf.RData")) fit_pca <- microTensor::pca(X_array, R = 3) save(fit_pca, file = paste0(dir_output, "/fit_pca.RData"))
load(paste0(dir_output, "/fit_microTensor.RData")) load(paste0(dir_output, "/fit_ctf.RData")) load(paste0(dir_output, "/fit_pca.RData")) load("data/DIABIMMUNE/X_array.RData") Yhat_microTensor <- microTensor::get_Yhat(fit_microTensor) expY <- exp(Yhat_microTensor) expYsum_m <- apply(expY, c(2, 3), sum) phat_microTensor <- vapply(seq_len(dim(X_array)[1]), function(i) expY[i, , ] / expYsum_m, matrix(0.0, nrow = dim(X_array)[2], ncol = dim(X_array)[3])) phat_microTensor <- aperm(phat_microTensor, perm = c(3, 1, 2)) Yhat_ctf <- microTensor::get_Yhat(fit_ctf) expY <- exp(Yhat_ctf) expYsum_m <- apply(expY, c(2, 3), sum) phat_ctf <- vapply(seq_len(dim(X_array)[1]), function(i) expY[i, , ] / expYsum_m, matrix(0.0, nrow = dim(X_array)[2], ncol = dim(X_array)[3])) phat_ctf <- aperm(phat_ctf, perm = c(3, 1, 2)) Yhat_pca_mat <- fit_pca$m %*% (t(fit_pca$s_full) * fit_pca$lambda) Yhat_pca <- array(NA, dim(X_array)) for(i in seq_len(nrow(Yhat_pca_mat))) { Yhat_pca[i, , ] <- Yhat_pca_mat[i, ] } expY <- exp(Yhat_pca) expYsum_m <- apply(expY, c(2, 3), sum) phat_pca <- vapply(seq_len(dim(X_array)[1]), function(i) expY[i, , ] / expYsum_m, matrix(0.0, nrow = dim(X_array)[2], ncol = dim(X_array)[3])) phat_pca <- aperm(phat_pca, perm = c(3, 1, 2)) phat_data <- array(NA, dim(X_array)) Xsum_m <- apply(X_array, c(2, 3), sum) phat_data <- vapply(seq_len(dim(X_array)[1]), function(i) X_array[i, , ] / Xsum_m, matrix(0.0, nrow = dim(X_array)[2], ncol = dim(X_array)[3])) phat_data <- aperm(phat_data, perm = c(3, 1, 2)) tb_diff <- list(phat_microTensor, phat_ctf, phat_pca) %>% purrr::map2_dfr( c("microTensor", "CTF", "PCA"), function(i_phat, i_method) { tibble::tibble( diff = abs(i_phat - phat_data) %>% apply(c(2, 3), mean) %>% as.vector(), method = i_method ) } ) %>% dplyr::filter(!is.na(diff)) %>% dplyr::mutate(method = factor(method, levels = c("PCA", "CTF", "microTensor"))) p_diff <- tb_diff %>% ggplot(aes(x = method, y = diff)) + geom_boxplot() + theme_bw() + theme(axis.title.x = element_blank()) + ylab("Absolute difference in observed and \n method-based microbial profiles")
load(paste0(dir_output, "/fit_microTensor.RData")) load(paste0(dir_output, "/fit_ctf.RData")) load(paste0(dir_output, "/fit_pca.RData")) load("data/DIABIMMUNE/df_samples.RData") df_subjects <- df_samples %>% dplyr::group_by(subjectid) %>% dplyr::slice(1) %>% dplyr::ungroup() load("data/DIABIMMUNE/X_array.RData") feature_names <- dimnames(X_array)[[1]] subject_names <- dimnames(X_array)[[2]] time_names <- dimnames(X_array)[[3]] mat_tax <- feature_names %>% stringr::str_split_fixed(stringr::fixed("|"), n = 6) percs <- fit_microTensor$lambda^2 / sum(fit_microTensor$lambda^2) * 100 ps <- c(1, 2) %>% purrr::map(function(r) { # feature df_m <- microTensor::create_loading(fit_microTensor, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "feature") %>% dplyr::mutate( feature_plot = microTensor::betterGeneraNames(mat_tax[, 3], mat_tax[, 4], mat_tax[, 5], mat_tax[, 6])) %>% dplyr::mutate(m = !!sym(paste0("Axis ", r))) df_plot <- rbind( df_m %>% dplyr::arrange(-df_m[["m"]]) %>% dplyr::slice(seq_len(5)), df_m %>% dplyr::arrange(df_m[["m"]]) %>% dplyr::slice(seq_len(5)) %>% {.[seq(5, 1), ]} ) %>% dplyr::mutate(feature_plot = factor(feature_plot, levels = rev(feature_plot))) p_feature <- df_plot %>% ggplot(aes(y = m, x = feature_plot)) + geom_bar(stat = "identity") + coord_flip() + theme_bw() + theme(axis.title.y = element_blank()) + ylab("Feature loading") + ggtitle(" ") # subject tb_loading <- microTensor::create_loading( fit_decomp = fit_microTensor, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "subject") tb_loading <- tb_loading %>% dplyr::left_join(df_samples %>% dplyr::group_by(subjectid) %>% dplyr::slice(1) %>% dplyr::ungroup(), by = c("Subject" = "subjectid")) %>% dplyr::mutate(Csection = ifelse(csection, "C-section", "Vaginal"), Loading = !!sym(paste0("Axis ", r))) p_subject <- tb_loading %>% ggplot(aes(x = Csection, y = Loading)) + geom_boxplot(outlier.shape = NA) + geom_point(position = position_jitter(width = 0.25)) + theme_bw() + xlab("") + ylab("Subject loading") + ggtitle("") return(list(p_feature, p_subject)) }) p_main <- cowplot::plot_grid(ps[[1]][[1]], ps[[1]][[2]], nrow = 1, rel_widths = c(1, 0.6), labels = c(paste0("B) Axis ", 1, " (", round(percs[1], 2), "%)"), "")) %>% cowplot::plot_grid(p_diff, ., labels = c("A)", ""), nrow = 1, rel_widths = c(0.5, 1)) ggsave(p_main, file = "figures/figure7/figure_DIABIMMUNE.pdf", width = 12, height = 4.5)
p_supps <- list(fit_pca, fit_ctf) %>% purrr::map2( c("PCA", "CTF"), function(fit_decomp, method) { percs <- fit_decomp$lambda^2 / sum(fit_decomp$lambda^2) * 100 ps <- c(1, 2) %>% purrr::map(function(r) { # feature df_m <- microTensor::create_loading( fit_decomp, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "feature") %>% dplyr::mutate( feature_plot = microTensor::betterGeneraNames(mat_tax[, 3], mat_tax[, 4], mat_tax[, 5], mat_tax[, 6])) %>% dplyr::mutate(m = !!sym(paste0("Axis ", r))) df_plot <- rbind( df_m %>% dplyr::arrange(-df_m[["m"]]) %>% dplyr::slice(seq_len(5)), df_m %>% dplyr::arrange(df_m[["m"]]) %>% dplyr::slice(seq_len(5)) %>% {.[seq(5, 1), ]} ) %>% dplyr::mutate(feature_plot = factor(feature_plot, levels = rev(feature_plot))) p_feature <- df_plot %>% ggplot(aes(y = m, x = feature_plot)) + geom_bar(stat = "identity") + coord_flip() + theme_bw() + theme(axis.title.y = element_blank()) + ylab("Feature loading") + ggtitle(" ") # subject tb_loading <- microTensor::create_loading( fit_decomp = fit_decomp, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "subject") tb_loading <- tb_loading %>% dplyr::left_join(df_samples %>% dplyr::group_by(subjectid) %>% dplyr::slice(1) %>% dplyr::ungroup(), by = c("Subject" = "subjectid")) %>% dplyr::mutate(Csection = ifelse(csection, "C-section", "Vaginal"), Loading = !!sym(paste0("Axis ", r))) p_subject <- tb_loading %>% ggplot(aes(x = Csection, y = Loading)) + geom_boxplot(outlier.shape = NA) + geom_point(position = position_jitter(width = 0.25)) + theme_bw() + xlab("") + ylab("Subject loading") + ggtitle("") return(list(p_feature, p_subject)) }) p <- cowplot::plot_grid(ps[[1]][[1]], ps[[1]][[2]], nrow = 1, rel_widths = c(1, 0.6), labels = c(paste0(" Axis ", 1, " (", round(percs[1], 2), "%)"), "")) return(p) }) p_supp <- cowplot::plot_grid(plotlist = p_supps, labels = c("A) PCA", "B) CTF"), ncol = 1) ggsave(p_supp, file = "figures/suppFigure/figure_DIABIMMUNE_supp.pdf", width = 7, height = 5)
list(fit_microTensor, fit_pca, fit_ctf) %>% purrr::map(function(fit_decomp) { tb_loading <- microTensor::create_loading(fit_decomp = fit_decomp, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "subject") tb_loading <- tb_loading %>% dplyr::left_join(df_samples %>% dplyr::group_by(subjectid) %>% dplyr::slice(1) %>% dplyr::ungroup(), by = c("Subject" = "subjectid")) %>% dplyr::mutate(Csection = ifelse(csection, "C-section", "Vaginal"), Loading = `Axis 1`) t.test(tb_loading$Loading[tb_loading$csection], tb_loading$Loading[!tb_loading$csection]) })
load("data/DIABIMMUNE/df_samples.RData") df_subjects <- df_samples %>% dplyr::group_by(subjectid) %>% dplyr::slice(1) %>% dplyr::ungroup() load("data/DIABIMMUNE/X_array.RData") feature_names <- dimnames(X_array)[[1]] subject_names <- dimnames(X_array)[[2]] time_names <- dimnames(X_array)[[3]] tb_loading <- microTensor::create_loading(fit_decomp = fit_microTensor, feature_names = feature_names, subject_names = subject_names, time_names = time_names, class = "sample") tb_loading <- tb_loading %>% dplyr::mutate(Time = as.integer(as.character(Time))) %>% dplyr::left_join(df_subjects, by = c("Subject" = "subjectid")) tb_loading_avg <- tb_loading %>% dplyr::mutate(`Birth mode` = ifelse(csection, "C-section", "Vaginal")) %>% dplyr::group_by(Time, `Birth mode`) %>% dplyr::summarise(mean_loading = mean(`Axis 1`, na.rm = TRUE), sd_loading = sd(`Axis 1`, na.rm = TRUE) / sqrt(dplyr::n())) %>% dplyr::ungroup() %>% dplyr::mutate(Month = Time - 1) p_time <- tb_loading_avg %>% dplyr::filter(!is.na(`Birth mode`)) %>% ggplot(aes(x = Month, y = mean_loading, color = `Birth mode`, shape = `Birth mode`)) + geom_line(position = position_dodge(width = 0.5)) + geom_point(position = position_dodge(width = 0.5), size = 2) + geom_errorbar(aes(ymax = mean_loading + sd_loading, ymin = mean_loading - sd_loading), position = position_dodge(width = 0.5), width = 0.5) + theme_bw() + ylab("Average sample loading") + ggtitle("Time trajectory") + theme(legend.position = c(0, 1), legend.justification = c(0, 1), legend.background = element_blank()) ggsave(p_time, filename = "figures/suppFigure/time_DIABIMMUNE.pdf", width = 5, height = 4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.