knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(HealthyOralMicrobiome) library(corncob) library(phyloseq) library(dplyr) library(tibble) library(gplots)
#Load in metadata Metadata <- HealthyOralMicrobiome::Healthy_Metadata dim(Metadata) #load in ASV table ASV_tab <- HealthyOralMicrobiome::Healthy_ASV_table dim(ASV_tab) #load in Genus table Genus_tab <- HealthyOralMicrobiome::Healthy_Genus_Table dim(Genus_tab) ### Filter the samples Comp_ASV_data <- Filt_samples(depth=5000, setType="complete", Taxa_table = ASV_tab, Metadata = Metadata, parallel = 20, cutoff_prop = 0.1) Comp_Genus_data <- Filt_samples(depth=5000, setType="complete", Taxa_table = Genus_tab, Metadata = Metadata, parallel = 20, cutoff_prop = 0.1) ### finally we will scale the data so that the values will be comparable to one another scaled_metadata_g<- Comp_Genus_data[["Metadata"]] %>% rownames_to_column('sample_name') %>% mutate_if(is.numeric, scale) %>% column_to_rownames('sample_name') scaled_metadata_a <- Comp_ASV_data[["Metadata"]] %>% rownames_to_column('sample_name') %>% mutate_if(is.numeric, scale) %>% column_to_rownames('sample_name') features_to_test <- c("A_SDC_AGE_CALC", "PM_STANDING_HEIGHT_AVG", "PM_WAIST_AVG", "PM_BIOIMPED_WEIGHT", "PM_WAIST_HIP_RATIO", "PM_BIOIMPED_FFM", "SALT_SEASONING", "NUTS_SEEDS_SERVINGS_PER_DAY", "NUT_VEG_DAY_QTY", "REFINED_GRAIN_SERVINGS_DAY_QTY", "A_SLE_LIGHT_EXP", "A_SDC_GENDER", "PM_BIOIMPED_BMI", "NUT_JUICE_DAY_QTY", "A_HS_DENTAL_VISIT_LAST")
### create phyloseq object to analysis ASVs_comp <- phyloseq::otu_table(Comp_ASV_data[['Taxa_Tab']], taxa_are_rows = T) Meta_comp <- phyloseq::sample_data(scaled_metadata_a) phylo_comp <- phyloseq::phyloseq(ASVs_comp, Meta_comp) features_to_test <- c("A_SDC_AGE_CALC", "PM_STANDING_HEIGHT_AVG", "PM_WAIST_AVG", "PM_BIOIMPED_WEIGHT", "PM_WAIST_HIP_RATIO", "PM_BIOIMPED_FFM", "SALT_SEASONING", "NUTS_SEEDS_SERVINGS_PER_DAY", "NUT_VEG_DAY_QTY", "REFINED_GRAIN_SERVINGS_DAY_QTY", "A_SLE_LIGHT_EXP", "A_SDC_GENDER", "PM_BIOIMPED_BMI", "NUT_JUICE_DAY_QTY", "A_HS_DENTAL_VISIT_LAST") #list of features that were significant in beta diversity analysis. cl <- parallel::makeCluster(15) doParallel::registerDoParallel(cl) foreach::getDoParWorkers() `%dopar%` <- foreach::`%dopar%` Comp_ASV_CC_Res <- foreach::foreach(i=1:15) %dopar% HealthyOralMicrobiome::run_corn_cob_analysis(features_to_test[i], phylo = phylo_comp) parallel::stopCluster(cl) ### Genus Analysis Genus_comp <- phyloseq::otu_table(Comp_Genus_data[["Taxa_Tab"]], taxa_are_rows = T) Meta_comp <- phyloseq::sample_data(scaled_metadata_g) phylo_comp_g <- phyloseq::phyloseq(Genus_comp, Meta_comp) cl <- parallel::makeCluster(15) doParallel::registerDoParallel(cl) foreach::getDoParWorkers() Genus_corn_cob_res <- foreach::foreach(i=1:15) %dopar% HealthyOralMicrobiome::run_corn_cob_analysis(features_to_test[i], phylo=phylo_comp_g) parallel::stopCluster(cl)
## Genus analysis first Genus_CC_res <- HealthyOralMicrobiome::Comp_Genus_CC_Res features_to_test <- c("A_SDC_AGE_CALC", "PM_STANDING_HEIGHT_AVG", "PM_WAIST_AVG", "PM_BIOIMPED_WEIGHT", "PM_WAIST_HIP_RATIO", "PM_BIOIMPED_FFM", "SALT_SEASONING", "NUTS_SEEDS_SERVINGS_PER_DAY", "NUT_VEG_DAY_QTY", "REFINED_GRAIN_SERVINGS_DAY_QTY", "A_SLE_LIGHT_EXP", "A_SDC_GENDER", "PM_BIOIMPED_BMI", "NUT_JUICE_DAY_QTY", "A_HS_DENTAL_VISIT_LAST") ## Extract Genus level p-values for each feature tested G_CC_scaled_p <- list() for(i in 1:15){ G_CC_scaled_p[[i]] <- Genus_CC_res[[i]]$p_fdr } G_CC_scaled_p_df <- do.call(rbind, G_CC_scaled_p) G_CC_scaled_p_df <- data.frame(t(G_CC_scaled_p_df)) colnames(G_CC_scaled_p_df) <- features_to_test #remove any taxa that weren't significant in atleast on feature filt_G_CC_scaled_p_df <- G_CC_scaled_p_df[which(apply(G_CC_scaled_p_df, 1, function(x) {any(x <= 0.1)})),] dim(filt_G_CC_scaled_p_df) ### leaves 45 taxa keep_genus <- rownames(filt_G_CC_scaled_p_df) ## get coefficents for each remaining model G_Corn_cob_coef <- data.frame() i <- 0 for(feat in features_to_test){ i <- i +1 if(feat=="A_SDC_GENDER"){ feat <- "A_SDC_GENDER2" } message(feat) feat_name <- paste("mu.",feat,sep="") for(j in 1:length(Genus_CC_res[[i]]$all_models)){ G_Corn_cob_coef[j,i] <- Genus_CC_res[[i]]$all_models[[j]]$coefficients[,1][which(rownames(Genus_CC_res[[i]]$all_models[[j]]$coefficients)==feat_name)] } } rownames(G_Corn_cob_coef) <- rownames(Genus_CC_res[[1]]$data@otu_table) colnames(G_Corn_cob_coef) <- features_to_test #rename taxa filt_G_Corn_cob_coef <- G_Corn_cob_coef[rownames(filt_G_CC_scaled_p_df),] renamed_filt_G_Corn_cob_coef <- filt_G_Corn_cob_coef rownames(renamed_filt_G_Corn_cob_coef) <- gsub(".*D_4", "D_4", rownames(renamed_filt_G_Corn_cob_coef)) rownames(renamed_filt_G_Corn_cob_coef) <- gsub("D_4__", "", rownames(renamed_filt_G_Corn_cob_coef)) rownames(renamed_filt_G_Corn_cob_coef) <- gsub("D_5__", "", rownames(renamed_filt_G_Corn_cob_coef)) rownames(renamed_filt_G_Corn_cob_coef) <- gsub(";", " ", rownames(renamed_filt_G_Corn_cob_coef)) ## manually fix some rows... rownames(renamed_filt_G_Corn_cob_coef) rownames(renamed_filt_G_Corn_cob_coef)[29] <- "Veillonellaceae unclassified" rownames(renamed_filt_G_Corn_cob_coef)[34] <- "Neisseriaceae unclassified" rownames(renamed_filt_G_Corn_cob_coef)[41] <- "Mollicutes uncultured" #set effect non-significant effect sizes to 0 set_na_filt_G_Corn_cob_coef <- renamed_filt_G_Corn_cob_coef set_na_filt_G_Corn_cob_coef[filt_G_CC_scaled_p_df > 0.1] <- 0 #set column names colnames(set_na_filt_G_Corn_cob_coef) colnames(set_na_filt_G_Corn_cob_coef) <- c("Age", "Height", "Waist Size", "Weight", "Waist Hip Ratio", "Fat Free Mass", "Salt Usage", "Nut/Seed Servings", "Vegetable Servings", "Refined Grain Servings", "Sleeping Light Exposure", "Sex", "Body Mass Index", "Juice Servings", "Last Dental Visit") #fix Column label for Sex table(Genus_CC_res[[1]]$data@sam_data$A_SDC_GENDER) set_na_filt_G_Corn_cob_coef$Sex <- -set_na_filt_G_Corn_cob_coef$Sex colnames(set_na_filt_G_Corn_cob_coef)[12] <- "Male*" #get phylums of each taxa Phylums <- gsub("D_0__.*;D_1__", "", rownames(filt_G_Corn_cob_coef)) Phylums <- gsub(";D_2__.*", "", Phylums) Phylums #set colors for each phylum taxa_colors <- vector() for(taxa in 1:length(Phylums)){ if(Phylums[[taxa]] == "Actinobacteria"){ taxa_colors[[taxa]] <- "Red" }else if(Phylums[[taxa]]=="Bacteroidetes"){ taxa_colors[[taxa]] <- "Blue" }else if(Phylums[[taxa]]=="Cyanobacteria"){ taxa_colors[[taxa]] <- "Orange" }else if(Phylums[[taxa]]=="Firmicutes"){ taxa_colors[[taxa]] <- "Brown" }else if(Phylums[[taxa]]=="Fusobacteria"){ taxa_colors[[taxa]] <- "Purple" }else if(Phylums[[taxa]]=="Proteobacteria"){ taxa_colors[[taxa]] <- "#664163" }else if(Phylums[[taxa]]=="Spirochaetes"){ taxa_colors[[taxa]] <- "Black" }else if(Phylums[[taxa]]=="Synergistetes"){ taxa_colors[[taxa]] <- "Cyan" }else if(Phylums[[taxa]]=="Tenericutes"){ taxa_colors[[taxa]] <- "#4a421f" }else print("error") } #set up color scale for heatmap breaks=seq(-2, 2, length.out = 180) #41 values #now add outliers breaks=append(breaks, 10000) breaks=append(breaks, -10000, 0) #create colour panel with length(breaks)-1 colours mycol <- colorpanel(n=length(breaks)-1,low="blue",mid="white",high="brown") #set plot dims layout_mat <- rbind(c(0,0,4), c(2,1,3), c(0,0,0)) lwid=c(.1,2.75,1.2) lhei=c(.2,3,.3) #plot gen_heat <- heatmap.2(as.matrix(set_na_filt_G_Corn_cob_coef), na.color="black", density.info = "none", symkey = F, trace="none", srtCol=35, dendrogram = "none", key.xlab = "log odds", key.ylab = "p Value", key=T, col=mycol, lmat = layout_mat, lwid = lwid, lhei = lhei, cexRow = 1.3, cexCol = 1.3, distfun=function(x) dist(x, method="manhattan"), colRow = taxa_colors, Rowv = F, rowsep = c(2, 10,29,30, 38, 39, 40), sepcolor = "black", keysize=0.2, key.par = list(cex=0.01, cex.axis=60, cex.main=60, mar=c(50,0,50,10), mgp=c(.01,50,1), tcl=-10)) #### Now lets make the ASV heatmap... ASV_CC_res <- HealthyOralMicrobiome::Comp_ASV_CC_Res ACC_scaled_p <- list() for(i in 1:15){ ACC_scaled_p[[i]] <- ASV_CC_res[[i]]$p_fdr } ACC_scaled_p_df <- do.call(rbind, ACC_scaled_p) ACC_scaled_p_df <- data.frame(t(ACC_scaled_p_df)) colnames(ACC_scaled_p_df) <- features_to_test #remove ASVs that were not significant in atleast one feature filt_ACC_scaled_p_df <- ACC_scaled_p_df[which(apply(ACC_scaled_p_df, 1, function(x) {any(x <= 0.1)})),] dim(filt_ACC_scaled_p_df) ASV_to_keep <- rownames(filt_ACC_scaled_p_df) ACorn_cob_coef <- data.frame() i <- 0 for(feat in features_to_test){ i <- i +1 if(feat=="A_SDC_GENDER"){ feat <- "A_SDC_GENDER2" } feat_name <- paste("mu.",feat,sep="") for(j in 1:length(ASV_CC_res[[i]]$all_models)){ ACorn_cob_coef[j,i] <- ASV_CC_res[[i]]$all_models[[j]]$coefficients[,1][which(rownames(ASV_CC_res[[i]]$all_models[[j]]$coefficients)==feat_name)] } } rownames(ACorn_cob_coef) <- rownames(ASV_CC_res[[1]]$data@otu_table) colnames(ACorn_cob_coef) <- features_to_test filt_ACorn_cob_coef <- ACorn_cob_coef[rownames(filt_ACC_scaled_p_df),] renamed_filt_ACorn_cob_coef <- filt_ACorn_cob_coef set_na_filt_ACorn_cob_coef <- renamed_filt_ACorn_cob_coef set_na_filt_ACorn_cob_coef[filt_ACC_scaled_p_df > 0.1] <- 0 colnames(set_na_filt_ACorn_cob_coef) <- c("Age", "Height", "Waist Size", "Weight", "Waist Hip Ratio", "Fat Free Mass", "Salt Usage", "Nut/Seed Servings", "Vegetable Servings", "Refined Grain Servings", "Sleeping Light Exposure", "Sex", "Body Mass Index", "Juice Servings", "Last Dental Visit") set_na_filt_ACorn_cob_coef$Sex <- -set_na_filt_ACorn_cob_coef$Sex colnames(set_na_filt_ACorn_cob_coef)[12] <- "Male*" ASV_taxa_labels <- HealthyOralMicrobiome::Taxa_Labels taxa_asv <- match(rownames(set_na_filt_ACorn_cob_coef), rownames(ASV_taxa_labels)) taxa_asv ### only keep first 5 ASV characters rownames(set_na_filt_ACorn_cob_coef) <- substring(rownames(set_na_filt_ACorn_cob_coef), 1, 5) rownames(set_na_filt_ACorn_cob_coef) <- paste0("ASV-", rownames(set_na_filt_ACorn_cob_coef)) ## okay now need to paste lowest taxaonomic assignment taxa_rename <- ASV_taxa_labels taxa_rename$Taxon <- gsub(".*D_5", "D_5", taxa_rename$Taxon) taxa_rename$Taxon <- gsub("D_5__", "", taxa_rename$Taxon) taxa_rename$Taxon <- gsub("D_6__", "", taxa_rename$Taxon) taxa_rename$Taxon <- gsub(";", " ", taxa_rename$Taxon) rownames(set_na_filt_ACorn_cob_coef) <- paste(rownames(set_na_filt_ACorn_cob_coef), taxa_rename$Taxon[taxa_asv], sep=" ") #manually curate the rest rownames(set_na_filt_ACorn_cob_coef) <- gsub(" bacterium", "", rownames(set_na_filt_ACorn_cob_coef)) rownames(set_na_filt_ACorn_cob_coef)[1] <- "ASV-0c2af Treponema 2 socranskii subsp. socranskii" rownames(set_na_filt_ACorn_cob_coef)[3] <- "ASV-10d24 Haemophilus haemolyticus" rownames(set_na_filt_ACorn_cob_coef)[4] <- "ASV-18820 Alloprevotella uncultured" rownames(set_na_filt_ACorn_cob_coef)[7] <- "ASV-1e8e5 Oribacterium sinus" rownames(set_na_filt_ACorn_cob_coef)[10] <- "ASV-2f0df Megasphaera micronuciformis" rownames(set_na_filt_ACorn_cob_coef)[12] <- "ASV-43ec6 Capnocytophaga gingivalis" rownames(set_na_filt_ACorn_cob_coef)[14] <- "ASV-4ca02 Selenomonas uncultured" rownames(set_na_filt_ACorn_cob_coef)[15] <- "ASV-4fcb9 Prevotella uncultured" rownames(set_na_filt_ACorn_cob_coef)[16] <- "ASV-50e51 Prevotella sp. oral taxon 299 str. F0039" rownames(set_na_filt_ACorn_cob_coef)[24] <- "ASV-77909 Prevotella 7 denticola" rownames(set_na_filt_ACorn_cob_coef)[30] <- "ASV-9a2e6 Mogibacterium unclassified" rownames(set_na_filt_ACorn_cob_coef)[31] <- "ASV-a0a4a Lachnospiraceae uncultured" rownames(set_na_filt_ACorn_cob_coef)[33] <- "ASV-b8b0d Capnocytophaga granulosa" rownames(set_na_filt_ACorn_cob_coef)[35] <- "ASV-c891c Bacteroidetes F0058 uncultured" rownames(set_na_filt_ACorn_cob_coef)[36] <- "ASV-ca12e Streptococcus anginosus subsp. anginosus" rownames(set_na_filt_ACorn_cob_coef)[42] <- "ASV-eec30 Prevotella unclassified" ASV_phylums <- gsub("D_0__.*;D_1__", "", ASV_taxa_labels$Taxon[taxa_asv]) ASV_phylums <- gsub("D_2__.*","", ASV_phylums) ASV_phylums <- gsub(";", "", ASV_phylums) ASV_phylums order_ASV_phylums <- ASV_phylums[order(ASV_phylums)] row_reorder <- set_na_filt_ACorn_cob_coef[order(ASV_phylums),] taxa_colors <- vector() for(taxa in 1:length(order_ASV_phylums)){ if(order_ASV_phylums[[taxa]] == "Actinobacteria"){ taxa_colors[[taxa]] <- "Red" }else if(order_ASV_phylums[[taxa]]=="Bacteroidetes"){ taxa_colors[[taxa]] <- "Blue" }else if(order_ASV_phylums[[taxa]]=="Cyanobacteria"){ taxa_colors[[taxa]] <- "Orange" }else if(order_ASV_phylums[[taxa]]=="Firmicutes"){ taxa_colors[[taxa]] <- "Brown" }else if(order_ASV_phylums[[taxa]]=="Fusobacteria"){ taxa_colors[[taxa]] <- "Purple" }else if(order_ASV_phylums[[taxa]]=="Proteobacteria"){ taxa_colors[[taxa]] <- "#664163" }else if(order_ASV_phylums[[taxa]]=="Spirochaetes"){ taxa_colors[[taxa]] <- "Black" }else if(order_ASV_phylums[[taxa]]=="Synergistetes"){ taxa_colors[[taxa]] <- "Cyan" }else if(order_ASV_phylums[[taxa]]=="Tenericutes"){ taxa_colors[[taxa]] <- "Gray" }else if(order_ASV_phylums[[taxa]]=="Epsilonbacteraeota"){ taxa_colors[[taxa]] <- "#42256f" }else print("error") } reorder_set_na_filt_ACorn_cob_coef <- row_reorder[,c(12,6,2,8,10,7,11,3,13,4,14,9,5,1,15)] layout_mat <- rbind(c(4,0,0), c(2,1,0), c(0,3,0)) lwid=c(2,3.5,.00001) lhei=c(.2,3,.3) breaks=seq(-2, 2, length.out = 200) #41 values #now add outliers breaks=append(breaks, 10000) breaks=append(breaks, -10000, 0) #create colour panel with length(breaks)-1 colours mycol <- colorpanel(n=length(breaks)-1,low="blue",mid="white",high="brown") ASV_heat <- heatmap.2(as.matrix(reorder_set_na_filt_ACorn_cob_coef),na.color="black", density.info = "none", symkey = F, trace="none", srtCol=35, dendrogram = "none", key.xlab = "log odds", key.ylab = "p Value", key=T, col=mycol, lmat = layout_mat, lwid = lwid, lhei = lhei, cexRow = 1.3, cexCol = 1.3, Colv=F, distfun=function(x) dist(x, method="manhattan"), colRow = taxa_colors, Rowv=F, offsetRow = -56.5, sepcolor = "black", rowsep = c(7, 20, 35, 37, 41), keysize=0.2, key.par = list(cex=0.01, cex.axis=60, cex.main=60, mar=c(50,50,50,50), mgp=c(.01,50,1), tcl=-10)) ### done
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.