knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(biomeStats) library(biomeUtils) library(microbiome) library(dplyr)
dis_col <- c(L1="#6883ba", C = "#166f65", `NA`="black")
data("FuentesIliGutData") ps <- subset_samples(FuentesIliGutData, ILI %in% c("C", "L1")) ps.gen <- tax_glom(ps, "Genus") #saveRDS(ps.gen, "data-raw/ps.gen.rds") ps.gen <- readRDS("../data-raw/ps.gen.rds") ps.rel <- microbiome::transform(ps.gen, "compositional") taxa_names(ps.rel) <- make.unique(tax_table(ps.rel)[,"Genus"]) sample_data(ps.rel)$sample_ids <- sample_names(ps.rel)
sample_variables(ps.rel)[1:10] # "ILI" "BMI" "sex" "age" # "antidepressants" "antibiotics" "T2.diabetes.medication"
getSampleTibble(ps.rel) %>% dplyr::count(ILI)
biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(sex) biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(antidepressants) biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(anticoagulants) biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(antibiotics) biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(statins) biomeUtils::getSampleTibble(ps.rel) %>% dplyr::count(alphabeta.blocker)
Prepare a tibble from phyloseq object. This tibble consists of ID (for sample) relative abundance of taxa, average and SD of counts of taxa in sample, prop.positive, entropy and C.V.
gen_tab <- prepTaxaAbundanceData(ps.rel, detection_threshold= 0.001, prevalence_threshold=50/100) dim(gen_tab)
There can be numerous variables that a user might want to test. It is advised to select and specify variables of interest. This has to be done manually because sample data my consists of several other data that are not useful, like primer/barcode sequences, internal codes from sequencing centre, etc.
#main_var <- "ILI" comord.vars <- c("BMI") #"antimicrobial","ARB", med.vars <- c("anticoagulants", "antibiotics", "statins","alphabeta.blocker") all.vars <- c(comord.vars,med.vars)
sample_data(ps.rel)$sex <- ifelse(sample_data(ps.rel)$sex =="M",0,1) sample_data(ps.rel)$antidepressants <- ifelse(sample_data(ps.rel)$antidepressants =="N",0,1) sample_data(ps.rel)$anticoagulants <- ifelse(sample_data(ps.rel)$anticoagulants =="N",0,1) sample_data(ps.rel)$antibiotics <- ifelse(sample_data(ps.rel)$antibiotics =="N",0,1) sample_data(ps.rel)$statins <- ifelse(sample_data(ps.rel)$statins =="N",0,1) sample_data(ps.rel)$alphabeta.blocker <- ifelse(sample_data(ps.rel)$alphabeta.blocker =="N",0,1)
phenotypic.data.1 <- prepSampleData(ps.rel, sample_ids = "sample_ids", reference_group ="C", compare = "ILI", confounder_variables = c("sex", "age"), variable_interest=all.vars) head(phenotypic.data.1)
frequency.of.strata <- table(phenotypic.data.1$stratum) #mean(frequency.of.strata) frequency.of.strata
# First column is sample ID so start from second column and stop at column 5 from last taxa_select <- colnames(gen_tab)[2:(ncol(gen_tab)-5)] # the last five columns are other <- c("average","SD","prop.positive","entropy","CV") #vars.test <- colnames(phenotypic.data.1)[c(2,5:39)] # get variables vars.test <- c("ILI",all.vars) data.set.1 <- phenotypic.data.1 %>% dplyr::left_join(gen_tab, by = "sample_ids") %>% dplyr::relocate(stratum, .after = last_col()) %>% dplyr::select(!c(sample_ids, sex, age, age.group)) # age and gender removed as we treat them as confounders # sum statistic with sex and age stratum as blocking factor. head(data.set.1) variables_to_test <- c("ILI",all.vars)
microbiome
IMPORTANT STEP
variables_to_test <- c("ILI",all.vars) # BMI is continuous variable TYPES <- c("categorical","continuous", rep("ordinal",4), rep("ordinal",length(taxa_select)), rep("continuous",5), "categorical") types <- c("categorical","continuous", rep("categorical",4), rep("dirac.and.continuous",length(taxa_select)), rep("continuous",5), "categorical")
#get column number that match bacterial abundance bac_cols <- which(colnames(data.set.1) %in% taxa_select) #no.of.factors <- length(variables_to_test) #aux.columns <- bac_cols #variables.of.interest <- colnames(data.set.1)[1:(ncol(data.set.1)-6)]
dir.create("../../test_stat") doAssociationAnalysis(data_for_testing = data.set.1, B = 1000, no_of_factors = length(variables_to_test), aux_columns = bac_cols, log_scale = TRUE, nominal_bound_on_FDR = 0.20, phen_data = phenotypic.data.1, compare_label ="case_control", aux_TYPES = TYPES, cat_ypes = types, path_loc="../../test_stat/", name_of_stratification= "Sex and age-group")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.