knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
biomeUtils
is part of the RIVM-ToolBox project aimed at providing standard set of tools that interact with open tools for a wide array of data analytics, including microbiomics. The RIVM-ToolBox is a set of individual R tools focused towards different goals/functionalities.
biomeUtils
: Data handling
Outputs for standard data generating pipelines/workflows.
biomeStats
: Data analytics
Common data analytics including basic statistics.
biomeViz
: Data visualization
Data visualization of different data types.
devtools::install_github("RIVM-IIV-Microbiome/biomeUtils")
library(biomeUtils)
data("FuentesIliGutData") FuentesIliGutData
data("FuentesIliGutData") # get otu table in tibble format otu_tib <- getAbundanceTibble(FuentesIliGutData, select_rows = c("ASV302", "ASV82", "ASV410", "ASV2332"), select_cols = c("sample_1", "sample_5", "sample_6"), column_id = "FeatureID") # get taxa table in tibble format tax_tib <- getTaxaTibble(FuentesIliGutData, select_rows = c("ASV302", "ASV82", "ASV410", "ASV2332"), select_cols = c("Phylum", "Genus"), column_id = "FeatureID") # get sample data in tibble format meta_tib <- getSampleTibble(FuentesIliGutData, select_rows = c("sample_1", "sample_5", "sample_6"), select_cols = c("participant_id", "ILI", "age"), column_id = "FeatureID")
# Filter by phyloseq like subset_* ps.filtered.samples <- filterSampleData(FuentesIliGutData, ILI == "C" & BMI < 26) ps.filtered.samples ps.filtered.taxa <- filterTaxaData(FuentesIliGutData, Phylum=="Firmicutes") ps.filtered.taxa # Filter by names like prune_* sams.select <- sample_names(FuentesIliGutData)[1:10] ps.filter.by.sam.names <- filterSampleByNames(FuentesIliGutData, ids = sams.select, keep = TRUE) tax.select <- taxa_names(FuentesIliGutData)[1:10] ps.filter.by.tax.names <- filterTaxaByNames(FuentesIliGutData, ids = tax.select, keep = TRUE)
Check for polyphyletic taxa in tax_table
. Useful to check this before aggregating at any level. Here, for e.g. Eubacterium is is in both Lachnospiraceae
and EUbacteriaceae
family.
library(biomeUtils) library(dplyr) data("FuentesIliGutData") polydf <- checkPolyphyletic(FuentesIliGutData, taxa_level = "Genus", return_df = TRUE) polydf
library(biomeUtils) data("FuentesIliGutData") ps1 <- filterSampleData(FuentesIliGutData, ILI == "C") ps2 <- filterSampleData(FuentesIliGutData, ILI == "L1") ps3 <- filterSampleData(FuentesIliGutData, ILI == "L2") ps.list <- c("C" = ps1, "L1" = ps2, "L2" = ps3) comparePhyloseq(ps.list)
library(biomeUtils) data("FuentesIliGutData") # reduce size for example ps1 <- filterSampleData(FuentesIliGutData, ILI == "C") meta_tib <- calculatePD(ps1, justDF=TRUE) # check meta_tib[c(1,2,3),c("PD", "SR")]
data("FuentesIliGutData") prev_tib <- getPrevalence(FuentesIliGutData, return_rank= c("Family", "Genus"), return_taxa = c("ASV4", "ASV17" , "ASV85", "ASV83"), sort=TRUE) head(prev_tib)
Extracts the minimum value from a \code{dissimilarity} matrix for each individual. This is the dissimilarity of an individual from their nearest neighbor. Here, the option of using a one or more reference samples is provided. see the man page. The original article (cite when using this) did all versus all samples dissimilarities. Wilmanski T, et al., (2021) Gut microbiome pattern reflects healthy ageing and predicts survival in humans. Nature metabolism
library(biomeUtils) data("FuentesIliGutData") ps <- getProportions(FuentesIliGutData) dist.mat <- phyloseq::distance(ps, "bray") muniq <- uniqueness(ps, dist_mat=dist.mat, reference_samples = NULL) #head(muniq) ggplot(muniq,aes(age,uniqueness)) + geom_point(alpha=0.5) + geom_smooth(method = "lm") + theme_bw()
library(biomeUtils) data("FuentesIliGutData") # Keep only two groups ps1 <- filterSampleData(FuentesIliGutData, ILI != "L2") taxa_fd <- calculateTaxaFoldDifference(ps1, group="ILI") # check head(taxa_fd)
library(biomeUtils) data("FuentesIliGutData") ps <- filterSampleData(FuentesIliGutData, ILI != "L2") ps <- phyloseq::rarefy_even_depth(ps) dist.mat <- phyloseq::distance(ps, "bray") dist.melt.sample <- meltDistanceToTable(ps, dist_mat = dist.mat, name_dist_column = "Bray-Curtis", select_cols = c("participant_id", "ILI")) head(dist.melt.sample)
Read %>%
as and then
Below we use four R packages in combination to check for differences in ASVs between two groups.
library(biomeUtils) library(ggplot2) library(microbiome) library(dplyr) # pipe function data("FuentesIliGutData") # Take FuentesIliGutData phyloseq object and then FuentesIliGutData %>% # join genus and species columns uniteGenusSpeciesNames() %>% # Remove 'L1' group and keep those with BMI less than 30 and then filterSampleData(ILI != "L1" & BMI < 30) %>% # convert to rel. abundance and then microbiome::transform(., "compositional") %>% # select ASVs with 0.0001 in at least 1% samples and then microbiome::core(., detection = 0.0001, prevalence = 0.01) %>% # calculate foldchange and then calculateTaxaFoldDifference(group="ILI") %>% # select major fold change ASVs and then plot filter(abs(FoldDifference) >= 0.5 & Prevalence.C != 0 & # Remove ASVs not detected in Controls Prevalence.L2 != 0) %>% # Remove ASVs not detected in L2 # join genus name to FeatureID mutate(asv.genus = paste0(FeatureID, "-", Genus)) %>% # Plot fold difference Notice that from here on below we use '+' ggplot(aes(FoldDifference, reorder(asv.genus, FoldDifference))) + geom_col(aes(fill=Enriched)) + ylab("ASVs") + scale_fill_manual(values = c( C = "steelblue", L2 = "brown3")) + theme_bw()
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.