knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

biomeUtils

biomeUtils is part of the RIVM-ToolBox project aimed at providing standard set of tools that interact with open tools for a wide array of data analytics, including microbiomics. The RIVM-ToolBox is a set of individual R tools focused towards different goals/functionalities.

Setup

Install

devtools::install_github("RIVM-IIV-Microbiome/biomeUtils")

Load pkg

library(biomeUtils)

Data handling

Data

data("FuentesIliGutData")
FuentesIliGutData

Get tibbles

data("FuentesIliGutData")
# get otu table in tibble format
otu_tib <- getAbundanceTibble(FuentesIliGutData,
                              select_rows = c("ASV302", "ASV82", "ASV410", "ASV2332"),
                              select_cols = c("sample_1", "sample_5", "sample_6"),
                              column_id = "FeatureID")

# get taxa table in tibble format
tax_tib <- getTaxaTibble(FuentesIliGutData,
                         select_rows = c("ASV302", "ASV82", "ASV410", "ASV2332"),
                         select_cols = c("Phylum", "Genus"),
                         column_id = "FeatureID")

# get sample data in tibble format
meta_tib <- getSampleTibble(FuentesIliGutData,
                            select_rows = c("sample_1", "sample_5", "sample_6"),
                            select_cols = c("participant_id", "ILI", "age"),
                            column_id = "FeatureID")

Subset/Filter phyloseq

# Filter by phyloseq like subset_*
ps.filtered.samples  <- filterSampleData(FuentesIliGutData,
                                         ILI == "C" & BMI < 26)
ps.filtered.samples

ps.filtered.taxa  <- filterTaxaData(FuentesIliGutData,
                                    Phylum=="Firmicutes")

ps.filtered.taxa

# Filter by names like prune_*
sams.select <- sample_names(FuentesIliGutData)[1:10]
ps.filter.by.sam.names <- filterSampleByNames(FuentesIliGutData,
                                               ids = sams.select,
                                               keep = TRUE)

tax.select <- taxa_names(FuentesIliGutData)[1:10]
ps.filter.by.tax.names <- filterTaxaByNames(FuentesIliGutData,
                                           ids = tax.select,
                                           keep = TRUE)

Check for polyphyly

Check for polyphyletic taxa in tax_table. Useful to check this before aggregating at any level. Here, for e.g. Eubacterium is is in both Lachnospiraceae and EUbacteriaceae family.

library(biomeUtils)
library(dplyr)
data("FuentesIliGutData")
polydf <- checkPolyphyletic(FuentesIliGutData,
                            taxa_level = "Genus",
                            return_df = TRUE)
polydf

Compare phyloseqs

library(biomeUtils)
data("FuentesIliGutData")
ps1 <- filterSampleData(FuentesIliGutData, ILI == "C")

ps2 <- filterSampleData(FuentesIliGutData, ILI == "L1")

ps3 <- filterSampleData(FuentesIliGutData, ILI == "L2")


ps.list <- c("C" = ps1, "L1" = ps2, "L2" = ps3)

comparePhyloseq(ps.list)

Calculations

Phylogenetic diversity

library(biomeUtils)
data("FuentesIliGutData")
# reduce size for example
ps1 <- filterSampleData(FuentesIliGutData, ILI == "C")

meta_tib <- calculatePD(ps1, justDF=TRUE)
# check
meta_tib[c(1,2,3),c("PD", "SR")]

Prevalence

data("FuentesIliGutData")
prev_tib <- getPrevalence(FuentesIliGutData,
                          return_rank= c("Family", "Genus"),
                          return_taxa = c("ASV4", "ASV17" , "ASV85", "ASV83"),
                          sort=TRUE)
head(prev_tib)

Microbiota Uniqueness

Extracts the minimum value from a \code{dissimilarity} matrix for each individual. This is the dissimilarity of an individual from their nearest neighbor. Here, the option of using a one or more reference samples is provided. see the man page. The original article (cite when using this) did all versus all samples dissimilarities. Wilmanski T, et al., (2021) Gut microbiome pattern reflects healthy ageing and predicts survival in humans. Nature metabolism

library(biomeUtils)
data("FuentesIliGutData")
ps <- getProportions(FuentesIliGutData)
dist.mat <- phyloseq::distance(ps, "bray")
muniq <- uniqueness(ps,
                    dist_mat=dist.mat,
                    reference_samples = NULL)
#head(muniq)

ggplot(muniq,aes(age,uniqueness)) +
  geom_point(alpha=0.5) +
  geom_smooth(method = "lm") +
  theme_bw()

Fold Difference

library(biomeUtils)

data("FuentesIliGutData")
# Keep only two groups
ps1 <- filterSampleData(FuentesIliGutData, ILI != "L2")

taxa_fd <- calculateTaxaFoldDifference(ps1, group="ILI")
# check
head(taxa_fd)

Melting

library(biomeUtils)
data("FuentesIliGutData")
ps <- filterSampleData(FuentesIliGutData, ILI != "L2")
ps <- phyloseq::rarefy_even_depth(ps)
dist.mat <- phyloseq::distance(ps, "bray")
dist.melt.sample <- meltDistanceToTable(ps,
                                        dist_mat = dist.mat,
                                        name_dist_column = "Bray-Curtis",
                                        select_cols = c("participant_id", "ILI"))
head(dist.melt.sample)

Pipe'ing' steps

Read %>% as and then
Below we use four R packages in combination to check for differences in ASVs between two groups.

library(biomeUtils)
library(ggplot2)
library(microbiome)
library(dplyr) # pipe function
data("FuentesIliGutData")

# Take FuentesIliGutData phyloseq object and then
FuentesIliGutData %>% 
  # join genus and species columns 
  uniteGenusSpeciesNames() %>% 
  # Remove 'L1' group and keep those with BMI less than 30 and then
  filterSampleData(ILI != "L1" & BMI < 30) %>% 
  # convert to rel. abundance and then
  microbiome::transform(., "compositional") %>% 
  # select ASVs with 0.0001 in at least 1% samples and then
  microbiome::core(., detection = 0.0001, prevalence = 0.01) %>% 
  # calculate foldchange and then 
  calculateTaxaFoldDifference(group="ILI")  %>% 
  # select major fold change ASVs and then plot 
  filter(abs(FoldDifference) >= 0.5 & 
           Prevalence.C != 0 &  # Remove ASVs not detected in Controls  
           Prevalence.L2 != 0) %>% # Remove ASVs not detected in L2
  # join genus name to FeatureID
  mutate(asv.genus = paste0(FeatureID, "-", Genus)) %>% 
  # Plot fold difference Notice that from here on below we use '+'
  ggplot(aes(FoldDifference, reorder(asv.genus, FoldDifference))) +
  geom_col(aes(fill=Enriched)) + 
  ylab("ASVs") + 
  scale_fill_manual(values = c( C = "steelblue", L2 = "brown3")) +
  theme_bw()
sessionInfo()


RIVM-IIV-Microbiome/biomeUtils documentation built on July 20, 2023, 10:29 a.m.