## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
fig.width = 6, fig.height = 4,
echo = TRUE,
message = TRUE,
collapse = TRUE,
result = 'asis',
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(knitr)
library(OCMSutility)
library(ggplot2)
library(dplyr)
library(tibble)
## -----------------------------------------------------------------------------
data(asv_example)
# 49 ASVs and 296 samples
dim(asv_example)
# 49 ASVs, with taxonomic levels in columns
dim(tax_example)
colnames(tax_example)
## ----filterFeature------------------------------------------------------------
data(dss_example)
# put featureID as rownames
tax_df <- dss_example$merged_taxonomy
count_df <- dss_example$merged_abundance_id %>%
column_to_rownames('featureID')
# set features in count tax to be in same order
count_df <- count_df[tax_df$featureID,]
filtered_ls <- filterFeature(count_df, tax_df, 'percent_sample', 0.001, 2)
summary(filtered_ls)
filtered_count <- filtered_ls$filtered
dim(filtered_count)
kable(head(filtered_count[,1:4]))
## ----relab--------------------------------------------------------------------
# get example data
data(asv_example)
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
rel_abundance <- relab(asv_counts)
## ----aggregateCount-----------------------------------------------------------
data(dss_example)
# featureID should be row names
feature_count <- dss_example$merged_abundance_id %>%
tibble::column_to_rownames('featureID')
# cleanup sample names
colnames(feature_count) <- paste0('id', colnames(feature_count))
# taxonomy table must have columns 'Kingdom','Phylum',
# 'Class','Order','Family','Genus','Species'
# and feature IDs in rownames
feature_tax <- dss_example$merged_taxonomy
# set row order of count and tax tables to be the same
feature_count <- feature_count[feature_tax$featureID,]
aggregated_list <- aggregateCount(feature_count, feature_tax,
aggregate_by = "Family")
summary(aggregated_list)
knitr::kable(head(aggregated_list[['count_df']][,1:5]))
knitr::kable(head(aggregated_list[['tax_df']]))
## ----reannotateTax-example1---------------------------------------------------
ex1 <- data.frame(ASV = paste0("ASV", 1:5),
Order = "order1",
Family = c(paste0("family", c(1,1,2,3)), 'unclassified'),
Genus = c("unclassified", 'genus1','unclassified','genus2',
"unclassified"),
read_count = 10)
knitr::kable(ex1)
## ----reannotateTax-example2---------------------------------------------------
ex2 <- ex1[,c('ASV','Order')]
ex2$Family <- c(paste0("family", c(1,1,2,3)), 'order1_unclassified')
ex2$Genus <- c('family1_unclassified','genus1','family2_unclassified','genus2',
'order1_unclassified')
ex2$read_count <- 10
knitr::kable(ex2)
## ----reannotateTax------------------------------------------------------------
# showing the dummy example
old_tax <- ex1[,2:4]
old_tax$Kingdom <- 'kingdom1'
old_tax$Phylum <- 'phylum1'
old_tax$Class <- 'class1'
old_tax$Species <- 'unclassified'
old_tax <- old_tax[, c('Kingdom','Phylum','Class','Order','Family','Genus','Species')]
old_tax[old_tax == 'unclassified'] <- NA
knitr::kable(old_tax)
new_tax <- reannotateTax(old_tax)
knitr::kable(new_tax)
# try with example data
data(asv_example)
# adding Kingdom column; removing sequence column because don't need asv IDs in this example
old_tax <- tax_example
colnames(old_tax)[1] <- 'Kingdom'
old_tax$Kingdom <- 'Bacteria'
knitr::kable(head(old_tax))
new_tax <- reannotateTax(old_tax)
knitr::kable(head(new_tax))
## ----plotPCoA-----------------------------------------------------------------
data(dss_example)
met_df <- dss_example$metadata
count_df <- dss_example$merged_abundance_id %>%
column_to_rownames('featureID')
count_df <- count_df[,met_df$sampleID]
relab <- relab(count_df)
iter_var <- c('Genotype','Phenotype')
plist <- list()
for(i in iter_var) {
plist[[i]] <- plotPCoA(relab, met_df, colour = i)
}
plist[[1]]
plist[[2]]
## ----plotPCA------------------------------------------------------------------
# get example data
data(asv_example)
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
asv_transformed <- clr(count_dataframe = asv_counts, return_as_dataframe = TRUE)
# generate some random metadata for the 295 samples - 5 time points with each individual
# having a data point at each time point
metadata <- data.frame(Timepoint = c(rep("Time 1", 59),
rep("Time 2", 59),
rep("Time 3", 59),
rep("Time 4", 59),
rep("Time 5", 59)),
Individual = as.character(c(rep(c(1:59), 5))),
row.names=colnames(asv_transformed),
stringsAsFactors = FALSE)
metadata$ID <- rownames(metadata)
pca_result <- prcomp(t(asv_transformed), scale = TRUE)
plot_data <- plotPCA(pca_result, metadata, colourby='Timepoint')
plot_data$p
# modify default plot
add_meta <- merge(plot_data$pdata, metadata, by = 'row.names' )
col_val <- getPalette(5, "Set3")
p <- plot_data$p +
scale_colour_manual(values = col_val) + # pick own colours
scale_shape_manual(values=21, guide = FALSE) + # change shape and remove from legend
geom_text(data = add_meta, aes(x = PC1, y = PC2, label = ID)) # add text label
p
## ----getPalette---------------------------------------------------------------
getPalette(n=10, palette="Set3", preview=TRUE)
## ----clr----------------------------------------------------------------------
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
clr_transformed <- clr(count_dataframe = asv_counts, return_as_dataframe = TRUE)
# returns data frame with transformed abundance estamtes with imputed zeroes
class(clr_transformed)
dim(clr_transformed)
## ----clr_object---------------------------------------------------------------
clr_transformed <- clr(count_dataframe = asv_counts, return_as_dataframe = FALSE)
# returns ALDEx2 object
class(clr_transformed)
## ----featurebox---------------------------------------------------------------
# get example data
data(asv_example)
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
# for plotting purposes we would transform the data e.g. clr
asv_clr <- clr(asv_counts)
# generate some random metadata for the 295 samples - 5 groups for example
metadata <- data.frame(Group = c(rep("Group 1", 59),
rep("Group 2", 59),
rep("Group 3", 59),
rep("Group 4", 59),
rep("Group 5", 59)),
row.names=colnames(asv_clr))
# produce boxplot of random 4 features as an example grouping by Group variable
features <- sample(rownames(asv_clr), size=4)
featurebox(abundance_matrix=asv_clr, metadata=metadata, features=features, group_by="Group")
## ----featurebox_colour--------------------------------------------------------
featurebox(abundance_matrix=asv_clr, metadata=metadata, features=features, group_by="Group") +
scale_colour_manual(values=getPalette(n=5, palette="Set1"))
## ----dissimilarity------------------------------------------------------------
# get example data
data(asv_example)
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
asv_relab <- relab(asv_counts)
# generate some random metadata for the 295 samples - 5 time points with each individual
# having a data point at each time point
metadata <- data.frame(Timepoint = c(rep("Time 1", 59),
rep("Time 2", 59),
rep("Time 3", 59),
rep("Time 4", 59),
rep("Time 5", 59)),
Individual = as.character(c(rep(c(1:59), 5))),
row.names=colnames(asv_relab),
stringsAsFactors = FALSE)
# remove samples with NA
asv_relab <- asv_relab[,!(is.na(colSums(asv_relab)))]
# make sure they are the same
metadata <- metadata[colnames(asv_relab),]
# ask the question - Are individuals more similar to each other than samples are within timepoints?
# within-individual dissimilarity
within_diss <- dissimilarity(asv_relab, metadata=metadata, individual_variable = "Individual", method="within")
knitr::kable(head(within_diss))
# between-individual dissimilarity at timpoint 1
metadata_t1 <- metadata[metadata$Timepoint == "Time 1",]
asv_relab_t1 <- asv_relab[,rownames(metadata_t1)]
between_diss <- dissimilarity(asv_relab_t1, metadata=metadata_t1, method="between")
knitr::kable(head(between_diss))
# we can then combine and plot them
diss <- bind_rows(within_diss, between_diss)
ggplot(diss, aes(x=method, y=dissimilarity)) +
geom_violin() +
xlab("Dissimilarity type") +
ylab("Bray-Curtis dissimilarity") +
theme_bw()
## ----rarefaction--------------------------------------------------------------
# get example data
data(asv_example)
# rownames have to be features
asv_counts <- data.frame(asv_example[2:ncol(asv_example)], row.names=asv_example$sequence)
rarefaction <- rarefaction(asv_counts)
# default plot
p <- rarefaction$rare_p
p
# modify default plot -- remove geom_label_repel layer
p$layers[[2]] <- NULL
p
## ----plotSunburst-------------------------------------------------------------
data("dss_example")
# set count feature ids as rownames
count_df <- dss_example$merged_abundance_id %>%
column_to_rownames('featureID')
# clean up some sample names
colnames(count_df) <- paste0('id', colnames(count_df))
tax_df <- dss_example$merged_taxonomy
# aggregate counts
agg_gen <- aggregateCount(count_df[tax_df$featureID,], tax_df, "Genus")
count_genus <- agg_gen$count_df
# reannotate taxonomy
tax_genus <- reannotateTax(agg_gen$tax_df)
relab <- relab(count_genus)
# color specific phyla
plotSunburst(relab = NULL, tax = tax_genus,
palettes = c("Proteobacteria" = "Oranges",
"Bacteroidetes" = "Greens"))
# color specific phyla taking into account of relative abundance
plotSunburst(relab = relab, tax = tax_genus,
palettes = c("Proteobacteria" = "Oranges", "Bacteroidetes" = "Greens"))
# highlight specific genera
plotSunburst(relab = relab, tax = tax_genus,
palettes = c("Bacteroidetes" = "Greens",'Firmicutes'='Blues'),
highlight = list("Genus" = c("Bacteroides",'Clostridium XlVa')))
## ----convert-platemap, eval=FALSE---------------------------------------------
# plate_map <- convert_platemap(map_file = "my96wellplate.xlsx",
# map_range = 'A1:H12')
## ----convert-platemap-demo, echo = FALSE--------------------------------------
# example output of convert_platemap
data.frame(well_id = paste0(rep(LETTERS[1:8], each = 12), rep(1:12, 8)),
col = rep(1:12, 8),
row = rep(LETTERS[1:8], each = 12),
sample_name = paste0('sample_name', 1:96))
## ----truPosRate---------------------------------------------------------------
# this would be better exemplified with actual std data rather than the example samples
data("dss_example")
data(zymobiomics)
# set count feature ids as rownames
count_df <- dss_example$merged_abundance_id %>%
column_to_rownames('featureID')
# clean up some sample names
colnames(count_df) <- paste0('id', colnames(count_df))
tax_df <- dss_example$merged_taxonomy
# aggregate counts
agg_gen <- aggregateCount(count_df[tax_df$featureID,], tax_df, "Genus")
genus_relab <- relab(agg_gen$count_df)
true_pos_result <- truePosRate(relab=genus_relab,
annotations=zymobiomics$anno_ncbi_16s,
level='genus', cutoff=0.01)
# plot true pos rate
p <- ggplot(true_pos_result,
aes(x=rank, y=true.pos.rate, colour=label, group=sample)) +
geom_point() +
theme_bw() +
ylab("TP / (TP + FP)") +
scale_colour_manual(values=c("grey", "purple")) +
facet_wrap(~sample, scale="free")
p
## ----metfile_init, eval=FALSE-------------------------------------------------
# db_file <- "/path/to/db/file"
# met <- metfile_init(db_file, dummy = "Group")
#
## ----metadata-sparsity--------------------------------------------------------
set.seed(1)
# setting up example metadata dataframe
metadata_example <- data.frame(
sampleID = LETTERS[1:10],
group = c(rep(1:2, each = 3), rep(3, 4)),
age = c(rnorm(6, 30, 5), rep(NA, 4)),
sex = c(rep('F', 3), rep(NA, 4), rep('M', 3)),
ethnicity = sample(c(NA,1,2,3), 10, replace = TRUE),
medication = sample(c(NA,1,2), 10, replace=TRUE))
met_sparse <- metadata_sparsity(metadata_example)
summary(met_sparse)
met_sparse$na_tally
met_sparse[[3]]
met_sparse[[4]]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.