#' Function to process 16S sequencing data and scale individual samples by microbial density, generating absolute abundances
#' @param biom_file .biom table in rhdf5 format. It is wise to use a pre-filtered table (for example, de-duplicated)
#' @param metadata_file the mapping file used in QIIME to split your sequencing library (must include microbial density data)
#' @param taxonomy_file a file containing taxonomy (e.g. output from assign_taxonomy.py in QIIME, or greengenes taxonomy)
#' @param filter OPTIONAL: minimum relative abundance to filter from data
#' @return a list containing OTU tables with and without taxonomy (at each given depth), both scaled and unscaled (the output is a list of lists).
#' @export
#'
individual_scale <- function(biom_file, metadata_file, taxonomy_file, filter) {
message("Reading Biom Table...\n")
# Read in biom table, begin manipulating
raw_biom_table <- biom::read_biom(biom_file)
biom_data <- methods::as(biom::biom_data(raw_biom_table), "matrix")
biom_only <- as.data.frame(t(biom_data))
colnames(biom_only) <- paste("OTU", colnames(biom_only), sep = "_")
biom_only$X.SampleID <- as.character(row.names(biom_only))
row.names(biom_only) <- NULL
message("Reading Metadata...\n")
# Read in metadata, QC for NA values of microbial density
metadata <- utils::read.delim(metadata_file)
if ("microbial_density" %in% colnames(metadata)) {
metadata$X.SampleID <- as.character(metadata$X.SampleID)
metadata <- metadata[!is.na(metadata$microbial_density), ]
} else {
stop("Microbial density data not in metadata file")
}
message("Making sure OTU data and metadata are properly aligned...\n")
# Make sure we are working with the right data, sort in same order:
biom_only <- biom_only[biom_only$X.SampleID %in% base::intersect(biom_only$X.SampleID, metadata$X.SampleID), ] %>%
dplyr::arrange(X.SampleID)
metadata <- metadata[metadata$X.SampleID %in% base::intersect(biom_only$X.SampleID, metadata$X.SampleID), ] %>%
dplyr::arrange(X.SampleID)
message("Reading Taxonomy...\n")
# Read in Taxonomy
taxonomy <- utils::read.delim(taxonomy_file, header = F)
taxonomy <- as.data.frame(apply(taxonomy, 2, function(x) gsub("\\s+", "", x)))
colnames(taxonomy) <- c("OTU_ID", "Taxon")
taxonomy$OTU_ID <- paste("OTU", taxonomy$OTU_ID, sep = "_")
taxonomy$OTU_ID <- as.character(taxonomy$OTU_ID)
message("Manipulating data...\n")
# Pick out otu data only
otus_only <- biom_only[, -length(names(biom_only))]
message("Scaling taxonomic abundances...\n")
# Create relative abundances and scale
pre_normalized <- otus_only/base::rowSums(otus_only)
if (missing(filter)) {
message("OTU table is NOT being filtered by relative abundance...\n")
normalized <- pre_normalized
} else {
message(paste("OTU table is being filtered to remove OTUs with relative abundance below", filter, "...\n", sep = " "))
max_otu_fraction <- apply(pre_normalized, 2, max)
normalized <- pre_normalized[, max_otu_fraction > filter]
}
if (ncol(normalized) == 0) {
stop("Filter is too strict, all OTUs filtered out!")
}
if (min(metadata$microbial_density) < 0) {
message('WARNING: Some microbial density values are less than zero, changing these values to 0.0001 to avoid errors.')
metadata$microbial_density[which(metadata$microbial_density<0)] <- 0.0001
}
scaled <- normalized * metadata$microbial_density
relative <- normalized * 100
scaled$X.SampleID <- as.character(metadata$X.SampleID)
relative$X.SampleID <- as.character(metadata$X.SampleID)
fraction_filtered <- 1 - (base::ncol(normalized)/base::ncol(pre_normalized))
message(paste("OTU table successfully filtered (", round(fraction_filtered, 3) * 100, "% of OTUs removed)...\n", sep = ""))
# Re-join metadata (may not be necessary to have added previously, but am concerned about not applying correct
# microbial density scaling to samples)
otus_scaled <- dplyr::left_join(metadata, scaled, by = "X.SampleID")
otus_relative <- dplyr::left_join(metadata, relative, by = "X.SampleID")
# Melt data to help create plots
melted <- reshape2::melt(data = scaled, id.vars = "X.SampleID", measure.vars = colnames(scaled)[-length(colnames(scaled))])
names(melted)[names(melted) == "variable"] <- "OTU_ID"
melted$OTU_ID <- as.character(melted$OTU_ID)
relative_melted <- reshape2::melt(data = relative, id.vars = "X.SampleID", measure.vars = colnames(relative)[-length(colnames(relative))])
names(relative_melted)[names(relative_melted) == "variable"] <- "OTU_ID"
relative_melted$OTU_ID <- as.character(relative_melted$OTU_ID)
message("Adding taxonomy and collapsing OTUs by taxonomy...\n")
# Add taxonomy
taxonomy_added <- dplyr::inner_join(melted, taxonomy, by = "OTU_ID")
relative_taxonomy_added <- dplyr::inner_join(relative_melted, taxonomy, by = "OTU_ID")
# Collapse taxonomy
taxonomy_collapsed <- taxonomy_added %>% dplyr::group_by(X.SampleID, Taxon) %>% dplyr::summarize(absolute_abundance = sum(value))
relative_taxonomy_collapsed <- relative_taxonomy_added %>% dplyr::group_by(X.SampleID, Taxon) %>% dplyr::summarize(relative_abundance = sum(value))
## Make tables at different levels of taxonomic depth
phylogeny <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# Split taxonomy
split_tax <- taxonomy_collapsed %>% tidyr::separate(Taxon, phylogeny, sep = ";.__", remove = F, fill = "right")
split_tax$Kingdom <- "Bacteria"
relative_split_tax <- relative_taxonomy_collapsed %>% tidyr::separate(Taxon, phylogeny, sep = ";.__", remove = F,
fill = "right")
relative_split_tax$Kingdom <- "Bacteria"
melted_by_tax_scaled <- list()
compact_by_tax_scaled <- list()
melted_by_tax_relative <- list()
compact_by_tax_relative <- list()
message("Creating melted and compact tables by taxonomic depth...\n")
for (i in 1:length(phylogeny)) {
# Define a label for plotting
label <- paste(phylogeny[i - 1], phylogeny[i], sep = "__")
# Identify columns to add
dots <- lapply(phylogeny[1:i], as.symbol)
# Create table that collapses split taxonomy table by each depth, add label
table <- split_tax %>% dplyr::group_by(X.SampleID) %>% dplyr::group_by_(.dots = dots, add = T) %>% dplyr::summarise(abundance = sum(absolute_abundance))
if (i > 1) {
table$short_label <- paste(table[[phylogeny[i - 1]]], table[[phylogeny[i]]], sep = "__")
table <- table %>% tidyr::unite_("long_label", phylogeny[1:i], sep = "__", remove = F)
} else {
table$short_label <- table[[phylogeny[i]]]
table$long_label <- table[[phylogeny[i]]]
}
# Add info to table
table$Abundance_Type <- "Absolute"
table$Depth <- phylogeny[i]
# Add to output list
melted_by_tax_scaled[[phylogeny[i]]] <- table
# Create compact version
pre_compact_scaled <- reshape2::dcast(table, X.SampleID + Depth + Abundance_Type ~ long_label, value.var = "abundance")
compact_by_tax_scaled[[phylogeny[i]]] <- dplyr::left_join(metadata, pre_compact_scaled, by = "X.SampleID")
## Relative Abundance
rel_table <- relative_split_tax %>% dplyr::group_by(X.SampleID) %>% dplyr::group_by_(.dots = dots, add = T) %>%
dplyr::summarise(abundance = sum(relative_abundance))
if (i > 1) {
rel_table$short_label <- paste(rel_table[[phylogeny[i - 1]]], rel_table[[phylogeny[i]]], sep = "__")
rel_table <- rel_table %>% tidyr::unite_("long_label", phylogeny[1:i], sep = "__", remove = F)
} else {
rel_table$short_label <- rel_table[[phylogeny[i]]]
rel_table$long_label <- rel_table[[phylogeny[i]]]
}
rel_table$Abundance_Type <- "Relative"
rel_table$Depth <- phylogeny[i]
melted_by_tax_relative[[phylogeny[i]]] <- rel_table
pre_compact_relative <- reshape2::dcast(rel_table, X.SampleID + Depth + Abundance_Type ~ long_label, value.var = "abundance")
compact_by_tax_relative[[phylogeny[i]]] <- dplyr::left_join(metadata, pre_compact_relative, by = "X.SampleID")
}
return(list(scaled_otus = otus_scaled, relative_otus = otus_relative, melted_scaled_by_taxonomy = melted_by_tax_scaled,
melted_relative_by_taxonomy = melted_by_tax_relative, compact_scaled_by_taxonomy = compact_by_tax_scaled, compact_relative_by_taxonomy = compact_by_tax_relative, sample_metadata = metadata))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.