#' construct_otu_table
#'
#' construct_otu_table can construct a OTU table with a phyloseq object.
#'
#' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample metadata and
#' phylogenetic tree.
#'
#' @param level The coloumn name of the level wanted to select. Default is "all". If "all" then
#' retain all taxonomy level, else ONLY retain the given taxonomy level, drop everything else. Level
#' name should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
#'
#' @param relative_abundance Turn OTU table into relative abundance or not. Default is FALSE. If
#' TRUE, the raw count value will be converted to relative abundance and total sum is 1.
#'
#' @param hundred_percent Default is FALSE, if TRUE, turn relative abundance to 100 percent, total
#' sum is 100.
#'
#' @export
#'
#' @examples
#' construct_otu_table(demo_phyloseq_object, level = "Genus") %>% .[,1:5]
construct_otu_table <- function(
phyloseq,
level = "all",
relative_abundance = FALSE,
hundred_percent = FALSE
) {
# Set options, prevent R turnning numeric value to factor
options(stringsAsFactors = FALSE)
# Check if input 'level' is correct
if (!level %in% c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
stop(paste0('Argument "level" should be one of "all", "Kingdom", "Phylum", "Class", "Order", ',
'"Family", "Genus", "Species".'))
}
# Read in sequence table and taxonomy table from phyloseq
if (!taxa_are_rows(phyloseq)) {
otu <- otu_table(phyloseq) %>% as.data.frame() %>% t() %>%
as.data.frame() %>% rownames_to_column(var = "OTU_ID")
} else {
otu <- otu_table(phyloseq) %>% as.data.frame() %>%
as.data.frame() %>% rownames_to_column(var = "OTU_ID")
}
taxa <- tax_table(phyloseq) %>% as.data.frame() %>%
rownames_to_column(var = "OTU_ID")
## Step 1: Clean sequence table and taxonomy table
# If a sequence's taxonomy is all NA, remove it
all_NA_taxa <- taxa %>% filter_all(all_vars(is.na(.)))
taxa <- taxa %>% dplyr::filter(!OTU_ID %in% all_NA_taxa$OTU_ID)
## Step 2: Construct OTU table
if (level == "all") {
# If level == "all", collapse all taxonomy levels and separate by "; "
level <- "Taxonomy"
taxa <- taxa %>% tidyr::unite(Taxonomy, 2:ncol(taxa), sep = "; ")
} else {
# If level != "all", select the given taxonomy level
taxa <- taxa %>% dplyr::select(OTU_ID, level) %>% dplyr::filter(., !is.na(.[[level]]))
otu <- otu %>% dplyr::filter(OTU_ID %in% taxa$OTU_ID)
}
## Step 3: Merge sequence table and taxonomy table
otu <- left_join(otu, taxa)
## Step 4: Add abundance of those have the same taxonomy names and convert it to data frame
otu <- otu %>%
# replace group_by_() with group_by()
# See https://stackoverflow.com/questions/38220963/how-to-pass-a-variable-name-in-group-by
group_by(!!!rlang::syms(level)) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
filter_if(is.numeric, any_vars(. != 0)) %>%
column_to_rownames(var = level) %>%
t() %>%
as.data.frame()
if(relative_abundance) {
if(hundred_percent) {
otu <- convert_to_percentage(otu, row_sum = TRUE, hundred_percent = T)
} else {
otu <- convert_to_percentage(otu, row_sum = TRUE)
}
}
if(level == "Taxonomy") {
# 转换成适合阅读和导出excel的格式:
# colnames为taxa level + SampleID,没有rownames(taxa_are_rows = TRUE)
otu <- otu %>% t() %>% as.data.frame() %>% tibble::rownames_to_column("Taxonomy") %>%
tidyr::separate(
Taxonomy, c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"), "; "
)
}
return(otu)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.