#' construct_lefse_table
#'
#' construct_lefse_table can construct a LEfSe-format OTU table.
#' See http://huttenhower.sph.harvard.edu/galaxy/ -> LEfSe -> Format Data for LEfSe for more details.
#'
#' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample metadata and
#' phylogenetic
#' tree.
#'
#' @param feature The column name of the feature you want to select. In final table, feature will be
#' the first row.
#'
#' @param level The coloumn name of the taxonomy level to select. Default is "all". If "all" then
#' retain all taxonomy level, else retain the taxonomy from Kingdom to selected level, drop
#' everything else. Level name should be one of c("all", "Kingdom", "Phylum", "Class", "Order",
#' "Family", "Genus", "Species"). Taxonomy will be seperated by "|".
#'
#' @export
#'
#' @examples
#' # minimum usage
#' construct_lefse_table(demo_phyloseq_object, feature = "diagnosis") %>%
#' .[1:20,1:5]
construct_lefse_table <- function(phyloseq, feature, level = "all") {
# Set options, prevent R turnning numeric value to factor
options(stringsAsFactors = FALSE)
# Read in phyloseq object
# Convert OTU to relative abundance
otu <- otu_table(phyloseq) %>% as.data.frame() %>%
convert_to_percentage() %>%
t() %>% as.data.frame() %>%
rownames_to_column(var = "OTU_ID")
taxa <- tax_table(phyloseq) %>% as.data.frame() %>%
rownames_to_column(var = "OTU_ID")
# Select taxa by given level
if (level == "all") {
levels <- base::colnames(taxa)[2:base::ncol(taxa)]
} else {
taxa <- taxa %>% dplyr::select(OTU_ID:!!level)
levels <- base::colnames(taxa)[2:base::ncol(taxa)]
}
# Extract levels name
levels <- base::colnames(taxa)[2:base::ncol(taxa)]
# Remove rows in taxa that is all NA
all_NA_taxa <- taxa %>% filter_all(all_vars(is.na(.)))
taxa <- taxa %>% dplyr::filter(!OTU_ID %in% all_NA_taxa$OTU_ID)
# Convert metadata and create lefse table
metadata <- extract_metadata_phyloseq(phyloseq = phyloseq, feature = feature)
lefse <- metadata %>% t() %>% as.data.frame()
base::colnames(lefse) <- lefse %>% .[rownames(.) == "SampleID",]
# Combine otu to lefse table
for(i in levels) {
taxa_tmp <- taxa %>%
as.data.frame() %>%
dplyr::select(1:which(base::colnames(taxa) == i)) %>%
filter_all(all_vars(!is.na(.)))
taxa_tmp <- taxa_tmp %>% tidyr::unite(Taxonomy, 2:base::ncol(taxa_tmp), sep = "|")
otu_tmp <- otu %>% dplyr::filter(OTU_ID %in% taxa_tmp$OTU_ID) %>%
left_join(taxa_tmp)
otu_tmp <- otu_tmp %>%
#group_by_() can pass variable to goup_by() function
group_by(Taxonomy) %>%
summarise_if(is.numeric, sum, na.rm=TRUE) %>%
column_to_rownames(var = "Taxonomy")
lefse <- base::unite(lefse, otu_tmp)
}
lefse <- rownames_to_column(lefse)
base::colnames(lefse) <- 1:base::ncol(lefse)
return(lefse)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.