Nothing
#' @title
#' Create \code{microtable} object to store and manage all the basic files.
#'
#' @description
#' This class is a wrapper for a series of operations on the input files and basic manipulations,
#' including microtable object creation, data trimming, data filtering, rarefaction based on Paul et al. (2013) <doi:10.1371/journal.pone.0061217>, taxonomic abundance calculation,
#' alpha and beta diversity calculation based on the An et al. (2019) <doi:10.1016/j.geoderma.2018.09.035> and
#' Lozupone et al. (2005) <doi:10.1128/AEM.71.12.8228-8235.2005> and other basic operations.\cr
#' \cr
#' Online tutorial: \href{https://chiliubio.github.io/microeco_tutorial/}{https://chiliubio.github.io/microeco_tutorial/} \cr
#' Download tutorial: \href{https://github.com/ChiLiubio/microeco_tutorial/releases}{https://github.com/ChiLiubio/microeco_tutorial/releases}
#'
#' @export
microtable <- R6Class(classname = "microtable",
public = list(
#' @param otu_table data.frame; The feature abundance table; rownames are features (e.g. OTUs/ASVs/species/genes); column names are samples.
#' @param sample_table data.frame; default NULL; The sample information table; rownames are samples; columns are sample metadata;
#' If not provided, the function can generate a table automatically according to the sample names in otu_table.
#' @param tax_table data.frame; default NULL; The taxonomic information table; rownames are features; column names are taxonomic classes.
#' @param phylo_tree phylo; default NULL; The phylogenetic tree; use \code{read.tree} function in ape package for input.
#' @param rep_fasta \code{DNAStringSet} or \code{list} format; default NULL; The representative sequences;
#' use \code{read.fasta} function in \code{seqinr} package or \code{readDNAStringSet} function in \code{Biostrings} package for input.
#' @param auto_tidy default FALSE; Whether trim the files in the \code{microtable} object automatically;
#' If TRUE, running the functions in \code{microtable} class can invoke the \code{tidy_dataset} function automatically.
#' @return an object of class \code{microtable} with the following components:
#' \describe{
#' \item{\code{sample_table}}{The sample information table.}
#' \item{\code{otu_table}}{The feature table.}
#' \item{\code{tax_table}}{The taxonomic table.}
#' \item{\code{phylo_tree}}{The phylogenetic tree.}
#' \item{\code{rep_fasta}}{The representative sequence.}
#' \item{\code{taxa_abund}}{default NULL; use \code{cal_abund} function to calculate.}
#' \item{\code{alpha_diversity}}{default NULL; use \code{cal_alphadiv} function to calculate.}
#' \item{\code{beta_diversity}}{default NULL; use \code{cal_betadiv} function to calculate.}
#' }
#' @format microtable.
#' @examples
#' data(otu_table_16S)
#' data(taxonomy_table_16S)
#' data(sample_info_16S)
#' data(phylo_tree_16S)
#' m1 <- microtable$new(otu_table = otu_table_16S)
#' m1 <- microtable$new(sample_table = sample_info_16S, otu_table = otu_table_16S,
#' tax_table = taxonomy_table_16S, phylo_tree = phylo_tree_16S)
#' # trim the files in the dataset
#' m1$tidy_dataset()
initialize = function(otu_table, sample_table = NULL, tax_table = NULL, phylo_tree = NULL, rep_fasta = NULL, auto_tidy = FALSE)
{
if(missing(otu_table)){
stop("otu_table must be provided!")
}
if(!inherits(otu_table, "data.frame")){
stop("The input otu_table must be data.frame format!")
}
otu_table <- private$check_abund_table(otu_table)
self$otu_table <- otu_table
if(is.null(sample_table)){
message("No sample_table provided, automatically use colnames in otu_table to create one ...")
self$sample_table <- data.frame(SampleID = colnames(otu_table), Group = colnames(otu_table)) %>%
`row.names<-`(.$SampleID)
}else{
if(!inherits(sample_table, "data.frame")){
stop("The input sample_table must be data.frame format!")
}
self$sample_table <- sample_table
}
if(!is.null(tax_table)){
if(!inherits(tax_table, "data.frame")){
stop("The input tax_table must be data.frame format!")
}
}
if(!is.null(phylo_tree)){
if(!inherits(phylo_tree, "phylo")){
stop("The input phylo_tree must be phylo format! Please use read.tree function in ape package to read the phylogenetic tree!")
}
if(!ape::is.rooted(phylo_tree)){
phylo_tree <- ape::multi2di(phylo_tree)
}
}
self$tax_table <- tax_table
self$phylo_tree <- phylo_tree
self$rep_fasta <- rep_fasta
self$taxa_abund <- NULL
self$alpha_diversity <- NULL
self$beta_diversity <- NULL
self$auto_tidy <- auto_tidy
if(self$auto_tidy) self$tidy_dataset()
},
#' @description
#' Filter the features considered pollution in \code{microtable$tax_table}.
#' This operation will remove any line of the \code{microtable$tax_table} containing any the word in taxa parameter regardless of word case.
#'
#' @param taxa default \code{c("mitochondria", "chloroplast")}; filter mitochondria and chloroplast, or others as needed.
#' @return None
#' @examples
#' m1$filter_pollution(taxa = c("mitochondria", "chloroplast"))
filter_pollution = function(taxa = c("mitochondria", "chloroplast")){
if(is.null(self$tax_table)){
stop("The tax_table in the microtable object is NULL ! Please check it!")
}
tax_table_use <- self$tax_table
if(length(taxa) > 1){
taxa <- paste0(taxa, collapse = "|")
}
tax_table_use %<>% base::subset(unlist(lapply(data.frame(t(.)), function(x) !any(grepl(taxa, x, ignore.case = TRUE)))))
filter_num <- nrow(self$tax_table) - nrow(tax_table_use)
message("Total ", filter_num, " features are removed from tax_table ...")
self$tax_table <- tax_table_use
if(self$auto_tidy) self$tidy_dataset()
},
#' @description
#' Filter the feature with low abundance and/or low occurrence frequency.
#'
#' @param rel_abund default 0; the relative abundance threshold, such as 0.0001.
#' @param freq default 1; the occurrence frequency threshold.
#' For example, the number 2 represents filtering the feature that occurs less than 2 times.
#' A number smaller than 1 is also allowable.
#' For instance, the number 0.1 represents filtering the feature that occurs in less than 10\% samples.
#' @param include_lowest default TRUE; whether include the feature with the threshold.
#' @return None
#' @examples
#' \donttest{
#' d1 <- clone(m1)
#' d1$filter_taxa(rel_abund = 0.0001, freq = 0.2)
#' }
filter_taxa = function(rel_abund = 0, freq = 1, include_lowest = TRUE){
raw_otu_table <- self$otu_table
if(rel_abund != 0){
if(rel_abund >= 1){
stop("rel_abund must be smaller than 1!")
}
taxa_raw_abund <- self$taxa_sums()
taxa_rel_abund <- taxa_raw_abund/sum(taxa_raw_abund)
if(include_lowest){
abund_names <- taxa_rel_abund[taxa_rel_abund < rel_abund] %>% names
}else{
abund_names <- taxa_rel_abund[taxa_rel_abund <= rel_abund] %>% names
}
if(length(abund_names) == nrow(raw_otu_table)){
stop("No feature remained! Please check the rel_abund parameter!")
}
message(length(abund_names), " features filtered based on the abundance ...")
}else{
abund_names <- c()
}
if(freq != 0){
if(freq < 1){
message("freq smaller than 1; first convert it to an integer ...")
freq <- round(ncol(raw_otu_table) * freq)
message("Use converted freq integer: ", freq, " for the following filtering ...")
}
taxa_occur_num <- apply(raw_otu_table, 1, function(x){sum(x != 0)})
if(include_lowest){
freq_names <- taxa_occur_num[taxa_occur_num < freq] %>% names
}else{
freq_names <- taxa_occur_num[taxa_occur_num <= freq] %>% names
}
if(length(freq_names) == nrow(raw_otu_table)){
stop("No feature remained! Please check the freq parameter!")
}
message(length(freq_names), " features filtered based on the occurrence...")
}else{
freq_names <- c()
}
filter_names <- c(abund_names, freq_names) %>% unique
if(length(filter_names) == 0){
new_table <- raw_otu_table
}else{
if(length(filter_names) == nrow(raw_otu_table)){
stop("All features are filtered! Please adjust the parameters")
}
new_table <- raw_otu_table[! rownames(raw_otu_table) %in% filter_names, ]
}
self$otu_table <- new_table
self$tidy_dataset()
},
#' @description
#' Rarefy communities to make all samples have same feature number.
#'
#' @param method default c("rarefying", "SRS")[1]; "rarefying" represents the classical resampling like \code{\link{rrarefy}} function of \code{vegan} package.
#' "SRS" is scaling with ranked subsampling method based on the SRS package provided by Lukas Beule and Petr Karlovsky (2020) <DOI:10.7717/peerj.9593>.
#' @param sample.size default NULL; feature number, If not provided, use minimum number of all samples.
#' @param rngseed default 123; random seed.
#' @param replace default TRUE; see \code{\link{sample}} for the random sampling; Available when \code{method = "rarefying"}.
#' @return None; rarefied dataset.
#' @examples
#' \donttest{
#' m1$rarefy_samples(sample.size = min(m1$sample_sums()), replace = TRUE)
#' }
rarefy_samples = function(method = c("rarefying", "SRS")[1], sample.size = NULL, rngseed = 123, replace = TRUE){
set.seed(rngseed)
self$tidy_dataset()
method <- match.arg(method, c("rarefying", "SRS"))
if(is.null(sample.size)){
sample.size <- min(self$sample_sums())
message("Use the minimum number across samples: ", sample.size)
}
if(length(sample.size) > 1){
stop("Input sample.size had more than one value!")
}
if(sample.size <= 0){
stop("sample.size less than or equal to zero. Need positive sample size to work!")
}
if (max(self$sample_sums()) < sample.size){
stop("sample.size is larger than the maximum of sample sums, pleasure check input sample.size!")
}
if (min(self$sample_sums()) < sample.size) {
rmsamples <- self$sample_names()[self$sample_sums() < sample.size]
message(length(rmsamples), " samples removed, ", "because of fewer reads than input sample.size.")
self$sample_table <- base::subset(self$sample_table, ! self$sample_names() %in% rmsamples)
self$tidy_dataset()
}
newotu <- self$otu_table
if(method == "rarefying"){
newotu <- as.data.frame(apply(newotu, 2, private$rarefaction_subsample, sample.size = sample.size, replace = replace))
}else{
newotu <- SRS::SRS(newotu, Cmin = sample.size, set_seed = TRUE, seed = rngseed)
}
rownames(newotu) <- rownames(self$otu_table)
self$otu_table <- newotu
rmtaxa <- apply(newotu, 1, sum) %>% .[. == 0]
if(length(rmtaxa) > 0){
message(length(rmtaxa), " features are removed because they are no longer present in any sample after random subsampling ...")
self$tidy_dataset()
}
},
#' @description
#' Trim all the data in the \code{microtable} object to make taxa and samples consistent. So the results are intersections.
#'
#' @param main_data default FALSE; if TRUE, only basic data in \code{microtable} object is trimmed. Otherwise, all data,
#' including \code{taxa_abund}, \code{alpha_diversity} and \code{beta_diversity}, are all trimed.
#' @return None, object of \code{microtable} itself cleaned up.
#' @examples
#' m1$tidy_dataset(main_data = TRUE)
tidy_dataset = function(main_data = FALSE){
self <- private$tidy_samples(self)
self$otu_table %<>% {.[apply(., 1, sum) > 0, , drop = FALSE]}
taxa_list <- list(rownames(self$otu_table), rownames(self$tax_table), self$phylo_tree$tip.label) %>%
.[!unlist(lapply(., is.null))]
taxa_names <- Reduce(intersect, taxa_list)
if(length(taxa_names) == 0){
if(is.null(self$phylo_tree)){
stop("No same feature names found between rownames of otu_table and rownames of tax_table! Please check rownames of those tables!")
}else{
stop("No same feature name found among otu_table, tax_table and phylo_tree! Please check feature names in those objects!")
}
}
self$otu_table %<>% .[taxa_names, , drop = FALSE]
if(!is.null(self$tax_table)){
self$tax_table %<>% .[taxa_names, , drop = FALSE]
}
if(!is.null(self$phylo_tree)){
self$phylo_tree %<>% ape::drop.tip(., base::setdiff(.$tip.label, taxa_names))
}
if(!is.null(self$rep_fasta)){
if(!all(taxa_names %in% names(self$rep_fasta))){
stop("Some feature names are not found in the names of rep_fasta! Please provide a complete fasta file or manually check the names!")
}
self$rep_fasta %<>% .[taxa_names]
}
# check again whether a sample has 0 abundance after feature filtering
self <- private$tidy_samples(self)
if(!main_data){
sample_names <- rownames(self$sample_table)
if(!is.null(self$taxa_abund)){
self$taxa_abund %<>% lapply(., function(x) x[, sample_names, drop = FALSE])
}
if(!is.null(self$alpha_diversity)){
self$alpha_diversity %<>% .[sample_names, , drop = FALSE]
}
if(!is.null(self$beta_diversity)){
self$beta_diversity %<>% lapply(., function(x) x[sample_names, sample_names, drop = FALSE])
}
}
},
#' @description
#' Add the rownames of \code{microtable$tax_table} as its last column.
#' This is especially useful when the rownames of \code{microtable$tax_table} are required as a taxonomic level
#' for the taxonomic abundance calculation and biomarker idenfification.
#'
#' @param use_name default "OTU"; The column name used in the \code{tax_table}.
#' @return NULL, a new tax_table stored in the object.
#' @examples
#' \donttest{
#' m1$add_rownames2taxonomy()
#' }
add_rownames2taxonomy = function(use_name = "OTU"){
self$tidy_dataset()
if(is.null(self$tax_table)){
message("The tax_table in the microtable object is NULL! Create one ...")
tax_table_use <- data.frame(rownames(self$otu_table), stringsAsFactors = FALSE)
rownames(tax_table_use) <- rownames(self$otu_table)
}else{
tax_table_use <- self$tax_table
tax_table_use <- data.frame(tax_table_use, rownames(tax_table_use), check.names = FALSE, stringsAsFactors = FALSE)
if(use_name %in% colnames(tax_table_use)){
stop("The input use_name: ", use_name, " has been used in the raw tax_table! Please check it!")
}
}
colnames(tax_table_use)[ncol(tax_table_use)] <- use_name
message("Use ", use_name, " as the name of new column in tax_table ...")
self$tax_table <- tax_table_use
},
#' @description
#' Sum the species number for each sample.
#'
#' @return species number of samples.
#' @examples
#' \donttest{
#' m1$sample_sums()
#' }
sample_sums = function(){
colSums(self$otu_table)
},
#' @description
#' Sum the species number for each taxa.
#'
#' @return species number of taxa.
#' @examples
#' \donttest{
#' m1$taxa_sums()
#' }
taxa_sums = function(){
rowSums(self$otu_table)
},
#' @description
#' Show sample names.
#'
#' @return sample names.
#' @examples
#' \donttest{
#' m1$sample_names()
#' }
sample_names = function(){
rownames(self$sample_table)
},
#' @description
#' Show taxa names of tax_table.
#'
#' @return taxa names.
#' @examples
#' \donttest{
#' m1$taxa_names()
#' }
taxa_names = function(){
rownames(self$tax_table)
},
#' @description
#' Rename the features, including the rownames of \code{otu_table}, rownames of \code{tax_table}, tip labels of \code{phylo_tree} and \code{rep_fasta}.
#'
#' @param newname_prefix default "ASV_"; the prefix of new names; new names will be newname_prefix + numbers according to the rownames order of \code{otu_table}.
#' @return None; renamed dataset.
#' @examples
#' \donttest{
#' m1$rename_taxa()
#' }
rename_taxa = function(newname_prefix = "ASV_"){
self$tidy_dataset()
old_names <- rownames(self$otu_table)
new_names <- paste0(newname_prefix, seq_len(nrow(self$otu_table)))
rownames(self$otu_table) <- new_names
if(!is.null(self$tax_table)){
rownames(self$tax_table) <- new_names
}
if(!is.null(self$phylo_tree)){
self$phylo_tree$tip.label[match(old_names, self$phylo_tree$tip.label)] <- new_names
}
if(!is.null(self$rep_fasta)){
names(self$rep_fasta)[match(old_names, names(self$rep_fasta))] <- new_names
}
},
#' @description
#' Merge samples according to specific group to generate a new \code{microtable}.
#'
#' @param use_group the group column in \code{sample_table}.
#' @return a new merged microtable object.
#' @examples
#' \donttest{
#' m1$merge_samples(use_group = "Group")
#' }
merge_samples = function(use_group){
otu_table <- self$otu_table
sample_table <- self$sample_table
if(!is.null(self$tax_table)){
tax_table <- self$tax_table
}else{
tax_table <- NULL
}
if(!is.null(self$phylo_tree)){
phylo_tree <- self$phylo_tree
}else{
phylo_tree <- NULL
}
if(!is.null(self$rep_fasta)){
rep_fasta <- self$rep_fasta
}else{
rep_fasta <- NULL
}
otu_table_new <- rowsum(t(otu_table), as.factor(as.character(sample_table[, use_group]))) %>% t %>% as.data.frame
sample_table_new <- data.frame(SampleID = unique(as.character(sample_table[, use_group]))) %>% `row.names<-`(.[,1])
microtable$new(
sample_table = sample_table_new,
otu_table = otu_table_new,
tax_table = tax_table,
phylo_tree = phylo_tree,
rep_fasta = rep_fasta,
auto_tidy = self$auto_tidy
)
},
#' @description
#' Merge taxa according to specific taxonomic rank to generate a new \code{microtable}.
#'
#' @param taxa default "Genus"; the specific rank in \code{tax_table}.
#' @return a new merged \code{microtable} object.
#' @examples
#' \donttest{
#' m1$merge_taxa(taxa = "Genus")
#' }
merge_taxa = function(taxa = "Genus"){
check_tax_level(taxa, self)
ranknumber <- which(colnames(self$tax_table) %in% taxa)
sampleinfo <- self$sample_table
abund <- self$otu_table
tax <- self$tax_table
tax <- tax[, 1:ranknumber, drop=FALSE]
# concatenate taxonomy in case of duplicated taxa names
merged_taxonomy <- apply(tax, 1, paste, collapse = "|")
abund1 <- cbind.data.frame(Display = merged_taxonomy, abund) %>%
reshape2::melt(id.var = "Display", value.name= "Abundance", variable.name = "Sample")
abund1 <- data.table(abund1)[, sum_abund:=sum(Abundance), by=list(Display, Sample)] %>%
.[, c("Abundance"):=NULL] %>%
setkey(Display, Sample) %>%
unique() %>%
as.data.frame()
new_abund <- as.data.frame(data.table::dcast(data.table(abund1), Display~Sample, value.var= list("sum_abund"))) %>%
`row.names<-`(.[,1]) %>%
.[,-1, drop = FALSE]
new_abund <- new_abund[order(apply(new_abund, 1, mean), decreasing = TRUE), rownames(sampleinfo), drop = FALSE]
# choose OTU names with highest abundance to replace the long taxonomic information in names
name1 <- cbind.data.frame(otuname = rownames(tax), Display = merged_taxonomy, abundance = apply(abund[rownames(tax), ], 1, sum))
name1 <- data.table(name1)[, max_abund:=max(abundance), by = Display]
name1 <- name1[max_abund == abundance] %>%
.[, c("abundance", "max_abund"):=NULL] %>%
setkey(Display) %>%
unique() %>%
as.data.frame()
name1 <- name1[!duplicated(name1$Display), ] %>%
`row.names<-`(.$Display)
rownames(new_abund) <- name1[rownames(new_abund), "otuname"]
new_tax <- tax[rownames(new_abund), , drop = FALSE]
microtable$new(sample_table = sampleinfo, otu_table = new_abund, tax_table = new_tax, auto_tidy = self$auto_tidy)
},
#' @description
#' Save each basic data in microtable object as local file.
#'
#' @param dirpath default "basic_files"; directory to save the tables, phylogenetic tree and sequences in microtable object. It will be created if not found.
#' @param sep default ","; the field separator string, used to save tables. Same with \code{sep} parameter in \code{\link{write.table}} function.
#' default \code{','} correspond to the file name suffix 'csv'. The option \code{'\t'} correspond to the file name suffix 'tsv'. For other options, suffix are all 'txt'.
#' @param ... parameters passed to \code{\link{write.table}}.
#' @examples
#' \dontrun{
#' m1$save_table()
#' }
save_table = function(dirpath = "basic_files", sep = ",", ...){
if(!dir.exists(dirpath)){
dir.create(dirpath)
}
suffix <- switch(sep, ',' = "csv", '\t' = "tsv", "txt")
tmp <- self$otu_table
tmp <- data.frame(ID = rownames(tmp), tmp)
save_path <- paste0(dirpath, "/feature_table.", suffix)
write.table(tmp, file = save_path, row.names = FALSE, sep = sep, ...)
message('Save feature abundance to ', save_path, ' ...')
tmp <- self$sample_table
tmp <- data.frame(ID = rownames(tmp), tmp)
save_path <- paste0(dirpath, "/sample_table.", suffix)
write.table(tmp, file = save_path, row.names = FALSE, sep = sep, ...)
message('Save metadata to ', save_path, ' ...')
if(!is.null(self$tax_table)){
tmp <- self$tax_table
tmp <- data.frame(ID = rownames(tmp), tmp)
save_path <- paste0(dirpath, "/tax_table.", suffix)
write.table(tmp, file = save_path, row.names = FALSE, sep = sep, ...)
message('Save taxonomic information to ', save_path, ' ...')
}
if(!is.null(self$phylo_tree)){
tmp <- self$phylo_tree
save_path <- file.path(dirpath, "phylo_tree.tre")
ape::write.tree(tmp, file = save_path)
message('Save phylogenetic tree to ', save_path, ' ...')
}
if(!is.null(self$rep_fasta)){
tmp <- self$rep_fasta
save_path <- file.path(dirpath, "rep_fasta.fasta")
if(inherits(tmp, "list")){
seqinr::write.fasta(tmp, names = names(tmp), file.out = save_path)
}else{
if(inherits(tmp, "DNAStringSet")){
Biostrings::writeXStringSet(x = tmp, filepath = save_path)
}else{
stop("Unknown fasta format! Must be either list (from read.fasta of seqinr package) or DNAStringSet (from readDNAStringSet of Biostrings package)!")
}
}
message('Save sequences to ', save_path, ' ...')
}
},
#' @description
#' Calculate the taxonomic abundance at each taxonomic level or selected levels.
#'
#' @param select_cols default NULL; numeric vector or character vector of colnames of \code{microtable$tax_table};
#' applied to select columns to merge and calculate abundances according to ordered hierarchical levels.
#' This is very useful if there are commented columns or some columns with multiple structure that cannot be used directly.
#' @param rel default TRUE; if TRUE, relative abundance is used; if FALSE, absolute abundance (i.e. raw values) will be summed.
#' @param merge_by default "|"; the symbol to merge and concatenate taxonomic names of different levels.
#' @param split_group default FALSE; if TRUE, split the rows to multiple rows according to one or more columns in \code{tax_table}.
#' Very useful when multiple mapping information exist.
#' @param split_by default "&&"; Separator delimiting collapsed values; only useful when \code{split_group = TRUE};
#' see \code{sep} parameter in \code{separate_rows} function of tidyr package.
#' @param split_column default NULL; character vector or list; only useful when \code{split_group = TRUE}; character vector:
#' fixed column or columns used for the splitting in tax_table for each abundance calculation;
#' list: containing more character vectors to assign the column names to each calculation, such as list(c("Phylum"), c("Phylum", "Class")).
#' @return taxa_abund list in object.
#' @examples
#' \donttest{
#' m1$cal_abund()
#' }
cal_abund = function(
select_cols = NULL,
rel = TRUE,
merge_by = "|",
split_group = FALSE,
split_by = "&&",
split_column = NULL
){
taxa_abund <- list()
if(is.null(self$tax_table)){
stop("No tax_table found! Please check your data!")
}
if(nrow(self$tax_table) != nrow(self$otu_table)){
message("The row number of tax_table is not equal to that of otu_table ...")
message("Automatically applying tidy_dataset() function to trim the data ...")
self$tidy_dataset()
print(self)
}
if(nrow(self$sample_table) != ncol(self$otu_table)){
message("The sample numbers of sample_table is not equal to that of otu_table ...")
message("Automatically applying tidy_dataset() function to trim the data ...")
self$tidy_dataset()
print(self)
}
if(nrow(self$tax_table) == 0){
stop("0 row in tax_table! Please check your data!")
}
if(is.null(select_cols)){
select_cols <- seq_len(ncol(self$tax_table))
}else{
if(!is.numeric(select_cols)){
if(any(! select_cols %in% colnames(self$tax_table))){
stop("Part of input names of select_cols are not in the tax_table!")
}else{
select_cols <- match(select_cols, colnames(self$tax_table))
}
}
}
if(split_group){
if(is.null(split_column)){
stop("Spliting rows by one or more columns require split_column parameter! Please set split_column and try again!")
}
}
for(i in seq_along(select_cols)){
taxrank <- colnames(self$tax_table)[select_cols[i]]
# assign the columns used for the splitting
if(!is.null(split_column)){
if(is.list(split_column)){
use_split_column <- split_column[[i]]
}else{
use_split_column <- split_column
}
}
taxa_abund[[taxrank]] <- private$transform_data_proportion(
self,
columns = select_cols[1:i],
rel = rel,
merge_by = merge_by,
split_group = split_group,
split_by = split_by,
split_column = use_split_column)
}
self$taxa_abund <- taxa_abund
message('The result is stored in object$taxa_abund ...')
},
#' @description
#' Save taxonomic abundance as local file.
#'
#' @param dirpath default "taxa_abund"; directory to save the taxonomic abundance files. It will be created if not found.
#' @param merge_all default FALSE; Whether merge all tables into one. The merged file format is generally called 'mpa' style.
#' @param rm_un default FALSE; Whether remove unclassified taxa in which the name ends with '__' generally.
#' @param rm_pattern default "__$"; The pattern searched through the merged taxonomic names. See also \code{pattern} parameter in \code{\link{grepl}} function.
#' Only available when \code{rm_un = TRUE}. The default "__$" means removing the names end with '__'.
#' @param sep default ","; the field separator string. Same with \code{sep} parameter in \code{\link{write.table}} function.
#' default \code{','} correspond to the file name suffix 'csv'. The option \code{'\t'} correspond to the file name suffix 'tsv'. For other options, suffix are all 'txt'.
#' @param ... parameters passed to \code{\link{write.table}}.
#' @examples
#' \dontrun{
#' m1$save_abund(dirpath = "taxa_abund")
#' m1$save_abund(merge_all = TRUE, rm_un = TRUE, sep = "\t")
#' }
save_abund = function(dirpath = "taxa_abund", merge_all = FALSE, rm_un = FALSE, rm_pattern = "__$", sep = ",", ...){
if(!dir.exists(dirpath)){
dir.create(dirpath)
}
suffix <- switch(sep, ',' = "csv", '\t' = "tsv", "txt")
if(merge_all){
res <- data.frame()
for(i in names(self$taxa_abund)){
res %<>% rbind(., self$taxa_abund[[i]])
}
res <- data.frame(Taxa = rownames(res), res)
if(rm_un){
res %<>% .[!grepl(rm_pattern, .$Taxa), ]
}
save_path <- paste0(dirpath, "/mpa_abund.", suffix)
write.table(res, file = save_path, row.names = FALSE, sep = sep, ...)
message('Save abundance to ', save_path, ' ...')
}else{
for(i in names(self$taxa_abund)){
tmp <- self$taxa_abund[[i]]
if(rm_un){
tmp %<>% .[!grepl(rm_pattern, rownames(.)), ]
}
tmp <- data.frame(Taxa = rownames(tmp), tmp)
save_path <- paste0(dirpath, "/", i, "_abund.", suffix)
write.table(tmp, file = save_path, row.names = FALSE, sep = sep, ...)
}
}
},
#' @description
#' Calculate alpha diversity.
#'
#' @param measures default NULL; one or more indexes of \code{c("Observed", "Coverage", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher", "PD")};
#' If null, use all those measures. 'Shannon', 'Simpson' and 'InvSimpson' are calculated based on \code{vegan::diversity} function;
#' 'Chao1' and 'ACE' depend on the function \code{vegan::estimateR}; 'PD' depends on the function \code{picante::pd}.
#' @param PD default FALSE; whether Faith's phylogenetic diversity should be calculated.
#' @return alpha_diversity stored in object.
#' @examples
#' \donttest{
#' m1$cal_alphadiv(measures = NULL, PD = FALSE)
#' class(m1$alpha_diversity)
#' }
cal_alphadiv = function(measures = NULL, PD = FALSE){
# modified based on the alpha diversity analysis of phyloseq package
OTU <- as.data.frame(t(self$otu_table), check.names = FALSE)
renamevec <- c("Observed", "Coverage", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")
names(renamevec) <- c("S.obs", "coverage", "S.chao1", "S.ACE", "shannon", "simpson", "invsimpson", "fisher")
if(is.null(measures)){
use_measures <- as.character(renamevec)
}else{
use_measures <- measures
}
if(any(use_measures %in% names(renamevec))){
use_measures[use_measures %in% names(renamevec)] <- renamevec[names(renamevec) %in% use_measures]
}
if(!any(use_measures %in% renamevec)){
stop("None of the `measures` you provided are supported. Try default `NULL` instead.")
}
outlist <- vector("list")
estimRmeas <- c("Chao1", "Observed", "ACE")
if(any(estimRmeas %in% use_measures)){
outlist <- c(outlist, list(t(data.frame(vegan::estimateR(OTU), check.names = FALSE))))
}
if("Shannon" %in% use_measures){
outlist <- c(outlist, list(shannon = vegan::diversity(OTU, index = "shannon")))
}
if("Simpson" %in% use_measures){
outlist <- c(outlist, list(simpson = vegan::diversity(OTU, index = "simpson")))
}
if("InvSimpson" %in% use_measures){
outlist <- c(outlist, list(invsimpson = vegan::diversity(OTU, index = "invsimpson")))
}
if("Fisher" %in% use_measures){
fisher <- tryCatch(vegan::fisher.alpha(OTU, se = TRUE),
warning = function(w){suppressWarnings(vegan::fisher.alpha(OTU, se = TRUE)[, c("alpha", "se")])},
error = function(e){c("Skip the index Fisher because of an error ...")}
)
if(!is.null(dim(fisher))) {
colnames(fisher)[1:2] <- c("Fisher", "se.fisher")
outlist <- c(outlist, list(fisher))
}else{
if(is.numeric(fisher)){
outlist <- c(outlist, Fisher = list(fisher))
}else{
message(fisher)
}
}
}
if("Coverage" %in% use_measures){
outlist <- c(outlist, list(coverage = private$goods(OTU)))
}
if(PD){
if(is.null(self$phylo_tree)){
stop("Please provide phylogenetic tree for PD calculation!")
}else{
outlist <- c(outlist, list(PD = picante::pd(OTU, self$phylo_tree)[, "PD", drop=TRUE]))
}
}
res <- do.call("cbind", outlist)
namechange <- base::intersect(colnames(res), names(renamevec))
colnames(res)[colnames(res) %in% namechange] <- renamevec[namechange]
self$alpha_diversity <- as.data.frame(res)
message('The result is stored in object$alpha_diversity ...')
},
#' @description
#' Save alpha diversity table to the computer.
#'
#' @param dirpath default "alpha_diversity"; directory name to save the alpha_diversity.csv file.
save_alphadiv = function(dirpath = "alpha_diversity"){
if(!dir.exists(dirpath)){
dir.create(dirpath)
}
write.csv(self$alpha_diversity, file = paste0(dirpath, "/", "alpha_diversity.csv"), row.names = TRUE)
},
#' @description
#' Calculate beta diversity, including Bray-Curtis, Jaccard, and UniFrac.
#' See An et al. (2019) <doi:10.1016/j.geoderma.2018.09.035> and Lozupone et al. (2005) <doi:10.1128/AEM.71.12.8228–8235.2005>.
#'
#' @param method default NULL; a character vector with one or more elements; If default, "bray" and "jaccard" will be used;
#' see \code{\link{vegdist}} function and \code{method} parameter in \code{vegan} package.
#' @param unifrac default FALSE; whether UniFrac index should be calculated.
#' @param binary default FALSE; TRUE is used for jaccard and unweighted unifrac; optional for other indexes.
#' @param ... parameters passed to \code{\link{vegdist}} function.
#' @return beta_diversity list stored in object.
#' @examples
#' \donttest{
#' m1$cal_betadiv(unifrac = FALSE)
#' class(m1$beta_diversity)
#' }
cal_betadiv = function(method = NULL, unifrac = FALSE, binary = FALSE, ...){
res <- list()
eco_table <- t(self$otu_table)
sample_table <- self$sample_table
if(is.null(method)){
method <- c("bray", "jaccard")
}
for(i in method){
if(i == "jaccard"){
binary_use <- TRUE
}else{
binary_use <- binary
}
res[[i]] <- as.matrix(vegan::vegdist(eco_table, method = i, binary = binary_use, ...))
}
if(unifrac == T){
if(is.null(self$phylo_tree)){
stop("No phylogenetic tree provided, please change the parameter unifrac to FALSE")
}
phylo_tree <- self$phylo_tree
unifrac1 <- GUniFrac::GUniFrac(eco_table, phylo_tree, alpha = c(0, 0.5, 1))
unifrac2 <- unifrac1$unifracs
wei_unifrac <- unifrac2[,, "d_1"]
res$wei_unifrac <- wei_unifrac
unwei_unifrac <- unifrac2[,, "d_UW"]
res$unwei_unifrac <- unwei_unifrac
}
self$beta_diversity <- res
message('The result is stored in object$beta_diversity ...')
},
#' @description
#' Save beta diversity matrix to the computer.
#'
#' @param dirpath default "beta_diversity"; directory name to save the beta diversity matrix files.
save_betadiv = function(dirpath = "beta_diversity"){
if(!dir.exists(dirpath)){
dir.create(dirpath)
}
for(i in names(self$beta_diversity)){
write.csv(self$beta_diversity[[i]], file = paste0(dirpath, "/", i, ".csv"), row.names = TRUE)
}
},
#' @description
#' Print the microtable object.
print = function(){
cat("microtable-class object:\n")
cat(paste("sample_table have", nrow(self$sample_table), "rows and", ncol(self$sample_table), "columns\n"))
cat(paste("otu_table have", nrow(self$otu_table), "rows and", ncol(self$otu_table), "columns\n"))
if(!is.null(self$tax_table)) cat(paste("tax_table have", nrow(self$tax_table), "rows and", ncol(self$tax_table), "columns\n"))
if(!is.null(self$phylo_tree)) cat(paste("phylo_tree have", length(self$phylo_tree$tip.label), "tips\n"))
if(!is.null(self$rep_fasta)) cat(paste("rep_fasta have", length(self$rep_fasta), "sequences\n"))
if(!is.null(self$taxa_abund)) cat(paste("Taxa abundance: calculated for", paste0(names(self$taxa_abund), collapse = ","), "\n"))
if(!is.null(self$alpha_diversity)) cat(paste("Alpha diversity: calculated for", paste0(colnames(self$alpha_diversity), collapse = ","), "\n"))
if(!is.null(self$beta_diversity)) cat(paste("Beta diversity: calculated for", paste0(names(self$beta_diversity), collapse = ","), "\n"))
invisible(self)
}
),
private = list(
# check and remove OTU or sample with 0 abundance
check_abund_table = function(otu_table){
if(!all(sapply(otu_table, is.numeric))){
stop("Some columns in otu_table are not numeric class! Please check the input data!")
}
if(any(apply(otu_table, 1, sum) == 0)){
remove_num <- sum(apply(otu_table, 1, sum) == 0)
message(remove_num, " taxa with 0 abundance are removed from the otu_table ...")
otu_table %<>% .[apply(., 1, sum) > 0, , drop = FALSE]
}
if(any(apply(otu_table, 2, sum) == 0)){
remove_num <- sum(apply(otu_table, 2, sum) == 0)
message(remove_num, " samples with 0 abundance are removed from the otu_table ...")
otu_table %<>% .[, apply(., 2, sum) > 0, drop = FALSE]
}
if(ncol(otu_table) == 0){
stop("No available sample! Please check the data!")
}
if(nrow(otu_table) == 0){
stop("No available taxon! Please check the data!")
}
otu_table
},
tidy_samples = function(microtable_obj){
microtable_obj$otu_table <- private$check_abund_table(microtable_obj$otu_table)
sample_names <- intersect(rownames(microtable_obj$sample_table), colnames(microtable_obj$otu_table))
if(length(sample_names) == 0){
stop("No same sample name found between rownames of sample_table and colnames of otu_table! ",
"Please first check whether the rownames of sample_table are sample names! Then check through the sample names of each table!")
}
# keep the sample order same with original sample table
sample_names <- rownames(microtable_obj$sample_table) %>% .[. %in% sample_names]
microtable_obj$sample_table %<>% .[sample_names, , drop = FALSE]
microtable_obj$otu_table %<>% .[ , sample_names, drop = FALSE]
microtable_obj
},
transform_data_proportion = function(
input,
columns,
rel,
merge_by = "|",
split_group = FALSE,
split_by = "&&",
split_column
){
sampleinfo <- input$sample_table
abund <- input$otu_table
tax <- input$tax_table
tax <- tax[, columns, drop = FALSE]
# split rows to multiple rows if multiple correspondence
if(split_group){
merge_abund <- cbind.data.frame(tax, abund)
split_merge_abund <- tidyr::separate_rows(merge_abund, all_of(split_column), sep = split_by)
new_tax <- split_merge_abund[, columns, drop = FALSE]
new_abund <- split_merge_abund[, (ncol(tax) + 1):(ncol(merge_abund)), drop = FALSE]
abund1 <- cbind.data.frame(Display = apply(new_tax, 1, paste, collapse = merge_by), new_abund)
}else{
abund1 <- cbind.data.frame(Display = apply(tax, 1, paste, collapse = merge_by), abund)
}
# first convert table to long format
abund1 <- abund1 %>%
data.table() %>%
data.table::melt(id.vars = "Display", value.name= "Abundance", variable.name = "Sample") %>%
.[, sum_abund:=sum(Abundance), by=list(Display, Sample)] %>%
.[, c("Abundance"):=NULL] %>%
setkey(Display, Sample) %>%
unique()
if(rel == T){
abund1 <- abund1[, res_abund:=sum_abund/sum(sum_abund), by=list(Sample)] %>%
.[, c("sum_abund"):=NULL]
}else{
colnames(abund1)[colnames(abund1) == "sum_abund"] <- "res_abund"
}
abund2 <- as.data.frame(data.table::dcast(abund1, Display ~ Sample, value.var = list("res_abund"))) %>%
`row.names<-`(.[,1]) %>%
.[,-1, drop = FALSE]
abund2 <- abund2[order(apply(abund2, 1, mean), decreasing = TRUE), rownames(sampleinfo), drop = FALSE]
abund2
},
rarefaction_subsample = function(x, sample.size, replace=FALSE){
# Adapted from the rarefy_even_depth() in phyloseq package
# All rights reserved.
rarvec <- numeric(length(x))
if(sum(x) <= 0){
# Protect against, and quickly return an empty vector,
return(rarvec)
}
if(replace){
suppressWarnings(subsample <- sample(1:length(x), sample.size, replace = TRUE, prob=x))
} else {
# resample without replacement
obsvec <- apply(data.frame(OTUi=1:length(x), times=x), 1, function(x){
rep_len(x["OTUi"], x["times"])
})
obsvec <- unlist(obsvec, use.names=FALSE)
suppressWarnings(subsample <- sample(obsvec, sample.size, replace = FALSE))
}
sstab <- table(subsample)
# Assign the tabulated random subsample values to the species vector
rarvec[as(names(sstab), "integer")] <- sstab
return(rarvec)
},
goods = function(com){
no.seqs <- rowSums(com)
sing <- com==1
no.sing <- apply(sing, 1, sum)
goods <- 1 - no.sing/no.seqs
goods
}
),
lock_objects = FALSE,
lock_class = FALSE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.