# Author: Walter Xie, Andrew Dopheide
# Accessed on 12 Sep 2016
#' @name CMStatistics
#' @title Statistics to one or multipe community matrix
#'
#' @description Statistics to one or multipe community matrix,
#' such as number of reads, OTUs, etc.
#' Jost diversity is also included in the summary.
#'
#' @details \code{summaryCM.Vector} return a named vector of summary of the
#' community matrix, where \code{community.matrix} can be one column only.
#' The vector is c("reads","OTUs","samples","Shannon","singletons","doubletons").
#'
#' @param digits The digits to \code{\link{round}} decimal places
#' if number is not interger. Default to 2.
#' @keywords community matrix
#' @export
#' @examples
#' summary.cm.vector <- summaryCM.Vector(community.matrix)
#'
#' @rdname CMStatistics
summaryCM.Vector <- function(community.matrix) {
suppressMessages(require(vegetarian))
samples <- ncol(community.matrix)
otus <- nrow(community.matrix)
reads <- sum(community.matrix)
if (! is.null(samples)) {
rs <- rowSums(community.matrix)
singletons <- sum(rs==1)
doubletons <- sum(rs==2)
min.otu.abun <- min(rs)
max.otu.abun <- max(rs)
cs <- colSums(community.matrix)
min.sample.abun <- min(cs)
max.sample.abun <- max(cs)
} else {
samples <- 1
otus <- length(community.matrix)
singletons <- NA
doubletons <- NA
min.otu.abun <- min(community.matrix)
max.otu.abun <- max(community.matrix)
min.sample.abun <- NA
max.sample.abun <- NA
}
shannon <- d(transposeDF(community.matrix),lev="gamma",q=1)
summary.cm <- c(reads, otus, samples, singletons, doubletons, max.otu.abun,
min.otu.abun, max.sample.abun, min.sample.abun)
names(summary.cm) <- c("reads", "OTUs", "samples", "singletons", "doubletons",
"max.OTU.abun","min.OTU.abun","max.sample.abun","min.sample.abun")
return(summary.cm)
}
#' @details \code{summaryCM} summarizes only single community matrix.
#'
#' @param has.total If 0, then only return abudence by samples (columns) of community matrix.
#' If 1, then only return toal abudence. If 2, then return abudence by samples (columns) and total.
#' Default to 1.
#' @param most.abund The threshold to define the number of the most abundent OTUs.
#' @param pretty.numbers Default to TRUE to make numbers look pretty,
#' but they will be hard to convert to numeric type.
#' @param x.lab,y.lab,abundance.lab The default text for "sample", "OTU", and "read".
#' @keywords community matrix
#' @export
#' @examples
#' summary.cm <- summaryCM(community.matrix)
#'
#' @rdname CMStatistics
summaryCM <- function(community.matrix, most.abund, has.total=1, digits=2, pretty.numbers=TRUE,
x.lab="sample", y.lab="OTU", abundance.lab="read") {
summary.row.names <- c(ComMA::getPlural(abundance.lab, y.lab, x.lab),"singletons", "doubletons",
paste("max",y.lab,"abundance",sep="."), paste("min",y.lab,"abundance",sep="."),
paste("max",x.lab,"abundance",sep="."), paste("min",x.lab,"abundance",sep="."))
summary.cm <- data.frame(row.names = summary.row.names, stringsAsFactors=FALSE, check.names=FALSE)
if (has.total != 1) {
for (col.name in colnames(community.matrix))
summary.cm[,col.name] <- ComMA::summaryCM.Vector(community.matrix[,col.name])
}
if (has.total > 0) {
summary.cm[,"total"] <- ComMA::summaryCM.Vector(community.matrix)
if (!missing(most.abund)) {
if (most.abund > nrow(community.matrix))
most.abund <- nrow(community.matrix)
cat("Set most abundent OTUs threshold =", most.abund, ".\n")
community.matrix <- community.matrix[order(rs, decreasing=TRUE),]
cm <- community.matrix[1:most.abund,]
col.name <- paste0("most.abund.", most.abund, ".otus)")
summary.cm[,col.name] <- ComMA::summaryCM.Vector(cm)
}
}
if (pretty.numbers)
summary.cm <- ComMA::prettyNumbers(summary.cm, digits=digits)
return(summary.cm)
}
#' @details \code{summaryOTUs} returns a data frame of
#' OTU clustering summary given one or multiple community matrix(matrices).
#' The summary is created by \code{\link{summaryCM.Vector}}.
#'
#' @param input.list Default to TRUE to unwrap list(...) to
#' get the actual list if the input is a list of cm.
#' @param row.names The row names in the summary.
#' Default to "reads", "OTUs", "samples", "singletons", "doubletons",
#' "max.OTU.abun","min.OTU.abun","max.sample.abun","min.sample.abun".
#' @keywords community matrix
#' @export
#' @examples
#' otu.stats <- summaryOTUs(cm)
#' # cm.list is a list of cm
#' otu.stats <- ComMA::summaryOTUs(cm.list, input.list=T)
#'
#' @rdname CMStatistics
summaryOTUs <- function(..., digits=2, input.list=FALSE, pretty.numbers=TRUE,
x.lab="sample", y.lab="OTU", abundance.lab="read") {
cm.list <- unwrapInputList(..., input.list=input.list)
cat("Summarize OTUs on", length(cm.list), "data sets.\n")
summary.row.names <- c(ComMA::getPlural(abundance.lab, y.lab, x.lab),"singleton OTU", "doubleton OTU",
paste("max",y.lab,"abundance",sep="."), paste("min",y.lab,"abundance",sep="."),
paste("max",x.lab,"abundance",sep="."), paste("min",x.lab,"abundance",sep="."))
otu.stats.list <- lapply(cm.list, ComMA::summaryCM.Vector)
otu.stats <- data.frame(t(do.call("rbind", otu.stats.list)), check.names = F)
if (is.null(names(cm.list)))
colnames(otu.stats) <- paste0("dataset", 1:length(cm.list))
rownames(otu.stats) <- summary.row.names
# add new row
otu.stats["non singletons",] <- as.numeric(otu.stats["OTUs",])-as.numeric(otu.stats["singleton OTU",])
if (pretty.numbers)
otu.stats <- ComMA::prettyNumbers(otu.stats, digits=digits)
return(otu.stats)
}
#' @details \code{summaryDiversity} returns a data frame of
#' OTU clustering summary given a list of community matrix.
#' The community matrix is transposed to an input of
#' \code{\link{d}} {vegetarian}, and the summary is
#' created by \code{\link{diversityTable}}.
#'
#' @param row.names The row names in the summary.
#' Default to "$^0D_\\gamma$","$^0D_\\alpha$","$^0D_\\beta$",
#' "$^1D_\\gamma$","$^1D_\\alpha$", "$^1D_\\beta$",
#' "$^2D_\\gamma$","$^2D_\\alpha$","$^2D_\\beta$".
#' @param row.order The same row indices of \code{row names},
#' but by a given order. Default to c().
#' @keywords community matrix
#' @export
#' @examples
#' div.stats <- summaryDiversity(cm, row.order=c(2,5,8,3,6,9,1,4,7))
#'
#' @rdname CMStatistics
#TODO rewrite like summaryOTUs
summaryDiversity <- function(..., row.order=c(), digits=2, input.list=FALSE, pretty.numbers=TRUE, verbose=TRUE,
row.names=c("$^0D_\\gamma$","$^0D_\\alpha$","$^0D_\\beta$","$^1D_\\gamma$","$^1D_\\alpha$",
"$^1D_\\beta$","$^2D_\\gamma$","$^2D_\\alpha$","$^2D_\\beta$")) {
cm.list <- unwrapInputList(..., input.list=input.list)
cat("Summarize Jost diversities on", length(cm.list), "data sets.\n")
div.stats <- data.frame(stringsAsFactors=FALSE, check.names=FALSE)
for (data.id in 1:length(cm.list)) {
t.cm <- ComMA::transposeDF(cm.list[[data.id]])
# gamma(q=0), alpha(q=0), beta(q=0), gamma(q=1), ..., beta(q=2)
diversity.v <- ComMA::diversityTable(t.cm, named.vector=T)
i.vec <- 1:length(diversity.v)
if (length(row.order) == length(row.names)) {
if (! all( 1:length(row.names) == sort(row.order) ) )
stop("Invalid vector row.order !")
# reorder to match div.row.names
i.vec <- row.order
}
col.name <- names(cm.list)[data.id]
if (is.null(col.name))
col.name <- paste("data",data.id,sep=".")
for (i in i.vec)
div.stats[row.names[i], col.name] <- diversity.v[i]
if (verbose)
cat("Add column", col.name, "to data set", data.id, ".\n")
}
if (pretty.numbers)
div.stats <- ComMA::prettyNumbers(div.stats, digits=digits)
return(div.stats)
}
#' @details \code{summaryTaxaGroup} returns two data frames of
#' taxonomic composition of given one or multiple merged
#' community matrix(matrices) with taxa table,
#' which is created by \code{\link{mergeCMTaxa}}.
#' The 1st data frame \code{otus} summarizes the OTUs
#' assigned to each taxa group.
#' The 2nd data frame \code{rank.count} summarizes the \code{count.rank}
#' assigned to each taxa group.
#'
#' The \code{group.rank} determines which taxonomic rank is based on summary.
#' It has to be consistent with given \code{taxa.group}. But there is a trick,
#' if you want to count the number of OTUs that are identified as EUKARYOTA
#' but aren't identified to Kingdom level, for example,
#' you can set group.rank="kingdom" to get that number.
#'
#' @param unclassified Refere to \code{\link{assignTaxaByRank}},
#' default to 3 to remove every rows containing "unclassified".
#' @param taxa.group The row names in the summary.
#' Default to "ARCHAEA", "BACTERIA", "CHROMISTA", "PROTOZOA",
#' "FUNGI", "PLANTAE", "ANIMALIA".
#' @param group.rank The rank of given taxa groups,
#' which determines the column name (rank) to create the subset.
#' @param count.rank The lower rank to be counted for each taxa group.
#' Set NA to ignore this count.
#' @keywords community matrix
#' @export
#' @examples
#' ta.gr.stats <- summaryTaxaGroup(cm.taxa)
#' ta.gr.stats$otus
#' ta.gr.stats$rank.count
#' # OTUs that were only identified to high-level EUKARYOTA ranks
#' ta.gr.stats <- summaryTaxaGroup(cm.taxa, group.rank="kingdom")
#'
#' @rdname CMStatistics
summaryTaxaGroup <- function(..., input.list=FALSE, unclassified=3, pretty.numbers=TRUE,
taxa.group=c("ARCHAEA", "BACTERIA", "CHROMISTA", "PROTOZOA", "FUNGI", "PLANTAE", "ANIMALIA"),
group.rank="kingdom", count.rank="phylum", digits = 0) {
cm.taxa.list <- unwrapInputList(..., input.list=input.list)
cat("Summarize OTUs by taxa group on", length(cm.taxa.list), "data sets.\n")
# data frame for statistics
tg.reads <- data.frame(row.names = taxa.group, check.names=FALSE)
tg.otus <- data.frame(row.names = taxa.group, check.names=FALSE)
tg.rank.count <- NA
if (!is.na(count.rank))
tg.rank.count <- data.frame(row.names = taxa.group, check.names=FALSE)
count.rank.df.list <- list()
for (data.id in 1:length(cm.taxa.list)) {
col.name <- names(cm.taxa.list)[data.id]
if (is.null(col.name))
col.name <- paste("data",data.id,sep=".")
for (taxa.id in 1:length(taxa.group)) {
cat("Data set", col.name, "taxonomic group", taxa.group[taxa.id], ".\n")
cm.taxa.sub <- ComMA::subsetTaxaTable(cm.taxa.list[[data.id]], taxa.group=taxa.group[taxa.id], rank=group.rank)
taxa.assign.reads <- list()
if (nrow(cm.taxa.sub) > 1) {
taxa.assign.otu <- ComMA::assignTaxaByRank(cm.taxa.sub, unclassified=unclassified, aggre.FUN=function(x) sum(x>0))
taxa.assign.reads <- ComMA::assignTaxaByRank(cm.taxa.sub, unclassified=unclassified)
}
if (is.null(nrow(taxa.assign.reads[[group.rank]])) || nrow(taxa.assign.reads[[group.rank]]) < 1) {
tg.reads[taxa.group[taxa.id],col.name] <- 0
tg.otus[taxa.group[taxa.id],col.name] <- 0
if (!is.na(count.rank))
tg.rank.count[taxa.group[taxa.id],col.name] <- 0
} else {
tg.otus[taxa.group[taxa.id],col.name] <- sum(taxa.assign.otu[[group.rank]])
tg.reads[taxa.group[taxa.id],col.name] <- sum(taxa.assign.reads[[group.rank]])
if (!is.na(count.rank)) {
tg.rank.count[taxa.group[taxa.id],col.name] <- nrow(taxa.assign.otu[[count.rank]])
# record taxa in count.rank
df.name <- paste(col.name, taxa.group[taxa.id], sep = ".")
count.rank.df.list[["reads"]][[df.name]] <- taxa.assign.reads[[count.rank]]
count.rank.df.list[["OTUs"]][[df.name]] <- taxa.assign.otu[[count.rank]]
}
}
} # for taxa.id
} # for data.id
if (pretty.numbers) {
tg.otus <- ComMA::prettyNumbers(tg.otus, digits = digits)
tg.reads <- ComMA::prettyNumbers(tg.reads, digits = digits)
if (!is.na(count.rank))
tg.rank.count <- ComMA::prettyNumbers(tg.rank.count, digits = digits)
}
list(otus=tg.otus, rank.count=tg.rank.count, reads=tg.reads, taxa.group=taxa.group, group.rank=group.rank,
count.rank=count.rank, count.rank.df.list=count.rank.df.list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.