estimate_prevalence <- function(physeq, group, format = c("long", "wide")) {
## Args:
## - physeq: phyloseq class object, otu abundances are extracted from this object
## - group: Either the a single character string matching a
## variable name in the corresponding sample_data of ‘physeq’, or a
## factor with the same length as the number of samples in ‘physeq’.
## - format: long (default) or wide. Should the result be organised as a long data.frame
## with columns "otu", "group", "prevalence" and "specificity" or a numeric matrix
## with otus in rows and group specificty/group prevalence in column.
##
## Returns;
## data frame with components
## - prevalence: observed prevalence in group
## - specifity : specifity (prevalence / global prevalence) in group
## - group: factor level
## - otu: otu
## or
## matrix m of size ntaxa(physeq) times 2*N where N is the number of levels (groups) in group
## and
## - m[i, g] is the prevalence of otu i in group g
## - m[i, N+g] is the specificity of otu i to group g
## Get grouping factor
if (!is.null(sample_data(physeq, FALSE))) {
if (class(group) == "character" & length(group) == 1) {
x1 <- data.frame(sample_data(physeq))
if (!group %in% colnames(x1)) {
stop("group not found among sample variable names.")
}
group <- x1[, group]
}
}
if (class(group) != "factor") {
group <- factor(group)
}
## Construct relative abundances by sample
tdf <- as(otu_table(physeq), "matrix")
if (!taxa_are_rows(physeq)) { tdf <- t(tdf) }
tdf <- 0 + (tdf > 0)
## prevalence
frac <- t(rowsum(t(tdf), group, reorder = TRUE)) / matrix(rep(table(group), each = nrow(tdf)),
nrow = nrow(tdf))
## specificity
spec <- t(rowsum(t(tdf), group, reorder = TRUE)) / rowSums(tdf)
spec[rowSums(tdf) == 0, ] <- 0
## cbind prevalence and specificity
res <- merge(melt(frac, varnames = c("otu", "group"), value.name = "prevalence"),
melt(spec, varnames = c("otu", "group"), value.name = "specificity"))
recast_prevalence <- function() {
prev <- acast(prevalence, otu ~ group, value.var = "prevalence")
colnames(prev) <- paste("prev", colnames(prev), sep = "_")
spec <- acast(prevalence, otu ~ group, value.var = "specificity")
colnames(prev) <- paste("spec", colnames(spec), sep = "_")
return(cbind(prev, spec))
}
format <- match.arg(format)
if (format == "wide") {
res <- recast_prevalence()
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.