#' Make a rarefaction curve using ggplot2
#' @param physeq_object A phyloseq class object, from which abundance data are extracted
#' @param step Step Size for sample size in rarefaction curves
#' @param label Default `NULL`. Character string. The name of the variable to map to text labels on the plot. Similar to color option but for plotting text.
#' @param color Default `NULL`. Character string. The name of the variable to map to the colors in the plot. This can be a sample variables among the set returned by sample_variables(physeq_object) or taxonomic rank, among the set returned by rank_names(physeq_object)
#' @param plot default `TRUE`. Logical. Should the graph be plotted
#' @param parallel default `FALSE`. Logical. Should rarefaction be parallelized
#' @param se default `TRUE`. Logical. Should standard errors be calculated.
#' @examples
#' good_taxon_table <- data.frame(sum.taxonomy = c("a;b;c;d;f;u", "p;q;r;s;t;u"),
#' site_1 = c(0,1), site_2 = c(10, 20))
#' good_maps <- data.frame(site = c("site_1", "site_2"),
#' season = c("wet", "dry"), host = c("oak", "sage"))
#' physeq_object <- convert_anacapa_to_phyloseq(good_taxon_table, good_maps)
#' ggrare(physeq_object, step = 20, se = TRUE)
#' @export
ggrare <- function(physeq_object, step = 10, label = NULL, color = NULL, plot = TRUE, parallel = FALSE, se = TRUE) {
x <- methods::as(phyloseq::otu_table(physeq_object), "matrix")
if (phyloseq::taxa_are_rows(physeq_object)) { x <- t(x) }
## This script is adapted from vegan `rarecurve` function
tot <- rowSums(x)
S <- rowSums(x > 0)
nr <- nrow(x)
rarefun <- function(i) {
cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
n <- seq(1, tot[i], by = step)
if (n[length(n)] != tot[i]) {
n <- c(n, tot[i])
}
y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
if (nrow(y) != 1) {
rownames(y) <- c(".S", ".se")
return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
} else {
return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
}
}
if (parallel) {
out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
} else {
out <- lapply(seq_len(nr), rarefun)
}
df <- do.call(rbind, out)
# Get sample data
if (!is.null(phyloseq::sample_data(physeq_object, FALSE))) {
sdf <- methods::as(phyloseq::sample_data(physeq_object), "data.frame")
sdf$Sample <- rownames(sdf)
data <- merge(df, sdf, by = "Sample")
labels <- data.frame(x = tot, y = S, Sample = rownames(x))
labels <- merge(labels, sdf, by = "Sample")
}
# Add, any custom-supplied plot-mapped variables
if ( length(color) > 1 ) {
data$color <- color
names(data)[names(data) == "color"] <- deparse(substitute(color))
color <- deparse(substitute(color))
}
if ( length(label) > 1 ) {
labels$label <- label
names(labels)[names(labels) == "label"] <- deparse(substitute(label))
label <- deparse(substitute(label))
}
p <- ggplot2::ggplot(data = data,
ggplot2::aes_string(x = "Size",
y = ".S",
group = "Sample",
color = color))
p <- p + ggplot2::labs(x = "Sequence Sample Size", y = "Species Richness")
if (!is.null(label)) {
p <- p + ggplot2::geom_text(data = labels,
ggplot2::aes_string(x = "x",
y = "y",
label = label,
color = color),
size = 4, hjust = 0)
}
p <- p + ggplot2::geom_line()
if (se) { ## add standard error if available
p <- p +
ggplot2::geom_ribbon(ggplot2::aes_string(ymin = ".S - .se",
ymax = ".S + .se",
color = NULL,
fill = color),
alpha = 0.2)
}
if (plot) {
plot(p)
}
invisible(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.