#' Calculates the generalized UniFrac distances for an agglomerated dataframe.
#' This involves a random rooting of the tree and rarefaction, which are
#' non-deterministic. For reproducibility, recommend calling set.seed before this
#' @param agg agglomerated dataframe
#' @param s sample metadata dataframe
#' @param tree a tree (will root using random node)
#' @param alpha the alpha parameter for GUniFrac
MakeUnifracData <- function(agg, s, tree, rarefy.depth, alpha=0.5) {
# Root tree
.rtree <- root_tree(tree, agg$otu)
# Create sample matrix
.mat <- counts_matrix(agg)
# Keep only otus that appear in the tree
.mat <- .mat[, colnames(.mat) %in% .rtree$tip.label]
# Rarefy to specified depth
if (!missing(rarefy.depth)) {
.mat.rf <- vegan::rrarefy(.mat, sample=rarefy.depth)
} else {
.mat.rf <- .mat
}
# Calculate generalized UniFrac
.unifrac <- GUniFrac::GUniFrac(.mat.rf, .rtree, alpha=alpha)$unifracs
# Return the generalized UniFrac matrix at specified alpha level
.unifrac[,,paste0("d_",alpha)]
}
#' Performs principle coordinates analysis on a GUniFrac distance and merges
#' sample metadata for plotting.
#' @param unifrac.data the output of MakeUnifracData
#' @param s sample metadata dataframe
#' @param grouping.col column in s defining centroid groups
MakePCOAData <- function(unifrac.data, s, grouping.col) {
pc <- ape::pcoa(unifrac.data)
values <- pc$values[1:4,]
.pc <- data.frame(pc$vectors)[,1:4]
.pc$SampleID <- rownames(.pc)
.pc <- left_join(.pc, s)
if (!missing(grouping.col)) {
.pc <- group_by_(.pc, .dots=list(grouping.col)) %>%
mutate(med.x=mean(Axis.1), med.y=mean(Axis.2))
}
list(pcoord=.pc, values=values)
}
#' Performs PERMANOVA on the given terms using the unifrac distance matrix.
#' @param unifrac.data matrix/dist of unifrac distances (coerced to dist if not)
#' @param s sample metadata
#' @param terms a string giving the RHS of the adonis formula (eg "counts*StudyGroup")
TestPermanova <- function(unifrac.data, s, terms) {
.s <- filter(s, SampleID %in% rownames(unifrac.data))
.s <- .s[match(rownames(unifrac.data), .s$SampleID), ]
if (class(unifrac.data) != "dist") {
warning("unifrac.data not dist; coerced to dist")
unifrac.data <- dist(unifrac.data)
}
pn <- adonis(
formula(sprintf("unifrac.data ~ %s", terms)),
data = .s)
broom::tidy(pn$aov.tab)
}
#' Standardized PCoA plot.
#' @param pcoa.data dataframe generated by MakePCOAData
#' @param fill column to set as fill aesthetic
#' @param linetype column to set as linetype aesthetic
#' @param fill.palette color palette to use as fill
#' @param color.palette color palette to use for outlines and line segments
PlotPCOA <- function(pcoa.data, fill = "StudyGroup", linetype="SampleType",
fill.palette, color.palette) {
pcoords <- pcoa.data$pcoord
values <- pcoa.data$values
axis.1.title <- sprintf("Axis 1 (%1.1f%%)", values[1,]$Relative_eig*100)
axis.2.title <- sprintf("Axis 2 (%1.1f%%)", values[2,]$Relative_eig*100)
ggplot(
pcoords,
aes_string(x="Axis.1", y="Axis.2", fill=fill, color=fill)) +
geom_segment(aes_string(xend="med.x", yend="med.y"), size=0.4) +
geom_point(shape=21, size=2) +
stat_ellipse(alpha=0.3) +
scale_fill_manual(values=pcoa.fills) +
scale_color_manual(values=pcoa.colors) +
xlab(axis.1.title) +
ylab(axis.2.title) +
theme_classic(base_size = 10) +
theme(
axis.ticks=element_blank(),
axis.text=element_blank(),
aspect.ratio=1,
legend.title=element_blank(),
legend.background=element_blank(),
legend.position="bottom",
plot.title=element_blank())
# plot.subtitle=element_blank())
}
#' Wrapper for UniFrac calculations, PERMANOVA testing and PCoA plotting.
#' @export
PCoAChunk <- function(agg, s, tree, rarefy.depth, testing.terms,
sampleset=c("A", "B", "C"), dataset=c("16S","ITS"),
subset, fig.num, fig.dir=opts$figure_fp,
only.studygroup.terms=TRUE) {
agg <- agg %>% droplevels() %>% ungroup()
sample.set <- match.arg(sampleset)
dataset <- match.arg(dataset)
.s <- agg %>% distinct(SampleID, readcount) %>% left_join(s)
uf <- MakeUnifracData(agg, s, tree, rarefy.depth, alpha=0.5)
pc <- MakePCOAData(uf, .s, "StudyGroup")
pn <- TestPermanova(uf, .s, paste(testing.terms, collapse="*"))
subtitle <- NULL
if (only.studygroup.terms) {
if ("StudyGroup" %in% testing.terms) {
subtitle <- with(
pn[pn$term == "StudyGroup", ],
sprintf("PERMANOVA p = %1.3f; R2 = %1.3f", p.value, R2))
}
} else {
subtitle <- paste(c("PERMANOVA", sapply(
testing.terms, function(term){with(
pn[pn$term == term,],
sprintf("%s:\tp=%1.3f\tR2=%1.3f", term, p.value, R2))
})), collapse="\n")
}
title <- sprintf("Set %s, %s, %s", sampleset, dataset, subset)
p <- PlotPCOA(pc, fill.palette = pcoa.fills, color.palette = pcoa.colors) +
labs(subtitle=subtitle)
SavePCOAPlot(
p, fig.dir, fig.num, sampleset, dataset, subset)
}
SavePCOAPlot <- function(
p, fig.dir, fig.num, sampleset, dataset, subset)
{
filename=sprintf(
"Fig%s_Set%s_%s_%s.pdf", fig.num, sampleset, dataset, subset)
ggsave(
file.path(fig.dir, filename), p,
height=opts$pcoa.height,
width=opts$pcoa.width, device=cairo_failsafe)
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.