R/pcoa.R

#' Calculates the generalized UniFrac distances for an agglomerated dataframe.
#' This involves a random rooting of the tree and rarefaction, which are
#' non-deterministic. For reproducibility, recommend calling set.seed before this
#' @param agg agglomerated dataframe
#' @param s sample metadata dataframe
#' @param tree a tree (will root using random node)
#' @param alpha the alpha parameter for GUniFrac
MakeUnifracData <- function(agg, s, tree, rarefy.depth, alpha=0.5) {

  # Root tree
  .rtree <- root_tree(tree, agg$otu)
  # Create sample matrix
  .mat <- counts_matrix(agg)
  # Keep only otus that appear in the tree
  .mat <- .mat[, colnames(.mat) %in% .rtree$tip.label]

  # Rarefy to specified depth
  if (!missing(rarefy.depth)) {
    .mat.rf <- vegan::rrarefy(.mat, sample=rarefy.depth)
  } else {
    .mat.rf <- .mat
  }

  # Calculate generalized UniFrac
  .unifrac <- GUniFrac::GUniFrac(.mat.rf, .rtree, alpha=alpha)$unifracs
  # Return the generalized UniFrac matrix at specified alpha level
  .unifrac[,,paste0("d_",alpha)]
}

#' Performs principle coordinates analysis on a GUniFrac distance and merges
#' sample metadata for plotting.
#' @param unifrac.data the output of MakeUnifracData
#' @param s sample metadata dataframe
#' @param grouping.col column in s defining centroid groups
MakePCOAData <- function(unifrac.data, s, grouping.col) {
  pc <- ape::pcoa(unifrac.data)
  values <- pc$values[1:4,]
  .pc <- data.frame(pc$vectors)[,1:4]
  .pc$SampleID <- rownames(.pc)
  .pc <- left_join(.pc, s)
  if (!missing(grouping.col)) {
    .pc <- group_by_(.pc, .dots=list(grouping.col)) %>%
      mutate(med.x=mean(Axis.1), med.y=mean(Axis.2))
  }
  list(pcoord=.pc, values=values)
}

#' Performs PERMANOVA on the given terms using the unifrac distance matrix.
#' @param unifrac.data matrix/dist of unifrac distances (coerced to dist if not)
#' @param s sample metadata
#' @param terms a string giving the RHS of the adonis formula (eg "counts*StudyGroup")
TestPermanova <- function(unifrac.data, s, terms) {

  .s <- filter(s, SampleID %in% rownames(unifrac.data))
  .s <- .s[match(rownames(unifrac.data), .s$SampleID), ]

  if (class(unifrac.data) != "dist") {
    warning("unifrac.data not dist; coerced to dist")
    unifrac.data <- dist(unifrac.data)
  }

  pn <- adonis(
    formula(sprintf("unifrac.data ~ %s", terms)),
    data = .s)

  broom::tidy(pn$aov.tab)
}

#' Standardized PCoA plot.
#' @param pcoa.data dataframe generated by MakePCOAData
#' @param fill column to set as fill aesthetic
#' @param linetype column to set as linetype aesthetic
#' @param fill.palette color palette to use as fill
#' @param color.palette color palette to use for outlines and line segments
PlotPCOA <- function(pcoa.data, fill = "StudyGroup", linetype="SampleType",
                     fill.palette, color.palette) {
  pcoords <- pcoa.data$pcoord
  values <- pcoa.data$values
  axis.1.title <- sprintf("Axis 1 (%1.1f%%)", values[1,]$Relative_eig*100)
  axis.2.title <- sprintf("Axis 2 (%1.1f%%)", values[2,]$Relative_eig*100)
  ggplot(
    pcoords,
    aes_string(x="Axis.1", y="Axis.2", fill=fill, color=fill)) +
    geom_segment(aes_string(xend="med.x", yend="med.y"), size=0.4) +
    geom_point(shape=21, size=2) +
    stat_ellipse(alpha=0.3) +
    scale_fill_manual(values=pcoa.fills) +
    scale_color_manual(values=pcoa.colors) +
    xlab(axis.1.title) +
    ylab(axis.2.title) +
    theme_classic(base_size = 10) +
      theme(
        axis.ticks=element_blank(),
        axis.text=element_blank(),
        aspect.ratio=1,
        legend.title=element_blank(),
        legend.background=element_blank(),
        legend.position="bottom",
        plot.title=element_blank())
        # plot.subtitle=element_blank())
}

#' Wrapper for UniFrac calculations, PERMANOVA testing and PCoA plotting.
#' @export
PCoAChunk <- function(agg, s, tree, rarefy.depth, testing.terms,
                      sampleset=c("A", "B", "C"), dataset=c("16S","ITS"),
                      subset, fig.num, fig.dir=opts$figure_fp,
                      only.studygroup.terms=TRUE) {
  agg <- agg %>% droplevels() %>% ungroup()
  sample.set <- match.arg(sampleset)
  dataset <- match.arg(dataset)
  .s <- agg %>% distinct(SampleID, readcount) %>% left_join(s)
  uf <- MakeUnifracData(agg, s, tree, rarefy.depth, alpha=0.5)
  pc <- MakePCOAData(uf, .s, "StudyGroup")
  pn <- TestPermanova(uf, .s, paste(testing.terms, collapse="*"))
  subtitle <- NULL
  if (only.studygroup.terms) {
    if ("StudyGroup" %in% testing.terms) {
      subtitle <- with(
        pn[pn$term == "StudyGroup", ],
        sprintf("PERMANOVA p = %1.3f; R2 = %1.3f", p.value, R2))
    }
  } else {
    subtitle <- paste(c("PERMANOVA", sapply(
      testing.terms, function(term){with(
        pn[pn$term == term,],
        sprintf("%s:\tp=%1.3f\tR2=%1.3f", term, p.value, R2))
      })), collapse="\n")
  }
  title <- sprintf("Set %s, %s, %s", sampleset, dataset, subset)
  p <- PlotPCOA(pc, fill.palette = pcoa.fills, color.palette = pcoa.colors) +
    labs(subtitle=subtitle)
  SavePCOAPlot(
    p, fig.dir, fig.num, sampleset, dataset, subset)
}

SavePCOAPlot <- function(
  p, fig.dir, fig.num, sampleset, dataset, subset)
{
  filename=sprintf(
    "Fig%s_Set%s_%s_%s.pdf", fig.num, sampleset, dataset, subset)
  ggsave(
    file.path(fig.dir, filename), p,
    height=opts$pcoa.height,
    width=opts$pcoa.width, device=cairo_failsafe)
  p
}
eclarke/SarcoidMicrobiome documentation built on May 15, 2019, 7:53 p.m.