R/ordinations.R

#' Draw a between class analysis (BCA) plot.
#'
#' @param physeq Phyloseq object.
#' @param factor Name of the sample metadata column to perform BCA.
#' @param color Name of the sample metadata column to color the samples with.
#' @param label Taxonomic annotation of the taxon to be shown in the biplot.
#' @param ntaxa Maximum number of driver taxa to display. Actual number shown might be lower than the specified number.
#'
#' @return None.
#' @export
#'
#' @import ggplot2
#' @import ggrepel
#'
#' @examples
#' \dontrun{
#' biplot_bca(physeq, factor = "Enterotype", color = "Enterotype", label = "Genus")
#' biplot_bca(physeq, factor = "Enterotype", color = "Diet", label = "Genus", ntaxa = 5)
#' }
biplot_bca <- function(physeq, factor = "Enterotype", color = "Enterotype", label = "Genus", ntaxa = 10) {

  data <- as.data.frame(otu_table(physeq))
  sample_data <- as.data.frame(sample_data(physeq))
  taxnames <-  as.data.frame(tax_table(physeq))

  factors <- as.factor(sample_data[[factor]])
  classes = length(levels(factors))

  obs.pca <- dudi.pca(data, scannf=F, nf=10)
  obs.bet <- bca(obs.pca, fac=as.factor(factors), scannf=F, nf=classes-1)

  drivers <- obs.bet$co
  components <- colnames(drivers)

  # Sum-squared components
  drivers$Total <- apply(drivers**2, 1, sum)

  # Names
  names <- taxnames[label]
  colnames(names) <- NULL
  drivers<- cbind(Name = names, drivers)

  # Flag top 5 in each component
  drivers$Selected <- FALSE
  for (x in c(components, "Total")) {
    drivers <- dplyr::arrange(drivers, desc(abs(get(x))))
    drivers[1:ntaxa,]$Selected <- TRUE
  }

  # Need names
  drivers$Selected <- drivers$Selected & !is.na(drivers$Name)

  drivers <- dplyr::filter(drivers, Selected == TRUE)
  drivers <- dplyr::arrange(drivers, desc(Total))
  drivers <- head(drivers, ntaxa)
  row.names(drivers) <- drivers$Name
  drivers$Total <- NULL
  drivers$Name <- NULL
  drivers$Selected <- NULL

  color_factors <- as.factor(sample_data[[color]])
  colors <- brewer.pal(length(levels(color_factors)), my_palette[[color]])

  s.class(obs.bet$ls, fac=color_factors, grid=F, cell=0, cstar=0, col=colors)
  s.arrow(drivers*7, clabel=0.75, boxes = TRUE, add.plot=TRUE)
  s.class(obs.bet$ls, fac=color_factors, cpoint=0, grid=F, cell=0, cstar=0, col=colors, add.plot=TRUE)

  rm(drivers)
  rm(obs.pca)
  rm(obs.bet)
  rm(colors)
  rm(factors)
  rm(classes)
  rm(names)
  rm(data)
}


#' Draw a principal coordinate biplot using Bray-Curtis dissimilarity measure.
#'
#' @param physeq Phyloseq object.
#' @param color  Name of the sample metadata column to color the samples with.
#' @param shape  Name of the sample metadata column to assign shapes for the samples.
#' @param axis1  Index of the principal component to draw in X-axis.
#' @param axis2  Index of the principal component to draw in Y-axis.
#' @param show.taxa logical. Should the taxa be shown in the biplot? Without taxa, this becomes a standard PCoA plot.
#' @param label  Taxonomic annotation of the taxon to be shown in the biplot.
#' @param repel  logical. Should the taxa names in the biplot use ggrepel to display all taxa in a non-overlapping manner? If FALSE, then only non-overlapping taxa are shown to improve readability.
#' @param show.legend logical. Should the legend be drawn?
#' @param custom_palette Custom palette list to use (see example).
#'
#' @return ggplot object with the biplot
#' @export
#'
#' @examples
#' \dontrun{
#'
#' # Show samples and taxa in a biplot
#' biplot_pcoa(physeq, color="Diet")
#'
#' # Don't show taxa - equivalient to plot_pcoa()
#' biplot_pcoa(physeq, color="Diet", show.taxa = FALSE)
#'
#' # Specify color and shape
#' biplot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2)
#' biplot_pcoa(physeq, color="Diet", shape="Diet", axis1 = 1, axis2 = 3, show.legend = FALSE, point_size = 2)
#'
#' # custom palette can be just names of ColorBrewer palettes
#'
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' biplot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2, custom_palette = pal) + ggtitle("Fecal samples") + theme(plot.title = element_text(size = 7, face = "bold", hjust = 0.5), legend.position = "right")
#'
#' # Or it can also be named lists for all possible values.
#'
#' diet_colors = c(Control="black", HSD="#4A57A2")
#' ET_colors = c("red", "black", "blue")
#' names(ET_colors) = c("ET1", "ET2", "ET3")
#' pal = list(Diet = diet_colors, Enterotype = ET_colors)
#'
#' biplot_pcoa(physeq, color="Diet", shape="Diet", custom_palette = pal)
#' biplot_pcoa(physeq, color="Enterotype", shape="Diet", custom_palette = pal)
#' }
biplot_pcoa <- function(physeq, color = "Group", shape = NULL, axis1 = 1, axis2 = 2, show.taxa = TRUE, label = "Genus", repel = FALSE, show.legend = TRUE, custom_palette = NULL) {
  dist <- phyloseq::distance(physeq =physeq, method="bray")
  ordination <- phyloseq::ordinate(physeq, method = "PCoA", distance = dist)
  axes = c(axis1, axis2)
  DF <- phyloseq::plot_ordination(physeq, ordination, axes = axes, type="biplot", justDF = TRUE)
  x = colnames(DF)[1]
  y = colnames(DF)[2]

  ord_map = aes_string(x = x, y = y, color = color, shape = shape, na.rm = TRUE)
  label_map <- aes_string(x = x, y = y, label = label, na.rm = TRUE)

  # Get strength of the axes

  if (length(extract_eigenvalue(ordination)[axes]) > 0) {
    eigvec = extract_eigenvalue(ordination)
    fracvar = eigvec[axes]/sum(eigvec)
    percvar = round(100 * fracvar, 1)
  }

  # Get samples and taxa separately

  DF_Taxa <- DF[DF$id.type == "Taxa",]
  DF_Samples <- DF[DF$id.type == "Samples",]

  # Pick top 50 taxa based on their loadings weighted by variance

  score <- fracvar[1]*(DF_Taxa[x]^2) + fracvar[2]*(DF_Taxa[y]^2)
  colnames(score) <- NULL
  DF_Taxa <- cbind(DF_Taxa, Score = score)
  rm(score)

  DF_Taxa <- dplyr::arrange(DF_Taxa, desc(Score))
  DF_Taxa <- head(DF_Taxa, 50)

  # Plot

  Tr <-
    ggplot(DF_Samples, ord_map) +
    geom_point(na.rm = TRUE, size = 2, show.legend = show.legend) +
    theme(aspect.ratio = 1)
  #scale_size_manual("type", values = c(Samples = 2, Taxa = 4)) +
  #scale_shape_manual(values=GP1.shape) +
  #geom_segment(data = DF_Taxa, x = DF_Taxa$Axis.1, y = DF_Taxa$Axis.2, xend = 0, yend = 0)

  # Add color

  color_values <- get_my_palette_colors(custom_palette, color, levels(DF_Samples[[color]]), offset=0)
  Tr <- Tr + scale_color_manual(values = color_values)

  if (show.taxa) {
    if (repel) {
      Tr <- Tr +
        geom_text_repel(label_map, data = rm.na.phyloseq(DF_Taxa, label),
                        size = 3,
                        color = "black",
                        vjust = "center",
                        hjust = "middle",
                        show.legend = FALSE,
                        na.rm = TRUE)
    } else {
      Tr <- Tr +
        geom_text(label_map, data = rm.na.phyloseq(DF_Taxa, label),
                  check_overlap = TRUE,
                  size = 3,
                  color = "black",
                  vjust = "center",
                  hjust = "middle",
                  show.legend = FALSE,
                  na.rm = TRUE)
    }
  }

  # Add variance in axes

  strivar = paste0("PC", axes, "  [", percvar, "%]")
  Tr = Tr + xlab(strivar[1]) + ylab(strivar[2])

  return(Tr)
}

#' Draw a principal coordinate plot using Bray-Curtis dissimilarity measure.
#'
#' @param physeq Phyloseq object.
#' @param color Name of the sample metadata column to color the samples with. If NULL, first column will be used.
#' @param shape Name of the sample metadata column to assign shapes for the samples.
#' @param axis1 Index of the principal component to draw in X-axis.
#' @param axis2 Index of the principal component to draw in Y-axis.
#' @param show.legend logical. Should the legend be drawn?
#' @param point_size Size of the data points in the PCoA.
#' @param custom_palette Custom palette list to use (see example).
#'
#' @return ggplot object with the biplot
#'
#' @export
#'
#' @examples
#' \dontrun{
#' plot_pcoa(physeq, color="Diet")
#' plot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2)
#' plot_pcoa(physeq, color="Diet", shape="Diet", axis1 = 1, axis2 = 3, show.legend = FALSE, point_size = 2)
#'
#' # custom palette can be just names of ColorBrewer palettes
#'
#' pal = list(Diet = "Set2", Enterotype = "Pastel1", Significance = "PuRd")
#' plot_pcoa(physeq, color="Diet", shape="Diet", point_size = 2, custom_palette = pal) + ggtitle("Fecal samples") + theme(plot.title = element_text(size = 7, face = "bold", hjust = 0.5), legend.position = "right")
#'
#' # Or it can also be named lists for all possible values.
#'
#' diet_colors = c(Control="black", HSD="#4A57A2")
#' ET_colors = c("red", "black", "blue")
#' names(ET_colors) = c("ET1", "ET2", "ET3")
#' pal = list(Diet = diet_colors, Enterotype = ET_colors)
#'
#' plot_pcoa(physeq, color="Diet", shape="Diet", custom_palette = pal)
#' plot_pcoa(physeq, color="Enterotype", shape="Diet", custom_palette = pal)
#' }
plot_pcoa <- function(physeq, color = NULL, shape = NULL, axis1 = 1, axis2 = 2, show.legend = TRUE, point_size = 2, custom_palette = NULL) {
  dist <- phyloseq::distance(physeq =physeq, method="bray")
  ordination <- phyloseq::ordinate(physeq, method = "PCoA", distance = dist)
  axes = c(axis1, axis2)
  DF <- phyloseq::plot_ordination(physeq, ordination, axes = axes, type="samples", justDF = TRUE)
  x = colnames(DF)[1]
  y = colnames(DF)[2]

  if (is.null(color)) {
    color = sample_data(physeq)[1]
  }

  ord_map = aes_string(x = x, y = y, color = color, shape = shape, na.rm = TRUE)

  # Get strength of the axes

  if (length(extract_eigenvalue(ordination)[axes]) > 0) {
    eigvec = extract_eigenvalue(ordination)
    fracvar = eigvec[axes]/sum(eigvec)
    percvar = round(100 * fracvar, 1)
  }

  # Plot

  Tr <-
    ggplot(DF, ord_map) +
    geom_point(na.rm = TRUE, size = point_size, show.legend = show.legend) +
    theme(aspect.ratio = 1)

  # Add color

  color_values <- get_my_palette_colors(custom_palette, color, levels(DF[[color]]), offset=0)
  Tr <- Tr + scale_color_manual(values = color_values)

  # Add variance in axes

  strivar = paste0("PC", axes, "  [", percvar, "%]")
  Tr = Tr + xlab(strivar[1]) + ylab(strivar[2])

  return(Tr)
}
TBrach/MicrobiomeX documentation built on May 14, 2019, 2:28 p.m.