R/micro_pca.R

Defines functions micro_pca

Documented in micro_pca

#' @title Calculate and plot principle components
#' @name micro_pca
#' @description Principle components are calculated on the centerted log ratio tranformation of the OTU table using the \code{\link[stats]{prcomp}} function from the \code{\link{stats}} package. Scaling the OTU table to a unit variance is the default option, and recommended, but this can be changed using scaled = F. The components are then plotted using the ggbiplot function, available on github
#' @param micro_set A tidy_micro data set
#' @param table OTU table of interest
#' @param dist A distance matrix, such as a beta diversity. If supplied a PCoA plot will be returned
#' @param grp_var Categorical grouping variable for color
#' @param y Value to calculate principle components on. Default is centered log ratio
#' @param scale Logical. Indicating whether the variables should be scaled to have unit variance before the analysis takes place
#' @param axes_arrows Logical. Plot component axes arrows
#' @param main Plot title
#' @param subtitle Plot subtitle
#' @param legend_title Legend title
#' @details PCA calculation is done by a singular value decomposition of the (centered and possibly scaled) data matrix, not by using eigen on the covariance matrix. This is generally the preferred method for numerical accuracy. Calculations are accomplished through the \code{\link[stats]{prcomp}} function, and ggplot is created through the \code{\link[ggbiplot]{ggbiplot}} function
#' @references Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) The New S Language. Wadsworth & Brooks/Cole.
#'
#' Mardia, K. V., J. T. Kent, and J. M. Bibby (1979) Multivariate Analysis, London: Academic Press.
#'
#' Venables, W. N. and B. D. Ripley (2002) Modern Applied Statistics with S, Springer-Verlag.
#'
#' Vincent Q. Vu (2011). ggbiplot: A ggplot2 based biplot. R package version 0.55. \link{http://github.com/vqv/ggbiplot}
#' @return A ggplot you can add geoms to if you'd like
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#'otu_tabs <- list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#'set <- tidy_micro(otu_tabs = otu_tabs, meta = met) \%>\%
#'filter(day == 7) \%>\% ## Only including first week
#'mutate(bpd1 = factor(bpd1))
#'
#'set \%>\% micro_pca(table = "Family", grp_var = bpd1)
#' @export
micro_pca <- function(micro_set, table = NULL, dist = NULL,  grp_var, y = clr, scale = TRUE, axes_arrows = F,
                main = NULL, subtitle = NULL, legend_title = NULL){

  if(is.null(dist)){
    ## PCA using
    if(is.null(table)) stop("Please supply a table with your tidy_micro set")

    if(table %nin% unique(micro_set$Table)) stop("Specified table is not in supplied micro_set")

    dd <- micro_set %>%
      dplyr::filter(Table == table) %>% ## Filtering out table we want
      dplyr::select(Lib, Taxa, !!rlang::enquo(y), !!rlang::enquo(grp_var)) %>% ## getting impartant vars
      tidyr::spread(Taxa, !!rlang::enquo(y)) ## Formatting for PCA

    ## % of variation explained by first two components
    grp_var <- dd %>% pull(!!rlang::enquo(grp_var))

    dd %<>%
      dplyr::select(-c(1,2)) %>%  ## Removing Lib and var
      stats::prcomp(scale = scale)

    ve <- round(summary(dd)$importance[2,1:2], 3) * 100

    gg <- dd %>%   ## Principle components
      ggbiplot::ggbiplot(var.axes = axes_arrows, ## Annoying arrows
                         groups = grp_var ## pulling var for grouping
      ) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, subtitle = subtitle,
                    x = paste0("Principle Component 1 (", ve[1], "%)"),
                    y = paste0("Principle Component 2 (", ve[2], "%)"), colour = legend_title)

    cat("PCA plot created")
  } else {

    ## Not the most elegant but it works...
    ## pulling grouping var for color
    grp_var <- micro_set %>%
      dplyr::filter(Lib %in% rownames(dist)) %>% ## Only including Libs from dist
      dplyr::distinct(Lib, .keep_all = TRUE) %>% ## removing repeated libs
      pull(!!rlang::enquo(grp_var)) ## Pulling grouping var

    dd <- dist %>%
      stats::prcomp(scale = scale) ## Principle coordinants

    ve <- round(summary(dd)$importance[2,1:2], 3) * 100

    gg <- dd %>%
      ggbiplot::ggbiplot(var.axes = axes_arrows, ## Annoying arrows
                         groups = grp_var ## pulling var for grouping
      ) +
      ggplot2::theme_bw() +
      ggplot2::labs(title = main, subtitle = subtitle,
                    x = paste0("Principle Component 1 (", ve[1], "%)"),
                    y = paste0("Principle Component 2 (", ve[2], "%)"), colour = legend_title)

    cat("PCoA plot created")
  }

  gg
}
CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.