R/do_PCA.R

Defines functions do_PCA

Documented in do_PCA

#' Principal Component Analysis via Singular Value Decomposition
#' @param x matrix with variables in rows.
#' @param center center data?
#' @param level2 scale data?
#' @param log_transform log transform input data? Default FALSE.
#' @param keep number of axes to keep (default NULL, keep all axes).
#' @param robust use robust estimators of center and/or scale? (median and mad, respectively).
#' Default FALSE.
#' @param delta value to add to counts to avoid zero counts in log computation
#' @return a list with two elements: pc (the singular vectors) and sv (the singular values)
#' @export

do_PCA <- function(x, center = TRUE, scale = FALSE,
                   log_transform = FALSE, keep = NULL,
                   robust = FALSE, delta = 1) {

  rnames <- rownames(x)
  cnames <- colnames(x)
                    
  ifelse(log_transform, x <- t(log(x + delta)), x <- t(x))

  if(robust) {
    if(center) center <- apply(x, 2, median)
    if(scale) scale <- apply(x, 2, mad)
  }

  Y <- scale(x, center = center, scale = scale)
  svd_out <- svd(Y)
  sv_var <- 100 * svd_out$d^2/ sum(svd_out$d^2)
  sv_cumvar <-  cumsum(sv_var)

  bar_names <- seq_along(sv_var)
  barplot(round(sv_var, 2), names.arg = bar_names,
          col = "grey", xlab = "PC", "ylab" = "Variance (%)")

  if(is.null(keep)) {
    keep <- readline(prompt="Enter the number of PCs to keep: ")
    if(keep == "") {
      message("Keeping all PCs\n")
      keep <- NULL
    }
  }

  rownames(svd_out$v) <- colnames(svd_out$v) <- cnames
  colnames(svd_out$u) <- paste0("PC", seq_len(ncol(svd_out$u)))
  rownames(svd_out$u) <- rnames

  if(is.null(keep) || is.infinite(keep)) {
    outpc <- svd_out$u
  } else {
    nc <- ncol(svd_out$u)
    if(keep > nc) keep <- nc
    outpc <- svd_out$u[, seq_len(keep)]
  }



  list(pc = outpc , rotation = svd_out$v,  sv = svd_out$d, var = sv_var, cumvar = sv_cumvar)
}
leandroroser/RNASeqFunctions documentation built on May 17, 2019, 7:31 p.m.