R/correlatePCs.R

Defines functions plotPCcorrs correlatePCs

Documented in correlatePCs plotPCcorrs

#' Principal components (cor)relation with experimental covariates
#'
#' Computes the significance of (cor)relations between PCA scores and the sample
#' experimental covariates, using Kruskal-Wallis test for categorial variables
#' and the `cor.test` based on Spearman's correlation for continuous
#' variables
#'
#' @param pcaobj A `prcomp` object
#' @param coldata A `data.frame` object containing the experimental
#' covariates
#' @param pcs A numeric vector, containing the corresponding PC number
#'
#' @return A `data.frame` object with computed p values for each covariate
#' and for each principal component
#'
#' @examples
#' library(DESeq2)
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- DESeq2::rlogTransformation(dds)
#' pcaobj <- prcomp(t(assay(rlt)))
#' correlatePCs(pcaobj, colData(dds))
#'
#' @export
correlatePCs <- function(pcaobj, coldata, pcs = 1:4) {
  # split the analysis for continuous and categorial
  coldataTypes <- vapply(coldata, class, character(1))
  # extract the scores from the pc object
  x <- pcaobj$x

  # do it until 1:4 PCs
  res <- matrix(NA, nrow = length(pcs), ncol = ncol(coldata))

  colnames(res) <- colnames(coldata)
  rownames(res) <- paste0("PC_", pcs)

  for (i in 1:ncol(res)) {
    # for each covariate...
    for (j in pcs) {
      if (coldataTypes[i] %in% c("factor", "character")) {
        if (length(levels(coldata[, i])) > 1) {
          res[j, i] <- kruskal.test(x[, j], coldata[, i])$p.value
        }
      } else {
        res[j, i] <- cor.test(x[, j], coldata[, i], method = "spearman")$p.value
      }
    }
  }
  res
}


#' Plot significance of (cor)relations of covariates VS principal components
#'
#' Plots the significance of the (cor)relation of each covariate vs a principal component
#'
#' @param pccorrs A `data.frame` object generated by [correlatePCs]
#' @param pc An integer number, corresponding to the principal component of
#' interest
#' @param logp Logical, defaults to `TRUE`, displays the -`log10` of
#' the pvalue instead of the p value itself
#'
#' @return A base plot object
#'
#' @examples
#' library(DESeq2)
#' dds <- makeExampleDESeqDataSet_multifac(betaSD_condition = 3, betaSD_tissue = 1)
#' rlt <- rlogTransformation(dds)
#' pcaobj <- prcomp(t(assay(rlt)))
#' res <- correlatePCs(pcaobj, colData(dds))
#' plotPCcorrs(res)
#'
#' @export
plotPCcorrs <- function(pccorrs, pc = 1, logp = TRUE) {
  selectedPC <- paste0("PC_", pc)
  pvals <- pccorrs[selectedPC, ]

  if (logp) pvals <- -log10(pvals)

  barplot(pvals, las = 2, col = "steelblue",
          main = paste0("Significance of the relations between PC ", pc, " vs covariates"),
          ylab = ifelse(logp, "-log10(pval)", "pval"))
}
federicomarini/pcaExplorer documentation built on Sept. 29, 2024, 9:31 p.m.