R/plot_corr.R

Defines functions plot_corr

Documented in plot_corr

#' Correlation Plot
#'
#' This function plots pairwise correlations between variables.
#'
#' @param dat A sample by feature data frame or matrix, e.g. of clinical
#'   variables. Non-numeric features are dropped with a warning.
#' @param method String specifying which correlation coefficient to compute.
#'   Must be one of \code{"pearson"}, \code{"kendall"}, or \code{"spearman"}.
#'   See \code{\link[stats]{cor}}.
#' @param use Optional character string giving a method for computing
#'   covariances in the presence of missing values. Must be one of \code{
#'   "everything"}, \code{"all.obs"}, \code{"complete.obs"}, \code{
#'   "na.or.complete"}, or \code{"pairwise.complete.obs"}.
#' @param alpha Optional significance threshold to impose on correlations.
#'   Those with \emph{p}-values (optionally adjusted) less than or equal to
#'   \code{alpha} are outlined in black.
#' @param p_adj Optional \emph{p}-value adjustment for multiple testing. Options
#'   include \code{"holm"}, \code{"hochberg"}, \code{"hommel"}, \code{
#'   "bonferroni"}, \code{"BH"}, \code{"BY"}, and \code{"fdr"}. See \code{
#'   \link[stats]{p.adjust}}.
#' @param lim Optional vector of length two defining lower and upper bounds for 
#'   the scale range. Default is observed extrema.
#' @param geom String specifying whether to visualize correlation coefficients
#'   as \code{"tile"} or \code{"circle"}.
#' @param label Print correlation coefficient over \code{geom}?
#' @param diag Include principal diagonal of the correlation matrix?
#' @param title Optional plot title.
#' @param legend Legend position. Must be one of \code{"bottom"}, \code{"left"},
#'   \code{"top"}, \code{"right"}, \code{"bottomright"}, \code{"bottomleft"},
#'   \code{"topleft"}, or \code{"topright"}.
#' @param hover Show correlation coefficient by hovering mouse over the
#'   corresponding tile or circle? If \code{TRUE}, the plot is rendered in HTML
#'   and will either open in your browser's graphic display or appear in the
#'   RStudio viewer.
#' @param export Export correlation matrix? If \code{TRUE} and \code{alpha} is
#'   non-\code{NULL}, then the \emph{p}-value matrix will also be returned.
#'
#' @details
#' Correlation plots visualize the associations between numeric features. They
#' are a valuable tool in exploratory data analysis for biological experiments,
#' where they may help identify dependencies among clinical covariates, leading
#' to better omic models.
#'
#' @return
#' If \code{export = TRUE}, a list with up to two elements:
#' \itemize{
#'   \item The correlation matrix, computed via the chosen \code{method}.
#'   \item The matrix of \emph{p}-values (optionally adjusted), if \code{alpha}
#'   is non-\code{NULL}.
#' }
#'
#' @examples
#' mat <- matrix(rnorm(100), 10, 10)
#' plot_corr(mat)
#'
#' @export
#' @importFrom purrr some keep
#' @importFrom tidyr pivot_longer
#' @importFrom RColorBrewer brewer.pal
#' @import dplyr
#' @import ggplot2
#'

plot_corr <- function(
  dat,
  method = 'pearson',
     use = 'everything',
   alpha = NULL,
   p_adj = NULL,
     lim = NULL,
    geom = 'tile',
   label = FALSE,
    diag = FALSE,
   title = 'Correlation Plot',
  legend = 'right',
   hover = FALSE,
  export = FALSE
) {

  # Preliminaries
  if (ncol(dat) < 2L) {
    stop('dat must have at least two columns to generate a correlation matrix.')
  }
  if (!every(dat, is.numeric)) {
    dat <- keep(dat, is.numeric)
    if (ncol(dat) < 2L) {
      stop('dat must have at least two numeric columns to generate a ',
           'correlation matrix.')
    } else {
      warning('Non-numeric variables have been detected and removed.')
    }
  }
  if (colnames(dat) %>% is.null) {
    colnames(dat) <- paste0('V', seq_len(ncol(dat)))
  }
  dat <- as_tibble(dat)
  if (!alpha %>% is.null) {
    if (alpha <= 0 | alpha >= 1) {
      stop('alpha must be numeric on (0, 1).')
    }
  }
  if (!p_adj %>% is.null) {
    p_adjes <- c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr')
    p_adj <- match.arg(p_adj, p_adjes)
  }
  locations <- c('bottom', 'left', 'top', 'right',
                 'bottomright', 'bottomleft', 'topleft', 'topright')
  legend <- match.arg(legend, locations)
  geoms <- c('tile', 'circle')
  geom <- match.arg(geom, geoms)

  # Tidy data
  mat <- cor(dat, method = method, use = use)
  mat[!lower.tri(mat)] <- NA_real_
  if (diag) {
    diag(mat) <- 1L
  }
  df <- mat %>%                                  # Melt correlation matrix
    as_tibble(.) %>%
    pivot_longer(everything(), names_to = 'x', values_to = 'Correlation') %>%
    mutate(y = rep(rownames(mat), each = nrow(mat))) %>%
    mutate(x = factor(x, levels = unique(x)),
           y = factor(y, levels = rev(unique(x))),
           Significant = FALSE) %>%
    select(x, y, Correlation, Significant) %>%
    na.omit(.)
  if (!alpha %>% is.null) {                      # Calculate p-value matrix?
    p_mat <- matrix(nrow = nrow(mat), ncol = ncol(mat))
    for (i in 2:ncol(p_mat)) {
      for (j in 1:(i - 1L)) {
        p_mat[i, j] <- cor.test(dat[[i]], dat[[j]],
                                method = method, use = use)$p.value
      }
    }
    p_val <- p_mat %>% keep(lower.tri(.))
    if (!p_adj %>% is.null) {
      p_val <- p.adjust(p_val, method = p_adj)
    }
    if (diag) {
      diag(p_mat) <- 1L
    }
    df <- df %>% mutate(Significant = if_else(p_val <= alpha, TRUE, FALSE))
  }
  if (export) {
    out <- list(
      'Correlation' = mat,
      'p.values' = p_mat,
      'p_adj' = p_adj
    )
  }

  # Build plot
  p <- ggplot(df, aes(x, y)) +
    coord_equal() +
    labs(title = title, x = NULL, y = NULL) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
         axis.text.x = element_text(angle = 45L, hjust = 1L))
  if (geom == 'tile') {
    p <- p + geom_tile(aes(fill = Correlation, color = Significant),
                       size = 1L, width = 0.9, height = 0.9) +
      scale_color_manual(values = c('grey90', 'black')) +
      guides(color = FALSE)
  } else if (geom == 'circle') {
    p <- p + geom_point(data = df %>% filter(Significant),
                        aes(x, y, size = 1.25 * abs(Correlation)),
                        color = 'black', show.legend = FALSE) +
      geom_point(data = df %>% filter(!Significant),
                 aes(x, y, size = 1.25 * abs(Correlation)),
                 color = 'grey60', show.legend = FALSE) +
      geom_point(aes(color = Correlation, size = abs(Correlation))) +
      guides(size = FALSE)
  }
  cols <- brewer.pal(11L, 'RdBu')[c(11, 1)]
  if (lim %>% is.null) {
    p <- p + scale_color_gradient2(low = cols[1], high = cols[2])
  } else {
    p <- p + scale_color_gradient2(low = cols[1], high = cols[2], limits = lim)
  }
  if (label) {
    p <- p + geom_text(aes(label = round(Correlation, 2)))
  }

  # Output
  gg_out(p, hover, legend)
  if (export) {
    return(out)
  }

}
dswatson/bioplotr documentation built on March 3, 2023, 9:43 p.m.