# R/corr_plot.R
#
# Defines functions: corr_plot
#
# Documented in: corr_plot

#' Create correlation plots of CSPP data
#'
#' \code{corr_plot} takes CSPP data from \code{\link{get_cspp_data}} and returns
#' either a correlation matrix or correlation plot.
#'
#' This function is a wrapper that passes a correlation matrix to the
#' \code{ggcorrplot::ggcorrplot} function which generates correlation heat
#' plots. Rows containing any missing value among the selected columns are
#' dropped (\code{stats::na.omit}) before correlations are computed, and the
#' resulting correlations are rounded to one decimal place.
#'
#' @name corr_plot
#'
#' @param data A dataframe. If data is generated by \code{get_cspp_data}
#'   function, the function can automatically parse the dataframe. Otherwise,
#'   this function will attempt to make a correlation plot or matrix from all
#'   numeric variables within the passed dataframe.
#' @param vars Default is NULL. If left NULL, uses all numeric variables within
#'   the passed dataframe. Otherwise, must be a character vector of column
#'   names present in \code{data}. The dataframe is subset based on variables
#'   listed; non-numeric variables are dropped.
#' @param summarize Default is TRUE. If TRUE, the variable \code{st} must be
#'   present in \code{data} (an error is raised otherwise) and the function
#'   will create state specific averages for each variable in the dataframe
#'   before computing correlations. If FALSE, the function will generate the
#'   correlation matrix and plot for all values in the dataset and \code{st}
#'   is not required.
#' @param labels Default is TRUE. If TRUE, the correlation plot will include
#'   labels for the correlation value. If FALSE, no labels will be present.
#' @param label_size Default is 3. Controls the size of the font for labels.
#' @param colors Specify the colors to be used in the correlation plot. Must
#'   include three values in a character vector format. The default values are
#'   \code{c("#6D9EC1", "#FFFFFF", "#E46726")}.
#' @param cor_matrix Default is FALSE. If set to TRUE, instead of returning a
#'   ggplot object that is a correlation plot, returns a correlation matrix.
#'   This is particularly useful if you want to customize the output with
#'   \code{ggcorrplot}.
#'
#' @seealso \code{ggcorrplot}
#'
#' @return ggplot2 object or correlation matrix
#'
#' @importFrom dplyr "%>%" select group_by ungroup summarize_all
#' @importFrom tidyselect all_of
#' @importFrom stats na.omit
#' @import ggcorrplot
#' @import utils
#'
#' @export
#'
#' @examples
#'
#' corr_plot(data = get_cspp_data(), vars = c("pollib_median",
#'  "innovatescore_boehmkeskinner", "citi6013", "ranney4_control", "h_diffs"),
#'  cor_matrix = FALSE)
corr_plot <- function(data = NULL, vars = NULL, summarize = TRUE,
                      labels = TRUE, label_size = 3,
                      colors = c("#6D9EC1", "#FFFFFF", "#E46726"),
                      cor_matrix = FALSE) {

  # ---- Argument validation -------------------------------------------------
  # All checks run before any data manipulation so that callers always get one
  # of the explicit messages below rather than an error from deep inside dplyr.

  if (is.null(data) || !is.data.frame(data)) {
    stop("`data` cannot be NULL and must be a dataframe.", call. = FALSE)
  }

  if (!is.null(vars) && !is.character(vars)) {
    stop("`vars` must be a character vector.", call. = FALSE)
  }

  if (!is.null(vars) && !all(vars %in% names(data))) {
    stop("Not all variables are present in the dataframe.", call. = FALSE)
  }

  # Collapse the flags to a single TRUE/FALSE; values that `== TRUE` used to
  # accept (e.g. 1 or "TRUE") are still treated as TRUE.
  do_summarize <- isTRUE(as.logical(summarize))
  return_matrix <- isTRUE(as.logical(cor_matrix))

  # `st` is the grouping key for state averages, so it is required whenever
  # summarizing, whether or not `vars` was supplied.
  has_st <- "st" %in% names(data)
  if (do_summarize && !has_st) {
    stop(
      "Variable `st` (two character state abbreviation) must be present in dataframe.",
      call. = FALSE
    )
  }

  # ---- Column selection ----------------------------------------------------

  data <- dplyr::ungroup(data)

  if (!is.null(vars)) {
    # Only carry `st` along when it exists; appending it unconditionally made
    # `summarize = FALSE` fail on dataframes without a state column.
    keep_cols <- if (has_st) union(vars, "st") else vars
    data <- dplyr::select(data, tidyselect::all_of(keep_cols))
  }

  # Keep the numeric variables plus exactly the `st` column (if present).
  # Matching on the exact name avoids dragging in unrelated non-numeric
  # columns that merely start with "st" (such as a state-name column).
  data <- dplyr::select(data, where(is.numeric) | tidyselect::any_of("st"))

  # ---- Correlation matrix --------------------------------------------------

  if (do_summarize) {

    # Listwise-delete incomplete rows, then average every variable by state.
    state_means <- data %>%
      stats::na.omit() %>%
      dplyr::group_by(st) %>%
      dplyr::summarize_all(list(~ mean(., na.rm = TRUE))) %>%
      dplyr::select(-st)

    cor_mat <- round(stats::cor(state_means), 1)

  } else {

    # `st` is not numeric input for cor(); drop it and use the raw rows.
    numeric_data <- dplyr::select(data, where(is.numeric))

    cor_mat <- round(stats::cor(stats::na.omit(numeric_data)), 1)

  }

  if (return_matrix) {
    return(cor_mat)
  }

  # ---- Plot ----------------------------------------------------------------

  ggcorrplot::ggcorrplot(
    cor_mat,
    hc.order = FALSE,
    type = "lower",
    lab = labels,
    outline.color = "white",
    colors = colors,
    lab_size = label_size,
    tl.cex = 6
  ) +
    ggplot2::theme(axis.ticks = ggplot2::element_line())

}

# Try the cspp package in your browser
#
# Any scripts or data that you put into this service are public.
#
# cspp documentation built on Dec. 28, 2022, 2:46 a.m.