R/plot_panel.R

Defines functions plot_panel

Documented in plot_panel

#' Generate time series plots of CSPP data
#'
#' \code{plot_panel} takes CSPP data from \code{\link{get_cspp_data}} and plots
#' the values of the passed variable name as a state-by-year grid (one tile per
#' state-year observation).
#'
#' This function will take any dataframe consisting of the variables `year` and
#' `st` plus at least one other variable. Years in which every observation of
#' the plotted variable is missing are dropped before plotting. A message
#' reporting which variable was used to fill the cells is emitted on every
#' call.
#'
#' @name plot_panel
#'
#' @param cspp_data Dataframe generated by \code{get_cspp_data} which must
#'   include the variables \code{st} and \code{year}. If left NULL, the
#'   variable \code{labor_right_to_work} for the years 1976 to 2015 is fetched
#'   with \code{get_cspp_data} and plotted.
#' @param var_name Specific variable within the dataframe passed to `cspp_data`
#'   to plot. Must be a single character string. If left NULL, will
#'   automatically plot the first variable after state identifiers.
#' @param years Specify years within the passed dataframe to plot. If left NULL,
#'   will plot all years for which not all observations have missing values.
#'   Takes a numeric vector of years; every year between the smallest and the
#'   largest value supplied (inclusive) is plotted, regardless of the order of
#'   the vector.
#' @param colors Specify the colors to be used in a grid plot. Must include
#'   three values in a character vector format. The default values are
#'   `c("#b3a4a4", "#8f3838", "#dbdbdb")`. If the variable plotted is
#'   dichotomous, the first color is the non-treated value and the second color
#'   is the treated value. The third color is the value for NA. If plotting a
#'   continuous variable, the first color is the low end of the gradient and the
#'   second value is the high end of the gradient. See
#'   \code{\link[ggplot2]{scale_fill_gradient}}.
#'
#' @seealso \code{\link{get_var_info}}, \code{\link{get_cites}}, \code{\link{generate_map}}
#'
#' @return ggplot2 object
#'
#' @importFrom dplyr "%>%" filter group_by ungroup
#'   if_else mutate distinct rename n row_number
#' @importFrom tidyselect all_of
#' @importFrom stats na.omit
#' @import ggplot2
#'
#' @export
#'
#' @examples
#'
#' # dichotomous variable
#' cspp <- get_cspp_data(vars = c("drugs_medical_marijuana"))
#' plot_panel(cspp)
#'
#' # change colors and years
#' plot_panel(cspp, colors = c("white", "blue", "black"),
#'                  years = seq(1980, 2000))
#'
#' # continuous variable with missing data:
#' continuous_data <- get_cspp_data(vars = c("h_diffs"))
#'
#' plot_panel(continuous_data, colors = c("white", "dodgerblue", "#eeeeee"))
#'
#' # add ggplot2 features
#' library(ggplot2)
#' plot_panel(continuous_data, colors = c("white", "dodgerblue", "#eeeeee")) +
#'   theme(legend.position = "none") +
#'   ggplot2::ggtitle("Continuous variable")
plot_panel <- function(cspp_data = NULL, var_name = NULL, years = NULL,
                       colors = c("#b3a4a4", "#8f3838", "#dbdbdb")) {

  # Columns that identify a state-year rather than carry a plottable value.
  id_vars <- c("year", "st", "state", "state_fips", "state_icpsr", "stateno")

  #### argument checks (done first so bad input fails before any data work)

  if (length(colors) != 3) {
    stop("`colors` must have three values", call. = FALSE)
  }

  if (!is.null(years)) {
    if (!is.numeric(years)) {
      stop("`years` must be a numeric vector.", call. = FALSE)
    }
    # An empty or NA-containing vector has no usable range; without this check
    # the filter below would silently drop every row.
    if (length(years) == 0 || anyNA(years)) {
      stop("`years` must be a non-empty numeric vector without missing values.",
           call. = FALSE)
    }
  }

  # `var_name` feeds scalar `if` conditions and names a single fill variable,
  # so anything other than one non-missing string is rejected explicitly.
  if (!is.null(var_name) &&
      (!is.character(var_name) || length(var_name) != 1 || is.na(var_name))) {
    stop("`var_name` must be a single variable name.", call. = FALSE)
  }

  #### data checks

  if (is.null(cspp_data)) {
    no_arg_var <- "labor_right_to_work"
    message(paste0("No data provided, using ", no_arg_var, "."))
    cspp_data <- get_cspp_data(vars = no_arg_var,
                               years = seq(1976, 2015))
  }

  data_names <- names(cspp_data)

  # Both identifiers are required, and there must be something besides them.
  if (!all(c("year", "st") %in% data_names) || length(data_names) == 2) {
    stop("Dataframe must be properly formatted from get_cspp_data() and/or contain the variables `st` and `year`.",
         call. = FALSE)
  }

  if (length(unique(cspp_data$year)) == 1) {
    warning("Only one year present in data.")
  }

  #### resolve the variable to plot

  if (is.null(var_name)) {
    # Default to the first column that is not a state/year identifier.
    candidates <- setdiff(data_names, id_vars)
    if (length(candidates) == 0) {
      stop("Dataframe must contain at least one variable to plot besides the state and year identifiers.",
           call. = FALSE)
    }
    var_name <- candidates[1]
  } else if (!(var_name %in% data_names) || var_name %in% id_vars) {
    stop("Variable name(s) must be in the dataset passed to this function.",
         call. = FALSE)
  }

  cspp_data <- dplyr::select(cspp_data, st, year,
                             plot_var = tidyselect::all_of(var_name))

  #### restrict to the requested span of years

  if (!is.null(years)) {
    # Use min/max rather than first/last element so an unsorted vector
    # (e.g. c(2000, 1980)) selects the same span as its sorted counterpart.
    year_span <- range(years)
    cspp_data <- dplyr::filter(cspp_data,
                               year >= year_span[1] & year <= year_span[2])
  }

  # check if length of dataframe is 0
  if (nrow(cspp_data) == 0) {
    stop("Dataframe has length 0.", call. = FALSE)
  }

  #### plotting

  # Two or fewer distinct non-missing values are treated as a dichotomous
  # (treated / non-treated) variable; anything else gets a colour gradient.
  n_values <- length(unique(cspp_data$plot_var[!is.na(cspp_data$plot_var)]))

  if (n_values <= 2) {
    fill_scale <- ggplot2::scale_fill_manual("Treated",
                                             values = c(colors[1], colors[2]),
                                             na.value = colors[3])
    # The manual scale needs a discrete fill, so the values become strings.
    cspp_data$plot_var <- as.character(cspp_data$plot_var)
  } else {
    fill_scale <- ggplot2::scale_fill_gradient("Value",
                                               low = colors[1],
                                               high = colors[2],
                                               na.value = colors[3])
  }

  # drop years with all NA values (and drop the grouping again afterwards)
  plot_data <- cspp_data %>%
    dplyr::ungroup() %>%
    dplyr::group_by(year) %>%
    dplyr::filter(!all(is.na(plot_var))) %>%
    dplyr::ungroup()

  if (nrow(plot_data) == 0) {
    stop("Dataframe has length 0.", call. = FALSE)
  }

  # NOTE(review): `size` is retained for the tile border width; newer ggplot2
  # releases prefer `linewidth` for this -- confirm the minimum supported
  # ggplot2 version before switching.
  p <- ggplot2::ggplot(plot_data, ggplot2::aes(year, st, fill = plot_var)) +
    ggplot2::geom_tile(color = "white", size = .1) +
    fill_scale +
    ggplot2::theme(panel.grid = ggplot2::element_blank(),
                   axis.ticks = ggplot2::element_blank()) +
    ggplot2::coord_cartesian(expand = FALSE) +
    ggplot2::ylab("") +
    ggplot2::xlab("Year")

  message(paste("Values from", var_name, "used to fill cells.", sep = " "))

  p
}

Try the cspp package in your browser

Any scripts or data that you put into this service are public.

cspp documentation built on Dec. 28, 2022, 2:46 a.m.