R/generate_map.R

Defines functions generate_map

Documented in generate_map

#' Generate map visualizations (choropleths) of CSPP data
#'
#' \code{generate_map} takes CSPP data from \code{\link{get_cspp_data}} and plots the
#' values of numeric variables on the map of the U.S. It can also plot
#' individual states or sets of states.
#'
#' Note: due to complications with plotting Alaska and Hawaii, this package
#' currently does not support plotting these two states.
#'
#' This function is general in the sense that it will produce a ggplot-style map
#' for any dataframe passed to it with the proper formatting. Any dataframe that
#' has at least three columns, with the first two a numeric `year` column and a
#' state name as a string, and the final column the value to be plotted, will
#' work with this function.
#'
#' @name generate_map
#'
#' @param cspp_data Dataframe generated by \code{get_cspp_data} which must
#'   include the variable \code{state}. If there are multiple years of data per
#'   state, by default the most recent year is used in creating the map unless
#'   \code{average_years} is set to \code{TRUE}. Default is NULL and returns the
#'   most recent year's \code{poptotal} data as an example map.
#' @param var_name Specify the variable from the dataset passed to
#'   \code{cspp_data} to plot on the map. If left blank, the first variable that
#'   is not "year", "st", "state", "state_fips", or "state_icspr" is used.
#'   Default is NULL.
#' @param average_years Default is \code{FALSE}. If \code{TRUE}, averages over
#'   all of the years per state in the dataframe to produce a value to plot on
#'   the map. If the type of the variable in \code{var_name} is not numeric,
#'   will reset this parameter to FALSE.
#' @param drop_NA_states Choose whether to drop states at the map generating
#'   stage which have NA values. Default is \code{FALSE} and states with missing
#'   data will be filled grey. If set to \code{TRUE}, states will have no fill
#'   in the plot.
#'
#'   If you're passing a dataframe subset to certain states, set this to TRUE.
#'
#' @param poly_args Default is \code{list(color = "#666666", size = .5)}.
#'   Changes the aesthetics of how the states look when plotted. The \code{fill}
#'   of each state can be manually changed through ggplot's \code{scale_fill_}
#'   (see examples). See \code{\link[ggplot2]{geom_polygon}} for other options
#'   to pass to this argument.
#'
#' @return Returns a \code{ggplot} object. See examples for how to work with
#'   this object.
#'
#' @seealso \code{\link{get_cspp_data}}, \code{\link{get_cites}}, \code{\link{get_var_info}}
#'
#' @importFrom dplyr "%>%" filter arrange left_join full_join bind_rows group_by
#'   if_else mutate distinct rename n row_number
#' @importFrom tidyselect all_of
#' @importFrom stats na.omit
#' @import ggplot2
#' @import mapproj
#'
#' @export
#'
#' @examples
#'
#' ## default map with total population
#' generate_map()
#'
#' ## pass specific variables
#' # returns average over all non NA years in the data
#' generate_map(get_cspp_data(var_category = "demographics"),
#'              var_name = "pctpopover65")
#'
#' ## add additional ggplot options
#' generate_map(get_cspp_data(var_category = "demographics"),
#'              var_name = "pctpopover65",
#'              poly_args = list(color = "black"),
#'              drop_NA_states = FALSE) +
#'  ggplot2::scale_fill_gradient(low = "white", high = "red") +
#'  ggplot2::theme(legend.position = "none") +
#'  ggplot2::ggtitle("% Population Over 65")
#'
#' ## plot specific states
#' # drop_NA_states set to TRUE plots only those states
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#'                dplyr::filter(st %in% c("NC", "VA", "SC")),
#'              var_name = "pctpopover65",
#'              poly_args = list(color = "black"),
#'              drop_NA_states = TRUE) +
#'  ggplot2::scale_fill_gradient(low = "white", high = "red") +
#'  ggplot2::theme(legend.position = "none") +
#'  ggplot2::ggtitle("% Population Over 65")
#'
#' ## pass specific variables and years
#' # returns average over set of years provided
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#'  dplyr::filter(year %in% seq(2001, 2010)))
#'
#' # returns average over set of years provided
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#'  dplyr::filter(year %in% seq(2001, 2010)))
#'

options(dplyr.summarise.inform = FALSE)

generate_map <- function(cspp_data = NULL, var_name = NULL, average_years = FALSE, drop_NA_states = FALSE, poly_args = list(color = "#666666", size = .5)) {

  if(is.null(cspp_data)) { cspp_data <- map_example }

  # check initial dataframe
  if(!(c("year") %in% names(cspp_data)) | !(c("state") %in% names(cspp_data)) | length(names(cspp_data)) == 2) {
    stop("Dataframe must be properly formatted from get_cspp_data()")
  }

  # var_name
  if(!is.null(var_name)) {

    # check user input
    if(!(var_name %in% names(cspp_data)) | var_name %in% c("year", "st", "state", "state_fips", "state_icpsr", "stateno")) {
      stop("Variable name must be a variable in the dataset passed to this function.")
    }

    if(typeof(cspp_data[, var_name]) %in% c("character", "factor")) {
      warning("Plotting a character or factor variable")
    }

    cspp_data <- dplyr::select(cspp_data, state, year, plot_var = tidyselect::all_of(var_name))

  # var_name is *not* provided
  } else {

    var_name <- names(cspp_data)[!(names(cspp_data) %in% c("year", "st", "state", "state_fips", "state_icpsr", "stateno"))] %>% .[1]
    cspp_data <- dplyr::select(cspp_data, state, year, plot_var = tidyselect::all_of(var_name))

  }

  grouped <- cspp_data %>%
    dplyr::group_by(state) %>%
    dplyr::summarize(n = dplyr::n()) %>%
    dplyr::ungroup()

  # if there is more than one year per state
  if(max(grouped$n) > 1) {

    if(!is.numeric(cspp_data$plot_var)) {
      average_years == FALSE
    }

    if(average_years == TRUE) {

      cspp_data <- cspp_data %>%
        dplyr::group_by(state) %>%
        dplyr::summarize(plot_var = mean(plot_var, na.rm=T), .groups=NULL) %>%
        dplyr::ungroup()

      if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }

    } else {

      cspp_data <- cspp_data %>%
        dplyr::filter(!is.na(plot_var))

      if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }

      cspp_data <- cspp_data %>%
        dplyr::group_by(state) %>%
        dplyr::arrange(year) %>%
        dplyr::filter(dplyr::row_number() == dplyr::n()) %>%
        dplyr::ungroup()

    }

  }

  cspp_data <- dplyr::filter(cspp_data, state != "Alaska" & state != "Hawaii")
  if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }

  # one more check:
  if(length(!is.na(cspp_data$plot_var)) == 0) { stop("Empty dataframe") }

  # made it this far, time to pull in map data

  states_map <- ggplot2::map_data("state") %>%
    dplyr::rename(state = region)

  cspp_data <- dplyr::mutate(cspp_data, state = stringr::str_to_lower(state))

  states_map <- dplyr::left_join(states_map, cspp_data, by = "state")

  # one more check, actually:
  if(length(states_map$plot_var[!is.na(states_map$plot_var)]) == 0) { stop("Empty map.") }

  if(drop_NA_states == TRUE) {

    states_map <- dplyr::filter(states_map, !is.na(plot_var))

  } else {

    if(length(states_map$plot_var[is.na(states_map$plot_var)]) > 0) {
      if(unique(states_map$state[is.na(states_map$plot_var)]) != "district of columbia") {
        message("Some states are missing values and are filled in grey. Set drop_NA_states to TRUE to drop these states.")
        print(unique(states_map$state[is.na(states_map$plot_var)]))
      }
    }
  }

  params <- ggplot2::layer(geom = GeomPolygon,
                  stat = "identity",
                  position = "identity",
                  params = poly_args)

  p <- ggplot2::ggplot(states_map, aes(x = long, y = lat, group = group, fill = plot_var)) +
    params +
    ggplot2::coord_map() +
    ggplot2::labs(fill = var_name) +
    ggplot2::theme_void()

  return(p)

}

Try the cspp package in your browser

Any scripts or data that you put into this service are public.

cspp documentation built on Dec. 28, 2022, 2:46 a.m.