Nothing
#' Generate map visualizations (choropleths) of CSPP data
#'
#' \code{generate_map} takes CSPP data from \code{\link{get_cspp_data}} and plots the
#' values of numeric variables on the map of the U.S. It can also plot
#' individual states or sets of states.
#'
#' Note: due to complications with plotting Alaska and Hawaii, this package
#' currently does not support plotting these two states.
#'
#' This function is general in the sense that it will produce a ggplot-style map
#' for any dataframe passed to it with the proper formatting. Any dataframe that
#' has at least three columns, with the first two a numeric `year` column and a
#' state name as a string, and the final column the value to be plotted, will
#' work with this function.
#'
#' @name generate_map
#'
#' @param cspp_data Dataframe generated by \code{get_cspp_data} which must
#' include the variable \code{state}. If there are multiple years of data per
#' state, by default the most recent year is used in creating the map unless
#' \code{average_years} is set to \code{TRUE}. Default is NULL and returns the
#' most recent year's \code{poptotal} data as an example map.
#' @param var_name Specify the variable from the dataset passed to
#' \code{cspp_data} to plot on the map. If left blank, the first variable that
#' is not "year", "st", "state", "state_fips", or "state_icspr" is used.
#' Default is NULL.
#' @param average_years Default is \code{FALSE}. If \code{TRUE}, averages over
#' all of the years per state in the dataframe to produce a value to plot on
#' the map. If the type of the variable in \code{var_name} is not numeric,
#' will reset this parameter to FALSE.
#' @param drop_NA_states Choose whether to drop states at the map generating
#' stage which have NA values. Default is \code{FALSE} and states with missing
#' data will be filled grey. If set to \code{TRUE}, states will have no fill
#' in the plot.
#'
#' If you're passing a dataframe subset to certain states, set this to TRUE.
#'
#' @param poly_args Default is \code{list(color = "#666666", size = .5)}.
#' Changes the aesthetics of how the states look when plotted. The \code{fill}
#' of each state can be manually changed through ggplot's \code{scale_fill_}
#' (see examples). See \code{\link[ggplot2]{geom_polygon}} for other options
#' to pass to this argument.
#'
#' @return Returns a \code{ggplot} object. See examples for how to work with
#' this object.
#'
#' @seealso \code{\link{get_cspp_data}}, \code{\link{get_cites}}, \code{\link{get_var_info}}
#'
#' @importFrom dplyr "%>%" filter arrange left_join full_join bind_rows group_by
#' if_else mutate distinct rename n row_number
#' @importFrom tidyselect all_of
#' @importFrom stats na.omit
#' @import ggplot2
#' @import mapproj
#'
#' @export
#'
#' @examples
#'
#' ## default map with total population
#' generate_map()
#'
#' ## pass specific variables
#' # returns average over all non NA years in the data
#' generate_map(get_cspp_data(var_category = "demographics"),
#' var_name = "pctpopover65")
#'
#' ## add additional ggplot options
#' generate_map(get_cspp_data(var_category = "demographics"),
#' var_name = "pctpopover65",
#' poly_args = list(color = "black"),
#' drop_NA_states = FALSE) +
#' ggplot2::scale_fill_gradient(low = "white", high = "red") +
#' ggplot2::theme(legend.position = "none") +
#' ggplot2::ggtitle("% Population Over 65")
#'
#' ## plot specific states
#' # drop_NA_states set to TRUE plots only those states
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#' dplyr::filter(st %in% c("NC", "VA", "SC")),
#' var_name = "pctpopover65",
#' poly_args = list(color = "black"),
#' drop_NA_states = TRUE) +
#' ggplot2::scale_fill_gradient(low = "white", high = "red") +
#' ggplot2::theme(legend.position = "none") +
#' ggplot2::ggtitle("% Population Over 65")
#'
#' ## pass specific variables and years
#' # returns average over set of years provided
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#' dplyr::filter(year %in% seq(2001, 2010)))
#'
#' # returns average over set of years provided
#' library(dplyr)
#' generate_map(get_cspp_data(var_category = "demographics") %>%
#' dplyr::filter(year %in% seq(2001, 2010)))
#'
options(dplyr.summarise.inform = FALSE)
generate_map <- function(cspp_data = NULL, var_name = NULL, average_years = FALSE, drop_NA_states = FALSE, poly_args = list(color = "#666666", size = .5)) {
if(is.null(cspp_data)) { cspp_data <- map_example }
# check initial dataframe
if(!(c("year") %in% names(cspp_data)) | !(c("state") %in% names(cspp_data)) | length(names(cspp_data)) == 2) {
stop("Dataframe must be properly formatted from get_cspp_data()")
}
# var_name
if(!is.null(var_name)) {
# check user input
if(!(var_name %in% names(cspp_data)) | var_name %in% c("year", "st", "state", "state_fips", "state_icpsr", "stateno")) {
stop("Variable name must be a variable in the dataset passed to this function.")
}
if(typeof(cspp_data[, var_name]) %in% c("character", "factor")) {
warning("Plotting a character or factor variable")
}
cspp_data <- dplyr::select(cspp_data, state, year, plot_var = tidyselect::all_of(var_name))
# var_name is *not* provided
} else {
var_name <- names(cspp_data)[!(names(cspp_data) %in% c("year", "st", "state", "state_fips", "state_icpsr", "stateno"))] %>% .[1]
cspp_data <- dplyr::select(cspp_data, state, year, plot_var = tidyselect::all_of(var_name))
}
grouped <- cspp_data %>%
dplyr::group_by(state) %>%
dplyr::summarize(n = dplyr::n()) %>%
dplyr::ungroup()
# if there is more than one year per state
if(max(grouped$n) > 1) {
if(!is.numeric(cspp_data$plot_var)) {
average_years == FALSE
}
if(average_years == TRUE) {
cspp_data <- cspp_data %>%
dplyr::group_by(state) %>%
dplyr::summarize(plot_var = mean(plot_var, na.rm=T), .groups=NULL) %>%
dplyr::ungroup()
if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }
} else {
cspp_data <- cspp_data %>%
dplyr::filter(!is.na(plot_var))
if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }
cspp_data <- cspp_data %>%
dplyr::group_by(state) %>%
dplyr::arrange(year) %>%
dplyr::filter(dplyr::row_number() == dplyr::n()) %>%
dplyr::ungroup()
}
}
cspp_data <- dplyr::filter(cspp_data, state != "Alaska" & state != "Hawaii")
if(nrow(cspp_data)==0) { stop("All rows in dataframe are NA.") }
# one more check:
if(length(!is.na(cspp_data$plot_var)) == 0) { stop("Empty dataframe") }
# made it this far, time to pull in map data
states_map <- ggplot2::map_data("state") %>%
dplyr::rename(state = region)
cspp_data <- dplyr::mutate(cspp_data, state = stringr::str_to_lower(state))
states_map <- dplyr::left_join(states_map, cspp_data, by = "state")
# one more check, actually:
if(length(states_map$plot_var[!is.na(states_map$plot_var)]) == 0) { stop("Empty map.") }
if(drop_NA_states == TRUE) {
states_map <- dplyr::filter(states_map, !is.na(plot_var))
} else {
if(length(states_map$plot_var[is.na(states_map$plot_var)]) > 0) {
if(unique(states_map$state[is.na(states_map$plot_var)]) != "district of columbia") {
message("Some states are missing values and are filled in grey. Set drop_NA_states to TRUE to drop these states.")
print(unique(states_map$state[is.na(states_map$plot_var)]))
}
}
}
params <- ggplot2::layer(geom = GeomPolygon,
stat = "identity",
position = "identity",
params = poly_args)
p <- ggplot2::ggplot(states_map, aes(x = long, y = lat, group = group, fill = plot_var)) +
params +
ggplot2::coord_map() +
ggplot2::labs(fill = var_name) +
ggplot2::theme_void()
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.