R/get_network_data.R

Defines functions get_network_data

Documented in get_network_data

#' Get state networks data
#'
#' \code{get_network_data} returns a dataframe of the state networks data
#' compiled by the Correlates of State Policy Project. The dataframe is in an
#' edge list format, with each row a state dyad combination. The
#' \code{merge_data} argument allows the direct merging of a dataframe
#' generated by the \code{\link{get_cspp_data}} function.
#'
#' The network dataframe that results is directed, with variables directed
#' towards the state in the \code{State1} column. For instance, the
#' \code{IncomingFlights} variable is the number of flights from \code{State2}
#' with a destination in \code{State1}.
#'
#' @name get_network_data
#'
#' @param category A category within the networks data. Default is NULL. If left
#'   blank, returns the full state networks data. Category options are "Distance
#'   Travel Migration", "Economic", "Political", "Policy", "Demographic".
#'
#' @param merge_data Default is NULL. Takes a dataframe object in the format
#'   generated by \code{\link{get_cspp_data}}. The function merges this
#'   dataframe into the network data by state. If the merge dataframe has multiple
#'   observations per state, this function averages over all values per state as
#'   long as the variables are numeric. If the dataframe passed has multiple
#'   values per state and some are not numeric, only numeric variables are merged.
#'
#' @return A dataframe formatted as an edge list.
#'
#' @importFrom dplyr "%>%" filter left_join group_by mutate n summarize
#' @importFrom tidyselect all_of any_of
#'
#' @export
#'
#' @examples
#'
#' # Load full network data:
#' network.df <- get_network_data()
#'
#' # Network data for subset of categories:
#' network.df <- get_network_data(category = c("Economic", "Political"))
#'
#' # Merge in data from get_cspp_data()
#' network.df <- get_network_data(category = "Distance Travel Migration",
#'                                merge_data  = get_cspp_data(vars = c("sess_length", "hou_majority"),
#'                                                            years = seq(1999, 2000)))
#'
#' @seealso For more information on the construction of the network data as well
#'   as a full codebook see
#'   \url{http://ippsr.msu.edu/public-policy/state-networks}.

get_network_data <- function(category = NULL, merge_data = NULL) {
  # Returns the `network_data` edge list, optionally restricted to the
  # variables belonging to `category`, and optionally left-joined with
  # `merge_data` on `st.abb1` (network side) = `st` (merge_data side).
  #
  # Errors:
  #   - `merge_data` is supplied but is not a data frame
  #   - `category` contains a value outside the five accepted categories
  #   - `merge_data` has no `st` column, or has nothing besides id columns
  # Warns when `merge_data` has more than one row per `st`; in that case it is
  # collapsed to one row per state (mean of numeric columns) before the join.

  valid_categories <- c("Distance Travel Migration", "Economic", "Political",
                        "Policy", "Demographic")

  # Identifier columns that are never treated as variables to merge.
  id_cols <- c("year", "stateno", "state", "state_fips", "state_icpsr")

  # ---- Argument validation (all checks run before any data work) ----
  # `&&` is the scalar, short-circuiting operator intended for `if` conditions.
  if (!is.null(merge_data) && !is.data.frame(merge_data)) {
    stop("merge_data must be a dataframe")
  }

  if (!is.null(category) && !all(category %in% valid_categories)) {
    stop('Category must be one or more of "Distance Travel Migration", "Economic", "Political", "Policy", "Demographic"')
  }

  if (!is.null(merge_data)) {
    if (!("st" %in% names(merge_data))) {
      stop("Data must be formatted by get_cspp_data() or have a column named `st`")
    }

    if (length(setdiff(names(merge_data), c(id_cols, "st"))) == 0) {
      stop("No variables to merge")
    }
  }

  network <- network_data

  # ---- Optional: keep only the variables of the requested categories ----
  # Reaching this point with a non-NULL `category` means it passed validation.
  if (!is.null(category)) {

    if (length(category) > 0) {
      cats <- paste(category, collapse = "|")
      # NOTE(review): inside filter(), `category` is presumably resolved to a
      # `category` column of `network_vars` (data masking), not to this
      # function's argument -- confirm that column exists.
      vars <- network_vars %>%
        dplyr::filter(stringr::str_detect(category, cats))
    } else {
      # A zero-length `category` selects every listed variable.
      vars <- network_vars
    }

    network <- network %>%
      dplyr::select(State1:dyadid, tidyselect::all_of(vars$var_names))
  }

  # ---- Optional: merge state-level data onto the edge list ----
  if (!is.null(merge_data)) {

    # More than one row for any `st` value means the data must be collapsed
    # to one row per state. anyDuplicated() is also safe on zero-row input.
    if (anyDuplicated(merge_data[["st"]]) > 0) {
      warning("There are multiple observations per state in the `data` dataframe. Creating one observation per state (dplyr::summarize()) prior to merging...")

      # Keep the id columns plus every numeric column; other columns cannot
      # be averaged and are dropped.
      merge_data <- merge_data %>%
        dplyr::select_if(
          names(.) %in% c(id_cols, "st") | purrr::map_lgl(., is.numeric)
        )

      # Average every remaining column except the id columns, per state.
      merge_data <- merge_data %>%
        dplyr::group_by(st) %>%
        dplyr::summarize_at(
          dplyr::vars(-tidyselect::any_of(id_cols)),
          list(~mean(., na.rm = TRUE))
        )
    }

    # The join key on the network side is `st.abb1`.
    network <- dplyr::left_join(network, merge_data, by = c("st.abb1" = "st"))
  }

  network
}

Try the cspp package in your browser

Any scripts or data that you put into this service are public.

cspp documentation built on Dec. 28, 2022, 2:46 a.m.