R/census_helpers.R

Defines functions census_us census_pumas census_msa census_nhood census_regions census_state census_blockgroups census_tracts census_counties census_towns wrap_census

## UTILS ----
wrap_census <- function(src, geography, table, year, state, dataset, key, ...) {
    if (src == "acs") {
        suppressMessages(
            httr::with_config(
                httr::user_agent("cwi"),
                tidycensus::get_acs(geography = geography, table = table, year = year, state = state, survey = dataset, key = key, cache_table = TRUE, ...)
            )
        )
    } else if (src == "decennial") {
        suppressMessages(
            httr::with_config(
                httr::user_agent("cwi"),
                tidycensus::get_decennial(geography = geography, table = table, year = year, state = state, sumfile = dataset, key = key, cache_table = TRUE, ...)
            )
        )
    } else {
        return(NULL)
    }
}



## TOWNS ----
# fetch all in the state, then filter by county or arg
census_towns <- function(src, table, year, towns, counties, state, dataset, key, sleep, ...) {
    # tidycensus now handles skipped levels in hierarchy i.e. can get towns w/o specifying counties
    Sys.sleep(sleep)

    xw <- county_x_state(state, counties)
    fetch <- wrap_census(src, geography = "county subdivision", table = table, year = year, state = state, dataset = dataset, key = key, ...)
    fetch <- town_names(fetch, NAME)
    fetch$county_geoid <- substr(fetch$GEOID, 1, 5)
    fetch <- dplyr::inner_join(fetch, xw, by = "county_geoid")

    if (!identical(towns, "all")) {
        fetch <- dplyr::filter(fetch, NAME %in% towns)
    }
    fetch$county_geoid <- NULL
    fetch
}

## COUNTIES ----
# fetch all, then filter by arg
census_counties <- function(src, table, year, counties, state, dataset, key, sleep, ...) {
    Sys.sleep(sleep)
    xw <- county_x_state(state, counties)

    fetch <- wrap_census(src, geography = "county", table = table, year = year, state = state, dataset = dataset, key = key, ...)
    fetch$NAME <- stringr::str_remove(fetch$NAME, ", .+$") # remove , Connecticut
    fetch$NAME <- stringr::str_replace(fetch$NAME, "Planning Region", "COG")

    fetch$county_geoid <- substr(fetch$GEOID, 1, 5)
    fetch <- dplyr::inner_join(fetch, xw, by = "county_geoid")
    fetch$county_geoid <- NULL
    fetch$county <- NULL
    fetch
}

## TRACTS ----
# fetch all in the state, then filter by county or arg
census_tracts <- function(src, table, year, tracts, counties, state, dataset, key, sleep, ...) {
    Sys.sleep(sleep)

    fetch <- wrap_census(src, geography = "tract", table = table, year = year, state = state, dataset = dataset, key = key, ...)

    if (identical(tracts, "all")) {
        xw <- county_x_state(state, counties)
    } else {
        xw <- county_x_state(state, "all")
        fetch <- dplyr::filter(fetch, GEOID %in% tracts)
    }
    fetch$county_geoid <- substr(fetch$GEOID, 1, 5)
    fetch <- dplyr::inner_join(fetch, xw, by = "county_geoid")
    fetch$county_geoid <- NULL
    fetch$NAME <- fetch$GEOID
    fetch
}

## BLOCKGROUPS ----
# fetch all in the state, then filter by county or arg
census_blockgroups <- function(src, table, year, blockgroups, counties, state, dataset, key, sleep, ...) {
    Sys.sleep(sleep)

    fetch <- wrap_census(src, geography = "block group", table = table, year = year, state = state, dataset = dataset, key = key, ...)

    if (identical(blockgroups, "all")) {
        xw <- county_x_state(state, counties)
    } else {
        xw <- county_x_state(state, "all")
        fetch <- dplyr::filter(fetch, GEOID %in% blockgroups)
    }
    fetch$county_geoid <- substr(fetch$GEOID, 1, 5)
    fetch <- dplyr::inner_join(fetch, xw, by = "county_geoid")
    fetch$county_geoid <- NULL
    fetch$NAME <- fetch$GEOID
    fetch
}

## STATE ----
# fetch all, then filter
census_state <- function(src, table, year, state, dataset, key, sleep, ...) {
    Sys.sleep(sleep)
    fetch <- wrap_census(src, geography = "state", table = table, year = year, state = NULL, dataset = dataset, key = key, ...)
    fetch <- dplyr::filter(fetch, GEOID == state)
    fetch
}

## REGIONS ----
# fetch all towns, then filter by region & aggregate
# needs name of estimate/value column
census_regions <- function(src, table, year, regions, state, value, dataset, key, sleep, ...) {
    Sys.sleep(sleep)

    # get unique regions but keep names
    regions <- regions[unique(names(regions))]

    region_df <- tibble::enframe(regions, value = "town")
    region_df <- tidyr::unnest(region_df, town)
    fetch <- census_towns(src, table, year, "all", "all", state, dataset, key, 0, ...)
    fetch <- dplyr::inner_join(fetch, region_df, by = c("NAME" = "town"))
    fetch <- dplyr::group_by(fetch, state, NAME = name, variable)
    if ("moe" %in% names(fetch)) {
        fetch <- dplyr::summarise(fetch,
            dplyr::across({{ value }}, sum),
            moe = round(tidycensus::moe_sum(moe, {{ value }}))
        )
    } else {
        fetch <- dplyr::summarise(
            fetch,
            dplyr::across({{ value }}, sum)
        )
    }
    fetch <- dplyr::ungroup(fetch)
    fetch
}

## NEIGHBORHOODS ----
# fetch tracts or bgs, then filter by nhood table & aggregate
# let counties be independent of neighborhoods
# needs name of estimate/value column
# switch nhood col names to strings instead of bare
census_nhood <- function(src, table, year, nhood_data, state, name, geoid, weight, is_tract, value, dataset, key, sleep, ...) {
    Sys.sleep(sleep)
    if (is_tract) {
        fetch <- census_tracts(src, table, year, "all", "all", state, dataset, key, 0, ...)
    } else {
        fetch <- census_blockgroups(src, table, year, "all", "all", state, dataset, key, 0, ...)
    }
    fetch <- dplyr::inner_join(nhood_data, fetch, by = stats::setNames("GEOID", geoid))
    fetch <- dplyr::group_by(fetch, dplyr::across(c(state, county, tidyselect::any_of(name), variable)))

    weight_col <- rlang::sym(weight)
    if ("moe" %in% names(fetch)) {
        fetch <- dplyr::summarise(fetch,
            {{ value }} := round(sum({{ value }} * {{ weight_col }})),
            moe = round(tidycensus::moe_sum(moe, {{ value }} * {{ weight_col }}))
        )
    } else {
        fetch <- dplyr::summarise(
            fetch,
            {{ value }} := round(sum({{ value }} * {{ weight_col }}))
        )
    }
    fetch <- dplyr::ungroup(fetch)
    fetch
}

## MSAs ----
# fetch all in us, then filter
census_msa <- function(src, table, year, new_england, dataset, key, sleep, ...) {
    Sys.sleep(sleep)
    if (year < 2015) {
        cli::cli_inform("Note: OMB changed MSA boundaries around 2015. These might not match the ones you're expecting.")
    }
    fetch <- wrap_census(src, geography = "metropolitan statistical area/micropolitan statistical area", table = table, year = year, state = NULL, dataset = dataset, key = key, ...)
    if (new_england) {
        ne_msa <- dplyr::filter(cwi::msa, region == "New England")
        fetch <- dplyr::semi_join(fetch, ne_msa, by = c("GEOID" = "geoid"))
    }
    fetch
}

## PUMAs ----
# fetch all in state, then filter like tracts (only available for ACS)
census_pumas <- function(src, table, year, pumas, counties, state, dataset, key, sleep, ...) {
    Sys.sleep(sleep)

    fetch <- wrap_census(src, geography = "public use microdata area", table = table, year = year, state = state, dataset = dataset, key = key, ...)

    if (identical(pumas, "all")) {
        xw <- county_x_state(state, counties)
    } else {
        # joining with county_x_state doesn't work here for 2022 because pumas aren't subsets of counties anymore
        xw <- county_x_state(state, "all")
        fetch <- dplyr::filter(fetch, GEOID %in% pumas)
    }

    fetch$county_geoid <- substr(fetch$GEOID, 1, 5)

    # don't filter if CT in 2022+
    if (!(year >= 2022 & state == "09")) {
        fetch <- dplyr::inner_join(fetch, xw, by = "county_geoid")
    }
    fetch$county_geoid <- NULL
    fetch$NAME <- fetch$GEOID
    fetch
}

## US ----
# fetch
census_us <- function(src, table, year, dataset, key, sleep, ...) {
    Sys.sleep(sleep)
    fetch <- wrap_census(src, geography = "us", table = table, year = year, state = NULL, dataset = dataset, key = key, ...)
    fetch
}
CT-Data-Haven/cwi documentation built on April 13, 2025, 1:42 p.m.