R/ref_area.R

Defines functions sc_from_geoid ref_area_from_geoid sc_from_county_geoids county_geoids_from_state sc_from_preliminary_call ref_area_from_sc ref_area_from_zcta ref_area

ref_area <- function(geography,
                     state,
                     county,
                     zcta,
                     geoid,
                     year,
                     dataset,
                     partial_tidycensus_calls,
                     evaluator) {
  switch(
    geography,
    zcta = ref_area_from_zcta(zcta, state, county, geoid, year, dataset),
    switch(
      determine_input_arg(geoid = geoid, state = state, county = county),
      geoid =
        ref_area_from_geoid(
          geoid = geoid,
          geography = geography,
          year = year,
          dataset = dataset,
          partial_tidycensus_calls = partial_tidycensus_calls,
          evaluator = evaluator
        ),
      ref_area_from_sc(
        state = state,
        county = county,
        geography = geography,
        year = year,
        dataset = dataset,
        partial_tidycensus_calls = partial_tidycensus_calls,
        evaluator = evaluator
      )
    )
  )
}


ref_area_from_zcta <- function(zcta, state, county, geoid, year, dataset) {
  if (!is.null(county)) {
    stop('county must be NULL when geography = "zcta"', call. = FALSE)
  }
  if (!is.null(geoid)) {
    stop('geoid must be NULL when geography = "zcta"', call. = FALSE)
  }
  if (!is.null(zcta)) {
    zcta <- trimws(zcta)
    if (!length(zcta) || anyNA(zcta) || !all(grepl("^\\d{1,5}$", zcta))) {
      stop(
        "zcta argument must be a character vector of digits,",
        "\neach between 1 and 5 characters long",
        call. = FALSE
      )
    }

    switch(
      dataset,
      acs5 = if (!is.null(state) && any(2020:2023 == year)) {
        state <- NULL
        warning('state argument is ignored when geography = "zcta" and year is',
                " in the range 2020-2023.", call. = FALSE, immediate. = TRUE)
      }
    )
  }

  # Validates non-NULL input to zcta and creates reference area (ref_area). It
  # must contain one or more strings of 1-5 digits.
  list(
    geoid        = NULL,
    geo_length   = NULL,
    state_county = dplyr::tibble(state = list(state), county = list(NULL)),
    zcta         = zcta
  )

}



ref_area_from_sc <- function(state,
                             county,
                             geography,
                             year,
                             dataset,
                             partial_tidycensus_calls,
                             evaluator) {

  # Creates ref_area from user-inputted state and county.
  if (!is.null(county) && length(state) != 1L) {
    stop(
      "If supplying counties, exactly one state must be provided",
      "\nIn order to supply specific counties in multiple states,",
      "\nuse get_geoids() to get their GEOIDs and pass them to the",
      "geoid argument in this function.",
      call. = FALSE
    )
  }

  # This is a tibble with columns "state" and "county". It is constructed so
  # that the contents of each row can be successfully passed to the
  # corresponding "state" and "county" arugments in tidycensus::get_acs() or
  # tidycensus::get_decennial().
  state_county <-

    if (any(c("state", "county") == geography)) {

      # If geography is "state" or "county", the state and county args are
      # passed as-is to the tidycensus function(s).
      dplyr::tibble(state = list(state), county = list(county))

    } else if (is.null(county)) {
      # If geography = "tract" or smaller and county = NULL, preliminary
      # call(s) to tidycensus must be performed in order to obtain all county
      # GEOIDs for all states. The exception is the special case described
      # below.

      if (geography == "tract" && dataset == "decennial" && year < 2010) {
        # In this special case, the tidycensus function(s) will be called once
        # per state. Also, state = NULL is a shortcut to "all 50 states plus
        # DC and Puerto Rico")
        dplyr::tibble(
          state = if (is.null(state)) sociome::state_geoids else state
        )
      } else {
        sc_from_preliminary_call(
          partial_tidycensus_calls,
          state,
          evaluator
        )
      }

    } else {
      # If geography = "tract" or smaller and county was not NULL, the
      # tidycensus function(s) will be called once per county.
      dplyr::tibble(state = state, county = county)
    }

  list(
    geoid = NULL,
    geo_length = NULL,
    state_county = state_county,
    zcta = NULL
  )
}



sc_from_preliminary_call <- function(partial_tidycensus_calls,
                                     state,
                                     evaluator) {

  # Obtains a single character vector of all county GEOIDs for all states in the
  # "state" argument
  county_geoids <-
    county_geoids_from_state(partial_tidycensus_calls, state, evaluator)

  # Turns the character vector into a proper state_county object (i.e., a tibble
  # with columns "state" and "county").
  sc_from_county_geoids(county_geoids)
}



county_geoids_from_state <- function(partial_tidycensus_calls,
                                     state,
                                     evaluator) {
  message("\nPreliminary tidycensus call beginning...")

  # The preliminary tidycensus call is performed by modifying the tidycensus
  # call skeleton that happens earlier in get_adi(). It ends up requesting data
  # on a single arbitrary variable at the county level for all user-inputted
  # states.
  counties <-
    partial_tidycensus_calls[[1L]] %>%
    rlang::call_modify(
      geography = "county",
      variables = .$variables[1L],
      geometry = FALSE,
      shift_geo = rlang::zap(),
      state = state
    ) %>%
    evaluator()

  # The data requested in the "variables" argument is not actually even saved;
  # only the column of GEOIDs is returned.
  counties[["GEOID"]]
}




sc_from_county_geoids <- function(county_geoids) {

  # Converts a character vector of 5+ digit GEOIDs into a proper
  # state_county object (i.e., a tibble with columns "state" and "county"). The
  # tidycensus functions' "state" argument accepts two-digit GEOIDs of states,
  # and their "county" argument accepts 3-digit county FIPS codes (see
  # ?tidycensus::get_acs) that are often seen concatenated onto the end of the
  # 2-digit state GEOID to make a 5-digit county GEOID (e.g., "39" (Ohio) +
  # "035" (Cuyahoga County) = "39035" (Cuyahoga County)).

  dplyr::tibble(
    state  = substr(county_geoids, 1L, 2L),
    county = substr(county_geoids, 3L, 5L)
  )
}




ref_area_from_geoid <- function(geoid,
                                geography,
                                year,
                                dataset,
                                partial_tidycensus_calls,
                                evaluator) {

  # Validates user input to geoid and constructs a state_county object (tibble
  # with columns "state" and "county") from it.

  geoid <- validate_geoid(geoid)

  # Get number of characters in a GEOID of user-inputted geography level. This
  # is used later when trimming tidycensus's results to only include areas in
  # the user-inputted reference area.
  geo_length <- validate_geo_length(geography, geoid)

  state_county <-
    sc_from_geoid(
      geoid,
      geography,
      year,
      dataset,
      partial_tidycensus_calls,
      evaluator
    )

  list(
    geoid = geoid,
    geo_length = geo_length,
    state_county = state_county,
    zcta = NULL
  )
}

sc_from_geoid <- function(geoid,
                          geography,
                          year,
                          dataset,
                          partial_tidycensus_calls,
                          evaluator) {

  # The state GEOIDs of each element in "geoid".
  first_two <- substr(geoid, 1L, 2L)

  if (any(c("state", "county") == geography)) {

    # If the user specified a geography of "state" or "county", the tidycensus
    # function(s) will be called once with the state argument containing a
    # character vector of all states GEOIDs represented in the geoid argument.
    dplyr::tibble(state = list(unique(first_two)))

  } else if (geography == "tract" && dataset == "decennial" && year < 2010) {

    # In this special case, the tidycensus function(s) will be called once per
    # unique state represented in the geoid argument.
    dplyr::tibble(state = unique(first_two))

  } else {

    # If the user specified a geography of "tract" or smaller, the tidycensus
    # function(s) will be called once per unique county represented in the geoid
    # argument.

    state_geoids_lgl <- geoid == first_two

    # If any elements in the "geoid" argument were state GEOIDs (i.e., 2
    # digits), they are passed to a preliminary tidycensus call in order to
    # obtain all their county GEOIDs. All the state GEOIDs are then removed, and
    # this list of county GEOIDs is appended.
    if (any(state_geoids_lgl)) {
      geoid <-
        c(
          geoid[!state_geoids_lgl],
          county_geoids_from_state(
            partial_tidycensus_calls = partial_tidycensus_calls,
            state = unique(geoid[state_geoids_lgl]),
            evaluator = evaluator
          )
        )
    }

    # All geoids are truncated to only include their 5-digit county geoids.
    county_geoids <- geoid %>% substr(1L, 5L) %>% unique()

    # Turns the character vector into a proper state_county object (i.e., a
    # tibble with columns "state" and "county").
    sc_from_county_geoids(county_geoids)

  }
}

Try the sociome package in your browser

Any scripts or data that you put into this service are public.

sociome documentation built on Jan. 10, 2026, 9:17 a.m.