# Global knitr chunk options used when rendering this README:
# merge each chunk's source and output into one block, prefix output lines
# with "#>", and write figure files using the "README-" path prefix.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.path = "README-")

Travis-CI Build Status

About

This R data package stores the 2010 Census ZIP Code Tabulation Area (ZCTA) Relationship files. So far it includes:

Motivation

Linking the USPS's ZIP codes to US counties is tedious:

A proposed solution: ZIP codes -> ZCTAs -> counties. This package provides the data needed to make both links: the official US Census ZCTA-to-county relationship files and the ZIP-to-ZCTA crosswalk files created by John Snow, Inc.

Useful Links

Installation

# Install the development version of the package from GitHub.
# This needs devtools; if it is not installed yet, first run:
# install.packages("devtools")
devtools::install_github("jjchern/zcta")

Usage

# devtools::install_github("jjchern/gaze")

library(tidyverse)

# ZCTA-to-county relationship table
zcta::zcta_county_rel_10

# ZIP-code-to-ZCTA crosswalk
zcta::zipzcta

# Show the variable label attached to each column of the relationship table
# (needs the labelled package: devtools::install_github("larmarange/labelled"))
labelled::var_label(zcta::zcta_county_rel_10)

# Total number of records in the relationship table
nrow(zcta::zcta_county_rel_10)

# Number of distinct ZCTAs among those records
nrow(distinct(zcta::zcta_county_rel_10, zcta5))

# In most instances the ZCTA code is the same as the ZIP code for an area,
# but some ZCTAs fall in more than one county.
# For example, 7060 ZCTAs fall in 2 counties.
# First count the records (counties) per ZCTA, then count how many ZCTAs
# share each of those per-ZCTA county counts.
zcta::zcta_county_rel_10 %>%
  count(zcta5, name = "Num of counties") %>%
  count(`Num of counties`, name = "Num of zctas")

# To get a one-to-one relationship between ZCTAs and counties, assign each
# ZCTA to the county that holds the largest share of the ZCTA's population.
# For example:
# Before: ZCTA 601 falls in both county 72001 and county 72141
zcta::zcta_county_rel_10 %>%
  select(zcta5, state, county, geoid, poppt, zpoppct)

# After: ZCTA 601 is related only to county 72001, which accounts for 99.43%
# of its population. Within each ZCTA keep the single record with the highest
# zpoppct, then keep only the columns of interest.
one_to_one_pop <- zcta::zcta_county_rel_10 %>%
  group_by(zcta5) %>%
  slice(which.max(zpoppct)) %>%
  ungroup() %>%
  select(zcta5, state, county, geoid, poppt, zpoppct)
one_to_one_pop

# Alternatively, assign each ZCTA to the county that accounts for the largest
# share of the ZCTA's area: within each ZCTA keep the single record with the
# highest zareapct, then keep only the columns of interest.
one_to_one_area <- zcta::zcta_county_rel_10 %>%
  group_by(zcta5) %>%
  slice(which.max(zareapct)) %>%
  ungroup() %>%
  select(zcta5, state, county, geoid, poppt, zpoppct, zareapct)
one_to_one_area

# With either one-to-one ZCTA-to-county table you can go from ZIP codes to
# ZCTAs to counties. Here the area-based table is attached to the ZIP-to-ZCTA
# crosswalk; `state.x` is the state column coming from the crosswalk side of
# the join.
zipcounty <- zcta::zipzcta %>%
  left_join(one_to_one_area, by = c(zcta = "zcta5")) %>%
  arrange(zip) %>%
  select(zip, zcta, state = state.x, countygeoid = geoid)
zipcounty

# Join the two one-to-one tables on ZCTA and list the ZCTAs whose county match
# differs between them. Columns suffixed ".x" come from the population-based
# table and columns suffixed ".y" from the area-based table.
one_to_one_pop %>%
  left_join(one_to_one_area, by = "zcta5", suffix = c(".x", ".y")) %>%
  filter(geoid.x != geoid.y) %>%
  select(
    zcta5,
    county.x, geoid.x, zpoppct.x,
    county.y, geoid.y, zpoppct.y
  )

# Attach county names to the population-based one-to-one table,
# keeping just the states and DC (state codes up to 56).
# geoid is converted to integer before the join on geoid with gaze::county10.
one_to_one_pop %>%
  filter(state <= 56) %>%
  mutate(geoid = as.integer(geoid)) %>%
  left_join(gaze::county10, by = "geoid") %>%
  select(zcta5, state, usps, county, geoid, name)


jjchern/zcta documentation built on Nov. 12, 2021, 8:40 a.m.