library(tidyr)
library(readr)
library(dplyr)
library(stringr)
library(magrittr)
library(glptools)
library(survey)
library(feather)
path <- "data-raw/qop/migration/"
# Break the download up into smaller .csv files that can be read into R memory (run once)
if(FALSE){
library(bigmemory)
data <- read.big.matrix(path %+% "usa_00064.csv", header = TRUE)
for(y in 2005:2017){
year_col <- data[,1]
year_lgl <- year_col == y
output <- data[year_lgl,]
output <- as.data.frame(output)
write_feather(output, paste0(path, "single-year/", y, ".feather"))
}
}
# Migration data uses MIGPUMAs instead of regular PUMAs. MIGPUMAs need to be assigned to MSAs.
# 1. Create a crosswalk from PUMAs to MIGPUMAs using downloads from IPUMS.
# 2. Load the crosswalk from PUMAs to MSAs. (Created using data from IPUMS.)
# 3. Combine crosswalks to bridge over PUMAs, creating a crosswalk from MIGPUMAs to MSAs.
#crosswalk_05 <- output
#crosswalk_05 <- data.frame(
# crosswalk_05,
# year = rep(2005:2012, each = nrow(crosswalk_05)))
# 1. Start with crosswalks from PUMAs to MIGPUMAs.
# The crosswalk uses PUMA definitions put into use in 2012.
# Code for earlier definitions is commented out at the bottom.
# The 2005 crosswalk contains ranges in individual cells.
# These ranges are expanded across multiple rows.
crosswalk_05 <- read_csv(path %+% "PUMA2000_MIGPUMA_crosswalk.csv")
crosswalk_05 %<>%
transmute(
STATEFIP = `State code (STATEFIP)`,
PUMA = `PUMA (PUMA)`,
PUMA = paste0("c(", gsub("-", ":", PUMA), ")"),
MIGPUMA = `PUMA of Migration (MIGPUMA or MIGPUMA1)`)
for(i in 1:nrow(crosswalk_05)){
df <- data.frame(
STATEFIP = crosswalk_05[i,]$STATEFIP,
PUMA = eval(parse(text = crosswalk_05[i,]$PUMA)),
MIGPUMA = crosswalk_05[i,]$MIGPUMA)
if(i == 1){
output <- df
} else {
output %<>% bind_rows(df)
}
}
crosswalk_05 <- output
crosswalk_12 <- read_csv(path %+% "puma_migpuma1_pwpuma00.csv", skip = 2, n_max = 2378)
crosswalk_12 %<>%
transmute(
STATEFIP = `State of Residence (ST)`,
PUMA,
MIGPUMA = `PWPUMA00 or MIGPUMA1`)
# replicate the crosswalk for each year
crosswalk_05 <- data.frame(
crosswalk_05,
year = rep(2005:2011, each = nrow(crosswalk_05)))
crosswalk_12 <- data.frame(
crosswalk_12,
year = rep(2012:2017, each = nrow(crosswalk_12)))
PUMA_MIGPUMA <- bind_rows(crosswalk_05, crosswalk_12)
# 2. Crosswalk from PUMAs to MSAs exists in the glptools package
MSA_PUMA <- MSA_PUMA %>% filter(year >= 2005)
# 3. Combine crosswalks to bridge over PUMAs.
# Rename MSA to MIGMSA to differentiate from MSA of residence.
# Filter to unique rows
MIGPUMA_MSA <- left_join(MSA_PUMA, PUMA_MIGPUMA, by = c("STATEFIP", "PUMA", "year"))
MIGPUMA_MSA %<>%
select(year, STATEFIP, MIGPUMA, MSA) %>%
rename(MIGMSA = MSA) %>%
unique()
# Load and process ACS.
# Outmigration data requires using the full US sample, so the data is read in one year at a time
# and subset to observations where the
# Attempt to reproduce ACS overall migration data. To change back to educational attainment:
# Subset to age 25+ to mirror FactFinder's migration by education status universe.
#
for(y in 2005:2017){
acs <- read_feather(paste0(path, "single-year/", y, ".feather"))
# Assign MSA and FIPS codes. Process race and education variables.
acs %<>%
process_microdata(pull_peers = FALSE) %>%
select(-SERIAL, -HHWT, -PERNUM) %>%
filter(age >= 25)
# Join crosswalk to data.
acs %<>%
left_join(MIGPUMA_MSA,
by = c("year", "MIGPLAC1" = "STATEFIP", "MIGPUMA1" = "MIGPUMA"))
# Create migration variables.
#
# moved_MIGPUMA
# moved_MIGPUMA = 1 if the person moved between MIGPUMAs.
# (In the case of Lou, Jefferson County is one MIGPUMA).
# moved_MIGPUMA = 0 if the person did not move MIGPUMAs.
# moved_MIGPUMA = NA if the person is less than one year old.
#
# moved_MSA
# moved_MSA = 1 if the person moved between MSAs, one or both of which is a peer.
# (Non-peer MSAs are coded as 0 for MSA and MIGMSA. People who move from a non-peer MSA
# to another non-peer MSA are not captured in moved_MSA, though they are not relevant for
# the analysis.)
#
# suburbanized
# suburbanized = 1 if the person moved from the core county to an outer county within the MSA.
# (Only verified for Louisville.)
#
# urbanized
# burbanized = 1 if the person moved form an outer county within the MSA to the core county.
# (Only verified for Louisville.)
#
# https://usa.ipums.org/usa-action/variables/MIGRATE1
acs %<>%
mutate(
moved_MIGPUMA = if_else(MIGRATE1D >= 24, 1, 0),
moved_MIGPUMA = replace(moved_MIGPUMA, MIGRATE1D == 0, NA),
MIGMSA = replace(MIGMSA, is.na(MIGMSA), 0),
moved_MSA = if_else(moved_MIGPUMA == 1 & MIGMSA != MSA, 1, 0),
suburbanized = if_else(moved_MIGPUMA == 1 & moved_MSA == 0 & is.na(FIPS), 1, 0),
urbanized = if_else(moved_MIGPUMA == 1 & moved_MSA == 0 & !is.na(FIPS), 1, 0)) %>%
select(-MIGRATE1, -MIGRATE1D, -MIGPLAC1, -MIGPUMA1)
#recode education to bachelor's or higher
acs %<>%
mutate(
bach_plus = if_else(educ %in% c("bach", "grad"), 1, 0),
bach_plus = replace(bach_plus, is.na(educ), NA))
# Create output data frames.
#
# Inmigration df is subset to everyone living in a peer MSA
#
# Outmigration df recodes MIGMSA to MSA and is subset to everyone
# who has moved within the last year (whether within the same MIGPUMA/MSA or not)
if(y == 2005){
inmig_output <- acs %>%
pull_peers_MSA()
outmig_output <- acs %>%
select(-MSA) %>%
rename(MSA = MIGMSA) %>%
pull_peers_MSA()
} else{
inmig <- acs %>%
pull_peers_MSA()
inmig_output %<>% bind_rows(inmig)
outmig <- acs %>%
select(-MSA) %>%
rename(MSA = MIGMSA) %>%
pull_peers_MSA()
outmig_output %<>% bind_rows(outmig)
}
}
# Shortcut to avoid waiting a long time for code above to run:
write_feather(inmig_output, path %+% "inmig_micro.feather")
write_feather(outmig_output, path %+% "outmig_micro.feather")
inmig_output <- read_feather(path %+% "inmig_micro.feather")
outmig_output <- read_feather(path %+% "outmig_micro.feather")
# Subset to people who moved MSA
inmig_output %<>%
filter(moved_MSA == 1)
outmig_output %<>%
filter(moved_MSA == 1)
# Create survey objects
inmig_svy <- svydesign(ids = ~0,
weights = ~PERWT,
data = inmig_output)
outmig_svy <- svydesign(ids = ~0,
weights = ~PERWT,
data = outmig_output)
# Create survey output
inmig <- svytable(~year+MSA+bach_plus, design = inmig_svy)
outmig <- svytable(~year+MSA+bach_plus, design = outmig_svy)
# Clean data frames
inmig %<>% as.data.frame(stringsAsFactors = FALSE)
outmig %<>% as.data.frame(stringsAsFactors = FALSE)
inmig %<>%
rename(in_mig_msa_bach_plus = Freq)
outmig %<>%
rename(out_mig_msa_bach_plus = Freq)
mig_MSA <- full_join(inmig, outmig, by = c("year", "MSA", "bach_plus"))
mig_MSA %<>%
filter(bach_plus == 1) %>%
select(-bach_plus) %>%
mutate(
year = as.numeric(year),
net_mig_msa_bach_plus = in_mig_msa_bach_plus - out_mig_msa_bach_plus)
usethis::use_data(mig_MSA, overwrite = TRUE)
# Calculate "suburbanization flows"
inmig_output <- read_feather(path %+% "inmig_micro.feather")
outmig_output <- read_feather(path %+% "outmig_micro.feather")
inmig_output %<>% filter(MSA == 31140, urbanized == TRUE)
outmig_output %<>% filter(MSA == 31140, suburbanized == TRUE)
# Create survey objects
inmig_svy <- svydesign(ids = ~0,
weights = ~PERWT,
data = inmig_output)
outmig_svy <- svydesign(ids = ~0,
weights = ~PERWT,
data = outmig_output)
# Create survey output
inmig <- svytable(~year+bach_plus, design = inmig_svy)
outmig <- svytable(~year+bach_plus, design = outmig_svy)
# Clean data frames
inmig %<>%
as.data.frame(stringsAsFactors = FALSE) %>%
rename(in_mig = Freq)
outmig %<>%
as.data.frame(stringsAsFactors = FALSE) %>%
rename(out_mig = Freq)
lou_mig_all <- full_join(inmig, outmig, by = c("year", "bach_plus")) %>%
group_by(year) %>%
summarise(
urban = sum(in_mig),
suburban = sum(out_mig)) %>%
mutate(
year = as.numeric(year),
net_suburb = suburban - urban) %>%
mutate_at(vars(urban:net_suburb), funs(rollmeanr), 3) %>%
filter(!is.na(urban)) %>%
gather(urban:net_suburb, key = "key", value = "value") %>%
mutate(
key = factor(key,
levels = c("urban", "suburban", "net_suburb"),
labels = c("into Jeff Co", "out of Jeff Co", "Net Suburbs")))
lou_mig_bach <- full_join(inmig, outmig, by = c("year", "bach_plus")) %>%
filter(bach_plus == 1) %>%
select(-bach_plus) %>%
rename(
urban_bach = in_mig,
suburban_bach = out_mig) %>%
mutate(
year = as.numeric(year),
net_suburb_bach = suburban_bach - urban_bach) %>%
mutate_at(vars(urban_bach:net_suburb_bach), funs(rollmeanr), 3) %>%
filter(!is.na(urban_bach)) %>%
gather(urban_bach:net_suburb_bach, key = "key", value = "value") %>%
mutate(
key = factor(key,
levels = c("urban_bach", "suburban_bach", "net_suburb_bach"),
labels = c("into Jeff Co", "out of Jeff Co", "Net Suburbs")))
g <- ggplot(lou_mig_all,
aes(x = year, y = value, group = key, color = key)) +
geom_line(size = 2) +
ylim(c(0, 10000))
g %<>% glptools:::tl_style(plot_title = "Net suburbanization", y_title = "residents",
caption_text = "", subtitle_text = "", cat_names = "")
g <- g + theme(legend.position = "right", legend.text = element_text(size = 12), legend.title = element_text(size = 12)) +
guides(color = guide_legend(title = "Moved:"))
g <- ggplot(lou_mig_bach,
aes(x = year, y = value, group = key, color = key)) +
geom_line(size = 2) +
ylim(c(-500, 2500))
g %<>% glptools:::tl_style(plot_title = "Net suburbanization, bachelor's +", y_title = "residents",
caption_text = "", subtitle_text = "", cat_names = "")
g <- g + theme(legend.position = "right", legend.text = element_text(size = 12), legend.title = element_text(size = 12)) +
guides(color = guide_legend(title = "Moved:"))
rename(
`0`,
`1`
))
filter(bach_plus == 1) %>%
select(-bach_plus) %>%
mutate(
year = as.numeric(year),
net_mig = in_mig - out_mig)
library(ggplot2)
library(scales)
trend_single(mig_MSA_graph, net_mig,
rollmean = 3, xmin = 2006, plot_title = "Migration, Bachelor's +, MSA level")
trend_single(glpdata::mig %>% pull_peers_FIPS, net_mig_bach_plus, rollmean = 3, xmin = 2014,
plot_title = "Migration, Bachelor's +, County")
# Suburbanization??
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.