rm(list=ls())
library(tidyverse)
library(tidycensus)
library(ipumsr)
library(httr)
library(devtools)
# These age groups are the most granular ones that cover all years of data
age_vec <- c(seq(0, 30, by = 5), seq(35, 85, by = 10), Inf)
race_vec <- c(
h = "Hispanic or Latino",
nhba = "Black or African American",
nhaa = "Asian",
nhia = "American Indian and Alaska Native", # indegineous?
nhna = "Native Hawaiian and Other Pacific Islander", # indegionus?
nhtom = "Two or More Races", # other
nhwa = "White",
tot = ""
)
# Source reference
# https://data.nber.org/census/popest/www.census.gov/popest/data/intercensal/county/files/CO-EST00INT-ALLDATA.pdf
#DF <- "https://data.nber.org/census/popest/coest00intalldata.csv" %>%
DF <- "~/Downloads/coest00intalldata.csv" %>%
read_csv() %>%
# lets keep the data sources consistenet
filter(yearref != 1 & yearref != 12) %>%
mutate(agegrp = ifelse(agegrp == 0, 1, agegrp)) %>%
select(
county, year, agegrp, tot_male, tot_female,
nhwa_male:nhtom_female, h_male:h_female) %>%
# age group 99 is teh aggregate age group here for the data
filter(agegrp != 0 & agegrp != 99) %>%
mutate(Age = cut((agegrp - 1) * 5, age_vec, right = FALSE)) %>%
pivot_longer(tot_male:h_female) %>%
mutate(Sex = str_to_title(str_split_fixed(name, "_", 2)[,2])) %>%
mutate(Race = race_vec[str_split_fixed(name, "_", 2)[,1]]) %>%
select(-name, -agegrp)
# Source reference
# https://data.nber.org/census/popest/www.census.gov/popest/data/counties/asrh/2015/files/CC-EST2015-ALLDATA.pdf
#DF2 <- "https://data.nber.org/census/popest/countypopmonthasrh.csv" %>%
DF2 <- "~/Downloads/countypopmonthasrh.csv" %>%
read_csv() %>%
filter(year > 2010 & year < 2016) %>%
rename_all(tolower) %>%
select(
county, year, agegrp, tot_male, tot_female,
nhwa_male:nhtom_female, h_male:h_female) %>%
# age group 99 is teh aggregate age group here for the data
filter(agegrp != 0 & agegrp != 99) %>%
mutate(Age = cut((agegrp - 1) * 5, age_vec, right = FALSE)) %>%
pivot_longer(tot_male:h_female) %>%
mutate(Sex = str_to_title(str_split_fixed(name, "_", 2)[,2])) %>%
mutate(Race = race_vec[str_split_fixed(name, "_", 2)[,1]]) %>%
select(-name, -agegrp)
ipums_key <- Sys.getenv("IPUMS_API_KEY")
## wait a bit cause this could take a while for IPUMS to do its thing
data_extract_status_res <- GET(
"https://api.ipums.org/extracts?product=nhgis&version=v1",
add_headers(Authorization = ipums_key))
de_df <- content(data_extract_status_res, "parsed", simplifyDataFrame = T)
if(nrow(filter(
de_df, description == "Fatal Encounters Denom" &
!is.null(de_df$download_links$table_data))) == 0){
himeta_df <- "https://api.ipums.org/metadata/nhgis/datasets?version=v1" %>%
GET(add_headers(Authorization = ipums_key)) %>%
content("parsed", simplifyDataFrame = TRUE) %>%
as_tibble()
acsdetail_df <- bind_rows(lapply(2016:2018, function(y){
"https://api.ipums.org/metadata/nhgis/datasets/" %>%
str_c(y, "_ACS1?version=v1") %>%
GET(add_headers(Authorization = ipums_key)) %>%
content("parsed", simplifyDataFrame = TRUE) %>%
.[["data_tables"]] %>%
as_tibble() %>%
mutate(year = y)
}))
mybody_json <- list(
datasets = sapply(str_c(2016:2018, "_ACS1"), function(x){
list(
geog_levels = list("county", "state"),
data_tables = as.list(
c("B03002",
str_c("B01001", c("", "B", "C", "D", "E", "G", "H", "I"))
)
)
)
}, simplify = FALSE),
data_format = "csv_no_header",
description = "Fatal Encounters Denom",
breakdown_and_data_type_layout = "single_file"
)
## send in the data request
result <- POST(
"https://api.ipums.org/extracts/?product=nhgis&version=v1",
add_headers(Authorization = ipums_key),
body = mybody_json, encode = "json", verbose())
res_df <- content(result, "parsed", simplifyDataFrame = T)
my_number <- res_df$number
## wait a bit cause this could take a while for IPUMS to do its thing
data_extract_status_res <- GET(
"https://api.ipums.org/extracts?product=nhgis&version=v1",
add_headers(Authorization = ipums_key))
de_df <- content(data_extract_status_res, "parsed", simplifyDataFrame = T)
}
my_number <- filter(de_df,
description == "Fatal Encounters Denom" &
!is.null(de_df$download_links$table_data))$number[1]
tf <- tempfile(fileext = ".zip")
download.file(
de_df[de_df$number == my_number,c("download_links")]$table_data,
tf
)
county_acs_df <- bind_rows(lapply(2016:2018, function(y){
sty <- str_c(y, "_county")
ipums_year_df <- tf %>%
read_nhgis(data_layer = contains(sty)) %>%
set_ipums_var_attributes(
read_ipums_codebook(
tf,
contains(sty)))
year_var_df <- ipums_year_df %>%
select(-(GISJOIN:NAME_E), -NAME_M) %>%
{tibble(
name = names(sapply(., attr, which = "label")),
desc = sapply(., attr, which = "label"),
var = sapply(., attr, which = "var_desc"))} %>%
filter(grepl("Sex by Age", var)) %>%
mutate(var = str_sub(var, 1, -8)) %>%
filter(desc != "Total" & desc != "Female" & desc != "Male") %>%
filter(str_sub(name, -4, -4) == "E") %>%
mutate(Age = str_split(desc, ": ", simplify = TRUE)[,2]) %>%
mutate(Sex = str_split(desc, ": ", simplify = TRUE)[,1]) %>%
mutate(`Min Age` = as.numeric(str_split_fixed(Age, " ", 2)[,1])) %>%
mutate(`Max Age` = as.numeric(str_split_fixed(Age, " ", 4)[,3])) %>%
mutate(`Min Age` = ifelse(
is.na(`Min Age`) & is.na(`Max Age`), 0, `Min Age`)) %>%
mutate(Age = cut(`Min Age`, age_vec, right = F)) %>%
mutate(var = str_remove(var, "Sex by Age")) %>%
mutate(var = str_remove(var, " \\(")) %>%
mutate(var = str_remove(var, "\\)")) %>%
mutate(var = str_remove(var, " Alone")) %>%
mutate(Race = str_remove(var, ", Not Hispanic or Latino")) %>%
select(name, Age, Sex, Race)
ipums_year_df %>%
mutate(GEOID = str_c(STATEA, COUNTYA)) %>%
select(YEAR, GEOID, STATE, COUNTY, !!year_var_df$name) %>%
pivot_longer(!!year_var_df$name) %>%
left_join(year_var_df, by = "name") %>%
select(-name) %>%
group_by(YEAR, GEOID, Age, Sex, Race) %>%
summarize(value = sum(value))}))
county_totacs_df <- bind_rows(lapply(2016:2018, function(y){
sty <- str_c(y, "_county")
ipums_year_df <- tf %>%
read_nhgis(data_layer = contains(sty)) %>%
set_ipums_var_attributes(
read_ipums_codebook(
tf,
contains(sty)))
year_var_df <- ipums_year_df %>%
select(-(GISJOIN:NAME_E), -NAME_M) %>%
{tibble(
name = names(sapply(., attr, which = "label")),
desc = sapply(., attr, which = "label"),
var = sapply(., attr, which = "var_desc"))} %>%
filter(!grepl("Sex by Age", var)) %>%
filter(desc != "Total" & desc != "Not Hispanic or Latino") %>%
mutate(desc = str_remove(desc, " alone")) %>%
mutate(desc = str_remove(desc, "Not Hispanic or Latino: ")) %>%
filter(!grepl("Hispanic or Latino: ", desc)) %>%
mutate(Race = ifelse(
grepl("Two or", desc), "Two or More Races", desc)) %>%
filter(str_sub(name, -4, -4) == "E") %>%
select(name, Race)
ipums_year_df %>%
mutate(GEOID = str_c(STATEA, COUNTYA)) %>%
select(YEAR, GEOID, STATE, COUNTY, !!year_var_df$name) %>%
pivot_longer(!!year_var_df$name) %>%
left_join(year_var_df, by = "name") %>%
select(-name) %>%
group_by(YEAR, GEOID, Race) %>%
summarize(value = sum(value))})) %>%
ungroup() %>%
filter(Race != "Some other race")
state_totacs_df <- bind_rows(lapply(2016:2018, function(y){
sty <- str_c(y, "_state")
ipums_year_df <- tf %>%
read_nhgis(data_layer = contains(sty)) %>%
set_ipums_var_attributes(
read_ipums_codebook(
tf,
contains(sty)))
year_var_df <- ipums_year_df %>%
select(-(GISJOIN:NAME_E), -NAME_M) %>%
{tibble(
name = names(sapply(., attr, which = "label")),
desc = sapply(., attr, which = "label"),
var = sapply(., attr, which = "var_desc"))} %>%
filter(!grepl("Sex by Age", var)) %>%
filter(desc != "Total" & desc != "Not Hispanic or Latino") %>%
mutate(desc = str_remove(desc, " alone")) %>%
mutate(desc = str_remove(desc, "Not Hispanic or Latino: ")) %>%
filter(!grepl("Hispanic or Latino: ", desc)) %>%
mutate(Race = ifelse(
grepl("Two or", desc), "Two or More Races", desc)) %>%
filter(str_sub(name, -4, -4) == "E") %>%
select(name, Race)
ipums_year_df %>%
mutate(GEOID = STATEA) %>%
select(YEAR, GEOID, STATE, !!year_var_df$name) %>%
pivot_longer(!!year_var_df$name) %>%
left_join(year_var_df, by = "name") %>%
select(-name) %>%
group_by(YEAR, GEOID, Race) %>%
summarize(value = sum(value))})) %>%
ungroup() %>%
filter(Race != "Some other race")
state_acs_df <- bind_rows(lapply(2016:2018, function(y){
sty <- str_c(y, "_state")
ipums_year_df <- tf %>%
read_nhgis(data_layer = contains(sty)) %>%
set_ipums_var_attributes(
read_ipums_codebook(
tf,
contains(sty)))
year_var_df <- ipums_year_df %>%
select(-(GISJOIN:NAME_E), -NAME_M) %>%
{tibble(
name = names(sapply(., attr, which = "label")),
desc = sapply(., attr, which = "label"),
var = sapply(., attr, which = "var_desc"))} %>%
mutate(var = str_sub(var, 1, -8)) %>%
filter(desc != "Total" & desc != "Female" & desc != "Male") %>%
filter(str_sub(name, -4, -4) == "E") %>%
mutate(Age = str_split(desc, ": ", simplify = TRUE)[,2]) %>%
mutate(Sex = str_split(desc, ": ", simplify = TRUE)[,1]) %>%
mutate(`Min Age` = as.numeric(str_split_fixed(Age, " ", 2)[,1])) %>%
mutate(`Max Age` = as.numeric(str_split_fixed(Age, " ", 4)[,3])) %>%
mutate(`Min Age` = ifelse(
is.na(`Min Age`) & is.na(`Max Age`), 0, `Min Age`)) %>%
mutate(Age = cut(`Min Age`, age_vec, right = F)) %>%
mutate(var = str_remove(var, "Sex by Age")) %>%
mutate(var = str_remove(var, " \\(")) %>%
mutate(var = str_remove(var, "\\)")) %>%
mutate(var = str_remove(var, " Alone")) %>%
mutate(Race = str_remove(var, ", Not Hispanic or Latino")) %>%
select(name, Age, Sex, Race)
ipums_year_df %>%
mutate(GEOID = STATEA) %>%
select(YEAR, GEOID, STATE, !!year_var_df$name) %>%
pivot_longer(!!year_var_df$name) %>%
left_join(year_var_df, by = "name") %>%
select(-name) %>%
group_by(YEAR, GEOID, Age, Sex, Race) %>%
summarize(value = sum(value))}))
full_age_df <- bind_rows(DF, DF2) %>%
rename(GEOID = county, YEAR = year) %>%
group_by(GEOID, YEAR, Age, Race, Sex) %>%
summarise_all(sum) %>%
ungroup() %>%
bind_rows(
ungroup(county_acs_df)
) %>%
arrange(GEOID, YEAR, Age, Race, Sex) %>%
mutate(Race = case_when(
Race == "Asian" ~ "Asian/Pacific Islander",
Race == "Native Hawaiian and Other Pacific Islander" ~ "Asian/Pacific Islander",
Race == "White" ~ "European-American/White",
Race == "American Indian and Alaska Native" ~ "Native American/Alaskan",
Race == "Black or African American" ~ "African-American/Black",
Race == "Hispanic or Latino" ~ "Hispanic/Latino",
TRUE ~ Race,
)) %>%
group_by(GEOID, YEAR, Age, Race, Sex) %>%
summarise_all(sum) %>%
ungroup()
full_df_state <- bind_rows(DF, DF2) %>%
rename(GEOID = county, YEAR = year) %>%
mutate(GEOID = str_sub(GEOID, 1, 2)) %>%
group_by(GEOID, YEAR, Race) %>%
summarize(value = sum(value)) %>%
ungroup() %>%
bind_rows(ungroup(state_totacs_df)) %>%
arrange(GEOID, YEAR, Race) %>%
mutate(Race = case_when(
Race == "Asian" ~ "Asian/Pacific Islander",
Race == "Native Hawaiian and Other Pacific Islander" ~ "Asian/Pacific Islander",
Race == "White" ~ "European-American/White",
Race == "American Indian and Alaska Native" ~ "Native American/Alaskan",
Race == "Black or African American" ~ "African-American/Black",
Race == "Hispanic or Latino" ~ "Hispanic/Latino",
TRUE ~ Race,
)) %>%
group_by(GEOID, YEAR, Race) %>%
summarize(value = sum(value)) %>%
ungroup()
full_df_county <- bind_rows(DF, DF2) %>%
rename(GEOID = county, YEAR = year) %>%
group_by(GEOID, YEAR, Race) %>%
summarize(value = sum(value)) %>%
ungroup() %>%
bind_rows(ungroup(state_totacs_df)) %>%
arrange(GEOID, YEAR, Race) %>%
mutate(Race = case_when(
Race == "Asian" ~ "Asian/Pacific Islander",
Race == "Native HawaiiSome other racean and Other Pacific Islander" ~ "Asian/Pacific Islander",
Race == "White" ~ "European-American/White",
Race == "American Indian and Alaska Native" ~ "Native American/Alaskan",
Race == "Black or African American" ~ "African-American/Black",
Race == "Hispanic or Latino" ~ "Hispanic/Latino",
TRUE ~ Race,
)) %>%
group_by(GEOID, YEAR, Race) %>%
summarize(value = sum(value)) %>%
ungroup()
full_age_df_state <- bind_rows(DF, DF2) %>%
rename(GEOID = county, YEAR = year) %>%
mutate(GEOID = str_sub(GEOID, 1, 2)) %>%
group_by(GEOID, YEAR, Age, Race, Sex) %>%
summarise_all(sum) %>%
ungroup() %>%
bind_rows(ungroup(state_acs_df)) %>%
arrange(GEOID, YEAR, Age, Race, Sex) %>%
mutate(Race = case_when(
Race == "Asian" ~ "Asian/Pacific Islander",
Race == "Native Hawaiian and Other Pacific Islander" ~ "Asian/Pacific Islander",
Race == "White" ~ "European-American/White",
Race == "American Indian and Alaska Native" ~ "Native American/Alaskan",
Race == "Black or African American" ~ "African-American/Black",
Race == "Hispanic or Latino" ~ "Hispanic/Latino",
TRUE ~ Race,
)) %>%
group_by(GEOID, YEAR, Age, Race, Sex) %>%
summarise_all(sum) %>%
ungroup()
race_age_df <- full_age_df %>%
filter(Race != "" & Race != "Two or More Races") %>%
filter(GEOID < 72)
age_df <- full_age_df %>%
filter(Race == "") %>%
select(-Race) %>%
filter(GEOID < 72)
race_df <- full_df_county %>%
filter(Race != "" & Race != "Two or More Races") %>%
filter(GEOID < 72)
state_race_age_df <- full_age_df_state %>%
filter(Race != "" & Race != "Two or More Races") %>%
filter(GEOID < 72)
state_age_df <- full_age_df_state %>%
filter(Race == "") %>%
select(-Race) %>%
filter(GEOID < 72)
state_race_df <- full_df_state %>%
filter(Race != "" & Race != "Two or More Races") %>%
filter(GEOID < 72)
use_data(
race_df, race_age_df, age_df,
state_race_df, state_race_age_df, state_age_df, overwrite = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.