get_name_from_census2010 <- function(FIPs, geo_header, in_states){
# this function is to be called in convert_fips_to_names()
geoheader <- tolower(geo_header)
path_to_census <- Sys.getenv("PATH_TO_CENSUS")
lst <- list()
for (st in in_states){
file <- paste0(path_to_census, "/generated_data/fips_", geoheader, "/",
geoheader, "_fips_", st, ".csv")
lst[[st]] <- fread(file, colClasses = "character")
}
fips_geoheader <- rbindlist(lst) %>%
.[, .(fips = get(geo_header), state, NAME)]
if (geo_header == "CBSA" && in_states == "US"){
names <- fips_geoheader[FIPs, on = .(fips)] %>%
.[, NAME]
} else {
names <- fips_geoheader[FIPs, on = .(fips, state)] %>%
.[, NAME]
}
return(names)
}
compress_datatable <- function(dt){
# If a two-columns data.table dt has repeated element in first column, remove
# duplication of that column and turn second column into rows of vectors
# Example_____
# dt <- data.table(x = c("A", "A", "B", "A"), y = letters[6:9])
# x y
# 1: A f
# 2: A g
# 3: B h
# 4: A i
# compress_datatable(dt)
# x y
# 1: A, c("f", "g", "i")
# 2: B "h"
#
# dt2 <- data.table(x = c("A", "A", "A", "A"), y = letters[6:9])
# compress_datatable(dt2)
col_1 <- names(dt)[1]
col_2 <- names(dt)[2]
res <- dt %>%
.[, paste(get(col_2), collapse = ","), by = .(get(col_1))] %>%
.[, (col_1) := get] %>%
# force to be a list after split, otherwise if there is only one list
# element after split, it will be treated as vector and only the force
# element of the vector will be taken.
.[, (col_2) := list(str_split(V1, ","))] %>%
.[, ":=" (get = NULL, V1 = NULL)]
return(res)
}
lookup_tablecontents <- function(table_contents, lookup){
# From a vector of table_contents find the file segments and table contents
# for each segment. Used in function read_xxx_tablecontents_()
# Args_____
# table_contents: vector of table content references
# lookup: the database where to find the match
#
# Return_____
# A data.table with two columns, file_seg and table_contents
#
# Example_____
# table_contents = c("P0150008", "P0030001", "P0030003",
# "P0080036", "PCT012G002", "PCT012G181")
# lookup <- lookup_decennial_2010
# lookup_tablecontents(table_contents, lookup)
# file_seg table_contents
# 1: 03 c("P0030001", "P0030003", "P0080036")
# 2: 05 "P0150008"
# 3: 26 c("PCT012G002", "PCT012G181")
# try to keep the order of input table_contents
order_contents <- data.table(
id = 1:length(table_contents),
content = table_contents
)
file_content <- lookup[reference %in% table_contents,
.(file_seg = file_segment,
table_contents = reference)] %>%
# if a table_content appears in multiple file_segs, keep only
# the first one
unique(by = "table_contents") %>%
.[order_contents, on = .(table_contents = content)] %>%
.[order(id)] %>%
compress_datatable()
return(file_content)
}
organize_geoheaders <- function(geo_headers) {
# convert the argument geo_headers in read_xxx() into a data.table, which
# has columns of geoheader, code, and state
# Examples_____
# geo_headers <- c("place = ut62360",
# "place = 14140",
# "cousub = ri41500",
# "cbsa = *")
# convert_geoheaders(geo_headers)
# geoheader code state
# 1: PLACE 62360 UT
# 2: PLACE 14140
# 3: COUSUB 41500 RI
# 4: CBSA
geo <- str_replace_all(geo_headers, " ", "") %>%
toupper()
res <- data.table(
geoheader = str_extract(geo, "^[^=]*"),
code = str_split(str_extract(geo, "[^=]*$"), ",")
) %>%
.[, state := str_extract(code, "^[A-Z]*")] %>%
.[, code := str_extract(code, "[0-9]*$")] %>%
.[, name := pmap_chr(list(geoheader, code, state), get_name)]
return(res)
}
organize_tablecontents <- function(table_contents) {
# convert the argument table_contents in read_xxx() into a data.table, which
# has columns of code and name. The names are given by users
# Examples_____
# table_contents <- c("aaa = C1234",
# "bbb = B14140",
# "A6432", # no given name
# "ddd = D222")
# organize_tablecontents(table_contents)
# name code
# 1: aaa A1234
# 2: bbb B14140
# 3: C6432 C6432
# 4: ddd D222
tc <- str_replace_all(table_contents, " ", "")
res <- data.table(
name = str_extract(tc, "^[^=]*"),
reference = toupper(str_extract(tc, "[^=]*$"))
)
return(res)
}
get_fips <- function(area){
# return fips code of a given area. Only provide One area
# get fips code of a county, city or town in a state, or a metro area.
# If a city or town has both PLACE and COUSUB, return PLACE only
#
# Args_____
# area : for county, city, or town, in the format like "kent county, RI",
# "Boston city, MA", "Lincoln town, RI". For metro area, in the format
# like "New York metro".
#
# Return_____
# string such as "CBSA = 39300", "PLACE = ma07000" of the area
#
# Examples_____
# get_fips("Lincoln town, RI")
# get_fips("Providence city, RI")
# get_fips("Providence metro")
# get_fips("Kent county, RI")
# get_fips("Salt Lake City city, UT")
# get_fips("COUSUB = RI41500")
# get_fips("CBSA = 39300")
area <- tolower(area)
if (str_detect(area, "=")){
geoheader <- area
# } else if (str_detect(area, "\\*")){
# if (str_detect(area, ",")){
# state <- str_extract(area, "[^ ]*$")
# geography <- str_extract(area, "[^ ]*,") %>%
# str_replace(",", "")
# }
#
# if (str_detect(area, "metro")){
# stop('Do not allow "* metro", please provide a specific metro.')
# }
#
# if (geography == "county"){
# geoheader <- dict_fips[tolower(state_abbr) == state &
# SUMLEV == "050"] %>%
# .[, .(COUNTY, state_abbr)] %>%
# .[, geoheader := paste0("COUNTY = ", state_abbr, COUNTY)] %>%
# .[, geoheader]
# }
#
# if (geography == "city"){
# geoheader <- dict_fips
# }
} else {
if (str_detect(area, ",")){
geography <- str_extract(area, "[^ ]*,") %>%
str_replace(",", "")
} else {
geography <- "metro"
area <- str_replace(area, "metro$", "") %>%
str_trim()
}
if (geography == "metro"){
geoheader <- get_cbsa(area)
} else if (geography == "county"){
state <- str_extract(area, "[^ ]*$")
name <- str_extract(area, "^[^,]*")
fips <- dict_fips[tolower(state_abbr) == state &
tolower(NAME) == name &
SUMLEV == "050"] %>%
.[, COUNTY]
if (length(fips) == 0){
stop(paste0("No match found for ", name,
". Please search a name with search_fips()."))
} else {
geoheader <- paste0("COUNTY = ", toupper(state), fips)
}
} else {
state <- str_extract(area, "[^ ]*$")
name = str_extract(area, "^[^,]*")
if (nchar(state) == 2){
fips <- dict_fips[tolower(state_abbr) == state]
} else {
fips <- dict_fips[tolower(state_full) %like% state]
}
fips <- fips[tolower(NAME) == name]
if (nrow(fips) == 0){
stop(paste0("No match found for ", area,
". Please search with search_fips() for the name."))
}
if (any(fips[, PLACE] != "00000")){
fips <- fips[PLACE != "00000", PLACE]
geoheader <- paste0("PLACE = ", toupper(state), fips)
} else {
fips <- fips[COUSUB != 0, COUSUB]
geoheader <- paste0("COUSUB = ", toupper(state), fips)
}
}
}
return(geoheader)
}
get_cbsa <- function(name){
name <- tolower(name)
cbsa <- dict_cbsa[tolower(CBSA_title) %like% name]
if (nrow(cbsa) == 0){
stop(paste0("No match found for ", name,
". Please search with search_cbsa() for the right name."))
} else {
cbsa <- cbsa[, unique(CBSA)]
if (length(cbsa) > 1){
stop(paste0("Two many matches found for ", name,
". Please search with search_cbsa() and provide a unique name."))
} else {
return(paste0("CBSA = ", cbsa))
}
}
}
get_name <- function(geoheader, fips, state = NULL){
# get the name of a metro, city, county, or town from its fips code
#
# Args_____
# state : state abbreviation such "IN"
# geoheader : geographic header such as "CBSA", "COUNTY"
# fips : fips code
#
# Return_____
# name of the area
state <- tolower(state)
geoheader <- toupper(geoheader)
if (geoheader %in% c("PLACE", "COUSUB")){
name <- dict_fips[tolower(state_abbr) == state, c(geoheader, "NAME"), with = FALSE] %>%
.[get(geoheader) == fips, NAME] %>%
paste0(", ", toupper(state))
}
if (geoheader == "COUNTY"){
name = dict_fips[tolower(state_abbr) == state & SUMLEV == "050"] %>%
.[COUNTY == fips, NAME] %>%
paste0(", ", toupper(state))
}
if (geoheader == "CBSA"){
name <- dict_cbsa[CBSA == fips, CBSA_title] %>%
unique() %>%
str_extract("^[^-|,]*") %>%
paste0(" metro")
}
return(name)
}
convert_areas <- function(areas) {
# convert the argument areas in read_xxx() into a data.table, which
# has columns of geoheader, code, and state.
# The area is given by name or fips code and is coverted into a data.table
# that has four columnes: geoheader, code, state, and name
# Examples_____
# areas <- c("PLACE = UT62360",
# "COUNTY = RI005",
# "COUSUB = RI41500",
# "CBSA = 39300",
# "Salt Lake City city, UT",
# "Bristol town, RI",
# "Salt Lake metro",
# "Kent county, RI")
# convert_areas(areas)
# geoheader code state
# geoheader code state name
# 1: PLACE 62360 UT Providence city, UT
# 2: COUNTY 005 RI Newport County, RI
# 3: COUSUB 41500 RI Lincoln town, RI
# 4: CBSA 39300 Providence metro
# 5: PLACE 67000 UT Salt Lake City city, UT
# 6: COUSUB 09280 RI Bristol town, RI
# 7: CBSA 41620 Salt Lake City metro
# 8: COUNTY 003 RI Kent County, RI
# first convert all element in areas to format "geoheader = code"
result <- map_chr(areas, get_fips) %>%
organize_geoheaders()
return(result)
}
add_geoheader <- function(dt, state, geo_headers, summary_level,
survey = "acs"){
# Add codes of "PLACE" or "COUSUB" to data.table dt when summary level is
# tract or block group.
#
# A tract or block group does not exclusively belong to a PLACE or COUSUB.
# So in summary 1, the code of PALCE and COUSUB is not provided for them.
# This function is to add the code using data from Census 2010. If a tract
# or block group belongs to multiple PLACE or COUSUB, add additional rows
# to shows the relationship.
#
# Args_____
# dt : the data.table read from read_xxxx() functions.
# state : state of the data.
# geo_headers : argument of geo_headers in read_xxxx() functions.
# summary_level : "140" for tract or "150" for block group.
# survey : survey of dt, choose from "acs" and "decennial"
# replace PLACE and COUSUB with those obtained from census 2010 with
# generate_geoid_coordinate.R
path_to_census <- Sys.getenv("PATH_TO_CENSUS")
if (survey == "acs"){
if (summary_level %in% c("150", "*")){
if ("PLACE" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/blkgrp_geoid_place/blkgrp_geoid_place_",
state, ".csv"
)
blkgrp <- fread(file, colClasses = "character") %>%
.[, .(GEOID, PLACE_tmp = PLACE)]
dt <- blkgrp[dt, on = .(GEOID), allow.cartesian=TRUE] %>%
.[SUMLEV == "150", PLACE := PLACE_tmp] %>%
.[, PLACE_tmp := NULL]
}
if ("COUSUB" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/blkgrp_geoid_cousub/blkgrp_geoid_cousub_",
state, ".csv"
)
blkgrp <- fread(file, colClasses = "character") %>%
.[, .(GEOID, COUSUB_tmp = COUSUB)]
dt <- blkgrp[dt, on = "GEOID", allow.cartesian=TRUE] %>%
.[SUMLEV == "150", COUSUB := COUSUB_tmp] %>%
.[, COUSUB_tmp := NULL]
}
}
if (summary_level %in% c("140", "*")){
if ("PLACE" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/tract_geoid_place/tract_geoid_place_",
state, ".csv"
)
tract <- fread(file, colClasses = "character") %>%
.[, .(GEOID, PLACE_tmp = PLACE)]
dt <- tract[dt, on = .(GEOID), allow.cartesian=TRUE] %>%
.[SUMLEV == "140", PLACE := PLACE_tmp] %>%
.[, PLACE_tmp := NULL]
}
if ("COUSUB" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/tract_geoid_cousub/tract_geoid_cousub_",
state, ".csv"
)
tract <- fread(file, colClasses = "character") %>%
.[, .(GEOID, COUSUB_tmp = COUSUB)]
dt <- tract[dt, on = .(GEOID), allow.cartesian=TRUE] %>%
.[SUMLEV == "140", COUSUB := COUSUB_tmp] %>%
.[, COUSUB_tmp := NULL]
}
}
}
if (survey == "decennial"){
if (summary_level %in% c("150", "*")){
if ("PLACE" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/blkgrp_geoid_place/blkgrp_geoid_place_",
state, ".csv"
)
blkgrp <- fread(file, colClasses = "character") %>%
.[, .(LOGRECNO, PLACE_tmp = PLACE)] %>%
.[, LOGRECNO := as.numeric(LOGRECNO)]
dt <- blkgrp[dt, on = .(LOGRECNO), allow.cartesian=TRUE] %>%
.[SUMLEV == "150", PLACE := PLACE_tmp] %>%
.[, PLACE_tmp := NULL]
}
if ("COUSUB" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/blkgrp_geoid_cousub/blkgrp_geoid_cousub_",
state, ".csv"
)
blkgrp <- fread(file, colClasses = "character") %>%
.[, .(LOGRECNO, COUSUB_tmp = COUSUB)] %>%
.[, LOGRECNO := as.numeric(LOGRECNO)]
dt <- blkgrp[dt, on = .(LOGRECNO), allow.cartesian=TRUE] %>%
.[SUMLEV == "150", COUSUB := COUSUB_tmp] %>%
.[, COUSUB_tmp := NULL]
}
}
if (summary_level %in% c("140", "*")){
if ("PLACE" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/tract_geoid_place/tract_geoid_place_",
state, ".csv"
)
tract <- fread(file, colClasses = "character") %>%
.[, .(LOGRECNO, PLACE_tmp = PLACE)] %>%
.[, LOGRECNO := as.numeric(LOGRECNO)]
dt <- tract[dt, on = .(LOGRECNO), allow.cartesian=TRUE] %>%
.[SUMLEV == "140", PLACE := PLACE_tmp] %>%
.[, PLACE_tmp := NULL]
}
if ("COUSUB" %in% geo_headers){
file <- paste0(
path_to_census,
"/generated_data/tract_geoid_cousub/tract_geoid_cousub_",
state, ".csv"
)
tract <- fread(file, colClasses = "character") %>%
.[, .(LOGRECNO, COUSUB_tmp = COUSUB)] %>%
.[, LOGRECNO := as.numeric(LOGRECNO)]
dt <- tract[dt, on = .(LOGRECNO), allow.cartesian=TRUE] %>%
.[SUMLEV == "140", COUSUB := COUSUB_tmp] %>%
.[, COUSUB_tmp := NULL]
}
}
}
return(dt)
}
add_coord <- function(dt, state, geo_headers = NULL){
# Add coordinates to dt read from ACS 1-year and ACS 5-year surveys.
#
# The summary file of ACS 1-year or 5-year surveys does not have
# (lon, lat) of geographic area. In addition, it also has missing code of
# geo_headers. This function adds coordinates and codes to dt using data
# generated from Census 2010 summary file 1 based on GEOID. The data.table
# dt can be any data that conitains a GEOID column.
#
# Args_____
# dt : the data.table read from decennial census or ACS 1year and 5-year
# survey.
# state : state of the data. The generated data are split by state in order
# save reading time.
# geo_headers : argument of geo_headers in read_xxxx() functions.
path_to_census <- Sys.getenv("PATH_TO_CENSUS")
file <- paste0(path_to_census, "/generated_data/geoid_coord/geoid_coord_",
state, ".csv")
coord <- fread(file,
select = c("GEOID", "lon", "lat", geo_headers),
colClasses = "character") %>%
.[, lon := as.numeric(lon)] %>%
.[, lat := as.numeric(lat)]
dt <- coord[dt, on = .(GEOID)]
return(dt)
}
switch_summarylevel <- function(summary_level, year = NULL){
# This function switch summary level from plain text to code
common_level <- c("state", "county", "county subdivision", "place",
"tract", "block group", "block")
if (summary_level %in% common_level){
if (!is.null(year)){
if (year == 2000){
block_code <- "101"
} else if (year == 2010){
block_code <- "100"
}
}
summary_level <- switch(summary_level,
"state" = "040",
"county" = "050",
"county subdivision" = "060",
"place" = "160",
"tract" = "140",
"block group" = "150",
"block" = block_code)
}
return(summary_level)
}
switch_geocomp <- function(geo_comp){
# Switch only common geocomponent, leave others alone
common_geo <- c("total", "urban", "urbanized area", "urban cluster", "rural")
if (geo_comp %in% common_geo){
geo_comp <- switch(
geo_comp,
"total" = "00",
"urban" = "01",
"urbanized area" = "04",
"urban cluster" = "28",
"rural" = "43"
)
}
return(geo_comp)
}
convert_geocomp_name <- function(dt){
# convert common geocomp from code to name
# convert only the following common geocomp
dt[GEOCOMP == "00", GEOCOMP := "total"] %>%
.[GEOCOMP == "01", GEOCOMP := "urban"] %>%
.[GEOCOMP == "04", GEOCOMP := "urbanized area"] %>%
.[GEOCOMP == "28", GEOCOMP := "urban cluster"] %>%
.[GEOCOMP == "43", GEOCOMP := "rural"]
return(dt)
}
select_columns <- function(df, contains){
# select column names that contain element in a vector contains
# Args____
# df: a data frame or data table
# contains: such as 2010:2013 and c("aaa", "bbb")
cols <- names(df)
selected <- str_detect(cols, paste0(contains, collapse = "|"))
selected_cols <- cols[selected]
return(selected_cols)
}
# add GEOID ====================================================================
# The following functions help add GEOID to decennial census data, which is not
# provided in the summary file 1.
get_geoheaders_of_summarylevel <- function(summary_level){
# get geoheaders that required to generate GEOID of a given summary level
if (summary_level == "010") {
geoheaders <- c("GEOCOMP")
} else if (summary_level == "020"){
geoheaders <- c("GEOCOMP", "REGION")
} else if (summary_level == "030"){
geoheaders <- c("GEOCOMP", "DIVISION")
} else if (summary_level == "040"){
geoheaders <- c("GEOCOMP", "STATE")
} else if (summary_level == "050"){
geoheaders <- c("GEOCOMP", "STATE", "COUNTY")
} else if (summary_level == "060"){
geoheaders <- c("GEOCOMP", "STATE", "COUNTY", "COUSUB")
} else if (summary_level == "070"){
geoheaders <- c("GEOCOMP", "STATE", "COUNTY", "COUSUB", "PLACE")
} else if (summary_level == "140"){
geoheaders <- c("GEOCOMP", "STATE", "COUNTY", "TRACT")
} else if (summary_level == "150"){
geoheaders <- c("GEOCOMP", "STATE", "COUNTY", "TRACT", "BLKGRP")
} else if (summary_level %in% c("100", "101")){
# first digit of BLOCK is BLKGRP
geoheaders <- c("GEOCOMP", "STATE", "COUNTY", "TRACT", "BLOCK")
} else if (summary_level == "155"){
geoheaders <- c("GEOCOMP", "STATE", "PLACE", "COUNTY")
} else if (summary_level == "160"){
geoheaders <- c("GEOCOMP", "STATE", "PLACE")
} else if (summary_level == "170"){
geoheaders <- c("GEOCOMP", "STATE", "CONCIT")
} else if (summary_level == "172"){
geoheaders <- c("GEOCOMP", "STATE", "CONCIT", "PLACE")
} else if (summary_level == "230"){
geoheaders <- c("GEOCOMP", "STATE", "ANRC")
} else if (summary_level == "250"){
geoheaders <- c("GEOCOMP", "AIANHH")
} else if (summary_level == "251"){
geoheaders <- c("GEOCOMP", "AIANHH", "AITSCE")
} else if (summary_level == "252"){
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI")
} else if (summary_level == "254"){
# same as 252, one is R one is T
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI")
} else if (summary_level == "256"){
geoheaders <- c("GEOCOMP", "AIANHH", "TTRACT")
} else if (summary_level == "258"){
geoheaders <- c("GEOCOMP", "AIANHH", "TTRACT", "TBLKGRP")
} else if (summary_level == "260"){
geoheaders <- c("GEOCOMP", "AIANHH", "STATE")
} else if (summary_level == "269"){
geoheaders <- c("GEOCOMP", "AIANHH", "STATE", "PLACE")
} else if (summary_level == "270"){
geoheaders <- c("GEOCOMP", "AIANHH", "STATE", "COUNTY")
} else if (summary_level == "280"){
geoheaders <- c("GEOCOMP", "STATE", "AIANHH")
} else if (summary_level == "283"){
geoheaders <- c("GEOCOMP", "STATE", "AIANHH", "AIHHTLI")
} else if (summary_level == "286"){
# same as 283
geoheaders <- c("GEOCOMP", "STATE", "AIANHH", "AIHHTLI")
} else if (summary_level == "290"){
geoheaders <- c("GEOCOMP", "AIANHH", "AITSCE", "STATE")
} else if (summary_level == "291"){
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI", "TTRACT")
} else if (summary_level == "292"){
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI", "TTRACT")
} else if (summary_level == "293"){
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI", "TTRACT", "TBLKGRP")
} else if (summary_level == "294"){
geoheaders <- c("GEOCOMP", "AIANHH", "AIHHTLI", "TTRACT", "TBLKGRP")
} else if (summary_level == "310"){
geoheaders <- c("GEOCOMP", "CBSA")
} else if (summary_level == "311"){
geoheaders <- c("GEOCOMP", "CBSA", "STATE")
} else if (summary_level == "312"){
geoheaders <- c("GEOCOMP", "CBSA", "STATE", "PLACE")
} else if (summary_level == "313"){
geoheaders <- c("GEOCOMP", "CBSA", "STATE", "COUNTY")
} else if (summary_level == "314"){
geoheaders <- c("GEOCOMP", "CBSA", "METDIV")
} else if (summary_level == "315"){
geoheaders <- c("GEOCOMP", "CBSA", "METDIV", "STATE")
} else if (summary_level == "316"){
geoheaders <- c("GEOCOMP", "CBSA", "METDIV", "STATE", "COUNTY")
} else if (summary_level == "320"){
geoheaders <- c("GEOCOMP", "STATE", "CBSA")
} else if (summary_level == "321"){
geoheaders <- c("GEOCOMP", "STATE", "CBSA", "PLACE")
} else if (summary_level == "322"){
geoheaders <- c("GEOCOMP", "STATE", "CBSA", "COUNTY")
} else if (summary_level == "323"){
geoheaders <- c("GEOCOMP", "STATE", "CBSA", "METDIV")
} else if (summary_level == "324"){
geoheaders <- c("GEOCOMP", "STATE", "CBSA", "METDIV", "COUNTY")
} else if (summary_level == "330"){
geoheaders <- c("GEOCOMP", "CSA")
} else if (summary_level == "331"){
geoheaders <- c("GEOCOMP", "CSA", "STATE")
} else if (summary_level == "332"){
geoheaders <- c("GEOCOMP", "CSA", "CBSA")
} else if (summary_level == "333"){
geoheaders <- c("GEOCOMP", "CSA", "CBSA", "STATE")
} else if (summary_level == "335"){
geoheaders <- c("GEOCOMP", "CNECTA")
} else if (summary_level == "336"){
geoheaders <- c("GEOCOMP", "CNECTA", "STATE")
} else if (summary_level == "337"){
geoheaders <- c("GEOCOMP", "CNECTA", "NECTA")
} else if (summary_level == "338"){
geoheaders <- c("GEOCOMP", "CNECTA", "NECTA", "STATE")
} else if (summary_level == "340"){
geoheaders <- c("GEOCOMP", "STATE", "CSA")
} else if (summary_level == "341"){
geoheaders <- c("GEOCOMP", "STATE", "CSA", "CBSA")
} else if (summary_level == "345"){
geoheaders <- c("GEOCOMP", "STATE", "CNECTA")
} else if (summary_level == "346"){
geoheaders <- c("GEOCOMP", "STATE", "CNECTA", "NECTA")
} else if (summary_level == "350"){
geoheaders <- c("GEOCOMP", "NECTA")
} else if (summary_level == "351"){
geoheaders <- c("GEOCOMP", "NECTA", "STATE")
} else if (summary_level == "352"){
geoheaders <- c("GEOCOMP", "NECTA", "STATE", "PLACE")
} else if (summary_level == "353"){
geoheaders <- c("GEOCOMP", "NECTA", "STATE", "COUNTY")
} else if (summary_level == "354"){
geoheaders <- c("GEOCOMP", "NECTA", "STATE", "COUNTY", "COUSUB")
} else if (summary_level == "355"){
geoheaders <- c("GEOCOMP", "NECTA", "NECTADIV")
} else if (summary_level == "356"){
geoheaders <- c("GEOCOMP", "NECTA", "NECTADIV", "STATE")
} else if (summary_level == "357"){
geoheaders <- c("GEOCOMP", "NECTA", "NECTADIV", "STATE", "COUNTY")
} else if (summary_level == "358"){
geoheaders <- c("GEOCOMP", "NECTA", "NECTADIV", "STATE", "COUNTY", "COUSUB")
} else if (summary_level == "360"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA")
} else if (summary_level == "361"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "PLACE")
} else if (summary_level == "362"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "COUNTY")
} else if (summary_level == "363"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "COUNTY", "COUSUB")
} else if (summary_level == "364"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "NECTADIV")
} else if (summary_level == "365"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "NECTADIV", "COUNTY")
} else if (summary_level == "366"){
geoheaders <- c("GEOCOMP", "STATE", "NECTA", "NECTADIV", "COUNTY", "COUSUB")
} else if (summary_level == "400"){
geoheaders <- c("GEOCOMP", "UA")
} else if (summary_level == "410"){
geoheaders <- c("GEOCOMP", "UA", "STATE")
} else if (summary_level == "430"){
geoheaders <- c("GEOCOMP", "UA", "STATE", "COUNTY")
} else if (summary_level == "500"){
geoheaders <- c("GEOCOMP", "STATE", "CD")
} else if (summary_level == "510"){
geoheaders <- c("GEOCOMP", "STATE", "CD", "COUNTY")
} else if (summary_level == "550"){
geoheaders <- c("GEOCOMP", "STATE", "CD", "AIANHH")
} else if (summary_level == "610"){
geoheaders <- c("GEOCOMP", "STATE", "SLDU")
} else if (summary_level == "612"){
geoheaders <- c("GEOCOMP", "STATE", "SLDU", "COUNTY")
} else if (summary_level == "620"){
geoheaders <- c("GEOCOMP", "STATE", "SLDL")
} else if (summary_level == "622"){
geoheaders <- c("GEOCOMP", "STATE", "SLDL", "COUNTY")
} else if (summary_level == "795"){
geoheaders <- c("GEOCOMP", "STATE", "PUMA")
} else if (summary_level == "860"){
geoheaders <- c("GEOCOMP", "ZCTA5")
} else if (summary_level == "950"){
geoheaders <- c("GEOCOMP", "STATE", "SDELM")
} else if (summary_level == "960"){
geoheaders <- c("GEOCOMP", "STATE", "SDSEC")
} else if (summary_level == "970"){
geoheaders <- c("GEOCOMP", "STATE", "SDUNI")
}
return(geoheaders)
}
add_geoid <- function(dt, summary_level){
# add GEOID to data.table dt that has geoheaders of summary_level
geoheaders <- get_geoheaders_of_summarylevel(summary_level)
# add first three element to GEOID
dt[, GEOID := paste0(summary_level, get("GEOCOMP"), "US")]
# add all others
for (gh in geoheaders[2:length(geoheaders)]){
dt[, GEOID := paste0(GEOID, get(gh))]
}
return(dt)
}
# add ACS names to decennial census data ======================================
add_acsname <- function(dt){
# assign acs_NAME to decennial census data
path_to_census <- Sys.getenv("PATH_TO_CENSUS")
file_name <- paste0(path_to_census,
"/generated_data/acs_geoid_name/acs_geoid_name.csv")
generated <- fread(file_name) %>%
.[, .(geoid_tmp = GEOID, acs_NAME)]
# for block in dt, give the tract name from ACS
dt[, geoid_tmp := GEOID]
dt[GEOID %like% "10000US", geoid_tmp := paste0("14000US", str_sub(GEOID, 8, 18))]
dt <- generated[dt, on = .(geoid_tmp)] %>%
.[, geoid_tmp := NULL]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.