## code to prepare `naics` dataset goes here
library(dplyr)
library(stringr)
source("R/readxl_online.R")
# update
url <- "https://www.census.gov/eos/www/naics/2017NAICS/2-6%20digit_2017_Codes.xlsx"
# naics xwalk 07-12-17 ----
naics1712 <- metro.data::readxl_online("https://www.census.gov/eos/www/naics/concordances/2017_to_2012_NAICS.xlsx", skip = 2) %>%
mutate(naics6_code_17 = as.character(`2017 NAICS Code`),
naics6_code_12 = as.character(`2012 NAICS Code`))
naics0712 <- metro.data::readxl_online("https://www.census.gov/eos/www/naics/concordances/2007_to_2012_NAICS.xls", skip = 2) %>%
mutate(naics6_code_07 = as.character(`2007 NAICS Code`),
naics6_code_12 = as.character(`2012 NAICS Code`))
# get naics data ----------------------
get_naics <- function(url) {
tmp <- readxl_online(url) %>%
# remove columns contain only nas
select_if(~ sum(!is.na(.)) > 0) %>%
select(-1)
names(tmp) <- c("naics_code", "naics_name")
return(tmp)
}
# wrangle to wide relation file
reshape_naics <- function(df) {
df %>%
filter(str_length(naics_code) == 6) %>%
mutate(
naics5_code = substr(naics_code, 1, 5),
naics4_code = substr(naics_code, 1, 4),
naics3_code = substr(naics_code, 1, 3),
naics2_code = substr(naics_code, 1, 2)
) %>%
# correct for naics2 aggregates
mutate(naics2_code = case_when(
naics2_code %in% c("31", "32", "33") ~ "31-33",
naics2_code %in% c("44", "45") ~ "44-45",
naics2_code %in% c("48", "49") ~ "48-49",
T ~ naics2_code
)) %>%
# make naics6 as unique rows, other idustry level as corresponding columns
left_join(tmp, by = c("naics5_code" = "naics_code"), suffix = c("", "5")) %>%
left_join(tmp, by = c("naics4_code" = "naics_code"), suffix = c("", "4")) %>%
left_join(tmp, by = c("naics3_code" = "naics_code"), suffix = c("", "3")) %>%
left_join(tmp, by = c("naics2_code" = "naics_code"), suffix = c("6", "2")) %>%
select(
naics6_code = naics_code,
contains("6"), contains("5"), contains("4"), contains("3"), contains("2")
)
}
# add advanced industry categories
load("data/naics4_ai.rda")
naics4_ai <- naics4_ai %>%
rename(naics_code = naics4_code)
# supply chain categories/tradables
load("data-raw/naics6_sc.rda")
load("data-raw/naics3_sc.rda")
naics_sc <- bind_rows(naics3_sc, naics6_sc)
load("data-raw/naics3_BEST.rda")
# run this ==================================================
naics <- naics %>%
get_naics(url) %>%
mutate(naics_level = ifelse(grepl("-",naics_code),2,
str_length(naics_code))) %>%
left_join(naics4_ai, by = "naics_code") %>%
left_join(naics1712[c("naics6_code_17","naics6_code_12")], by = c("naics_code" = "naics6_code_17")) %>%
mutate(naics6_code_12 = ifelse(is.na(naics6_code_12),naics_code,naics6_code_12)) %>%
left_join(naics_sc, by = c("naics6_code_12"="naics_code")) %>%
left_join(naics0712[c("naics6_code_07","naics6_code_12")], by = "naics6_code_12") %>%
mutate(BEST = naics_code %in% naics3_BEST)
# save
usethis::use_data(naics, overwrite = T)
# usethis::use_data(naics4_ai, overwrite = T)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.