# Pull the 6 digit NAICS associated with crops/livestock from the loaded 2012 NAICS codes and names. Manually create
# 8 digit NAICS for Census of Agriculture (CoA) Crops and Livestock. The purpose of NAICS8 are *only* to help aggregate
# CoA data to NAICS6. NAICS8 are unofficial and are not used again after initial aggregation to NAICS6. NAICS8 are based
# on NAICS definitions from the Census.
# Generate Census of Agriculture NAICS8 from 2012 Census NAICS6
getCoANAICS8 <- function(){
####### Pull NAICS6 from NAICS 2012 names and codes ########
# NAICS 2012 file used in "NAICS.R"
FileName <- "inst/extdata/2-digit_2012_Codes.xls"
NAICS_raw <- as.data.frame(readxl::read_excel(FileName, sheet = 1, col_names = TRUE))[-1,-1]
# modify column names
NAICS <- NAICS_raw
colnames(NAICS) <- c("NAICS_Code", "NAICS_Name")
# trim any possible whitespace
NAICS$NAICS_Code <- trimws(NAICS$NAICS_Code)
NAICS$NAICS_Name <- trimws(NAICS$NAICS_Name)
# for some data (like aquaculture), the NAICS activity is duplicated, but the NAICS Codes are different, even when
# standardized to 8 digit NAICS. Remove duplicates in activity - first need to sort the dataframe
NAICS <- NAICS[order(NAICS$NAICS_Code),]
NAICS <- NAICS[!duplicated(NAICS$NAICS_Name),]
# extract rows related to agriculture/livestock
df <- subset(NAICS, substr(NAICS$NAICS_Code, 1, 2) == '11')
# right pad to make all NAICS 8 digits
df$NAICS_Code <- stringr::str_pad(df$NAICS_Code, 8, "right", pad="0")
# more specifically, don't want rows where first 3 digits are 113, 114, 115
df <- subset(df, !substr(df$NAICS_Code, 1, 3) =='113')
df <- subset(df, !substr(df$NAICS_Code, 1, 3) =='114')
df <- subset(df, !substr(df$NAICS_Code, 1, 3) =='115')
# remove duplicate rows introduced by standardizing length of naics
df <- df[!duplicated(df), ]
####### Add CoA NAICS8 by hard coding ##############
# CoA NAICS names are formatted to match those generated by NASS Quickstats
# https://quickstats.nass.usda.gov/
# create new df to incorporate coa naics
df2 <- data.frame(NAICS8=integer(),
Activity=character(),
stringsAsFactors=FALSE)
#### CROPS ####################3
# coa equivalent to agriculture, forestry, fishing, and hunting
df2[nrow(df2) + 1,] = c('11000000', 'AG LAND')
# coa equivalent to crop production: 111000
df2[nrow(df2) + 1,] = c('11100000', 'AG LAND, CROPLAND')
df2[nrow(df2) + 1,] = c('1110000A', 'FIELD CROPS, OTHER')
# coa equivalent to Animal Production and Aquaculture: 112000
df2[nrow(df2) + 1,] = c('11200000', 'AG LAND, PASTURELAND')
## coa equivalent to soybean farming: 111110
df2[nrow(df2) + 1,] = c('11111000', 'SOYBEANS')
# coa aggregates to oilseed (except soybean) farming: 111120
df2[nrow(df2) + 1,] = c('1111200A', 'CANOLA')
df2[nrow(df2) + 1,] = c('1111200B', 'FLAXSEED')
df2[nrow(df2) + 1,] = c('1111200C', 'MUSTARD')
df2[nrow(df2) + 1,] = c('1111200D', 'RAPESEED')
df2[nrow(df2) + 1,] = c('1111200E', 'SAFFLOWER')
df2[nrow(df2) + 1,] = c('1111200F', 'SESAME')
df2[nrow(df2) + 1,] = c('1111200G', 'SUNFLOWER')
# coa aggregates to dry pea and bean farming: 111130
df2[nrow(df2) + 1,] = c('1111300A', 'BEANS, DRY EDIBLE, (EXCL LIMA), INCL CHICKPEAS')
df2[nrow(df2) + 1,] = c('1111300B', 'BEANS, DRY EDIBLE, (EXCL CHICKPEAS & LIMA)')
df2[nrow(df2) + 1,] = c('1111300C', 'BEANS, DRY EDIBLE, LIMA')
df2[nrow(df2) + 1,] = c('1111300D', 'CHICKPEAS')
df2[nrow(df2) + 1,] = c('1111300E', 'LENTILS')
df2[nrow(df2) + 1,] = c('1111300F', 'PEAS, AUSTRIAN WINTER')
df2[nrow(df2) + 1,] = c('1111300G', 'PEAS, DRY EDIBLE')
df2[nrow(df2) + 1,] = c('1111300H', 'PEAS, DRY, SOUTHERN (COWPEAS)')
#df2[nrow(df2) + 1,] = c('', 'BEANS, MUNG') # last year published 2002
# coa equivalent to wheat farming: 111140
df2[nrow(df2) + 1,] = c('11114000', 'WHEAT')
# coa aggregates to corn farming: 111150
df2[nrow(df2) + 1,] = c('11115000', 'CORN')
df2[nrow(df2) + 1,] = c('1111500A', 'CORN, GRAIN')
df2[nrow(df2) + 1,] = c('1111500B', 'CORN, SILAGE')
df2[nrow(df2) + 1,] = c('1111500C', 'POPCORN')
# coa equivalent to rice farming: 111160
df2[nrow(df2) + 1,] = c('11116000', 'RICE')
# coa equates to other grain farming: 111190
df2[nrow(df2) + 1,] = c('11119000', 'SMALL GRAINS, WHEAT & BARLEY & OATS & RYE')
# coa aggregates to all other grain farming: 111199
df2[nrow(df2) + 1,] = c('1111990A', 'BARLEY')
df2[nrow(df2) + 1,] = c('1111990B', 'BUCKWHEAT')
df2[nrow(df2) + 1,] = c('1111990C', 'MILLET, PROSO')
df2[nrow(df2) + 1,] = c('1111990D', 'OATS')
df2[nrow(df2) + 1,] = c('1111990E', 'RYE')
df2[nrow(df2) + 1,] = c('1111990F', 'SORGHUM, GRAIN')
df2[nrow(df2) + 1,] = c('1111990G', 'SORGHUM, SILAGE')
df2[nrow(df2) + 1,] = c('1111990H', 'SORGHUM, SYRUP')
df2[nrow(df2) + 1,] = c('1111990I', 'TRITICALE')
df2[nrow(df2) + 1,] = c('1111990J', 'WILD RICE')
# df2[nrow(df2) + 1,] = c('', 'SWEET RICE') # last year published 2002
# coa equivalent to vegetable and melon farming: 111200
df2[nrow(df2) + 1,] = c('11120000', 'VEGETABLE TOTALS') # this category does include melons
# coa aggregates to fruit and tree nut farming: 111300
df2[nrow(df2) + 1,] = c('11130000', 'ORCHARDS')
df2[nrow(df2) + 1,] = c('1113000A', 'BERRY TOTALS')
# coa aggregates to other noncitrus fruit farming: 111339
# missing pinneapples
# coa aggregates to greenhouse nursery and floriculture production: 111400
df2[nrow(df2) + 1,] = c('11140000', 'HORTICULTURE TOTALS')
df2[nrow(df2) + 1,] = c('1114000A', 'CUT CHRISTMAS TREES')
df2[nrow(df2) + 1,] = c('11140008', 'SHORT TERM WOODY CROPS')
# coa equivalent to other crop farming: 111900
df2[nrow(df2) + 1,] = c('11190000', 'CROPS, OTHER')
# coa equivalent to tobacco farming: 111910
df2[nrow(df2) + 1,] = c('11191000', 'TOBACCO')
# coa aggregates to cotton: 111920
df2[nrow(df2) + 1,] = c('11192000', 'COTTON')
# coa aggregates to sugarcane farming: 111930
df2[nrow(df2) + 1,] = c('1119300A', 'SUGARCANE, SUGAR')
df2[nrow(df2) + 1,] = c('1119300B', 'SUGARCANE, SEED')
# coa aggregates to hay farming: 111940
df2[nrow(df2) + 1,] = c('11194000', 'HAY & HAYLAGE')
df2[nrow(df2) + 1,] = c('1119400A', 'HAY & HAYLAGE (EXCL ALFALFA)')
df2[nrow(df2) + 1,] = c('1119400B', 'HAY & HAYLAGE, ALFALFA')
df2[nrow(df2) + 1,] = c('11194010', 'HAY')
df2[nrow(df2) + 1,] = c('1119401A', 'HAY (EXCL ALFALFA)')
df2[nrow(df2) + 1,] = c('1119401B', 'HAY, ALFALFA')
df2[nrow(df2) + 1,] = c('11194020', 'HAYLAGE')
df2[nrow(df2) + 1,] = c('1119402A', 'HAYLAGE (EXCL ALFALFA)')
df2[nrow(df2) + 1,] = c('1119402B', 'HAYLAGE, ALFALFA')
# coa aggregates to all other crop farming: 111990
df2[nrow(df2) + 1,] = c('1119900A', 'DILL')
df2[nrow(df2) + 1,] = c('1119900C', 'GRASSES & LEGUMES TOTALS')
df2[nrow(df2) + 1,] = c('1119900D', 'GUAR')
df2[nrow(df2) + 1,] = c('1119900E', 'HERBS, DRY')
df2[nrow(df2) + 1,] = c('1119900F', 'HOPS')
df2[nrow(df2) + 1,] = c('1119900G', 'JOJOBA')
df2[nrow(df2) + 1,] = c('1119900H', 'MISCANTHUS')
df2[nrow(df2) + 1,] = c('1119900I', 'PEANUTS')
df2[nrow(df2) + 1,] = c('1119900J', 'SUGARBEETS')
###### ANIMALS ###################3
# cattle ranching and farming: 112100
# beef cattle ranching and farming including feedlots: 112110
df2[nrow(df2) + 1,] = c('1121100A', 'BISON')
df2[nrow(df2) + 1,] = c('1121100B', 'CATTLE, (EXCL COWS)')
df2[nrow(df2) + 1,] = c('1121100C', 'CATTLE, COWS')
df2[nrow(df2) + 1,] = c('1121100D', 'CATTLE, INCL CALVES')
# beef cattle ranching and farming including feedlots: 112111
df2[nrow(df2) + 1,] = c('11211100', 'CATTLE, COWS, BEEF')
# cattle feedlots: 112112
df2[nrow(df2) + 1,] = c('112112', 'CATTLE, ON FEED')
# dairy cattle and milk production: 112120
df2[nrow(df2) + 1,] = c('1121200A', 'CATTLE, COWS, MILK')
# hog and pig farming: 112200
df2[nrow(df2) + 1,] = c('11220000', 'HOGS')
# poultry and egg production: 112300
# chicken egg production: 112310
df2[nrow(df2) + 1,] = c('1123100A', 'CHICKENS, LAYERS')
# broilers and other meat-type chicken production: 112320
df2[nrow(df2) + 1,] = c('1123200A', 'CHICKENS, BROILERS')
df2[nrow(df2) + 1,] = c('1123200B', 'CHICKENS, PULLETS, REPLACEMENT')
df2[nrow(df2) + 1,] = c('1123200C', 'CHICKENS, ROOSTERS')
# turkey production: 112330
df2[nrow(df2) + 1,] = c('11233000', 'TURKEYS')
# poultry hatcheries: 112340
# other poultry production: 112390
df2[nrow(df2) + 1,] = c('1123900A', 'CHUKARS')
df2[nrow(df2) + 1,] = c('1123900B', 'DUCKS')
df2[nrow(df2) + 1,] = c('1123900C', 'EMUS')
df2[nrow(df2) + 1,] = c('1123900D', 'GEESE')
df2[nrow(df2) + 1,] = c('1123900E', 'GUINEAS')
df2[nrow(df2) + 1,] = c('1123900F', 'OSTRICHES')
df2[nrow(df2) + 1,] = c('1123900G', 'PARTRIDGES, HUNGARIAN')
df2[nrow(df2) + 1,] = c('1123900H', 'PEAFOWL, HENS & COCKS')
df2[nrow(df2) + 1,] = c('1123900J', 'PHEASANTS')
df2[nrow(df2) + 1,] = c('1123900K', 'PIGEONS & SQUAB')
df2[nrow(df2) + 1,] = c('1123900L', 'POULTRY, OTHER')
df2[nrow(df2) + 1,] = c('1123900M', 'QUAIL')
df2[nrow(df2) + 1,] = c('1123900N', 'RHEAS')
# sheep and goat farming: 112400
df2[nrow(df2) + 1,] = c('11240000','SHEEP & GOATS TOTALS')
# sheep farming: 112410
df2[nrow(df2) + 1,] = c('11241000', 'SHEEP, INCL LAMBS')
# goat farming: 112420
df2[nrow(df2) + 1,] = c('11242000', 'GOATS')
# animal aquaculture: 112500
df2[nrow(df2) + 1,] = c('11250000', 'AQUACULTURE TOTALS')
# part of Finfish farming
df2[nrow(df2) + 1,] = c('1125110A', 'FOOD FISH, CATFISH')
df2[nrow(df2) + 1,] = c('1125110B', 'FOOD FISH, TROUT')
df2[nrow(df2) + 1,] = c('1125110C', 'FOOD FISH, (EXCL CATFISH & TROUT)')
df2[nrow(df2) + 1,] = c('1125110D', 'BAITFISH')
df2[nrow(df2) + 1,] = c('1125110E', 'ORNAMENTAL FISH')
df2[nrow(df2) + 1,] = c('1125110F', 'SPORT FISH')
# part of Shellfish farming
df2[nrow(df2) + 1,] = c('1125120A', 'CRUSTACEANS')
df2[nrow(df2) + 1,] = c('1125120B', 'MOLLUSKS')
df2[nrow(df2) + 1,] = c('11251900', 'AQUACULTURE, OTHER')
# other animal production: 112900
# apiculture: 112910
df2[nrow(df2) + 1,] = c('1129100A', 'HONEY')
df2[nrow(df2) + 1,] = c('1129100B', 'HONEY, BEE COLONIES')
# horse and other equine production: 112920
df2[nrow(df2) + 1,] = c('11292000','EQUINE, (HORSES & PONIES) & (MULES & BURROS & DONKEYS)')
df2[nrow(df2) + 1,] = c('1129200A','EQUINE, HORSES AND POINIES')
df2[nrow(df2) + 1,] = c('1129200B','EQUINE, MULES & BURROS & DONKEYS')
# fur-bearing animal and rabbit production: 112930
df2[nrow(df2) + 1,] = c('1129300A', 'MINK, LIVE')
df2[nrow(df2) + 1,] = c('1129300B', 'RABBITS, LIVE')
# all other animal production: 112990
df2[nrow(df2) + 1,] = c('1129900A', 'ALPACAS')
df2[nrow(df2) + 1,] = c('1129900B', 'DEER')
df2[nrow(df2) + 1,] = c('1129900C', 'ELK')
df2[nrow(df2) + 1,] = c('1129900D', 'LLAMAS')
##### JOIN imported naics and manually created naics dataframes
df3 <- merge(df, df2, by.x = "NAICS_Code", by.y = "NAICS8", all = TRUE)
# add additional column of original NAICS 2-6 digit codes pulled from NAICS csv file
df4 <- merge(df3, NAICS, by = "NAICS_Name", all.x = TRUE)
# rename columns and reorder
names(df4)[names(df4) == 'NAICS_Code.y'] <- 'NAICS_2012_Code'
names(df4)[names(df4) == 'NAICS_Code.x'] <- 'NAICS8_Code'
df4 <-df4[ , c("NAICS_2012_Code", "NAICS8_Code", "NAICS_Name", "Activity")]
# sort data by NAICS8 code
df5 <- df4[order(df4$NAICS8_Code, df4$NAICS_2012_Code),]
# remove rows where the NAICS_2012_Code ends in "0". Because in all cases, this is a 6 digit NAICS
# and the 5 digit NAICS also exists and has the same NAICS code name. Want to avoid possible double counting.
df6 <- subset(df5, (!substr(df5$NAICS_2012_Code, 6, 6)=='0') | is.na(df5$NAICS_2012_Code))
# At times, one NAICS8 corresponds directly to one NAICS 5/6 digit. In other cases, multiple NAICS8 must be summed to
# calculate the value of one NAICS 5/6 digit. This next section of code determines occurances of the later situation
# and assigns a "NAICS_2012_Code" alongside a NAICS8_Code. If "NAICS_2012_Code" is left as NA, then the row of data
# does not need to be summed.
# add NAICS7 column used to match two dataframes - drop later
df6$NAICS7 <- substr(df6$NAICS8_Code, 1, 7)
# create subset of df where Activity = NA
df_s <- subset(df6, is.na(Activity))
# modify naics8 to naics7
df_s$NAICS7 <- substr(df_s$NAICS8_Code, 1, 7)
# rename column
names(df_s)[names(df_s) == 'NAICS_2012_Code'] <- 'NAICS_Replacement'
# select columns
df_s <- df_s[ , c("NAICS_Replacement", "NAICS7")]
# merge master dataframe with subset dataframe by naics7, then drop naics7
df7 <- merge(df6, df_s, by = "NAICS7", all.x = TRUE)
df7 <- subset(df7, select = -c(NAICS7))
# sort data by NAICS8 code
df8 <- df7[order(df7$NAICS8_Code, df7$NAICS_2012_Code),]
# if NAICS_12_Code is NA, replace with NAICS_Replacement column (some of which are also NA)
df8$NAICS_2012_Code[is.na(df8$NAICS_2012_Code)] <- df8$NAICS_Replacement[is.na(df8$NAICS_2012_Code)]
#drop NAICS replacement column
df8 <- subset(df8, select = -c(NAICS_Replacement))
### finished assigning NAICS codes. Now, modify presentation
# if the coa activity definition colum is NA, replace with the 2012 NAICS name
df9 <- df8
df9$Activity[is.na(df9$Activity)] <- as.character(df9$NAICS_Name[is.na(df9$Activity)])
# select specific columns
df9 <-df9[ , c("NAICS_2012_Code", "NAICS8_Code", "Activity")]
# sort df
CoAtoNAICS <- df9[order(df9$NAICS8_Code, df9$NAICS_2012_Code),]
# write as csv
utils::write.csv(CoAtoNAICS, "inst/extdata/Crosswalk_CoAtoNAICS.csv", na = "", row.names = FALSE, fileEncoding="UTF-8")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.