vignettes/0-NASS-OHIO.R
In usdarnass: USDA NASS Quick Stats API

# NASS API for their Quickstats web interface:
# https://quickstats.nass.usda.gov/

# devtools::install_github("rdinter/rnass")
#source("0-data/0-api_keys.R")

# devtools::install_github("rdinter/nassR")
library("nassR")
library("tidyverse")

# Create a directory for the data
local_dir    <- "0-data/OHIO"
data_source <- paste0(local_dir, "/raw")
if (!file.exists(local_dir)) dir.create(local_dir)
if (!file.exists(data_source)) dir.create(data_source)

# NOTE: a major issue in USDA county level data is that sometimes there are
#  multiple counties combined together as "OTHER (COMBINED) COUNTIES" which
#  should be where we simply impute downward at this level. This can be done
#  by recognizing that the "asd_desc" is the level which counties are combined
#  at. Therefore, group by "asd_desc" and check if values are missing. If they
#  are missing, then replace them with the "OTHER (COMBINED) COUNTIES" which
#  also has the county_code of "998

# Can I create a function for this?

# ---- financial ----------------------------------------------------------

ohio_rent <- nass_data(commodity_desc = "RENT", agg_level_desc = "COUNTY",
                       state_name = "OHIO", numeric_vals = T)

from_rent <- c("RENT, CASH, CROPLAND, IRRIGATED - EXPENSE, MEASURED IN $ / ACRE",
               "RENT, CASH, CROPLAND, NON-IRRIGATED - EXPENSE, MEASURED IN $ / ACRE",
               "RENT, CASH, LAND & BUILDINGS - EXPENSE, MEASURED IN $",
               "RENT, CASH, LAND & BUILDINGS - OPERATIONS WITH EXPENSE",
               "RENT, CASH, PASTURELAND - EXPENSE, MEASURED IN $ / ACRE",
               "RENT, PER HEAD OR ANIMAL UNIT MONTH - OPERATIONS WITH EXPENSE")
to_rent   <- c("rent_irrigated", "rent_nonirrigated", "rent_expense",
               "operations_with_rent", "rent_pasture", "operations_head")

ohio <- ohio_rent %>% 
  mutate(year = as.numeric(year),
         #location_desc = gsub("OHIO, ", "", location_desc),
         short_desc = plyr::mapvalues(short_desc,
                                      from = from_rent,
                                      to = to_rent)) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

# Add on acreage from Census on rented land: these are only for part-owners
ohio_rent <- map(c("AG LAND, OWNED, IN FARMS - ACRES",
                   "AG LAND, RENTED FROM OTHERS, IN FARMS - ACRES",
                   "AG LAND, CROPLAND - ACRES",
                   "AG LAND, PASTURELAND, (EXCL CROPLAND & WOODLAND) - ACRES",
                   "AG LAND, WOODLAND - ACRES",
                   "AG LAND, WOODLAND, PASTURED - ACRES"),
                 function(x){
                     nass_data(commodity_desc = "AG LAND",
                               agg_level_desc = "COUNTY",
                               state_name = "OHIO", #domain_desc = "TOTAL",
                               #source_desc = "SURVEY", 
                               short_desc = x, numeric_vals = T)
                   })

ohio_rent <- ohio_rent %>% 
  bind_rows() %>% 
  filter(domain_desc != "IRRIGATION STATUS") %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c("AG LAND, OWNED, IN FARMS - ACRES",
                                               "AG LAND, CROPLAND - ACRES",
                                               "AG LAND, PASTURELAND, (EXCL CROPLAND & WOODLAND) - ACRES",
                                               "AG LAND, WOODLAND - ACRES",
                                               "AG LAND, WOODLAND, PASTURED - ACRES",
                                               "AG LAND, RENTED FROM OTHERS, IN FARMS - ACRES"),
                                      to = c("acres_part_owned", "cropland_acres", "pasture_acres",
                                             "woodland_acres", "woodland_pastured_acres", "acres_part_rented"))) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

ohio <- left_join(ohio, ohio_rent)

# Now for the other general all categories:
ohio_rent <- nass_data(commodity_desc = "FARM OPERATIONS",
                       agg_level_desc = "COUNTY", state_name = "OHIO",
                       source_desc = "CENSUS", domain_desc = "TENURE",
                       short_desc = "FARM OPERATIONS - ACRES OPERATED",
                       numeric_vals = T)

ohio_rent <- ohio_rent %>% 
  bind_rows() %>% 
  #filter(domain_desc != "IRRIGATION STATUS") %>% 
  mutate(year = as.numeric(year),
         domaincat_desc = plyr::mapvalues(domaincat_desc,
                                      from = c("TENURE: (FULL OWNER)",
                                               "TENURE: (PART OWNER)",
                                               "TENURE: (TENANT)"),
                                      to = c("acres_owned", "acres_part",
                                             "acres_tenant_rented"))) %>% 
  select(year, val = Value, domaincat_desc,
         county_code, county_name, asd_desc) %>% 
  spread(domaincat_desc, val)

ohio <- left_join(ohio, ohio_rent)

####
# NOW ADD IN THOSE RENTED AND OWNED ACRES
#####
ohio$owned_acres  <- ohio$acres_owned + ohio$acres_part_owned
ohio$rented_acres <- ohio$acres_part_rented + ohio$acres_tenant_rented

# Correct for missing values in the "other"
ohio <- ohio %>% 
  expand(year, nesting(county_code, county_name, asd_desc)) %>% 
  left_join(ohio) %>% 
  group_by(year, asd_desc) %>% 
  mutate_at(vars(rent_irrigated, rent_nonirrigated, rent_pasture),
            funs(ifelse(is.na(.), .[county_code == "998"], .))) %>% 
  filter(county_code != "998")

ohio_tax <- nass_data(commodity_desc = "TAXES", agg_level_desc = "COUNTY",
                      state_name = "OHIO", numeric_vals = T)

from_tax <- c("TAXES, PROPERTY, REAL ESTATE & NON-REAL ESTATE, (EXCL PAID BY LANDLORD) - EXPENSE, MEASURED IN $",
              "TAXES, PROPERTY, REAL ESTATE & NON-REAL ESTATE, (EXCL PAID BY LANDLORD) - OPERATIONS WITH EXPENSE")
ohio_tax <- ohio_tax %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = from_tax,
                                      to = c("taxes", "taxes_operations"))) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

ohio_income <- map(c("INCOME, FARM-RELATED - OPERATIONS WITH RECEIPTS",
                    "INCOME, FARM-RELATED - RECEIPTS, MEASURED IN $"),
                   function(x){
                     nass_data(commodity_desc = "INCOME, FARM-RELATED",
                                agg_level_desc = "COUNTY",
                                state_name = "OHIO", domain_desc = "TOTAL",
                                #source_desc = "SURVEY", 
                                short_desc = x, numeric_vals = T)
                    })

ohio_income <- ohio_income %>% 
  bind_rows() %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c("INCOME, FARM-RELATED - OPERATIONS WITH RECEIPTS",
                                               "INCOME, FARM-RELATED - RECEIPTS, MEASURED IN $"),
                                      to = c("receipt_operations", "receipts"))) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

ohio_land <- nass_data(commodity_desc = "AG LAND", agg_level_desc = "COUNTY",
                       state_name = "OHIO", domain_desc = "TOTAL",
                       statisticcat_desc = "ASSET VALUE", numeric_vals = T)

ohio_land <- ohio_land %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c("AG LAND, INCL BUILDINGS - ASSET VALUE, MEASURED IN $",
                                               "AG LAND, INCL BUILDINGS - ASSET VALUE, MEASURED IN $ / ACRE",
                                               "AG LAND, INCL BUILDINGS - ASSET VALUE, MEASURED IN $ / OPERATION",
                                               "AG LAND, INCL BUILDINGS - OPERATIONS WITH ASSET VALUE"),
                                      to = c("agland", "agland_per_acre",
                                             "agland_per_operation", "agland_operations"))) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

ohio_farms <- map(c("FARM OPERATIONS - NUMBER OF OPERATIONS",
                    "FARM OPERATIONS - ACRES OPERATED"), function(x){
                      nass_data(commodity_desc = "FARM OPERATIONS",
                                agg_level_desc = "COUNTY",
                                state_name = "OHIO", domain_desc = "TOTAL",
                                #source_desc = "SURVEY", 
                                short_desc = x, numeric_vals = T)
                    })

ohio_farms <- ohio_farms %>% 
  bind_rows() %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c("FARM OPERATIONS - NUMBER OF OPERATIONS",
                                               "FARM OPERATIONS - ACRES OPERATED"),
                                      to = c("farms", "acres"))) %>% 
  filter(!(year %in% c(1997, 2002, 2007) & source_desc == "SURVEY")) %>% 
  select(year, val = Value, short_desc,
         county_code, county_name, asd_desc) %>% 
  spread(short_desc, val)

ohio <- ohio_farms %>% 
  full_join(ohio) %>% 
  full_join(ohio_tax) %>% 
  full_join(ohio_income) %>% 
  full_join(ohio_land)
ohio$fips <- 39000 + as.numeric(ohio$county_code)

write.csv(ohio, paste0(local_dir, "/ohio_nass.csv"), row.names = F)
write_rds(ohio, paste0(local_dir, "/ohio_nass.rds"))

# ---- prices -------------------------------------------------------------

prices <- map(c("CORN", "HAY", "SOYBEANS", "WHEAT"), function(x){
  nass_data(commodity_desc = x, statisticcat_desc = "PRICE RECEIVED",
            state_name = "OHIO", numeric_vals = T)})

from_price <- c("CORN, GRAIN - PRICE RECEIVED, MEASURED IN $ / BU",
                "HAY, ALFALFA - PRICE RECEIVED, MEASURED IN $ / TON",
                "HAY, (EXCL ALFALFA) - PRICE RECEIVED, MEASURED IN $ / TON",
                "HAY - PRICE RECEIVED, MEASURED IN $ / TON",
                "SOYBEANS - PRICE RECEIVED, MEASURED IN $ / BU",
                "WHEAT - PRICE RECEIVED, MEASURED IN $ / BU",
                "WHEAT, WINTER - PRICE RECEIVED, MEASURED IN $ / BU")
to_price <- c("corn_price", "hay_alfa_price", "hay_nonalfa_price", "hay_price",
              "soy_price", "wheat_price", "wheat_winter_price")

prices_data <- prices %>% 
  bind_rows() %>% 
  rename(time = reference_period_desc) %>% 
  mutate(year = as.numeric(year),
         time = case_when(.$time == "JAN" ~ 1, .$time == "FEB" ~ 2,
                          .$time == "MAR" ~ 3, .$time == "APR" ~ 4,
                          .$time == "MAY" ~ 5, .$time == "JUN" ~ 6,
                          .$time == "JUL" ~ 7, .$time == "AUG" ~ 8,
                          .$time == "SEP" ~ 9, .$time == "OCT" ~ 10,
                          .$time == "NOV" ~ 11, .$time == "DEC" ~ 12,
                          TRUE ~ 13),
         short_desc = plyr::mapvalues(short_desc,
                                      from = from_price,
                                      to = to_price),
         date = as.Date(paste0(year, "-", time, "-1"), format="%Y-%m-%d")) %>% 
  select(year, val = Value, short_desc, time, date) %>% 
  spread(short_desc, val)

monthly <- prices_data %>% 
  filter(time != 13) %>% 
  select(-time)
annual  <- prices_data %>% 
  filter(time == 13) %>% 
  select(-date, -time)

# Add in a blank holder for next year's prices
annual <- data.frame(year = max(annual$year) + 1) %>% 
  bind_rows(annual) %>% 
  arrange(year)

# Sales
sales <- map(c("CORN", "HAY", "SOYBEANS", "WHEAT"), function(x){
  nass_data(commodity_desc = x, statisticcat_desc = "SALES",
            freq_desc = "MONTHLY", state_name = "OHIO", numeric_vals = T)})

from_sales <- c("CORN, GRAIN - SALES, MEASURED IN PCT OF MKTG YEAR",
                "HAY - SALES, MEASURED IN PCT OF MKTG YEAR",
                "SOYBEANS - SALES, MEASURED IN PCT OF MKTG YEAR",
                "WHEAT - SALES, MEASURED IN PCT OF MKTG YEAR")
to_sales <- c("corn_sales", "hay_sales",
              "soy_sales", "wheat_sales")

sales_data <- sales %>% 
  bind_rows() %>% 
  rename(time = reference_period_desc) %>% 
  mutate(year = as.numeric(year),
         time = case_when(.$time == "JAN" ~ 1, .$time == "FEB" ~ 2,
                          .$time == "MAR" ~ 3, .$time == "APR" ~ 4,
                          .$time == "MAY" ~ 5, .$time == "JUN" ~ 6,
                          .$time == "JUL" ~ 7, .$time == "AUG" ~ 8,
                          .$time == "SEP" ~ 9, .$time == "OCT" ~ 10,
                          .$time == "NOV" ~ 11, .$time == "DEC" ~ 12,
                          TRUE ~ 13),
         short_desc = plyr::mapvalues(short_desc,
                                      from = from_sales,
                                      to = to_sales),
         date = as.Date(paste0(year, "-", time, "-1"), format="%Y-%m-%d")) %>% 
  select(year, val = Value, short_desc, time, date) %>% 
  spread(short_desc, val)

monthly <- sales_data %>% 
  select(-time) %>% 
  right_join(monthly)

write.csv(annual, paste0(local_dir, "/ohio_prices_annual.csv"), row.names = F)
write_rds(annual, paste0(local_dir, "/ohio_prices_annual.rds"))

write.csv(monthly, paste0(local_dir, "/ohio_prices_monthly.csv"), row.names=F)
write_rds(monthly, paste0(local_dir, "/ohio_prices_monthly.rds"))

# ---- crops --------------------------------------------------------------

# statisticcat_desc - category
# short_desc - data item
# category - area harvested, area planted, production, sales, yields
corn_vals  <- c("CORN - ACRES PLANTED",
                "CORN, GRAIN - ACRES HARVESTED",
                "CORN, SILAGE - ACRES HARVESTED",
                # "CORN, GRAIN, IRRIGATED - ACRES HARVESTED",
                # "CORN, SILAGE, IRRIGATED - ACRES HARVESTED",
                "CORN, GRAIN - PRODUCTION, MEASURED IN BU",
                "CORN, SILAGE - PRODUCTION, MEASURED IN TONS",
                "CORN, GRAIN - YIELD, MEASURED IN BU / ACRE",
                "CORN, SILAGE - YIELD, MEASURED IN TONS / ACRE")#, "CORN - SALES, MEASURED IN $")
corn_names <- c("corn_acres_planted",
                "corn_grain_acres_harvest",
                "corn_silage_acres_harvest",
                # "corn_grain_irr_acres_harvest",
                # "corn_silage_irr_acres_harvest",
                "corn_grain_prod_bu",
                "corn_silage_prod_tons",
                "corn_grain_yield",
                "corn_silage_yield")#, "corn_sales")
hay_vals  <- c("HAY - ACRES HARVESTED", "HAY - PRODUCTION, MEASURED IN TONS",
               "HAY - YIELD, MEASURED IN TONS / ACRE")
hay_names <- c("hay_acres_harvest", "hay_prod_tons", "hay_yield")
soy_vals  <- c("SOYBEANS - ACRES HARVESTED", "SOYBEANS - ACRES PLANTED",
               "SOYBEANS - PRODUCTION, MEASURED IN BU",
               "SOYBEANS - YIELD, MEASURED IN BU / ACRE")
soy_names  <- c("soy_acres_harvest", "soy_acres_planted",
                "soy_prod_bu", "soy_yield")
wheat_vals <- c("WHEAT - ACRES HARVESTED", "WHEAT - ACRES PLANTED",
                "WHEAT - PRODUCTION, MEASURED IN BU", "WHEAT, WINTER - ACRES HARVESTED",
                "WHEAT, WINTER - ACRES PLANTED",
                "WHEAT, WINTER - PRODUCTION, MEASURED IN BU",
                "WHEAT, WINTER - YIELD, MEASURED IN BU / ACRE",
                "WHEAT - YIELD, MEASURED IN BU / ACRE")
wheat_names <- c("wheat_acres_harvest", "wheat_acres_planted", "wheat_prod_bu",
                 "wheat_winter_acres_harvest", "wheat_winter_acres_planted",
                 "wheat_winter_prod_bu", "wheat_winter_yield", "wheat_yield")

ohio_crops <- map(c(corn_vals, soy_vals, wheat_vals), function(x){
  nass_data(short_desc = x, agg_level_desc = "COUNTY", state_name = "OHIO",
            sector = "CROPS", source_desc = "SURVEY", numeric_vals = T)
  })

crops <- ohio_crops %>% 
  bind_rows() %>% 
  mutate(county = tolower(county_name),
         year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c(corn_vals, soy_vals, wheat_vals),
                                      to = c(corn_names, soy_names, wheat_names))) %>% 
  select(year, county, county_code, asd_desc, val = Value, short_desc) %>% 
  spread(short_desc, val)

# Livestock
cattle <- c("CATTLE, COWS, MILK - INVENTORY",
            "CATTLE, INCL CALVES - INVENTORY")
cattle_names <- c("cattle_milk_inventory", "cattle_inventory")
ohio_livestock <- map(cattle, function(x){
  nass_data(short_desc = x, agg_level_desc = "COUNTY", state_name = "OHIO",
            source_desc = "SURVEY", numeric_vals = T)
})

livestock <- ohio_livestock %>% 
  bind_rows() %>% 
  mutate(county = tolower(county_name),
         year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = cattle,
                                      to = cattle_names)) %>% 
  select(year, county, county_code, asd_desc, val = Value, short_desc) %>% 
  spread(short_desc, val)

crops <- crops %>%
  full_join(livestock) %>% 
  expand(nesting(county, county_code, asd_desc), year) %>% 
  left_join(crops) %>% 
  left_join(livestock)

crops <- crops %>% 
  group_by(year, asd_desc) %>% 
  mutate_at(vars(corn_acres_planted:cattle_milk_inventory),
            funs(ifelse(is.na(.), .[county_code == "998"], .))) %>% 
  filter(county_code != "998")

# Drop out winter wheat, just have wheat
crops <- crops %>% 
  mutate(wheat_acres_harvest = ifelse(is.na(wheat_acres_harvest),
                                      wheat_winter_acres_harvest,
                                      wheat_acres_harvest),
         wheat_acres_planted = ifelse(is.na(wheat_acres_planted),
                                      wheat_winter_acres_planted,
                                      wheat_acres_planted),
         wheat_prod_bu = ifelse(is.na(wheat_prod_bu),
                                wheat_winter_prod_bu, wheat_prod_bu),
         wheat_yield = ifelse(is.na(wheat_yield),
                              wheat_winter_yield, wheat_yield)) %>% 
  select(-contains("winter"))

# # Add in a blank holder for next year's prices
# annual <- data.frame(year = max(annual$year) + 1) %>% 
#   bind_rows(annual) %>% 
#   arrange(year)


write.csv(crops, paste0(local_dir, "/ohio_crops.csv"), row.names = F)
write_rds(crops, paste0(local_dir, "/ohio_crops.rds"))

# ---- state --------------------------------------------------------------


state_crops <- map(c(corn_vals, soy_vals, wheat_vals), function(x){
  nass_data(short_desc = x, agg_level_desc = "STATE", state_name = "OHIO",
            sector = "CROPS", source_desc = "SURVEY", numeric_vals = T)
})

crops <- state_crops %>% 
  bind_rows() %>% 
  filter(reference_period_desc == "YEAR") %>% 
  mutate(year = as.numeric(year),
         short_desc = plyr::mapvalues(short_desc,
                                      from = c(corn_vals, soy_vals, wheat_vals),
                                      to = c(corn_names, soy_names, wheat_names))) %>% 
  select(year, asd_desc, val = Value, short_desc) %>% 
  spread(short_desc, val)

crops <- crops %>% 
  mutate(wheat_acres_harvest = ifelse(is.na(wheat_acres_harvest),
                                      wheat_winter_acres_harvest,
                                      wheat_acres_harvest),
         wheat_acres_planted = ifelse(is.na(wheat_acres_planted),
                                      wheat_winter_acres_planted,
                                      wheat_acres_planted),
         wheat_prod_bu = ifelse(is.na(wheat_prod_bu),
                                wheat_winter_prod_bu, wheat_prod_bu),
         wheat_yield = ifelse(is.na(wheat_yield),
                              wheat_winter_yield, wheat_yield)) %>% 
  select(-contains("winter"))


write.csv(crops, paste0(local_dir, "/ohio_state_crops.csv"), row.names = F)
write_rds(crops, paste0(local_dir, "/ohio_state_crops.rds"))