R/request.R

Defines functions get_nassqs get_nassqs_data

Documented in get_nassqs_data

# Request USDA NASS data wrapper ----
#' Get USDA NASS data and return a data frame
#'
#' @description
#' A wrapper function for the `rnassqs` package, `get_nassqs_data` makes a HTTP GET
#' request to the USDA-NASS Quick Stats API and returns a subset of the data parsed as a
#' tibble.
#'
#'@param comm_list A list of 'nassqs' commodities. All possible values available from [nassqs_param_values()]
#'@param start_year A start year integer
#'@param end_year An end year integer
#'
#'@details
#'
#' - The default comm_list includes CORN, COTTON, SOYBEANS, WHEAT, SUGARCANE, SUGARBEETS, SORGHUM, RICE, BARLEY, and APPLES
#' - The default date range is 1990 through 2019
#' - The core 'rnassqs' function [nassqs()] is mapped with 'purrr' to be called for each combination of year and commodity. Larger data requests will require more time
#'
#'
#'@examples
#'library(tidyverse)
#'
#'comm_list <- c("CORN","WHEAT")
#'start_year <- 2018L
#'end_year <- 2019L
#'
#'get_nassqs_data(comm_list, start_year, end_year)
#'
#'@export
get_nassqs_data <- function(comm_list = c("CORN",
                                             "COTTON",
                                             "SOYBEANS",
                                             "WHEAT",
                                             "SUGARCANE",
                                             "SUGARBEETS",
                                             "SORGHUM",
                                             "RICE",
                                             "BARLEY"),
                               start_year = 1990L,
                               end_year = 2019L) {

# checks
  required_names  <- rnassqs::nassqs_param_values("commodity_desc")
  required_names_present <- all(comm_list %in% required_names)
  if (!required_names_present) rlang::abort("Commodity name mismatch. See rnassqs::nassqs_param_values('commodity_desc') for the available list.")

  if (!is.integer(start_year)) rlang::abort("Enter an integer start_year.")

  if (!is.integer(end_year)) rlang::abort("Enter an integer end_year. The function default is the year 2019")

  if (start_year >= end_year) rlang::abort("Choose a start year before the end year.")

  if (start_year < 1915L) rlang::abort("Choose a start year after 1915.")

  if (end_year > as.integer(lubridate::year(Sys.Date()) - 1)) rlang::abort("Choose a suitable end date as an integer.")

years <- start_year:end_year

tidyr::expand_grid(year = as.character(years), comm = comm_list) %>%
  dplyr::mutate(row = dplyr::row_number()) %>%
  tidyr::nest(data = c(year,comm)) %>%
  dplyr::mutate(nassdata = purrr::map(data, get_nassqs)) %>%
  dplyr::select(-data, -row) %>%
  tidyr::unnest(nassdata) %>%
  dplyr::filter(!Value %in% c("(NA)","(D)"),
         state_alpha != "US") %>%
  dplyr::mutate(Value = readr::parse_number(Value)) %>%
  dplyr::filter(freq_desc == "ANNUAL" & reference_period_desc == "YEAR") %>%
  dplyr::select(year, state_alpha, commodity_desc, statisticcat_desc, unit_desc, short_desc, Value)

}

# utility function ----

get_nassqs <- function(data){
  params <- list(source_desc = "SURVEY")
  params[["agg_level_desc"]] <- "STATE"
  params[["freq_desc"]] <- "ANNUAL"
  params[["reference_period_desc"]] <- c("YEAR","MARKETING YEAR")
  params[["statisticcat_desc"]] <- c("AREA HARVESTED",
                                     "PRICE RECEIVED",
                                     "PRODUCTION")
  params[["year"]] <- data$year
  params[["commodity_desc"]] <- data$comm
  rnassqs::nassqs(params)
}
opus1993/rnassqs.extra documentation built on Oct. 14, 2020, 3:25 a.m.