R/hud_do_query_calls.R

Defines functions smart_rbind parse_resp_content print_resp_warning_messages process_status_codes make_query_calls if_tibble_return fmr_do_query_call il_do_query_call misc_do_query_call cw_do_query_calls chas_do_query_calls

#' @import httr
#' @import tibble
#' @import R.cache


#' @name chas_do_query_calls
#' @title API Calls for Comprehensive Housing and Affordability Helper
#' @description Helper function for making the query calls to
#'   Comprehensive Housing and Affordability Strategy (CHAS)
#'   API endpoint as well as concatenating all response objects to
#'   be returned to the user. Collect error urls and warn users and cache the
#'   queries and show the download bar.
#' @param urls A character vector: the urls to query for.
#' @param key A character vector of length one with the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @param to_tibble A logical: if TRUE, return as tibble else dataframe.
#' @returns A tibble or dataframe of all response bodies.
#' @noRd
#' @noMd
chas_do_query_calls <- function(urls, key, to_tibble) {
  # Form all query calls...
  list_res <- c()
  error_urls <- character(0)

  # These measurements are hardcoded in, but a more effective method might
  # be systematic checks to find all unique columns names from all CHAS
  # datasets.

  all_measurements <- hud_chas_col_names()

  for (i in seq_len(length(urls))) {

    url <- urls[i]

    call <- R.cache::memoizedCall(make_query_calls, url, key)
    processed_code <- process_status_codes(call)

    if (!is.null(processed_code)) {

      error_urls <- c(error_urls, list(c(url, processed_code)))

    } else {
      cont <- parse_resp_content(call)

      not_measured <- all_measurements[!all_measurements %in%
                                         names(unlist(cont[[1]]))]
      # Check this CHAS data does not have data defined for
      # all expected fields. If so fill them in with NA's.
      if (length(not_measured) >= 1) {

        extra_mes <- rep(NA, length(not_measured))
        names(extra_mes) <- not_measured

        list_res[[i]] <- c(unlist(cont[[1]]), extra_mes)

      } else {

        list_res[[i]] <- unlist(cont[[1]])

      }
    }

    download_bar(done = i, total = length(urls),
                 current = url, error = length(error_urls))

  }

  print_resp_warning_messages(error_urls)
  if_tibble_return(list_res = list_res,
                   to_tibble = to_tibble,
                   api = "chas")
}


#' @name cw_do_query_calls
#' @title API Calls for USPS Crosswalk Helper
#' @description Helper function for making the query calls to
#'   USPS Crosswalk API endpoint as well as concatenating all response objects
#'   to be returned to the user.  Collect error urls and warn users and cache
#'   the
#'   queries and show the download bar.
#' @param urls A character vector : the urls to query for.
#' @param query A character vector : the geoids to query for.
#' @param year A character or numeric vector : the years to query for.
#' @param quarter A character or numeric vector: the quarters in the year
#'   to query for.
#' @param primary_geoid A character vector: the first geoid part of a
#'   function call. For example,
#'   hud_cw_zip_tract() has zip as first geoid and tract as second geoid.
#' @param secondary_geoid A character vector: the second geoid part of
#'   function call.
#' @param key A character vector of length one with the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @param to_tibble A logical: if TRUE, return as tibble else dataframe.
#' @returns A tibble or dataframe of all response bodies.
#' @noRd
#' @noMd
cw_do_query_calls <- function(urls, query, year, quarter, primary_geoid,
                              secondary_geoid, key, to_tibble) {
  list_res <- c()
  error_urls <- character(0)

  for (i in seq_len(length(urls))) {
    url <- urls[i]

    call <- R.cache::memoizedCall(make_query_calls, url, key)
    processed_code <- process_status_codes(call)

    if (!is.null(processed_code)) {

      # Need to output a single error message instead of a bunch when
      # something bad occurs. Append to list of errored urls.
      error_urls <- c(error_urls, list(c(url, processed_code)))

    } else {

      cont <- parse_resp_content(call)

      res <- as.data.frame(do.call(rbind, cont$data$results))

      res$year <- year[i]
      res$quarter <- quarter[i]

      res[1] <- unlist(res[1])
      res[2] <- unlist(res[2])
      res[3] <- unlist(res[3])
      res[4] <- unlist(res[4])
      res[5] <- unlist(res[5])
      res[6] <- unlist(res[6])


      list_res[[i]] <- res

    }



    download_bar(done = i, total = length(urls),
                 current = url, error = length(error_urls))

  }

  print_resp_warning_messages(error_urls)

  if_tibble_return(list_res = list_res,
                   to_tibble = to_tibble,
                   api = "cw",
                   primary_geoid = primary_geoid,
                   secondary_geoid = secondary_geoid)
}


#' @name misc_do_query_call
#' @title API Calls for Misc Helper
#' @description Helper function for making the query calls to
#'   misc endpoints as well as concatenating all response objects
#'   to be returned to the user. The misc APIs are located in hud_misc.R.
#'   Collect error urls and warn users and cache the
#'   queries and show the download bar.
#' @param urls A character vector: the urls to query for.
#' @param key A character vector of length one: the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @param to_tibble A logical: if TRUE, return as tibble else dataframe.
#' @returns A tibble or dataframe of all response bodies.
#' @noRd
#' @noMd
misc_do_query_call <- function(urls, key, to_tibble) {
  list_res <- c()
  error_urls <- character(0)

  for (i in seq_len(length(urls))) {

    url <- urls[i]

    call <- R.cache::memoizedCall(make_query_calls, url, key)
    processed_code <- process_status_codes(call)

    if (!is.null(processed_code)) {
      # Need to output a single error message instead of a bunch when
      # something bad occurs. Append to list of errored urls.
      error_urls <- c(error_urls, list(c(url, processed_code)))

    } else {

      cont <- parse_resp_content(call)
      list_res[[i]] <- as.data.frame(do.call(rbind, cont))

    }

    download_bar(done = i, total = length(urls),
                 current = url, error = length(error_urls))

  }

  print_resp_warning_messages(error_urls)
  if_tibble_return(list_res = list_res,
                   to_tibble = to_tibble,
                   api = "misc")
}





#' @name il_do_query_call
#' @title API Calls for Fair Markets Rent
#' @description Helper function for making the query calls to
#'   FMR (Fair Markets Rent) endpoints as well as concatenating all
#'   response objects
#'   to be returned to the user.
#' @param all_queries The components of an IL query call, including the
#'   geoid and year.
#' @param key A character vector of length one: the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @param to_tibble A logical: if TRUE, return as tibble else data frame.
#' @param query_type The geoid type being queried for: state, county, or cbsa.
#' @returns A tibble or dataframe of all response bodies.
#' @noRd
#' @noMd
il_do_query_call <- function(all_queries, key, to_tibble, query_type) {

  list_res <- c()
  error_urls <- character(0)

  for (i in seq_len(nrow(all_queries))) {

    # Build the urls for querying the data.

    urls <- paste(get_hud_host_name(),
                  "il/",
                  if (query_type == "state") "statedata/" else "data/",
                  all_queries$query[i], "?year=", all_queries$year[i], sep = "")

    call <- R.cache::memoizedCall(make_query_calls, urls, key)
    processed_code <- process_status_codes(call)

    if (!is.null(processed_code)) {

      error_urls <- c(error_urls, list(c(url, processed_code)))

    } else {

      cont <- parse_resp_content(call)

      if (query_type == "state") {

        res <- as.data.frame(cont$data)
        res$statecode <- cont$data$statecode

        oth <- data.frame(query = all_queries$query[i],
                          year = all_queries$year[i],
                          median_income = cont$data$median_income,
                          stringsAsFactors = FALSE)

        res <- cbind(oth, res)

      } else if (query_type == "county" || query_type == "cbsa") {

        res <- as.data.frame(cont$data)

        oth <- data.frame(query = all_queries$query[i],
                          stringsAsFactors = FALSE)

        res <- cbind(oth, res)

      }

      list_res[[i]] <- res
    }

    download_bar(done = i, total = nrow(all_queries),
                 current = urls, error = length(error_urls))
  }

  print_resp_warning_messages(error_urls)

  if_tibble_return(list_res = list_res,
                   to_tibble = to_tibble,
                   api = "il",
                   resolution = query_type)

}

#' @name fmr_do_query_call
#' @title API Calls for Income Limits
#' @description Helper function for making the query calls to
#'   IL (Income Limits) endpoints as well as concatenating all response objects
#'   to be returned to the user.
#' @param all_queries The components of an IL query call, including the
#'   geoid and year.
#' @param key A character vector of length one: the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @param to_tibble A logical: if TRUE, return as tibble else dataframe.
#' @param query_type The geoid type being queried for: state, county, or cbsa.
#' @returns A tibble or dataframe of all response bodies.
#' @noRd
#' @noMd
fmr_do_query_call <- function(all_queries, key, to_tibble, query_type) {

  res <- NULL

  error_urls <- character(0)
  list_county_res <- c()
  list_metroarea_res <- c()
  list_res <- c()


  for (i in seq_len(nrow(all_queries))) {
    # Build the urls for querying the data.

    urls <- paste(get_hud_host_name(),
                  "fmr/",
                  if (query_type == "state") "statedata/" else "data/",
                  all_queries$query[i], "?year=", all_queries$year[i], sep = "")

    call <- R.cache::memoizedCall(make_query_calls, urls, key)
    processed_code <- process_status_codes(call)

    if (!is.null(processed_code)) {

      error_urls <- c(error_urls, list(c(url, processed_code)))

    } else {

      cont <- parse_resp_content(call)

      if (query_type == "state") {

        res_county <- as.data.frame(do.call(rbind, cont$data$counties))
        res_metroareas <- as.data.frame(do.call(rbind, cont$data$metroareas))

        res_county$query <- all_queries$query[i]
        res_county$year <- all_queries$year[i]

        res_metroareas$query <- all_queries$query[i]
        res_metroareas$year <- all_queries$year[i]

        list_county_res[[i]] <- res_county
        list_metroarea_res[[i]] <- res_metroareas

      } else if (query_type == "county" || query_type == "cbsa") {

        if (cont$data$smallarea_status == "0") {
          # Returns just county

          res <- as.data.frame(do.call(cbind, cont$data$basicdata))

          res$query <- all_queries$query[i]
          res$year <- all_queries$year[i]
          res$zip_code <- ""

          res$county_name <- cont$data$county_name
          res$counties_msa <- cont$data$counties_msa
          res$town_name <- cont$data$town_name
          res$metro_status <- cont$data$metro_status
          res$metro_name <- cont$data$metro_name
          res$area_name <- cont$data$area_name
          res$smallarea_status <- cont$data$smallarea_status

          list_res[[i]] <- res

        } else {

          # Returns zip code level data
          res <- as.data.frame(do.call(rbind, cont$data$basicdata))
          res <- as.data.frame(sapply(res, function(x) unlist(as.character(x))))

          res$query <- all_queries$query[i]
          res$year <- all_queries$year[i]

          res$county_name <- cont$data$county_name
          res$counties_msa <- cont$data$counties_msa
          res$town_name <- cont$data$town_name
          res$metro_status <- cont$data$metro_status
          res$metro_name <- cont$data$metro_name
          res$area_name <- cont$data$area_name
          res$smallarea_status <- cont$data$smallarea_status

          list_res[[i]] <- res
        }

      }

    }

    download_bar(done = i, total = nrow(all_queries),
                 current = urls, error = length(error_urls))

  }

  print_resp_warning_messages(error_urls)

  if (query_type == "state") {

    res <- if_tibble_return(list_res = list_county_res,
                            list_res_two = list_metroarea_res,
                            to_tibble = to_tibble,
                            api = "fmr",
                            resolution = query_type)

  } else if (query_type == "county" || query_type == "cbsa") {

    res <- if_tibble_return(list_res = list_res,
                            to_tibble = to_tibble,
                            api = "fmr",
                            resolution = query_type)

  }

  res
}


#' @name if_tibble_return
#' @title Convert Final Object to Tibble
#' @description Convert Final Object to Tibble if TRUE, else return
#'   as data frame.
#' @param list_res The list of data frame responses to concatenate together.
#' @param list_res_two Secondary list of response objects for separating
#'  metro and county level data if user
#'  specifies states as the query using fmr api.
#' @param to_tibble If TRUE convert to tibble: If FALSE keep as data frame.
#' @param api The API that is queried for:
#' @param resolution If FMR or IL, specify if state, county, or cbsa.
#'   1) chas
#'   2) cw
#'   3) fmr
#'   4) il
#'   5) misc
#' @returns The final response object.
#' @noRd
#' @noMd
if_tibble_return <- function(list_res,
                             list_res_two = NULL,
                             to_tibble,
                             api,
                             primary_geoid = NULL,
                             secondary_geoid = NULL,
                             resolution =  NULL) {

  res <- NULL
  if (length(list_res) != 0) {

    if (api == "fmr") {

        if (resolution == "state") {

          if (length(list_res) != 0) {

            res_county <- as.data.frame(do.call(rbind, list_res))
            res_county <- as.data.frame(sapply(res_county,
                                        function(x) unlist(as.character(x))))

            res_metroareas <- as.data.frame(do.call(rbind, list_res_two))
            res_metroareas <- as.data.frame(sapply(
              res_metroareas,
              function(x) unlist(as.character(x))))

            if (to_tibble) {
              res_county <- tibble(res_county)
              res_metroareas <- tibble(res_metroareas)
            }

            res <- list(counties = res_county, metroareas = res_metroareas)
          }

        } else if (resolution == "county" || resolution == "cbsa") {

          res <- as.data.frame(do.call(rbind, list_res))

          if (length(list_res) > 1) {

            res <- as.data.frame(sapply(res, function(x) unlist(as.character(x))))

          }

        }

    } else if (api == "cw") {


      # Depending on year + quarter, data for some data sets might
      # return differing # of columns. Over time, new fields are added.
      # This should correct for that.

      # Smart rbind is really only used for crosswalk data for now, but
      # may, in the future be required for the other datasets. The CHAS dataset
      # could also benefit from this instead of hardcoding the expect column
      # values.
      res <- as.data.frame(smart_rbind(list_res))


    } else {
      res <- as.data.frame(do.call(rbind, list_res))
    }

    if (to_tibble) {
      res <- as_tibble(res)
    }
  }

  res
}



#' @name make_query_calls
#' @title Make Query Calls to HUD USER
#' @description Centralized atomic function for querying API calls
#'   as to make R.cache memoizedCall work at a singular API call resolution.
#' @param urls A character vector: the urls to query for.
#' @param key A character vector of length one with the key obtained from HUD
#'   (US Department of Housing and Urban Development)
#'   USER website.
#' @returns The response object.
#' @noRd
#' @noMd
make_query_calls <- function(url, key, path, query) {

  # Check if Sys.getenv("HUD_USER_AGENT") has been set
  # and is not empty string. If so, then allow user agent
  # will be set to this. Otherwise, just point back to the
  # url of the package.

  the_user_agent <- if (Sys.getenv("HUD_USER_AGENT") != "") {
                      Sys.getenv("HUD_USER_AGENT")
                    } else {
                      "https://github.com/etam4260/rhud"
                    }

  config <- add_headers(Authorization = paste("Bearer ", as.character(key)))

  # HUD Crosswalk API returns 429 request now so need to use RETRIES
  # or data returned might be missing...

  # 10/31/2022 HUD does not return a retry-in within 429 request
  # need to figure out recommended retry amount
  # for "most" data to pass through.

  # Wait a minute after a 429 is given.
  request <- RETRY("GET",
                   url,
                   config,
                   times = getOption("rhud_num_retries", 10),
                   user_agent(the_user_agent),
                   pause_min = 60,
                   queir = TRUE,
                   terminate_on = c(400, 401, 403, 404, 405, 406, 500),
                   terminate_on_success = TRUE
                   )

  request
}




#' @name process_status_codes
#' @title Handle Status Codes Returned By Query
#' @description Given a response object from a query, handle it differently
#'   based on the status codes returned by HUD USER API server.
#' @param call The response object
#' @returns NULL if the response is 200 else return the status code and
#'   the associated error.
#' @noRd
#' @noMd
process_status_codes <- function(call) {
  error <- NULL

  if (status_code(call) == 400) {

    error <- c(400, paste("An invalid value was specified for one of ",
                  "the query parameters in the request URI."))

  } else if (status_code(call) == 401) {

    error <- c(401, paste("Authentication failure"))

  } else if (status_code(call) == 403) {

    error <- paste(403, "Not allowed to access this dataset API, ",
                  "because you have not registered for it.")

  } else if (status_code(call) == 404) {

    error <- paste(404, "No data found using '(value you entered)'")

  } else if (status_code(call) == 405) {

    error <- paste(405, "Unsupported method, only GET is supported")

  } else if (status_code(call) == 406) {

    error <- paste(406, "Unsupported Accept Header value, ",
                  "must be application/json")

  } else if (status_code(call) == 500) {

    error <- paste(500, "Internal server error occurred")

  } else if (status_code(call) == 429) {

    error <- paste(429, "Too many requests")
  }

  error

}


#' @name print_resp_warning_messages
#' @title Print Response Warning Messages
#' @description Print warning messages associated with query calls.
#' @param errors The c(response code, response code description, url)
#' @noRd
#' @noMd
print_resp_warning_messages <- function(errors) {
  # Spit out error messages to user after all
  # queries are done.
  if (length(errors) != 0) {
    message("\n")

    # Spit out error messages to user after all
    # queries are done.
    warning(paste("Could not find data for queries: \n\n",
                  paste(paste("*", errors, sep = " "), collapse = "\n"),
                  "\n\nIt is possible that your key maybe invalid or ",
                  "there isn't any data for these parameters, ",
                  "If you think this is wrong please ",
                  "report it at https://github.com/etam4260/rhud/issues.",
                  sep = ""), call. = FALSE)

  }


  message("\n")

}



#' @name parse_resp_content
#' @title Parse Response Body Content
#' @description Extract content from a call object and handle warnings and
#'   errors.
#' @param call The response object.
#' @returns The content of response object in UTF-8 encoding.
#' @noRd
#' @noMd
parse_resp_content <- function(call) {
   cont <- list()

   tryCatch(

    expr = {
      cont <- try(content(call, encoding = "UTF-8"), silent = TRUE)
    },

    error = function(e){
      stop(e, call. = FALSE)
    },

    warning = function(w){
      warning(w, call. = FALSE)
    }

  )

}



#' @name smart_rbind
#' @title Rbind Dataframes with Overlapping Colnames
#' @description Attempts to rbind a list of dataframes that have
#'   differing number of columns but contains some with overlapping column
#'   names.
#' @param list_res A list of dataframes
#' @returns The concatenated dataframe of those within list_res.
#' @noRd
#' @noMd
smart_rbind <- function(list_res) {
  col_set <- FALSE
  res <- data.frame()

  for (cont in list_res) {

    if (!col_set) {
      res <- cont
      col_set <- TRUE
    } else {

      res[setdiff(names(cont), names(res))] <- NA
      cont[setdiff(names(res), names(cont))] <- NA

      res <- as.data.frame(rbind(res, cont))

    }
  }
  # Cases in hud_chas functions where some row names appear.
  # Make sure to remove those.

  rownames(res) <- NULL

  res
}
etam4260/rhud documentation built on Nov. 12, 2022, 2:53 a.m.