R/fetch_vd.R
In ohvbd: One Health VBD Hub

Documented in fetch_vd

#' @title Fetch VecDyn dataset/s by ID
#' @description Retrieve VecDyn dataset/s specified by their dataset ID.
#' @author Francis Windram
#'
#' @param ids a numeric ID or numeric vector of ids (preferably in an `ohvbd.ids` object) indicating the particular dataset/s to download.
#' @param rate maximum number of calls to the API per second.
#' @param connections number of simultaneous connections to the server at once. Maximum 8. **Do not enable unless you really need to** as this hits the server significantly harder than usual.
#' @param basereq an [httr2 request][httr2::request()] object, as generated by [vb_basereq()]. If `NA`, uses the default request.
#'
#' @return A list of [httr2 response][httr2::response()] objects, as an `ohvbd.responses` object.
#'
#' @examplesIf interactive()
#' fetch_vd(54)
#'
#' fetch_vd(c(423,424,425))
#'
#' ohvbd.ids(c(423,424,425), "vd") |>
#'   fetch() # Calls fetch_vd()
#'
#' @concept vecdyn
#'
#' @export
#'

fetch_vd <- function(ids, rate = 5, connections = 2, basereq = vb_basereq()) {
  max_conns <- 8

  check_provenance(ids, "vd", altfunc = "fetch")

  resp_parsed <- fetch_vd_counts(ids, 50, noprogress = TRUE, basereq = basereq)
  missing <- setdiff(ids, resp_parsed$id)

  if (nrow(resp_parsed) <= 0) {
    # Short-circuit to return cases where no id is correct
    cli::cli_alert_info("Incorrect ids:")
    cli::cli_ul(unique(missing))
    cli::cli_alert_warning(
      "No records retrieved (are you sure the IDs are correct?)."
    )
    return(new_ohvbd.responses(l = list(), db = "vd"))
  }

  cli::cli_alert_info("Found {.val {sum(resp_parsed$num)}} row{?s} of data.")
  # Found by fitting an exponential using nls to the performance benchmarks
  # fmt: skip
  predicted_time <- lubridate::as.duration( # nolint: object_usage_linter
    lubridate::seconds(
      ceiling(
        sum(resp_parsed$num) * (0.01340331 * exp(-0.32535609 * min(connections, 5)))
      )
    )
  )
  cli::cli_alert_info("Predicted to take ~{.val {predicted_time}}.")

  basereq_url <- basereq$url # Should always be set!
  basereq_useragent <- basereq$options$useragent %||% ""
  basereq_unsafe <- !is.null(basereq$options$ssl_verifypeer)

  # Construct a df containing one row with all appropriate params for each request, and then generate reqs for parallel requesting
  reqs_df <- resp_parsed |>
    dplyr::group_by(.data$id) |>
    dplyr::mutate(pages = list(seq(1, .data$pages))) |>
    tidyr::unnest(cols = "pages") |>
    dplyr::ungroup()
  reqs <- mapply(
    vd_make_req,
    reqs_df$id,
    reqs_df$pages,
    5,
    basereq_url,
    basereq_useragent,
    basereq_unsafe,
    SIMPLIFY = FALSE
  )

  if (connections > max_conns) {
    cli::cli_alert_warning(
      "No more than {.val {max_conns}} simultaneous connection{?s} allowed!"
    )
    cli::cli_alert_info("Restricting to {.val {max_conns}} connection{?s}.")
    connections <- max_conns
  }
  resps <- reqs |>
    req_perform_parallel(
      on_error = "continue",
      max_active = connections,
      progress = list(
        name = "VecDyn Data",
        format = "Downloading {cli::pb_name} {cli::pb_current}/{cli::pb_total} {cli::pb_bar} {cli::pb_percent} | ETA: {cli::pb_eta}"
      )
    )

  fails <- resps |> httr2::resps_failures()

  # Test if any failures were missing files (not 404s here, but counts of 0)
  # Realistically at this point there should be none, but it's still worth checking
  missing <- c(missing, find_vd_missing(resps))

  if (!is.null(missing)) {
    if (!(is.numeric(missing) && length(missing) == 0)) {
      cli::cli_alert_info("Incorrect ids:")
      cli::cli_ul(unique(missing))
    }

    # Need an extra check here because failed VD calls don't become 404s.
    if (length(missing) >= length(resps)) {
      cli::cli_alert_warning(
        "No records retrieved (are you sure the IDs are correct?)."
      )
    }
  }

  # Test to see if we got only errors
  if (length(fails) >= length(resps)) {
    # Only got errors!
    cli::cli_alert_warning(
      "No records retrieved (are you sure the IDs are correct?)."
    )
  }

  resps <- new_ohvbd.responses(l = resps, db = "vd")

  return(resps)
}