Nothing
#' @title Fetch VecDyn dataset/s by ID
#' @description Retrieve VecDyn dataset/s specified by their dataset ID.
#' @author Francis Windram
#'
#' @param ids a numeric ID or numeric vector of ids (preferably in an `ohvbd.ids` object) indicating the particular dataset/s to download.
#' @param rate maximum number of calls to the API per second.
#' @param connections number of simultaneous connections to the server at once. Maximum 8. **Do not enable unless you really need to** as this hits the server significantly harder than usual.
#' @param basereq an [httr2 request][httr2::request()] object, as generated by [vb_basereq()]. If `NA`, uses the default request.
#'
#' @return A list of [httr2 response][httr2::response()] objects, as an `ohvbd.responses` object.
#'
#' @examplesIf interactive()
#' fetch_vd(54)
#'
#' fetch_vd(c(423,424,425))
#'
#' ohvbd.ids(c(423,424,425), "vd") |>
#' fetch() # Calls fetch_vd()
#'
#' @concept vecdyn
#'
#' @export
#'
fetch_vd <- function(ids, rate = 5, connections = 2, basereq = vb_basereq()) {
max_conns <- 8
check_provenance(ids, "vd", altfunc = "fetch")
resp_parsed <- fetch_vd_counts(ids, 50, noprogress = TRUE, basereq = basereq)
missing <- setdiff(ids, resp_parsed$id)
if (nrow(resp_parsed) <= 0) {
# Short-circuit to return cases where no id is correct
cli::cli_alert_info("Incorrect ids:")
cli::cli_ul(unique(missing))
cli::cli_alert_warning(
"No records retrieved (are you sure the IDs are correct?)."
)
return(new_ohvbd.responses(l = list(), db = "vd"))
}
cli::cli_alert_info("Found {.val {sum(resp_parsed$num)}} row{?s} of data.")
# Found by fitting an exponential using nls to the performance benchmarks
# fmt: skip
predicted_time <- lubridate::as.duration( # nolint: object_usage_linter
lubridate::seconds(
ceiling(
sum(resp_parsed$num) * (0.01340331 * exp(-0.32535609 * min(connections, 5)))
)
)
)
cli::cli_alert_info("Predicted to take ~{.val {predicted_time}}.")
basereq_url <- basereq$url # Should always be set!
basereq_useragent <- basereq$options$useragent %||% ""
basereq_unsafe <- !is.null(basereq$options$ssl_verifypeer)
# Construct a df containing one row with all appropriate params for each request, and then generate reqs for parallel requesting
reqs_df <- resp_parsed |>
dplyr::group_by(.data$id) |>
dplyr::mutate(pages = list(seq(1, .data$pages))) |>
tidyr::unnest(cols = "pages") |>
dplyr::ungroup()
reqs <- mapply(
vd_make_req,
reqs_df$id,
reqs_df$pages,
5,
basereq_url,
basereq_useragent,
basereq_unsafe,
SIMPLIFY = FALSE
)
if (connections > max_conns) {
cli::cli_alert_warning(
"No more than {.val {max_conns}} simultaneous connection{?s} allowed!"
)
cli::cli_alert_info("Restricting to {.val {max_conns}} connection{?s}.")
connections <- max_conns
}
resps <- reqs |>
req_perform_parallel(
on_error = "continue",
max_active = connections,
progress = list(
name = "VecDyn Data",
format = "Downloading {cli::pb_name} {cli::pb_current}/{cli::pb_total} {cli::pb_bar} {cli::pb_percent} | ETA: {cli::pb_eta}"
)
)
fails <- resps |> httr2::resps_failures()
# Test if any failures were missing files (not 404s here, but counts of 0)
# Realistically at this point there should be none, but it's still worth checking
missing <- c(missing, find_vd_missing(resps))
if (!is.null(missing)) {
if (!(is.numeric(missing) && length(missing) == 0)) {
cli::cli_alert_info("Incorrect ids:")
cli::cli_ul(unique(missing))
}
# Need an extra check here because failed VD calls don't become 404s.
if (length(missing) >= length(resps)) {
cli::cli_alert_warning(
"No records retrieved (are you sure the IDs are correct?)."
)
}
}
# Test to see if we got only errors
if (length(fails) >= length(resps)) {
# Only got errors!
cli::cli_alert_warning(
"No records retrieved (are you sure the IDs are correct?)."
)
}
resps <- new_ohvbd.responses(l = resps, db = "vd")
return(resps)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.