R/fetch_vd_meta.R

Defines functions fetch_vd_counts fetch_vd_meta

Documented in fetch_vd_counts fetch_vd_meta

#' @title Fetch VecDyn metadata table
#' @description Fetch VecDyn metadata table (downloading if necessary) and cache if fresh.
#' @author Francis Windram
#'
#' @param ids a numeric ID or numeric vector of ids (preferably in an `ohvbd.ids` object) indicating the particular dataset/s to download.
#' @param cache_location path to cache location (defaults to a temporary user directory, or one set by [set_default_ohvbd_cache()]).
#' @param refresh_cache force a refresh of the relevant cached data.
#' @param noprogress disable non-essential messaging (progress bars etc.).
#' @param basereq an [httr2 request][httr2::request()] object, as generated by [vb_basereq()]. If `NA`, uses the default request.
#'
#' @return A dataframe describing the current VecDyn metadata.
#'
#' @examplesIf interactive()
#' fetch_vd_meta_table()
#'
#' @concept internal
fetch_vd_meta <- function(ids = NULL, cache_location = NULL, refresh_cache = FALSE, noprogress = FALSE, basereq = vb_basereq()) {
  cache_loaded <- FALSE

  if (!is.null(ids)) {
    check_provenance(ids, "vd", altfunc = "fetch", altfunc_suffix = "meta")
  }

  cache_location <- cache_location %||% get_default_ohvbd_cache("vecdyn")
  file_location <- file.path(cache_location, "vd_meta_table.rds")

  # Check cache
  if (file.exists(file_location) && !refresh_cache) {
    meta_table <- readRDS(file_location)
    cached_writetime <- attr(meta_table, "writetime", exact = TRUE)
    # If timestamp is present and data is not stale then load and call it a day
    if (!is.null(cached_writetime) && lubridate::now() - cached_writetime < days(1)) {
      cache_loaded <- TRUE
      if (!noprogress) {
        cli::cli_alert_success("Loaded vd metadata from cache.")
      }
    } else {
      cli::cli_alert_warning("Cached metadata table is stale and must be re-downloaded. Download timestamp: {cached_writetime}.")
    }
  }

  # If we didn't load (or decided not to use) cached data, download it fresh!
  if (!cache_loaded) {
    progress_format <- list(format = "{cli::pb_spin} Downloading VecDyn metadata table...")
    if (noprogress) {
      progress_format <- FALSE
    }

    # Download metadata table from vecdyn
    res <- basereq |>
      httr2::req_url_path_append("vecdynbyprovider") |>
      httr2::req_throttle(5) |>
      httr2::req_perform_iterative(
        httr2::iterate_with_offset("page", resp_complete = \(resp) {
          # Check to make sure there's not another page to find.
          httr2::resp_body_json(resp)$data["next"] == "NULL"
        }),
        progress = progress_format)

    # Extract and parse multi-element lists
    meta_table <- res |>
      resps_successes() |>
      resps_data(\(resp) resp_body_json(resp)$data$results)

    meta_table <- sapply(meta_table, \(x) {
      x["SpeciesName"] <- paste(unlist(x["SpeciesName"]), collapse = ", ")
      x["Years"] <- paste(unlist(x["Years"]), collapse = ", ")
      x["Tags"] <- paste(unlist(x["Tags"]), collapse = ", ")
      x["CollectionMethods"] <- paste(unlist(x["CollectionMethods"]), collapse = ", ")
      x["AverageGPSCoords"] <- paste(unlist(x["AverageGPSCoords"]), collapse = ", ")
      x
    })

    meta_table <- as.data.frame(t(meta_table))
    # Convert all columns to vectors
    numeric_cols <- c("Id", "Collections", "row_count")
    for (cname in colnames(meta_table)) {
      if (cname %in% numeric_cols) {
        meta_table[,cname] <- suppressWarnings(as.numeric(as.character(meta_table[,cname])))
      } else {
        meta_table[,cname] <- as.character(meta_table[,cname])
        # Just make sure "null" and "" are represented as NA
        meta_table[which(tolower(meta_table[,cname]) == "null") ,cname] <- NA
        meta_table[which(tolower(meta_table[,cname]) == "") ,cname] <- NA
      }
    }
    attr(meta_table, "writetime") <- lubridate::now()
  }

  # Save to cache if we performed a download
  if (!cache_loaded) {
    saveRDS(meta_table, file_location)
    if (!noprogress) {
      cli::cli_alert_success("Saved to cache.")
    }
  }

  if (!is.null(ids)) {
    meta_table <- subset(meta_table, meta_table$Id %in% ids)
  }

  return(meta_table)
}

#' @title Fetch VecDyn dataset length by ID
#' @description Retrieve length of VecDyn dataset/s specified by their dataset ID.
#' @author Francis Windram
#'
#' @param ids a numeric ID or numeric vector of ids (preferably in an `ohvbd.ids` object) indicating the particular dataset/s to download.
#' @param page_size the page size returned by VecDyn (default is 50).
#' @param cache_location path to cache location (defaults to a temporary user directory, or one set by [set_default_ohvbd_cache()]).
#' @param refresh_cache force a refresh of the relevant cached data.
#' @param noprogress disable non-essential messaging (progress bars etc.).
#' @param basereq an [httr2 request][httr2::request()] object, as generated by [vb_basereq()]. If `NA`, uses the default request.
#'
#' @return A dataframe describing the number of rows and number of pages for the set of ids.
#'
#' @examplesIf interactive()
#' fetch_vd_counts(54)
#'
#' fetch_vd_counts(c(423,424,425))
#'
#' @concept vecdyn
#'
#' @export

fetch_vd_counts <- function(
    ids,
    page_size = 50,
    cache_location = NULL,
    refresh_cache = FALSE,
    noprogress = FALSE,
    basereq = vb_basereq()
) {
  meta_table <- fetch_vd_meta(
    ids,
    cache_location,
    refresh_cache,
    noprogress,
    basereq
  )
  # Find counts and calculate required pages
  count_df <- data.frame(id = meta_table$Id, num = meta_table$row_count)
  count_df$pages <- ceiling(count_df$num / page_size)
  return(count_df)
}

Try the ohvbd package in your browser

Any scripts or data that you put into this service are public.

ohvbd documentation built on March 10, 2026, 1:07 a.m.