# R/fetch_ad.R
#
# Defines function: fetch_ad
#
# Documented in: fetch_ad

#' @title Fetch AREAdata dataset
#' @description Retrieve AREAdata dataset/s specified by metric and spatial scale (GID).
#' @author Francis Windram
#'
#' @param metric the metric to retrieve from areadata.
#' @param gid the spatial scale to retrieve (0 = country-level, 1 = province-level,
#'   2 = county-level). Note that `popdens` and `forecast` are not available at
#'   GID level 2 and fall back to GID level 1 with a warning.
#' @param use_cache load files from cache if possible, and save them if not present.
#' @param cache_location path to cache location (defaults to a temporary user directory, or one set by [set_default_ohvbd_cache()]).
#' @param refresh_cache force a refresh of the relevant cached data (and enables use_cache).
#' @param timeout timeout for data download from figshare/github in seconds.
#' @param basereq the url of the AREAdata database (usually generated by [ad_basereq()]). If `NA`, uses the default.
#'
#' @return A `ohvbd.ad.matrix` of the requested data (with added attributes for gid and metric).
#'
#' @section Valid metrics:
#' The following metrics are valid (alternative names are listed in brackets):
#' - `temp` (*temperature*)
#' - `spechumid` (*specific humidity*)
#' - `relhumid` (*relative humidity*)
#' - `uv` (*ultraviolet*)
#' - `precip` (*precipitation, rainfall*)
#' - `popdens` (*population density, population*)
#' - `forecast` (*future climate, future*)
#'
#' @examplesIf interactive()
#' fetch_ad(metric="temp", gid=0)
#'
#' @concept areadata
#'
#' @export
#'

fetch_ad <- function(
  metric = "temp",
  gid = 0,
  use_cache = TRUE,
  cache_location = NULL,
  refresh_cache = FALSE,
  timeout = 240,
  basereq = ad_basereq()
) {

  # Only GID levels 0-2 exist in AREAdata. Anything else would index
  # `gid_str`/`loadloc` out of range below and silently mislabel the result,
  # so fail fast with a clear message.
  if (length(gid) != 1L || is.na(gid) || !gid %in% 0:2) {
    cli::cli_abort("{.arg gid} must be 0, 1, or 2, not {.val {gid}}.")
  }

  # A forced refresh implies the cache is in use.
  if (refresh_cache) {
    use_cache <- TRUE
  }

  if (gid > 1 && !use_cache) {
    cli::cli_alert_warning("GID2 datasets are quite large.")
    cli::cli_alert_info(
      "It is recommended to set {.arg use_cache=TRUE} to enable caching."
    )
  }

  cache_location <- cache_location %||% get_default_ohvbd_cache("adcache")

  loaded_cache <- FALSE
  final_url <- paste0(basereq, "output/")
  # Synonym table: every accepted user-facing name maps to a canonical
  # metric id (1-7), which in turn indexes `final_metrics`.
  poss_metrics <- c(
    "temp" = 1,
    "temperature" = 1,
    "spechumid" = 2,
    "specific humidity" = 2,
    "relhumid" = 3,
    "relative humidity" = 3,
    "uv" = 4,
    "ultraviolet" = 4,
    "precip" = 5,
    "precipitation" = 5,
    "rainfall" = 5,
    "popdens" = 6,
    "population density" = 6,
    "population" = 6,
    "forecast" = 7,
    "future" = 7,
    "future climate" = 7
  )
  final_metrics <- c(
    "temp",
    "spechumid",
    "relhumid",
    "uv",
    "precip",
    "popdens",
    "forecast"
  )

  matched_metric_list <- .match_term(metric, poss_metrics, final_metrics, default_term = "temp", term_name = "metric", named_options = TRUE)
  final_metric <- matched_metric_list$term
  metricid <- matched_metric_list$id

  # NULL = "nothing loaded yet". (The previous NA sentinel, checked with
  # any(!is.na(outmat)), misclassified a legitimately cached all-NA matrix as
  # a cache miss and forced a re-download — and scanned the whole matrix to
  # do so.)
  outmat <- NULL
  # Try to load cache
  if (use_cache && !refresh_cache) {
    outmat <- tryCatch(
      {
        cli::cli_progress_message(
          "{cli::symbol$pointer} Loading AREAdata cache: {final_metric}-{gid} ..."
        )
        suppressWarnings(read_ad_cache(final_metric, gid, cache_location))
      },
      error = function(e) {
        cli::cli_alert_danger("Failed to load AREAdata cache: {final_metric}-{gid}!")
        NULL
      }
    )
    # Defensive: if the cache reader signals a miss by returning a bare
    # scalar NA rather than erroring, treat that as a miss too.
    if (length(outmat) == 1L && is.atomic(outmat) && is.na(outmat)) {
      outmat <- NULL
    }
  }

  if (!is.null(outmat)) {
    loaded_cache <- TRUE
    cli::cli_alert_success("Loaded AREAdata cache {final_metric}-{gid}.")
  }

  if (!loaded_cache) {
    # GID0/GID1 files live on github (via basereq); GID2 files on figshare.
    loadloc <- c("github", "github", "figshare") # nolint: object_usage_linter
    cli::cli_progress_message(
      "{cli::symbol$pointer} Loading AREAdata {final_metric}-{gid} from {loadloc[gid + 1]}..."
    )
    gid_str <- c("countries", "GID1", "GID2")[gid + 1]

    if (gid < 2) {
      if (metricid <= 5) {
        # Daily Climate
        # fmt: skip
        final_url <- paste0(final_url, final_metric, "-dailymean-", gid_str, "-cleaned.RDS")
      } else if (metricid == 6) {
        # Population Density
        # fmt: skip
        final_url <- paste0(final_url, "population-density-", gid_str, ".RDS")
      } else {
        # Future climate Scenario Forecasts
        # fmt: skip
        final_url <- paste0(final_url, "annual-mean-temperature-forecast-", gid_str, ".RDS")
      }
    } else {
      if (metricid <= 5) {
        # GID2 climate files are too big for github: look up the download url
        # for the matching .RDS file in the AREAdata figshare article.
        # fmt: skip
        figshare_resp <- httr2::request("https://api.figshare.com/v2/articles/") |>
          httr2::req_user_agent("ROHVBD") |>
          httr2::req_url_path_append(16587311) |>
          httr2::req_perform()
        figshare_data <- figshare_resp |> httr2::resp_body_json()
        figshare_df <- data.table::rbindlist(figshare_data$files)
        # Filter only rds files
        figshare_df <- figshare_df[which(stringr::str_detect(figshare_df$name, stringr::fixed(".RDS"))), ]
        figshare_df <- figshare_df[which(stringr::str_detect(figshare_df$name, "GID|countries")), ]
        # File names look like "<metric>-<agg>-<gid>-<cleaned>.<ext>"; split
        # them into columns so we can filter by metric and GID level.
        figshare_df <- figshare_df |>
          tidyr::separate_wider_delim(
            "name",
            delim = "-",
            names = c("metric", "agg", "gid", "cleaned")) |>
          tidyr::separate_wider_delim(
            "cleaned",
            delim = ".",
            names = c("cleaned", "fileext")) |>
          dplyr::select(-any_of(c("agg", "cleaned"))) |> # Drop unnecessary columns
          # NB: data masking means `gid` here is the column, not the argument.
          dplyr::filter(gid == "GID2") # Get only GID2

        # Could throw an error if not found (yields NA, caught below). Might
        # have to handle that more gracefully later if necessary.
        final_url <- figshare_df$download_url[which(
          figshare_df$metric == final_metric
        )][1]
      } else if (metricid == 6) {
        cli::cli_alert_warning(
          "{.val {final_metric}} not available at GID level 2. Defaulting to GID level 1..."
        )
        final_url <- paste0(final_url, "population-density-GID1.RDS")
      } else {
        cli::cli_alert_warning(
          "{.val {final_metric}} not available at GID level 2. Defaulting to GID level 1..."
        )
        final_url <- paste0(
          # Temperature _forecast_ is not the same as temp-dailymean-GID1-cleaned.RDS
          final_url,
          "annual-mean-temperature-forecast-GID1.RDS"
        )
      }
    }

    if (is.na(final_url)) {
      cli::cli_abort("Final AD download url is blank!", .internal = TRUE)
    }

    # Handle download timeout: temporarily widen options(timeout) for the
    # (possibly very large) RDS download.
    timeout_bak <- getOption("timeout")
    outmat <- tryCatch(
      {
        options(timeout = timeout)
        m <- suppressWarnings(readRDS(url(final_url)))
        cli::cli_alert_success(
          "Loaded AREAdata {final_metric}-{gid} from {loadloc[gid + 1]}."
        )
        m
      },
      error = function(e) {
        NULL
      },
      finally = {
        # Make sure timeout gets reset no matter what happens
        options(timeout = timeout_bak)
      }
    )
    if (is.null(outmat)) {
      cli::cli_progress_done()
      cli::cli_abort(c(
        "x" = "Failed to load AREAdata {final_metric}-{gid} from {loadloc[gid + 1]}.",
        "!" = "Try increasing the {.arg timeout} parameter."
      ))
    }

    # Add attributes to matrix to allow easier parsing later down the line
    outmat <- new_ohvbd.ad.matrix(
      m = outmat,
      metric = final_metric,
      gid = gid,
      cached = FALSE,
      db = "ad",
      writetime = lubridate::now()
    )
  }

  # Write (or refresh) the cache for anything we just downloaded.
  if (use_cache) {
    if (!loaded_cache || refresh_cache) {
      cli::cli_progress_message(
        "{cli::symbol$pointer} Caching AREAdata {final_metric}-{gid} in {.path {cache_location}}..."
      )
      write_ad_cache(
        outmat,
        metric = final_metric,
        gid = gid,
        path = cache_location,
        format = "rda"
      )
      cli::cli_alert_success(
        "Cached AREAdata {final_metric}-{gid} in {.path {cache_location}}."
      )
    }
  }

  cli::cli_progress_done()

  return(outmat)
}

# Try the ohvbd package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ohvbd documentation built on March 10, 2026, 1:07 a.m.