R/ids_get.R

Defines functions filter_post_actual_na process_time_range validate_year validate_progress validate_string validate_character_vector process_character_vector process_debt_statistics create_resource_url create_progress_message get_debt_statistics ids_get

Documented in ids_get

#' Fetch Data from the World Bank International Debt Statistics (IDS) API
#'
#' Retrieves standardized debt statistics from the World Bank's International
#' Debt Statistics (IDS) database, which provides comprehensive data on the
#' external debt of low and middle-income countries. The function handles
#' country identification, data validation, and unit standardization, making it
#' easier to conduct cross-country debt analysis and monitoring.
#'
#' @param entities A character vector of entity identifiers representing
#'   debtor countries and aggregates. Must use `entity_id` from
#'   \link{ids_list_entities}:
#'   * For individual countries, use ISO3C codes (e.g., "GHA" for Ghana)
#'   * For aggregates, use World Bank codes (e.g., "LIC" for low income
#'     countries)
#'   The IDS database covers low and middle-income countries and related
#'   aggregates only. Cannot contain NA values.
#'
#' @param series A character vector of debt statistics series identifiers that
#'   must match the `series_id` column from \link{ids_list_series}. Each
#'   series represents a specific debt statistic (e.g., "DT.DOD.DECT.CD" for
#'   total external debt stocks, "DT.TDS.DECT.CD" for debt service payments).
#'   Cannot contain NA values.
#
#' @param counterparts A character vector of creditor identifiers that must
#'   match the `counterpart_id` column from \link{ids_list_counterparts}. The
#'   default "WLD" returns aggregated global totals across all creditors.
#'   Common options:
#'   * "WLD" - World total (aggregated across all creditors)
#'   * "all" - Retrieve data broken down by all creditors
#'   * All identifiers are strings, but some are string-formatted numbers
#'     (e.g., "730" for China, "907" for IMF), while others are alphabetic
#'     codes (e.g., "BND" for bondholders)
#'   Cannot contain NA values.
#
#' @param start_year A numeric value representing the starting year (default:
#'   2000). This default is intended to reduce data volume. For historical
#'   analysis, explicitly set to 1970 (the earliest year of data available).
#'
#' @param end_year A numeric value representing the ending year (default: NULL).
#'   Must be >= 1970 and cannot be earlier than start_year. If NULL, returns
#'   data through the most recent available year. Some debt service-related
#'   series include projections of debt service. For the 2024 data release,
#'   debt service projections are available through 2031.
#'
#' @param progress A logical value indicating whether to display progress
#'   messages during data retrieval (default: FALSE).
#'
#' @param geographies `r lifecycle::badge("superseded")`
#'   Superseded in favor of `entities`. If supplied, it will be mapped to
#'   `entities` with a warning. Do **not** use together with `entities`.
#'
#' @return A tibble containing debt statistics with the following columns:
#' \describe{
#'   \item{entity_id}{The identifier for the debtor entity (e.g., "GHA"
#'     for Ghana, "LIC" for low income countries)}
#'   \item{series_id}{The identifier for the debt statistic series (e.g.,
#'     "DT.DOD.DECT.CD" for total external debt stocks)}
#'   \item{counterpart_id}{The identifier for the creditor (e.g., "WLD" for
#'     world total, "730" for China)}
#'   \item{year}{The year of the observation}
#'   \item{value}{The numeric value of the debt statistic, standardized to the
#'     units specified in the series definition (typically current US dollars)}
#' }
#'
#' @section Data Coverage and Validation:
#' The IDS database provides detailed debt statistics for low and middle-income
#' countries, including:
#' * Debt stocks and flows
#' * Debt service and interest payments
#' * Creditor composition
#' * Terms and conditions of new commitments
#'
#' To ensure valid queries:
#' * Use \link{ids_list_entities} to find valid debtor entity codes
#' * Use \link{ids_list_series} to explore available debt statistics
#' * Use \link{ids_list_counterparts} to see available creditor codes
#'
#' @examples
#' \donttest{
#' # Get total external debt stocks for a single country from 2000 onward
#' ghana_debt <- ids_get(
#'   entities = "GHA",
#'   series = "DT.DOD.DECT.CD"  # External debt stocks, total
#' )
#'
#' # Compare debt service metrics across income groups
#' income_groups <- ids_get(
#'   entities = c("LIC", "LMC", "UMC"),  # Income group aggregates
#'   series = "DT.TDS.DECT.CD",  # Total debt service
#'   start_year = 2010
#' )
#'
#' # Analyze debt composition by major creditors
#' creditor_analysis <- ids_get(
#'   entities = c("KEN", "ETH"),  # Kenya and Ethiopia
#'   series = c(
#'     "DT.DOD.DECT.CD",  # Total external debt
#'     "DT.TDS.DECT.CD"   # Total debt service
#'   ),
#'   counterparts = c(
#'     "WLD",  # World total
#'     "730",  # China
#'     "907",  # IMF
#'     "BND"   # Bondholders
#'   ),
#'   start_year = 2015
#' )
#' }
#'
#' @seealso
#' * `ids_list_entities()` for available debtor entity codes
#' * `ids_list_series()` for available debt statistics series codes
#' * `ids_list_counterparts()` for available creditor codes
#'
#' @export
ids_get <- function(
  entities,
  series,
  counterparts = "WLD",
  start_year = 2000,
  end_year = NULL,
  progress = FALSE,
  geographies = lifecycle::deprecated()
) {
  # Handle superseded argument
  #nocov start
  if (lifecycle::is_present(geographies)) {
    lifecycle::deprecate_warn(
      when = "1.1.0",
      what = "ids_get(geographies =)",
      with = "ids_get(entities =)"
    )

    if (missing(entities) || is.null(entities)) {
      entities <- geographies
    } else {
      cli::cli_abort(
        c(
          "!" = "Can't supply both {.arg entities} and {.arg geographies}.",
          "i" = "Use {.arg entities} only."
        )
      )
    }
  }
  #nocov end

  # Validate arguments
  validate_progress(progress)

  # Process character vectors and dates into semicolon-separated strings
  entities <- process_character_vector(entities, "entities")
  series <- process_character_vector(series, "series")
  counterparts <- process_character_vector(counterparts, "counterparts")
  time <- process_time_range(start_year, end_year)

  # Fetch debt statistics
  debt_statistics_raw <- get_debt_statistics(
    entities,
    series,
    counterparts,
    time,
    progress
  )

  # Process debt statistics
  debt_statistics <- process_debt_statistics(debt_statistics_raw)

  # Apply specific filtering logic for years beyond latest actual data
  debt_statistics <- filter_post_actual_na(debt_statistics)

  debt_statistics
}

#' Fetch Debt Statistics from the World Bank International Debt Statistics API
#'
#' This function is a helper function for `ids_get()`. It constructs the API
#' request URL, performs the request, and returns the raw data.
#'
#' @param entity A character string representing the entity code
#'  (e.g., "ZMB" for Zambia).
#' @param series A character string representing the series code (e.g.,
#'  "DT.DOD.DPPG.CD").
#' @param counterpart A character string representing the counterpart area
#'  (e.g., "all", "001").
#' @param time A character string representing the time range for the request
#'  (e.g., "YR2015;YR2016;YR2017").
#' @param progress A logical value or a character string. If `TRUE`, a default
#'  progress message is displayed. If a character string, it is used as the
#'  progress message. If `FALSE`, no progress message is displayed.
#'
#' @return A list containing the raw debt statistics data returned by the API.
#'
#' @noRd
#' @keywords internal
get_debt_statistics <- function(
  entity,
  series,
  counterpart,
  time,
  progress
) {
  # Create progress message if progress is TRUE
  progress_message <- create_progress_message(
    progress,
    series,
    entity,
    counterpart,
    time
  )

  # Create resource URL
  resource <- create_resource_url(entity, series, counterpart, time)

  # Perform API request
  perform_request(resource, progress = progress_message)
}

#' Create Progress Message
#'
#' Creates a progress message for API requests if progress tracking is enabled.
#'
#' @param progress A logical value indicating whether to show progress
#' @param series The series code
#' @param entity The entity code
#' @param counterpart The counterpart code
#' @param time The time period
#'
#' @return Character string with progress message or FALSE
#'
#' @noRd
#' @keywords internal
create_progress_message <- function(
  progress,
  series,
  entity,
  counterpart,
  time
) {
  if (!progress) {
    return(FALSE)
  }

  paste(
    "Fetching series",
    series,
    "for entity",
    entity,
    ", counterpart",
    counterpart,
    ", and time",
    time
  )
}

#' Create Resource URL
#'
#' Creates the resource URL path for the API request.
#'
#' @param entity The semicolon-separated entity codes
#' @param series The semicolon-separated series codes
#' @param counterpart The semicolon-separated counterpart codes
#' @param time The semicolon-separated time periods
#'
#' @return Character string containing the resource URL path
#'
#' @noRd
#' @keywords internal
create_resource_url <- function(entity, series, counterpart, time) {
  url <- paste0(
    "country/",
    entity,
    "/series/",
    series,
    "/counterpart-area/",
    counterpart,
    "/time/",
    time
  )

  validate_string(url, 4000, "resource URL")

  url
}

#' Process Raw Debt Statistics Data
#'
#' This function processes the raw debt statistics data returned by the API
#' and transforms it into a tidy tibble.
#'
#' @param series_raw A list containing the raw debt statistics data.
#'
#' @return A tibble with the following columns:
#'   \describe{
#'     \item{entity_id}{The unique identifier for the entity.}
#'     \item{series_id}{The unique identifier for the series.}
#'     \item{counterpart_id}{The unique identifier for the counterpart.}
#'     \item{year}{The year corresponding to the data.}
#'     \item{value}{The numeric value representing the statistic.}
#'   }
#'
#' @noRd
#' @keywords internal
process_debt_statistics <- function(series_raw) {
  if (length(series_raw[[1]]$variable[[1]]$concept) == 0) {
    tibble::tibble(
      "entity_id" = character(),
      "series_id" = character(),
      "counterpart_id" = character(),
      "year" = integer(),
      "value" = numeric()
    )
  } else {
    series_df <- series_raw |>
      bind_rows() |>
      unnest_wider("variable", names_sep = "_")

    tibble::tibble(
      entity_id = series_df$variable_id[
        series_df$variable_concept == "Country"
      ],
      series_id = series_df$variable_id[
        series_df$variable_concept == "Series"
      ],
      counterpart_id = series_df$variable_id[
        series_df$variable_concept == "Counterpart-Area"
      ],
      year = as.integer(series_df$variable_value[
        series_df$variable_concept == "Time"
      ]),
      value = purrr::map_dbl(
        series_raw,
        \(x) ifelse(is.null(x$value), NA_real_, x$value)
      )
    )
  }
}

#' Process Character Vector to String
#'
#' Validates and converts a character vector into a semicolon-separated string
#' for API requests.
#'
#' @param arg The character vector to process
#' @param arg_name The name of the argument (for error messages)
#'
#' @return A semicolon-separated string
#'
#' @noRd
#' @keywords internal
process_character_vector <- function(arg, arg_name) {
  validate_character_vector(arg, arg_name)
  semicolon_separated <- paste(arg, collapse = ";")
  validate_string(semicolon_separated, 1500L, arg_name)
  semicolon_separated
}

#' Validate Character Vector
#'
#' This function validates that the input argument is a character vector and
#' does not contain any NA values.
#'
#' @param arg The argument to validate.
#' @param arg_name The name of the argument (used in error messages).
#'
#' @return This function does not return a value. It throws an error if the
#'   argument is invalid.
#'
#' @noRd
#' @keywords internal
validate_character_vector <- function(arg, arg_name) {
  if (!is.character(arg) || any(is.na(arg))) {
    cli::cli_abort(paste(
      "{.arg {arg_name}} must be a character vector and cannot contain ",
      "NA values."
    ))
  }
  if (length(arg) > 60L) {
    cli::cli_abort(c(
      paste(
        "{.arg {arg_name}} must be a character vector with 60 or fewer values."
      ),
      "i" = paste(
        "For larger requests, consider using {.fn ids_bulk} to download the",
        "complete dataset. See {.fn ids_bulk_files} for available files."
      )
    ))
  }
}

#' Validate String Length
#'
#' This function validates that the input string is shorter than a specified
#' length.
#'
#' @param str The string to validate
#' @param length The maximum length allowed for the string
#' @param arg_name The name of the argument (used in error messages)
#'
#' @return This function does not return a value. It throws an error if the
#'   string is too long.
#'
#' @noRd
#' @keywords internal
validate_string <- function(str, length, arg_name) {
  if (nchar(str) >= length) {
    cli::cli_abort(c(
      paste(
        "Concatenated {.arg {arg_name}} string must be less than {length}",
        "characters."
      ),
      "i" = paste(
        "For large data requests, consider using {.fn ids_bulk} to download",
        "the complete dataset. See {.fn ids_bulk_files} for available files."
      )
    ))
  }
}

#' Validate Progress Argument
#'
#' This function validates that the progress argument is a logical value
#' (`TRUE` or `FALSE`).
#'
#' @param progress The progress argument to validate.
#'
#' @return This function does not return a value. It throws an error if the
#'   progress argument is invalid.
#'
#' @noRd
#' @keywords internal
validate_progress <- function(progress) {
  if (!is.logical(progress)) {
    cli::cli_abort(
      "{.arg progress} must be either TRUE or FALSE."
    )
  }
}

#' Validate Year Input
#'
#' Helper function to validate a year input is numeric and single value.
#' If year < 1970, issues a warning and returns 1970.
#'
#' @param year The year to validate
#' @param arg_name The argument name for error messages
#'
#' @return The validated year
#'
#' @noRd
#' @keywords internal
validate_year <- function(year, arg_name) {
  if (!is.numeric(year) || length(year) != 1) {
    cli::cli_abort(paste(
      "{.arg {arg_name}} must be a single numeric value."
    ))
  }

  if (year < times$time_year[1] && arg_name == "start_year") {
    cli::cli_warn(c(
      "!" = "Data only available from {times$time_year[1]} onward.",
      "i" = "Setting {.arg {arg_name}} to {times$time_year[1]}."
    ))
    return(times$time_year[1])
  } else if (year < times$time_year[1] && arg_name == "end_year") {
    # Raise an error if end_year is before 1970
    cli::cli_abort(c(
      "!" = "Data only available from {times$time_year[1]} onward."
    ))
  }

  year
}

#' Process Time Range
#'
#' Validates and converts start and end dates into a semicolon-separated string
#' of years for API requests.
#'
#' @param start_year The starting year (optional).
#' @param end_year The ending year (optional).
#'
#' @return A character string representing the time range for the API request.
#'
#' @noRd
#' @keywords internal
process_time_range <- function(start_year, end_year) {
  if (!is.null(end_year)) {
    end_year <- validate_year(end_year, "end_year")
  }
  if (!is.null(start_year)) {
    start_year <- validate_year(start_year, "start_year")
  }

  # Create time string
  if (!is.null(start_year) && !is.null(end_year)) {
    if (start_year > end_year) {
      cli::cli_abort(
        "{.arg start_year} cannot be greater than {.arg end_year}."
      )
    }
    paste(
      times$time_id[
        times$time_year >= start_year & times$time_year <= end_year
      ],
      collapse = ";",
      sep = ""
    )
  } else if (!is.null(start_year)) {
    paste(
      times$time_id[times$time_year >= start_year],
      collapse = ";",
      sep = ""
    )
  } else {
    "all"
  }
}

#' Filter Data for Years Beyond Latest Observed Data
#'
#' This function filters out rows for projection years that have no data.
#' It assumes the latest actual data is from the current or previous year,
#' and removes future years where all values are NA.
#'
#' @param data The data to filter.
#'
#' @return The filtered data.
#'
#' @noRd
#' @keywords internal
filter_post_actual_na <- function(data) {
  # Assume latest actual data could be up to 2 years behind current year
  # (e.g., in early 2025, latest data might still be 2023)
  current_year <- as.integer(format(Sys.Date(), "%Y"))
  cutoff_year <- current_year - 2

  # Check if all values after the cutoff year are NA
  data_after_cutoff <- data |>
    filter(.data$year > cutoff_year)

  if (nrow(data_after_cutoff) > 0 && all(is.na(data_after_cutoff$value))) {
    data <- data |>
      filter(.data$year <= cutoff_year)
  }

  data
}

Try the wbids package in your browser

Any scripts or data that you put into this service are public.

wbids documentation built on Dec. 20, 2025, 9:06 a.m.