R/req-retries.R

Defines functions backoff_default is_number_or_na retry_after retry_backoff retry_is_transient retry_max_seconds retry_max_tries req_retry

Documented in req_retry

#' Control when a request will retry, and how long it will wait between tries
#'
#' @description
#' `req_retry()` alters [req_perform()] so that it will automatically retry
#' in the case of failure. To activate it, you must specify either the total
#' number of requests to make with `max_tries` or the total amount of time
#' to spend with `max_seconds`. Then `req_perform()` will retry if the error is
#' "transient", i.e. it's an HTTP error that can be resolved by waiting. By
#' default, 429 and 503 statuses are treated as transient, but if the API you
#' are wrapping has other transient status codes (or conveys transient-ness
#' with some other property of the response), you can override the default
#' with `is_transient`.
#'
#' Additionally, if you set `retry_on_failure = TRUE`, the request will retry
#' if either the HTTP request or HTTP response doesn't complete successfully
#' leading to an error from curl, the lower-level library that httr2 uses to
#' perform HTTP request. This occurs, for example, if your wifi is down.
#'
#' It's a bad idea to immediately retry a request, so `req_perform()` will
#' wait a little before trying again:
#'
#' * If the response contains the `Retry-After` header, httr2 will wait the
#'   amount of time it specifies. If the API you are wrapping conveys this
#'   information with a different header (or other property of the response)
#'   you can override the default behaviour with `retry_after`.
#'
#' * Otherwise, httr2 will use "truncated exponential backoff with full
#'   jitter", i.e. it will wait a random amount of time between one second and
#'   `2 ^ tries` seconds, capped to at most 60 seconds. In other words, it
#'   waits `runif(1, 1, 2)` seconds after the first failure, `runif(1, 1, 4)`
#'   after the second, `runif(1, 1, 8)` after the third, and so on. If you'd
#'   prefer a different strategy, you can override the default with `backoff`.
#'
#' @inheritParams req_perform
#' @param max_tries,max_seconds Cap the maximum number of attempts with
#'   `max_tries` or the total elapsed time from the first request with
#'   `max_seconds`. If neither option is supplied (the default), [req_perform()]
#'   will not retry.
#'
#'   `max_tries` is the total number of attempts make, so this should always
#'   be greater than one.`
#' @param is_transient A predicate function that takes a single argument
#'   (the response) and returns `TRUE` or `FALSE` specifying whether or not
#'   the response represents a transient error.
#' @param retry_on_failure Treat low-level failures as if they are
#'   transient errors, and can be retried.
#' @param backoff A function that takes a single argument (the number of failed
#'   attempts so far) and returns the number of seconds to wait.
#' @param after A function that takes a single argument (the response) and
#'   returns either a number of seconds to wait or `NULL`, which indicates
#'   that a precise wait time is not available that the `backoff` strategy
#'   should be used instead..
#' @returns A modified HTTP [request].
#' @export
#' @seealso [req_throttle()] if the API has a rate-limit but doesn't expose
#'   the limits in the response.
#' @examples
#' # google APIs assume that a 500 is also a transient error
#' request("http://google.com") |>
#'   req_retry(is_transient = \(resp) resp_status(resp) %in% c(429, 500, 503))
#'
#' # use a constant 10s delay after every failure
#' request("http://example.com") |>
#'   req_retry(backoff = ~10)
#'
#' # When rate-limited, GitHub's API returns a 403 with
#' # `X-RateLimit-Remaining: 0` and an Unix time stored in the
#' # `X-RateLimit-Reset` header. This takes a bit more work to handle:
#' github_is_transient <- function(resp) {
#'   resp_status(resp) == 403 &&
#'     identical(resp_header(resp, "X-RateLimit-Remaining"), "0")
#' }
#' github_after <- function(resp) {
#'   time <- as.numeric(resp_header(resp, "X-RateLimit-Reset"))
#'   time - unclass(Sys.time())
#' }
#' request("http://api.github.com") |>
#'   req_retry(
#'     is_transient = github_is_transient,
#'     after = github_after
#'   )
req_retry <- function(req,
                      max_tries = NULL,
                      max_seconds = NULL,
                      retry_on_failure = FALSE,
                      is_transient = NULL,
                      backoff = NULL,
                      after = NULL) {
  check_request(req)
  check_number_whole(max_tries, min = 2, allow_null = TRUE)
  check_number_whole(max_seconds, min = 0, allow_null = TRUE)
  check_bool(retry_on_failure)

  req_policies(req,
    retry_max_tries = max_tries,
    retry_max_wait = max_seconds,
    retry_on_failure = retry_on_failure,
    retry_is_transient = as_callback(is_transient, 1, "is_transient"),
    retry_backoff = as_callback(backoff, 1, "backoff"),
    retry_after = as_callback(after, 1, "after")
  )
}

retry_max_tries <- function(req) {
  has_max_wait <- !is.null(req$policies$retry_max_wait)
  req$policies$retry_max_tries %||% if (has_max_wait) Inf else 1
}

retry_max_seconds <- function(req) {
  req$policies$retry_max_wait %||% Inf
}

retry_is_transient <- function(req, resp) {
  if (is_error(resp)) {
    return(req$policies$retry_on_failure %||% FALSE)
  }

  req_policy_call(req, "retry_is_transient", list(resp),
    default = function(resp) resp_status(resp) %in% c(429, 503)
  )
}

retry_backoff <- function(req, i) {
  req_policy_call(req, "retry_backoff", list(i), default = backoff_default)
}

retry_after <- function(req, resp, i, error_call = caller_env()) {
  if (is_error(resp)) {
    return(retry_backoff(req, i))
  }

  after <- req_policy_call(req, "retry_after", list(resp), default = resp_retry_after)

  # TODO: apply this idea to all callbacks
  if (!is_number_or_na(after)) {
    not <- obj_type_friendly(after)
    cli::cli_abort(
      "The {.code after} callback to {.fn req_retry} must return a single number or NA, not {not}.",
      call = error_call
    )
  }

  if (is.na(after)) {
    retry_backoff(req, i)
  } else {
    after
  }
}

is_number_or_na <- function(x) {
  (is.numeric(x) && length(x) == 1) || identical(x, NA)
}

# Helpers -----------------------------------------------------------------

# Exponential backoff with full-jitter, capped to 60s wait
# https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
backoff_default <- function(i) {
  round(min(stats::runif(1, min = 1, max = 2^i), 60), 1)
}
r-lib/httr2 documentation built on Nov. 4, 2024, 11:32 p.m.