R/bootstrap.R
In climatekit: Unified Climate Indices for Temperature, Precipitation, and Drought

# Zhang et al. (2005) in-base bootstrap for percentile-day indices.
#
# When a percentile-based index (TX10p / TN10p / TX90p / TN90p) is
# computed for a year that lies inside the reference period, the year
# being analysed contributes to its own threshold and biases the
# result toward the central tendency. The Zhang (2005) bootstrap
# corrects this by leaving the analysis year out and substituting one
# of the other (N - 1) reference years in turn, then averaging the
# resulting exceedance proportions.
#
# Reference: Zhang, X., Hegerl, G. C., Zwiers, F. W., & Kenyon, J.
# (2005). Avoiding inhomogeneity in percentile-based indices of
# temperature extremes. *Journal of Climate*, 18(11), 1641-1651.
# \doi{10.1175/JCLI3366.1}.

#' Bootstrap-corrected per-day exceedance proportions
#'
#' Returns a numeric vector aligned with `values` and `dates`.
#' Out-of-base years yield 0/1 booleans (NA where the value or
#' threshold is missing). In-base years yield averages in [0, 1]
#' from the leave-one-out resampling. Used by [ck_tx10p()],
#' [ck_tn10p()], [ck_tx90p()], and [ck_tn90p()] when
#' `bootstrap = TRUE`.
#' @noRd
.bootstrap_pct_exceedance <- function(values, dates, percentile, op,
                                      ref_start, ref_end, window = 5L) {
  doy <- as.integer(format(dates, "%j"))
  years <- as.integer(format(dates, "%Y"))

  in_ref <- years >= ref_start & years <= ref_end
  if (!any(in_ref)) {
    cli::cli_abort(
      "No data in reference period {ref_start}-{ref_end}; supply data covering the period or pass {.arg ref_start} / {.arg ref_end}."
    )
  }

  # Standard full-period thresholds for out-of-base years.
  std_thresh <- .calendar_day_percentile(values, dates, percentile,
                                         ref_start, ref_end, window = window)

  out <- rep(NA_real_, length(values))

  # --- Out-of-base years: standard binary exceedance ---
  oob_idx <- which(!in_ref)
  if (length(oob_idx) > 0L) {
    oob_thr <- std_thresh[doy[oob_idx]]
    oob_vals <- values[oob_idx]
    valid <- !is.na(oob_vals) & !is.na(oob_thr)
    if (identical(op, ">")) {
      out[oob_idx] <- ifelse(valid, as.numeric(oob_vals > oob_thr), NA_real_)
    } else {
      out[oob_idx] <- ifelse(valid, as.numeric(oob_vals < oob_thr), NA_real_)
    }
  }

  # --- In-base years: leave-one-out bootstrap ---
  #
  # The naive implementation rebuilds the calendar-day percentile pool
  # from the full `values` vector for every (y, w) pair, costing
  # O(N^2 * 366 * N_window) operations for an N-year reference. The
  # optimised implementation precomputes per-year-per-DOY contribution
  # vectors once, then for each (y, w) simply concatenates the slices
  # for "all years except y, plus year w again" and runs `quantile` on
  # the result. That cuts the inner cost to O(366) per (y, w) and the
  # total to O(N^2 * 366), roughly a 30x speedup for a 30-year base.
  ref_years <- seq(ref_start, ref_end)
  n_ref <- length(ref_years)
  half  <- (window - 1L) %/% 2L

  # Per-year-per-DOY contribution: year_doy_pool[[i]][[d]] = numeric
  # vector of values from ref-year `ref_years[i]` whose day-of-year
  # falls in the (window)-day window centred on calendar day d.
  year_doy_pool <- vector("list", n_ref)
  for (i in seq_along(ref_years)) {
    yr <- ref_years[i]
    yr_idx_full <- which(years == yr)
    yr_doy_full <- doy[yr_idx_full]
    yr_vals_full <- values[yr_idx_full]
    pool_i <- vector("list", 366L)
    for (d in seq_len(366L)) {
      target_doys <- ((d - half - 1L):(d + half - 1L)) %% 366L + 1L
      slice <- yr_vals_full[yr_doy_full %in% target_doys]
      slice <- slice[!is.na(slice)]
      pool_i[[d]] <- slice
    }
    year_doy_pool[[i]] <- pool_i
  }

  for (i_y in seq_along(ref_years)) {
    yr <- ref_years[i_y]
    yr_idx <- which(years == yr)
    if (length(yr_idx) == 0L) next

    yr_doy <- doy[yr_idx]
    yr_vals <- values[yr_idx]

    other_idx <- setdiff(seq_along(ref_years), i_y)

    # Single-year reference: bootstrap is undefined; fall back to standard.
    if (length(other_idx) == 0L) {
      yr_thr <- std_thresh[yr_doy]
      valid <- !is.na(yr_vals) & !is.na(yr_thr)
      if (identical(op, ">")) {
        out[yr_idx] <- ifelse(valid, as.numeric(yr_vals > yr_thr), NA_real_)
      } else {
        out[yr_idx] <- ifelse(valid, as.numeric(yr_vals < yr_thr), NA_real_)
      }
      next
    }

    # Pool with year `i_y` removed, per DOY (one-time per y).
    minus_y_pool <- vector("list", 366L)
    for (d in seq_len(366L)) {
      minus_y_pool[[d]] <- unlist(
        lapply(other_idx, function(j) year_doy_pool[[j]][[d]]),
        use.names = FALSE
      )
    }

    bs_sum   <- rep(0, length(yr_idx))
    bs_count <- rep(0L, length(yr_idx))

    for (i_w in other_idx) {
      # Synthetic ref pool: everything except year y, with year w's
      # values appended (representing the substitution for year y).
      mod_thresh <- vapply(seq_len(366L), function(d) {
        pool <- c(minus_y_pool[[d]], year_doy_pool[[i_w]][[d]])
        if (length(pool) == 0L) return(NA_real_)
        stats::quantile(pool, percentile, na.rm = TRUE,
                        names = FALSE, type = 8L)
      }, numeric(1))

      yr_thr <- mod_thresh[yr_doy]
      valid <- !is.na(yr_vals) & !is.na(yr_thr)
      if (identical(op, ">")) {
        ex <- ifelse(valid, as.numeric(yr_vals > yr_thr), NA_real_)
      } else {
        ex <- ifelse(valid, as.numeric(yr_vals < yr_thr), NA_real_)
      }
      bs_sum[valid]   <- bs_sum[valid]   + ex[valid]
      bs_count[valid] <- bs_count[valid] + 1L
    }

    out[yr_idx] <- ifelse(bs_count > 0L, bs_sum / bs_count, NA_real_)
  }

  out
}

#' Per-day exceedance proportions for percentile indices
#'
#' Single entry point for the percentile-day functions. When
#' `bootstrap = FALSE`, returns standard 0/1 booleans (NA where the
#' value or threshold is missing). When `bootstrap = TRUE`, applies
#' the Zhang (2005) leave-one-out bootstrap for in-base years.
#' @noRd
.pct_exceedance_per_day <- function(values, dates, percentile, op,
                                    ref_start, ref_end,
                                    bootstrap = FALSE, window = 5L) {
  if (isTRUE(bootstrap)) {
    return(.bootstrap_pct_exceedance(values, dates, percentile, op,
                                     ref_start, ref_end, window))
  }

  thresholds <- .calendar_day_percentile(values, dates, percentile,
                                         ref_start, ref_end, window = window)
  doy <- as.integer(format(dates, "%j"))
  thr <- thresholds[doy]
  valid <- !is.na(values) & !is.na(thr)

  if (identical(op, ">")) {
    ifelse(valid, as.numeric(values > thr), NA_real_)
  } else if (identical(op, "<")) {
    ifelse(valid, as.numeric(values < thr), NA_real_)
  } else {
    cli::cli_abort("Internal: unsupported {.arg op}.")
  }
}

#' Aggregate per-day exceedance proportions to per-period percentages
#'
#' Helper used by the percentile-day functions to convert a daily
#' soft exceedance vector (in [0, 1] or NA) into a per-period
#' percentage. Periods with no valid days return NA.
#' @noRd
.aggregate_pct_exceedance <- function(soft_ex, dates, period) {
  periods <- get_periods(dates, period)
  unique_periods <- unique(periods)
  values <- vapply(unique_periods, function(p) {
    e <- soft_ex[periods == p]
    e <- e[!is.na(e)]
    if (length(e) == 0L) return(NA_real_)
    100 * mean(e)
  }, numeric(1))
  list(periods = unique_periods, values = values)
}

Any scripts or data that you put into this service are public.

climatekit documentation built on May 9, 2026, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

climatekit
Unified Climate Indices for Temperature, Precipitation, and Drought

R/bootstrap.R
In climatekit: Unified Climate Indices for Temperature, Precipitation, and Drought

Defines functions .aggregate_pct_exceedance .pct_exceedance_per_day .bootstrap_pct_exceedance

Try the climatekit package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

climatekit Unified Climate Indices for Temperature, Precipitation, and Drought

R/bootstrap.R In climatekit: Unified Climate Indices for Temperature, Precipitation, and Drought

Defines functions .aggregate_pct_exceedance .pct_exceedance_per_day .bootstrap_pct_exceedance

Try the climatekit package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

climatekit
Unified Climate Indices for Temperature, Precipitation, and Drought

R/bootstrap.R
In climatekit: Unified Climate Indices for Temperature, Precipitation, and Drought