#' Make pools for block bootstrapping
#'
#' For doing a block bootstrap using \link{bootstrap_verify}, the blocks can be
#' passed as a data frame with a "pool" column telling \link{bootstrap_verify}
#' how to pool the data into blocks. \code{make_bootstrap_pools} is a function
#' to make such a data frame.
#'
#' Typically block bootstrapping would be used if there are serial
#' auto-correlations in the data. If for example auto-correlations are suspected
#' between forecasts, pools could be defined from the \code{fcdate} column to
#' create blocks of data where those auto-correlations are maintained.
#'
#' Pools may be set to overlap, whereby a new pool is created beginning at each
#' new value in \code{pool_col}. The length of a pool should be defined in the
#' units used in \code{pool_col} - if \code{pool_col} is a date-time column,
#' then \code{pool_length} is assumed to be in hours, though the units can be
#' set by adding a qualifier letter: "s" = seconds, "m" = minutes, "h" = hours,
#' "d" = days.
#'
#' @param .fcst A \code{harp_fcst} object
#' @param pool_col The column used to define the pools. Must be unquoted.
#' @param pool_length The length of a pool. Numeric or a character with a unit
#' qualifier if \code{pool_col} is in date-time format. The unit qualifier can
#' be : "s" = seconds, "m" = minutes, "h" = hours, "d" = days.
#' @param overlap Logical. Whether the pools should overlap.
#'
#' @return A data frame with columns from \code{pool_col} and "pool".
#' @export
#'
#' @examples
make_bootstrap_pools <- function(
.fcst, pool_col, pool_length, overlap = FALSE
) {
pool_col_quo <- rlang::enquo(pool_col)
pools_df <- dplyr::distinct(
dplyr::arrange(
purrr::map_dfr(
.fcst,
~dplyr::select(.x, !!pool_col_quo)
),
!!pool_col_quo
)
)
pool_col_is_date <- FALSE
if (inherits(dplyr::pull(pools_df, !! pool_col_quo), "POSIXct")) {
pool_col_is_date <- TRUE
pools_df <- dplyr::mutate(
pools_df, dplyr::across(dplyr::everything(), as.numeric)
)
if (is.numeric(pool_length)) {
pool_length <- paste0(pool_length, "h")
}
pool_length <- as.numeric(
gsub("[[:alpha:]]|[[:punct:]]", "", pool_length)
) *
harpIO:::units_multiplier(pool_length)
}
if (overlap) {
last_pool_start <- min(
which(
dplyr::pull(pools_df, !!pool_col_quo) >
max(dplyr::pull(pools_df, !!pool_col_quo)) - pool_length
)
)
pools_df <- purrr::map_dfr(
seq(1, last_pool_start),
~dplyr::mutate(
overlapping_pool(pools_df, .x, !!pool_col_quo, pool_length),
pool = .x
)
)
pools_df <- dplyr::distinct(pools_df)
} else {
breaks = get_breaks(dplyr::pull(pools_df, !!pool_col_quo), pool_length)
pools_df <- dplyr::mutate(
pools_df,
pool = as.numeric(
cut(
!!pool_col_quo, breaks = breaks,
include.lowest = TRUE, right = FALSE
)
)
)
}
if (pool_col_is_date) {
pools_df <- dplyr::mutate(
pools_df, {{pool_col}} := harpIO::unix2datetime(!!pool_col_quo)
)
}
pools_df
}
get_breaks <- function(x, break_length) {
x_range <- range(x)
breaks <- seq(min(x_range), max(x_range), by = break_length)
if (max(breaks) < max(x_range)) {
breaks[(length(breaks) + 1)] <- max(breaks) + break_length
}
breaks
}
overlapping_pool <- function(x, row_start, col, res) {
x <- x[row_start:nrow(x), ]
col <- rlang::enquo(col)
max_val <- dplyr::pull(x, !!col)[1] + res
dplyr::filter(x, !!col < max_val)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.