#' Nested or Double Resampling
#'
#' `nested_cv` can be used to take the results of one resampling procedure
#' and conduct further resamples within each split. Any type of resampling
#' used in `rsample` can be used.
#'
#' @details
#' It is a bad idea to use bootstrapping as the outer resampling procedure (see
#' the example below)
#'
#' @inheritParams vfold_cv
#' @param data A data frame.
#' @param outside The initial resampling specification. This can be an already
#' created object or an expression of a new object (see the examples below).
#' If the latter is used, the `data` argument does not need to be
#' specified and, if it is given, will be ignored.
#' @param inside An expression for the type of resampling to be conducted
#' within the initial procedure.
#' @return An tibble with classe `nested_cv` and any other classes that
#' outer resampling process normally contains. The results include a
#' column for the outer data split objects, one or more `id` columns,
#' and a column of nested tibbles called `inner_resamples` with the
#' additional resamples.
#' @examples
#' ## Using expressions for the resampling procedures:
#' nested_cv(mtcars, outside = vfold_cv(v = 3), inside = bootstraps(times = 5))
#'
#' ## Using an existing object:
#' folds <- vfold_cv(mtcars)
#' nested_cv(mtcars, folds, inside = bootstraps(times = 5))
#'
#' ## The dangers of outer bootstraps:
#' set.seed(2222)
#' bad_idea <- nested_cv(mtcars,
#' outside = bootstraps(times = 5),
#' inside = vfold_cv(v = 3))
#'
#' first_outer_split <- bad_idea$splits[[1]]
#' outer_analysis <- as.data.frame(first_outer_split)
#' sum(grepl("Volvo 142E", rownames(outer_analysis)))
#'
#' ## For the 3-fold CV used inside of each bootstrap, how are the replicated
#' ## `Volvo 142E` data partitioned?
#' first_inner_split <- bad_idea$inner_resamples[[1]]$splits[[1]]
#' inner_analysis <- as.data.frame(first_inner_split)
#' inner_assess <- as.data.frame(first_inner_split, data = "assessment")
#'
#' sum(grepl("Volvo 142E", rownames(inner_analysis)))
#' sum(grepl("Volvo 142E", rownames(inner_assess)))
#' @importFrom rlang is_lang
#' @importFrom purrr map
#' @importFrom dplyr bind_cols
#' @importFrom methods formalArgs
#' @export
nested_cv <- function(data, outside, inside) {
nest_args <- formalArgs(nested_cv)
cl <- match.call()
boot_msg <-
paste0(
"Using bootstrapping as the outer resample is dangerous ",
"since the inner resample might have the same data ",
"point in both the analysis and assessment set."
)
outer_cl <- cl[["outside"]]
if (is_lang(outer_cl)) {
if (grepl("^bootstraps", deparse(outer_cl)))
warning(boot_msg, call. = FALSE)
outer_cl$data <- quote(data)
outside <- eval(outer_cl)
} else {
if (inherits(outside, "bootstraps"))
warning(boot_msg, call. = FALSE)
}
inner_cl <- cl[["inside"]]
if (!is_lang(inner_cl))
stop(
"`inside` should be a expression such as `vfold()` or ",
"bootstraps(times = 10)` instead of a existing object.",
call. = FALSE
)
inside <- map(outside$splits, inside_resample, cl = inner_cl)
inside <- tibble(inner_resamples = inside)
out <- dplyr::bind_cols(outside, inside)
out <- add_class(out, cls = "nested_cv", at_end = FALSE)
attr(out, "outside") <- cl$outside
attr(out, "inside") <- cl$inside
out
}
inside_resample <- function(src, cl) {
cl$data <- quote(as.data.frame(src))
eval(cl)
}
#' @importFrom tibble tibble
#' @importFrom rlang is_lang
#' @export
print.nested_cv <- function(x, ...) {
char_x <- paste("#", pretty(x))
cat(char_x, sep = "\n")
class(x) <- class(tibble())
print(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.