R/ols-boot-residual.R

Defines functions comp_boot_res

Documented in comp_boot_res

#' A wrapper for the residual bootstrap of a fitted OLS regression model
#'
#' \code{comp_boot_res} is a wrapper for the empirical bootstrap of
#' a fitted \code{\link[stats]{lm}} model.
#'
#' @details The residual bootstrap consists of fitting the chosen statistical
#'   model (\code{mod_fit}) \code{B} times. Each of the \code{B} datasets
#'   consists of the original independent variables and of dependent variable
#'   given by the sum of the estimates of the original model and a
#'   bootstrap versions of the residuals.
#'
#' @param mod_fit An object of class \code{\link[stats]{lm}} to fit on the data.
#'   This object should contain the formula and the data.
#' @param B Bootstrap repetitions or number of bootstrap samples to be drawn.
#'
#' @return A list containing the following elements.
#'   \code{var_type}: The type of estimator for the variance of the coefficients
#'   estimates. An abbreviated string representing the
#'   type of the estimator of the variance  (\code{var_type_abb}).
#'   \code{var_summary}: A tibble containing the summary statistics for the model:
#'   terms (\code{term}), standard errors (\code{std.error}),
#'   statistics (\code{statistic}), p-values (\code{p.values}). The format
#'   of the tibble is exactly identical to the one generated by
#'   \code{\link[broom]{tidy}}, but the standard errors and p-values are computed
#'   via the bootstrap.
#'   \code{var_assumptions}: The assumptions under which the estimator of the
#'   variance is consistent.
#'   \code{cov_mat}: The covariance matrix of the coefficients estimates.
#'   \code{boot_out}: A tibble of the model's coefficients estimated (\code{term} and
#'   \code{estimate}) on the bootstrapped datasets,
#'   the size of the original dataset (\code{n}), and the number of the
#'   bootstrap repetition (\code{b}). In case of empirical bootstrap, it will
#'   also contain the size of each bootstrapped dataset (\code{m}).
#'
#' @keywords internal
#'
#' @importFrom rlang .data
#'
#' @examples
#' \dontrun{
#' # Obtain estimates of the coefficients on bootstrapped versions of the dataset
#' set.seed(35542)
#' n <- 1e3
#' X <- stats::rnorm(n, 0, 1)
#' y <- 2 + X * 1 + stats::rnorm(n, 0, 1)
#' lm_fit <- stats::lm(y ~ X)
#' out <- comp_boot_res(lm_fit, B = 100)
#'
#' print(out)
#' }
comp_boot_res <- function(mod_fit, B = 100) {
  assertthat::assert_that(all("lm" == class(mod_fit)),
    msg = glue::glue("mod_fit must only be of class lm")
  )
  check_fn_args(B = B)

  mod_res <- mod_fit$res
  mod_pred <- mod_fit$fitted.values
  n <- length(mod_res)
  data <- stats::model.frame(mod_fit)
  response_name <- as.character(stats::formula(mod_fit)[2])

  boot_out <- 1:B %>% purrr::map(.x = ., .f = ~ fit_reg(
    mod_fit = mod_fit,
    data = data %>%
      dplyr::mutate({{ response_name }} := mod_pred + sample(mod_res, n, replace = TRUE))
  ))

  # compute covariance matrix
  cov_mat <- boot_out %>%
    purrr::map(.x = ., .f = ~ .x %>% dplyr::pull(estimate)) %>%
    dplyr::bind_rows(data = ., .id = NULL) %>%
    stats::cov(x = .)

  boot_out <- boot_out %>%
    dplyr::bind_rows(.id = 'b')  %>%
    # consolidate tibble
    tidyr::nest(.data = .,
                data = c(.data$term, .data$estimate)) %>%
    dplyr::rename(boot_out = data) %>%
    dplyr::mutate(n = n) %>%
    dplyr::relocate(n)

  summary_boot <- get_boot_summary(
    mod_fit = mod_fit,
    boot_out = boot_out,
    boot_type = "res"
  )

  out <- get_mms_comp_var_ind(var_type_abb = "res",
                              summary_tbl = summary_boot,
                              cov_mat = cov_mat,
                              B = B,
                              m = NULL,
                              n = NULL,
                              weights_type = NULL,
                              boot_out = boot_out)

  return(out)
}
shamindras/maars documentation built on Sept. 21, 2021, 2:50 a.m.