## Utility code related to bootstrapping
#' Bootstrap difference in statistic across two groups
#'
#' See:
#' https://stats.stackexchange.com/questions/136661
#' Also:
#' Efron's and Tibshirani's - intro to the bootstrap, page 223
#' @param mutation_table tibble; A mutation table nested by a group
#' @param statistic function; A valid bootstrap statistic
#' @param reps integer; Number of bootstrap replicates
#' @importFrom magrittr "%>%"
#' @importFrom dplyr summarize mutate
#' @importFrom purrr map map_dbl
#' @import dplyr
#' @return A p-value
#' @export
bootstrap_test <- function(tbl, statistic, reps = 1e4) {
combined_statistic <- tbl %>%
unnest %>%
summarize(statistic(., indices = 1:nrow(.))[1]) %>%
as.numeric
# Bootstrap null distributions for each group
tbl <- tbl %>%
mutate(obs = map_dbl(data, ~statistic(., indices = 1:nrow(.))[1])) %>%
mutate(boot_null = data %>% map(
bootstrap_centered_null,
statistic = statistic,
center = combined_statistic,
reps = reps))
observed_diff <- abs(tbl$obs[1] - tbl$obs[2])
boot_diff <- abs(tbl$boot_null[[1]] - tbl$boot_null[[2]])
p <- (length(which(boot_diff >= observed_diff)) + 1) / length(boot_diff)
p
}
#' Apply a two group statistical test to each group in a nested tibble
#' @importFrom dplyr group_vars
#' @export
boot_compare_all <- function(tbl, statistic, within = NULL, reps = 1e4, ...) {
tibble_combn(tbl,
set_size = 2,
func = bootstrap_test,
statistic = statistic,
within = within,
reps = reps)
}
#' Bootstrap a statistic across a table and produce bca ci and quantiles
#' @param tbl A tibble
#' @param statistic function; A bootstrap statistic
#' @param reps integer; Number of bootstrap replicates
#' @importFrom dplyr mutate
#' @importFrom tidyr unnest
#' @importFrom purrr map map_dbl
#' @importFrom magrittr "%>%"
#' @importFrom boot boot
#' @export
boot_quantiles <- function(tbl, statistic, reps = 1e4) {
tbl %>%
mutate(value = data %>%
map_dbl(~statistic(., 1:nrow(.))[1])) %>%
mutate(boot_stat = data %>%
map(boot, statistic = statistic, R = reps)) %>%
mutate(quantiles = boot_stat %>%
map(calc_bca_quantiles)) %>%
select(-data, -boot_stat) %>%
unnest
}
#' Center a distribution at a given mean value
#' @param d numeric; the distribution
#' @param target numeric; the new mean
#' @return The centered distribution
center_distribution <- function(d, target) {
d <- unlist(d)
d - mean(d) + target
}
#' Bootstrap the null distribution for a statistic centered at a given value
#'
#' This function generates a distribution for the null hypothesis that two
#' groups do not differ in some statistic. For example if we were interested in
#' whether two samples had a different mean; this function would produce a
#' bootstrapped sample for one group centered at the actual mean across both
#' groups. This ensures that the bootstrapped distribution corresponds to the
#' actual null hypothesis. Explained better here:
#' https://stats.stackexchange.com/questions/136661
#' @importFrom boot boot
#' @importFrom broom tidy
#' @importFrom magrittr "%>%" set_colnames
#' @importFrom tibble as_tibble
bootstrap_centered_null <- function(
mutation_table, statistic, center, ..., reps = 1e4) {
boot_sample <- boot(mutation_table, statistic = statistic, R = reps, ...)$t
colnames(boot_sample) <- c("value", "variance")
centered_boot <- center_distribution(boot_sample[, "value"], target = center)
centered_boot
}
#' Calculate quantiles and bias corrected confidence intervals for a boot object
#' @importFrom boot boot.ci
#' @importFrom tibble tibble
calc_bca_quantiles <- function(boot_object) {
if (is.na(boot_object$t0[1])) {
output <- tibble(ci1 = NA, q25 = NA, q75 = NA, ci2 = NA)
} else {
ci <- boot.ci(boot_object)$bca[4:5]
quantiles <- quantile(boot_object$t[, 1], probs = c(0.25, 0.75))
output <- tibble(
ci1 = ci[1],
q25 = quantiles[1],
q75 = quantiles[2],
ci2 = ci[2])
}
output
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.