#' Title
#' Function designed for use in dplyr (tidyverse) piping to return CSC and bootstrap CI around that
#' @param formula1 formula defining the two variables to be correlated as scores ~ group
#' @param data data.frame or tibble with the data, often constructed with pick() in dplyr
#' @param bootReps integer giving number of bootstrap replications
#' @param conf numeric value giving width of confidence interval, e.g. .95 (default)
#' @param bootCImethod string giving method to derive bootstrap CI, minimum two letters 'pe', 'no', 'ba' or 'bc' for percentile, normal, basic or bca
#'
#' @return list of named values obsCSC, LCLCSC and UCLCSC
#' @export
#'
#' @importFrom stats na.omit
#' @importFrom stats sd
#' @importFrom dplyr n
#' @importFrom dplyr mutate
#'
#' @section Background:
#' For general information about the CSC (Clinically Significant Change criterion), see \code{\link{getCSC}}
#'
#' @family RCSC functions
#' @family bootstrap CI functions
#' @seealso \code{\link{getCSC}} provides just the CSC if you don't need the CI around it. Much faster of course!
#'
#' @examples
#' \dontrun{
#' ### will need tidyverse to run
#' library(tidyverse)
#' ### create some data
#' n <- 120
#' set.seed(12345) # to get replicable sample and results from the bootstrap
#' list(scores = rnorm(n), # Gaussian random base for scores
#' ### now add a grouping variable: help-seeking or not
#' grp = sample(c("HS", "not"), n, replace = TRUE),
#' ### now add gender
#' gender = sample(c("F", "M"), n, replace = TRUE)) %>%
#' as_tibble() %>%
#' ### next add a gender effect nudging women's scores up by .4
#' mutate(scores = if_else(gender == "F", scores + .4, scores),
#' ### next add the crucial help-seeking effect of 1.1
#' scores = if_else(grp == "HS", scores + 1.1, scores)) -> tmpDat
#'
#' ### have a look at that
#' tmpDat
#'
#' ### this just computes the CSC and CI(CSC) for the whole dataset
#' tmpDat %>%
#' ### don't forget to prefix the call with "list(" to tell dplyr
#' ### you are creating list output
#' ### Also pick(everything()) has replaced the deprecated cur_data() that I used
#' ### earlier I see the flexibility of pick() but it's ugly when used to replace
#' ### the simpler cur_data() for this sort of thing. Not my choice!
#' summarise(CSC = list(getBootCICSC(scores ~ grp, pick(everything())))) %>%
#' ### now unnest the list to columns
#' unnest_wider(CSC)
#'
#' ### now an example of how this becomes useful: same but by grouping by gender
#' tmpDat %>%
#' group_by(gender) %>%
#' ### remember the list output again!
#' summarise(CSC = list(getBootCICSC(scores ~ grp, pick(everything())))) %>%
#' ### remember to unnnest again!
#' unnest_wider(CSC)
#' }
#'
#' @author Chris Evans
#'
#' @section History/development log:
#' Started before 5.iv.21
#'
getBootCICSC <- function(formula1, data, bootReps = 1000, conf = .95, bootCImethod = "pe") {
### function to return bootstrap CI around an observed correlation
### between two variables fed in within a formula as variable1 ~ variable2
### and each within a data frame or tibble, data
### method sets the correlation method from the stats::cor() package options
### bootReps sets the number of bootstrap replications
### conf sets the width of the CI
### uses the boot() and boot.ci() functions from the package boot, hence ...
invisible(stopifnot(base::requireNamespace("boot")))
### OK now some input sanity checking largely to get informative error messages
### if things go wrong
### sanity check 1: is formula1 a formula?!
if (!inherits(formula1, "formula")) {
stop("first argument must be a simple formula of form dependent ~ predictor")
}
### sanity check 2: it should have two terms
if (length(formula1) != 3) {
stop("first argument must be a simple formula of form variable1 ~ variable2")
}
var1 <- formula1[[2]]
var2 <- formula1[[3]]
### sanity check 3
if (length(var1) != 1 | length(var2) != 1) {
stop("formula input can only have one term on each side of the formula")
}
### I think this will always work and just pulls the environment
e <- environment(formula1)
### which can then be used to get the dependent out of data in that environment
scores <- eval(var1, data, e)
### same for the predictor
grp <- eval(var2, data, e)
### boot() needs stratification variable, i.e. grouping variable, to be numeric
if (!is.numeric(grp)) {
if (is.factor(grp)) {
grp <- as.numeric(grp)
} else {
grp <- as.numeric(factor(grp))
}
}
### sanity check 4: must have only 2 values in grp
if (dplyr::n_distinct(grp) != 2) {
stop("Grouping variable must have two and only two values. You may see this if group_by() stratifying\nhas meant you don't have two values of the grouping variable.")
}
list(scores = scores,
grp = grp) %>%
as_tibble() %>%
na.omit() -> tmpDat
### sanity check 5: scores must be numeric
if (!is.numeric(scores)) {
stop("Scores must be numeric")
}
### sanity check 6: smallest sample must be > 20 to get stable bootstrap
tmpDat %>%
dplyr::group_by(grp) %>%
dplyr::summarise(n = n()) %>%
dplyr::summarise(min = min(n)) %>%
dplyr::pull() -> valMinN
if (valMinN < 20) {
stop("You won't get a stable bootstrap CI with minimum cell size < 20. You may see this if group_by() stratifying\nhas given you a small cell size.")
}
### sanity check 7: R must be numeric and integer
### can't imagine we really need this but ...
### function from documentation of integer in base R
is.wholenumber <-
function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol
if (!is.numeric(bootReps) | !is.wholenumber(bootReps) | bootReps < 20) {
stop("Bootreps must be integer and numeric and, even for testing, >= 20")
}
### sanity check 8: conf must be sensible
if (!is.numeric(conf) | conf <= 0 | conf > .999) {
stop("conf must be numeric and 0 < conf < .999")
}
### sanity check 9: check bootCImethod, i.e. type of bootstrap CI to be used
if (!is.character(bootCImethod) | length(bootCImethod) != 1) {
stop("bootCImethod must be character variable of length 1")
}
bootCImethod <- stringr::str_to_lower(stringr::str_sub(bootCImethod, 1, 2))
if (!bootCImethod %in% c("no", "ba", "st", "pe", "bc")) {
stop("bootCImethod must start, after lowercasing with one of 'no', 'ba', 'pe' or 'bc'")
}
### clumsy switch but easy to read
if (bootCImethod == "no") {
useBootCImethod <- "norm"
}
if (bootCImethod == "ba") {
useBootCImethod <- "basic"
}
if (bootCImethod == "pe") {
useBootCImethod <- "perc"
}
if (bootCImethod == "bc") {
useBootCImethod <- "bca"
}
### end of sanity checking
###
message("Function: getBootCICSC")
message("CSC method is c as I think that's the only useful one!")
message("Function always uses only complete values of both variables")
message(paste("Number of bootstrap replications is:",
bootReps))
message(paste("Method for computing bootstrap confidence interval is:",
useBootCImethod))
message(paste("Width of confidence interval is:",
conf,
"i.e.",
paste0(round(100 * conf),
"%")))
### now we need a function to feed into boot() to get CSC
### first a minimal getCSC
getCSClocal <- function(tmpDat) {
### tmpDat has two columns: scores and grp and has been prechecked
tmpDat %>%
dplyr::group_by(grp) %>%
dplyr::summarise(n = n(),
mean = mean(scores),
sd = sd(scores)) -> tibStats
tibStats %>%
dplyr::summarise(denominator = sum(sd)) %>%
dplyr::pull() -> denominator
numerator <- tibStats$mean[2] * tibStats$sd[1] + tibStats$mean[1] * tibStats$sd[2]
### CSC is numerator / denominator (doh!)
numerator / denominator
}
getCSCforBoot <- function(tmpDat, i) {
### expects x as two column structure
getCSClocal(tmpDat[i, ])
}
### now use that to do the bootstrapping
### have to use as.data.frame() so we can index by columns even though
### tmpDat is a tibble
tmpDat <- as.data.frame(tmpDat)
tmpBootRes <- boot::boot(tmpDat, # data
statistic = getCSCforBoot, # function to apply
stype = "i", # bootstrapping within strata done by indexing
# that means that i is a row number _within_ the stratum for each stratum
strata = tmpDat[, 2], # tells boot::boot() which variable to use for strata
R = bootReps) # number of bootstrap replications
### and now get the CI from that,
tmpCI <- boot::boot.ci(tmpBootRes,
type = useBootCImethod,
conf = conf)
if (bootCImethod == "pe") {
retVal <- list(obsCSC = as.numeric(tmpBootRes$t0),
LCLCSC = tmpCI$percent[4],
UCLCSC = tmpCI$percent[5])
}
if (bootCImethod == "ba") {
retVal <- list(obsCSC = as.numeric(tmpBootRes$t0),
LCLCSC = tmpCI$basic[4],
UCLCSC = tmpCI$basic[5])
}
if (bootCImethod == "bc") {
retVal <- list(obsCSC = as.numeric(tmpBootRes$t0),
LCLCSC = tmpCI$bca[4],
UCLCSC = tmpCI$bca[5])
}
if (bootCImethod == "no") {
retVal <- list(obsCSC = as.numeric(tmpBootRes$t0),
LCLCSC = tmpCI$normal[2],
UCLCSC = tmpCI$normal[3])
}
retVal
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.