R/Boot.R
In car: Companion to Applied Regression

Documented in Boot Boot.default

# Boot:  A reimplementation of bootCase using the 'boot' package to do the
#  work.  The main function 'Boot' creates the 'statistic' argument to
#  'boot', and passes this function to 'boot'
# For the call b1 <- Boot(m1) and b2 <- bootCase(m1),
#   b2 was the returned bootstaps; this is in b1$t
# b1 is of class c("Boot", "boot", so ALL the 'boot' generic methods work
# 'Boot' has new generic methods 'summary', 'confint' and 'hist'

# notes:  See Davison and Hinkley Chapters 6 and 7.
# Boot.lm, method="case" is the simple case resampling
#             method="residual" uses algorithm 6.3, p. 271
#               The use of weights comes from using 'pearson' residuals
#               This is equivalent to alg. 6.1, p262, unweighted
# Boot.glm method="case" as for lm
#             method="residual" not implemented.  Too problematic.
# May 23, 2012 Sanford Weisberg sandy@umn.edu
# June 1, 2012:  changed from class c("Boot", "boot") to just class "boot"
# 2012-12-10 replaced .GlobalEnv with .carEnv to avoid warnings
# 2013-07-08 changed .carEnv to car:::.carEnv so 'boot' could find the environment
# 4014-08-17: added calls to requireNamespace() and :: where necessary. J. Fox
# 2015-01-27 .carEnv now in global environment. John
# 2015-02-20: fixed coding error in Boot.nls(). John
# 2017-06-12: added a default for f in the generic method to suppress an error generated by Rstudio
# 2017-06-22: added a vcov.boot method that simply returns cov(object$t)
# 2017-06-22: fixed args to hist.boot as suggested by Achim Zeileis
# 2017-06-22: Fixed bugs in Boot.default; updated .Rd file as suggested by Achim Zeileis
# 2017-06-24: (Z) added '...' argument to generic and all methods
#             set labels=names(f(object)) with f() rather than coef()
#             simplified and avoid bug in computation of 'out' and check for $qr in Boot.default
#             do not rely on $model to be available
#             instead set up empty dummy data with right number of rows (either via nobs or
#             NROW(residuals(...)))
#             optionally use original estimates as starting values in update(object, ...)
#             within Boot.default
# 2017-06-25: modified bca confidence intervals to default to 'perc' if adjustment is out of range
# 2017-06-26: consistently use inherits(..., "try-error") rather than class(...) == "try-error"
# 2017-09-16:  Changed to vcov.boot method to pass arguments to cov.  In
#  particular, if some of the bootstrap reps are NA, then the argument
#  use="complete.obs" may be desirable.
# 2017-10-06:  Corrected bug that put the wrong estimates in t0 if missing values were
#             present with case resampling.
# 2017-10-19:  Added "norm" as an option on histograms
# 2017-11-30: Use carPalette() for colors in hist.boot()
# 2017-12-24: Removed parallel argument that was added. If ncores<=1, no parallel processing is used.  If ncores>1
# selects the correct parallel environment, and implements with that number of cores. 
# 2018-01-28: Changed print.summary.boot to print R once only if it is constant
# 2018-04-02: John fixed error in Boot.nlm() reported by Derek Ogle.
# 2018-05-16: John modified Confint.boot() to return a "confint.boot" object.
# 2018-08-03: Sandy corrected bug in Boot.lm and Boot.glm that caused failure
#             with transformed predictors.  Also added test to missing values.
#             If missing values are present, Boot returns an error.
# 2018-09-21: John fixed bug that hard-coded level=0.95 when confint.boot() falls 
#             back to type="perc" (reported by Derek Lee Sonderegger).
# 2018-09-21: Brad removed the otpions for multicore on non-unix OS platforms. It now will
#              produce a warning and set ncores=1.  
##2018-12-21:  Brad changed the cluster initalizations for the parallel environements to makeCluster
##             For non-unix environments we export the data, and model type to the cluster.
## 2020-09-02: Corrected weights with lm models and method="residual" to
#              match Alg 6.3 of Davison and Hinkley
## 2020-09=02: Removed unneeded code using missing values in Boot.nls
## 2020-09-02: Boot.nls failed if nls algorithm="plinear".  This is fixed
## 2020-09-02: Correctly use weights in lm, nls
## 2020-12-03: changed vcov.boot to use="complete.obs" by default, added a warning if any bootstrap samples are NA
## 2021-04-21: Residual bootstrap fails if namespace not attached; was tentatively fixed, but the fix has been commented out with #ns
## 2021-05-27: Legend in hist.boot slightly improved.

Boot <- function(object, f=coef, labels=names(f(object)), R=999, 
            method=c("case", "residual"), ncores=1, ...){UseMethod("Boot")}

Boot.default <- function(object, f=coef, labels=names(f(object)),
                         R=999, method=c("case", "residual"), 
                         ncores=1, start=FALSE,...) {
  if(!(requireNamespace("boot"))) stop("The 'boot' package is missing")
  
  ## original statistic
  f0 <- f(object)
  if(length(labels) != length(f0)) labels <- paste0("V", seq_along(f0))
  
  ## process starting values (if any)
  if(isTRUE(start)) start <- f0
  
  ## set up bootstrap handling for case vs. residual bootstrapping
  method <- match.arg(method, c("case", "residual"))
  if(method=="case") {
    boot.f <- function(data, indices, .fn) {
      assign(".boot.indices", indices, envir=.carEnv)
      mod <- if(identical(start, FALSE)) {
        update(object, subset=get(".boot.indices", envir=.carEnv))
      } else {
        update(object, subset=get(".boot.indices", envir=.carEnv), start=start)
      }
      out <- if(!is.null(object$qr) && (mod$qr$rank != object$qr$rank)) 
        f0 * NA else .fn(mod)
      out
    }
  } else {
    boot.f <- function(data, indices, .fn) {
      first <- all(indices == seq(length(indices)))
      res <- if(first) residuals(object, type="pearson") else
        residuals(object, type="pearson")/sqrt(1 - hatvalues(object))
      res <- if(!first) (res - mean(res)) else res
# next two lines added 9/2/2020.  This works for lm or other model methods
# that return a slot called weights with the case weights
      wts <- if(!is.null(object$weights)) object$weights else 1
      val <- fitted(object) + res[indices]/sqrt(wts)
      if (!is.null(object$na.action)){
        pad <- object$na.action
        attr(pad, "class") <- "exclude"
        val <- naresid(pad, val)
      }
      assign(".y.boot", val, envir=.carEnv)
#ns      attach(.carEnv) # namespace fix deleted
#ns      on.exit(detach(.carEnv)) # namespace fix deleted
      mod <- if(identical(start, FALSE)) {
        update(object, get(".y.boot", envir=.carEnv) ~ .)
      } else {
        update(object, get(".y.boot", envir=.carEnv) ~ ., start=start)
#ns        update(object, .y.boot  ~ .) #, data=Data)
#ns      } else {
#ns        update(object, .y.boot  ~ ., start=start) #, data=Data, start=start)
      }
      out <- if(!is.null(object$qr) && (mod$qr$rank != object$qr$rank)) 
        f0 * NA else .fn(mod)
      out
    }
  }
  
  ## try to determine number of observations and set up empty dummy data
  nobs0 <- function(x, ...) {
    rval <- try(stats::nobs(x, ...), silent = TRUE)
    if(inherits(rval, "try-error") | is.null(rval)) rval <- NROW(residuals(x, ...))
    return(rval)
  }
  n <- nobs0(object)
  dd <- data.frame(.zero = rep.int(0L, n))
  
  if(ncores<=1){
    #parallel_env="no"
    #ncores=getOption("boot.ncpus",1L)
    cl2=NULL
    p_type="no"
    #}else{
    #if(.Platform$OS.type=="unix"){
    #    parallel_env="multicore"
  }else{
    #warning("Multicore processing in Boot is not avaliable for Windows.  It is current under development")
    #ncores=1
    #parallel_env="no"
    #ncores=getOption("boot.ncpus",1L)
    cl2 <- parallel::makeCluster(ncores)
    on.exit(parallel::stopCluster(cl2))
    if(.Platform$OS.type=="unix"){
      p_type="multicore"
    }else{
      p_type="snow"
      parallel::clusterExport(cl2,varlist=c(".carEnv",sapply(c(1,3),function(m){gsub("()",getCall(object)[m],replacement="")})))
      
    }
  }
  
  ## call boot() but set nice labels
  b <- boot::boot(dd, boot.f, R, .fn=f,parallel=p_type,ncpus = ncores, 
                  cl = cl2,...)
  colnames(b$t) <- labels
  
  ## clean up and return
  if(exists(".y.boot", envir=.carEnv))
    remove(".y.boot", envir=.carEnv)
  if(exists(".boot.indices", envir=.carEnv))
    remove(".boot.indices", envir=.carEnv)
  b
}

Boot.lm <- function(object, f=coef, labels=names(f(object)),
                     R=999, method=c("case", "residual"), ncores=1, ...){
# check for missing values:
  if(!is.null(object$na.action))
    stop("The Boot function in the 'car' package does not allow
  missing values for lm or glm models.  Refit your model with rows 
  with missing values removed.  If you have a data frame called 'd', 
  then the argument data=na.omit(d) is likely to work.")
  Boot.default(object, f, labels, R, method, ncores, ...)
   }

Boot.glm <- function(object, f=coef, labels=names(f(object)),
                     R=999, method=c("case", "residual"), ncores=1, ...) {
  method <- match.arg(method, c("case", "residual"))
  if(method != "case") 
    stop("Residual bootstrap is not implemented in the 'car' function 'Boot'.
  Use the 'boot' function in the 'boot' package to write
  your own version of residual bootstrap for a glm.")
  # check for missing values:
  if(!is.null(object$na.action))
    stop("The Boot function in the 'car' package does not allow
  missing values for lm or glm models.  Refit your model with rows 
  with missing values removed.  If you have a data frame called 'd', 
  then the argument data=na.omit(d) is likely to work.")  
    Boot.default(object, f, labels, R, method,ncores, ...)
}


Boot.nls <- function(object, f=coef, labels=names(f(object)),
                     R=999, method=c("case", "residual"), ncores=1, ...) {
  ## check for missing values:
  if(!is.null(object$na.action))
    stop("The Boot function in the 'car' package does not allow
missing values for nls models.  Refit your model with rows 
with missing values removed.  If you have a data frame called 'd', 
then the argument data=complete.cases(d) is likely to work.")
  if(!(requireNamespace("boot"))) stop("The 'boot' package is missing")
  f0 <- f(object)
  if(length(labels) != length(f0)) labels <- paste("V", seq(length(f0)), sep="")
  method <- match.arg(method)
  if(method=="case") { 
    boot.f <- function(data, indices, .fn) {
      assign(".boot.indices", indices, envir=.carEnv)
      # When algorithm="plinear", remove all coefs with names starting with '.' 
      # from the starting values
      sv <- coef(object)
      if(object$call$algorithm == "plinear") sv <- sv[!grepl("\\.", names(sv))]
      # update the call to use the bootstrap sample determined by indices
      # if the weights argument has been set then update it as well.
      newcall <- if(is.null(object$weights))
                    update(object, subset=get(".boot.indices", envir=.carEnv),
                        start=sv, evaluate=FALSE)  
                else  update(object, subset=get(".boot.indices", envir=.carEnv),
                        weights=object$weights, start=sv, evaluate=FALSE) 
      # try to evaluate the call
      mod <- try(eval(newcall), silent=TRUE)
      if(inherits(mod, "try-error")){
        out <- .fn(object)
        out <- rep(NA, length(out)) } else  {out <- .fn(mod)}
      out
    }
  } else {
    boot.f <- function(data, indices, .fn) { 
      wts <- if(is.null(object$weights)) 1 else object$weights
      res <- residuals(object) * sqrt(wts)  # Pearson residuals
      val <- fitted(object) + res[indices]/sqrt(wts)
      assign(".y.boot", val, envir=.carEnv)
      assign(".wts", wts, envir=.carEnv)
#ns      attach(.carEnv)          
#ns      on.exit(detach(.carEnv)) 
      # When algorithm="plinear", remove all coefs with names starting with '.' 
      # from the starting values
      sv <- coef(object)
      if(object$call$algorithm == "plinear") sv <- sv[!grepl("^\\.", names(sv))]
      # generate an updated call with .y.boot as the response but do not evaluate
      newcall <- if(is.null(object$call$weights))
        update(object, get(".y.boot", envir=.carEnv) ~ ., start=sv, evaluate=FALSE)
#ns         update(object, .y.boot ~ ., start=sv, evaluate=FALSE) 
      else
        update(object, get(".y.boot", envir=.carEnv) ~ .,
               weights= get(".wts", envir=.carEnv),
               start=sv, evaluate=FALSE)
#ns       update(object, .y.boot ~ .,   # works
#ns            weights= .wts,
#ns            start=sv, evaluate=FALSE)
      # formula.update may have mangled the rhs of newcall$formula
      # copy it from the original call.  I consider this to be a kludge to work
      # around a bug in formula.update
      newcall$formula[[3]] <- formula(object)[[3]]
      # refit to bootstrap sample
      mod <- try(eval(newcall), silent=TRUE)
      if(inherits(mod, "try-error")){
        out <- .fn(object)
        out <- rep(NA, length(out)) } else  {out <- .fn(mod)}
      out
    }
  }
  # multicore code  
  if(ncores<=1){
    #parallel_env="no"
    #ncores=getOption("boot.ncpus",1L)
    cl2=NULL
    p_type="no"
    #}else{
    #if(.Platform$OS.type=="unix"){
    #    parallel_env="multicore"
  }else{
    #warning("Multicore processing in Boot is not available for Windows.  It is current under development")
    #ncores=1
    #parallel_env="no"
    #ncores=getOption("boot.ncpus",1L)
    cl2 <- parallel::makeCluster(ncores)
    on.exit(parallel::stopCluster(cl2))
    if(.Platform$OS.type=="unix"){
      p_type="multicore"
    }else{
      p_type="snow"
      parallel::clusterExport(cl2,varlist=c(".carEnv",sapply(c(1,3),function(m){gsub("()",getCall(object)[m],replacement="")})))
    }
  }
  # call boot::boot.  
  b <- boot::boot(data.frame(update(object, model=TRUE)$model), boot.f, R, 
                  .fn=f, parallel = p_type, ncpus = ncores, cl=cl2, ...)
  #  b <- boot::boot(eval(object$data), boot.f, R, .fn=f, parallel = p_type, ncpus = ncores, cl=cl2, ...)
  colnames(b$t) <- labels
  if(exists(".y.boot", envir=.carEnv))
    remove(".y.boot", envir=.carEnv)
  if(exists(".boot.indices", envir=.carEnv))
    remove(".boot.indices", envir=.carEnv)
  if(exists(".wts", envir=.carEnv))
    remove(".wts", envir=.carEnv)
  d <- dim(na.omit(b$t))[1]
  if(d != R)
    cat( paste("\n","Number of bootstraps was", d, "out of", R, "attempted", "\n"))
  b
}



Confint.boot <- function(object, parm, level = 0.95,
  type = c("bca", "norm", "basic", "perc"), ...){
  ci <- confint(object, parm, level, type, ...)
  typelab <- attr(ci, "type")
  class <- class(ci)
  co <- object$t0
  co <- co[names(co) %in% rownames(ci)]
  ci <- cbind(Estimate=co, ci)
  attr(ci, "type") <- typelab
  class(ci) <- class
  ci
}

confint.boot <- function(object, parm, level = 0.95,
    type = c("bca", "norm", "basic", "perc"), ...){
  if (!requireNamespace("boot")) "boot package is missing"
  cl <- match.call()
  type <- match.arg(type)
#  if(type=="all") stop("Use 'boot::boot.ci' if you want to see 'all' types") # has no effect
  types <-   c("bca", "norm", "basic", "perc")
  typelab <- c("bca", "normal",   "basic", "percent")[match(type, types)]
  nn <- colnames(object$t)
  names(nn) <- nn
  parm <- if(missing(parm)) which(!is.na(object$t0)) else parm
  out <- list()
  for (j in 1:length(parm)){
   out[[j]] <- try(boot::boot.ci(object, conf=level, type=type, index=parm[j], ...), silent=TRUE)
   if(inherits(out[[j]], "try-error") && type=="bca"){
    warning("BCa method fails for this problem.  Using 'perc' instead")
    return(confint(object, parm, level = level, type = "perc", ...))}
  }
  levs <- unlist(lapply(level, function(x) c( (1-x)/2, 1 - (1-x)/2)))
  ints <- matrix(0, nrow=length(parm), ncol=length(levs))
  rownames(ints) <- nn[parm]
  for (j in 1:length(parm)){
    which <- if(typelab=="normal") 2:3 else 4:5
    ints[j, ] <- as.vector(t(out[[j]][[typelab]][, which]))
  }
  or <- order(levs)
  levs <- levs[or]
  ints <- ints[, or, drop=FALSE]
  colnames(ints) <- paste(round(100*levs, 1), " %",sep="")
  attr(ints,"type") <- typelab
  class(ints) <- c("confint.boot", class(ints))
  ints
}
print.confint.boot <- function(x, ...) {
  cat("Bootstrap", attr(x, "type"), "confidence intervals\n\n")
  print(as.data.frame(x), ...)
  }


summary.boot <- function (object, parm, high.moments = FALSE,
    extremes=FALSE, ...)
{
    cl <- match.call()
    skew1 <- function(x){
       x <- x[!is.na(x)]
       xbar <- mean(x)
       sum((x-xbar)^3)/(length(x) * sd(x)^3)
       }
    kurtosis1 <- function (x) {
       x <- x[!is.na(x)]
       xbar <- mean(x)
       sum((x - xbar)^4)/(length(x) * sd(x)^4) - 3
       }
    not.aliased <-  !is.na(object$t0)
    boots <- object$t[ , not.aliased, drop=FALSE ]
    nc <- if(is.matrix(boots)) ncol(boots) else 1
    stats <- matrix(rep(NA, nc * 10), ncol = 10)
    rownames(stats) <- colnames(boots)
    stats[, 1] <- apply(boots, 2, function(x) sum(!is.na(x))) # num. obs
    stats[, 2] <- object$t0[not.aliased]  # point estimate
    stats[, 3] <- apply(boots, 2, function(x) mean(x, na.rm=TRUE)) - stats[, 2]
    stats[, 5] <- apply(boots, 2, function(x) median(x, na.rm=TRUE))
    stats[, 4] <- apply(boots, 2, function(x) sd(x, na.rm=TRUE))
    stats[, 6] <- apply(boots, 2, function(x) min(x, na.rm=TRUE))
    stats[, 7] <- apply(boots, 2, function(x) max(x, na.rm=TRUE))
    stats[, 8] <- stats[, 7] - stats[, 6]
    stats[, 9] <- apply(boots, 2, skew1)
    stats[, 10] <- apply(boots, 2, kurtosis1)
    colnames(stats) <- c(
      "R", "original", "bootBias", "bootSE", "bootMed", "bootMin",
     "bootMax", "bootRange", "bootSkew", "bootKurtosis")
    stats <- as.data.frame(stats)
    class(stats) <- c("summary.boot", "data.frame")
    use <- rep(TRUE, 10)
    if (high.moments == FALSE) use[9:10] <- FALSE
    if (extremes==FALSE) use[6:8] <- FALSE
    parm <- if(missing(parm)) 1:dim(stats)[1] else parm
    return(stats[parm , use])
}
print.summary.boot <-
   function(x, digits = max(getOption("digits") - 2, 3), ...) {
    if(dim(x)[1] == 1L){print.data.frame(x, digits=digits, ...)} else{
      if(sd(x[, 1]) < 1.e-8 ) {
        cat(paste("\nNumber of bootstrap replications R =", x[1, 1], "\n", sep=" "))
        print.data.frame(x[, -1], digits=digits, ...)} else
      print.data.frame(x, digits=digits, ...)
}}

hist.boot <- function(x, parm, layout=NULL, ask, main="", freq=FALSE,
      estPoint = TRUE, point.col=carPalette()[1], point.lty=2, point.lwd=2,
      estDensity = !freq, den.col=carPalette()[2], den.lty=1, den.lwd=2,
      estNormal = !freq,  nor.col=carPalette()[3],   nor.lty=2, nor.lwd=2,
      ci=c("bca", "none", "perc", "norm"), level=0.95,
      legend=c("top", "none", "separate"), box=TRUE, ...){
  not.aliased <- which(!is.na(x$t0))
  ci <- match.arg(ci)
  legend <- match.arg(legend)
  pe <- x$t0[not.aliased]
  if(is.null(names(pe))) names(pe) <- colnames(x$t)
  if(missing(parm)) parm <- not.aliased
  nt <- length(parm) + if(legend == "separate") 1 else 0
  if (nt > 1 & (is.null(layout) || is.numeric(layout))) {
    if(is.null(layout)){
         layout <- switch(min(nt, 9), c(1, 1), c(1, 2), c(2, 2), c(2, 2),
                             c(3, 2), c(3, 2), c(3, 3), c(3, 3), c(3, 3))
    }
    ask <- if(missing(ask) || is.null(ask)) prod(layout) < nt else ask
    oma3 <- if(legend == "top") 1.0 + estPoint + estDensity + estNormal
              else 1.5
    op <- par(mfrow=layout, ask=ask, no.readonly=TRUE,
            oma=c(0, 0, oma3, 0), mar=c(5, 4, 1, 2) + .1)
    on.exit(par(op))
    }
  if(ci != "none") clim <- confint(x, type=ci, level=level)
  pn <- colnames(x$t)
  names(pn) <- pn
  what <- c(estNormal & !freq, estDensity & !freq, ci != "none", estPoint)
  for (j in parm){
# determine the range of the y-axis
       z <- na.omit(x$t[, j])
       h <- hist(z, plot=FALSE)
       d <- density(z)
       n <- pnorm(0)/(sd <- sd(z))
       m <- if(freq == FALSE) max(h$density, d$y, n) else max(h$counts)
       plot(h, xlab=pn[j], freq=freq,
            main=if(length(parm)==1) main else "", ylim=c(0, m), ...)
       if(estDensity & !freq){
          lines(d, col=den.col, lty=den.lty, lwd=den.lwd)
          }
       if(estNormal & !freq){
          z <- na.omit(x$t[, j])
          xx <- seq(-4, 4, length=400)
          xbar <- mean(z)
          sd <- sd(z)
          lines( xbar + sd*xx, dnorm(xx)/sd, col=nor.col, lty=nor.lty,
              lwd=nor.lwd)
          }
       if(ci != "none") lines( clim[j ,], c(0, 0), lwd=4)
       if(estPoint) abline(v=pe[j], lty=point.lty, col=point.col, lwd=point.lwd)
       if(box) box()
       if( j == parm[1] & legend == "top" ) { # add legend
		        usr <- par("usr")
		        legend.coords <- list(x=usr[1], y=usr[4] * 1.05 + 1.3 * (1 + sum(what)) *strheight("N"))
            legend( legend.coords,
             c("Normal Density", "Kernel Density",
             paste(ci, " ", round(100*level), "% CI", sep=""),
                   "Obs. Value")[what],
             lty=c(nor.lty, den.lty, 1, point.lty)[what],
             col=c(nor.col, den.col, "black", point.col)[what],
             fill=c(nor.col, den.col, "black", point.col)[what],
             lwd=c(2, 2, 4, 2)[what],
             border=c(nor.col, den.col, "black", point.col)[what],
             bty="n", cex=0.9, xpd=NA)#, #horiz=TRUE, offset=
		}
  }
  mtext(side=3, outer=TRUE, main, cex=1.2)
  if(legend == "separate") {
    plot(0:1, 0:1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
    use <- (1:4)[c( estNormal, estDensity, TRUE, ci != "none")]
    curves <- c("Normal Density", "Kernel Density",
              paste(ci, " ", 100*level, "% CI", sep=""),
              "Obs. Value")
    colors <- c(nor.col, den.col, "black", point.col)
    lines <- c(nor.lty, den.lty, 1, point.lty)
    widths<- c(nor.lwd, den.lwd, 2, point.lty)
    legend("center", curves[use], lty=lines[use], lwd=widths[use],
          col=colors[use], bty="n", #box.col=par()$bg,
          title="Bootstrap histograms")
  }
  invisible(NULL)
  }

vcov.boot <- function(object, use="complete.obs", ...){
  if(use == "complete.obs"){
    num <- nrow(object$t) - nrow(na.omit(object$t))
    if(num == 1L) warning(
      "one bootstrap sample returned NA and was omitted")
    if(num > 1L) warning(
      paste(num, " bootstrap samples returned NA and were omitted", sep=""))}
  cov(object$t, use=use)
}