R/boot_lm.R

Defines functions boot_lm

Documented in boot_lm

#' Bootstraping for linear models 
#' 
#' @title Bootstrapping for linear models 
#' @name boot_lm
#' @param object object of class \code{\link[stats]{lm}}
#' @param f function to be applied (and bootstrapped), default coef 
#' @param R number of bootstrap samples, default 999
#' @param psim simulation level for \code{\link{simulate_lm}}
#' @param resid.type either \dQuote{resample}, \dQuote{normal} or \dQuote{wild}.
#' @param data optional data argument (useful/needed when data are not in an available environment).
#' @param verbose logical (default TRUE) whether to print message if model does not converge. 
#' (rare for linear models).
#' @param ... additional arguments to be passed to function \code{\link[boot]{boot}}
#' @details The residuals can either be generated by resampling with replacement (default),
#' from a normal distribution (parameteric) or by changing their signs (wild). This last
#' one is called \dQuote{wild bootstrap}.
#' @note at the moment, when the argument data is used, it is not possible to check that it matches the 
#' original data used to fit the model. It will also override the fetching of data.
#' @export
#' @examples 
#' \donttest{
#' require(car)
#' data(barley, package = "nlraa")
#' ## Fit a linear model (quadratic)
#' fit.lm <- lm(yield ~ NF + I(NF^2), data = barley)
#' 
#' ## Bootstrap coefficients by default
#' fit.lm.bt <- boot_lm(fit.lm)
#' ## Compute confidence intervals
#' confint(fit.lm.bt, type = "perc")
#' ## Visualize
#' hist(fit.lm.bt, 1, ci = "perc", main = "Intercept")
#' hist(fit.lm.bt, 2, ci = "perc", main = "NF term")
#' hist(fit.lm.bt, 3, ci = "perc", main = "I(NF^2) term")
#' }
#' 

boot_lm <- function(object, 
                    f = NULL, 
                    R = 999, 
                    psim = 2, 
                    resid.type = c("resample","normal","wild"),
                    data = NULL,
                    verbose = TRUE,
                    ...){
  ## I chose to write it in this way instead of UseMethod
  ## because I feel it is more efficient and results in less code
  ## Maybe I'm wrong and will change this in the future
  ## Error checking
  if(!inherits(object, "lm")) stop("object should be of class 'lm'")
  
  if(psim < 2) stop("psim should be 2 or higher")
  
  resid.type <- match.arg(resid.type)
  
  ## extract the data
  dat <- try(eval(getCall(object)$data), silent = TRUE)
  if(inherits(dat, "try-error") && missing(data)){
    stop("'data' argument is required. It is likely you are using boot_lm inside another function. 
          The data are not in an available environment.", call. = FALSE)    
  }
  
  if(!missing(data)){
    dat <- data
  }

  if(missing(f)) f <- coef
    
  f0 <- f(object) ## To model the behavior of the orignial function
  NA.j <- 0
  
  boot_fun_resid <- function(data, fn, model, psim,...){
    
    ## Copy that will be later modified
    bdat <- data
    ## I need a hack to make the first iteration be t0 for boot
    if(identical(get(".k.boot.lm", envir = nlraa.lm.env), 0L)){
      ## This makes the assumption that the first time
      ## boot calls 'boot_fun_resid' it will generate 't0' 
      new.y <- fitted(model) + resid(model)
      assign(".k.boot.lm", 1L, envir = nlraa.lm.env)
    }else{
      ## Fitted values, which are simulated values if psim = 1
      new.y <- simulate_lm(model, psim = psim, resid.type = resid.type, data = data, ...)
    }
    
    resp.var <- all.vars(formula(model))[1]
    bdat[[resp.var]] <- new.y
    
    assign(".bdat.lm", bdat, envir = nlraa.lm.env)
    umod <- tryCatch(update(model, data = get(".bdat.lm", envir = nlraa.lm.env)), error = function(e){invisible(e)})
    
    ## It should be rare for an lm model to fail to converge though
    ## Trying to catch any condition in which the model above does not converge
    if(inherits(umod, "error") || any(is.na(fn(umod))) || is.null(fn(umod)) || any(is.nan(fn(umod)))){
      out <- rep(NA, length(f0))
      NA.j <<- NA.j + 1
    }else{
      out <- fn(umod)
    }
    out
  }
  
  ans <- boot::boot(data = dat, 
                    sim = "parametric",
                    statistic = boot_fun_resid, 
                    R = R, fn = f,
                    model = object,
                    psim = psim, ...)
  
  ## There is no reason why a lm should not converge, but just in case
  if(sum(NA.j) > 0 && verbose) cat("Number of times model fit did not converge", NA.j, "out of",R,"\n")
  
  assign(".k.boot.lm", 0L, envir = nlraa.lm.env)
  assign(".bdat.lm", NULL, envir = nlraa.lm.env)
  return(ans)
}


### Just initializing variables in the environment
### Not sure if this is even necessary
nlraa.lm.env <- new.env(parent = emptyenv())
assign('.bdat.lm', NA, nlraa.lm.env)
assign('.k.boot.lm', 0L, nlraa.lm.env)

Try the nlraa package in your browser

Any scripts or data that you put into this service are public.

nlraa documentation built on May 29, 2024, 2:01 a.m.