# R/bart.step.R

# Defines functions bart.step

# Documented in bart.step

#' @title Full-service variable selection
#'
#' @description
#'
#' A wrapper for a few core functions, including a few diagnostic plots of variable importance, and the automated stepwise variable set reduction algorithm.
#'
#' Before the stepwise reduction, an initial model is fitted with bart.flex() and any covariates flagged as dropped in the fitted object are removed from \code{x.data} (with a message listing them).
#'
#' @param x.data A data frame of covariates
#' @param y.data A vector of outcomes (1/0)
#' @param ri.data Optional (default NULL). Passed through unchanged as \code{ri.data} to bart.flex(), varimp.diag() and variable.step(). Presumably the random-intercept data for random-effects models; confirm against bart.flex().
#' @param iter.step How many BART models to run for each iteration of the stepwise reduction
#' @param tree.step How many trees to use in the variable set reduction. Should be a SMALL number (10 or 20 trees) in order to create the maximum disparity in variable importance between informative and uninformative predictors (recommendations taken from Chipman et al. 2010).
#' @param iter.plot How many iterations to use in the first diagnostic plot
#' @param full If this is set to FALSE (by default), this runs a stepwise variable set reduction and returns a model with the optimal variable set - much like gbm::gbm.step() or similar functions. In running variable.step() it generates a single plot of RMSE against variables dropped. The summary() of the final model is always printed. If this is set to TRUE, summary() is additionally asked for plots, and two additional plots are generated: the initial variable importance diagnostic generated by varimp.diag() (this is SLOW), and a final variable importance bar chart for the final model.
#' @param quiet Passed through as \code{quiet} to varimp.diag() and variable.step() (default FALSE). Presumably suppresses their progress output; confirm against those functions.
#'
#' @return Invisibly returns a model object run with the optimal, reduced variable set.
#'
#' @export
bart.step <- function(x.data, y.data, ri.data = NULL,
                      iter.step = 100, tree.step = 10,
                      iter.plot = 100,
                      full = FALSE,
                      quiet = FALSE) {

  # Evaluate an expression while diverting console output to a throwaway
  # file, then restore the output stream and delete the file.
  quietly <- function(x) {
    sink.file <- tempfile()
    sink(sink.file)
    on.exit({
      sink()
      unlink(sink.file)
    }, add = TRUE)
    invisible(force(x))
  }

  # ---- Auto-drops ---------------------------------------------------------
  # Fit once on the full covariate set purely to find out which columns the
  # fitting backend refuses to use.
  model.0 <- quietly(bart.flex(x.data = x.data, y.data = y.data,
                               ri.data = ri.data,
                               n.trees = 200))

  # inherits() is safe for multi-class objects, where `class(x) == "..."`
  # yields a length > 1 condition (an error inside if() since R 4.2).
  if (inherits(model.0, "rbart")) {
    fitobj <- model.0$fit[[1]]
  } else if (inherits(model.0, "bart")) {
    fitobj <- model.0$fit
  } else {
    stop("bart.flex() returned an object of class '",
         paste(class(model.0), collapse = "/"),
         "'; expected 'bart' or 'rbart'.", call. = FALSE)
  }

  # The "drop" attribute flags each covariate; those flagged FALSE were kept.
  kept.names <- names(which(unlist(attr(fitobj$data@x, "drop")) == FALSE))
  dropnames <- colnames(x.data)[!(colnames(x.data) %in% kept.names)]

  if (length(dropnames) > 0) {
    message("Some of your variables have been automatically dropped by dbarts.")
    message("(This could be because they're characters, homogenous, etc.)")
    message("It is strongly recommended that you remove these from the raw data:")
    message(paste(dropnames, collapse = ' '), ' \n')
  }

  # Remove the dropped columns by name. drop = FALSE keeps x.data a data
  # frame even if only one covariate survives.
  x.data <- x.data[, !(colnames(x.data) %in% dropnames), drop = FALSE]

  # ---- Variable selection -------------------------------------------------
  show.all <- isTRUE(full)

  if (show.all) {
    varimp.diag(x.data, y.data, ri.data, iter = iter.plot, quiet = quiet)
  }
  vs <- variable.step(x.data, y.data, ri.data,
                      n.trees = tree.step, iter = iter.step, quiet = quiet)

  # ---- Final model --------------------------------------------------------
  # drop = FALSE: when the reduction retains a single variable, plain
  # x.data[, vs] would collapse to a bare vector and lose its column name.
  best.model <- bart.flex(x.data = x.data[, vs, drop = FALSE], y.data = y.data,
                          ri.data = ri.data, n.trees = 200)

  if (show.all) {
    varimp(best.model, plots = TRUE)
  }

  # The summary is always printed; plots are only requested in full mode.
  p <- summary(best.model, plots = show.all)
  print(p)

  invisible(best.model)
}
# cjcarlson/embarcadero documentation built on Sept. 9, 2023, 10:47 p.m.