#' @title Full-service variable selection
#'
#' @description
#'
#' A wrapper for a few core functions, including a few diagnostic plots of
#' variable importance, and the automated stepwise variable set reduction
#' algorithm.
#'
#' The function first fits a throwaway 200-tree model with bart.flex() purely
#' to discover which columns of `x.data` the fitting backend marks as dropped.
#' Those columns are reported via message() and removed before any variable
#' selection is run.
#'
#' @param x.data A data frame of covariates
#' @param y.data A vector of outcomes (1/0)
#' @param ri.data Optional; defaults to NULL. Passed through unchanged as the
#'   `ri.data` argument of bart.flex(), varimp.diag() and variable.step().
#'   Presumably the random-intercept grouping data - confirm against
#'   bart.flex().
#' @param iter.step How many BART models to run for each iteration of the
#'   stepwise reduction
#' @param tree.step How many trees to use in the variable set reduction.
#'   Should be a SMALL number (10 or 20 trees) in order to create the maximum
#'   disparity in variable importance between informative and uninformative
#'   predictors (recommendations taken from Chipman et al. 2010).
#' @param iter.plot How many iterations to use in the first diagnostic plot
#' @param full If this is set to FALSE (by default), this runs a stepwise
#'   variable set reduction and returns a model with the optimal variable
#'   set - much like gbm::gbm.step() or similar functions. In running
#'   variable.step() it generates a single plot of RMSE against variables
#'   dropped. summary() of the final model is always printed; if `full` is
#'   TRUE, summary() is called with `plots = TRUE` and two additional plots
#'   are generated: the initial variable importance diagnostic generated by
#'   varimp.diag() (this is SLOW), and a final variable importance bar chart
#'   for the final model.
#' @param quiet Passed through as the `quiet` argument of varimp.diag() and
#'   variable.step(). Defaults to FALSE.
#'
#' @return Invisibly returns a model object run with the optimal, reduced
#'   variable set.
#'
#' @export
#'
#'
#'
bart.step <- function(x.data, y.data, ri.data=NULL,
                      iter.step=100, tree.step=10,
                      iter.plot=100,
                      full=FALSE,
                      quiet=FALSE) {

  # Normalise once so that NA / non-logical input cannot break the `if`s
  # below, while numeric 1 / 0 keep behaving as TRUE / FALSE.
  full <- isTRUE(as.logical(full))

  ###############
  # auto-drops

  # Evaluate `x` with standard output diverted to a scratch file, then restore
  # the output stream and delete the file again.
  quietly <- function(x) {
    sink.file <- tempfile()
    sink(sink.file)
    on.exit({
      sink()
      unlink(sink.file)
    }, add = TRUE)
    invisible(force(x))
  } # THANKS HADLEY

  # Throwaway fit, used only to read the backend's "drop" bookkeeping.
  model.0 <- quietly(bart.flex(x.data = x.data, y.data = y.data,
                               ri.data = ri.data,
                               n.trees = 200))

  # The underlying fit object sits in a different place for the two model
  # classes handled here.
  if (inherits(model.0, "rbart")) {
    fitobj <- model.0$fit[[1]]
  } else if (inherits(model.0, "bart")) {
    fitobj <- model.0$fit
  } else {
    stop("bart.flex() returned an object of class '",
         paste(class(model.0), collapse = "/"),
         "'; expected 'bart' or 'rbart'.", call. = FALSE)
  }

  # Columns whose "drop" flag is FALSE were kept; everything else in x.data
  # counts as dropped.
  kept.names <- names(which(unlist(attr(fitobj$data@x, "drop")) == FALSE))
  dropnames <- colnames(x.data)[!(colnames(x.data) %in% kept.names)]

  if (length(dropnames) > 0) {
    message("Some of your variables have been automatically dropped by dbarts.")
    message("(This could be because they're characters, homogenous, etc.)")
    message("It is strongly recommended that you remove these from the raw data:")
    message(paste(dropnames,collapse = ' '), ' \n')
  }

  # Base subsetting: no pipe / tidyselect dependency, and drop = FALSE keeps
  # a data frame even when only one column survives.
  x.data <- x.data[, !(colnames(x.data) %in% dropnames), drop = FALSE]

  ###############

  if (full) {
    varimp.diag(x.data, y.data, ri.data, iter = iter.plot, quiet = quiet)
  }

  vs <- variable.step(x.data, y.data, ri.data,
                      n.trees = tree.step, iter = iter.step, quiet = quiet)

  # drop = FALSE: a single selected variable must still be passed on as a
  # one-column data frame rather than a bare vector.
  best.model <- bart.flex(x.data = x.data[, vs, drop = FALSE], y.data = y.data,
                          ri.data = ri.data, n.trees = 200)

  if (full) {
    varimp(best.model, plots = TRUE)
  }

  # The summary is always printed; plots are only requested in full mode.
  p <- summary(best.model, plots = full)
  print(p)

  invisible(best.model)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.