#' Forward feature selection
#' @description A simple forward feature selection algorithm
#' @param predictors see \code{\link{train}}
#' @param response see \code{\link{train}}
#' @param method see \code{\link{train}}
#' @param metric see \code{\link{train}}
#' @param maximize see \code{\link{train}}
#' @param globalval Logical. Should models be evaluated based on 'global' performance? See \code{\link{global_validation}}
#' @param withinSE Logical. Models are only selected if they are better than the
#' currently best model by more than its standard error.
#' @param minVar Numeric. Number of variables to combine for the first selection.
#' See Details.
#' @param trControl see \code{\link{train}}
#' @param tuneLength see \code{\link{train}}
#' @param tuneGrid see \code{\link{train}}
#' @param seed A random number seed used for model training to make runs reproducible.
#' @param cores Numeric. If > 1, mclapply will be used (forking; unix-like systems only). See \code{\link{mclapply}}
#' @param verbose Logical. Should information about the progress be printed?
#' @param ... arguments passed to the classification or regression routine
#' (such as randomForest).
#' @return A list of class train. Besides the usual train content,
#' the object contains the vectors "selectedvars" and "selectedvars_perf",
#' which give the order in which the best variables were selected as well as their
#' corresponding performance (starting from the initial minVar variables). It also
#' contains "perf_all", which gives the performance of all model runs.
#' @details Models with two predictors are first trained using all possible
#' pairs of predictor variables. The best model of these initial models is kept.
#' On the basis of this best model the predictor variables are iteratively
#' increased and each of the remaining variables is tested for its improvement
#' of the currently best model. The process stops if none of the remaining
#' variables increases the model performance when added to the current best model.
#'
#' The forward feature selection can be run in parallel with forking on unix-like systems (mclapply).
#' Each fork computes one model, which drastically speeds up the runtime,
#' especially for the initial predictor search.
#' The internal cross-validation can be run in parallel on all systems. See the information
#' on parallel processing of caret's train function for details.
#'
#' Using withinSE will favour models with fewer variables and
#' will likely shorten the computation time.
#'
#' By default, ffs starts with all possible combinations of two predictor variables.
#' minVar allows starting the selection with more than two variables, e.g.
#' minVar=3 makes ffs test all combinations of 3 (instead of 2) variables
#' first and then iteratively increase that number. For example, with 10 predictors
#' and the default minVar=2, at most choose(10,2)=45 initial models plus
#' 8+7+...+1=36 further models during the forward steps are trained.
#' A larger minVar is important for e.g. neural networks,
#' which often cannot model meaningful relationships based on only two variables.
#' It is also relevant if it is assumed that the optimal variables can only be
#' found if more than 2 are considered at the same time.
#'
#' @note This variable selection is particularly suitable for spatial
#' cross-validation, where the variable selection
#' MUST be based on the performance of the model for predicting new spatial units.
#' See Meyer et al. (2018) and Meyer et al. (2019) for further details.
#'
#' @author Hanna Meyer
#' @seealso \code{\link{train}},\code{\link{bss}},
#' \code{\link{trainControl}},\code{\link{CreateSpacetimeFolds}},\code{\link{nndm}}
#' @references
#' \itemize{
#' \item Gasch, C.K., Hengl, T., Gräler, B., Meyer, H., Magney, T., Brown, D.J. (2015): Spatio-temporal interpolation of soil water, temperature, and electrical conductivity in 3D+T: the Cook Agronomy Farm data set. Spatial Statistics 14: 70-90.
#' \item Meyer, H., Reudenbach, C., Hengl, T., Katurji, M., Nauß, T. (2018): Improving performance of spatio-temporal machine learning models using forward feature selection and target-oriented validation. Environmental Modelling & Software 101: 1-9. \doi{10.1016/j.envsoft.2017.12.001}
#' \item Meyer, H., Reudenbach, C., Wöllauer, S., Nauss, T. (2019): Importance of spatial predictor variable selection in machine learning applications - Moving from data reproduction to spatial prediction. Ecological Modelling. 411, 108815. \doi{10.1016/j.ecolmodel.2019.108815}.
#' \item Ludwig, M., Moreno-Martinez, A., Hölzel, N., Pebesma, E., Meyer, H. (2023): Assessing and improving the transferability of current global spatial prediction models. Global Ecology and Biogeography. \doi{10.1111/geb.13635}.
#' }
#' @examples
#' \dontrun{
#' data(splotdata)
#' ffsmodel <- ffs(splotdata[,6:12], splotdata$Species_richness, ntree = 20)
#'
#' ffsmodel$selectedvars
#' ffsmodel$selectedvars_perf
#' plot(ffsmodel)
#' #or only selected variables:
#' plot(ffsmodel,plotType="selected")
#'}
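#'
#' \dontrun{
#' # minimal sketches of the minVar and withinSE options (see Details),
#' # assuming splotdata as above:
#' # start the search with all 3-variable combinations instead of pairs
#' ffsmodel3 <- ffs(splotdata[,6:12], splotdata$Species_richness,
#'                  minVar = 3, ntree = 20)
#' # favour smaller models: a variable is only added if it improves the
#' # current best model by more than its standard error
#' ffsmodel_se <- ffs(splotdata[,6:12], splotdata$Species_richness,
#'                    withinSE = TRUE, ntree = 20)
#'}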
#'
#' # or perform model training with target-oriented validation (LLO CV).
#' # The example is described in Gasch et al. (2015); the ffs approach for this dataset is
#' # described in Meyer et al. (2018). Due to the high computation time required, only a small
#' # and thus not robust example is shown here.
#'
#' \dontrun{
#' # run the model on three cores (see vignette for details):
#' library(doParallel)
#' library(lubridate)
#' cl <- makeCluster(3)
#' registerDoParallel(cl)
#'
#' #load and prepare dataset:
#' data(cookfarm)
#' trainDat <- cookfarm[cookfarm$altitude==-0.3&
#' year(cookfarm$Date)==2012&week(cookfarm$Date)%in%c(13:14),]
#'
#' #visualize dataset (requires ggplot2):
#' library(ggplot2)
#' ggplot(data = trainDat, aes(x=Date, y=VW)) + geom_line(aes(colour=SOURCEID))
#'
#' #create folds for Leave Location Out Cross Validation:
#' set.seed(10)
#' indices <- CreateSpacetimeFolds(trainDat,spacevar = "SOURCEID",k=3)
#' ctrl <- trainControl(method="cv",index = indices$index)
#'
#' #define potential predictors:
#' predictors <- c("DEM","TWI","BLD","Precip_cum","cday","MaxT_wrcc",
#' "Precip_wrcc","NDRE.M","Bt","MinT_wrcc","Northing","Easting")
#'
#' #run ffs model with Leave Location out CV
#' set.seed(10)
#' ffsmodel <- ffs(trainDat[,predictors],trainDat$VW,method="rf",
#' tuneLength=1,trControl=ctrl)
#' ffsmodel
#' plot(ffsmodel)
#' #or only selected variables:
#' plot(ffsmodel,plotType="selected")
#'
#' #compare to model without ffs:
#' model <- train(trainDat[,predictors],trainDat$VW,method="rf",
#' tuneLength=1, trControl=ctrl)
#' model
#' stopCluster(cl)
#'}
#'
#'\dontrun{
#'## on unix-like systems, you can also run the ffs in parallel with forks:
#' data("splotdata")
#' spatial_cv = CreateSpacetimeFolds(splotdata, spacevar = "Biome", k = 5)
#' ctrl <- trainControl(method="cv",index = spatial_cv$index)
#'
#'ffsmodel <- ffs(predictors = splotdata[,6:16],
#' response = splotdata$Species_richness,
#' tuneLength = 1,
#' method = "rf",
#' trControl = ctrl,
#' ntree = 20,
#' seed = 1,
#' cores = 4)
#'}
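#'
#'\dontrun{
#'## minimal sketch: select variables based on the pooled CV predictions
#'## instead of the averaged fold performance (see global_validation);
#'## assumes splotdata and ctrl from the previous example
#'ffsmodel_global <- ffs(predictors = splotdata[,6:16],
#'                       response = splotdata$Species_richness,
#'                       method = "rf",
#'                       tuneLength = 1,
#'                       trControl = ctrl,
#'                       ntree = 20,
#'                       globalval = TRUE)
#'}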
#'
#'
#' @export ffs
#' @aliases ffs
ffs <- function (predictors,
response,
method = "rf",
metric = ifelse(is.factor(response), "Accuracy", "RMSE"),
maximize = ifelse(metric == "RMSE", FALSE, TRUE),
globalval=FALSE,
withinSE = FALSE,
minVar = 2,
trControl = caret::trainControl(),
tuneLength = 3,
tuneGrid = NULL,
seed = sample(1:1000, 1),
verbose=TRUE,
cores = 1,
...){
# Init ----------------
## Input Checks ------------------------------
if(inherits(predictors, "sf")){
predictors = sf::st_drop_geometry(predictors)
}
if(cores > 1 & .Platform$OS.type != "unix"){
warning("Parallel computations of ffs only implemented on unix systems. cores is set to 1")
cores <- 1
}
if(inherits(response,"character")){
response <- factor(response)
if(metric=="RMSE"){
metric <- "Accuracy"
maximize <- TRUE
}
}
if (trControl$method=="LOOCV" & withinSE){
warning("withinSE is set to FALSE as no SE can be calculated using method LOOCV")
withinSE <- FALSE
}
if(globalval & withinSE){
warning("withinSE is set to FALSE as no SE can be calculated using global validation")
withinSE <- FALSE
}
## Define helper functions ---------------
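# standard error of the mean performance across the CV resamples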
se <- function(x){sd(x, na.rm = TRUE)/sqrt(length(na.exclude(x)))}
evalfunc = ifelse(maximize,
function(x){max(x,na.rm=TRUE)},
function(x){min(x,na.rm=TRUE)})
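# compares a candidate model against the current best one; with withinSE the
# candidate must beat the best performance by more than its standard error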
isBetter <- function (actmodelperf,bestmodelperf,
bestmodelperfSE=NULL,
maximization=FALSE,
withinSE=FALSE){
if(withinSE){
result <- ifelse (!maximization, actmodelperf < bestmodelperf-bestmodelperfSE,
actmodelperf > bestmodelperf+bestmodelperfSE)
}else{
result <- ifelse (!maximization, actmodelperf < bestmodelperf,
actmodelperf > bestmodelperf)
}
return(result)
}
## Initialize Variables --------------------------
trControl$returnResamp <- "final"
trControl$savePredictions <- "final"
n <- length(names(predictors))
acc <- 0
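# one row per candidate model: choose(n, minVar) initial combinations plus
# at most (n-minVar)+(n-minVar-1)+...+1 models during the forward steps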
perf_all <- data.frame(matrix(ncol=length(predictors)+3,
nrow=choose(n, minVar)+(n-minVar)*(n-minVar+1)/2))
names(perf_all) <- c(paste0("var",1:length(predictors)),metric,"SE","nvar")
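# all combinations of minVar predictor names, one combination per row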
minGrid <- t(data.frame(combn(names(predictors),minVar)))
# Computation -----------------------------------------------
## Step 1: Search best initial variables -----
## parallel ----------
if(cores > 1){
initial_models = parallel::mclapply(X = 1:nrow(minGrid), mc.cores = cores, FUN = function(i){
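# a fixed seed keeps the CV resampling identical across all candidate models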
set.seed(seed)
#adaptations for pls:
tuneGrid_orig <- tuneGrid
tuneLength_orig <- tuneLength
if(method=="pls"&!is.null(tuneGrid)&any(tuneGrid$ncomp>minVar)){
tuneGrid <- data.frame(ncomp=tuneGrid[tuneGrid$ncomp<=minVar,])
if(verbose){
print(paste0("note: maximum ncomp is ", minVar))
}
}
#adaptations for tuning of ranger:
if(method=="ranger"&!is.null(tuneGrid)&any(tuneGrid$mtry>minVar)){
tuneGrid$mtry <- minVar
if(verbose){
print("invalid value for mtry. Reset to valid range.")
}
}
# adaptations for RF and minVar == 1 - tuneLength must be 1, only one mtry possible
if(minVar==1 & method%in%c("ranger", "rf") & is.null(tuneGrid)){
tuneLength <- minVar
}
#train model:
model <- caret::train(predictors[minGrid[i,]],
response,
method=method,
metric=metric,
trControl=trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
tuneGrid <- tuneGrid_orig
tuneLength <- tuneLength_orig
if (globalval){
perf_stats <- global_validation(model)[names(global_validation(model))==metric]
}else{
perf_stats <- model$results[,names(model$results)==metric]
}
result = as.data.frame(t(minGrid[i,]))
result$actmodelperf <- evalfunc(perf_stats)
result$actmodelperfSE <- se(
sapply(unique(model$resample$Resample),
FUN=function(x){mean(model$resample[model$resample$Resample==x,
metric],na.rm=TRUE)}))
return(result)
})
initial_models = do.call(rbind, initial_models)
## save best model from initial models
best_rowindex = ifelse(maximize, which.max(initial_models$actmodelperf), which.min(initial_models$actmodelperf))
bestmodelperf <- initial_models$actmodelperf[best_rowindex]
bestmodelperfSE <- initial_models$actmodelperfSE[best_rowindex]
best_predictors <- as.character(initial_models[best_rowindex, 1:minVar])
# the best minVar model has to be retrained, since mclapply only returned
# its performance statistics
bestmodel <- caret::train(predictors[,best_predictors],
response,
method=method,
metric=metric,
trControl=trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
acc = nrow(minGrid)
# patching perf_all
perf_all[1:acc, 1:minVar] <- initial_models[,1:minVar]
perf_all[1:acc, (ncol(perf_all)-2):(ncol(perf_all)-1)] <- initial_models[,(ncol(initial_models)-1):ncol(initial_models)]
perf_all$nvar[1:nrow(minGrid)] <- minVar
}else{
## unparallel -------------
for (i in 1:nrow(minGrid)){
if (verbose){
print(paste0("model using ",paste0(minGrid[i,],collapse=","), " will be trained now..." ))
}
set.seed(seed)
#adaptations for pls:
tuneGrid_orig <- tuneGrid
tuneLength_orig <- tuneLength
if(method=="pls"&!is.null(tuneGrid)&any(tuneGrid$ncomp>minVar)){
tuneGrid <- data.frame(ncomp=tuneGrid[tuneGrid$ncomp<=minVar,])
if(verbose){
print(paste0("note: maximum ncomp is ", minVar))
}
}
#adaptations for tuning of ranger:
if(method=="ranger"&!is.null(tuneGrid)&any(tuneGrid$mtry>minVar)){
tuneGrid$mtry <- minVar
if(verbose){
print("invalid value for mtry. Reset to valid range.")
}
}
# adaptations for RF and minVar == 1 - tuneLength must be 1, only one mtry possible
if(minVar==1 & method%in%c("ranger", "rf") & is.null(tuneGrid)){
tuneLength <- minVar
}
#train model:
model <- caret::train(predictors[minGrid[i,]],
response,
method=method,
metric=metric,
trControl=trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
tuneGrid <- tuneGrid_orig
tuneLength <- tuneLength_orig
### compare the model with the currently best model
if (globalval){
perf_stats <- global_validation(model)[names(global_validation(model))==metric]
}else{
perf_stats <- model$results[,names(model$results)==metric]
}
actmodelperf <- evalfunc(perf_stats)
actmodelperfSE <- se(
sapply(unique(model$resample$Resample),
FUN=function(x){mean(model$resample[model$resample$Resample==x,
metric],na.rm=TRUE)}))
if (i == 1){
bestmodelperf <- actmodelperf
bestmodelperfSE <- actmodelperfSE
bestmodel <- model
} else{
if (isBetter(actmodelperf,bestmodelperf,maximization=maximize,withinSE=FALSE)){
bestmodelperf <- actmodelperf
bestmodelperfSE <- actmodelperfSE
bestmodel <- model
}
}
acc <- acc+1
variablenames <- names(model$trainingData)[-length(names(model$trainingData))]
perf_all[acc,1:length(variablenames)] <- variablenames
perf_all[acc,(length(predictors)+1):ncol(perf_all)] <- c(actmodelperf,actmodelperfSE,length(variablenames))
if(verbose){
print(paste0("maximum number of models that still need to be trained: ",
round(choose(n, minVar)+(n-minVar)*(n-minVar+1)/2-acc,0)))
}
}
}
## both --------
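# predictor names of the best initial model (trainingData also contains the .outcome column)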
selectedvars <- names(bestmodel$trainingData)[-which(
names(bestmodel$trainingData)==".outcome")]
if (globalval){
selectedvars_perf <- global_validation(bestmodel)[names(global_validation(bestmodel))==metric]
}else{
if (maximize){
selectedvars_perf <-max(bestmodel$results[,metric])
}else{
selectedvars_perf <- min(bestmodel$results[,metric])
}
}
selectedvars_SE <- bestmodelperfSE
if(verbose){
print(paste0(paste0("vars selected: ",paste(selectedvars, collapse = ',')),
" with ",metric," ",round(selectedvars_perf,3)))
}
## Step 2: Append more variables ------
# increase the number of predictors by one (try all combinations)
# and test if model performance increases
# k: amount of "additional variables" left after initial search
# for each k: search best additional predictor
## parallel -----
if(cores > 1){
for(k in 1:(length(names(predictors))-minVar)){
startvars <- names(bestmodel$trainingData)[-which(
names(bestmodel$trainingData)==".outcome")]
nextvars <- names(predictors)[-which(
names(predictors)%in%startvars)]
if(verbose){
print(paste0("Searching for additional variable ", minVar + k, " now. ",
length(nextvars), " potential predictors are available:"))
print(nextvars)
}
# search best additional variable in parallel
next_models <- parallel::mclapply(1:length(nextvars), mc.cores = cores, FUN = function(i){
set.seed(seed)
#adaptation for pls:
tuneGrid_orig <- tuneGrid
if(method=="pls"&!is.null(tuneGrid)&any(tuneGrid$ncomp>ncol(predictors[,c(startvars,nextvars[i])]))){
tuneGrid<- data.frame(ncomp=tuneGrid[tuneGrid$ncomp<=ncol(predictors[,c(startvars,nextvars[i])]),])
if(verbose){
print(paste0("note: maximum ncomp is ", ncol(predictors[,c(startvars,nextvars[i])])))
}}
#adaptation for ranger:
if(method=="ranger"&!is.null(tuneGrid)&any(tuneGrid$mtry>ncol(predictors[,c(startvars,nextvars[i])]))){
tuneGrid$mtry[tuneGrid$mtry>ncol(predictors[,c(startvars,nextvars[i])])] <- ncol(predictors[,c(startvars,nextvars[i])])
if(verbose){
print("invalid value for mtry. Reset to valid range.")
}
}
model <- caret::train(predictors[,c(startvars,nextvars[i])],
response,
method = method,
metric=metric,
trControl = trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
tuneGrid <- tuneGrid_orig
if (globalval){
perf_stats <- global_validation(model)[names(global_validation(model))==metric]
}else{
perf_stats <- model$results[,names(model$results)==metric]
}
result = as.data.frame(t(startvars))
result$nextvar = nextvars[i]
result$actmodelperf <- evalfunc(perf_stats)
result$actmodelperfSE <- se(
sapply(unique(model$resample$Resample),
FUN=function(x){mean(model$resample[model$resample$Resample==x,
metric],na.rm=TRUE)}))
return(result)
})
next_models = do.call(rbind, next_models)
## best next_model
best_next_rowindex = ifelse(maximize,
which.max(next_models[,(ncol(next_models)-1)]),
which.min(next_models[,(ncol(next_models)-1)]))
better = isBetter(actmodelperf = next_models$actmodelperf[best_next_rowindex],
bestmodelperf = bestmodelperf,
bestmodelperfSE = bestmodelperfSE,
maximization = maximize, withinSE = withinSE)
# patching perf_all
perf_all[(acc+1):(acc+length(nextvars)), 1:(minVar+k)] <- next_models[,1:(minVar+k)]
perf_all[(acc+1):(acc+length(nextvars)), (ncol(perf_all)-2):(ncol(perf_all)-1)] <- next_models[,(ncol(next_models)-1):ncol(next_models)]
perf_all$nvar[(acc+1):(acc+length(nextvars))] <- minVar+k
if(better){
# update best model stats
bestmodelperf = next_models$actmodelperf[best_next_rowindex]
bestmodelperfSE = next_models$actmodelperfSE[best_next_rowindex]
best_predictors = as.character(next_models[best_next_rowindex, 1:(minVar+k)])
selectedvars_perf = c(selectedvars_perf, bestmodelperf)
selectedvars_SE = c(selectedvars_SE, bestmodelperfSE)
bestmodel <- caret::train(predictors[,best_predictors],
response,
method=method,
metric=metric,
trControl=trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
acc = acc+nrow(next_models)
}else{
# not better: return model and stats
message(paste0("Note: No increase in performance found using more than ",
length(startvars), " variables"))
bestmodel$selectedvars <- best_predictors
bestmodel$selectedvars_perf <- selectedvars_perf
bestmodel$selectedvars_perf_SE <- selectedvars_SE
bestmodel$perf_all <- perf_all
bestmodel$perf_all <- bestmodel$perf_all[!apply(is.na(bestmodel$perf_all), 1, all),]
bestmodel$perf_all <- bestmodel$perf_all[colSums(!is.na(bestmodel$perf_all)) > 0]
bestmodel$minVar <- minVar
bestmodel$type <- "ffs"
class(bestmodel) <- c("ffs", "train")
return(bestmodel)
}
}# end of k loop
}else{
## unparallel -----
for (k in 1:(length(names(predictors))-minVar)){
startvars <- names(bestmodel$trainingData)[-which(
names(bestmodel$trainingData)==".outcome")]
nextvars <- names(predictors)[-which(
names(predictors)%in%startvars)]
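# if the previous round did not add a variable, startvars has not grown:
# stop the selection and return the current best model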
if (length(startvars)<(k+(minVar-1))){
message(paste0("Note: No increase in performance found using more than ",
length(startvars), " variables"))
bestmodel$selectedvars <- selectedvars
bestmodel$selectedvars_perf <- selectedvars_perf[-length(selectedvars_perf)]
bestmodel$selectedvars_perf_SE <- selectedvars_SE[-length(selectedvars_SE)] # drop the entry appended in the last, unsuccessful round
bestmodel$perf_all <- perf_all
bestmodel$perf_all <- bestmodel$perf_all[!apply(is.na(bestmodel$perf_all), 1, all),]
bestmodel$perf_all <- bestmodel$perf_all[colSums(!is.na(bestmodel$perf_all)) > 0]
bestmodel$minVar <- minVar
bestmodel$type <- "ffs"
class(bestmodel) <- c("ffs", "train")
return(bestmodel)
}
for (i in 1:length(nextvars)){
if(verbose){
print(paste0("model using additional variable ",nextvars[i], " will be trained now..." ))
}
set.seed(seed)
#adaptation for pls:
tuneGrid_orig <- tuneGrid
if(method=="pls"&!is.null(tuneGrid)&any(tuneGrid$ncomp>ncol(predictors[,c(startvars,nextvars[i])]))){
tuneGrid<- data.frame(ncomp=tuneGrid[tuneGrid$ncomp<=ncol(predictors[,c(startvars,nextvars[i])]),])
if(verbose){
print(paste0("note: maximum ncomp is ", ncol(predictors[,c(startvars,nextvars[i])])))
}}
#adaptation for ranger:
if(method=="ranger"&!is.null(tuneGrid)&any(tuneGrid$mtry>ncol(predictors[,c(startvars,nextvars[i])]))){
tuneGrid$mtry[tuneGrid$mtry>ncol(predictors[,c(startvars,nextvars[i])])] <- ncol(predictors[,c(startvars,nextvars[i])])
if(verbose){
print("invalid value for mtry. Reset to valid range.")
}
}
model <- caret::train(predictors[,c(startvars,nextvars[i])],
response,
method = method,
metric=metric,
trControl = trControl,
tuneLength = tuneLength,
tuneGrid = tuneGrid,
...)
tuneGrid <- tuneGrid_orig
if (globalval){
perf_stats <- global_validation(model)[names(global_validation(model))==metric]
}else{
perf_stats <- model$results[,names(model$results)==metric]
}
actmodelperf <- evalfunc(perf_stats)
actmodelperfSE <- se(
sapply(unique(model$resample$Resample),
FUN=function(x){mean(model$resample[model$resample$Resample==x,
metric],na.rm=TRUE)}))
if(isBetter(actmodelperf,bestmodelperf,
selectedvars_SE[length(selectedvars_SE)], #SE from model with nvar-1
maximization=maximize,withinSE=withinSE)){
bestmodelperf <- actmodelperf
bestmodelperfSE <- actmodelperfSE
bestmodel <- model
}
acc <- acc+1
variablenames <- names(model$trainingData)[-length(names(model$trainingData))]
perf_all[acc,1:length(variablenames)] <- variablenames
perf_all[acc,(length(predictors)+1):ncol(
perf_all)] <- c(actmodelperf,actmodelperfSE,length(variablenames))
if(verbose){
print(paste0("maximum number of models that still need to be trained: ",
round(choose(n, minVar)+(n-minVar)*(n-minVar+1)/2-acc,0)))
}
}
selectedvars <- c(selectedvars,names(bestmodel$trainingData)[-which(
names(bestmodel$trainingData)%in%c(".outcome",selectedvars))])
selectedvars_SE <- c(selectedvars_SE,bestmodelperfSE)
if (maximize){
if(globalval){
selectedvars_perf <- c(selectedvars_perf,global_validation(bestmodel)[names(global_validation(bestmodel))==metric])
}else{
selectedvars_perf <- c(selectedvars_perf,max(bestmodel$results[,metric]))
}
}
if (!maximize){
if(globalval){
selectedvars_perf <- c(selectedvars_perf,global_validation(bestmodel)[names(global_validation(bestmodel))==metric])
}else{
selectedvars_perf <- c(selectedvars_perf,min(bestmodel$results[,metric]))
}
}
if(verbose){
print(paste0(paste0("vars selected: ",paste(selectedvars, collapse = ',')),
" with ",metric," ",round(selectedvars_perf[length(selectedvars_perf)],3)))
}
}
}
## return best model --------
bestmodel$selectedvars <- selectedvars
bestmodel$selectedvars_perf <- selectedvars_perf
bestmodel$selectedvars_perf_SE <- selectedvars_SE
if(globalval){
bestmodel$selectedvars_perf_SE <- NA
}
bestmodel$perf_all <- perf_all
bestmodel$perf_all <- bestmodel$perf_all[!apply(is.na(bestmodel$perf_all), 1, all),]
bestmodel$minVar <- minVar
bestmodel$type <- "ffs"
bestmodel$perf_all <- bestmodel$perf_all[colSums(!is.na(bestmodel$perf_all)) > 0]
class(bestmodel) <- c("ffs", "train")
return(bestmodel)
}