#' Extract cross-validated important variables
#'
#' @description
#' This function approaches the identification of important variables from
#' \code{\link[caret]{rfe}} more conservatively than \strong{caret}. It uses
#' the standard deviation (or standard error) of the cross-validated error
#' metric to identify important variables.
#'
#' @param model a rfe model. See \code{\link[caret]{rfe}}
#' @param metric the metric to be used. Note this needs to be the metric used
#' to calculate the \code{\link[caret]{rfe}} model
#' @param maximize logical: Is a higher value of the metric favourable
#' (e.g metric = Rsquared) or not (e.g metric = RMSE). maximize=TRUE is
#' determined automatically as long as metric is either Rsquared, ROC, Accuracy.
#' maximize =FALSE is used for all other metrics. Set this manually if you
#' use an other metric where higher values are favourable.
#' @param sderror If TRUE then standard error is calculated. If FALSE then
#' standard deviations are used
#'
#' @return
#' a character vector of the variable names
#'
#' @examples
#' # Not run
trainModelVarSelSD <- function (model, metric = model$metric, maximize=FALSE,
sderror=TRUE){
if (metric=="Rsquared"||metric=="ROC"||metric=="Accuracy"){
maximize=TRUE
} else {
maximize =FALSE
}
data <- as.data.frame(model$resample)
sdv <- c()
means <- c()
for (i in unique(data$Variables)) {
if(!sderror){
sdv <- c(sdv,
sd(eval(parse(text = paste("data$",
metric)))[data$Variables == i]))
}
if(sderror){
sdv <- c(sdv,helpStandardError(eval(parse(text = paste("data$",
metric)))[data$Variables == i]))
}
means <- c(means,
mean(eval(parse(text=paste("data$",
metric)))[data$Variables==i]))
}
if (maximize) {
upr <- means - sdv
} else {
upr <- means + sdv
}
#start_var <- min(unique(data$Variables))
#start_offset <- abs(start_var - 1)
# print(means)
# print(upr)
# print(upr[model$bestSubset - start_offset])
# print(means < upr[model$bestSubset - start_offset])
if (maximize){
n_vars <- unique(data$Variables)[which(means >
upr[which(unique(data$Variables)==
model$bestSubset)])[1]]
} else{
n_vars <- unique(data$Variables)[which(means <
upr[which(unique(data$Variables)==
model$bestSubset)])[1]]
}
subset <- model$variables[model$variables$Variables==n_vars,]
uniqueVars <- unique(subset$var)
bestVar<-c()
for (i in 1:length(uniqueVars)){
bestVar[i]=sum(subset$Overall[subset$var==uniqueVars[i]])/10
}
names(bestVar) <- uniqueVars
if(is.na(names(bestVar))){
bestVar<-model$optVariables
} else {
bestVar<-sort(bestVar,decreasing=TRUE)[1:n_vars]
}
#bestVar <- model$control$functions$selectVar(model$variables, n_vars)
#return(model$optVariables[1:n_vars])
return(bestVar)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.