# #random forest with cutoff as additional tuning parameter
# #author: Max Kuhn on http://caret.r-forge.r-project.org/custom_models.html
#
# ## Get the model code for the original random forest method:
#
# rf_thres <- getModelInfo("rf", regex = FALSE)[[1]]
# rf_thres$type <- c("Classification")
# ## Add the threshold as another tuning parameter
# rf_thres$parameters <- data.frame(parameter = c("mtry", "threshold"),
# class = c("numeric", "numeric"),
# label = c("#Randomly Selected Predictors",
# "Probability Cutoff"))
# ## The default tuning grid code:
# rf_thres$grid <- function(x, y, len = NULL) {
# p <- ncol(x)
# expand.grid(mtry = floor(sqrt(p)),
# threshold = seq(.01, .99, length = len))
# }
#
# ## Here we fit a single random forest model (with a fixed mtry)
# ## and loop over the threshold values to get predictions from the same
# ## randomForest model.
# rf_thres$loop = function(grid) {
# library(plyr)
# loop <- ddply(grid, c("mtry"),
# function(x) c(threshold = max(x$threshold)))
# submodels <- vector(mode = "list", length = nrow(loop))
# for(i in seq(along = loop$threshold)) {
# index <- which(grid$mtry == loop$mtry[i])
# cuts <- grid[index, "threshold"]
# submodels[[i]] <- data.frame(threshold = cuts[cuts != loop$threshold[i]])
# }
# list(loop = loop, submodels = submodels)
# }
#
# ## Fit the model independent of the threshold parameter
# rf_thres$fit = function(x, y, wts, param, lev, last, classProbs, ...) {
# if(length(levels(y)) != 2)
# stop("This works only for 2-class problems")
# randomForest(x, y, mtry = param$mtry, ...)
# }
#
# ## Now get a probability prediction and use different thresholds to
# ## get the predicted class
# rf_thres$predict = function(modelFit, newdata, submodels = NULL) {
# class1Prob <- predict(modelFit,
# newdata,
# type = "prob")[, modelFit$obsLevels[1]]
#
# ## Raise the threshold for class #1 and a higher level of
# ## evidence is needed to call it class 1 so it should
# ## decrease sensitivity and increase specificity
# out <- ifelse(class1Prob >= modelFit$tuneValue$threshold,
# modelFit$obsLevels[1],
# modelFit$obsLevels[2])
# if(!is.null(submodels))
# {
# tmp2 <- out
# out <- vector(mode = "list", length = length(submodels$threshold))
# out[[1]] <- tmp2
# for(i in seq(along = submodels$threshold)) {
# out[[i+1]] <- ifelse(class1Prob >= submodels$threshold[[i]],
# modelFit$obsLevels[1],
# modelFit$obsLevels[2])
# }
# }
# out
# }
#
# ## The probabilities are always the same but we have to create
# ## mulitple versions of the probs to evaluate the data across
# ## thresholds
# rf_thres$prob = function(modelFit, newdata, submodels = NULL) {
# out <- as.data.frame(predict(modelFit, newdata, type = "prob"))
# if(!is.null(submodels))
# {
# probs <- out
# out <- vector(mode = "list", length = length(submodels$threshold)+1)
# out <- lapply(out, function(x) probs)
# }
# out
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.