R/rf_thvs.R

# #random forest with cutoff as additional tuning parameter
# #author: Max Kuhn on http://caret.r-forge.r-project.org/custom_models.html
# 
# ## Get the model code for the original random forest method:
# 
# rf_thres <- getModelInfo("rf", regex = FALSE)[[1]]
# rf_thres$type <- c("Classification")
# ## Add the threshold as another tuning parameter
# rf_thres$parameters <- data.frame(parameter = c("mtry", "threshold"),
#                                   class = c("numeric", "numeric"),
#                                   label = c("#Randomly Selected Predictors",
#                                             "Probability Cutoff"))
# ## The default tuning grid code:
# rf_thres$grid <- function(x, y, len = NULL) {
#   p <- ncol(x)
#   expand.grid(mtry = floor(sqrt(p)),
#               threshold = seq(.01, .99, length = len))
# }
# 
# ## Here we fit a single random forest model (with a fixed mtry)
# ## and loop over the threshold values to get predictions from the same
# ## randomForest model.
# rf_thres$loop = function(grid) {
#   library(plyr)
#   loop <- ddply(grid, c("mtry"),
#                 function(x) c(threshold = max(x$threshold)))
#   submodels <- vector(mode = "list", length = nrow(loop))
#   for(i in seq(along = loop$threshold)) {
#     index <- which(grid$mtry == loop$mtry[i])
#     cuts <- grid[index, "threshold"]
#     submodels[[i]] <- data.frame(threshold = cuts[cuts != loop$threshold[i]])
#   }
#   list(loop = loop, submodels = submodels)
# }
# 
# ## Fit the model independent of the threshold parameter
# rf_thres$fit = function(x, y, wts, param, lev, last, classProbs, ...) {
#   if(length(levels(y)) != 2)
#     stop("This works only for 2-class problems")
#   randomForest(x, y, mtry = param$mtry, ...)
# }
# 
# ## Now get a probability prediction and use different thresholds to
# ## get the predicted class
# rf_thres$predict = function(modelFit, newdata, submodels = NULL) {
#   class1Prob <- predict(modelFit,
#                         newdata,
#                         type = "prob")[, modelFit$obsLevels[1]]
#   
#   ## Raise the threshold for class #1 and a higher level of
#   ## evidence is needed to call it class 1 so it should 
#   ## decrease sensitivity and increase specificity
#   out <- ifelse(class1Prob >= modelFit$tuneValue$threshold,
#                 modelFit$obsLevels[1],
#                 modelFit$obsLevels[2])
#   if(!is.null(submodels))
#   {
#     tmp2 <- out
#     out <- vector(mode = "list", length = length(submodels$threshold))
#     out[[1]] <- tmp2
#     for(i in seq(along = submodels$threshold)) {
#       out[[i+1]] <- ifelse(class1Prob >= submodels$threshold[[i]],
#                            modelFit$obsLevels[1],
#                            modelFit$obsLevels[2])
#     }
#   }
#   out
# }
# 
# ## The probabilities are always the same but we have to create
# ## mulitple versions of the probs to evaluate the data across
# ## thresholds
# rf_thres$prob = function(modelFit, newdata, submodels = NULL) {
#   out <- as.data.frame(predict(modelFit, newdata, type = "prob"))
#   if(!is.null(submodels))
#   {
#     probs <- out
#     out <- vector(mode = "list", length = length(submodels$threshold)+1)
#     out <- lapply(out, function(x) probs)
#   }
#   out
# }
environmentalinformatics-marburg/gpm documentation built on July 11, 2020, 11:12 a.m.