# imports used in most of the functions
#' @import data.table
#' @import ggplot2
#' @importFrom foreach foreach %do% %dopar% %:% getDoParWorkers
#' @importFrom stats predict na.omit
#' @importFrom utils globalVariables setTxtProgressBar txtProgressBar
NULL
#' Combine objects
#'
#' You can use \code{function(model.list)} or \code{c(...)}
#' to combine multiple objects generated by functions from this package.
#'
#' The output \code{function(list(model1, model2))} is the same as \code{c(function(model1), function(model2))}.
#' But note that you can only concatenate objects obtained from models with different methods (e.g. 'rf','gbm', ...).
#' \cr\cr
#' In contrast, using \code{function(list(model1, model2))} will append '.1', '.2', etc
#' if a method is repeated in the models.
#' Optionally, you can furnish a named list, and the names will override the labels of the methods.
#' @param x,model A list of models created by \code{\link[caret]{train}}.
#' @param ... For \code{function(model.list, ...)}, further arguments passed to the function.
#' For \code{c(...)}, multiple objects returned by the same function.
#' @name combine
#' @rdname combine
NULL
globalVariables(c(".", ".SD", "method", "data", "metric", "variable", "value",
"error", "Resample", "thr", "coef"))
maxentThr <- function(x) {
return(switch(x,
"Min_Presence" = "Minimum training presence",
"10%_Presence" = "10 percentile training presence",
"Sens=Spec" = "Equal training sensitivity and specificity",
"MaxSens+Spec" = "Maximum training sensitivity plus specificity",
"Balance" = "Balance training omission, predicted area and threshold value",
"Entropy" = "Equate entropy of thresholded and original distributions"))
}
get.response <- function(model, newdata, newy) {
pred.name <- "testy"
if (is.null(newy)) {
if (inherits(model, "train.formula")) {
pred.name <- all.vars(model$terms)[attr(model$terms, "response")]
} else {
stop("Either provide 'testy' or train using a formula")
}
newy <- newdata[[pred.name]]
}
if (!all(levels(newy) %in% model$levels))
stop(pred.name, " must have levels ", paste(model$levels, collapse = " and "))
return(newy)
}
createIndex <- function(y, method, number, repeats, p) {
return(switch(tolower(method),
cv = caret::createFolds(y, number, returnTrain = TRUE),
repeatedcv =, adaptive_cv = caret::createMultiFolds(y, number, repeats),
boot =, adaptive_boot = caret::createResample(y, number),
lgocv =, adaptive_lgocv = caret::createDataPartition(y, number, p),
none = list(all = seq(along = y),
stop("Value for 'method' must be one of cv, repeatedcv, boot, lgocv or none.
Perhaps you forgot to set 'testindex' to evaluate?"))
))
}
#' Functions to calculate confidence intervals
#'
#' \code{ci_95} calculates 95\% confidence intervals and \code{se} calculates stardard error.
#' @param x Numeric vector to calculate confidence intervals
#' @param alpha Probability used for the Student t Distribution
#' (\code{alpha = 0.05} is equivilant 95\% CI)
#' @param na.rm a logical value indicating whether NA values should be stripped.
#' @rdname CI
#' @export
ci_95 <- function(x, alpha = 0.05, na.rm = FALSE) {
if (na.rm) x <- x[!is.na(x)]
n <- length(x)
out <- if (n > 1) stats::sd(x) / sqrt(n) * stats::qt(alpha / 2, n - 1, lower.tail = FALSE) else 0
return(out)
}
#' @rdname CI
#' @export
se <- function(x, na.rm = FALSE) {
if (na.rm) x <- x[!is.na(x)]
return(stats::sd(x) / sqrt(length(x)))
}
# check models in list, and return a error if one of those checks is false
check_list <- function(model.list) {
# check if all objects are a train
if (!all(sapply(model.list, inherits, "train")))
stop("All elements in the list must be objects returned by caret::train.")
# check if all models are of the same problem type
modelType <- sapply(model.list, `[[`, "modelType")
if (!all(modelType == modelType[1]))
stop("All models should be of the same type (classification or regression).")
# Check if all models have the same coefficients
coefs <- lapply(model.list, `[[`, "coefnames")
coefs_check <- all(sapply(coefs, function(x, y) all(x %in% y), y = coefs[[1]]))
if (!coefs_check) stop("The first model should have all coeficients used in following models")
return(invisible(NULL))
}
# return models with unique names
check_names <- function(model.list) {
if (is.null(names(model.list))) {
m.names <- sapply(model.list, function(x) x$modelInfo$label)
} else {
m.names <- names(model.list)
}
m.names <- make.unique(m.names)
return(mapply(setname, model = model.list, name = m.names, SIMPLIFY = FALSE))
}
setname <- function(model, name) {
model$modelInfo$label <- name
return(model)
}
# check models before concatanating
check_c <- function(model.list, className) {
if (!all(sapply(model.list, inherits, what = className)))
stop("All objects must be an output of '", className, "'.")
methods <- unlist(lapply(model.list, function(x) levels(x[[1]]$method)))
if (any(duplicated(methods)))
stop("Models should have different methods.")
return(invisible(NULL))
}
check_train <- function(model) {
if (!model$control$returnData)
stop("returnData should be TRUE in the trControl.")
return(invisible(NULL))
}
check_list_ensemble <- function(model.list) {
if (!all(sapply(model.list, function(x) x$control$returnData)))
stop("returnData should be TRUE in the trControl for all models.")
if (model.list[[1]]$modelType == "Classification") { # assumed that passed check_list
check_lvl <- sapply(model.list, function(x) length(x$levels))
if (any(check_lvl > 2))
warning("Multi class problem detected, confidence map will be based on the probabilities
of the first class only.")
}
return(invisible(NULL))
}
getcoefs <- function(model, strip = c(".outcome", ".weights")) {
cnames <- colnames(model$trainingData)
return(cnames[!(cnames %in% strip)])
}
create_args <- function(model) {
control <- model$control
control$method <- "none"
control$index <- NULL
control$indexOut <- NULL
control$savePredictions <- FALSE # to use a little less memory
# make a list of arguments
args <- c(list(.outcome ~ .,
preProcess = model$call$preProcess,
metric = model$metric,
trControl = control,
tuneGrid = model$bestTune),
model$dots)
args$method <- if (model$method == "custom") model$modelInfo else model$method
return(args)
}
raster2data <- function(raster, model) {
r <- raster::as.data.frame(raster, na.rm = FALSE)
r <- stats::na.omit(r)
colnames(r) <- sub("_VALUE$", "", colnames(r))
# converto to factor if avaiable
varfactor <- names(model$xlevels)
if (length(varfactor) > 0) {
for (f in varfactor) {
r[, f] <- as.factor(r[, f])
}
}
return(r)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.