utils::globalVariables(c("factors", "predictors", "s"))
#' @rdname response
#' @export
response <- function(model, ...) UseMethod("response")
#' Calculate response plots for train models
#'
#' Evaluation Strip method proposed by Elith et al.(2005),
#' applied to models created using \code{caret::train}.
#' Here we use the same process as described in \code{biomod2::response.plot2}.
#' @param model A model returned by \code{\link[caret]{train}}.
#' @param fixedvarFunction A function used to fix as constant the other variables when the predicting responses.
#' @param errorFunction A function used to calculate error across resamples. Default is 95\% confidence interval.
#' If \code{NULL}, responses are calculated from the final model only.
#' @param n Number of responses to get for each variable, as in seq(min(variable), max(variable), length.out=n).
#' @param progress logical. Show progress bar?
#' @return An S3 object of class 'response.train', including:
#' \itemize{
#' \item{num and fact}{Data.tables with responses for each variable of type numeric and factors.
#' If \code{errorFunction != NULL}, responses are means from resamples and errors are also provided.}
#' \item{num_resample and fact_resample}{Data.tables with responses for each resample,
#' only provided if \code{errorFunction != NULL}.}
#' \item{quantiles}{A matrix with quantiles for each variable used to plot rugs.}
#' }
#' @rdname response
#' @export
response.train <- function(model, fixedvarFunction = mean, errorFunction = ci_95, n = 100, progress = FALSE, ...) {
fixedDat <- response_table(model, fixedvarFunction, n)
out <- response_main(model, errorFunction, progress, fixedDat, ...)
return(out)
}
#' @export
response.ensemble.train <- function(model, fixedvarFunction = mean, n = 100, progress = FALSE, ...) {
fixedDat <- response_table(model, fixedvarFunction, n)
out <- response_main(model, NULL, progress, fixedDat, ...)
return(out)
}
#' @export
response.list <- function(model, ...) {
check_list(model)
model.list <- check_names(model)
# calculated fixedData for predictions based on the training data of the first model
fixedDat <- response_table(model[[1]], ...)
x <- lapply(model, response_main, fixedDat = fixedDat, ...)
return(c_response.train(x))
}
#' @param x An object returned by \code{response}.
#' @param plot_errorbar logical. Should plot errors? Only valid if \code{resample = TRUE}.
#' @param plot_rugs logical. Should plot rugs representing quantiles?
#' @param free_y logical. Free or fixed y axis in plots?
#' @param plot_thr logical. Should plot a line representing the probability threshold of the model?
#' A threshold must be set using \code{setThreshold} and its only plotted if there is only one model
#' (to avoid to much noise in the plot).
#' @param ... ignored
#' @rdname response
#' @export
plot.response.train <- function(x, plot_errorbar = TRUE, plot_rugs = TRUE, free_y = TRUE, plot_thr = TRUE, ...) {
fig <- list()
plot.error <- diff(range(x$num$error)) > 0 && plot_errorbar
if (!free_y) {
if (plot_errorbar) {
lims <- c(
min(c(x$fact$response - x$fact$error, x$num$response - x$num$error)),
max(c(x$fact$response + x$fact$error, x$num$response + x$num$error))
)
} else {
lims <- range(c(x$fact$response, x$num$response))
}
}
for (coef in x$coefs) {
if (coef %in% names(x$xlevels)) {
# plot bar plot to factor
tmp <- x$fact[variable == coef]
figtmp <- ggplot(tmp, aes(x = factors, y = response, fill = method)) +
geom_bar(position = position_dodge(), stat = "identity") +
scale_fill_brewer(palette = "Set1")
if (free_y) {
figtmp <- figtmp + scale_y_continuous(expand = expansion(mult = c(0, 0.05)))
} else {
figtmp <- figtmp + scale_y_continuous(limits = lims, expand = expansion(mult = c(0, 0.05)))
}
if (plot.error) {
figtmp <- figtmp + geom_errorbar(aes(ymin = response - error, ymax = response + error),
position = position_dodge(.9), width = 0.25)
}
} else {
# plot line to numeric
tmp <- x$num[variable == coef]
figtmp <- ggplot(tmp, aes(x = predictors, y = response, color = method)) +
geom_line() + scale_color_brewer(palette = "Set1") +
scale_x_continuous(expand = expansion(mult = c(0.025, 0.025)))
if (plot.error) {
figtmp <- figtmp + geom_ribbon(aes(ymin = response - error, ymax = response + error, fill = method),
alpha = 0.2, color = NA) +
scale_fill_brewer(palette = "Set1")
}
if (!free_y) {
figtmp <- figtmp + scale_y_continuous(limits = lims)
}
if (plot_rugs) {
quantiles <- data.frame(q = x$quantiles[, coef])
figtmp <- figtmp + geom_rug(data = quantiles, aes(x = q), sides = "b", inherit.aes = FALSE)
}
}
if (!is.null(x$thr) && plot_thr) {
figtmp <- figtmp + geom_hline(yintercept = x$thr, linetype = "dashed")
}
fig[[coef]] <- figtmp + ggtitle(coef) + theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size = 9, face = "bold"),
axis.title = element_blank())
}
methods <- unique(levels(x$num$method), levels(x$fact$method))
if (length(methods) > 1) {
figs <- ggpubr::ggarrange(plotlist = fig, common.legend = TRUE, legend = "bottom")
} else {
figs <- ggpubr::ggarrange(plotlist = fig, legend = "none")
}
print(figs)
return(invisible(figs))
}
#' @rdname combine
#' @export
c.response.train <- function(...) {
x <- list(...)
check_c(x, "response.train")
cnames <- lapply(x, `[[`, "coefs")
if (!all(sapply(cnames, FUN = identical, y = cnames[[1]])))
stop("All models must use same coeficients for train data.")
return(c_response.train(x))
}
c_response.train <- function(x) {
m <- list()
m$num <- rbindlist(lapply(x, `[[`, "num"))
m$fact <- rbindlist(lapply(x, `[[`, "fact"))
m$num_resample <- rbindlist(lapply(x, `[[`, "num_resample"))
m$fact_resample <- rbindlist(lapply(x, `[[`, "fact_resample"))
m$coefs <- x[[1]][["coefs"]]
m$xlevels <- x[[1]][["xlevels"]]
m$thr <- NULL
m$modelType <- x[[1]][["modelType"]]
m$quantiles <- x[[1]][["quantiles"]]
class(m) <- "response.train"
return(m)
}
#' @export
print.response.train <- function(x, ...) {
cat("Object of type response.train\n")
cat("Models avaiable:", levels(x$num$method), "\n")
cat("Coeficients avaiable:", x$coefs, "\n\n")
if (nrow(x$num) > 0) print(x$num, nrows = 20)
if (nrow(x$fact) > 0) print(x$fact, nrows = 20)
}
###################
# helper function
response.helper <- function(model, fixedDat, coefs, model.type) {
varnumeric <- fixedDat[[1]]
varfactor <- fixedDat[[2]]
fixedDatan <- fixedDat[[3]]
fixedDataf <- fixedDat[[4]]
response_numeric <- fixedDat[[7]]
variablen <- fixedDat[[8]]
variablef <- fixedDat[[9]]
tmp2_u <- fixedDat[[10]]
out <- list()
# predict and save output
# for numeric
index <- variablen %in% coefs
if (length(varnumeric) > 0 && sum(index) > 0) {
pred <- predict2(model, fixedDatan[index, ], type = model.type)
out$num <- data.table(variable = factor(variablen[index], levels = varnumeric),
predictors = response_numeric[index],
response = pred)
} else {
out$num <- data.table(variable = factor(),
predictors = numeric(),
response = numeric())
}
# for factors
index <- variablef %in% coefs
if (length(varfactor) > 0 && sum(index) > 0) {
pred <- predict2(model, fixedDataf[index, ], type = model.type)
out$fact <- data.table(variable = factor(variablef[index], levels = varfactor),
factors = tmp2_u[index],
response = pred)
} else {
out$fact <- data.table(variable = factor(),
factors = character(),
response = numeric())
}
return(out)
}
response_table <- function(model, fixedvarFunction = mean, n = 100, ...) {
# create tables with fixed values for predictions
check_train(model)
coefs <- getcoefs(model)
model$trainingData <- as.data.frame(model$trainingData)
# calculate overall response
varfactor <- names(model$xlevels)
varnumeric <- coefs[!(coefs %in% varfactor)]
newdata <- model$trainingData[, coefs]
# calculate data.frames with variable fixed
tmp1 <- lapply(newdata[, varnumeric, drop = FALSE], fixedvarFunction, na.rm = TRUE)
tmp2 <- lapply(newdata[, varfactor, drop = FALSE], function(x) names(which.max(table(na.omit(x)))))
fixedData <- data.frame(Filter(function(x) length(x) > 0, list(tmp1, tmp2)))
# for numeric data
if (length(varnumeric) > 0) {
fixedDatan <- fixedData[rep(1, (n * length(varnumeric))), ]
variablen <- rep(varnumeric, each = n)
response_numeric <- vector("numeric", n * length(varnumeric))
for (var in varnumeric) {
tmp <- range(model$trainingData[, var])
response_numeric[which(variablen == var)] <- seq(tmp[1], tmp[2], length.out = n)
}
# calculate quantiles for rugs
quantiles <- apply(model$trainingData[, varnumeric], 2, stats::quantile, probs = seq(0.01, 0.99, 0.01))
# replace values in fixedData for a sequence for each variable
for (var in varnumeric) {
index <- which(variablen == var)
fixedDatan[index, var] <- response_numeric[index]
}
if (length(varfactor) > 0) {
for (var in varfactor) {
fixedDatan[, var] <- factor(fixedDatan[, var], levels = model$xlevels[[var]])
}
}
} else {
fixedDatan <- NULL
response_numeric <- NULL
quantiles <- NULL
variablen <- NULL
}
# for factor data
if (length(varfactor) > 0) {
tmp2_u <- lapply(newdata[, varfactor, drop = FALSE], unique)
tmp2_l <- sapply(tmp2_u, length)
fixedDataf <- fixedData[rep(1, sum(tmp2_l)), ]
variablef <- rep(varfactor, tmp2_l)
tmp2_u <- unlist(tmp2_u)
for (var in varfactor) {
index <- variablef == var
fixedDataf[index, var] <- as.character(tmp2_u[index])
fixedDataf[, var] <- factor(fixedDataf[, var], levels = model$xlevels[[var]])
}
} else {
fixedDataf <- NULL
variablef <- NULL
tmp2_u <- NULL
}
return(list(varnumeric, varfactor, fixedDatan, fixedDataf, quantiles, coefs,
response_numeric, variablen, variablef, tmp2_u))
}
response_main <- function(model, errorFunction = ci_95, progress = FALSE, fixedDat, ...) {
coefs <- getcoefs(model) #fixedDat[[6]]
coefs_obs <- c(coefs, ".outcome")
model.type <- if (model$modelType == "Classification") "prob1" else "raw"
model$trainingData <- as.data.frame(model$trainingData)
# calculate response
if (!is.null(errorFunction)) {
# get with seed to use
results <- model$results
results$order <- seq(nrow(results))
results <- merge(results, model$bestTune)
M <- results$order
# modify trainControl to remove resampling
args <- create_args(model)
# prepare progress bar
do.par <- model$control$allowParallel && getDoParWorkers() > 1
`%op%` <- if (do.par) `%dopar%` else `%do%`
opts <- NULL
doProgress <- progress && !do.par
if (progress) {
cat(model$modelInfo$label, "\n")
pb <- txtProgressBar(max = length(model$control$index), style = 3)
if (do.par) {
opts <- list(progress = function(n) setTxtProgressBar(pb, n))
} else {
counter <- 1
}
}
results <- foreach(i = model$control$index, s = model$control$seeds,
.packages = "caret", .options.snow = opts) %op% {
args$trControl$seeds <- s[M]
args$data <- model$trainingData[i, coefs_obs]
args$weights <- model$trainingData$.weights[i]
tmp.model <- invisible(do.call("train", args))
if (doProgress) {
setTxtProgressBar(pb, counter)
counter <- counter + 1
}
# create response
response.helper(tmp.model, fixedDat, coefs, model.type)
}
if (progress) {
close(pb)
cat("\n")
}
names(results) <- names(model$control$index)
num <- rbindlist(lapply(results, `[[`, "num"), idcol = "Resample")
fact <- rbindlist(lapply(results, `[[`, "fact"), idcol = "Resample")
num[, c("Resample", "method") := .(factor(Resample), factor(model$modelInfo$label))]
fact[, c("Resample", "method") := .(factor(Resample), factor(model$modelInfo$label))]
# calculate mean and error
out <- list()
out$num <- num[, .(response = mean(response), error = errorFunction(response)),
by = .(method, variable, predictors)]
out$fact <- fact[, .(response = mean(response), error = errorFunction(response)),
by = .(method, variable, factors)]
out$num_resample <- num
out$fact_resample <- fact
} else {
# only use the final model
out <- response.helper(model, fixedDat, model.type)
if (nrow(out$num) > 0) {
out$num <- out$num[, .(method = factor(model$modelInfo$label),
variable, predictors, response, error = 0)]
} else {
out$num <- data.table()
}
if (nrow(out$fact) > 0) {
out$fact <- out$fact[, .(method = factor(model$modelInfo$label),
variable, factors, response, error = 0)]
} else {
out$fact <- data.table()
}
out$num_resample <- data.table()
out$fact_resample <- data.table()
}
# add other variables to the object
out$coefs <- coefs
out$xlevels <- model$xlevels
out$thr <- model$thr
out$modelType <- model$modelType
out$quantiles <- fixedDat[[5]]
class(out) <- "response.train"
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.