#' @title Iterate tuning of ENMs
#' @description Internal functions to tune and summarize results for ecological niche models (ENMs) iteratively across a range of user-specified tuning settings.
#' See \link{ENMevaluate} for descriptions of shared arguments.
#' Function \code{tune.parallel()} tunes ENMs with parallelization. Function \code{cv.enm()} calculates training and validation evaluation statistics for one set of specified tuning parameters.
#' @aliases tune.parallel tune.regular cv.enm
#' @param enm \link{ENMdetails} object
#' @param occs.z data.frame: the envs values for the coordinates at the full dataset occurrence records
#' @param bg.z data.frame: the envs values for the coordinates at the full dataset background records
#' @param mod.full model object: the model trained on the full dataset
#' @param tune.tbl data frame: all combinations of tuning parameters
#' @param tune.tbl.i vector: single set of tuning parameters
#' @param partitions character: name of partitioning technique (see \code{?partitions})
#' @param occs.train.z data.frame: the envs values for the coordinates at the training occurrence records
#' @param occs.val.z data.frame: the envs values for the coordinates at the validation occurrence records
#' @param occs.testing.z data.frame: when fully withheld testing data is provided, the envs values for the
#' coordinates at the testing occurrence records
#' @param bg.train.z data.frame: the envs values for the coordinates at the training background records
#' @param bg.val.z data.frame: the envs values for the coordinates at the validation background records
#' @param mod.k model object: the model trained on the training dataset that becomes evaluated on the validation data
#' @param nk numeric: the number of folds (i.e., partitions) -- will be equal to \code{kfolds} for random partitions
#' @param d data frame: data frame from \code{ENMevaluate()} with occurrence and background coordinates (or coordinates plus predictor variable values) and partition group values
#' @description Validation CBI is calculated here with background values, not raster data, in order
#' to standardize the methodology for both training and validation data for spatial partitions, as ENMeval
#' does not mask rasters to partition areas and hence does not have partitioned raster data. Further,
#' predictions for occurrence and background localities are combined as input for the parameter "fit" in
#' \code{ecospat::ecospat_boyce()} because the interval is determined from "fit" only, and if test occurrences
#' all have higher predictions than the background, the interval will be cut short.
#' @inheritParams ENMevaluate
#' @name tune.enm
NULL
#' @rdname tune.enm
tune.train <- function(enm, occs.z, bg.z, mod.full, tune.tbl.i, other.settings, partitions, quiet) {
# get model predictions for training data
occs.pred <- enm@predict(mod.full, occs.z, other.settings)
bg.pred <- enm@predict(mod.full, bg.z, other.settings)
# training AUC
e <- predicts::pa_evaluate(occs.pred, bg.pred)
auc.train <- e@stats$auc
# calculate training CBI with background, not raster -- this is in order
# to calculate CBI in a standardized way across training and validation data for
# spatial partitions (ENMeval does not mask rasters to partition areas and hence
# does not have this information)
# combine occs and bg predictions as input for "fit" because the interval
# is determined from "fit" only, and if occs have higher predictions than
# bg, the interval will be cut short
if(other.settings$ecospat.use == TRUE) {
cbi.train <- ecospat::ecospat.boyce(c(bg.pred, occs.pred), occs.pred, PEplot = FALSE)$cor
}else{
cbi.train <- NA
}
out.df <- data.frame(auc.train = auc.train, cbi.train = cbi.train)
return(out.df)
}
#' @rdname tune.enm
tune.validate <- function(enm, occs.train.z, occs.val.z, bg.train.z, bg.val.z,
mod.k, nk, tune.tbl.i, other.settings, partitions,
user.eval, quiet) {
# get model predictions for training and validation data for partition k
occs.train.pred <- enm@predict(mod.k, occs.train.z, other.settings)
occs.val.pred <- enm@predict(mod.k, occs.val.z, other.settings)
bg.train.pred <- enm@predict(mod.k, bg.train.z, other.settings)
# if the background was partitioned, get predictions for this subset; if not, it will be NULL
if(nrow(bg.val.z) > 0) {
bg.val.pred <- enm@predict(mod.k, bg.val.z, other.settings)
}else{
bg.val.pred <- NULL
# if(quiet != TRUE) message("\nNOTE: Background points were not partitioned for this analysis, so model validation will proceed on the full background.")
}
# if validation.bg == "full", calculate training and validation AUC and CBI based on the full background
# (training + validation background for all statistics)
# see Radosavljevic & Anderson 2014 for an example of calculating validation AUC with spatial partitions over a shared background
if(other.settings$validation.bg == "full") {
e.train <- predicts::pa_evaluate(occs.train.pred, c(bg.train.pred, bg.val.pred))
auc.train <- e.train@stats$auc
e.val <- predicts::pa_evaluate(occs.val.pred, c(bg.train.pred, bg.val.pred))
auc.val <- e.val@stats$auc
# calculate AUC diff as training AUC minus validation AUC with a shared background
auc.diff <- auc.train - auc.val
# calculate CBI based on the full background (do not calculate for jackknife partitions)
if(other.settings$ecospat.use == TRUE) {
if(partitions != "jackknife") {
cbi.val <- ecospat::ecospat.boyce(c(bg.train.pred, bg.val.pred, occs.val.pred), occs.val.pred, PEplot = FALSE)$cor
}else{
cbi.val <- NA
}
}else{
cbi.val <- NA
}
# if validation.bg == "partition", calculate training and validation AUC and CBI based on the partitioned backgrounds only
# (training background for training statistics and validation background for validation statistics)
}else if(other.settings$validation.bg == "partition") {
e.train <- predicts::pa_evaluate(occs.train.pred, bg.train.pred)
auc.train <- e.train@stats$auc
e.val <- predicts::pa_evaluate(occs.val.pred, bg.val.pred)
auc.val <- e.val@stats$auc
# calculate AUC diff as training AUC minus validation AUC with different backgrounds
auc.diff <- auc.train - auc.val
# calculate CBI based on the validation background only (do not calculate for jackknife partitions)
if(other.settings$ecospat.use == TRUE) {
if(partitions != "jackknife") {
cbi.val <- ecospat::ecospat.boyce(c(bg.val.pred, occs.val.pred), occs.val.pred, PEplot = FALSE)$cor
}else{
cbi.val <- NA
}
}else{
cbi.val <- NA
}
}
# get absolute value of AUC diff if requested
if(other.settings$abs.auc.diff == TRUE) auc.diff <- abs(auc.diff)
## omission rates
# get minimum training presence threshold (expected no omission)
min.train.thr <- min(occs.train.pred)
or.mtp <- mean(occs.val.pred < min.train.thr)
# get 10 percentile training presence threshold (expected 0.1 omission)
pct10.train.thr <- calc.10p.trainThresh(occs.train.pred)
or.10p <- mean(occs.val.pred < pct10.train.thr)
# perform user-specified validation statistics if available
if(is.function(user.eval)) {
vars <- list(enm, occs.train.z, occs.val.z, bg.train.z, bg.val.z, mod.k, nk,
other.settings, partitions, occs.train.pred, occs.val.pred,
bg.train.pred, bg.val.pred)
names(vars) <- c("enm", "occs.train.z", "occs.val.z", "bg.train.z", "bg.val.z", "mod.k", "nk",
"other.settings", "partitions", "occs.train.pred", "occs.val.pred",
"bg.train.pred", "bg.val.pred")
user.eval.out <- user.eval(vars)
}else{
user.eval.out <- NULL
}
# gather all evaluation statistics for k
out.df <- data.frame(auc.val = auc.val, auc.diff = auc.diff, cbi.val = cbi.val, or.mtp = or.mtp, or.10p = or.10p)
if(!is.null(user.eval.out)) out.df <- cbind(out.df, user.eval.out)
return(out.df)
}
#' @rdname tune.enm
tune <- function(d, enm, partitions, tune.tbl, doClamp, other.settings,
partition.settings, user.val.grps, occs.testing.z,
numCores, parallel, user.eval, algorithm,
updateProgress, quiet) {
if(parallel == TRUE) {
# set up parallel processing functionality
allCores <- parallel::detectCores()
if (is.null(numCores)) {
numCores <- allCores
}
cl <- parallel::makeCluster(numCores, setup_strategy = "sequential")
parallel::clusterExport(cl, c("d", "enm", "partitions", "tune.tbl",
"doClamp", "other.settings",
"partition.settings", "user.val.grps",
"occs.testing.z", "user.eval", "algorithm",
"quiet", "cv.enm", "tune.train", "clamp.vars",
"tune.validate"),
envir = environment())
n <- ifelse(!is.null(tune.tbl), nrow(tune.tbl), 1)
if(quiet != TRUE) message(paste0("\nOf ", allCores, " total cores using ", numCores, "..."))
if(quiet != TRUE) message(paste0("Running in parallel ..."))
par.cv.enm <- function(i) {
cv.enm(d, enm, partitions, tune.tbl[i,], doClamp, other.settings,
partition.settings, user.val.grps, occs.testing.z, user.eval,
algorithm, quiet)
}
results <- parallel::parLapply(cl, 1:n, par.cv.enm)
}else{
results <- list()
n <- ifelse(!is.null(tune.tbl), nrow(tune.tbl), 1)
# set up the console progress bar
if(quiet != TRUE) pb <- txtProgressBar(0, n, style = 3)
for(i in 1:n) {
# and (optionally) the shiny progress bar (updateProgress)
if(n > 1) {
if(is.function(updateProgress)) {
text <- paste0('Running ', paste(as.character(tune.tbl[i,]), collapse = ""), '...')
updateProgress(detail = text)
}
if(quiet != TRUE) setTxtProgressBar(pb, i)
}
# set the current tune settings
tune.tbl.i <- tune.tbl[i,]
results[[i]] <- cv.enm(d, enm, partitions, tune.tbl.i, doClamp,
other.settings, partition.settings, user.val.grps,
occs.testing.z, user.eval, algorithm, quiet)
}
if(quiet != TRUE) close(pb)
}
return(results)
}
#' @rdname tune.enm
cv.enm <- function(d, enm, partitions, tune.tbl.i, doClamp, other.settings,
partition.settings, user.val.grps, occs.testing.z,
user.eval, algorithm, quiet) {
envs.names <- names(d[, 3:(ncol(d)-2)])
# unpack predictor variable values for occs and bg
occs.xy <- d |> dplyr::filter(pb == 1) |> dplyr::select(1:2)
occs.z <- d |> dplyr::filter(pb == 1) |> dplyr::select(dplyr::all_of(envs.names))
bg.xy <- d |> dplyr::filter(pb == 0) |> dplyr::select(1:2)
bg.z <- d |> dplyr::filter(pb == 0) |> dplyr::select(dplyr::all_of(envs.names))
# define number of grp (the value of "k") for occurrences
nk <- length(unique(d[d$pb == 1, "grp"]))
# build the full model from all the data
# assign arguments
mod.full.args <- enm@args(occs.z, bg.z, tune.tbl.i, other.settings)
# run training model with specified arguments
mod.full <- do.call(enm@fun, mod.full.args)
if(is.null(mod.full)) stop('Training model is NULL. Consider changing the tuning parameters or inputting more background points.')
# as bioclim can be tuned with different "tails" settings that affect not the
# model but the prediction, these settings need to be moved from tune.tbl.i
# to other.settings as eval.predict() does not accept tune.args
if(algorithm == "bioclim") other.settings$tails <- tune.tbl.i$tails
mod.full.pred <- enm@predict(mod.full, rbind(occs.z, bg.z), other.settings)
# get evaluation statistics for training data
train <- tune.train(enm, occs.z, bg.z, mod.full, tune.tbl.i, other.settings, partitions, quiet)
# make training stats table
tune.args.col <- paste(names(tune.tbl.i), tune.tbl.i, collapse = "_", sep = ".")
train.stats.df <- data.frame(tune.args = tune.args.col, stringsAsFactors = FALSE) |> cbind(train)
# if no partitions, return results without cv.stats
if(partitions == "none") {
cv.res <- list(mod.full = mod.full, mod.full.pred = mod.full.pred, train.stats = train.stats.df, cv.stats = NULL)
return(cv.res)
}
if(partitions == "testing") {
bg.val.z <- data.frame()
occs.testing.zEnvs <- occs.testing.z |> dplyr::select(dplyr::all_of(envs.names))
if(doClamp == TRUE) {
occs.testing.zEnvs <- clamp.vars(orig.vals = occs.testing.zEnvs, ref.vals = rbind(occs.z, bg.z),
left = other.settings$clamp.directions$left, right = other.settings$clamp.directions$right,
categoricals = other.settings$categoricals)
}
validate <- tune.validate(enm, occs.z, occs.testing.zEnvs, bg.z, bg.val.z, mod.full, 0, tune.tbl.i, other.settings, partitions, user.eval, quiet)
test.stats.df <- data.frame(tune.args = tune.args.col, fold = 0, stringsAsFactors = FALSE) |> cbind(validate)
cv.res <- list(mod.full = mod.full, mod.full.pred = mod.full.pred, train.stats = train.stats.df, cv.stats = test.stats.df)
return(cv.res)
}
# list to contain cv statistics
cv.stats <- list()
for(k in 1:nk) {
# assign partitions for training and validation occurrence data and for background data
occs.train.z <- d |> dplyr::filter(pb == 1, grp != k) |> dplyr::select(dplyr::all_of(envs.names))
bg.train.z <- d |> dplyr::filter(pb == 0, grp != k) |> dplyr::select(dplyr::all_of(envs.names))
if(is.null(user.val.grps)) {
occs.val.z <- d |> dplyr::filter(pb == 1, grp == k) |> dplyr::select(dplyr::all_of(envs.names))
bg.val.z <- d |> dplyr::filter(pb == 0, grp == k) |> dplyr::select(dplyr::all_of(envs.names))
}else{
# assign partitions for training and validation occurrence data and for background data based on user data
occs.val.z <- user.val.grps |> dplyr::filter(grp == k) |> dplyr::select(dplyr::all_of(envs.names))
bg.val.z <- d |> dplyr::filter(pb == 0, grp == k) |> dplyr::select(envs.names)
}
# if doClamp is on, make sure that the validation data for each validation model is also clamped
# this means for each partition, making sure no values in validation data are more extreme than those in training data
if(doClamp == TRUE) {
val.z <- clamp.vars(orig.vals = rbind(occs.val.z, bg.val.z),
ref.vals = rbind(occs.train.z, bg.train.z),
left = other.settings$clamp.directions$left,
right = other.settings$clamp.directions$right,
categoricals = other.settings$categoricals)
occs.val.z <- val.z[1:nrow(occs.val.z),]
if(nrow(bg.val.z) > 0) bg.val.z <- val.z[(nrow(occs.val.z)+1):nrow(bg.val.z),]
}
# define model arguments for current model k
mod.k.args <- enm@args(occs.train.z, bg.train.z, tune.tbl.i, other.settings)
# run model k with specified arguments
mod.k <- tryCatch({
do.call(enm@fun, mod.k.args)
}, error = function(cond) {
if(quiet != TRUE) message(paste0("\n", cond, "\n"))
# Choose a return value in case of error
return(NULL)
})
# if model is NULL for some reason, continue but report to user
if(is.null(mod.k)) {
if(quiet != TRUE) message(paste0("\nThe results were NULL for model with settings ",
paste(names(tune.tbl.i),
tune.tbl.i,
collapse = ", "),
" for partition ", k,
". These settings will have NA results."))
validate <- data.frame(auc.val = NA, auc.diff = NA, cbi.val = NA,
or.mtp = NA, or.10p = NA)
}else{
validate <- tune.validate(enm, occs.train.z, occs.val.z, bg.train.z,
bg.val.z, mod.k, nk, tune.tbl.i, other.settings,
partitions, user.eval, quiet)
}
# put into list as one-row data frame for easy binding
cv.stats[[k]] <- data.frame(tune.args = tune.args.col, fold = k,
stringsAsFactors = FALSE) |> cbind(validate)
}
cv.stats.df <- dplyr::bind_rows(cv.stats)
cv.res <- list(mod.full = mod.full, mod.full.pred = mod.full.pred,
train.stats = train.stats.df, cv.stats = cv.stats.df)
return(cv.res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.