#' Train SDMs across multiple iterations of a scenario
#'
#' This function trains species distribution models on simulate data. Typical implementation is to use \code{predImportMakeData} to create simulated data sets, then \code{predImportTrainModels} to train SDMs on those data sets, then \code{predImportEval} to evaluate the models.
#' @param simDir Character, path name of directory in which scenario data files are saved.
#' @param modelDir Character, path name of directory in which model files are saved. Depending on whether multivariate, reduced, and/or univariate models are trained, inside this folder will be subfolders named "multivariate", "reduced", and/or "univariate" followed by the name of the algorithm (e.g., "multivariate brt").
#' @param vars Character vector, names of variables to use in model training. These should match the names in the \code{geography} argument supplied to the \code{predImportMakeData} function. See \code{\link[enmSdmPredImport]{genesis}} function for more details on \code{geography}.
#' @param algos Character list of model algorithms to implement. Options include \code{omniscient}, \code{brt} (boosted regression trees), \code{gam} (generalized additive models), \code{glm} (generalized linear models), \code{maxent} (Maxent, using version 3.3.3k or before), \code{maxnet} (Maxent, version 3.4 or higher), or \code{rf} (random forests).
#' @param type Character, type of models to train. Options include \code{multivariate} (use all variables in \code{vars}, \code{reduced} (a series of models, each using all but one variable in \code{vars}), and/or \code{univariate} (a series of models, one per variable in \code{vars}).
#' @param iters Vector of positive integers, data iterations to train models for.
#' @param numBg Positive integer, vector of positive integers, or \code{NULL}. This is the number of background sites used to train the model. If this is \code{NULL} (default), then the number of background sites will be equal to the number of sites available in the "sim" object created by \code{predImportMakeData}. If this is a single integer, then the background sites used for training will be taken from the first \code{numBg} sites available in the "sim" object. If this is a vector, then it must be the same length as \code{algos}, and each algorithm will then be presented with the respective number of background sites. If the latter, the vector must have names that match the algorithm(s) being used.
#' @param fileFlag Either \code{NULL} or a character string. If a character string then this is included in the simulated data file name and each model file name. If \code{NULL} (default), nothing is added, so file names will be as "model XXX.RData". If a character string, then the file name will be as "ALGORITHM FLAG model XXX.RData" where "XXX" is the iteration number, "FLAG" the string in \code{fileFlag}, and "ALGORITHM" the model algorithm name.
#' @param overwrite Logical, if \code{TRUE} then overwrite existing model results files. Default is \code{FALSE}.
#' @param tempDir Character, path of temporary directory. Used to store ancillary modeling files generated by Maxent 3.3.3k and earlier. Not used for any other modeling algorithm. Maxent can generate \emph{a lot} of these files which eventually fill up a disc. By specifying this folder you can have some control over where they are saved and thus if they fill up a hard drive (e.g., send temp files to a fast secondary drive with lots of space).
#' @param verbose Numeric, if 0 then show minimal output, 1 more output, 2 even more, >2 all of it.
#' @param ... Other arguments to pass to "train~~~" functions in the \pkg{enmSdm} package.
#' @return Nothing (writes models to disc).
#' @seealso \code{\link[enmSdmPredImport]{predImportMakeData}}, \code{\link[enmSdmPredImport]{predImportEval}}
#' @export
predImportTrainModels <- function(
simDir,
modelDir,
vars,
algos=c('omniscient', 'bioclim', 'brt', 'gam', 'glm', 'maxent', 'maxnet', 'rf'),
type=c('multivariate', 'reduced', 'univariate'),
iters=1:100,
numBg=NULL,
fileFlag=NULL,
overwrite=FALSE,
tempDir=raster::rasterOptions()$tmpdir,
verbose=1,
...
) {
# file prefix
fileAppendStartSpace <- if (!is.null(fileFlag)) { paste0(' ', fileFlag) } else { '' }
fileAppendEndSpace <- if (!is.null(fileFlag)) { paste0(fileFlag, ' ') } else { '' }
### for each SDM algorithm
##########################
for (algo in algos) {
if (verbose >= 0) omnibus::say(date(), ' | Modeling with ', toupper(algo), post=0)
thisNumBg <- if (is.null(numBg)) {
NULL
} else if (length(numBg) == 1) {
numBg
} else {
numBg[[algo]]
}
# by ITERATION
for (iter in iters) {
if (verbose==1) {
omnibus::say('| ', iter, post=0)
} else if (verbose > 1) {
omnibus::say(date(), ' | ', toupper(algo), ' | iteration ', iter, post=0, pre=1)
}
### load training/test data
###########################
load(paste0(simDir, '/', fileAppendEndSpace, 'sim ', omnibus::prefix(iter, 4), '.Rdata'))
# select background sites
if (is.null(thisNumBg)) thisNumBg <- sim$stats$numBg
trainPres <- sim$trainDataPresBg[sim$trainDataPresBg$presBg == 1, c('presBg', vars)]
trainBg <- sim$trainDataPresBg[sim$trainDataPresBg$presBg == 0, c('presBg', vars)]
if (thisNumBg > nrow(trainBg)) stop('The "sim" object does not contain the desired number of training background sites (see argument "numBg").')
trainBg <- trainBg[1:thisNumBg, ]
trainDataPresBg <- rbind(trainPres, trainBg)
### train MULTIVARIATE models
#############################
if ('multivariate' %in% type) {
if (verbose > 0) { omnibus::say('multi', post=0) }
# if overwriting models is OK, OR if model doesn't exist
fileMissing <- !file.exists(paste0(modelDir, '/multivariate ', algo, '/', algo, ' ', fileAppendEndSpace, 'model ', omnibus::prefix(iter, 4), '.RData'))
if (overwrite | fileMissing) {
if (algo=='omniscient') {
out <- response
attr(out, 'modelType') <- 'full'
} else if (algo=='bioclim') {
bioclimData <- trainDataPresBg[trainDataPresBg$presBg == 1, vars, drop=FALSE]
out <- dismo::bioclim(bioclimData, ...)
} else if (algo=='glm') {
out <- enmSdm::trainGlm(
data=trainDataPresBg,
resp='presBg',
preds=vars,
construct=FALSE,
verbose=verbose > 2,
...
)
} else if (algo=='maxent') {
out <- enmSdm::trainMaxEnt(
data=trainDataPresBg,
resp='presBg',
preds=vars,
scratchDir=tempDir,
verbose=(verbose > 2),
...
)
} else if (algo=='maxnet') {
out <- enmSdm::trainMaxNet(
data=trainDataPresBg,
resp='presBg',
preds=vars,
verbose=(verbose > 2),
...
)
} else if (algo=='brt') {
out <- enmSdm::trainBrt(
data=trainDataPresBg,
resp='presBg',
preds=vars,
w=TRUE,
verbose=(verbose > 2),
...
)
} else if (algo=='gam') {
out <- enmSdm::trainGam(
data=trainDataPresBg,
resp='presBg',
preds=vars,
construct=FALSE,
verbose=(verbose > 2),
...
)
} else if (algo=='rf') {
trainDataPresBg$presBg <- as.factor(trainDataPresBg$presBg)
out <- enmSdm::trainRf(
data=trainDataPresBg,
resp='presBg',
preds=vars,
importance=TRUE,
verbose=(verbose > 2),
...
)
}
model <- if (is.null(out)) {
FALSE
} else {
out
}
if (!(algo %in% c('omniscient', 'maxent', 'bioclim'))) {
if (!is.na(model)) model$stats$numTrainBg <- thisNumBg
}
omnibus::dirCreate(modelDir, '/multivariate ', algo)
fileName <- paste0(modelDir, '/multivariate ', algo, '/', algo, ' ', fileAppendEndSpace, 'model ', omnibus::prefix(iter, 4), '.RData')
save(model, file=fileName)
rm(model); gc()
} # if overwriting models OK OR model doesn't exist
if (verbose > 0) omnibus::say('\U2713', post=0)
} # if wanting multivariate models
### train REDUCED models
########################
if ('reduced' %in% type) {
if (length(vars) > 2) {
if (verbose > 0) { omnibus::say('reduced', post=0) }
# if overwriting models OK OR model doesn't exist
fileMissing <- !file.exists(paste0(modelDir, '/reduced ', algo, '/', algo, fileAppendStartSpace, ' model ', omnibus::prefix(iter, 4), '.Rdata'))
if (overwrite | fileMissing) {
model <- list()
# for EACH variable
for (count in seq_along(vars)) {
reducedVars <- vars[-count]
reducedTrainData <- trainDataPresBg[ , reducedVars, drop=FALSE]
if (algo=='omniscient') {
out <- response
attr(out, 'modelType') <- 'reduced'
attr(out, 'reducedSans') <- names(sim$geography)[count]
} else if (algo=='bioclim') {
bioclimData <- reducedTrainData[reducedTrainData$presBg == 1, reducedVars, drop=FALSE]
out <- dismo::bioclim(bioclimData, ...)
} else if (algo=='glm') {
out <- enmSdm::trainGlm(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
construct=FALSE,
verbose=verbose > 2,
...
)
} else if (algo=='maxent') {
out <- enmSdm::trainMaxEnt(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
scratchDir=tempDir,
verbose=(verbose > 2),
...
)
} else if (algo=='maxnet') {
out <- enmSdm::trainMaxNet(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
verbose=(verbose > 2),
...
)
} else if (algo=='brt') {
out <- enmSdm::trainBrt(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
verbose=(verbose > 2),
...
)
} else if (algo=='gam') {
out <- enmSdm::trainGam(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
construct=FALSE,
verbose=(verbose > 2),
...
)
} else if (algo=='rf') {
reducedTrainData$presBg <- as.factor(reducedTrainData$presBg)
out <- enmSdm::trainRf(
data=reducedTrainData,
resp='presBg',
preds=reducedVars,
importance=TRUE,
verbose=(verbose > 2),
...
)
}
model[[count]] <- if (!is.null(out)) {
out
} else {
FALSE
}
names(model)[[count]] <- paste0('sans', vars[count])
if (!(algo %in% c('omniscient', 'maxent', 'bioclim'))) {
if (!is.na(model[[count]])) model[[count]]$stats$numTrainBg <- thisNumBg
}
} # next reduced model
omnibus::dirCreate(modelDir, '/reduced ', algo)
fileName <- paste0(modelDir, '/reduced ', algo, '/', algo, fileAppendStartSpace, ' model ', omnibus::prefix(iter, 4), '.Rdata')
save(model, file=fileName)
rm(model); gc()
} # if overwriting models OK OR model doesn't exist
} # if enough layers to do reduced models
if (verbose > 0) omnibus::say('\U2713', post=0)
} # if wanting reduced models
### train UNIVARIATE models
###########################
if ('univariate' %in% type) {
if (verbose > 0) { omnibus::say('univar', post=0) }
# if overwriting models OK OR model doesn't exist
if (overwrite | !file.exists(paste0(modelDir, '/univariate ', algo, '/', algo, ' ', fileAppendEndSpace, 'model ', omnibus::prefix(iter, 4), '.RData'))) {
model <- list()
for (count in seq_along(vars)) {
if (verbose > 1) { omnibus::say(vars[count], post=0) }
univarVar <- vars[count]
univarTrainData <- trainDataPresBg[ , c('presBg', univarVar)]
# omniscient model
if (algo=='omniscient') {
out <- response
attr(out, 'modelType') <- 'univariate'
attr(out, 'univarWith') <- names(sim$geography)[count]
# BIOCLIM
} else if (algo=='bioclim') {
bioclimData <- univarTrainData[univarTrainData$presBg == 1, univarVar, drop=FALSE]
out <- dismo::bioclim(bioclimData, ...)
# GLM
} else if (algo=='glm') {
out <- enmSdm::trainGlm(
data=univarTrainData,
resp='presBg',
preds=univarVar,
construct=FALSE,
select=TRUE,
verbose=verbose > 2,
...
)
# maxent
} else if (algo=='maxent') {
out <- enmSdm::trainMaxEnt(
data=univarTrainData,
resp='presBg',
preds=univarVar,
scratchDir=tempDir,
verbose=(verbose > 2),
...
)
# maxnet
} else if (algo=='maxnet') {
out <- enmSdm::trainMaxNet(
data=univarTrainData,
resp='presBg',
preds=univarVar,
verbose=(verbose > 2),
...
)
# BRTs
} else if (algo=='brt') {
out <- enmSdm::trainBrt(
data=univarTrainData,
resp='presBg',
preds=univarVar,
w=TRUE,
verbose=(verbose > 2),
...
)
### GAMs
} else if (algo=='gam') {
out <- enmSdm::trainGam(
data=univarTrainData,
resp='presBg',
preds=univarVar,
construct=FALSE,
select=FALSE,
verbose=(verbose > 2),
...
)
### RFs
} else if (algo == 'rf') {
univarTrainData$presBg <- as.factor(univarTrainData$presBg)
out <- enmSdm::trainRf(
data=univarTrainData,
resp='presBg',
preds=univarVar,
importance=FALSE,
verbose=(verbose > 2),
...
)
}
model[[count]] <- if (!is.null(out)) {
out
} else {
FALSE
}
names(model)[[count]] <- paste0('only', univarVar)
if (!(algo %in% c('omniscient', 'maxent', 'bioclim'))) {
if (!is.na(model[[count]])) model[[count]]$stats$numTrainBg <- thisNumBg
}
} # next univariate model
omnibus::dirCreate(modelDir, '/univariate ', algo)
fileName <- paste0(modelDir, '/univariate ', algo, '/', algo, ' ', fileAppendEndSpace, 'model ', omnibus::prefix(iter, 4), '.RData')
save(model, file=fileName)
rm(model); gc()
} # if overwriting models OK OR model doesn't exist
if (verbose > 0) omnibus::say('\U2713', post=0)
} # if wanting univariate models
} # next iteration
omnibus::say('')
} # next algorithm
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.