Nothing
# BIOMOD_Modeling ---------------------------------------------------------
##' @name BIOMOD_Modeling
##' @author Wilfried Thuiller, Damien Georges, Robin Engler
##'
##' @title Run a range of species distribution models
##'
##' @description This function allows to calibrate and evaluate a range of modeling techniques
##' for a given species distribution. The dataset can be split up in calibration/validation parts,
##' and the predictive power of the different models can be estimated using a range of evaluation
##' metrics (see Details).
##'
##'
##' @param bm.format a \code{\link{BIOMOD.formated.data}} or \code{\link{BIOMOD.formated.data.PA}}
##' object returned by the \code{\link{BIOMOD_FormatingData}} function
##' @param modeling.id a \code{character} corresponding to the name (ID) of the simulation set
##' (\emph{a random number by default})
##' @param models a \code{vector} containing model names to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
##' are to be used, must be among \code{colnames(bm.format@PA.table)}
##'
##' @param CV.strategy a \code{character} corresponding to the cross-validation selection strategy,
##' must be among \code{random}, \code{kfold}, \code{block}, \code{strat}, \code{env} or
##' \code{user.defined}
##' @param CV.nb.rep (\emph{optional, default} \code{0}) \cr
##' If \code{strategy = 'random'} or \code{strategy = 'kfold'}, an \code{integer} corresponding
##' to the number of sets (repetitions) of cross-validation points that will be drawn
##' @param CV.perc (\emph{optional, default} \code{0}) \cr
##' If \code{strategy = 'random'}, a \code{numeric} between \code{0} and \code{1} defining the
##' percentage of data that will be kept for calibration
##' @param CV.k (\emph{optional, default} \code{0}) \cr
##' If \code{strategy = 'kfold'} or \code{strategy = 'strat'} or \code{strategy = 'env'}, an
##' \code{integer} corresponding to the number of partitions
##' @param CV.balance (\emph{optional, default} \code{'presences'}) \cr
##' If \code{strategy = 'strat'} or \code{strategy = 'env'}, a \code{character} corresponding
##' to how data will be balanced between partitions, must be either \code{presences} or
##' \code{absences}
##' @param CV.env.var (\emph{optional}) \cr If \code{strategy = 'env'}, a \code{character}
##' corresponding to the environmental variables used to build the partition. \code{k} partitions
##' will be built for each environmental variables. By default the function uses all
##' environmental variables available.
##' @param CV.strat (\emph{optional, default} \code{'both'}) \cr
##' If \code{strategy = 'env'}, a \code{character} corresponding to how data will partitioned
##' along gradient, must be among \code{x}, \code{y}, \code{both}
##' @param CV.user.table (\emph{optional, default} \code{NULL}) \cr
##' If \code{strategy = 'user.defined'}, a \code{matrix} or \code{data.frame} defining for each
##' repetition (in columns) which observation lines should be used for models calibration
##' (\code{TRUE}) and validation (\code{FALSE})
##' @param CV.do.full.models (\emph{optional, default} \code{TRUE}) \cr
##' A \code{logical} value defining whether models should be also calibrated and validated over
##' the whole dataset (and pseudo-absence datasets) or not
##' @param nb.rep \emph{deprecated}, now called \code{CV.nb.rep}
##' @param data.split.perc \emph{deprecated}, now called \code{CV.perc}
##' @param data.split.table \emph{deprecated}, now called \code{CV.user.table}
##' @param do.full.models \emph{deprecated}, now called \code{CV.do.full.models}
##'
##' @param OPT.data.type a \code{character} corresponding to the data type to be used, must be
##' either \code{binary}, \code{binary.PA}, \code{abundance}, \code{compositional}
##' @param OPT.strategy a \code{character} corresponding to the method to select models'
##' parameters values, must be either \code{default}, \code{bigboss}, \code{user.defined},
##' \code{tuned}
##' @param OPT.user.val (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing parameters values for some (all) models
##' @param OPT.user.base (\emph{optional, default} \code{bigboss}) \cr A character,
##' \code{default} or \code{bigboss} used when \code{OPT.strategy = 'user.defined'}.
##' It sets the bases of parameters to be modified by user defined values.
##' @param OPT.user (\emph{optional, default} \code{TRUE}) \cr
##' A \code{\link{BIOMOD.models.options}} object returned by the \code{\link{bm_ModelingOptions}}
##' function
##' @param bm.options a \code{\link{BIOMOD.models.options}} object returned by the
##' \code{\link{bm_ModelingOptions}} function
##'
##' @param weights (\emph{optional, default} \code{NULL}) \cr
##' A \code{vector} of \code{numeric} values corresponding to observation weights (one per
##' observation, see Details)
##' @param prevalence (\emph{optional, default} \code{NULL}) \cr
##' A \code{numeric} between \code{0} and \code{1} corresponding to the species prevalence to
##' build '\emph{weighted response weights}' (see Details)
##' @param metric.eval a \code{vector} containing evaluation metric names to be used, must
##' be among \code{POD}, \code{FAR}, \code{POFD}, \code{SR}, \code{ACCURACY}, \code{BIAS},
##' \code{ROC}, \code{TSS}, \code{KAPPA}, \code{OR}, \code{ORSS}, \code{CSI}, \code{ETS},
##' \code{BOYCE}, \code{MPA}
##' @param var.import (\emph{optional, default} \code{NULL}) \cr
##' An \code{integer} corresponding to the number of permutations to be done for each variable to
##' estimate variable importance
##' @param scale.models (\emph{optional, default} \code{FALSE}) \cr
##' A \code{logical} value defining whether all models predictions should be scaled with a
##' binomial GLM or not
##' @param nb.cpu (\emph{optional, default} \code{1}) \cr
##' An \code{integer} value corresponding to the number of computing resources to be used to
##' parallelize the single models computation
##' @param seed.val (\emph{optional, default} \code{NULL}) \cr
##' An \code{integer} value corresponding to the new seed value to be set
##' @param do.progress (\emph{optional, default} \code{TRUE}) \cr
##' A \code{logical} value defining whether the progress bar is to be rendered or not
##'
##'
##' @return
##'
##' A \code{\link{BIOMOD.models.out}} object containing models outputs, or links to saved outputs. \cr
##' Models outputs are stored out of \R (for memory storage reasons) in 2 different folders
##' created in the current working directory :
##' \enumerate{
##' \item a \emph{models} folder, named after the \code{resp.name} argument of
##' \code{\link{BIOMOD_FormatingData}}, and containing all calibrated models for each
##' repetition and pseudo-absence run
##' \item a \emph{hidden} folder, named \code{.BIOMOD_DATA}, and containing outputs related
##' files (original dataset, calibration lines, pseudo-absences selected, predictions,
##' variables importance, evaluation values...), that can be retrieved with
##' \href{https://biomodhub.github.io/biomod2/reference/getters.out.html}{\code{get_[...]}}
##' or \code{\link{load}} functions, and used by other \pkg{biomod2} functions, like
##' \code{\link{BIOMOD_Projection}} or \code{\link{BIOMOD_EnsembleModeling}}
##' }
##'
##'
##' @details
##'
##' \describe{
##' \item{bm.format}{If pseudo absences have been added to the original dataset (see
##' \code{\link{BIOMOD_FormatingData}}), \cr \code{PA.nb.rep *(nb.rep + 1)} models will be
##' created.}
##'
##' \item{models}{The set of models to be calibrated on the data. 12 modeling techniques
##' are currently available :
##' \itemize{
##' \item \code{ANN} : Artificial Neural Network (\code{\link[nnet]{nnet}})
##' \item \code{CTA} : Classification Tree Analysis (\code{\link[rpart]{rpart}})
##' \item \code{FDA} : Flexible Discriminant Analysis (\code{\link[mda]{fda}})
##' \item \code{GAM} : Generalized Additive Model (\code{\link[gam]{gam}}, \code{\link[mgcv]{gam}}
##' or \code{\link[mgcv]{bam}}) \cr
##' (see \code{\link{bm_ModelingOptions} for details on algorithm selection})
##' \item \code{GBM} : Generalized Boosting Model, or usually called Boosted Regression Trees
##' (\code{\link[gbm]{gbm}})
##' \item \code{GLM} : Generalized Linear Model (\code{\link[stats]{glm}})
##' \item \code{MARS} : Multiple Adaptive Regression Splines (\code{\link[earth]{earth}})
##' \item \code{MAXENT} : Maximum Entropy
##' (\url{https://biodiversityinformatics.amnh.org/open_source/maxent/})
##' \item \code{MAXNET} : Maximum Entropy (\code{\link[maxnet]{maxnet}})
##' \item \code{RF} : Random Forest (\code{\link[randomForest]{randomForest}})
##' \item \code{SRE} : Surface Range Envelop or usually called BIOCLIM (\code{\link{bm_SRE}})
##' \item \code{XGBOOST} : eXtreme Gradient Boosting Training (\code{\link[xgboost]{xgboost}})
##' }}
##'
##' \item{models.pa}{Different models might respond differently to different numbers of
##' pseudo-absences. It is possible to create sets of pseudo-absences with different numbers
##' of points (see \code{\link{BIOMOD_FormatingData}}) and to assign only some of these
##' datasets to each single model.
##' }
##'
##' \item{CV.[...] parameters}{Different methods are available to calibrate/validate the
##' single models (see \code{\link{bm_CrossValidation}}).}
##'
##' \item{OPT.[...] parameters}{Different methods are available to parameterize the
##' single models (see \code{\link{bm_ModelingOptions}} and
##' \code{\link{BIOMOD.options.dataset}}). Note that only \code{binary} data type is
##' allowed currently.
##' \itemize{
##' \item \code{default} : only default parameter values of default parameters of the single
##' models functions are retrieved. Nothing is changed so it might not give good results.
##' \item \code{bigboss} : uses parameters pre-defined by \pkg{biomod2} team and that are
##' available in the dataset \code{\link{OptionsBigboss}}. \cr
##' \emph{to be optimized in near future}
##' \item \code{user.defined} : updates default or bigboss parameters with some parameters
##' values defined by the user (but matching the format of a
##' \code{\link{BIOMOD.models.options}} object)
##' \item \code{tuned} : calling the \code{\link{bm_Tuning}} function to try and optimize
##' some default values
##' }
##' }
##'
##' \item{weights & prevalence}{More or less weight can be given to some specific observations.
##' \itemize{
##' \item If \code{weights = prevalence = NULL}, each observation (presence or absence) will
##' have the same weight, no matter the total number of presences and absences.
##' \item If \code{prevalence = 0.5}, presences and absences will be weighted equally
##' (\emph{i.e. the weighted sum of presences equals the weighted sum of absences}).
##' \item If \code{prevalence} is set below (\emph{above}) \code{0.5}, more weight will be
##' given to absences (\emph{presences}).
##' \item If \code{weights} is defined, \code{prevalence} argument will be ignored, and each
##' observation will have its own weight.
##' \item If pseudo-absences have been generated (\code{PA.nb.rep > 0} in
##' \code{\link{BIOMOD_FormatingData}}), weights are by default calculated such that
##' \code{prevalence = 0.5}. \emph{Automatically created \code{weights} will be \code{integer}
##' values to prevent some modeling issues.}
##' }}
##'
##' \item{metric.eval}{
##' \describe{
##' \item{simple}{
##' \itemize{
##' \item \code{POD} : Probability of detection (hit rate)
##' \item \code{FAR} : False alarm ratio
##' \item \code{POFD} : Probability of false detection (fall-out)
##' \item \code{SR} : Success ratio
##' \item \code{ACCURACY} : Accuracy (fraction correct)
##' \item \code{BIAS} : Bias score (frequency bias)
##' }
##' }
##' \item{complex}{
##' \itemize{
##' \item \code{ROC} : Relative operating characteristic
##' \item \code{TSS} : True skill statistic (Hanssen and Kuipers discriminant, Peirce's
##' skill score)
##' \item \code{KAPPA} : Cohen's Kappa (Heidke skill score)
##' \item \code{OR} : Odds Ratio
##' \item \code{ORSS} : Odds ratio skill score (Yule's Q)
##' \item \code{CSI} : Critical success index (threat score)
##' \item \code{ETS} : Equitable threat score (Gilbert skill score)
##' }
##' }
##' \item{presence-only}{
##' \itemize{
##' \item \code{BOYCE} : Boyce index
##' \item \code{MPA} : Minimal predicted area (cutoff optimising MPA to predict 90\% of
##' presences)
##' }
##' }
##' }
##' Optimal value of each method can be obtained with the \code{\link{get_optim_value}}
##' function. Several evaluation metrics can be selected. \emph{Please refer to the
##' \href{https://www.cawcr.gov.au/projects/verification/}{CAWRC website (section "Methods for
##' dichotomous forecasts")} to get detailed description of each metric.}
##' Results after modeling can be obtained through the \code{\link{get_evaluations}} function. \cr
##' Evaluation metric are calculated on the calibrating data (column \code{calibration}), on
##' the cross-validation data (column \code{validation}) or on the evaluation data
##' (column \code{evaluation}). \cr \emph{For cross-validation data, see \code{CV.[...]}
##' parameters in \code{\link{BIOMOD_Modeling}} function ; for evaluation data, see
##' \code{eval.[...]} parameters in \code{\link{BIOMOD_FormatingData}}.}
##' }
##'
##' \item{var.import}{A value caracterizing how much each variable has an impact on each model
##' predictions can be calculated by randomizing the variable of interest and computing the
##' correlation between original and shuffled variables (see \code{\link{bm_VariablesImportance}}).}
##'
##' \item{scale.models}{\bold{This parameter is quite experimental and it is recommended
##' not to use it. It may lead to reduction in projection scale amplitude.} Some categorical
##' models always have to be scaled (\code{FDA}, \code{ANN}), but it may be interesting to
##' scale all computed models to ensure comparable predictions (\code{0-1000} range). It might
##' be particularly useful when doing ensemble forecasting to remove the scale prediction effect
##' (\emph{the more extended projections are, the more they influence ensemble forecasting
##' results}).
##' }
##' }
##'
##'
##' @keywords models regression nonlinear multivariate nonparametric tree
##'
##'
##' @seealso \code{\link[stats]{glm}}, \code{\link[gam]{gam}},
##' \code{\link[mgcv]{gam}}, \code{\link[mgcv]{bam}}, \code{\link[gbm]{gbm}},
##' \code{\link[rpart]{rpart}}, \code{\link[nnet]{nnet}},
##' \code{\link[mda]{fda}}, \code{\link[earth]{earth}},
##' \code{\link[randomForest]{randomForest}}, \code{\link[maxnet]{maxnet}},
##' \code{\link[xgboost]{xgboost}}, \code{\link{BIOMOD_FormatingData}},
##' \code{\link{bm_ModelingOptions}}, \code{\link{bm_Tuning}},
##' \code{\link{bm_CrossValidation}},
##' \code{ \link{bm_VariablesImportance}}, \code{\link{BIOMOD_Projection}},
##' \code{\link{BIOMOD_EnsembleModeling}}, \code{\link{bm_PlotEvalMean}},
##' \code{\link{bm_PlotEvalBoxplot}}, \code{\link{bm_PlotVarImpBoxplot}},
##' \code{\link{bm_PlotResponseCurves}}
##' @family Main functions
##'
##'
##' @examples
##' library(terra)
##'
##' # Load species occurrences (6 species available)
##' data(DataSpecies)
##' head(DataSpecies)
##'
##' # Select the name of the studied species
##' myRespName <- 'GuloGulo'
##'
##' # Get corresponding presence/absence data
##' myResp <- as.numeric(DataSpecies[, myRespName])
##'
##' # Get corresponding XY coordinates
##' myRespXY <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]
##'
##' # Load environmental variables extracted from BIOCLIM (bio_3, bio_4, bio_7, bio_11 & bio_12)
##' data(bioclim_current)
##' myExpl <- terra::rast(bioclim_current)
##'
##' \dontshow{
##' myExtent <- terra::ext(0,30,45,70)
##' myExpl <- terra::crop(myExpl, myExtent)
##' }
##'
##' # ---------------------------------------------------------------------------- #
##' # Format Data with true absences
##' myBiomodData <- BIOMOD_FormatingData(resp.var = myResp,
##' expl.var = myExpl,
##' resp.xy = myRespXY,
##' resp.name = myRespName)
##'
##'
##' # ---------------------------------------------------------------------------- #
##' # Model single models
##' myBiomodModelOut <- BIOMOD_Modeling(bm.format = myBiomodData,
##' modeling.id = 'AllModels',
##' models = c('RF', 'GLM'),
##' CV.strategy = 'random',
##' CV.nb.rep = 2,
##' CV.perc = 0.8,
##' OPT.strategy = 'bigboss',
##' metric.eval = c('TSS','ROC'),
##' var.import = 2,
##' seed.val = 42)
##' myBiomodModelOut
##'
##' # Get evaluation scores & variables importance
##' get_evaluations(myBiomodModelOut)
##' get_variables_importance(myBiomodModelOut)
##'
##' # Represent evaluation scores
##' bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'calibration')
##' bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'validation')
##' bm_PlotEvalBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'run'))
##'
##' # # Represent variables importance
##' # bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'algo'))
##' # bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'run'))
##' # bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'expl.var', 'run'))
##'
##' # # Represent response curves
##' # mods <- get_built_models(myBiomodModelOut, run = 'RUN1')
##' # bm_PlotResponseCurves(bm.out = myBiomodModelOut,
##' # models.chosen = mods,
##' # fixed.var = 'median')
##' # bm_PlotResponseCurves(bm.out = myBiomodModelOut,
##' # models.chosen = mods,
##' # fixed.var = 'min')
##' # mods <- get_built_models(myBiomodModelOut, full.name = 'GuloGulo_allData_RUN2_RF')
##' # bm_PlotResponseCurves(bm.out = myBiomodModelOut,
##' # models.chosen = mods,
##' # fixed.var = 'median',
##' # do.bivariate = TRUE)
##'
##'
##' @importFrom foreach foreach %do%
##'
##' @export
##'
##'
BIOMOD_Modeling <- function(bm.format,
modeling.id = as.character(format(Sys.time(), "%s")),
models = c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS'
, 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST'),
models.pa = NULL,
CV.strategy = 'random',
CV.nb.rep = 1,
CV.perc = NULL,
CV.k = NULL,
CV.balance = NULL,
CV.env.var = NULL,
CV.strat = NULL,
CV.user.table = NULL,
CV.do.full.models = TRUE,
OPT.data.type = 'binary',
OPT.strategy = 'default',
OPT.user.val = NULL,
OPT.user.base = 'bigboss',
OPT.user = NULL,
bm.options, ## deprecated
nb.rep, ## deprecated
data.split.perc, ## deprecated
data.split.table, ## deprecated
do.full.models, ## deprecated
weights = NULL,
prevalence = NULL,
metric.eval = c('KAPPA', 'TSS', 'ROC'),
var.import = 0,
scale.models = FALSE,
nb.cpu = 1,
seed.val = NULL,
do.progress = TRUE)
{
.bm_cat("Build Single Models")
## 0. Check arguments ---------------------------------------------------------------------------
args <- .BIOMOD_Modeling.check.args(
bm.format = bm.format,
modeling.id = modeling.id,
models = models,
models.pa = models.pa,
OPT.user = OPT.user,
CV.user.table = CV.user.table,
CV.do.full.models = CV.do.full.models,
weights = weights,
prevalence = prevalence,
metric.eval = metric.eval,
var.import = var.import,
scale.models = scale.models,
nb.cpu = nb.cpu,
seed.val = seed.val,
do.progress = do.progress
)
for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
rm(args)
# check for obsolete arguments
args <- .BIOMOD_Modeling.check.args.obsolete(
bm.options = bm.options,
OPT.user = OPT.user,
CV.strategy = CV.strategy,
data.split.perc = data.split.perc,
CV.perc = CV.perc,
data.split.table = data.split.table,
CV.user.table = CV.user.table,
nb.rep = nb.rep,
CV.nb.rep = CV.nb.rep,
do.full.models = do.full.models,
CV.do.full.models = CV.do.full.models
)
for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
rm(args)
## 1. Create output object ----------------------------------------------------------------------
models.out <- new('BIOMOD.models.out',
dir.name = bm.format@dir.name,
sp.name = bm.format@sp.name,
modeling.id = modeling.id,
expl.var.names = colnames(bm.format@data.env.var),
has.evaluation.data = bm.format@has.data.eval,
scale.models = scale.models)
## 2. Create simulation directories -------------------------------------------------------------
## Various objects will be stored (models, predictions, projections)
## Projections directories are created in Projection() function
.BIOMOD_Modeling.prepare.workdir(bm.format@dir.name, bm.format@sp.name, models.out@modeling.id)
name.BIOMOD_DATA = file.path(models.out@dir.name, models.out@sp.name, ".BIOMOD_DATA", models.out@modeling.id)
## 3.1 Save input data ------------------------------------------------------
models.out = .fill_BIOMOD.models.out("formated.input.data", bm.format, models.out
, inMemory = FALSE, nameFolder = name.BIOMOD_DATA)
## 3.2 Get and save calibration lines ---------------------------------------
calib.lines <- bm_CrossValidation(bm.format = bm.format,
strategy = CV.strategy,
nb.rep = CV.nb.rep,
perc = CV.perc,
k = CV.k,
balance = CV.balance,
env.var = CV.env.var,
strat = CV.strat,
user.table = CV.user.table,
do.full.models = CV.do.full.models)
models.out = .fill_BIOMOD.models.out("calib.lines", calib.lines, models.out
, inMemory = FALSE, nameFolder = name.BIOMOD_DATA)
## 3.3 Get and save models options ------------------------------------------
if (!is.null(OPT.user)) {
## Check for model names -----------
if (sum(!(models %in% sapply(OPT.user@models, function(x) strsplit(x, "[.]")[[1]][1]))) > 0) {
stop(paste0("\n", "OPT.user", " must contain information for '",
ifelse(length(models) > 1,
paste0(paste0(models[1:(length(models) -1)], collapse = "', '"),
"' and '", models[length(models)])
, paste0(models,"' models"))))
}
## Check for calib.lines names -----
for (mod in OPT.user@models) {
nam <- names(OPT.user@options[[mod]]@args.values)
vals <- colnames(calib.lines)
if (any(!(vals %in% nam))) {
if (length(nam) == 1 && nam == "_allData_allRun") {
val <- OPT.user@options[[mod]]@args.values[[nam]]
for (ii in vals) {
OPT.user@options[[mod]]@args.values[[ii]] <- val
}
} else if (all(grepl("_allRun", nam))) { #Check if the user create the options just for PA dataset
sep.name <- unlist(strsplit(mod, split = '[.]'))
cat("\n \n \t The options for", sep.name[1], "for '_PAx_allRun' will be given to all runs with PAx (_PAx_RUN1, _PAx_RUN2,...) \n")
for (run in vals) {
PA.set <- grep("PA|allData", unlist(strsplit(run, "_")), value = TRUE)
if (is.null(OPT.user@options[[mod]]@args.values[[paste0("_", PA.set, "_allRun")]])) {
opt.default <- BIOMOD.options.dataset(mod = sep.name[1], typ = sep.name[2], pkg =sep.name[3], fun = sep.name[4]
, strategy = "default", bm.format = bm.format, calib.lines = calib.lines)
OPT.user@options[[mod]]@args.values[[run]] <- opt.default@args.values[["_allData_allRun"]]
} else {
OPT.user@options[[mod]]@args.values[[run]] <- OPT.user@options[[mod]]@args.values[[paste0("_", PA.set, "_allRun")]]
}
}
} else {
stop(paste0("\n", "names(OPT.user@options[['", mod, "']]@args.values)", " must be '",
ifelse(length(vals) > 1,
paste0(paste0(vals[1:(length(vals) -1)], collapse = "', '"),
"' and '", vals[length(vals)])
, paste0(vals,"'"))))
}
}
}
bm.options <- OPT.user
} else {
bm.options <- bm_ModelingOptions(data.type = OPT.data.type,
models = models,
strategy = OPT.strategy,
user.val = OPT.user.val,
user.base = OPT.user.base,
bm.format = bm.format,
calib.lines = calib.lines)
}
if (!is.null(prevalence) && "MAXENT" %in% models) {
for (nam in names(bm.options@options$MAXENT.binary.MAXENT.MAXENT@args.values)) {
bm.options@options$MAXENT.binary.MAXENT.MAXENT@args.values[[nam]][['defaultprevalence']] <- prevalence
}
}
models.out = .fill_BIOMOD.models.out("models.options", bm.options, models.out
, inMemory = TRUE, nameFolder = name.BIOMOD_DATA)
## 4. Print modeling summary in console ---------------------------------------------------------
.BIOMOD_Modeling.summary(bm.format, calib.lines, models, models.pa)
## 5. Run models with loop over PA --------------------------------------------------------------
mod.out <- bm_RunModelsLoop(bm.format = bm.format,
weights = weights,
calib.lines = calib.lines,
modeling.id = models.out@modeling.id,
models = models,
models.pa = models.pa,
bm.options = bm.options,
var.import = var.import,
metric.eval = metric.eval,
scale.models = scale.models,
nb.cpu = nb.cpu,
seed.val = seed.val,
do.progress = do.progress)
## 6. Rearrange and save outputs -------------------------------------------
models.out@models.computed <- .transform_outputs_list("mod", mod.out, out = "model")
models.out@models.failed <- .transform_outputs_list("mod", mod.out, out = "calib.failure")
if(length(models.out@models.computed) == 1 && models.out@models.computed == "none"){
cat("\n! All models failed")
return(models.out)
}
## 3.4 Rearrange and save models outputs : ----------------------------------
## models evaluation, variables importance, models prediction, predictions evaluation
models.evaluation <- .transform_outputs_list("mod", mod.out, out = "evaluation")
models.out = .fill_BIOMOD.models.out("models.evaluation", models.evaluation, models.out
, inMemory = TRUE, nameFolder = name.BIOMOD_DATA)
rm(models.evaluation)
if (var.import > 0) {
variables.importance <- .transform_outputs_list("mod", mod.out, out = "var.import")
models.out = .fill_BIOMOD.models.out("variables.importance", variables.importance, models.out
, inMemory = FALSE, nameFolder = name.BIOMOD_DATA)
rm(variables.importance)
}
models.prediction <- .transform_outputs_list("mod", mod.out, out = "pred")
models.out = .fill_BIOMOD.models.out("models.prediction", models.prediction, models.out
, inMemory = FALSE, nameFolder = name.BIOMOD_DATA)
rm(models.prediction)
models.prediction.eval <- .transform_outputs_list("mod", mod.out, out = "pred.eval")
models.out = .fill_BIOMOD.models.out("models.prediction.eval", models.prediction.eval, models.out
, inMemory = FALSE, nameFolder = name.BIOMOD_DATA)
rm(models.prediction.eval)
rm(mod.out)
## 6. SAVE MODEL OBJECT ON HARD DRIVE ----------------------------
name.OUT = paste0(models.out@sp.name, '.', models.out@modeling.id, '.models.out')
models.out@link <- file.path(models.out@dir.name, models.out@sp.name, name.OUT)
assign(x = name.OUT, value = models.out)
save(list = name.OUT, file = models.out@link)
.bm_cat("Done")
return(models.out)
}
# ---------------------------------------------------------------------------- #
.BIOMOD_Modeling.prepare.workdir <- function(dir.name, sp.name, modeling.id)
{
cat("\nCreating suitable Workdir...\n")
dir.create(file.path(dir.name, sp.name), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(dir.name, sp.name, ".BIOMOD_DATA", modeling.id), showWarnings = FALSE, recursive = TRUE)
dir.create(file.path(dir.name, sp.name, "models", modeling.id), showWarnings = FALSE, recursive = TRUE)
}
# ---------------------------------------------------------------------------- #
.BIOMOD_Modeling.check.args <- function(bm.format, modeling.id, models, models.pa, OPT.user
, CV.user.table, CV.do.full.models
, weights, prevalence, metric.eval, var.import
, scale.models, nb.cpu, seed.val, do.progress)
{
## 0. Check bm.format and models arguments ----------------------------------
cat('\n\nChecking Models arguments...\n')
if (!is.character(modeling.id) || length(modeling.id) > 1) { stop("modeling.id must be a 'character' of length 1") }
.fun_testIfInherits(TRUE, "bm.format", bm.format, c("BIOMOD.formated.data", "BIOMOD.formated.data.PA"))
if (!is.character(models)) { stop("models must be a 'character' vector") }
# Support for old names in models
# Deprecated MAXENT.Phillips/MAXENT.Phillips.2
if (any(models == "MAXENT.Phillips")) {
models[which(models == "MAXENT.Phillips")] <- "MAXENT"
cat(paste0("\n\t! 'MAXENT.Phillips' model name is deprecated, please use 'MAXENT' instead."))
}
if (any(models == "MAXENT.Phillips.2")) {
models[which(models == "MAXENT.Phillips.2")] <- "MAXNET"
cat(paste0("\n\t! 'MAXENT.Phillips.2' model name is deprecated, please use 'MAXNET' instead."))
}
## Deprecated MAXENT.Tsuruoka
## because of package maintaining issue (request from B Ripley 03-2019)
if ('MAXENT.Tsuruoka' %in% models) {
models.switch.off <- unique(c(models.switch.off, "MAXENT.Tsuruoka"))
models <- setdiff(models, models.switch.off)
warning('MAXENT.Tsuruoka has been disabled because of package maintaining issue (request from cran team 03-2019)')
}
models <- unique(models)
models.switch.off <- NULL
## check if model is supported
avail.models.list <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
.fun_testIfIn(TRUE, "models", models, avail.models.list)
## Specific case of one variable with GBM / MAXNET
if ('GBM' %in% models && ncol(bm.format@data.env.var) == 1) {
warning('GBM might have issues when only one variable is used. Please be sure to install the following version : devtools::install_github("rpatin/gbm")')
}
if ('MAXNET' %in% models && ncol(bm.format@data.env.var) == 1) {
warning('MAXNET might have issues when only one variable is used. Please be sure to install the following version : devtools::install_github("mrmaxent/maxnet")')
}
## 1.1 Remove models not supporting categorical variables --------------------
categorical_var <- .get_categorical_names(bm.format@data.env.var)
if (length(categorical_var) > 0) {
models.fact.unsupport <- c("SRE", "XGBOOST")
models.switch.off <- c(models.switch.off, intersect(models, models.fact.unsupport))
if (length(models.switch.off) > 0) {
models <- setdiff(models, models.switch.off)
cat(paste0("\n\t! ", paste(models.switch.off, collapse = ",")," were switched off because of categorical variables !"))
}
}
## Check models.pa argument
if (!is.null(models.pa)) {
if (inherits(bm.format, "BIOMOD.formated.data.PA")) {
.fun_testIfInherits(TRUE, "models.pa", models.pa, "list")
.fun_testIfIn(TRUE, "unlist(models.pa)", unlist(models.pa), colnames(bm.format@PA.table))
.fun_testIfIn(TRUE, "names(models.pa)", names(models.pa), models)
if (length(models.pa) != length(models)) {
mod.miss = models[-which(models %in% names(models.pa))]
list.miss = rep(list(colnames(bm.format@PA.table)), length(mod.miss))
names(list.miss) = mod.miss
models.pa = c(models.pa, list.miss)
warning(paste0(paste0(mod.miss, collapse = ", ")
, " have been assigned to all PA datasets as no information was given")
, immediate. = TRUE)
}
} else {
warning("models.pa has been disabled because no PA datasets have been given", immediate. = TRUE)
models.pa = NULL
}
}
## 3. Check OPT.user arguments --------------------------------------------
if (!is.null(OPT.user)) {
.fun_testIfInherits(TRUE, "OPT.user", OPT.user, "BIOMOD.models.options")
}
# else {
# warning("Models will run with 'default' parameters", immediate. = TRUE)
# bm.options <- BIOMOD_ModelingOptions()
# }
## 4. Check CV.user.table
if (!is.null(CV.user.table)){
if(!("_allData_allRun" %in% colnames(CV.user.table)) & CV.do.full.models == T){
CV.do.full.models = FALSE
warning("CV.do.full.model has been disabled because '_allData_allRun' is not provided in CV.user.table")
}
}
## 5. Check prevalence arguments --------------------------------------------
if (!is.null(prevalence)) {
.fun_testIf01(TRUE, "prevalence", prevalence)
} else {
prevalence = 0.5
}
## 6. Check weights arguments -----------------------------------------------
if (is.null(weights)) {
if (!is.null(prevalence)) {
cat("\n\t> Automatic weights creation to rise a", prevalence, "prevalence")
data.sp <- as.numeric(bm.format@data.species)
if (inherits(bm.format, "BIOMOD.formated.data.PA")) {
weights.pa <- foreach(pa = 1:ncol(bm.format@PA.table), .combine = "cbind") %do%
{
ind.PA <- which(bm.format@PA.table[, pa] == TRUE)
data.sp_pa <- data.sp[ind.PA]
data.sp_pa[which(is.na(data.sp_pa))] <- 0
weights <- .automatic_weights_creation(data.sp_pa, prev = prevalence)
wei <- rep(NA, length(data.sp))
wei[ind.PA] <- weights
return(matrix(wei, ncol = 1))
}
weights.pa <- cbind(weights.pa, rep(1, nrow(weights.pa)))
colnames(weights.pa) <- c(colnames(bm.format@PA.table), "allData")
weights <- weights.pa
} else {
weights <- .automatic_weights_creation(data.sp, prev = prevalence)
weights <- matrix(weights, nrow = length(weights), ncol = 1)
colnames(weights) <- "allData"
}
}
# else { ## NEVER OCCURRING NO ??
# cat("\n\t> No weights : all observations will have the same weight")
# weights <- rep(1, length(bm.format@data.species))
# }
} else {
if (!is.numeric(weights)) { stop("weights must be a numeric vector") }
if (length(weights) != length(bm.format@data.species)) {
stop("The number of 'Weight' does not match with the input calibration data. Simulation cannot proceed.")
}
if (inherits(bm.format, "BIOMOD.formated.data.PA")) {
weights.pa <- foreach(pa = 1:ncol(bm.format@PA.table), .combine = "cbind") %do%
{
wei <- weights
wei[which(bm.format@PA.table[, pa] == FALSE | is.na(bm.format@PA.table[, pa]))] <- NA
return(matrix(wei, ncol = 1))
}
weights.pa <- cbind(weights.pa, rep(1, nrow(weights.pa)))
colnames(weights.pa) <- c(colnames(bm.format@PA.table), "allData")
weights <- weights.pa
} else {
weights <- matrix(weights, nrow = length(weights), ncol = 1)
colnames(weights) <- "allData"
}
}
## 7. Check metric.eval arguments -------------------------------------------
metric.eval <- unique(metric.eval)
avail.eval.meth.list <- c('POD', 'FAR', 'POFD', 'SR', 'ACCURACY', 'BIAS'
, 'ROC', 'TSS', 'KAPPA', 'OR', 'ORSS', 'CSI'
, 'ETS', 'BOYCE', 'MPA')
.fun_testIfIn(TRUE, "metric.eval", metric.eval, avail.eval.meth.list)
if (!is.null(seed.val)) {
set.seed(seed.val)
}
if (is.null(var.import)) {
var.import = 0
}
return(list(models = models,
models.pa = models.pa,
weights = weights,
var.import = var.import,
metric.eval = metric.eval,
prevalence = prevalence,
seed.val = seed.val,
do.progress = do.progress,
CV.do.full.models = CV.do.full.models))
}
# Obsolete argument Check -------------------------------------------------------
.BIOMOD_Modeling.check.args.obsolete <- function(bm.options,
OPT.user,
CV.strategy,
data.split.perc,
CV.perc,
data.split.table,
CV.user.table,
nb.rep,
CV.nb.rep,
do.full.models,
CV.do.full.models)
{
## bm.options ------------------------------
if (!missing(bm.options)) {
if (!is.null(OPT.user)) {
cat("\n! ignored obsolete argument 'bm.options' as 'OPT.user' was also given")
} else {
OPT.user <- bm.options
cat("\n!!! argument 'bm.options' is obsolete, please use 'OPT.user' instead")
}
}
## do.full.models ------------------------------
if (!missing(do.full.models)) {
if (!is.null(CV.do.full.models)) {
cat("\n! ignored obsolete argument 'do.full.models' as 'CV.do.full.models' was also given")
} else {
CV.do.full.models <- do.full.models
cat("\n!!! argument 'do.full.models' is obsolete, please use 'CV.do.full.models' instead")
}
}
## data.split.perc --------------------------
if (!missing(data.split.perc)) {
if (CV.strategy != "random") {
cat("\n! ignored obsolete argument 'data.split.perc' as 'CV.strategy' was not set to 'random'")
} else if (!is.null(CV.perc)) {
cat("\n! ignored obsolete argument 'data.split.perc' as 'CV.perc' was also given")
} else {
CV.perc <- data.split.perc/100
cat("\n!!! argument 'do.full.models' is obsolete, please use 'CV.perc' instead.
\n /!\ 'CV.perc' is on a scale 0-1 and was set to data.split.perc/100 =",CV.perc)
}
}
## nb.rep --------------------------
if (!missing(nb.rep)) {
if (! CV.strategy %in% c("random", "kfold")) {
cat("\n! ignored obsolete argument 'nb.rep' as 'CV.strategy' was not set to 'random' or 'kfold'")
} else if (!is.null(CV.nb.rep)) {
cat("\n! ignored obsolete argument 'nb.rep' as 'CV.nb.rep' was also given")
} else {
CV.nb.rep <- nb.rep
cat("\n!!! argument 'nb.rep' is obsolete, please use 'CV.nb.rep' instead.")
}
}
## data.split.table --------------------------
if (!missing(data.split.table)) {
if (! CV.strategy %in% c("user.defined")) {
cat("\n! ignored obsolete argument 'data.split.table' as 'CV.strategy' was not set to 'user.defined'")
} else if (!is.null(CV.user.table)) {
cat("\n! ignored obsolete argument 'data.split.table' as 'CV.user.table' was also given")
} else {
CV.user.table <- data.split.table
cat("\n!!! argument 'data.split.table' is obsolete, please use 'CV.user.table' instead.")
}
}
return(list(
CV.perc = CV.perc,
CV.user.table = CV.user.table,
CV.nb.rep = CV.nb.rep,
CV.do.full.models = CV.do.full.models))
}
## 0. Check bm.format and models arguments ----------------------------------
cat('\n\nChecking Models arguments...\n')
# ---------------------------------------------------------------------------- #
.BIOMOD_Modeling.summary <- function(bm.format, calib.lines, models, models.pa = NULL)
{
cat("\n\n")
.bm_cat(paste(bm.format@sp.name, "Modeling Summary"))
cat("\n", ncol(bm.format@data.env.var), " environmental variables (", colnames(bm.format@data.env.var), ")")
nb.eval.rep <-
ncol(calib.lines) /
ifelse(inherits(bm.format, "BIOMOD.formated.data.PA"),
ncol(bm.format@PA.table), 1)
cat("\nNumber of evaluation repetitions :", nb.eval.rep)
cat("\nModels selected :", models, "\n")
if (is.null(models.pa)) {
nb.runs = ncol(calib.lines) * length(models)
} else {
nb.runs =
length(which(
sapply(unlist(models.pa), function(x) grepl(colnames(calib.lines), pattern = x))
))
}
cat("\nTotal number of model runs:", nb.runs, "\n")
.bm_cat()
}
# ---------------------------------------------------------------------------- #
.automatic_weights_creation <- function(resp, prev = 0.5, subset = NULL)
{
if (is.null(subset)) { subset <- rep(TRUE, length(resp)) }
nbPres <- sum(resp[subset], na.rm = TRUE)
# The number of true absences + pseudo absences to maintain true value of prevalence
nbAbsKept <- sum(subset, na.rm = TRUE) - sum(resp[subset], na.rm = TRUE)
weights <- rep(1, length(resp))
if (nbAbsKept > nbPres) {
# code absences as 1
weights[which(resp > 0)] <- (prev * nbAbsKept) / (nbPres * (1 - prev))
} else {
# code presences as 1
weights[which(resp == 0 | is.na(resp))] <- (nbPres * (1 - prev)) / (prev * nbAbsKept)
}
weights = round(weights[])
weights[!subset] <- 0
return(weights)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.