###################################################################################################
##' @name bm_RunModelsLoop
##' @aliases bm_RunModelsLoop
##' @aliases bm_RunModel
##' @author Damien Georges
##'
##' @title Loop to compute all single species distribution models
##'
##' @description This internal \pkg{biomod2} function allows the user to compute all single
##' species distribution models requested by the \code{\link{BIOMOD_Modeling}} function.
##'
##'
##' @param bm.format a \code{\link{BIOMOD.formated.data}} or \code{\link{BIOMOD.formated.data.PA}}
##' object returned by the \code{\link{BIOMOD_FormatingData}} function
##' @param weights a \code{matrix} containing observation weights for each pseudo-absence (or
##' \code{allData}) dataset
##' @param calib.lines a \code{matrix} containing calibration / validation lines for each
##' pseudo-absence (or \code{allData}) x repetition (or \code{allRun}) combination that can be
##' obtained with the \code{\link{bm_CrossValidation}} function
##'
##' @param modeling.id a \code{character} corresponding to the name (ID) of the simulation set
##' (\emph{a random number by default})
##' @param models a \code{vector} containing model names to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
##' @param models.pa (\emph{optional, default} \code{NULL}) \cr
##' A \code{list} containing for each model a \code{vector} defining which pseudo-absence datasets
##' are to be used, must be among \code{colnames(bm.format@PA.table)}
##' @param bm.options a \code{\link{BIOMOD.models.options}} object returned by the
##' \code{\link{bm_ModelingOptions}} function
##' @param metric.eval a \code{vector} containing evaluation metric names to be used, must
##' be among \code{POD}, \code{FAR}, \code{POFD}, \code{SR}, \code{ACCURACY}, \code{BIAS},
##' \code{ROC}, \code{TSS}, \code{KAPPA}, \code{OR}, \code{ORSS}, \code{CSI}, \code{ETS},
##' \code{BOYCE}, \code{MPA}
##' @param var.import (\emph{optional, default} \code{NULL}) \cr
##' An \code{integer} corresponding to the number of permutations to be done for each variable to
##' estimate variable importance
##' @param scale.models (\emph{optional, default} \code{TRUE}) \cr
##' A \code{logical} value defining whether all models predictions should be scaled with a
##' binomial GLM or not
##' @param nb.cpu (\emph{optional, default} \code{1}) \cr
##' An \code{integer} value corresponding to the number of computing resources to be used to
##' parallelize the single models computation
##' @param seed.val (\emph{optional, default} \code{NULL}) \cr
##' An \code{integer} value corresponding to the new seed value to be set
##' @param do.progress (\emph{optional, default} \code{TRUE}) \cr
##' A \code{logical} value defining whether the progress bar is to be rendered or not
##'
##'
##' @param model a \code{character} corresponding to the model name to be computed, must be among
##' \code{ANN}, \code{CTA}, \code{FDA}, \code{GAM}, \code{GBM}, \code{GLM}, \code{MARS},
##' \code{MAXENT}, \code{MAXNET}, \code{RF}, \code{SRE}, \code{XGBOOST}
##' @param run.name a \code{character} corresponding to the name of the run (sp.name + pa.id +
##' run.id)
##' @param dir.name (\emph{optional, default} \code{.}) \cr
##' A \code{character} corresponding to the modeling folder
##' @param Data a \code{data.frame} containing observations, coordinates and environmental
##' variables that can be obtained with the \code{get_species_data} function
##' @param weights.vec a \code{vector} containing observation weights for the concerned
##' pseudo-absence (or \code{allData}) dataset
##' @param calib.lines.vec a \code{vector} containing calibration / validation lines for the
##' concerned pseudo-absence (or \code{allData}) x repetition (or \code{allRun}) combination
##' @param eval.data (\emph{optional, default} \code{NULL}) \cr
##' A \code{data.frame} containing validation observations, coordinates and environmental
##' variables that can be obtained with the \code{get_eval_data} function
##'
##'
##'
##' @return
##'
##' A \code{list} containing, for each model, a \code{list} with the following elements:
##' \itemize{
##' \item \code{model} : the name of the model, if it was correctly computed
##' \item \code{calib.failure} : the name of the model, if its calibration failed
##' \item \code{pred} : the prediction outputs for calibration data
##' \item \code{pred.eval} : the prediction outputs for evaluation data
##' \item \code{evaluation} : the evaluation outputs returned by the
##' \code{\link{bm_FindOptimStat}} function
##' \item \code{var.import} : the mean variable importance returned by the
##' \code{\link{bm_VariablesImportance}} function
##' }
##'
##'
##' @keywords models formula options CTA GLM GBM GAM RF ANN FDA SRE MARS MAXENT XGBOOST
##'
##'
##' @seealso \code{\link[rpart]{rpart}}, \code{\link[rpart]{prune}}, \code{\link[gbm]{gbm}},
##' \code{\link[nnet]{nnet}}, \code{\link[earth]{earth}},
##' \code{\link[mda]{fda}}, \code{\link[mda]{mars}}, \code{\link[maxnet]{maxnet}},
##' \code{\link[randomForest]{randomForest}}, \code{\link[xgboost]{xgboost}},
##' \code{\link{bm_ModelingOptions}}, \code{\link{BIOMOD_Modeling}},
##' \code{\link{bm_MakeFormula}}, \code{\link{bm_SampleFactorLevels}},
##' \code{\link{bm_FindOptimStat}}, \code{\link{bm_VariablesImportance}}
##' @family Secundary functions
##'
##'
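##' @examples
##' ## A minimal, hypothetical sketch (not run) : these internal functions are usually
##' ## reached indirectly through BIOMOD_Modeling. Object names (myResp, myExpl,
##' ## myRespXY) and argument values below are illustrative assumptions only.
##' \dontrun{
##'   myBiomodData <- BIOMOD_FormatingData(resp.var = myResp,
##'                                        expl.var = myExpl,
##'                                        resp.xy = myRespXY,
##'                                        resp.name = "MySpecies")
##'   myBiomodModelOut <- BIOMOD_Modeling(bm.format = myBiomodData,
##'                                       modeling.id = "Example",
##'                                       models = c("GLM", "RF"),
##'                                       CV.strategy = "random",
##'                                       CV.nb.rep = 2,
##'                                       CV.perc = 0.8,
##'                                       metric.eval = c("TSS", "ROC"),
##'                                       var.import = 2)
##' }
##'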
##' @importFrom foreach foreach %do% %dopar%
##' @importFrom rpart prune
##' @importFrom gbm gbm.perf
##' @importFrom dplyr mutate_at %>%
##'
##' @export
##'
##'
###################################################################################################
bm_RunModelsLoop <- function(bm.format,
weights,
calib.lines,
modeling.id,
models,
models.pa,
bm.options,
metric.eval,
var.import,
scale.models = TRUE,
nb.cpu = 1,
seed.val = NULL,
do.progress = TRUE)
{
if (nb.cpu > 1) {
if (.getOS() != "windows") {
if (!isNamespaceLoaded("doParallel")) {
if(!requireNamespace('doParallel', quietly = TRUE)) stop("Package 'doParallel' not found")
}
doParallel::registerDoParallel(cores = nb.cpu)
} else {
warning("Parallelisation with `foreach` is not available for Windows. Sorry.")
}
}
## PREPARE DATA ---------------------------------------------------------------------------------
list.data <- list()
pa.list = sapply(colnames(calib.lines), function(x) strsplit(x, "_")[[1]][2])
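## colnames(calib.lines) follow a "_<PA>_<RUN>" convention (e.g. "_PA1_RUN1" or
## "_allData_allRun"), so the second element of the "_" split is the pseudo-absence
## dataset id and the third one (used below) is the repetition id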
for (pa.id in unique(pa.list)) { # loop on PA -------------------------------
models.subset = models
if (!is.null(models.pa)) {
## optional : subset of models associated to the concerned PA dataset
models.subset = sapply(models.pa, function(x) pa.id %in% x)
models.subset = names(models.pa)[which(models.subset == TRUE)]
}
data.all <- get_species_data(bm.format)
if (pa.id %in% colnames(data.all)) {
## optional : subset of species data associated to the concerned PA dataset
data.all <- data.all[which(data.all[, pa.id] == TRUE), ]
}
data.all <- data.all[, c(bm.format@sp.name, "x", "y", colnames(bm.format@data.env.var))]
for (i in which(pa.list == pa.id)) { # loop on RUN ------------------------
run.id = strsplit(colnames(calib.lines)[i], "_")[[1]][3]
run.name = paste0(bm.format@sp.name, "_", pa.id, "_", run.id)
for (modi in models.subset) { # loop on models --------------------------
all.name = paste0(run.name, "_", modi)
list.data[[all.name]] <- list(modi = modi,
run.name = run.name,
data.all = data.all,
calib.lines.vec = na.omit(calib.lines[, i]), ## ATTENTION na.omit
weights.vec = na.omit(weights[, pa.id])) ## ATTENTION na.omit
}}}
## RUN models -----------------------------------------------------------------------------------
out <- foreach(ii = 1:length(list.data)) %dopar%
{
cat('\n\n-=-=-=--=-=-=-', names(list.data)[ii], '\n')
bm_RunModel(model = list.data[[ii]]$modi,
run.name = list.data[[ii]]$run.name,
dir.name = bm.format@dir.name,
modeling.id = modeling.id,
bm.options = bm.options,
Data = list.data[[ii]]$data.all,
weights.vec = list.data[[ii]]$weights.vec,
calib.lines.vec = list.data[[ii]]$calib.lines.vec,
eval.data = get_eval_data(bm.format),
metric.eval = metric.eval,
var.import = var.import,
scale.models = scale.models,
seed.val = seed.val,
do.progress = TRUE)
}
return(out)
}
# ---------------------------------------------------------------------------- #
##'
##' @rdname bm_RunModelsLoop
##' @export
##'
bm_RunModel <- function(model, run.name, dir.name = '.'
, modeling.id = '', bm.options
, Data, weights.vec, calib.lines.vec
, eval.data = NULL
, metric.eval = c('ROC','TSS','KAPPA'), var.import = 0
, scale.models = TRUE, nb.cpu = 1, seed.val = NULL, do.progress = TRUE)
{
## 0. Check arguments ---------------------------------------------------------------------------
args <- .bm_RunModel.check.args(model, bm.options, Data, weights.vec, calib.lines.vec
, eval.data, metric.eval, scale.models, seed.val, do.progress)
if (is.null(args)) { return(NULL) }
for (argi in names(args)) { assign(x = argi, value = args[[argi]]) }
rm(args)
## get model name and names of categorical variables
dir_name <- dir.name
model_name <- paste0(run.name, '_', model)
dataset_name <- paste0("_", paste0(strsplit(run.name, "_")[[1]][-1], collapse = "_"))
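## e.g. (illustrative) with run.name "MySpecies_PA1_RUN1" and model "GLM" :
## model_name is "MySpecies_PA1_RUN1_GLM" and dataset_name is "_PA1_RUN1"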
## get options for specific model
opt_name <- grep(model, names(bm.options@options), value = TRUE)
if (length(opt_name) == 1) {
bm.opt <- bm.options@options[[opt_name]]
} else { stop("pitiprobleum") } ## Should not happen now
if (length(grep("GAM", model)) == 1) {
subclass_name <- paste0(bm.opt@model, "_", bm.opt@type, "_", bm.opt@package)
.load_gam_namespace(model_subclass = subclass_name)
}
## get options for specific dataset
if (dataset_name %in% names(bm.opt@args.values)) {
bm.opt.val <- bm.opt@args.values[[dataset_name]]
} else { stop("groprobleum") }
## 2. CREATE MODELS -----------------------------------------------------------------------------
set.seed(seed.val)
if (model != "MAXENT") { ## ANY MODEL BUT MAXENT ------------------------------------------------
## PRELIMINAR ---------------------------------------------------
if (model %in% c("ANN", "MARS", "RF") & is.null(bm.opt.val$formula)) {
bm.opt.val$formula <- bm_MakeFormula(resp.name = resp_name
, expl.var = head(data_env)
, type = 'simple'
, interaction.level = 0)
}
if (model == "RF" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") {
# define occurrences as a factor so that RF performs classification rather than regression
data_mod <- data_mod %>% mutate_at(resp_name, factor)
bm.opt.val$strata <- data_mod[calib.lines.vec, , drop = FALSE][ , resp_name]
bm.opt.val$sampsize <- unlist(ifelse(!is.null(bm.opt.val$sampsize), list(bm.opt.val$sampsize), length(data_sp[calib.lines.vec]))) ## TOCHECK !!
}
## FILL data parameter ------------------------------------------
if (model %in% c("ANN", "CTA", "FDA", "GAM", "GBM", "MARS", "RF")) {
bm.opt.val$data <- data_mod[calib.lines.vec, , drop = FALSE]
} else if (model == "GLM") {
bm.opt.val$data <- cbind(data_mod[calib.lines.vec, , drop = FALSE],
data.frame("weights" = weights.vec[calib.lines.vec]))
if ("mustart" %in% names(bm.opt.val) &&
(length(bm.opt.val$mustart) > 0 && nchar(bm.opt.val$mustart) > 0) &&
length(bm.opt.val$mustart) < nrow(bm.opt.val$data)) {
bm.opt.val$mustart <- rep(bm.opt.val$mustart[1], nrow(bm.opt.val$data))
## Should we put a warning about that ?
}
} else if (model == "MAXNET") {
bm.opt.val$p <- data_sp[calib.lines.vec]
bm.opt.val$data <- data_env[calib.lines.vec, , drop = FALSE]
} else if (model == "SRE") {
bm.opt.val$resp.var <- data_sp[calib.lines.vec]
bm.opt.val$expl.var <- data_env[calib.lines.vec, , drop = FALSE]
} else if (model == "XGBOOST") {
bm.opt.val$label <- data_sp[calib.lines.vec]
bm.opt.val$data <- as.matrix(data_env[calib.lines.vec, , drop = FALSE])
}
## FILL weights parameter ---------------------------------------
if (model %in% c("ANN", "CTA", "GBM", "GLM", "MARS")) { #, "RF")) { ## TO BE ADDED RF ??
bm.opt.val$weights <- quote(weights)
} else if (model %in% c("FDA", "GAM")) {
bm.opt.val$weights <- weights.vec[calib.lines.vec]
} else if (model %in% c("XGBOOST")) {
bm.opt.val$weight <- weights.vec[calib.lines.vec]
}
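## note : for ANN, CTA, GBM, GLM and MARS the weights are given as the unevaluated
## symbol `weights`, resolved at fitting time against the "weights" column bound into
## the data (see the GLM case above and .bm_RunModel.check.args)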
## REORGANIZE order of parameters -------------------------------
if (model %in% c("ANN", "MARS", "RF")) {
bm.opt.val <- bm.opt.val[c("formula", "data", names(bm.opt.val)[which(!(names(bm.opt.val) %in% c("formula", "data")))])]
}
if (model %in% c("FDA")) {
bm.opt.val$method <- eval(parse(text = paste0("quote(", bm.opt.val$method, ")")))
}
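## for FDA the method is stored as a character string in the options and converted
## back into an unevaluated call before being passed to the fitting function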
## RUN model ----------------------------------------------------
model.call <- paste0(bm.opt@package, "::", bm.opt@func)
model.sp <- try(do.call(eval(parse(text = model.call)), bm.opt.val))
## GET results --------------------------------------------------
if (!inherits(model.sp, "try-error")) {
if (model == "CTA") {
# select the best tree by pruning --------------- could be done another way
tr <- as.data.frame(model.sp$cptable)
tr$xsum <- tr$xerror + tr$xstd
tr <- tr[tr$nsplit > 0,]
if (nrow(tr) > 0) {
Cp <- tr[tr$xsum == min(tr$xsum), "CP"]
model.sp <- prune(model.sp, cp = Cp[length(Cp)])
} else {
warning("Pruning of CTA model failed")
}
} else if (model == "GBM" && bm.opt.val$cv.folds > 1) {
best.iter <- try(gbm.perf(model.sp, method = "cv" , plot.it = FALSE)) ## c('OOB', 'test', 'cv')
}
model.bm <- new(paste0(bm.opt@model, "_biomod2_model"),
model = model.sp,
model_name = model_name,
model_class = bm.opt@model,
model_options = bm.opt, ## bm.opt.val ??
dir_name = dir_name,
resp_name = resp_name,
expl_var_names = expl_var_names,
expl_var_type = get_var_type(data_env[calib.lines.vec, , drop = FALSE]),
expl_var_range = get_var_range(data_env[calib.lines.vec, , drop = FALSE]))
if (model == "GAM") { model.bm@model_subclass = subclass_name } ## TO BE ADDED to all models ?
if (model == "GBM" && exists("best.iter")) { model.bm@n.trees_optim = best.iter }
if (model == "SRE" && bm.opt.val$do.extrem == TRUE) { model.bm@extremal_conditions = model.sp }
# if (model == "SRE") {
# model.sp <- as.data.frame(model.sp)
# rownames(model.sp) <- rownames(bm.opt.val$expl.var)
# model.bm@extremal_conditions = model.sp
# }
}
## POSTLIMINAR --------------------------------------------------
if (model == "RF" && !is.null(bm.opt.val$type) && bm.opt.val$type == "classification") {
# cancel the factor conversion of occurrences (back to numeric)
data_mod <- data_mod %>% mutate_at(resp_name, function(.x) {
.x %>% as.character() %>% as.numeric()
})
}
} else { ## MAXENT ------------------------------------------------------------------------------
if (!file.exists(file.path(bm.opt.val$path_to_maxent.jar, "maxent.jar"))) {
warning(paste0("'maxent.jar' file is missing in specified directory ("
, bm.opt.val$path_to_maxent.jar, ").\n"
, "It must be downloaded (https://biodiversityinformatics.amnh.org/open_source/maxent/) "
, "and put in that directory."), immediate. = TRUE)
g.pred <- NA
class(g.pred) <- "try-error"
cat("\n*** Error in MAXENT, no executable file")
} else {
categorical_var <- .get_categorical_names(data_env)
MWD <- .maxent.prepare.workdir(sp_name = resp_name
, run_name = run.name
, data_sp = data_sp
, data_xy = data_xy
, data_env = data_env
, categorical_var = categorical_var
, calib.lines.vec = calib.lines.vec
, data_eval = eval.data
, dir.name = dir_name
, modeling.id = modeling.id
, background_data_dir = bm.opt.val$background_data_dir)
# list of arguments ---------------
maxent.args <- c(
ifelse(is.null(bm.opt.val$memory_allocated), "", paste0("-mx", bm.opt.val$memory_allocated, "m")),
ifelse(is.null(bm.opt.val$initial_heap_size), "", paste0(" -Xms", bm.opt.val$initial_heap_size)),
ifelse(is.null(bm.opt.val$max_heap_size), "", paste0(" -Xmx", bm.opt.val$max_heap_size)),
paste0(" -jar ", file.path(bm.opt.val$path_to_maxent.jar, "maxent.jar")),
paste0(" environmentallayers=\"", MWD$m_backgroundFile, "\""),
paste0(" samplesfile=\"", MWD$m_speciesFile, "\""),
paste0(" projectionlayers=\"", gsub(", ", ",", toString(MWD$m_predictFile)), "\""),
paste0(" outputdirectory=\"", MWD$m_outdir, "\""),
paste0(" outputformat=logistic "),
ifelse(length(categorical_var), paste0(" togglelayertype=", categorical_var, collapse = " "), ""),
" redoifexists")
vec_x <- names(bm.opt.val)[which(!(names(bm.opt.val) %in% c("path_to_maxent.jar", "memory_allocated", "initial_heap_size",
"max_heap_size", "background_data_dir")))]
maxent.args <- c(maxent.args, sapply(vec_x, function(xx) {
paste0(" ", xx, "=", bm.opt.val[[xx]])
}))
maxent.args <- c(maxent.args, " autorun ", " nowarnings ", " notooltips ", " noaddsamplestobackground")
# file to log potential errors ----
maxent_stderr_file <- paste0(MWD$m_outdir, "/maxent.stderr")
# run model -----------------------
system2(command = "java", args = maxent.args,
wait = TRUE,
stdout = "", stderr = maxent_stderr_file)
maxent_exec_output <- readLines(maxent_stderr_file)
if (any(grepl(pattern = "Error", x = maxent_exec_output))) {
g.pred <- NA
class(g.pred) <- "try-error"
cat("\n*** Error in MAXENT, more info available in ", maxent_stderr_file)
} else {
model.bm <- new("MAXENT_biomod2_model",
model_output_dir = MWD$m_outdir, # MAXENT only
model_name = model_name,
model_class = bm.opt@model,
model_options = bm.opt,
dir_name = dir_name,
resp_name = resp_name,
expl_var_names = expl_var_names,
expl_var_type = get_var_type(data_env[calib.lines.vec, , drop = FALSE]),
expl_var_range = get_var_range(data_env[calib.lines.vec, , drop = FALSE]))
# for MAXENT, predictions are computed at the same time as model building to save time
cat("\n Getting predictions...")
g.pred <- try(round(as.numeric(read.csv(MWD$m_outputFile)[, 3]) * 1000))
if (var.import > 0) {
cat("\n Getting predictor contributions...")
variables.importance <- bm_VariablesImportance(bm.model = model.bm
, expl.var = data_env
, nb.rep = var.import
, temp.workdir = MWD$m_outdir
, seed.val = seed.val
, do.progress = do.progress)
}
}
}
}
## 1. Create output object ----------------------------------------------------------------------
ListOut <- list(model = NULL,
calib.failure = NULL,
pred = NULL,
pred.eval = NULL,
evaluation = NULL,
var.import = NULL)
## 3. CREATE PREDICTIONS ------------------------------------------------------------------------
temp_workdir = NULL
if (model != "MAXENT") {
if (inherits(model.sp, "try-error")) {
g.pred <- NA
class(g.pred) <- "try-error"
cat("\n*** Error in ", model, ", calibration failed")
} else if (model == "SRE" && bm.opt.val$do.extrem == FALSE) {
g.pred <- model.sp
} else {
g.pred <- try(predict(model.bm, data_env, on_0_1000 = TRUE, seedval = seed.val, temp_workdir = temp_workdir))
}
}
if (model == "MAXENT" & !inherits(g.pred, 'try-error')) {
temp_workdir = model.bm@model_output_dir
}
## scale or not predictions -------------------------------------------------
if (scale.models & !inherits(g.pred, 'try-error')) {
cat("\n\tModel scaling...")
model.bm@scaling_model <- try(.scaling_model(g.pred / 1000, data_sp, weights = weights.vec))
## with weights
g.pred <- try(predict(model.bm, data_env, on_0_1000 = TRUE, seedval = seed.val, temp_workdir = temp_workdir))
}
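## (the scaling step, cf. the scale.models documentation, calibrates the 0-1 predictions
## with a weighted binomial GLM and then re-predicts on the 0-1000 scale)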
## check predictions existence and stop execution if not ok -----------------
test_pred_ok <- TRUE
if (inherits(g.pred, "try-error")) { # model calibration or prediction failed
test_pred_ok <- FALSE
cat("\n*** inherits(g.pred,'try-error')")
} else if (all(is.na(g.pred))) { # only NA predicted
test_pred_ok <- FALSE
cat("\n*** only NA predicted")
} else if (length(unique(na.omit(g.pred))) <= 1) { # single value predicted
test_pred_ok <- FALSE
cat("\n*** single value predicted")
}
if (test_pred_ok) {
# keep the model name
ListOut$model <- model_name
} else {
# keep the name of the failed model
cat("\n ! Note : ", model_name, "failed!\n")
ListOut$calib.failure = model_name
return(ListOut) ## end of function.
}
## make prediction on evaluation data ---------------------------------------
if (!is.null(eval.data)) {
g.pred.eval <- try(
predict(model.bm,
eval.data[, expl_var_names, drop = FALSE],
on_0_1000 = TRUE,
seedval = seed.val,
temp_workdir = temp_workdir)
)
}
## SAVE predictions ---------------------------------------------------------
ListOut$pred <- g.pred
if (exists("g.pred.eval")) { ListOut$pred.eval <- g.pred.eval }
## 4. EVALUATE MODEL ----------------------------------------------------------------------------
if (length(metric.eval) > 0) {
cat("\n\tEvaluating Model stuff...")
## Check no NA in g.pred to avoid evaluation failures
na_cell_id <- which(is.na(g.pred))
if (length(na_cell_id) > 0) {
eval.lines.vec <- eval.lines.vec[-na_cell_id]
cat('\n\tNote : some NA values occur in predictions')
}
if (length(which(eval.lines.vec == TRUE)) < length(g.pred)) {
## CALIBRATION & VALIDATION LINES -------------------------------------------------
cross.validation <- foreach(xx = metric.eval, .combine = "rbind") %do% {
bm_FindOptimStat(metric.eval = xx,
obs = data_sp[which(eval.lines.vec == FALSE)],
fit = g.pred[which(eval.lines.vec == FALSE)])
}
if (max(cross.validation$cutoff, na.rm = TRUE) > 1000) { cat("\n*** Wrong values predicted, please be careful with the results of this model") }
colnames(cross.validation)[which(colnames(cross.validation) == "best.stat")] <- "calibration"
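## the cutoff optimised on the calibration lines is reused below as a fixed threshold
## when computing the same statistics on the validation lines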
stat.validation <- foreach(xx = metric.eval, .combine = "rbind") %do% {
bm_FindOptimStat(metric.eval = xx,
obs = data_sp[which(eval.lines.vec == TRUE)],
fit = g.pred[which(eval.lines.vec == TRUE)],
threshold = cross.validation$cutoff[
which(cross.validation$metric.eval == xx)
])
}
cross.validation$validation <- stat.validation$best.stat
} else {
## NO VALIDATION LINES -----------------------------------------------------
cross.validation <- foreach(xx = metric.eval, .combine = "rbind") %do% {
bm_FindOptimStat(metric.eval = xx,
obs = data_sp[which(eval.lines.vec == TRUE)],
fit = g.pred[which(eval.lines.vec == TRUE)])
}
colnames(cross.validation)[which(colnames(cross.validation) == "best.stat")] <- "calibration"
cross.validation$validation <- NA
}
if (exists('g.pred.eval')) {
## Check no NA in g.pred.eval to avoid evaluation failures
na_cell_id <- which(is.na(g.pred.eval))
if (length(na_cell_id) > 0) {
g.pred.eval.without.na <- g.pred.eval[-na_cell_id]
eval.data <- eval.data[-na_cell_id, ]
cat('\n\tNote : some NA values occur in evaluation predictions')
} else {
g.pred.eval.without.na <- g.pred.eval
}
stat.evaluation <- foreach(xx = metric.eval, .combine = "rbind") %do% {
bm_FindOptimStat(metric.eval = xx,
obs = eval.data[, 1],
fit = g.pred.eval.without.na,
threshold = cross.validation["cutoff", xx])
}
cross.validation$evaluation <- stat.evaluation$best.stat
} else {
cross.validation$evaluation <- NA
}
## store results
for (col.i in 2:ncol(cross.validation)) {
cross.validation[, col.i] <- round(cross.validation[, col.i], digits = 3)
}
ListOut$evaluation <- cross.validation
model.bm@model_evaluation <- cross.validation
rm(cross.validation)
}
## 5. COMPUTE VARIABLES IMPORTANCE --------------------------------------------------------------
if (var.import > 0) {
cat("\n\tEvaluating Predictor Contributions...")
if (model != "MAXENT") {
variables.importance <- bm_VariablesImportance(bm.model = model.bm
, expl.var = data_env
, nb.rep = var.import
, seed.val = seed.val
, do.progress = do.progress)
}
ListOut$var.import <- variables.importance
model.bm@model_variables_importance <- variables.importance
rm(variables.importance)
cat("\n")
}
## 6. SAVE MODEL OBJECT ON HARD DRIVE -----------------------------------------------------------
nameModel = paste(run.name, model, sep = "_")
assign(x = nameModel, value = model.bm)
save(list = nameModel, file = file.path(dir_name, resp_name, "models", modeling.id, nameModel), compress = TRUE)
return(ListOut)
}
###################################################################################################
.bm_RunModel.check.args <- function(model, bm.options, Data, weights.vec, calib.lines.vec
, eval.data, metric.eval, scale.models, seed.val = NULL, do.progress = TRUE)
{
## 0. Do some cleaning over Data argument -----------------------------------
data_sp <- Data[, 1]
data_xy <- Data[, c("x", "y")]
data_env <- Data[, -c(1, which(colnames(Data) %in% c("x", "y"))), drop = FALSE]
resp_name <- colnames(Data)[1] ## species name
expl_var_names <- colnames(data_env) ## explanatory variable names
# replace selected pseudo-absences (NA) with true absences (0) for model computation purposes
if (sum(is.na(Data[, 1]))) { data_sp[which(is.na(data_sp))] <- 0 }
## 1. Check CalibLines argument ---------------------------------------------
if (any(calib.lines.vec == FALSE)) ## if some lines for evaluation...
{
eval.lines.vec <- !calib.lines.vec
# ...test whether there are both (pseudo-)absences AND presences in the evaluation and calibration datasets
if (length(which(data_sp[calib.lines.vec] == 0)) == 0 ||
length(which(data_sp[calib.lines.vec] == 0)) == length(calib.lines.vec) ||
length(which(data_sp[eval.lines.vec] == 0)) == 0 ||
length(which(data_sp[eval.lines.vec] == 0)) == length(eval.lines.vec)) {
warning(paste0(resp_name, " ", model,
" was switched off because the calibration or evaluation dataset does not contain both presences and absences"),
immediate. = TRUE)
return(NULL)
}
} else { ## evaluation = calibration dataset
eval.lines.vec <- calib.lines.vec
# ...test whether there are both absences AND presences in the whole dataset
if (length(which(data_sp == 0)) == 0 ||
length(which(data_sp == 0)) == length(data_sp)) {
warning(paste0(resp_name, " ", model,
" was switched off because the dataset does not contain both presences and absences (full model)"),
immediate. = TRUE)
return(NULL)
}
}
## 2. Check weights argument ------------------------------------------------
if (is.null(weights.vec)) { weights.vec <- rep(1, nrow(Data)) }
## These models require data and weights to be in the same dataset
if (model %in% c('ANN', 'MARS', 'CTA', 'GBM')) { ## TO BE ADDED RF ??
data_env_w <- cbind(data_env, weights.vec)
colnames(data_env_w) <- c(colnames(data_env), "weights")
} else {
data_env_w <- data_env
}
## 3. Check scale.models argument -------------------------------------------
if (model == "SRE") { scale.models <- FALSE } else if (model %in% c("ANN", "FDA")) { scale.models <- TRUE }
## 4. Check models.eval.meth arguments --------------------------------------
metric.eval <- unique(metric.eval)
avail.eval.meth.list <- c('POD', 'FAR', 'POFD', 'SR', 'ACCURACY', 'BIAS'
, 'ROC', 'TSS', 'KAPPA', 'OR', 'ORSS', 'CSI'
, 'ETS', 'BOYCE', 'MPA')
# .fun_testIfIn(TRUE, "metric.eval", metric.eval, avail.eval.meth.list)
if (sum(!(metric.eval %in% avail.eval.meth.list)) > 0) {
tmp = which(metric.eval %in% avail.eval.meth.list)
warning(paste0(toString(metric.eval[-tmp]), ' were switched off !'), immediate. = TRUE)
metric.eval <- metric.eval[tmp]
}
data_mod <- cbind(data_sp, data_env_w)
colnames(data_mod) <- c(resp_name, colnames(data_env_w))
return(list(data_sp = data_sp,
data_xy = data_xy,
data_env = data_env,
data_mod = data_mod,
weights.vec = weights.vec,
eval.lines.vec = eval.lines.vec,
metric.eval = metric.eval,
eval.data = eval.data,
scale.models = scale.models,
resp_name = resp_name,
expl_var_names = expl_var_names,
seed.val = seed.val, ## CAREFUL : the user-supplied seed value is now passed through directly; unclear whether that is desirable
do.progress = do.progress))
}
.maxent.prepare.workdir <- function(sp_name, run_name = NULL, data_sp, data_xy, data_env
, categorical_var = NULL, calib.lines.vec = NULL, data_eval
, dir.name = '.', modeling.id = '', background_data_dir = 'default')
{
cat('\n\t\tCreating Maxent Temp Proj Data...')
## initialise output
MWD <- list()
class(MWD) <- "maxent_workdir_info"
## default parameters setting
if (is.null(run_name)) { run_name <- sp_name }
if (is.null(calib.lines.vec)) { calib.lines.vec <- rep(TRUE, nrow(data_env)) }
## define all paths to files needed by MAXENT
nameFolder = file.path(dir.name, sp_name, 'models', modeling.id)
m_outdir <- file.path(nameFolder, paste0(run_name, '_MAXENT_outputs'))
m_predictDir <- file.path(m_outdir, "Predictions")
MWD$m_outdir <- m_outdir
MWD$m_outputFile <- file.path(m_outdir, paste0(run_name, '_Pred_swd.csv'))
MWD$m_predictDir <- m_predictDir
## directories creation
dir.create(m_outdir, showWarnings = FALSE, recursive = TRUE, mode = '777')
dir.create(m_predictDir, showWarnings = FALSE, recursive = TRUE, mode = '777')
## transform categorical variables into numeric to avoid factors being saved
## as characters, which are not readable by maxent
data_env <- .categorical2numeric(data_env, categorical_var)
## Presence Data --------------------------------------------------------------------------------
presLines <- which((data_sp == 1) & calib.lines.vec)
absLines <- which((data_sp == 0) & calib.lines.vec)
Sp_swd <- cbind(rep(run_name, length(presLines))
, data_xy[presLines, ]
, data_env[presLines, , drop = FALSE])
colnames(Sp_swd) <- c('species', 'X', 'Y', colnames(data_env))
m_speciesFile <- file.path(m_outdir, "Sp_swd.csv")
write.table(Sp_swd, file = m_speciesFile, quote = FALSE, row.names = FALSE, sep = ",")
MWD$m_speciesFile <- m_speciesFile
## Background Data (create background file only if needed) --------------------------------------
if (background_data_dir == 'default') {
# keep only the absences (0) from the calibration lines
Back_swd <- cbind(rep("background", length(absLines))
, data_xy[absLines, ]
, data_env[absLines, , drop = FALSE])
colnames(Back_swd) <- c("background", colnames(Back_swd)[-1])
m_backgroundFile <- file.path(m_outdir, "Back_swd.csv")
write.table(Back_swd, file = m_backgroundFile, quote = FALSE, row.names = FALSE, col.names = TRUE, sep = ",")
MWD$m_backgroundFile <- m_backgroundFile
} else { ## use background directory given as an option
MWD$m_backgroundFile <- background_data_dir
}
## Prediction Data ------------------------------------------------------------------------------
Pred_swd <- cbind(rep("predict", nrow(data_xy)), data_xy, data_env)
colnames(Pred_swd) <- c("predict", "x", "y", colnames(data_env))
m_predictFile <- file.path(m_predictDir, "Pred_swd.csv")
write.table(Pred_swd, file = m_predictFile, quote = FALSE, row.names = FALSE, col.names = TRUE, sep = ",")
MWD$m_predictFile <- m_predictFile
## dealing with independent evaluation data -----------------------------------------------------
if (!is.null(data_eval)) {
Pred_eval_swd <- cbind(rep("predictEval", nrow(data_eval))
, data_eval[, c("x", "y")]
, data_eval[, colnames(data_env), drop = FALSE])
colnames(Pred_eval_swd) <- c("predict", colnames(Back_swd)[-1])
m_predictEvalFile <- file.path(m_predictDir, "PredEval_swd.csv")
write.table(Pred_eval_swd, file = m_predictEvalFile, quote = FALSE, row.names = FALSE, col.names = TRUE, sep = ",")
MWD$m_predictEvalFile <- m_predictEvalFile
}
return(MWD)
}