R/biomod2_internal.R

Defines functions .find_freq_mode .find_mode .threshold_ordinal .numeric2factor .avail.eval.meth.list .avail.models.list .which.data.type .get_formal_predictions .xgb_pred .run_pred .check_env_levels .categorical_stack_to_terra .categorical2numeric .get_categorical_names .load_gam_namespace .fill_BIOMOD.models.out .extract_selected.layers .extract_projlinkInfo .filter_outputs.vec .filter_outputs.df .transform_model.as.col .format_proj.df .extract_modelNamesInfo .transform_outputs_list .scaling_model .automatic_weights_creation .get_env_class .get_var_range .get_var_type .get_data_mask .check_duplicated_cells rast.has.values .check_calib.lines_names .check_formating_expl.var .check_formating_xy .check_formating_table .check_formating_resp.var.abun .check_formating_resp.var.bin .check_formating_spatial .fun_testMetric .fun_testIfPosInt .fun_testIf01 .fun_testIfPosNum .fun_testIfIn .fun_testIfInherits .bm_cat .getOS

Documented in .categorical2numeric .check_duplicated_cells .get_categorical_names .get_env_class .load_gam_namespace rast.has.values .transform_model.as.col

.getOS <- function()
{
  sysinf = Sys.info()
  if (!is.null(sysinf))
  {
    os = sysinf['sysname']
    if (os == 'Darwin') os = "mac"
  } else
  {
    os = .Platform$OS.type
    if (grepl("^darwin", R.version$os)) os = "mac"
    if (grepl("linux-gnu", R.version$os)) os = "linux"
  }
  return(tolower(os))
}


## BIOMOD SPECIFIC CAT ----------------------------------------------------------------------------

.bm_cat <- function(x = NULL, ...)
{
  if (is.null(x))
  {
    cat("\n")
    cat(paste(rep("-=", round(.Options$width / 2)), collapse = ""))
    cat("\n")
  } else
  {
    x.length = nchar(x) + 2
    y.length = (.Options$width - x.length) / 2
    cat("\n")
    cat(paste(rep("-=", round(y.length / 2)), collapse = "")
        , x
        , paste(rep("-=", round(y.length / 2)), collapse = ""))
    cat("\n")
  }
}


## TEST PARAMETERS --------------------------------------------------------------------------------

.fun_testIfInherits <- function(test, objName, objValue, values)
{
  if (!inherits(objValue, values)) {
    stop(paste0("\n", paste0(objName, " must be a '", paste0(values[1:(length(values) -1)], collapse = "', '")
                             , ifelse(length(values) > 1, paste0("' or '", values[length(values)]), "")
                             , "' object")))
    test <- FALSE
  }
  return(test)
}

.fun_testIfIn <- function(test, objName, objValue, values, exact = FALSE)
{
  if (any(! objValue %in% values) ||
      (exact == TRUE && any(! values %in% objValue))) {
    stop(paste0("\n", objName, " must be '", 
                ifelse(length(values) > 1, 
                       paste0(paste0(values[1:(length(values) -1)], collapse = "', '"),
                              "' or '", values[length(values)])
                       , paste0(values,"'"))))
    test <- FALSE
  }
  return(test)
}

.fun_testIfPosNum <- function(test, objName, objValue)
{
  if (!is.numeric(objValue)) {
    stop(paste0("\n", objName, " must be a numeric"))
    test <- FALSE
  } else if (objValue < 0) {
    stop(paste0("\n", objName, " must be a positive numeric"))
    test <- FALSE
  }
  return(test)
}

.fun_testIf01 <- function(test, objName, objValue)
{
  test <- .fun_testIfPosNum(test, objName, objValue)
  if (test && objValue > 1) {
    stop(paste0("\n", objName, " must be a 0 to 1 numeric"))
    test <- FALSE
  }
  return(test)
}

.fun_testIfPosInt <- function(test, objName, objValue)
{
  if (!is.numeric(objValue)) {
    cat(paste0("\n", objName, " must be a integer"))
    test <- FALSE
  } else if (any(objValue < 0) || any(objValue %% 1 != 0)) {
    cat(paste0("\n", objName, " must be a positive integer"))
    test <- FALSE
  }
  return(test)
}

.fun_testMetric <- function(test, objName, objValue, values)
{
  if (!is.null(objValue)) {
    test <- .fun_testIfInherits(test, objName, objValue, "character")
    if(length(objValue) != 1) {
      stop(paste0("`",objName,"` must only have one element"))
    }
    test <- .fun_testIfIn(test, objName, objValue, values)
  }
  return(test)
}


## CHECK formated data ----------------------------------------------------------------------------
## used in biomod2_classes_1

.check_formating_spatial <- function(resp.var, expl.var = NULL, resp.xy = NULL, is.eval = FALSE)
{
  if (!is.null(resp.xy)) {
    cat("\n      ! XY coordinates of response variable will be ignored because spatial response object is given.")
  }
  
  if (inherits(resp.var, 'SpatialPoints')) { 
    resp.xy <- data.matrix(sp::coordinates(resp.var))
    if (inherits(resp.var, 'SpatialPointsDataFrame')) {
      resp.var <- resp.var@data
    } else {
      cat("\n      ! Response variable is considered as only presences... Is it really what you want?")
      resp.var <- rep(1, nrow(resp.xy))
    }
  }
  
  if (inherits(resp.var, 'SpatVector')) { 
    resp.xy <- data.matrix(crds(resp.var))
    resp.var <- as.data.frame(resp.var)
    if (ncol(resp.var) == 0) {
      if(is.eval){
        stop("eval.resp must have both presences and absences in the data associated to the SpatVector") 
      } else {
        cat("\n      ! Response variable is considered as only presences... Is it really what you want?")
        resp.var <- rep(1, nrow(resp.xy))
      }
    }
  }
  
  if (!is.eval) {
    if ( all(!is.na(resp.var)) && 
         all(resp.var == 1, na.rm = TRUE) &&
         !inherits(expl.var, c('Raster','SpatRaster'))) {
      stop("For Presence-Only model based on SpatialPoints or SpatVector, expl.var needs to be a RasterStack or SpatRaster to be able to sample pseudo-absences")
    }
  }
  
  return(list(resp.var = resp.var, resp.xy = resp.xy))
}

.check_formating_resp.var.bin <- function(resp.var, is.eval = FALSE)
{
  if (is.factor(resp.var)){
    resp.var <- as.numeric(as.character(resp.var)) 
  }
  if (length(which(!(resp.var %in% c(0, 1, NA)))) > 0) {
    cat("\n      ! ", ifelse(is.eval, "Evaluation",""), "Response variable have non-binary values that will be converted into 0 (resp <=0) or 1 (resp > 0).")
    resp.var[which(resp.var > 0)] <- 1
    resp.var[which(resp.var <= 0)] <- 0
  }
  if (is.eval) {
    if (!any(resp.var == 1, na.rm = TRUE) || !any(resp.var == 0, na.rm = TRUE)) {
      stop("Evaluation response data must have both presences and absences")
    }
  }
  as.numeric(resp.var)
}

.check_formating_resp.var.abun <- function(resp.var)
{  
  if (!is.numeric(resp.var)) {
    if (!is.factor(resp.var)) {
      stop("biomod2 accept only numeric or factor values : please check your response data.")
    # } else if (!is.ordered(resp.var)) {
    #   stop("Your ordinal data doesn't seem ordered : please check your response data.")
    }
  } else {
    if (-Inf %in% resp.var | Inf %in% resp.var) {
      stop("It seems there is Inf in your response data. Please check and remove it.")
    }
    negative <- any(resp.var < 0 )
    if (negative) {
      stop("biomod2 doesn't accept negative values : please check your response data.")
    }
  }
  resp.var <- resp.var[!is.na(resp.var)] ## Add a warning ? 
  return(resp.var)
}

.check_formating_table <- function(resp.var)
{
  resp.var = as.data.frame(resp.var)
  if (ncol(resp.var) > 1) {
    stop("You must give a monospecific response variable (1D object)")
  } else if (is.ordered(resp.var[, 1])) {
    levels <- levels(resp.var[, 1])
    resp.var <- factor(resp.var[, 1], levels = levels, ordered = T)
  } else {
    resp.var <- as.numeric(resp.var[, 1])
  }
  resp.var
}

.check_formating_xy <- function(resp.xy, resp.length, env.as.df = FALSE)
{
  if (ncol(resp.xy) != 2) {
    stop("If given, resp.xy must be a 2 column matrix or data.frame")
  }
  if (nrow(resp.xy) != resp.length &&
      !(env.as.df & (nrow(resp.xy) == 0))) {
    stop("Response variable and its coordinates don't match")
  }
  as.data.frame(resp.xy)
}

.check_formating_expl.var <- function(expl.var, length.resp.var)
{
  if (is.matrix(expl.var) | is.numeric(expl.var)) {
    expl.var <- as.data.frame(expl.var)
  }
  
  if (inherits(expl.var, 'Raster')) {
    expl.var <- raster::stack(expl.var, RAT = FALSE)
    if (any(is.factor(expl.var))) {
      expl.var <- .categorical_stack_to_terra(expl.var)
    } else {
      # as of 20/10/2022 the line below does not work if categorical variables
      # are present, hence the trick above. 
      expl.var <- rast(expl.var)
    }
  }
  
  if (inherits(expl.var, 'SpatialPoints')) {
    expl.var <- as.data.frame(expl.var@data)
  }
  if (inherits(expl.var, 'SpatVector')) {
    expl.var <- as.data.frame(expl.var)
  }
  
  if (inherits(expl.var, 'data.frame')) {
    if (nrow(expl.var) != length.resp.var) {
      stop("If explanatory variable is not a raster then dimensions of response variable and explanatory variable must match!")
    }
  }
  expl.var
}


## CHECK calib.lines names ------------------------------------------------------------------------
## used in bm_CrossValidation

.check_calib.lines_names <- function(calib.lines, expected_PA.names)
{
  full.names <- colnames(calib.lines)
  if (missing(expected_PA.names)) { # CV only
    expected_CV.names <- c(paste0("_allData_RUN", seq_len(ncol(calib.lines))), "_allData_allRun")
    .fun_testIfIn(TRUE, "colnames(calib.lines)", full.names, expected_CV.names)
  } else {
    err.msg <- "colnames(calib.lines) must follow the following format: '_PAx_RUNy' with x and y integer"
    # check for beginning '_'
    if (!all( substr(full.names, 1, 1) == "_")) {
      stop(err.msg)
    }
    PA.names <- sapply(strsplit(full.names, split = "_"), function(x) x[2])
    CV.names <- sapply(strsplit(full.names, split = "_"), function(x) x[3])
    .fun_testIfIn(TRUE, "Pseudo-absence dataset in colnames(calib.lines)", PA.names, expected_PA.names)
    if (!all( substr(CV.names, 1, 3) == "RUN")) {
      stop(err.msg)
    }
    CV.num <- sapply(strsplit(CV.names, split = "RUN"), function(x) x[2])
    if (any(is.na(as.numeric(CV.num)))) {
      stop(err.msg)
    }
  }
}


## TOOLS for SpatRaster ---------------------------------------------------------------------------
## used in biomod2_classes_1

##' @name rast.has.values
##' @title Check whether SpatRaster is an empty \code{rast()} 
##' @description Check whether SpatRaster is an empty \code{rast()} 
##' @param x SpatRaster to be checked
##' @return a boolean
##' @keywords internal

rast.has.values <- function(x)
{
  stopifnot(inherits(x, 'SpatRaster'))
  suppressWarnings(any(!is.na(values(x))))
}

##' @name .check_duplicated_cells
##' @title Check duplicated cells
##' @description Identify data that are contained in the same raster cells ; 
##' print warnings if need be and filter those data if asked for.
##' @param env a \code{SpatRaster} with environmental data
##' @param xy a \code{data.frame} with coordinates
##' @param sp a \code{vector} with species occurrence data
##' @param filter.raster a \code{boolean} 
##' @return a \code{list}
##' @keywords internal
##' @importFrom terra cellFromXY

.check_duplicated_cells <- function(env, xy, sp, filter.raster, PA.user.table = NULL)
{
  sp.cell <- duplicated(cellFromXY(env, xy))
  if (any(sp.cell)) {
    if (filter.raster) {
      sp <- sp[!sp.cell]
      xy <- xy[!sp.cell,]
      if (!is.null(PA.user.table)) {
        PA.user.table <- PA.user.table[!sp.cell, , drop = FALSE]
      }
      cat("\n !!! Some data are located in the same raster cell. 
          Only the first data in each cell will be kept as `filter.raster = TRUE`.")
    } else {
      cat("\n !!! Some data are located in the same raster cell. 
          Please set `filter.raster = TRUE` if you want an automatic filtering.")
    }
  }
  return(list("sp"  = sp, 
              "xy"  = xy,
              "PA.user.table" = PA.user.table))
}

## used in bm_PseudoAbsences, BIOMOD_Projection, BIOMOD_EnsembleForecasting
.get_data_mask <- function(expl.var, value.out = 1)
{
  stopifnot(inherits(expl.var, 'SpatRaster'))
  classify(as.numeric(!any(is.na(expl.var))),
           matrix(c(0, NA, 
                    1, value.out), 
                  ncol = 2, byrow = TRUE)) 
}



## GET variables class and ranges -----------------------------------------------------------------

## used bm_RunModelsLoop, BIOMOD_EnsembleModeling
.get_var_type <- function(data) { return(sapply(data, function(x) class(x)[1])) }

.get_var_range <- function(data)
{
  get_range <- function(x) {
    if (is.numeric(x)) {
      return(c(min = min(x, na.rm = TRUE), max = max(x, na.rm = TRUE)))
    } else if (is.factor(x)) {
      return(levels(x))
    }
  }
  xx <- lapply(data, get_range)
  names(xx) <- names(data)
  return(xx)
}

## used in BIOMOD_Projection, BIOMOD_EnsembleForecasting

##' @name .get_env_class
##' @title Get class of environmental data provided
##' @description Get class of environmental data provided
##' @param new.env object to identify
##' @return a character
##' @keywords internal

.get_env_class <- function(new.env)
{
  .fun_testIfInherits(TRUE, "new.env", new.env, c('data.frame', 'SpatRaster'))
  if (inherits(new.env,"data.frame")) {
    return("data.frame")
  }
  if (inherits(new.env,"SpatRaster")) {
    return("SpatRaster")
  }
  NULL
}


## CREATE WEIGHTS AUTOMATICALLY according to PREVALENCE -------------------------------------------
## used in BIOMOD_Modeling

.automatic_weights_creation <- function(resp, prev = 0.5, subset = NULL)
{
  if (is.null(subset)) { ## NEVER used
    subset <- rep(TRUE, length(resp))
  } else if (length(subset) != length(resp)) {
    stop("subset should be a vector of logical of the same length as resp")
  }
  
  nbPres <- sum(resp[subset] > 0, na.rm = TRUE)
  # number of true absences + pseudo absences to maintain true value of prevalence
  nbAbs <- length(resp[subset]) - nbPres
  weights <- rep(1, length(resp))
  
  if (nbAbs > nbPres) { # code absences as 1
    weights[which(resp > 0)] <- (prev * nbAbs) / (nbPres * (1 - prev))
  } else { # code presences as 1
    weights[which(resp == 0 | is.na(resp))] <- (nbPres * (1 - prev)) / (prev * nbAbs)
  }
  weights = round(weights[]) # to remove glm & gam warnings
  weights[!subset] <- 0
  
  return(weights)
}


## RESCALE MODEL WITH BINOMIAL GLM ----------------------------------------------------------------
## used in bm_RunModelsLoop

##' 
##' @importFrom stats glm binomial
##' 

.scaling_model <- function(data.to.rescale, data.ref = NULL, ...)
{
  args <- list(...)
  prevalence <- args$prevalence
  weights <- args$weights
  
  if (is.null(weights)) {
    if (!is.null(prevalence)) { # create weights to rise the defined prevalence
      weights <- .automatic_weights_creation(resp = data.ref, prev = prevalence)
    } else { # only 1 vector
      weights <- rep(1, length(data.ref))
    }
  }
  
  ## define a glm to scale predictions from 0 to 1
  new.data <- data.frame(ref = as.numeric(data.ref), pred = as.numeric(data.to.rescale))
  scaling_model <- glm(ref ~ pred, data = new.data, family = binomial(link = probit)
                       , x = TRUE, weights = weights)
  
  return(scaling_model)
}


## TRANSFORM models outputs getting specific slot -------------------------------------------------
## used in BIOMOD_Modeling, BIOMOD_EnsembleModeling

.transform_outputs_list <- function(obj.type, obj.out, out = 'evaluation')
{
  ## 0. CHECK object type ---------------------------------------------------------------
  .fun_testIfIn(TRUE, "obj.type", obj.type, c("mod", "em"))
  .fun_testIfIn(TRUE, "out", out, c("model", "calib.failure", "models.kept", "pred", "pred.eval", "evaluation", "var.import"))
  
  if (obj.type == "mod") {
    dim_names <- c("PA", "run", "algo")
  } else if (obj.type == "em") {
    dim_names <- c("merged.by", "filtered.by", "algo")
    dim_names.bis <- c("merged.by.PA", "merged.by.run", "merged.by.algo")
  }
  
  if (obj.type == "mod") {
    output <- foreach(i.dim1 = 1:length(obj.out), .combine = "rbind") %do%
      {
        res <- obj.out[[i.dim1]][[out]]
        if (!is.null(res) && length(res) > 0) {
          res <- as.data.frame(res)
          if (out %in% c("model", "calib.failure", "models.kept", "pred", "pred.eval")) {
            colnames(res) <- out
            res[["points"]] <- 1:nrow(res)
            res <- res[, c("points", out)]
          }
          col_names <- colnames(res)
          tmp.full.name <- obj.out[[i.dim1]][["model"]]
          if(out == "calib.failure" | is.null(tmp.full.name)){
            res[["full.name"]] <- NA
            return(res[, c("full.name", col_names)])
          } else {
            res[["full.name"]] <- tmp.full.name
            res[[dim_names[1]]] <- strsplit(tmp.full.name, "_")[[1]][2]
            res[[dim_names[2]]] <- strsplit(tmp.full.name, "_")[[1]][3]
            res[[dim_names[3]]] <- strsplit(tmp.full.name, "_")[[1]][4]
            return(res[, c("full.name", dim_names, col_names)])
          }
        }
      }
  } else if (obj.type == "em") {
    
    ## 1. GET dimension names -------------------------------------------------------------
    ## BIOMOD.models.out            -> dataset / run / algo
    ## BIOMOD.ensemble.models.out   -> mergedBy / filteredBy / algo
    dim1 <- length(obj.out)
    dim2 <- length(obj.out[[1]])
    dim3 <- length(obj.out[[1]][[1]])
    
    ## 2. GET outputs ---------------------------------------------------------------------
    output <- foreach(i.dim1 = 1:dim1, .combine = "rbind") %:%
      foreach(i.dim2 = 1:dim2, .combine = "rbind") %:% 
      foreach(i.dim3 = 1:dim3, .combine = "rbind") %do%
      {
        if (length(obj.out) >= i.dim1 &&
            length(obj.out[[i.dim1]]) >= i.dim2 &&
            length(obj.out[[i.dim1]][[i.dim2]]) >= i.dim3) {
          res <- obj.out[[i.dim1]][[i.dim2]][[i.dim3]][[out]]
          if (!is.null(res) && length(res) > 0) {
            res <- as.data.frame(res)
            if (out %in% c("model", "calib.failure", "models.kept", "pred", "pred.eval")) {
              colnames(res) <- out
              res[["points"]] <- 1:nrow(res)
              res <- res[, c("points", out)]
            }
            col_names <- colnames(res)
            res[[dim_names[1]]] <- names(obj.out)[i.dim1]
            res[[dim_names[2]]] <- names(obj.out[[i.dim1]])[i.dim2]
            res[[dim_names[3]]] <- names(obj.out[[i.dim1]][[i.dim2]])[i.dim3]
            tmp.full.name <- obj.out[[i.dim1]][[i.dim2]][[i.dim3]][["model"]]
            if(out == "calib.failure" | is.null(tmp.full.name)){
              res[["full.name"]] <- NA
            } else {
              res[["full.name"]] <- tmp.full.name
            }
            tmp <- names(obj.out)[i.dim1]
            res[[dim_names.bis[1]]] <- strsplit(tmp, "_")[[1]][1]
            res[[dim_names.bis[2]]] <- strsplit(tmp, "_")[[1]][2]
            res[[dim_names.bis[3]]] <- strsplit(tmp, "_")[[1]][3]
            res[[dim_names[1]]] <- NULL
            return(res[, c("full.name", dim_names.bis, dim_names[-1], col_names)])
          }
        }
      }
  }
  
  if (out %in% c("model", "calib.failure", "models.kept")) {
    if (is.null(output)) { 
      output <- 'none'
    } else { 
      output <- unique(as.character(output[[out]]))
    }
  }
  return(output)
}

## EXTRACT model names according to specific infos ------------------------------------------------
## used in biomod2_classes_4, BIOMOD_EnsembleModeling

.extract_modelNamesInfo <- function(model.names, obj.type, info, as.unique = TRUE)
{
  ## 0. CHECK parameters ----------------------------------------------------------------
  if (!is.character(model.names)) { stop("model.names must be a character vector") }
  .fun_testIfIn(TRUE, "obj.type", obj.type, c("mod", "em"))
  if (obj.type == "mod") {
    .fun_testIfIn(TRUE, "info", info, c("species", "PA", "run", "algo"))
  } else if (obj.type == "em") {
    .fun_testIfIn(TRUE, "info", info, c("species", "merged.by.PA", "merged.by.run", "merged.by.algo", "filtered.by", "algo"))
  }
  
  ## 1. SPLIT model.names ---------------------------------------------------------------
  info.tmp <- strsplit(model.names, "_")
  
  if (obj.type == "mod") {
    res <- switch(info
                  , species = sapply(info.tmp, function(x) x[1])
                  , PA = sapply(info.tmp, function(x) x[2])
                  , run = sapply(info.tmp, function(x) x[3])
                  , algo = sapply(info.tmp, function(x) x[4]))
  } else if (obj.type == "em") {
    res <- switch(info
                  , species = sapply(info.tmp, function(x) x[1])
                  , merged.by.PA = sapply(info.tmp, function(x) x[3])
                  , merged.by.run = sapply(info.tmp, function(x) x[4])
                  , merged.by.algo = sapply(info.tmp, function(x) x[5])
                  , filtered.by = sapply(info.tmp, function(x) sub(".*By", "", x[2]))
                  , algo = sapply(info.tmp, function(x) sub("By.*", "", x[2])))
  }
  
  ## 2. RETURN either the full vector, or only the possible values ----------------------
  if (as.unique == TRUE) {
    return(unique(res))
  } else {
    return(res)
  }
}

## FORMAT projection output as data.frame with appropriate columns --------------------------------
## used in BIOMOD_Projection, BIOMOD_EnsembleForecasting

.format_proj.df <- function(proj, obj.type = "mod")
{
  # argument check
  .fun_testIfInherits(TRUE, "proj", proj, "data.frame")
  .fun_testIfIn(TRUE, "obj.type", obj.type, c("mod","em"))
  # function content
  proj$points <- 1:nrow(proj)
  tmp <- melt(proj, id.vars =  "points")
  colnames(tmp) <- c("points", "full.name", "pred")
  tmp$full.name <- as.character(tmp$full.name)
  if(obj.type == "mod"){
    tmp$PA <- .extract_modelNamesInfo(tmp$full.name, obj.type = "mod", info = "PA", as.unique = FALSE)
    tmp$run <- .extract_modelNamesInfo(tmp$full.name, obj.type = "mod", info = "run", as.unique = FALSE)
    tmp$algo <- .extract_modelNamesInfo(tmp$full.name, obj.type = "mod", info = "algo", as.unique = FALSE)
    proj <- tmp[, c("full.name", "PA", "run", "algo", "points", "pred")]
  } else {
    tmp$merged.by.PA <- .extract_modelNamesInfo(tmp$full.name, obj.type = "em", info = "merged.by.PA", as.unique = FALSE)
    tmp$merged.by.run <- .extract_modelNamesInfo(tmp$full.name, obj.type = "em", info = "merged.by.run", as.unique = FALSE)
    tmp$merged.by.algo <- .extract_modelNamesInfo(tmp$full.name, obj.type = "em", info = "merged.by.algo", as.unique = FALSE)
    tmp$filtered.by <- .extract_modelNamesInfo(tmp$full.name, obj.type = "em", info = "filtered.by", as.unique = FALSE)
    tmp$algo <- .extract_modelNamesInfo(tmp$full.name, obj.type = "em", info = "algo", as.unique = FALSE)
    proj <- tmp[, c("full.name", "merged.by.PA", "merged.by.run", "merged.by.algo"
                    , "filtered.by", "algo", "points", "pred")]
  }
  proj
}

## FORMAT .format_proj.df output from long to wide with models as columns -------------------------
## used in biomod2_classes_3

##' @name .transform_model.as.col
##' @author Rémi Lemaire-Patin
##' 
##' @title Transform predictions data.frame from long to wide with models as columns
##' 
##' @description This function is used internally in \code{\link{get_predictions}}
##' to ensure back-compatibility with former output of \code{\link{get_predictions}}
##' (i.e. for \pkg{biomod2} version < 4.2-2). It transform a long \code{data.frame}
##' into a wide \code{data.frame} with each column corresponding to a single model.
##' 
##' \emph{Note that the function is intended for internal use but have been 
##' made available for compatibility with \pkg{ecospat}} 
##'
##' @param df a long \code{data.frame}, generally a standard output of 
##' \code{\link{get_predictions}}
##' 
##' @return a wide \code{data.frame}
##' @importFrom stats reshape
##' @export
##' @keywords internal

.transform_model.as.col <- function(df)
{
  df <- reshape(df[, c("full.name", "points", "pred")], idvar = "points"
                , timevar = "full.name", direction = "wide")[ , -1, drop = FALSE] 
  colnames(df) <- substring(colnames(df), 6)
  df
}


## EXTRACT models outputs according to specific infos ---------------------------------------------
## used in biomod2_classes_3.R

.filter_outputs.df <- function(out, subset.list)
{
  keep_subset <- foreach(sub.i = names(subset.list)) %do%
    {
      keep_lines <- 1:nrow(out)
      if (!is.null(subset.list[[sub.i]])) {
        .fun_testIfIn(TRUE, sub.i, subset.list[[sub.i]], unique(out[, sub.i]))
        keep_lines <- which(out[, sub.i] %in% subset.list[[sub.i]])
      }
      return(keep_lines)
    }
  keep_lines <- Reduce(intersect, keep_subset)
  if (length(keep_lines) == 0) {
    warning(paste0("No information corresponding to the given filter(s) ("
                   , paste0(names(subset.list), collapse = ', '), ")"))
  }
  return(keep_lines)
}

.filter_outputs.vec <- function(out_names, obj.type, subset.list)
{
  keep_layers <- out_names
  if ("full.name" %in% names(subset.list) && !is.null(subset.list[["full.name"]])) {
    .fun_testIfIn(TRUE, "full.name", subset.list[["full.name"]], out_names)
    keep_layers <- which(out_names %in% subset.list[["full.name"]])
  } else {
    keep_subset <- foreach(sub.i = names(subset.list)) %do%
      {
        keep_tmp <- 1:length(out_names)
        if (!is.null(subset.list[[sub.i]])) {
          .fun_testIfIn(TRUE, sub.i, subset.list[[sub.i]], .extract_modelNamesInfo(out_names, obj.type = obj.type, info = sub.i, as.unique = TRUE))
          keep_tmp <- grep(paste0(subset.list[[sub.i]], ifelse(sub.i %in% c("PA", "run"), "_", ""), collapse = "|"), out_names)
        }
        return(keep_tmp)
      }
    keep_layers <- Reduce(intersect, keep_subset)
  }
  if (length(keep_layers) == 0) {
    warning(paste0("No information corresponding to the given filter(s) ("
                   , paste0(names(subset.list), collapse = ', '), ")"))
  }
  return(keep_layers)
}


## EXTRACT projections info and specific layers ---------------------------------------------------
## used in biomod2_classes_3.R

## Extract info (out/binary/filtered + metric) from BIOMOD.projection.out
.extract_projlinkInfo <- function(obj)
{
  stopifnot(inherits(obj, 'BIOMOD.projection.out'))
  out.names <- obj@proj.out@link
  sp.name <- obj@sp.name
  sub(pattern     = paste0(".*?_",sp.name), 
      replacement = "",
      x           = out.names)
  out.binary <- grepl(pattern = "bin\\.",
                      x = out.names)
  out.filt <- grepl(pattern = "filt\\.",
                    x = out.names)
  out.type <- ifelse(out.binary,
                     "bin",
                     ifelse(out.filt,
                            "filt", 
                            "out"))
  # To avoid special case when the species name finish by "bin" (or "filt") ??
  # out.format <- tools::file_ext(out.names)
  # if(grepl(pattern = paste0(sp.name,"/.",out.format), x = out.names)){ 
  #   out.type <- "out"
  # }
  out.metric <- sapply(seq_len(length(out.names)), 
                       function(i){
                         if (out.type[i] == "out") {
                           return("none")
                         } else {
                           begin.cut <-  gsub(".*_", "", out.names[i]) 
                           return(  sub(paste0(out.type[i],".*"), "", begin.cut) )
                         }
                       })
  data.frame("link"   = out.names,
             "type"   = out.type,
             "metric" = out.metric)
}

## Return relevant layers indices from BIOMOD.projection.out given metric.binary or metric.select
.extract_selected.layers <- function(obj, metric.binary, metric.filter)
{
  ## argument  check ----------------------------------------------------------
  stopifnot(inherits(obj, "BIOMOD.projection.out"))
  
  if (!is.null(metric.binary) & !is.null(metric.filter)) {
    stop("cannot return both binary and filtered projection, please provide either `metric.binary` or `metric.filter` but not both.")
  }
  
  df.info <- .extract_projlinkInfo(obj)
  
  available.metrics.binary <- unique(df.info$metric[which(df.info$type == "bin")])
  available.metrics.filter <- unique(df.info$metric[which(df.info$type == "filt")])
  .fun_testMetric(TRUE, "metric.binary", metric.binary, available.metrics.binary)
  .fun_testMetric(TRUE, "metric.filter", metric.filter, available.metrics.filter)
  
  
  ## select layers  -----------------------------------------------------------
  if (!is.null(metric.binary)) {
    selected.layers <- which(df.info$type == "bin" & df.info$metric == metric.binary)  
  } else if ( !is.null(metric.filter)) {
    selected.layers <- which(df.info$type == "filt" & df.info$metric == metric.filter)  
  } else {
    selected.layers <- which(df.info$type == "out")  
  }
  selected.layers
}



## FILL BIOMOD.models.out elements in bm.mod or bm.em objects -------------------------------------
## used in BIOMOD_Modeling, BIOMOD_EnsembleModeling

.fill_BIOMOD.models.out <- function(objName, objValue, mod.out, inMemory = FALSE, nameFolder = ".")
{
  # backup case uses existing @val slot
  if(is.null(objValue)){
    eval(parse(text = paste0("objValue <- mod.out@", objName, "@val")))
  }
  
  save(objValue, file = file.path(nameFolder, objName), compress = TRUE)
  if (inMemory) {
    eval(parse(text = paste0("mod.out@", objName, "@val <- objValue")))
  }
  eval(parse(text = paste0("mod.out@", objName, "@inMemory <- ", inMemory)))
  eval(parse(text = paste0("mod.out@", objName, "@link <- file.path(nameFolder, objName)")))
  return(mod.out)
}



## GAM library loading ----------------------------------------------------------------------------
## used in biomod2_classes_4, bm_RunModelsLoop

##' @name .load_gam_namespace
##' @title Load library for GAM models
##' @description This function loads library for either GAM and BAM from mgcv package or 
##' for GAM from gam package.
##' @param model_subclass the subclass of GAM model
##' @keywords internal

.load_gam_namespace <- function(model_subclass = c("GAM_mgcv_gam","GAM_mgcv_bam", "GAM_gam_gam"))
{
  if (model_subclass %in% c("GAM_mgcv_gam", "GAM_mgcv_bam")) {
    # cat("\n*** unloading gam package / loading mgcv package")
    if (isNamespaceLoaded("gam")) { unloadNamespace("gam") }
    if (!isNamespaceLoaded("mgcv")) { 
      if(!requireNamespace('mgcv', quietly = TRUE)) stop("Package 'mgcv' not found")
    }
  }
  
  if (model_subclass == "GAM_gam_gam") {
    # cat("\n*** unloading mgcv package / loading gam package")
    if (isNamespaceLoaded("mgcv")) {
      if (isNamespaceLoaded("caret")) { unloadNamespace("caret") } ## need to unload caret before car
      if (isNamespaceLoaded("car")) { unloadNamespace("car") } ## need to unload car before mgcv
      unloadNamespace("mgcv")
    }
    if (!isNamespaceLoaded("gam")) { 
      if(!requireNamespace('gam', quietly = TRUE)) stop("Package 'gam' not found")
    }
  }
  invisible(NULL)
}


## CATEGORICAL variables tools --------------------------------------------------------------------

## used in biomod2_classes_4, BIOMOD_Modeling, bm_RunModelsLoop

##' @name .get_categorical_names
##' @title Get categorical variable names
##' @description Internal function to get categorical variables name from a data.frame.
##' @param df data.frame to be checked
##' @return a vector with the name of categorical variables
##' @keywords internal

.get_categorical_names <- function(df){
  unlist(sapply(names(df), function(x) {
    if (is.factor(df[, x])) { return(x) } else { return(NULL) }
  }))
}

## used in biomod2_classes_4, bm_RunModelsLoop

##' @name .categorical2numeric
##' 
##' @title Transform categorical into numeric variables
##' 
##' @description Internal function transform categorical variables in a 
##' data.frame into numeric variables. Mostly used with maxent which cannot 
##' read character
##' 
##' @param df data.frame to be transformed
##' @param categorical_var the names of categorical variables in df
##' @return a data.frame without categorical variables
##' @keywords internal

.categorical2numeric <- function(df, categorical_var)
{
  if (length(categorical_var) > 0) {
    for (this_var in categorical_var) {
      df[,this_var] <- as.numeric(df[, this_var])
    }
  }
  df
}

## used in bm_FindOptimStat, bm_PlotResponseCurves, BIOMOD_Projection, BIOMOD_EnsembleForecasting
.categorical_stack_to_terra <- function(myraster, expected_levels = NULL)
{
  myTerra <- rast(
    sapply(1:raster::nlayers(myraster), 
           function(thislayer){
             rast(myraster[[thislayer]])
           })
  )
  which.factor <- which(raster::is.factor(myraster))
  for (this.layer in which.factor) {
    this.levels <- raster::levels(myraster)[[this.layer]][[1]]
    ind.levels <- ifelse(ncol(this.levels) > 1, 2, 1)
    if (any(duplicated(this.levels[ , ind.levels]))) {
      stop("duplicated levels in environmental raster")
    }
    if (is.null(expected_levels)) {
      # no check to do, just formatting
      this.levels.df <- data.frame(ID = this.levels[,ind.levels],
                                   value = paste0(this.levels[,ind.levels]))
    } else {
      new.levels <- paste0(this.levels[,ind.levels])
      new.index <- this.levels[,1]
      this.layer.name <- names(myraster)[this.layer]
      fit.levels <- levels(expected_levels[,this.layer.name])
      if (!all(new.levels %in% fit.levels)) {
        cat("\n",
            "!! Levels for layer", colnames(expected_levels)[this.layer],
            " do not match.", new.levels[which(!new.levels %in% fit.levels)], "not found in fit data",
            "\n Fit data levels: ",paste0(fit.levels, collapse = " ; "),
            "\n Projection data levels: ",paste0(new.levels, collapse = " ; "))
        stop(paste0("Levels for ", colnames(expected_levels)[this.layer],
                    " do not match."))
      }
      levels.to.add <- which(!fit.levels %in% new.levels)
      if (length(levels.to.add) > 0) {
        max.index <- max(new.index)
        this.levels.df <- data.frame(ID = c(new.index,
                                            max.index +
                                              seq_along(levels.to.add)),
                                     value = c(new.levels, fit.levels[levels.to.add]))
      } else {
        this.levels.df <- data.frame(ID = new.index,
                                     value = new.levels)
      }
    }
    colnames(this.levels.df) <- c("ID", names(myTerra)[this.layer])
    try({myTerra <- categories(myTerra, layer = this.layer, this.levels.df)}, silent = TRUE)
  }
  return(myTerra)
}

## used in BIOMOD_Projection, BIOMOD_EnsembleForecasting
.check_env_levels <-  function(new.env, expected_levels)
{
  which.factor <- which(sapply(new.env, is.factor))
  if (inherits(new.env, 'SpatRaster')) {
    for (this.layer in which.factor) {
      this.layer.name <- names(new.env)[this.layer]
      this.levels <- cats(new.env)[[this.layer]]
      ind.levels <- ifelse(ncol(this.levels) > 1, 2, 1)
      new.levels <- paste0(this.levels[,ind.levels])
      if( any(duplicated(new.levels)) ) {
        stop("duplicated levels in `new.env`")
      }
      new.index <- this.levels[, 1]
      fit.levels <- levels(expected_levels[,this.layer.name])
      if (!all(new.levels %in% fit.levels)) {
        cat("\n",
            "!! Levels for layer", colnames(expected_levels)[this.layer],
            " do not match.", new.levels[which(!new.levels %in% fit.levels)], "not found in fit data",
            "\n Fit data levels: ",paste0(fit.levels, collapse = " ; "),
            "\n Projection data levels: ",paste0(new.levels, collapse = " ; "))
        stop(paste0("Levels for ", colnames(expected_levels)[this.layer],
                    " do not match."))
      }
      levels.to.add <- which(!fit.levels %in% new.levels)
      if (length(levels.to.add) > 0) {
        max.index <- max(new.index)
        this.levels.df <- data.frame(ID = c(new.index,
                                            max.index +
                                              seq_along(levels.to.add)),
                                     value = c(new.levels, fit.levels[levels.to.add]))
        
        colnames(this.levels.df) <- c("ID", names(new.env)[this.layer])
        new.env <- categories(new.env, layer = this.layer, 
                              this.levels.df)
      } 
    }
  } else {
    for (this.layer in which.factor) {
      this.layer.name <- names(new.env)[this.layer]
      this.levels <- levels(new.env[,this.layer.name])
      new.levels <- paste0(this.levels)
      fit.levels <- levels(expected_levels[,this.layer.name])
      if (!all(new.levels %in% fit.levels)) {
        cat("\n",
            "!! Levels for layer", colnames(expected_levels)[this.layer],
            " do not match.", new.levels[which(!new.levels %in% fit.levels)], "not found in fit data",
            "\n Fit data levels: ",paste0(fit.levels, collapse = " ; "),
            "\n Projection data levels: ",paste0(new.levels, collapse = " ; "))
        stop(paste0("Levels for ", colnames(expected_levels)[this.layer],
                    " do not match."))
      }
      levels.to.add <- which(!fit.levels %in% new.levels)
      if (length(levels.to.add) > 0) {
        new.env[ , this.layer.name] <- factor(new.env[ , this.layer.name], 
                                              levels = c(new.levels,
                                                         fit.levels[levels.to.add]))
      }
    }
  }
  new.env 
}


## PREDICTIONS TOOLS ------------------------------------------------------------------------------

## used in biomod2_classes_4

##' 
##' @importFrom terra rast classify subset
##' 

.run_pred <- function(object, Prev = 0.5, dat, mod.name = NULL)
{
  if (is.finite(object$deviance) && 
      is.finite(object$null.deviance) && 
      object$deviance != object$null.deviance)
  {
    if (inherits(dat, 'SpatRaster')) {
      pred <- predict(object = dat, model = object, type = "response", wopt = list(names = mod.name))
    } else {
      pred <- predict(object, dat, type = "response")
    }
  }
  
  if (!exists('pred')) {
    if (inherits(dat, 'SpatRaster')) {
      pred <- subset(dat, 1)
      if (Prev < 0.5) {
        pred <- classify(x = pred, rcl = c(-Inf, Inf, 0))
      } else {
        pred <- classify(x = pred, rcl = c(-Inf, Inf, 1))
      }
    } else {
      if (Prev < 0.5) {
        pred <- rep(0, nrow(dat))
      } else {
        pred <- rep(1, nrow(dat))
      }
    }
  }
  
  return(pred)
}

## Predict on SpatRaster with XGBOOST
## used in biomod2_classes_4

.xgb_pred <- function(model, data, ...) {
  predict(model, newdata = as.matrix(data), ...)
}


## Get formal predictions for ensemble models
## used in biomod2_classes_5
.get_formal_predictions <- function(object, newdata, on_0_1000, seedval)
{
  # make prediction of all models required
  formal_predictions <- sapply(object@model,
                               function(mod.name, dir_name, resp_name, modeling.id)
                               {
                                 ## check if model is loaded on memory
                                 if (is.character(mod.name)) {
                                   mod <- get(load(file.path(dir_name, resp_name, "models", modeling.id, mod.name)))
                                 }
                                 temp_workdir = NULL
                                 if (length(grep("MAXENT$", mod.name)) == 1) {
                                   temp_workdir = mod@model_output_dir
                                 }
                                 return(predict(mod, newdata = newdata, on_0_1000 = on_0_1000
                                                , temp_workdir = temp_workdir, seedval = seedval))
                               }, dir_name = object@dir_name, resp_name = object@resp_name, modeling.id = object@modeling.id)
  
  
  return(formal_predictions)
}


## ABUNDANCE tools --------------------------------------------------------------------------------

## used in biomod2_classes_1
.which.data.type <- function(resp.var)
{
  if (is.factor(resp.var)) {
    data.type <- ifelse(nlevels(resp.var) <= 2, "binary", 
                        ifelse(is.ordered(resp.var), "ordinal", "multiclass"))
  } else {
    element <- sort(unique(resp.var))
    if (identical(as.numeric(element), c(0, 1)) | identical(as.numeric(element), 1)) {
      data.type <- "binary"
    } else {
      if (identical(as.numeric(as.integer(element)), as.numeric(element))) {
        data.type <- "count"
      } else {
        data.type <- ifelse(!any(resp.var > 1), "relative", "abundance")
      }
    }
  } 
  return(data.type)
}

## get good models ou metrics depending of the data.type 
## used in BIOMOD.Modeling, bm_ModelingOptions, bm_RunModelsLoop

.avail.models.list <- function(data.type){
  switch(data.type,
    binary = c('ANN', 'CTA', 'DNN', 'FDA', 'GAM', 'GBM', 'GLM', 'MARS', 'MAXENT', 'MAXNET', 'RF','RFd', 'SRE', 'XGBOOST'),
    ordinal = c('CTA', 'DNN', 'FDA',  'GAM', 'GLM', 'MARS', 'RF', 'XGBOOST'),
    multiclass = c('CTA', 'DNN', 'FDA', 'MARS', 'RF', 'XGBOOST'),
    abundance = c('CTA', 'DNN', 'GAM', 'GBM', 'GLM', 'MARS', 'RF', 'XGBOOST'),
    count = c('CTA', 'DNN', 'GAM', 'GBM', 'GLM', 'MARS', 'RF', 'XGBOOST'),
    relative = c('CTA', 'DNN', 'GAM', 'GBM', 'GLM', 'MARS', 'RF', 'XGBOOST')
  )
}

.avail.eval.meth.list <- function(data.type){
  switch(data.type,
         binary = c('TSS', 'KAPPA', 'ACCURACY', 'BIAS', 'POD', 'FAR', 'POFD'
                    , 'SR', 'CSI', 'ETS', 'HK', 'HSS', 'OR', 'ORSS', 'AUCroc', 'AUCprg'
                    , 'BOYCE', 'MPA'),
         ordinal = c('Accuracy', 'Recall', 'Precision', 'F1'),
         multiclass = c('Accuracy', 'Recall', 'Precision', 'F1'),
         abundance = c('RMSE', 'MSE', 'MAE', 'Rsquared', 'Rsquared_aj', 'Max_error'),
         count = c('RMSE', 'MSE', 'MAE', 'Rsquared', 'Rsquared_aj', 'Max_error'),
         relative = c('RMSE', 'MSE', 'MAE', 'Rsquared', 'Rsquared_aj', 'Max_error')
  )
}

## used in bm_RunModelsLoop
.numeric2factor <- function(pred, obs, ordered = FALSE)
{
  nblevels <- length(levels(obs))
  pred <- round(pred)
  pred[pred > nblevels] <- nblevels
  pred[pred < 1] <- 1
  if (inherits(pred, "SpatRaster")){
    pred <- terra::subst(pred, 1:length(levels(obs)), levels(obs))
  } else {
    newlevels <- levels(obs)[sort(unique(pred))]
    pred <- factor(pred, labels = newlevels, ordered = ordered)
  }
  return(pred)
}

## CHOOSE the optimized cutoff between each ordinal variables
## used in bm_RunModelsLoop, bm_Tuning
.threshold_ordinal <- function(obs, fit, metric.eval)
{
  nlevels <- length(levels(obs))
  
  #manage extremum
  fit[fit < 1] <- 1
  fit[fit > nlevels] <- nlevels
  
  #Define all the possibility
  seq <- list(seq(0.2, 0.8, by = 0.1))
  possibility <- rep(seq, nlevels-1)
  names(possibility) <- paste0("Threshold_", 1:(nlevels-1))
  grid <- expand.grid(possibility)
  
  test_seq <- foreach(i = 1:nrow(grid), .combine = rbind) %do%
    {
      fit_factor <- fit
      for (j in 1:(nlevels - 1)) {
        fit_factor[fit_factor <= j + grid[i, j] & fit_factor >= j ] <- j
        fit_factor[fit_factor > j + grid[i, j] & fit_factor < j+1 ] <- j+1
      }
      stat <- bm_CalculateStatAbun(metric.eval, obs, fit_factor)
      return(data.frame(grid[i, ], "stat" = stat))
    }
  
  max <- test_seq[test_seq$stat == max(test_seq$stat), ]
  limits <- colMeans(max) #several maximum !?!
  
  fit_factor <- fit
  for (j in 1:(nlevels - 1)) {
    fit_factor[fit_factor <= j + limits[j] & fit_factor >= j ] <- j
    fit_factor[fit_factor > j + limits[j] & fit_factor < j + 1 ] <- j + 1
  }
  
  levels <- 1:length(levels(obs))
  names(levels) <- levels(obs)
  fit_factor <- factor(fit_factor, levels = levels, ordered = TRUE)
  
  return(list(limits = limits, fit_factor = fit_factor))
}

## Used in predict of EMmode-biomod2-model
.find_mode <- function(x) {
  if (all(is.na(x))) {return(NA)}
  freq_table <- table(x, useNA = "no")
  mode <- names(freq_table[freq_table == max(freq_table)])
  return(mode[1]) #If double, we take the first one ?? 
}

## Used in predict of EMfreq-biomod2-model
.find_freq_mode <- function(x) {
  if (all(is.na(x))) {return(NA)}
  freq_table <- table(x, useNA = "no")
  freq <- as.numeric(max(freq_table)/length(x))
  return(freq)
}

Try the biomod2 package in your browser

Any scripts or data that you put into this service are public.

biomod2 documentation built on Jan. 30, 2026, 5:08 p.m.