rmaxent: Tools for working with Maxent in R

Documented in project

#' Project a fitted Maxent model
#'
#' Project a fitted Maxent model by predicting to new environmental data.
#'
#' @param lambdas Either (1) a `MaxEnt` fitted model object (fitted with 
#'   the `maxent` function in the `dismo` package), (2) a file path to
#'   a Maxent .lambdas file, or (3) a `lambdas` object returned by 
#'   [parse_lambdas()].
#' @param newdata A `RasterStack`, `RasterBrick`, `list`,
#'   `data.frame`, `data.table`, or `matrix` that has
#'   layers/elements/columns whose names correspond to the names of predictors
#'   used to fit the model. These layers/elements/columns must all have the same
#'   length.
#' @param return_lfx Logical. Should `Raster` layers be returned giving
#'   lambda*feature values for each feature with a non-zero lambda? Currently
#'   ignored if `newdata` is not a `Raster*` object.
#' @param mask (Optional; requires that `newdata` is a `Raster*` 
#'   object.) A `Raster` object with `NA` values in cells for which 
#'   the model should _not_ be projected. These cells will be assigned
#'   `NA` in the returned output.
#' @param quiet Logical. Should projection progress be reported?   
#' @return If `newdata` is a `RasterStack` or `RasterBrick`, a 
#'   list with three elements: 
#' * `prediction_raw`: a `Raster` layer giving the raw Maxent
#'   prediction;
#' * `prediction_logistic`: a `Raster` layer giving the
#'   logistic Maxent prediction; and
#' * `prediction_cloglog`: a `Raster` layer giving the
#'   cloglog Maxent prediction.
#' If `newdata` is _not_ a `RasterStack` or `RasterBrick`,
#' the raster layers will be replaced with `data.table`s in the returned
#' list.
#'
#' Additionally, if `newdata` is a `RasterStack` or `RasterBrick`
#' and `return_lfx` is `TRUE`, the returned list will include 
#' `prediction_lfx` (the logit scores for the linear predictor), and 
#' `lfx_all` (the contributions to `prediction_lfx` of each feature
#' with a non-zero lambda).
#' @details `project` uses feature weights described in a .lambas
#'   file or `MaxEnt` object to predict a Maxent model to environmental
#'   data. This function performs the projection entirely in R, without the need
#'   for the Maxent Java software. For tested datasets, it performs the 
#'   projection in roughly one third of the time taken for the same projection
#'   by maxent.jar.
#' @section Warning:
#' This function is still in development, and no guarantee is made for the
#' accuracy of its projections.
#' @keywords maxent, predict, project
#' @references 
#' * Wilson, P. W. (2009) [_Guidelines for computing MaxEnt model output values from a lambdas file_](https://gsp.humboldt.edu/olm/Courses/GSP_570/Learning%20Modules/10%20BlueSpray_Maxent_Uncertinaty/MaxEnt%20lambda%20files.pdf).
#' * _Maxent software for species habitat modeling, version 3.3.3k_ help file (software freely available [here](https://www.cs.princeton.edu/~schapire/maxent/)).
#' @seealso [read_mxe()]
#' @importFrom raster raster mask compareRaster as.data.frame
#' @importFrom data.table data.table as.data.table is.data.table :=
#' @importFrom methods is
#' @importFrom stats complete.cases plogis
#' @export
#' @examples
#' \dontrun{
#' # Below we use the dismo::maxent example to fit a Maxent model:
#' fnames <- list.files(system.file('ex', package='dismo'), '\\.grd$', 
#'                      full.names=TRUE )
#' predictors <- stack(fnames)
#' occurrence <- system.file('ex/bradypus.csv', package='dismo')
#' occ <- read.table(occurrence, header=TRUE, sep=',')[,-1]
#' me <- maxent(predictors, occ, factors='biome')
#' 
#' # ... and then predict it to the full environmental grids:
#' pred <- project(me, predictors)
#' # This is equivalent to using the predict method for MaxEnt objects:
#' pred2 <- predict(me, predictors, args='outputformat=logistic')
#' pred3 <- predict(me, predictors, args='outputformat=cloglog')
#' 
#' all.equal(values(pred$prediction_logistic), values(pred2))
#' all.equal(values(pred$prediction_cloglog), values(pred3))
#' }
project <- function(lambdas, newdata, return_lfx=FALSE, mask, quiet=FALSE) {
  if(!missing(mask)) {
    if(!methods::is(mask, 'RasterLayer')) {
      stop('mask should be a RasterLayer object')
    } else {
      if(!methods::is(newdata, 'Raster')) {
        stop('If mask is provided, newdata should be a Raster object with the',
             'same dimensions, extent, and CRS.')
      }
      if(!raster::compareRaster(mask, newdata, stopiffalse=FALSE))
        stop('If mask is provided, newdata should be a Raster object with the',
             'same dimensions, extent, and CRS.')
    }
  }
  if(!is.lambdas(lambdas)) lambdas <- parse_lambdas(lambdas)
  meta <- lambdas[-1]
  lambdas <- lambdas[[1]]
  is_cat <- unique(
    gsub('\\(|==.*\\)', '', lambdas[lambdas$type=='categorical', 'feature']))
  nms <- unique(unlist(strsplit(lambdas$var[lambdas$lambda != 0], ',')))
  clamp_limits <- data.table::data.table(lambdas[lambdas$type=='linear', ])
  lambdas <- lambdas[lambdas$lambda != 0, ]
  lambdas <- split(lambdas, c('other', 'hinge')[
    grepl('hinge', lambdas$type) + 1])
  if (is(newdata, 'RasterStack') | is(newdata, 'RasterBrick') | is(newdata, 'Raster')) {
    pred_raw <- pred_logistic <- pred_cloglog <- pred_lfx <- raster::raster(newdata)
    if(!missing(mask)) {
      newdata <- raster::mask(newdata, mask)
    }
    newdata <- data.table::as.data.table(newdata[])
  }
  if (is.matrix(newdata)) newdata <- data.table::as.data.table(newdata)
  if (is.list(newdata) & !is.data.frame(newdata)) {
    if(length(unique(sapply(newdata, length))) != 1) 
      stop('newdata was provided as a list, but its elements vary in length.')
    newdata <- data.table::as.data.table(newdata)
  }
  if (!is.data.frame(newdata))
    stop('newdata must be a list, data.table, data.frame, matrix, RasterStack,',
         'or RasterBrick.')
  if (!data.table::is.data.table(newdata)) 
    newdata <- data.table::as.data.table(newdata)
  if(!all(nms %in% names(newdata))) {
    stop(sprintf('Variables missing in newdata: %s', 
                 paste(setdiff(nms, colnames(newdata)), collapse=', ')))
  }
  if (any(!names(newdata) %in% nms)) {
    newdata <- newdata[, setdiff(names(newdata), nms) := NULL]
  }
  na <- !complete.cases(newdata)
  newdata <- newdata[!na]
  
  # Clamp newdata
  invisible(lapply(setdiff(names(newdata), is_cat), function(x) {
    clamp_max <- clamp_limits[clamp_limits$feature==x, max]
    clamp_min <- clamp_limits[clamp_limits$feature==x, min]
    newdata[, c(x) := pmax(pmin(get(x), clamp_max), clamp_min)]
  }))
  
  k_hinge <- if('hinge' %in% names(lambdas)) nrow(lambdas$hinge) else 0
  k_other <- if('other' %in% names(lambdas)) nrow(lambdas$other) else 0
  k <- k_hinge + k_other

  txt <- sprintf('\rCalculating contribution of feature %%%1$dd of %%%1$dd', 
                 nchar(k))
  lfx <- numeric(nrow(newdata))
  lfx_all <- setNames(vector('list', sum(sapply(lambdas, nrow))),
                      unlist(lapply(lambdas[2:1], function(x) x$feature)))
  
  if(k_other > 0) {
    for (i in seq_len(k_other)) {
      if(!quiet) cat(sprintf(txt, i, k))
      x <- with(newdata, eval(parse(text=lambdas$other$feature[i])))
      # clamp feature
      x <- pmin(pmax(x, lambdas$other$min[i]), lambdas$other$max[i])
      x01 <- (x - lambdas$other$min[i]) / 
        (lambdas$other$max[i] - lambdas$other$min[i])
      lfx_all[[i]] <- lambdas$other$lambda[i] * x01
      lfx <- lfx + lfx_all[[i]]
    }
    rm(x, x01)
  }
  
  if(k_hinge > 0) {
    for (i in seq_len(nrow(lambdas$hinge))) {
      if(!quiet) cat(sprintf(txt, k_other + i, k))
      x <- with(newdata, get(sub("'|`", "", lambdas$hinge$feature[i])))
      x01 <- (x - lambdas$hinge$min[i]) / (lambdas$hinge$max[i] - lambdas$hinge$min[i])
      if (lambdas$hinge$type[i]=='reverse_hinge') {
        lfx_all[[k_other + i]] <- 
          lambdas$hinge$lambda[i] * (x < lambdas$hinge$max[i]) * (1-x01)
      } else {
        lfx_all[[k_other + i]] <- 
          lambdas$hinge$lambda[i] * (x >= lambdas$hinge$min[i]) * x01
      }
      lfx <- lfx + lfx_all[[k_other + i]]
    }
    rm(x, x01)
  }
  
  ln_raw <- lfx - meta$linearPredictorNormalizer - log(meta$densityNormalizer)
  raw <- exp(ln_raw)
  logit <- meta$entropy + ln_raw
  cloglog <- 1 - exp(-exp(meta$entropy) * raw)
  logistic <- stats::plogis(logit)
  
  #linpred <- rep(NA_real_, length(na))
  #linpred[!na] <- lfx
  if(exists('pred_raw', inherits=FALSE)) {
    pred_raw[which(!na)] <- raw
    pred_logistic[which(!na)] <- logistic
    pred_cloglog[which(!na)] <- cloglog
    pred_lfx[which(!na)] <- lfx
    out <- list(prediction_raw=pred_raw,
                prediction_logistic=pred_logistic,
                prediction_cloglog=pred_cloglog)
    if(isTRUE(return_lfx)) {
      lfx_each <- lapply(lfx_all, function(x) {
        r <- raster(pred_raw)
        r[which(!na)] <- x
        r
      })
      out <- c(out, 
               list(prediction_lfx=pred_lfx,
                    lfx_all=lfx_each))
    } 
  } else {
    prediction_raw <- prediction_logistic <- prediction_cloglog <-
      rep(NA_real_, length(na))
    prediction_raw[!na] <- raw
    prediction_logistic[!na] <- logistic
    prediction_cloglog[!na] <- cloglog
    #prediction_lfx[!na] <- lfx
    out <- list(prediction_raw=prediction_raw,
                prediction_logistic=prediction_logistic,
                prediction_cloglog=prediction_cloglog)
  }
  return(out)
}