#' @rdname maxent
#' @export
maxent <- function(x, ...) UseMethod("maxent")
#' Maximum Entropy model
#'
#' Create a "MaxEnt" (Maximum Entropy) species distribution model.
#'
#' This function will use the java implementation of MaxEnt by Phillips,
#' Dudik and Schapire, instead of the package maxnet.
#' Check the MaxEnt documentation for more information.
#' Only suitable for presence/absence or presence/background data.
#' This is similar to \code{\link[dismo]{maxent}}, with less options (for instance, this will not accept rasters),
#' but it is faster, specially the predict function.
#' This function do not support maxent.jar replicates, as those should be handled by caret.
#'
#' @param x A data frame of predictors
#' @param y A response factor, same length as \code{nrow(x)}. \strong{First level is assumed to be "presence" data}.
#' @param reg Regularization features to use. l - linear, q - quadratic, p - product, t - threshold, h - hinge
#' @param beta Beta multiplier
#' @param niter Maximum number of iterations
#' @param outputType One of 'Cloglog', 'Logistic', 'Cumulative', 'Raw'
#' @param thrtype Which threshold type to use. One of 'Min_Presence','10\%_Presence',
#' 'Sens=Spec','MaxSens+Spec','Balance','Entropy'
#' @param clamp logical. Apply clamping in prediction?
#' @param filesPath Path used to store MaxEnt output files
#' @param maxentPath Path of maxent.jar. If NULL, it will use maxent.jar from the dismo package
#' @param flags Other flags to maxent. Should be in the format
#' \code{c('addsamplestobackground=false', 'beta_hinge=1.5')}.
#' @return An S3 object of class 'maxent' build using maxent.jar, including:
#' \itemize{
#' \item path - The path to the files generated by maxent.jar.
#' \item params - A list with parameters used to build the model.
#' \item results - A data.frame with multiple results from the model.
#' \item predicted - A vector with predictions made by the model on the trained data.
#' }
#' @examples
#' \dontrun{
#' maxent(x, y)
#' maxent(x, y, reg = "lq", maxentPath="~/maxent.jar", filesPath="/run/shm/maxent")
#'
#' # using caret
#' # use 'categorical' if there are categorical variables in traindata
#' model.maxent = train(species ~ ., data=traindata,
#' method=maxentCaret, metric="ROC", trControl = control,
#' tuneGrid = expand.grid(reg = c("lq","l"), beta = seq(0.5, 2, 0.2)),
#' categorical = c("factor1", "factor2"))
#' }
#' @seealso \code{\link{methods.maxent}}
#' @rdname maxent
#' @export
maxent.default <- function(x, y, reg = "lqph", beta = 1, niter = 500, outputType = "Cloglog",
thrtype = "MaxSens+Spec", clamp = TRUE,
filesPath = paste0(tempdir(), '/maxent'), maxentPath = NULL, flags = NULL, ...) {
if (is.null(maxentPath)) {
if (system.file(package = "dismo") == "") {
stop("Package dismo not found, provide a path to maxent or install dismo")
}
maxentPath <- paste0(system.file(package = "dismo"), "/java/maxent.jar", sep = '')
}
ylevels <- levels(y)
if (length(ylevels) > 2) {
stop("Response variable (y) must have two classes (presence/absence) for Maxent")
}
model <- list()
# create folder
thereisfile <- TRUE
while (thereisfile) {
f <- paste(round(stats::runif(10) * 9), collapse = "")
outdir <- paste0(filesPath, '/', f)
thereisfile <- file.exists(outdir)
}
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
model$call <- match.call()
model$maxentPath <- maxentPath
model$path <- outdir
model$filesPath <- filesPath
model$type <- "Classification"
model$params <- list()
model$params$clamp <- clamp
model$params$outputType <- outputType
model$params$reg.features <- reg
model$params$beta <- beta
model$params$thr <- maxentThr(thrtype)
args <- paste0('betamultiplier=', beta)
if (reg == 'auto') {
args <- c(args, 'autofeature=true')
} else {
args <- c(args, 'autofeature=false',
paste0('linear=', if (grepl('l', reg)) 'true' else 'false'),
paste0('quadratic=', if (grepl('q', reg)) 'true' else 'false'),
paste0('product=', if (grepl('p', reg)) 'true' else 'false'),
paste0('hinge=', if (grepl('h', reg)) 'true' else 'false'),
paste0('threshold=', if (grepl('t', reg)) 'true' else 'false'))
}
model$classes <- ylevels
model$xNames <- colnames(x)
# detect factors
fi <- sapply(x, is.factor)
if (any(fi)) {
factors <- colnames(x)[fi]
model$params$factors <- factors
args <- c("-t", "FACTOR_", args)
model$xlevels <- lapply(x[, fi], levels)
x[, fi] <- sapply(x[, fi], as.numeric)
colnames(x)[fi] <- paste0("FACTOR_", factors)
} else {
model$xlevels <- list()
}
# detect flags
if (!is.null(flags)) {
flags_used <- c("removeduplicates", "maximumbackground", "plots", "pictures", "outputformat",
"autofeature", "-t", "linear", "quadratic", "product", "hinge", "threshold",
"betamultiplier", "outputdirectory", "samplesfile", "environmentallayers",
"maximumiterations", "autorun", "visible", "replicates")
if (any(sapply(flags_used, grepl, x = flags)))
stop("'flags' should not include any of ", paste(flags_used, collapse = ", "))
args <- c(args, flags)
}
# write presence table
pres <- y == ylevels[1]
int_order <- order(c(which(pres), which(!pres)))
spp.val <- x[pres, ]
species <- rep("species", nrow(spp.val))
spp.xy <- seq_len(nrow(spp.val))
model_pres <- paste0(outdir, "/presence")
data.table::fwrite(data.table::data.table(species, x = spp.xy, y = spp.xy, spp.val),
file = model_pres, quote = FALSE)
# write absence table
bg.val <- x[!pres, ]
species <- rep("bg", nrow(bg.val))
bg.xy <- seq_len(nrow(bg.val))
model_bg <- paste0(outdir, "/ausence")
data.table::fwrite(data.table::data.table(species, x = bg.xy, y = bg.xy, bg.val), file = model_bg, quote = FALSE)
# make model
invisible(system2('java',
args = c("-mx1024m", "-jar", maxentPath, "-e", model_bg, "-s", model_pres, "-o",
outdir, "-m", niter, "-z", "-a", "plots=false", "pictures=false",
paste0("maximumbackground=", nrow(bg.val) + 1),
paste0("outputformat=", tolower(outputType)), "removeduplicates=false", args)))
# store lambda
model$lambda <- paste0(outdir, "/species.lambdas")
# store results
results <- data.table::fread(paste0(outdir, "/maxentResults.csv"), header = FALSE, sep = ",")
results <- data.table::transpose(results)[-1, ]
colnames(results) <- c("variable", "value")
results$value <- as.numeric(results$value)
data.table::setDF(results)
model$results <- results
# store predictions
pres.pred <- data.table::fread(paste0(outdir, "/species_samplePredictions.csv"), sep = ",", header = TRUE)
pres.pred <- pres.pred[, paste0(model$params$outputType, " prediction"), with = FALSE][[1]]
bg.pred <- data.table::fread(paste0(outdir, "/species.csv"), sep = ",", header = TRUE)[[3]]
model$y <- y
model$predicted <- c(pres.pred, bg.pred)[int_order]
class(model) <- "maxent"
# unlink(c(model_bg, model_pres))
return(model)
}
#' @param form A formula of the form y ~ x1 + x2 + ...
#' @param data Data frame from which variables specified in formula
#' @param ... Arguments passed to default method.
#' @param subset An index vector specifying the cases to be used in the training sample.
#' @param na.action A function to specify the action to be taken if NAs are found.
#' The default action is for the procedure to fail.
#' @rdname maxent
#' @export
maxent.formula <- function(form, data, ..., subset, na.action) {
m <- match.call(expand.dots = FALSE)
m$... <- NULL
if (!("na.action" %in% names(m))) m$na.action <- quote(stats::na.fail)
m[[1]] <- quote(stats::model.frame)
m <- eval.parent(m)
if (nrow(m) < 1) stop("Every row has at least one missing value")
data <- as.data.frame(data)
Terms <- attr(m, "terms")
x <- data[, attr(Terms, "term.labels")]
y <- stats::model.response(m)
model <- maxent.default(x, y, ...)
model$terms <- Terms
model$call <- match.call()
return(model)
}
#' @param categorical A character vector with column names to be treated as categorical variable.
#' @note
#' When using a data.frame, variables defined as factors are automatically assigned as
#' categorical variables. When using a matrix or a formula with caret, you must the argument
#' \code{categorical} if any of the variables are factors/categorical.\cr
#' Due to the way \code{caret::train} handles the data when using a formula, it is not possible to
#' automatically get which variables are categorical.
#' @rdname maxent
#' @export
maxent.matrix <- function(x, y, ..., categorical = NULL) {
x <- as.data.frame(x)
if (!is.null(categorical)) {
x <- matrix.inverse(x, categorical)
}
return(maxent.default(x, y, ...))
}
matrix.inverse <- function(x, categorical) {
index <- numeric()
factors <- list()
for (c in categorical) {
i <- grep(c, colnames(x))
if (length(i) > 0) {
index <- c(index, i)
if (length(i) > 1) {
# melt dummy variables
factors[[c]] <- factor(apply(x[, i], 1, which.max2))
} else {
factors[[c]] <- factor(x[, i])
}
} else {
warning("categorical ", c, " provided, but no match were found in column names.")
}
}
x <- cbind(x[, -index], data.frame(factors))
return(x)
}
which.max2 <- function(x) {
if (all(x == 0)) {
return(0)
} else {
return(which.max(x))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.