R/maxent.R

Defines functions which.max2 matrix.inverse maxent.matrix maxent.formula maxent.default maxent

Documented in maxent maxent.default maxent.formula maxent.matrix

#' @rdname maxent
#' @export
maxent <- function(x, ...) UseMethod("maxent")

#' Maximum Entropy model
#'
#' Create a "MaxEnt" (Maximum Entropy) species distribution model.
#'
#' This function will use the java implementation of MaxEnt by Phillips,
#' Dudik and Schapire, instead of the package maxnet.
#' Check the MaxEnt documentation for more information.
#' Only suitable for presence/absence or presence/background data.
#' This is similar to \code{\link[dismo]{maxent}}, with less options (for instance, this will not accept rasters),
#' but it is faster, specially the predict function.
#' This function do not support maxent.jar replicates, as those should be handled by caret.
#'
#' @param x A data frame of predictors
#' @param y A response factor, same length as \code{nrow(x)}. \strong{First level is assumed to be "presence" data}.
#' @param reg Regularization features to use. l - linear, q - quadratic, p - product, t - threshold, h - hinge
#' @param beta Beta multiplier
#' @param niter Maximum number of iterations
#' @param outputType One of 'Cloglog', 'Logistic', 'Cumulative', 'Raw'
#' @param thrtype Which threshold type to use. One of 'Min_Presence','10\%_Presence',
#' 'Sens=Spec','MaxSens+Spec','Balance','Entropy'
#' @param clamp logical. Apply clamping in prediction?
#' @param filesPath Path used to store MaxEnt output files
#' @param maxentPath Path of maxent.jar. If NULL, it will use maxent.jar from the dismo package
#' @param flags Other flags to maxent. Should be in the format
#' \code{c('addsamplestobackground=false', 'beta_hinge=1.5')}.
#' @return An S3 object of class 'maxent' build using maxent.jar, including:
#' \itemize{
#'   \item path - The path to the files generated by maxent.jar.
#'   \item params - A list with parameters used to build the model.
#'   \item results - A data.frame with multiple results from the model.
#'   \item predicted - A vector with predictions made by the model on the trained data.
#' }
#' @examples
#' \dontrun{
#' maxent(x, y)
#' maxent(x, y, reg = "lq", maxentPath="~/maxent.jar", filesPath="/run/shm/maxent")
#'
#' # using caret
#' # use 'categorical' if there are categorical  variables in traindata
#' model.maxent = train(species ~ ., data=traindata,
#'                      method=maxentCaret, metric="ROC", trControl = control,
#'                      tuneGrid = expand.grid(reg = c("lq","l"), beta = seq(0.5, 2, 0.2)),
#'                      categorical = c("factor1", "factor2"))
#' }
#' @seealso \code{\link{methods.maxent}}
#' @rdname maxent
#' @export
maxent.default <- function(x, y, reg = "lqph", beta = 1, niter = 500, outputType = "Cloglog",
                   thrtype = "MaxSens+Spec", clamp = TRUE,
                   filesPath = paste0(tempdir(), '/maxent'), maxentPath = NULL, flags = NULL, ...) {

    if (is.null(maxentPath)) {
        if (system.file(package = "dismo") == "") {
            stop("Package dismo not found, provide a path to maxent or install dismo")
        }
        maxentPath <- paste0(system.file(package = "dismo"), "/java/maxent.jar", sep = '')
    }

    ylevels <- levels(y)
    if (length(ylevels) > 2) {
        stop("Response variable (y) must have two classes (presence/absence) for Maxent")
    }

    model <- list()

    # create folder
    thereisfile <- TRUE
    while (thereisfile) {
        f <- paste(round(stats::runif(10) * 9), collapse = "")
        outdir <- paste0(filesPath, '/', f)

        thereisfile <- file.exists(outdir)
    }

    dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

    model$call <- match.call()
    model$maxentPath <- maxentPath
    model$path <- outdir
    model$filesPath <- filesPath
    model$type <- "Classification"
    model$params <- list()
    model$params$clamp <- clamp
    model$params$outputType <- outputType
    model$params$reg.features <- reg
    model$params$beta <- beta
    model$params$thr <- maxentThr(thrtype)

    args <- paste0('betamultiplier=', beta)

    if (reg == 'auto') {
        args <- c(args, 'autofeature=true')
    } else {
        args <- c(args, 'autofeature=false',
                  paste0('linear=', if (grepl('l', reg)) 'true' else 'false'),
                  paste0('quadratic=', if (grepl('q', reg)) 'true' else 'false'),
                  paste0('product=', if (grepl('p', reg)) 'true' else 'false'),
                  paste0('hinge=', if (grepl('h', reg)) 'true' else 'false'),
                  paste0('threshold=', if (grepl('t', reg)) 'true' else 'false'))
    }


    model$classes <- ylevels
    model$xNames <- colnames(x)

    # detect factors
    fi <- sapply(x, is.factor)
    if (any(fi)) {
        factors <- colnames(x)[fi]
        model$params$factors <- factors
        args <- c("-t", "FACTOR_", args)
        model$xlevels <- lapply(x[, fi], levels)
        x[, fi] <- sapply(x[, fi], as.numeric)
        colnames(x)[fi] <- paste0("FACTOR_", factors)
    } else {
        model$xlevels <- list()
    }

    # detect flags
    if (!is.null(flags)) {
        flags_used <- c("removeduplicates", "maximumbackground", "plots", "pictures", "outputformat",
                        "autofeature", "-t", "linear", "quadratic", "product", "hinge", "threshold",
                        "betamultiplier", "outputdirectory", "samplesfile", "environmentallayers",
                        "maximumiterations", "autorun", "visible", "replicates")
        if (any(sapply(flags_used, grepl, x = flags)))
            stop("'flags' should not include any of ", paste(flags_used, collapse = ", "))

        args <- c(args, flags)
    }


    # write presence table
    pres <- y == ylevels[1]
    int_order <- order(c(which(pres), which(!pres)))

    spp.val <- x[pres, ]
    species <- rep("species", nrow(spp.val))
    spp.xy <- seq_len(nrow(spp.val))
    model_pres <- paste0(outdir, "/presence")

    data.table::fwrite(data.table::data.table(species, x = spp.xy, y = spp.xy, spp.val),
                       file = model_pres, quote = FALSE)

    # write absence table
    bg.val <- x[!pres, ]
    species <- rep("bg", nrow(bg.val))
    bg.xy <- seq_len(nrow(bg.val))
    model_bg <- paste0(outdir, "/ausence")

    data.table::fwrite(data.table::data.table(species, x = bg.xy, y = bg.xy, bg.val), file = model_bg, quote = FALSE)


    # make model
    invisible(system2('java',
        args = c("-mx1024m", "-jar", maxentPath, "-e", model_bg, "-s", model_pres, "-o",
                 outdir, "-m", niter, "-z", "-a", "plots=false", "pictures=false",
                 paste0("maximumbackground=", nrow(bg.val) + 1),
                 paste0("outputformat=", tolower(outputType)), "removeduplicates=false", args)))


    # store lambda
    model$lambda <- paste0(outdir, "/species.lambdas")

    # store results
    results <- data.table::fread(paste0(outdir, "/maxentResults.csv"), header = FALSE, sep = ",")

    results <- data.table::transpose(results)[-1, ]
    colnames(results) <- c("variable", "value")
    results$value <- as.numeric(results$value)
    data.table::setDF(results)

    model$results <- results


    # store predictions
    pres.pred <- data.table::fread(paste0(outdir, "/species_samplePredictions.csv"), sep = ",", header = TRUE)
    pres.pred <- pres.pred[, paste0(model$params$outputType, " prediction"), with = FALSE][[1]]

    bg.pred <- data.table::fread(paste0(outdir, "/species.csv"), sep = ",", header = TRUE)[[3]]

    model$y <- y
    model$predicted <- c(pres.pred, bg.pred)[int_order]

    class(model) <- "maxent"
    # unlink(c(model_bg, model_pres))

    return(model)
}


#' @param form A formula of the form y ~ x1 + x2 + ...
#' @param data Data frame from which variables specified in formula
#' @param ... Arguments passed to default method.
#' @param subset An index vector specifying the cases to be used in the training sample.
#' @param na.action A function to specify the action to be taken if NAs are found.
#' The default action is for the procedure to fail.
#' @rdname maxent
#' @export
maxent.formula <- function(form, data, ..., subset, na.action)  {
    m <- match.call(expand.dots = FALSE)
    m$... <- NULL
    if (!("na.action" %in% names(m))) m$na.action <- quote(stats::na.fail)
    m[[1]] <- quote(stats::model.frame)
    m <- eval.parent(m)
    if (nrow(m) < 1) stop("Every row has at least one missing value")

    data <- as.data.frame(data)
    Terms <- attr(m, "terms")
    x <- data[, attr(Terms, "term.labels")]
    y <- stats::model.response(m)

    model <- maxent.default(x, y, ...)
    model$terms <- Terms
    model$call <- match.call()

    return(model)
}



#' @param categorical A character vector with column names to be treated as categorical variable.
#' @note
#' When using a data.frame, variables defined as factors are automatically assigned as
#' categorical variables. When using a matrix or a formula with caret, you must the argument
#' \code{categorical} if any of the variables are factors/categorical.\cr
#' Due to the way \code{caret::train} handles the data when using a formula, it is not possible to
#' automatically get which variables are categorical.
#' @rdname maxent
#' @export
maxent.matrix <- function(x, y, ..., categorical = NULL) {
    x <- as.data.frame(x)
    if (!is.null(categorical)) {
        x <- matrix.inverse(x, categorical)
    }
    return(maxent.default(x, y, ...))
}



matrix.inverse <- function(x, categorical) {
    index <- numeric()
    factors <- list()
    for (c in categorical) {
        i <- grep(c, colnames(x))
        if (length(i) > 0) {
            index <- c(index, i)
            if (length(i) > 1) {
                # melt dummy variables
                factors[[c]] <- factor(apply(x[, i], 1, which.max2))
            } else {
                factors[[c]] <- factor(x[, i])
            }
        } else {
            warning("categorical ", c, " provided, but no match were found in column names.")
        }
    }

    x <- cbind(x[, -index], data.frame(factors))
    return(x)
}


which.max2 <- function(x) {
    if (all(x == 0)) {
        return(0)
    } else {
        return(which.max(x))
    }
}
correapvf/caretSDM documentation built on June 2, 2022, 8:29 a.m.