`ensemble.calibrate.models` <- function(
x=NULL, p=NULL,
a=NULL, an=1000, excludep=FALSE, target.groups=FALSE,
k=0, pt=NULL, at=NULL, SSB.reduce=FALSE, CIRCLES.d=250000,
TrainData=NULL, TestData=NULL,
VIF=FALSE, COR=FALSE,
SINK=FALSE, PLOTS=FALSE, CATCH.OFF=FALSE,
threshold.method="spec_sens", threshold.sensitivity=0.9, threshold.PresenceAbsence=FALSE,
evaluations.keep=FALSE,
models.list=NULL, models.keep=FALSE,
models.save=FALSE, species.name="Species001",
ENSEMBLE.tune=FALSE,
ENSEMBLE.best=0, ENSEMBLE.min=0.7, ENSEMBLE.exponent=1.0, ENSEMBLE.weight.min=0.05,
input.weights=NULL,
MAXENT=1, MAXNET=1, MAXLIKE=1, GBM=1, GBMSTEP=1, RF=1, CF=1,
GLM=1, GLMSTEP=1, GAM=1, GAMSTEP=1, MGCV=1, MGCVFIX=0,
EARTH=1, RPART=1, NNET=1, FDA=1, SVM=1, SVME=1, GLMNET=1,
BIOCLIM.O=0, BIOCLIM=1, DOMAIN=1, MAHAL=1, MAHAL01=1,
PROBIT=FALSE,
Yweights="BIOMOD",
layer.drops=NULL, factors=NULL, dummy.vars=NULL,
formulae.defaults=TRUE, maxit=100,
MAXENT.a=NULL, MAXENT.an=10000,
MAXENT.path=paste(getwd(), "/models/maxent_", species.name, sep=""),
MAXNET.classes="default", MAXNET.clamp=FALSE, MAXNET.type="cloglog",
MAXLIKE.formula=NULL, MAXLIKE.method="BFGS",
GBM.formula=NULL, GBM.n.trees=2001,
GBMSTEP.gbm.x=2:(ncol(TrainData.orig)), GBMSTEP.tree.complexity=5, GBMSTEP.learning.rate=0.005,
GBMSTEP.bag.fraction=0.5, GBMSTEP.step.size=100,
RF.formula=NULL, RF.ntree=751, RF.mtry=floor(sqrt(ncol(TrainData.vars))),
CF.formula=NULL, CF.ntree=751, CF.mtry=floor(sqrt(ncol(TrainData.vars))),
GLM.formula=NULL, GLM.family=binomial(link="logit"),
GLMSTEP.steps=1000, STEP.formula=NULL, GLMSTEP.scope=NULL, GLMSTEP.k=2,
GAM.formula=NULL, GAM.family=binomial(link="logit"),
GAMSTEP.steps=1000, GAMSTEP.scope=NULL, GAMSTEP.pos=1,
MGCV.formula=NULL, MGCV.select=FALSE,
MGCVFIX.formula=NULL,
EARTH.formula=NULL, EARTH.glm=list(family=binomial(link="logit"), maxit=maxit),
RPART.formula=NULL, RPART.xval=50,
NNET.formula=NULL, NNET.size=8, NNET.decay=0.01,
FDA.formula=NULL,
SVM.formula=NULL,
SVME.formula=NULL,
GLMNET.nlambda=100, GLMNET.class=FALSE,
BIOCLIM.O.fraction=0.9,
MAHAL.shape=1
)
{
.BiodiversityR <- new.env()
# if (! require(dismo)) {stop("Please install the dismo package")}
#
if (is.list(k) == F) {
k <- as.integer(k)
k.listed <- FALSE
}else{
k.listed <- TRUE
k.list <- k
k <- max(k.list$groupp)
}
x.was.terra <- FALSE
# check data
if (is.null(TrainData) == T) {
if(is.null(x) == T) {stop("value for parameter x is missing (raster::RasterStack or terra::rast object)")}
if(inherits(x,"RasterStack") == FALSE && inherits(x, "SpatRaster") == FALSE) {stop("x is not a RasterStack or SpatRaster object")}
if(inherits(x, "SpatRaster")) {
cat(paste("\n", "NOTE: raster procedures will be done via the raster package, not terra", "\n", sep = ""))
cat(paste("This also allows usage of dismo::prepareData internally.", "\n", sep = ""))
x <- raster::stack(x)
x.was.terra <- TRUE
}
if(is.null(p) == T) {stop("presence locations are missing (parameter p)")}
}
if(is.null(p) == F) {
p <- data.frame(p)
names(p) <- c("x", "y")
}
if(is.null(a) == F) {
a <- data.frame(a)
names(a) <- c("x", "y")
}
if(is.null(pt) == F) {
pt <- data.frame(pt)
names(pt) <- c("x", "y")
}
if(is.null(at) == F) {
at <- data.frame(at)
names(at) <- c("x", "y")
}
if(is.null(MAXENT.a) == F) {
MAXENT.a <- data.frame(MAXENT.a)
names(MAXENT.a) <- c("x", "y")
}
#
if(models.save==T) {
models.keep <- TRUE
dir.create("models", showWarnings = F)
}
# create output file
dir.create("outputs", showWarnings = F)
paste.file <- paste(getwd(), "/outputs/", species.name, "_output.txt", sep="")
OLD.SINK <- TRUE
if (sink.number(type="output") == 0) {OLD.SINK <- F}
if (SINK==T && OLD.SINK==F) {
if (file.exists(paste.file) == F) {
cat(paste("\n", "NOTE: results captured in file: ", paste.file, "\n", sep = ""))
}else{
cat(paste("\n", "NOTE: results appended in file: ", paste.file, "\n", sep = ""))
}
cat(paste(sep="\n\n", "RESULTS (ensemble.calibrate.models function)"), file=paste.file, sep="\n\n", append=T)
sink(file=paste.file, append=T)
cat(paste(date()), sep="\n")
print(match.call())
cat(paste(" "), sep="\n")
}
# check TrainData
if (is.null(TrainData) == F) {
TrainData <- data.frame(TrainData)
if (names(TrainData)[1] !="pb") {stop("first column for TrainData should be 'pb' containing presence (1) and absence (0) data")}
if (raster::nlayers(x) != (ncol(TrainData)-1)) {
cat(paste("\n", "WARNING: different number of explanatory variables in rasterStack and TrainData", sep = ""))
}
}
# modify list of variables
# if TrainData is provided, then this data set takes precedence over raster x in the selection of variables
#
# modify TrainData if layer.drops
if (is.null(TrainData) == F) {
if (is.null(layer.drops) == F) {
vars <- names(TrainData)
layer.drops <- as.character(layer.drops)
dummy.vars <- as.character(dummy.vars)
nd <- length(layer.drops)
for (i in 1:nd) {
if (any(vars==layer.drops[i]) == FALSE) {
cat(paste("\n", "WARNING: variable to exclude '", layer.drops[i], "' not among columns of TrainData", sep = ""))
}else{
cat(paste("\n", "NOTE: variable '", layer.drops[i], "' will not be included as explanatory variable", sep = ""))
TrainData <- TrainData[, which(names(TrainData) != layer.drops[i]), drop=F]
if (is.null(TestData) == F) {TestData <- TestData[, which(names(TestData) != layer.drops[i]), drop=F]}
vars <- names(TrainData)
if (length(factors) > 0) {
factors <- factors[factors != layer.drops[i]]
}
if (length(dummy.vars) > 0) {
dummy.vars <- dummy.vars[dummy.vars != layer.drops[i]]
}
}
}
if(length(layer.drops) == 0) {layer.drops <- NULL}
if(length(factors) == 0) {factors <- NULL}
if(length(dummy.vars) == 0) {dummy.vars <- NULL}
}
if (is.null(factors) == F) {
vars <- names(TrainData)
factors <- as.character(factors)
nf <- length(factors)
old.factors <- factors
for (i in 1:nf) {
if (any(vars==old.factors[i]) == FALSE) {
cat(paste("\n", "WARNING: categorical variable '", old.factors[i], "' not among columns of TrainData", sep = ""))
factors <- factors[factors != old.factors[i]]
}
}
if(length(factors) == 0) {factors <- NULL}
}
if (is.null(dummy.vars) == F) {
vars <- names(TrainData)
dummy.vars <- as.character(dummy.vars)
nf <- length(dummy.vars)
old.dummy.vars <- dummy.vars
for (i in 1:nf) {
if (any(vars==old.dummy.vars[i]) == FALSE) {
cat(paste("\n", "WARNING: dummy variable '", old.dummy.vars[i], "' not among columns of TrainData", sep = ""))
dummy.vars <- dummy.vars[dummy.vars != old.dummy.vars[i]]
}
}
if(length(dummy.vars) == 0) {dummy.vars <- NULL}
}
}
#
# modify RasterStack x only if this RasterStack was provided
if (is.null(x) == F) {
# same variables as TrainData in the rasterstack
if (is.null(TrainData) == F) {
vars <- names(TrainData)
vars <- vars[which(vars!="pb")]
x <- raster::subset(x, subset=vars)
x <- raster::stack(x)
}
if (is.null(TrainData) == T) {
if (is.null(layer.drops) == F) {
vars <- names(x)
layer.drops <- as.character(layer.drops)
factors <- as.character(factors)
dummy.vars <- as.character(dummy.vars)
nd <- length(layer.drops)
for (i in 1:nd) {
if (any(vars==layer.drops[i])==FALSE) {
cat(paste("\n", "WARNING: variable to exclude '", layer.drops[i], "' not among grid layers", sep = ""))
}else{
cat(paste("\n", "NOTE: variable '", layer.drops[i], "' will not be included as explanatory variable", sep = ""))
x <- raster::dropLayer(x, which(names(x) %in% c(layer.drops[i]) ))
x <- raster::stack(x)
vars <- names(x)
if (length(factors) > 0) {
factors <- factors[factors != layer.drops[i]]
}
if (length(dummy.vars) > 0) {
dummy.vars <- dummy.vars[dummy.vars != layer.drops[i]]
}
}
}
if(length(layer.drops) == 0) {layer.drops <- NULL}
if(length(factors) == 0) {factors <- NULL}
if(length(dummy.vars) == 0) {dummy.vars <- NULL}
}
if (is.null(factors) == F) {
vars <- names(x)
factors <- as.character(factors)
nf <- length(factors)
old.factors <- factors
for (i in 1:nf) {
if (any(vars==old.factors[i])==FALSE) {
cat(paste("\n", "WARNING: categorical variable '", old.factors[i], "' not among grid layers", sep = ""))
factors <- factors[factors != old.factors[i]]
}
}
if(length(factors) == 0) {factors <- NULL}
}
if (is.null(dummy.vars) == F) {
vars <- names(x)
dummy.vars <- as.character(dummy.vars)
nf <- length(dummy.vars)
old.dummy.vars <- dummy.vars
for (i in 1:nf) {
if (any(vars==old.dummy.vars[i]) == FALSE) {
cat(paste("\n", "WARNING: dummy variable '", old.dummy.vars[i], "' not among grid layers", sep = ""))
dummy.vars <- dummy.vars[dummy.vars != old.dummy.vars[i]]
}
}
if(length(dummy.vars) == 0) {dummy.vars <- NULL}
}
}
# set minimum and maximum values
for (i in 1:raster::nlayers(x)) {
x[[i]] <- raster::setMinMax(x[[i]])
}
# declare factor layers
if(is.null(factors)==F) {
for (i in 1:length(factors)) {
j <- which(names(x) == factors[i])
x[[j]] <- raster::as.factor(x[[j]])
}
}
}
#
if (is.null(input.weights) == F) {
MAXENT <- max(c(input.weights["MAXENT"], -1), na.rm=T)
MAXNET <- max(c(input.weights["MAXNET"], -1), na.rm=T)
MAXLIKE <- max(c(input.weights["MAXLIKE"], -1), na.rm=T)
GBM <- max(c(input.weights["GBM"], -1), na.rm=T)
GBMSTEP <- max(c(input.weights["GBMSTEP"], -1), na.rm=T)
RF <- max(c(input.weights["RF"], -1), na.rm=T)
CF <- max(c(input.weights["CF"], -1), na.rm=T)
GLM <- max(c(input.weights["GLM"], -1), na.rm=T)
GLMSTEP <- max(c(input.weights["GLMSTEP"], -1), na.rm=T)
GAM <- max(c(input.weights["GAM"], -1), na.rm=T)
GAMSTEP <- max(c(input.weights["GAMSTEP"], -1), na.rm=T)
MGCV <- max(c(input.weights["MGCV"], -1), na.rm=T)
MGCVFIX <- max(c(input.weights["MGCVFIX"], -1), na.rm=T)
EARTH <- max(c(input.weights["EARTH"], -1), na.rm=T)
RPART <- max(c(input.weights["RPART"], -1), na.rm=T)
NNET <- max(c(input.weights["NNET"], -1), na.rm=T)
FDA <- max(c(input.weights["FDA"], -1), na.rm=T)
SVM <- max(c(input.weights["SVM"], -1), na.rm=T)
SVME <- max(c(input.weights["SVME"], -1), na.rm=T)
GLMNET <- max(c(input.weights["GLMNET"], -1), na.rm=T)
BIOCLIM.O <- max(c(input.weights["BIOCLIM.O"], -1), na.rm=T)
BIOCLIM <- max(c(input.weights["BIOCLIM"], -1), na.rm=T)
DOMAIN <- max(c(input.weights["DOMAIN"], -1), na.rm=T)
MAHAL <- max(c(input.weights["MAHAL"], -1), na.rm=T)
MAHAL01 <- max(c(input.weights["MAHAL01"], -1), na.rm=T)
}
ws <- as.numeric(c(MAXENT, MAXNET, MAXLIKE, GBM, GBMSTEP, RF, CF, GLM, GLMSTEP, GAM, GAMSTEP, MGCV,
MGCVFIX, EARTH, RPART, NNET, FDA, SVM, SVME, GLMNET,
BIOCLIM.O, BIOCLIM, DOMAIN, MAHAL, MAHAL01))
names(ws) <- c("MAXENT", "MAXNET", "MAXLIKE", "GBM", "GBMSTEP", "RF", "CF", "GLM", "GLMSTEP", "GAM", "GAMSTEP", "MGCV",
"MGCVFIX", "EARTH", "RPART", "NNET", "FDA", "SVM", "SVME", "GLMNET",
"BIOCLIM.O", "BIOCLIM", "DOMAIN", "MAHAL", "MAHAL01")
ws <- ensemble.weights(weights=ws, exponent=1, best=0, min.weight=0)
#
thresholds <- c(ws, -1)
names(thresholds) <- c(names(ws), "ENSEMBLE")
AUC.calibration <- thresholds
AUC.calibration[] <- NA
AUC.testing <- AUC.calibration
#
#
MAXENT.OLD <- MAXNET.OLD <- MAXLIKE.OLD <- GBM.OLD <- GBMSTEP.OLD <- RF.OLD <- CF.OLD <- GLM.OLD <- GLMSTEP.OLD <- GAM.OLD <- GAMSTEP.OLD <- MGCV.OLD <- NULL
MGCVFIX.OLD <- EARTH.OLD <- RPART.OLD <- NNET.OLD <- FDA.OLD <- SVM.OLD <- SVME.OLD <- GLMNET.OLD <- BIOCLIM.O.OLD <- BIOCLIM.OLD <- DOMAIN.OLD <- MAHAL.OLD <- MAHAL01.OLD <- NULL
# probit models, NULL if no probit model fitted
MAXENT.PROBIT.OLD <- MAXNET.PROBIT.OLD <- MAXLIKE.PROBIT.OLD <- GBM.PROBIT.OLD <- GBMSTEP.PROBIT.OLD <- RF.PROBIT.OLD <- CF.PROBIT.OLD <- GLM.PROBIT.OLD <- GLMSTEP.PROBIT.OLD <- GAM.PROBIT.OLD <- GAMSTEP.PROBIT.OLD <- MGCV.PROBIT.OLD <- NULL
MGCVFIX.PROBIT.OLD <- EARTH.PROBIT.OLD <- RPART.PROBIT.OLD <- NNET.PROBIT.OLD <- FDA.PROBIT.OLD <- SVM.PROBIT.OLD <- SVME.PROBIT.OLD <- GLMNET.PROBIT.OLD <- BIOCLIM.O.PROBIT.OLD <- BIOCLIM.PROBIT.OLD <- DOMAIN.PROBIT.OLD <- MAHAL.PROBIT.OLD <- MAHAL01.PROBIT.OLD <- NULL
if (is.null(models.list) == F) {
if (is.null(models.list$MAXENT) == F) {MAXENT.OLD <- models.list$MAXENT}
if (is.null(models.list$MAXNET) == F) {MAXNET.OLD <- models.list$MAXNET}
if (is.null(models.list$MAXLIKE) == F) {MAXLIKE.OLD <- models.list$MAXLIKE}
if (is.null(models.list$GBM) == F) {GBM.OLD <- models.list$GBM}
if (is.null(models.list$GBMSTEP) == F) {GBMSTEP.OLD <- models.list$GBMSTEP}
if (is.null(models.list$RF) == F) {RF.OLD <- models.list$RF}
if (is.null(models.list$CF) == F) {RF.OLD <- models.list$CF}
if (is.null(models.list$GLM) == F) {GLM.OLD <- models.list$GLM}
if (is.null(models.list$GLMSTEP) == F) {GLMSTEP.OLD <- models.list$GLMSTEP}
if (is.null(models.list$GAM) == F) {GAM.OLD <- models.list$GAM}
if (is.null(models.list$GAMSTEP) == F) {GAMSTEP.OLD <- models.list$GAMSTEP}
if (is.null(models.list$MGCV) == F) {MGCV.OLD <- models.list$MGCV}
if (is.null(models.list$MGCVFIX) == F) {MGCVFIX.OLD <- models.list$MGCVFIX}
if (is.null(models.list$EARTH) == F) {EARTH.OLD <- models.list$EARTH}
if (is.null(models.list$RPART) == F) {RPART.OLD <- models.list$RPART}
if (is.null(models.list$NNET) == F) {NNET.OLD <- models.list$NNET}
if (is.null(models.list$FDA) == F) {FDA.OLD <- models.list$FDA}
if (is.null(models.list$SVM) == F) {SVM.OLD <- models.list$SVM}
if (is.null(models.list$SVME) == F) {SVME.OLD <- models.list$SVME}
if (is.null(models.list$GLMNET) == F) {GLMNET.OLD <- models.list$GLMNET}
if (is.null(models.list$BIOCLIM.O) == F) {BIOCLIM.O.OLD <- models.list$BIOCLIM.O}
if (is.null(models.list$BIOCLIM) == F) {BIOCLIM.OLD <- models.list$BIOCLIM}
if (is.null(models.list$DOMAIN) == F) {DOMAIN.OLD <- models.list$DOMAIN}
if (is.null(models.list$MAHAL) == F) {MAHAL.OLD <- models.list$MAHAL}
if (is.null(models.list$MAHAL01) == F) {MAHAL01.OLD <- models.list$MAHAL01}
# probit models
if (is.null(models.list$MAXENT.PROBIT) == F) {MAXENT.PROBIT.OLD <- models.list$MAXENT.PROBIT}
if (is.null(models.list$MAXNET.PROBIT) == F) {MAXNET.PROBIT.OLD <- models.list$MAXNET.PROBIT}
if (is.null(models.list$MAXLIKE.PROBIT) == F) {MAXLIKE.PROBIT.OLD <- models.list$MAXLIKE.PROBIT}
if (is.null(models.list$GBM.PROBIT) == F) {GBM.PROBIT.OLD <- models.list$GBM.PROBIT}
if (is.null(models.list$GBMSTEP.PROBIT) == F) {GBMSTEP.PROBIT.OLD <- models.list$GBMSTEP.PROBIT}
if (is.null(models.list$RF.PROBIT) == F) {RF.PROBIT.OLD <- models.list$RF.PROBIT}
if (is.null(models.list$CF.PROBIT) == F) {CF.PROBIT.OLD <- models.list$CF.PROBIT}
if (is.null(models.list$GLM.PROBIT) == F) {GLM.PROBIT.OLD <- models.list$GLM.PROBIT}
if (is.null(models.list$GLMSTEP.PROBIT) == F) {GLMSTEP.PROBIT.OLD <- models.list$GLMSTEP.PROBIT}
if (is.null(models.list$GAM.PROBIT) == F) {GAM.PROBIT.OLD <- models.list$GAM.PROBIT}
if (is.null(models.list$GAMSTEP.PROBIT) == F) {GAMSTEP.PROBIT.OLD <- models.list$GAMSTEP.PROBIT}
if (is.null(models.list$MGCV.PROBIT) == F) {MGCV.PROBIT.OLD <- models.list$MGCV.PROBIT}
if (is.null(models.list$MGCVFIX.PROBIT) == F) {MGCVFIX.PROBIT.OLD <- models.list$MGCVFIX.PROBIT}
if (is.null(models.list$EARTH.PROBIT) == F) {EARTH.PROBIT.OLD <- models.list$EARTH.PROBIT}
if (is.null(models.list$RPART.PROBIT) == F) {RPART.PROBIT.OLD <- models.list$RPART.PROBIT}
if (is.null(models.list$NNET.PROBIT) == F) {NNET.PROBIT.OLD <- models.list$NNET.PROBIT}
if (is.null(models.list$FDA.PROBIT) == F) {FDA.PROBIT.OLD <- models.list$FDA.PROBIT}
if (is.null(models.list$SVM.PROBIT) == F) {SVM.PROBIT.OLD <- models.list$SVM.PROBIT}
if (is.null(models.list$SVME.PROBIT) == F) {SVME.PROBIT.OLD <- models.list$SVME.PROBIT}
if (is.null(models.list$GLMNET.PROBIT) == F) {GLMNET.PROBIT.OLD <- models.list$GLMNET.PROBIT}
if (is.null(models.list$BIOCLIM.O.PROBIT) == F) {BIOCLIM.O.PROBIT.OLD <- models.list$BIOCLIM.O.PROBIT}
if (is.null(models.list$BIOCLIM.PROBIT) == F) {BIOCLIM.PROBIT.OLD <- models.list$BIOCLIM.PROBIT}
if (is.null(models.list$DOMAIN.PROBIT) == F) {DOMAIN.PROBIT.OLD <- models.list$DOMAIN.PROBIT}
if (is.null(models.list$MAHAL.PROBIT) == F) {MAHAL.PROBIT.OLD <- models.list$MAHAL.PROBIT}
if (is.null(models.list$MAHAL01.PROBIT) == F) {MAHAL01.PROBIT.OLD <- models.list$MAHAL01.PROBIT}
}
# check formulae and packages
if (formulae.defaults == T) {
if (is.null(TrainData) == T) {
formulae <- ensemble.formulae(x, layer.drops=layer.drops, factors=factors, dummy.vars=dummy.vars, weights=ws)
}else{
formulae <- ensemble.formulae(TrainData, layer.drops=layer.drops, factors=factors, dummy.vars=dummy.vars, weights=ws)
}
}
if (ws["MAXENT"] > 0) {
jar <- paste(system.file(package="dismo"), "/java/maxent.jar", sep='')
if (!file.exists(jar)) {stop('maxent program is missing: ', jar, '\n', 'Please download it here: http://www.cs.princeton.edu/~schapire/maxent/')}
}
if (ws["MAXNET"] > 0) {
if (! requireNamespace("maxnet")) {stop("Please install the maxnet package")}
predict.maxnet2 <- function(object, newdata, clamp=F, type=c("cloglog")) {
p <- predict(object=object, newdata=newdata, clamp=clamp, type=type)
return(as.numeric(p))
}
}
if (ws["MAXLIKE"] > 0) {
if (! requireNamespace("maxlike")) {stop("Please install the maxlike package")}
if (is.null(MAXLIKE.formula) == T && formulae.defaults == T) {MAXLIKE.formula <- formulae$MAXLIKE.formula}
if (is.null(MAXLIKE.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate MAXLIKE as no explanatory variables available" ,"\n", sep = ""))
ws["MAXLIKE"] <- 0
}else{
environment(MAXLIKE.formula) <- .BiodiversityR
}
}
if (ws["GBM"] > 0) {
if (! requireNamespace("gbm")) {stop("Please install the gbm package")}
requireNamespace("splines")
if (is.null(GBM.formula) == T && formulae.defaults == T) {GBM.formula <- formulae$GBM.formula}
if (is.null(GBM.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate GBM as no explanatory variables available" ,"\n", sep = ""))
ws["GBM"] <- 0
}else{
environment(GBM.formula) <- .BiodiversityR
}
}
if (ws["GBMSTEP"] > 0) {
# if (! require(gbm)) {stop("Please install the gbm package")}
}
if (ws["RF"] > 0) {
# if (! require(randomForest)) {stop("Please install the randomForest package")}
if (is.null(RF.formula) == T && formulae.defaults == T) {RF.formula <- formulae$RF.formula}
if (is.null(RF.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate RF as no explanatory variables available" ,"\n", sep = ""))
ws["RF"] <- 0
}else{
environment(RF.formula) <- .BiodiversityR
if (identical(RF.ntree, trunc(RF.ntree/2)) == F) {RF.ntree <- RF.ntree + 1}
# get the probabilities from RF
predict.RF <- function(object, newdata) {
p <- predict(object=object, newdata=newdata, type="response")
return(as.numeric(p))
}
}
}
if (ws["CF"] > 0) {
if (! requireNamespace("party")) {stop("Please install the party package")}
if (is.null(CF.formula) == T && formulae.defaults == T) {CF.formula <- formulae$CF.formula}
if (is.null(CF.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate CF as no explanatory variables available" ,"\n", sep = ""))
ws["CF"] <- 0
}else{
environment(CF.formula) <- .BiodiversityR
if (identical(CF.ntree, trunc(CF.ntree/2)) == F) {CF.ntree <- CF.ntree + 1}
# get the probabilities from CF
# note that randomForest used predict.randomForest
predict.CF <- function(object, newdata) {
# avoid problems with single variables, especially with raster::predict
for (i in 1:ncol(newdata)) {
if (is.integer(newdata[, i])) {newdata[, i] <- as.numeric(newdata[, i])}
}
p1 <- predict(object=object, newdata=newdata, type="prob")
p <- numeric(length(p1))
for (i in 1:length(p1)) {p[i] <- p1[[i]][2]}
return(as.numeric(p))
}
}
}
if (ws["GLM"] > 0) {
if (is.null(GLM.formula) == T && formulae.defaults == T) {GLM.formula <- formulae$GLM.formula}
if (is.null(GLM.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate GLM as no explanatory variables available" ,"\n", sep = ""))
ws["GLM"] <- 0
}else{
environment(GLM.formula) <- .BiodiversityR
assign("GLM.family", GLM.family, envir=.BiodiversityR)
}
}
if (ws["GLMSTEP"] > 0) {
# if (! require(MASS)) {stop("Please install the MASS package")}
if (is.null(STEP.formula) == T && formulae.defaults == T) {STEP.formula <- formulae$STEP.formula}
if (is.null(GLMSTEP.scope) == T && formulae.defaults == T) {GLMSTEP.scope <- formulae$GLMSTEP.scope}
if (is.null(STEP.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate GLMSTEP as no explanatory variables available" ,"\n", sep = ""))
ws["GLMSTEP"] <- 0
}else{
environment(STEP.formula) <- .BiodiversityR
assign("GLM.family", GLM.family, envir=.BiodiversityR)
}
}
if (ws["GAM"] > 0) {
if (is.null(GAM.formula) == T && formulae.defaults == T) {GAM.formula <- formulae$GAM.formula}
if (is.null(GAM.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate GAM as no explanatory variables available" ,"\n", sep = ""))
ws["GAM"] <- 0
}else{
environment(GAM.formula) <- .BiodiversityR
assign("GAM.family", GAM.family, envir=.BiodiversityR)
}
}
if (ws["GAMSTEP"] > 0) {
if (is.null(STEP.formula) == T && formulae.defaults == T) {STEP.formula <- formulae$STEP.formula}
if (is.null(GAMSTEP.scope) == T && formulae.defaults == T) {GAMSTEP.scope <- formulae$GAMSTEP.scope}
if (is.null(STEP.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate GAMSTEP as no explanatory variables available" ,"\n", sep = ""))
ws["GAMSTEP"] <- 0
}else{
environment(STEP.formula) <- .BiodiversityR
assign("GAM.family", GAM.family, envir=.BiodiversityR)
}
}
if (ws["MGCV"] > 0 || ws["MGCVFIX"] > 0) {
cat(paste("\n\n"))
# get the probabilities from MGCV
predict.MGCV <- function(object, newdata, type="response") {
p <- mgcv::predict.gam(object=object, newdata=newdata, type=type)
return(as.numeric(p))
}
# options(warn=0)
}
if (ws["MGCV"] > 0) {
if (is.null(MGCV.formula) == T && formulae.defaults == T) {MGCV.formula <- formulae$MGCV.formula}
if (is.null(MGCV.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate MGCV as no explanatory variables available" ,"\n", sep = ""))
ws["MGCV"] <- 0
}else{
environment(MGCV.formula) <- .BiodiversityR
assign("GAM.family", GAM.family, envir=.BiodiversityR)
}
}
if (ws["MGCVFIX"] > 0) {
if (is.null(MGCVFIX.formula) == T && formulae.defaults == T) {MGCVFIX.formula <- formulae$MGCVFIX.formula}
if (is.null(MGCVFIX.formula) == T) {stop("Please provide the MGCVFIX.formula (hint: use ensemble.formulae function)")}
if (is.null(MGCVFIX.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate MGCVFIX as no explanatory variables available" ,"\n", sep = ""))
ws["MGCVFIX"] <- 0
}else{
environment(MGCVFIX.formula) <- .BiodiversityR
assign("GAM.family", GAM.family, envir=.BiodiversityR)
}
}
if (ws["EARTH"] > 0) {
# if (! require(earth)) {stop("Please install the earth package")}
if (is.null(EARTH.formula) == T && formulae.defaults == T) {EARTH.formula <- formulae$EARTH.formula}
if (is.null(EARTH.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate EARTH as no explanatory variables available" ,"\n", sep = ""))
ws["EARTH"] <- 0
}else{
environment(EARTH.formula) <- .BiodiversityR
# get the probabilities from earth
predict.EARTH <- function(object, newdata, type="response") {
p <- predict(object=object, newdata=newdata, type=type)
return(as.numeric(p))
}
}
}
if (ws["RPART"] > 0) {
# if (! require(rpart)) {stop("Please install the rpart package")}
if (is.null(RPART.formula) == T && formulae.defaults == T) {RPART.formula <- formulae$RPART.formula}
if (is.null(RPART.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate RPART as no explanatory variables available" ,"\n", sep = ""))
ws["RPART"] <- 0
}else{
environment(RPART.formula) <- .BiodiversityR
}
}
if (ws["NNET"] > 0) {
# if (! require(nnet)) {stop("Please install the nnet package")}
if (is.null(NNET.formula) == T && formulae.defaults == T) {NNET.formula <- formulae$NNET.formula}
if (is.null(NNET.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate NNET as no explanatory variables available" ,"\n", sep = ""))
ws["NNET"] <- 0
}else{
environment(NNET.formula) <- .BiodiversityR
# get the probabilities from nnet
predict.NNET <- function(object, newdata, type="raw") {
p <- predict(object=object, newdata=newdata, type=type)
return(as.numeric(p))
}
}
}
if (ws["FDA"] > 0) {
# if (! require(mda)) {stop("Please install the mda package")}
if (is.null(FDA.formula) == T && formulae.defaults == T) {FDA.formula <- formulae$FDA.formula}
if (is.null(FDA.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate FDA as no explanatory variables available" ,"\n", sep = ""))
ws["FDA"] <- 0
}else{
environment(FDA.formula) <- .BiodiversityR
}
}
if (ws["SVM"] > 0) {
# if (! require(kernlab)) {stop("Please install the kernlab package")}
if (is.null(SVM.formula) == T && formulae.defaults == T) {SVM.formula <- formulae$SVM.formula}
if (is.null(SVM.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate SVM as no explanatory variables available" ,"\n", sep = ""))
ws["SVM"] <- 0
}else{
environment(SVM.formula) <- .BiodiversityR
}
}
if (ws["SVME"] > 0) {
# if (! require(e1071)) {stop("Please install the e1071 package")}
if (is.null(SVME.formula) == T && formulae.defaults == T) {SVME.formula <- formulae$SVME.formula}
if (is.null(SVME.formula) == T) {stop("Please provide the SVME.formula (hint: use ensemble.formulae function)")}
if (is.null(SVME.formula) == T) {
cat(paste("\n", "NOTE: not possible to calibrate SVME as no explanatory variables available" ,"\n", sep = ""))
ws["SVME"] <- 0
}else{
environment(SVME.formula) <- .BiodiversityR
# get the probabilities from svm
predict.SVME <- function(model, newdata) {
p <- predict(model, newdata, probability=T)
return(attr(p, "probabilities")[,1])
}
}
}
if (ws["GLMNET"] > 0) {
if (! requireNamespace("glmnet")) {stop("Please install the glmnet package")}
# get the mean probabilities from glmnet
predict.GLMNET <- function(model, newdata, GLMNET.class=FALSE) {
newdata <- as.matrix(newdata)
if (GLMNET.class == TRUE) {
p <- predict(model, newx=newdata, type="class", exact=T)
n.obs <- nrow(p)
nv <- ncol(p)
result <- numeric(n.obs)
for (i in 1:n.obs) {
for (j in 1:nv) {
if(p[i, j] == 1) {result[i] <- result[i] + 1}
}
}
result <- result/nv
return(result)
}else{
p <- predict(model, newx=newdata, type="response", exact=T)
n.obs <- nrow(p)
nv <- ncol(p)
result <- numeric(n.obs)
for (i in 1:n.obs) {
for (j in 1:nv) {
result[i] <- result[i] + p[i, j]
}
}
result <- result/nv
return(result)
}
}
}
if (ws["BIOCLIM.O"] > 0) {
predict.BIOCLIM.O <- function(object, newdata) {
lower.limits <- object$lower.limits
upper.limits <- object$upper.limits
minima <- object$minima
maxima <- object$maxima
#
newdata <- newdata[, which(names(newdata) %in% names(lower.limits)), drop=F]
result <- as.numeric(rep(NA, nrow(newdata)))
varnames <- names(newdata)
nvars <- ncol(newdata)
#
for (i in 1:nrow(newdata)) {
datai <- newdata[i,]
resulti <- 1
j <- 0
while (resulti > 0 && j <= (nvars-1)) {
j <- j+1
focal.var <- varnames[j]
if (resulti == 1) {
lowerj <- lower.limits[which(names(lower.limits) == focal.var)]
if (datai[, j] < lowerj) {resulti <- 0.5}
upperj <- upper.limits[which(names(upper.limits) == focal.var)]
if (datai[, j] > upperj) {resulti <- 0.5}
}
minj <- minima[which(names(minima) == focal.var)]
if (datai[, j] < minj) {resulti <- 0}
maxj <- maxima[which(names(maxima) == focal.var)]
if (datai[, j] > maxj) {resulti <- 0}
}
result[i] <- resulti
}
p <- as.numeric(result)
return(p)
}
}
if (ws["MAHAL"] > 0) {
# get the probabilities from mahal
predict.MAHAL <- function(model, newdata, PROBIT) {
p <- dismo::predict(object=model, x=newdata)
if (PROBIT == F) {
p[p<0] <- 0
p[p>1] <- 1
}
return(as.numeric(p))
}
}
if (ws["MAHAL01"] > 0) {
# get the probabilities from transformed mahal
predict.MAHAL01 <- function(model, newdata, MAHAL.shape) {
p <- dismo::predict(object=model, x=newdata)
p <- p - 1 - MAHAL.shape
p <- abs(p)
p <- MAHAL.shape / p
return(p)
}
}
# create TrainData and TestData
#
# tuning takes precedense over different exponents
# if(length(ENSEMBLE.exponent) > 1 || length(ENSEMBLE.best) > 1 || length(ENSEMBLE.min) > 1) {ENSEMBLE.tune <- TRUE}
no.tests <- FALSE
if (is.null(pt)==T && is.null(at)==T && is.null(TestData)==T && k < 2) {
no.tests <- TRUE
if (ENSEMBLE.tune == T) {
cat(paste("\n", "WARNING: not possible to tune (ENSEMBLE.tune=FALSE) as no Test Data available or will be created", sep = ""))
ENSEMBLE.tune <- FALSE
}
}
if (is.null(TrainData) == F) {
if(any(is.na(TrainData))) {
cat(paste("\n", "WARNING: sample units with missing data removed from calibration data","\n\n",sep = ""))
}
TrainValid <- complete.cases(TrainData)
TrainData <- TrainData[TrainValid,,drop=F]
if (is.null(p) == F) {
TrainValid <- complete.cases(TrainData[TrainData[,"pb"]==1,])
p <- p[TrainValid,]
}
if (is.null(a) == F) {
TrainValid <- complete.cases(TrainData[TrainData[,"pb"]==0,])
a <- a[TrainValid,]
}
if(is.null(TestData) == T) {
TestData <- TrainData
if (k > 1) {
TrainData.p <- TrainData[TrainData$pb == 1, ]
TrainData.a <- TrainData[TrainData$pb == 0, ]
if (k.listed == F) {
groupp <- dismo::kfold(TrainData.p, k=k)
groupa <- dismo::kfold(TrainData.a, k=k)
}else{
groupp <- k.list$groupp
groupa <- k.list$groupa
}
TrainData.pc <- TrainData.p[groupp != 1,]
TrainData.ac <- TrainData.a[groupa != 1,]
TestData.pc <- TrainData.p[groupp == 1,]
TestData.ac <- TrainData.a[groupa == 1,]
TrainData <- rbind(TrainData.pc, TrainData.ac)
TestData <- rbind(TestData.pc, TestData.ac)
}
}
}else{
if (is.null(a)==T) {
if (target.groups == T) {
cat(paste("\n", "WARNING: not possible for target group pseudo-absence data as 'a' (locations of all species) not specified", sep = ""))
cat(paste("\n", "Instead background locations selected randomly", "\n\n", sep = ""))
}
if (excludep == T) {
a <- dismo::randomPoints(x[[1]], n=an, p=p, excludep=T)
}else{
a <- dismo::randomPoints(x[[1]], n=an, p=NULL, excludep=F)
}
}else{
if (target.groups == T) {
cat(paste("\n", "target group (biased pseudo-absence locations) in centres of cells with locations of all target group species ('a')", "\n\n", sep = ""))
p.cell <- unique(raster::cellFromXY(x[[1]], p))
a.cell <- unique(raster::cellFromXY(x[[1]], a))
if (excludep == T) {a.cell <- a.cell[!(a.cell %in% p.cell)]}
a <- raster::xyFromCell(x[[1]], cell=a.cell, spatial=F)
}
}
if (is.null(pt)==T && is.null(TestData)) {pt <- p}
if (k > 1 && identical(pt, p) == T) {
if (k.listed == F) {
groupp <- dismo::kfold(p, k=k)
}else{
groupp <- k.list$groupp
}
pc <- p[groupp != 1,]
pt <- p[groupp == 1,]
p <- pc
}
if (is.null(at)==T && is.null(TestData)) {at <- a}
if (k > 1 && identical(at, a) == T) {
if (k.listed == F) {
groupa <- dismo::kfold(a, k=k)
}else{
groupa <- k.list$groupa
}
ac <- a[groupa != 1,]
at <- a[groupa == 1,]
a <- ac
}
# check for spatial sorting bias (are the testing absences farther away than testing presences)
if (is.null(p)==F && identical(pt, p)==F) {
sb.bias <- dismo::ssb(p=pt, a=at, reference=p)
sb.bias2 <- sb.bias[, 1]/sb.bias[, 2]
cat(paste("\n", "Spatial sorting bias (dismo package, no bias=1, extreme bias=0): ", sb.bias2, "\n", sep = ""))
}
# attempt to reduce spatial bias by searching absence testing locations within circles around all known presences
if (SSB.reduce == T) {
if (identical(pt, p) == T) {
cat(paste("\n", "No search for testing absences in circular neighbourhoods since no separate testing presences", sep = ""))
SSB.reduce <- FALSE
}else{
d.km <- CIRCLES.d/1000
cat(paste("\n", "Random selection of testing absences in circular neighbourhoods of ", d.km, " km", "\n", sep = ""))
pres_all <- rbind(pt, p)
circles.calibrate <- dismo::circles(p=pres_all, lonlat=raster::isLonLat(x[[1]]), d=CIRCLES.d)
circles.predicted <- dismo::predict(circles.calibrate, x[[1]])
at <- dismo::randomPoints(circles.predicted, n=nrow(at), p=pres_all, excludep=T)
sb.bias <- dismo::ssb(p=pt, a=at, reference=p)
sb.bias2 <- sb.bias[, 1]/sb.bias[, 2]
cat(paste("\n", "Spatial sorting bias with new testing absences: ", sb.bias2, "\n", sep = ""))
}
}
#
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TrainData <- dismo::prepareData(x=xdouble, p, b=a, factors=factors, xy=FALSE)
TrainData <- TrainData[, -3]
names(TrainData)[2] <- names(x)
}else{
TrainData <- dismo::prepareData(x, p, b=a, factors=factors, xy=FALSE)
}
if(any(is.na(TrainData[TrainData[,"pb"]==1,]))) {
cat(paste("\n", "WARNING: presence locations with missing data removed from calibration data","\n\n",sep = ""))
}
TrainValid <- complete.cases(TrainData[TrainData[,"pb"]==1,])
p <- p[TrainValid,]
if(any(is.na(TrainData[TrainData[,"pb"]==0,]))) {
cat(paste("\n", "WARNING: background locations with missing data removed from calibration data","\n\n",sep = ""))
}
TrainValid <- complete.cases(TrainData[TrainData[,"pb"]==0,])
a <- a[TrainValid,]
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TrainData <- dismo::prepareData(x=xdouble, p, b=a, factors=factors, xy=FALSE)
TrainData <- TrainData[, -3]
names(TrainData)[2] <- names(x)
}else{
TrainData <- dismo::prepareData(x, p, b=a, factors=factors, xy=FALSE)
}
}
#
if (no.tests == F) {
if (is.null(TestData) == F) {
TestData <- data.frame(TestData)
if(any(is.na(TestData))) {
cat(paste("\n", "WARNING: sample units with missing data removed from testing data","\n\n",sep = ""))
}
TestValid <- complete.cases(TestData)
TestData <- TestData[TestValid,,drop=F]
if (is.null(pt) == F) {
TestValid <- complete.cases(TestData[TestData[,"pb"]==1,])
pt <- pt[TestValid,]
}
if (is.null(at) == F) {
TestValid <- complete.cases(TestData[TestData[,"pb"]==0,])
at <- at[TestValid,]
}
if (all(names(TestData)!="pb") == T) {stop("one column needed of 'pb' with presence and absence for TestData")}
}else{
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TestData <- dismo::prepareData(x=xdouble, p=pt, b=at, factors=factors, xy=FALSE)
TestData <- TestData[, -3]
names(TestData)[2] <- names(x)
}else{
TestData <- dismo::prepareData(x, p=pt, b=at, factors=factors, xy=FALSE)
}
if(any(is.na(TestData[TestData[,"pb"]==1,]))) {
cat(paste("\n", "WARNING: presence locations with missing data removed from evaluation data","\n\n",sep = ""))
}
TestValid <- complete.cases(TestData[TestData[,"pb"]==1,])
pt <- pt[TestValid,]
if(any(is.na(TestData[TestData[,"pb"]==0,]))) {
cat(paste("\n", "WARNING: background locations with missing data removed from evaluation data","\n\n",sep = ""))
}
TestValid <- complete.cases(TestData[TestData[,"pb"]==0,])
at <- at[TestValid,]
TestData <- dismo::prepareData(x, p=pt, b=at, factors=factors, xy=FALSE)
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TestData <- dismo::prepareData(x=xdouble, p=pt, b=at, factors=factors, xy=FALSE)
TestData <- TestData[, -3]
names(TestData)[2] <- names(x)
}else{
TestData <- dismo::prepareData(x, p=pt, b=at, factors=factors, xy=FALSE)
}
}
}
#
# check if TestData is different from TrainData
if (identical(TrainData, TestData) == T) {no.tests <- TRUE}
#
# include all possible factor levels in TrainData (especially important if models are kept)
if (is.null(factors)==F && is.null(x)==T && no.tests==F) {
for (i in 1:length(factors)) {
if (identical(levels(droplevels(TrainData[,factors[i]])), levels(droplevels(TestData[,factors[i]])))==F) {
cat(paste("\n", "WARNING: differences in factor levels between calibration and evaluation data (variable ", factors[i], ")", "\n", sep = ""))
cat(paste("Same levels set for both data sets to avoid problems with some evaluations", "\n", sep = ""))
cat(paste("However, some predictions may still fail", "\n", sep = ""))
uniquelevels <- unique(c(levels(droplevels(TrainData[,factors[i]])), levels(droplevels(TestData[,factors[i]]))))
levels(TrainData[,factors[i]]) <- uniquelevels
levels(TestData[,factors[i]]) <- uniquelevels
}
}
}
if(is.null(factors)==F && is.null(x)==F) {
if(models.keep==T) {
categories <- as.list(factors)
names(categories) <- factors
}
for (i in 1:length(factors)) {
all.categories <- raster::freq(x[[which(names(x) == factors[i])]])[,1]
all.categories <- all.categories[is.na(all.categories) == F]
all.categories <- as.character(all.categories)
if(models.keep==T) {
categories[[as.name(factors[i])]] <- all.categories
}
train.categories <- levels(droplevels(TrainData[,factors[i]]))
new.categories <- c(all.categories[is.na(match(all.categories, train.categories))])
if (length(new.categories) > 0) {
cat(paste("\n", "The following levels were initially not captured by TrainData for factor '", factors[i], "'\n", sep = ""))
print(new.categories)
if (is.null(x)==F && is.null(p)==F && is.null(a)==F) {
# step 1: search if suitable presence locations in TestData
if (no.tests == F){
for (j in 1:length(new.categories)) {
if (any(TestData[TestData[,"pb"]==1, factors[i]] == new.categories[j])) {
cat(paste("Warning: level '", new.categories[j], "' available for presence location in Test Data", "\n", sep = ""))
}
}
}
# step 2: stratified background sample
strat1 <- raster::sampleStratified(x[[which(names(x) == factors[i])]], size=1, exp=1, na.rm=TRUE, xy=FALSE)
strat1 <- strat1[which(strat1[,2] %in% new.categories), 1]
xy1 <- raster::xyFromCell(x[[which(names(x) == factors[i])]], cell=strat1, spatial=FALSE)
a <- rbind(a, xy1)
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TrainData <- dismo::prepareData(x=xdouble, p, b=a, factors=factors, xy=FALSE)
TrainData <- TrainData[, -3]
names(TrainData)[2] <- names(x)
}else{
TrainData <- dismo::prepareData(x, p, b=a, factors=factors, xy=FALSE)
}
TrainValid <- complete.cases(TrainData[TrainData[,"pb"]==0,])
a <- a[TrainValid,]
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TrainData <- dismo::prepareData(x=xdouble, p, b=a, factors=factors, xy=FALSE)
TrainData <- TrainData[, -3]
names(TrainData)[2] <- names(x)
}else{
TrainData <- dismo::prepareData(x, p, b=a, factors=factors, xy=FALSE)
}
train.categories <- levels(droplevels(TrainData[,factors[i]]))
new.categories <- all.categories[is.na(match(all.categories, train.categories))]
if (length(new.categories) == 0) {
cat(paste("All levels have now been included as background data for TrainData for factor '", factors[i], "'\n", sep = ""))
}else{
cat(paste("The following levels were not captured by TrainData for factor '", factors[i], "'\n", sep = ""))
print(new.categories)
cat(paste("\n", "Attempt to include these levels was complicated by missing values in other layers", "\n", sep = ""))
}
}
}
# step 3: also modify test data, but only if no circular neighbourhood
if (no.tests == F) {
test.categories <- levels(droplevels(TestData[,factors[i]]))
new.categories <- c(all.categories[is.na(match(all.categories, test.categories))])
if (length(new.categories)>0 && SSB.reduce==F) {
cat(paste("\n", "The following levels were initially not captured by TestData for factor '", factors[i], "'\n", sep = ""))
print(new.categories)
if (is.null(x)==F && is.null(pt)==F && is.null(at)==F) {
strat1 <- raster::sampleStratified(x[[which(names(x) == factors[i])]], size=1, exp=1, na.rm=TRUE, xy=FALSE)
strat1 <- strat1[which(strat1[,2] %in% new.categories), 1]
xy1 <- raster::xyFromCell(x[[which(names(x) == factors[i])]], cell=strat1, spatial=FALSE)
at <- rbind(at, xy1)
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TestData <- dismo::prepareData(x=xdouble, p=pt, b=at, factors=factors, xy=FALSE)
TestData <- TestData[, -3]
names(TestData)[2] <- names(x)
}else{
TestData <- dismo::prepareData(x, p=pt, b=at, factors=factors, xy=FALSE)
}
TestValid <- complete.cases(TestData[TestData[,"pb"]==0,])
at <- at[TestValid,]
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
TestData <- dismo::prepareData(x=xdouble, p=pt, b=at, factors=factors, xy=FALSE)
TestData <- TestData[, -3]
names(TestData)[2] <- names(x)
}else{
TestData <- dismo::prepareData(x, p=pt, b=at, factors=factors, xy=FALSE)
}
test.categories <- levels(droplevels(TestData[,factors[i]]))
new.categories <- all.categories[is.na(match(all.categories, test.categories))]
if (length(new.categories) == 0) {
cat(paste("All levels have now been included as background data for TestData for factor '", factors[i], "'\n", sep = ""))
}else{
cat(paste("The following levels were not captured by TestData for factor '", factors[i], "'\n", sep = ""))
print(new.categories)
cat(paste("\n", "Attempt to include these levels was complicated by missing values in other layers", "\n", sep = ""))
}
}
}
if (length(new.categories)>0 && SSB.reduce==T) {
cat(paste("\n", "Note that the following levels were not captured in the circular neighbourhood by TestData for factor '", factors[i], "'\n", sep = ""))
print(new.categories)
}
}
}
}
#
factlevels <- NULL
if (is.null(factors) == F) {
factlevels <- list()
for (i in 1:length(factors)) {
factlevels[[i]] <- levels(TrainData[, factors[i]])
names(factlevels)[i] <- factors[i]
}
}
#
if (sum(ws, na.rm=T) > 0) {
cat(paste("\n", "Summary of Training data set used for calibrations (rows: ", nrow(TrainData), ")\n", sep = ""))
print(summary(TrainData))
cat(paste("\n"))
utils::str(TrainData)
if (no.tests == F) {
cat(paste("\n", "Summary of Testing data set used for evaluations (rows: ", nrow(TestData), ")\n", sep = ""))
print(summary(TestData))
cat(paste("\n"))
utils::str(TestData)
}else{
cat(paste("\n", "(no tests with separate data set)", "\n", sep = ""))
}
}
#
dummy.vars.noDOMAIN <- NULL
#
if(models.keep == T) {
models <- list(MAXENT=NULL, MAXNET=NULL, MAXLIKE=NULL, GBM=NULL, GBMSTEP=NULL, RF=NULL, CF=NULL, GLM=NULL,
GLMSTEP=NULL, GAM=NULL, GAMSTEP=NULL, MGCV=NULL, MGCVFIX=NULL, EARTH=NULL, RPART=NULL,
NNET=NULL, FDA=NULL, SVM=NULL, SVME=NULL, GLMNET=NULL, BIOCLIM.O=NULL, BIOCLIM=NULL, DOMAIN=NULL, MAHAL=NULL, MAHAL01=NULL,
formulae=NULL, output.weights=NULL,
TrainData=NULL, TestData=NULL, p=NULL, a=NULL, pt=NULL, at=NULL,
MAXENT.a=NULL,
vars=NULL, factors=NULL, categories=NULL, dummy.vars=NULL, dummy.vars.noDOMAIN=NULL,
thresholds=NULL, threshold.method=NULL, threshold.sensitivity=NULL, threshold.PresenceAbsence=NULL,
species.name=NULL)
models$TrainData <- TrainData
models$p <- p
models$a <- a
models$MAXENT.a <- MAXENT.a
if (no.tests==F) {models$pt <- pt}
if (no.tests==F) {models$at <- at}
vars <- names(TrainData)
vars <- vars[which(vars!="pb")]
models$vars <- vars
models$factors <- factors
if(is.null(factors)==F) {models$categories <- categories}
models$dummy.vars <- dummy.vars
models$threshold.method <- threshold.method
models$threshold.sensitivity <- threshold.sensitivity
if (threshold.PresenceAbsence == T) {
models$threshold.PresenceAbsence <- TRUE
}else{
models$threshold.PresenceAbsence <- FALSE
}
models$species.name <- species.name
if (no.tests == F) {models$TestData <- TestData}
}else{
models <- NULL
}
#
if(ws["GBMSTEP"] > 0) {
TrainData.orig <- TrainData
assign("TrainData.orig", TrainData.orig, envir=.BiodiversityR)
}
#
# Data frames for distance-based methods, maxnet, SVME and GLMNET
TrainData.vars <- TrainData[, names(TrainData) != "pb", drop=F]
assign("TrainData.vars", TrainData.vars, envir=.BiodiversityR)
TrainData.pa <- as.vector(TrainData[ , "pb"])
assign("TrainData.pa", TrainData.pa, envir=.BiodiversityR)
TrainData.numvars <- TrainData.vars
if (is.null(factors) == F) {
for (i in 1:length(factors)) {
TrainData.numvars <- TrainData.numvars[, which(names(TrainData.numvars) != factors[i]), drop=F]
}
}
num.vars <- names(TrainData.numvars)
assign("TrainData.numvars", TrainData.numvars, envir=.BiodiversityR)
TrainData.pres <- TrainData[TrainData[,"pb"]==1,,drop=F]
TrainData.pres <- TrainData.pres[,names(TrainData.pres) != "pb", drop=F]
assign("TrainData.pres", TrainData.pres, envir=.BiodiversityR)
var.names <- names(TrainData.pres)
if (no.tests == F) {
TestData.vars <- TestData[,names(TestData) != "pb", drop=F]
assign("TestData.vars", TestData.vars, envir=.BiodiversityR)
TestData.numvars <- TestData.vars
if (is.null(factors) == F) {
for (i in 1:length(factors)) {
TestData.numvars <- TestData.numvars[, which(names(TestData.numvars) != factors[i]), drop=F]
}
}
assign("TestData.numvars", TestData.numvars, envir=.BiodiversityR)
}
#
# make MAXENT.TrainData
if (ws["MAXENT"] > 0 || ws["MAXLIKE"] > 0) {
# make these work when x is not provided (14-JUNE-2020)
if (is.null(x) == FALSE) {
if (is.null(MAXENT.a)==T) {
MAXENT.a <- dismo::randomPoints(x[[1]], n=MAXENT.an, p=p, excludep=T)
}
colnames(MAXENT.a) <- colnames(p)
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
MAXENT.TrainData <- dismo::prepareData(x, p, b=MAXENT.a, factors=factors, xy=FALSE)
MAXENT.TrainData <- MAXENT.TrainData[, -3]
names(MAXENT.TrainData)[2] <- names(x)
}else{
MAXENT.TrainData <- dismo::prepareData(x, p, b=MAXENT.a, factors=factors, xy=FALSE)
}
if(any(is.na(MAXENT.TrainData[MAXENT.TrainData[,"pb"]==0,]))) {
cat(paste("\n", "WARNING: background locations with missing data removed from MAXENT calibration data","\n\n",sep = ""))
}
TrainValid <- complete.cases(MAXENT.TrainData[MAXENT.TrainData[,"pb"]==0,])
MAXENT.a <- MAXENT.a[TrainValid,]
if (length(names(x)) == 1) {
xdouble <- raster::stack(x, x)
MAXENT.TrainData <- dismo::prepareData(x=xdouble, p, b=MAXENT.a, factors=factors, xy=FALSE)
MAXENT.TrainData <- MAXENT.TrainData[, -3]
names(MAXENT.TrainData)[2] <- names(x)
}else{
MAXENT.TrainData <- dismo::prepareData(x, p, b=MAXENT.a, factors=factors, xy=FALSE)
}
MAXENT.pa <- as.vector(MAXENT.TrainData[ , "pb"])
MAXENT.TrainData <- MAXENT.TrainData[, which(names(MAXENT.TrainData) != "pb"), drop=F]
}else{
MAXENT.pa <- as.vector(TrainData[ , "pb"])
MAXENT.TrainData <- TrainData[, which(names(TrainData) != "pb"), drop=F]
}
cat(paste("\n", "Summary of Training data set used for calibration of MAXENT or MAXLIKE model (rows: ", nrow(MAXENT.TrainData), ", presence locations: ", sum(MAXENT.pa), ")\n", sep = ""))
print(summary(MAXENT.TrainData))
assign("MAXENT.TrainData", MAXENT.TrainData, envir=.BiodiversityR)
assign("MAXENT.pa", MAXENT.pa, envir=.BiodiversityR)
}
#
eval.table <- as.numeric(rep(NA, 8))
names(eval.table) <- c("AUC", "TSS", "SEDI", "TSS.fixed", "SEDI.fixed", "FNR.fixed", "MCR.fixed", "AUCdiff")
eval.table <- as.data.frame(t(eval.table))[FALSE, ]
#
newVIF <- NULL
if (VIF == T && length(names(TrainData.vars)) > 1 ) {
# if (! require(car)) {stop("Please install the car package")}
# only use background data
TrainDataNum <- TrainData[TrainData[,"pb"]==0,]
LM.formula <- ensemble.formulae(TrainData, factors=factors)$RF.formula
# create possible response
TrainDataNum[,"pb"] <- mean(as.numeric(TrainDataNum[,2]))
vifresult <- NULL
if (CATCH.OFF == F) {
tryCatch(vifresult <- car::vif(lm(formula=LM.formula, data=TrainDataNum)),
error= function(err) {print(paste("\n", "WARNING: VIF (package: car) evaluation failed", "\n", sep=""))},
silent=F)
}else{
vifresult <- car::vif(lm(formula=LM.formula, data=TrainDataNum))
}
if (is.null(vifresult) == F) {
cat(paste("\n", "Variance inflation (package: car)", "\n", sep = ""))
print(vifresult)
}
cat(paste("\n", "VIF directly calculated from linear model with focal numeric variable as response", "\n", sep = ""))
TrainDataNum <- TrainDataNum[, names(TrainDataNum)!="pb"]
varnames <- names(TrainDataNum)
newVIF <- numeric(length=length(varnames))
newVIF[] <- NA
names(newVIF) <- varnames
for (i in 1:length(varnames)) {
response.name <- varnames[i]
explan.names <- varnames[-i]
if ((response.name %in% factors) == F) {
LM.formula <- as.formula(paste(response.name, "~", paste(explan.names, collapse="+"), sep=""))
newVIF[i] <- summary(lm(formula=LM.formula, data=TrainDataNum))$r.squared
}
}
newVIF <- 1/(1-newVIF)
newVIF <- sort(newVIF, decreasing=T, na.last=T)
print(newVIF)
}
if (COR == T) {
TrainDataNum <- TrainData[, names(TrainData) != "pb", drop=F]
if(is.null(factors)) {
for (i in 1:length(factors)) {
TrainDataNum <- TrainDataNum[, names(TrainDataNum) != factors[i], drop=F]
}
}
corresult <- cor(TrainDataNum)
corresult <- round(100*corresult, digits=2)
cat(paste("\n", "Correlation between numeric variables (as percentage)", "\n", sep = ""))
print(corresult)
}
#
modelresults <- data.frame(array(dim=c(nrow(TrainData), length(ws)+1), 0))
names(modelresults) <- c("MAXENT", "MAXNET", "MAXLIKE", "GBM", "GBMSTEP", "RF", "CF", "GLM", "GLMSTEP", "GAM", "GAMSTEP", "MGCV",
"MGCVFIX", "EARTH", "RPART", "NNET", "FDA", "SVM", "SVME", "GLMNET",
"BIOCLIM.O", "BIOCLIM", "DOMAIN", "MAHAL", "MAHAL01", "ENSEMBLE")
TrainData <- cbind(TrainData, modelresults)
assign("TrainData", TrainData, envir=.BiodiversityR)
if (no.tests == F) {
modelresults <- data.frame(array(dim=c(nrow(TestData), length(ws)+1), 0))
names(modelresults) <- c("MAXENT", "MAXNET", "MAXLIKE", "GBM", "GBMSTEP", "RF", "CF", "GLM", "GLMSTEP", "GAM", "GAMSTEP", "MGCV",
"MGCVFIX", "EARTH", "RPART", "NNET", "FDA", "SVM", "SVME", "GLMNET",
"BIOCLIM.O", "BIOCLIM", "DOMAIN", "MAHAL", "MAHAL01", "ENSEMBLE")
TestData <- cbind(TestData, modelresults)
assign("TestData", TestData, envir=.BiodiversityR)
}
weights <- as.numeric(array(dim=length(ws), 0))
names(weights) <- c("MAXENT", "MAXNET", "MAXLIKE", "GBM", "GBMSTEP", "RF", "CF", "GLM", "GLMSTEP", "GAM", "GAMSTEP", "MGCV", "MGCVFIX",
"EARTH", "RPART", "NNET", "FDA", "SVM", "SVME", "GLMNET", "BIOCLIM.O", "BIOCLIM", "DOMAIN", "MAHAL", "MAHAL01")
#
if(evaluations.keep==T) {
evaluations <- list(MAXENT.C=NULL, MAXENT.T=NULL, MAXNET.C=NULL, MAXNET.T=NULL, MAXLIKE.C=NULL, MAXLIKE.T=NULL,
GBM.trees=NULL, GBM.C=NULL, GBM.T=NULL, GBMSTEP.trees=NULL, GBMSTEP.C=NULL, GBMSTEP.T=NULL,
RF.C=NULL, RF.T=NULL, CF.C=NULL, CF.T=NULL, GLM.C=NULL, GLM.T=NULL, GLMS.C=NULL, GLMS.T=NULL,
GAM.C=NULL, GAM.T=NULL, GAMS.C=NULL, GAMS.T=NULL, MGCV.C=NULL, MGCV.T=NULL, MGCVF.C=NULL, MGCVF.T=NULL,
EARTH.C=NULL, EARTH.T=NULL, RPART.C=NULL, RPART.T=NULL,
NNET.C=NULL, NNET.T=NULL, FDA.C=NULL, FDA.T=NULL, SVM.C=NULL, SVM.T=NULL, SVME.C=NULL, SVME.T=NULL, GLMNET.C=NULL, GLMNET.T=NULL,
BIOCLIM.O.C=NULL, BIOCLIM.O.T=NULL, BIOCLIM.C=NULL, BIOCLIM.T=NULL, DOMAIN.C=NULL, DOMAIN.T=NULL, MAHAL.C=NULL, MAHAL.T=NULL, MAHAL01.C=NULL, MAHAL01.T=NULL,
ENSEMBLE.C=NULL, ENSEMBLE.T=NULL, STRATEGY.weights=NULL,
TrainData=NULL, TestData=NULL, MAXENT.a=NULL,
factors=NULL, dummy.vars=NULL)
evaluations$factors <- factors
evaluations$dummy.vars <- dummy.vars
}else{
evaluations <- NULL
}
#
Yweights1 <- Yweights
if (Yweights == "BIOMOD") {
#have equal weight of presence vs. background
Yweights1 <- numeric(length = nrow(TrainData))
pressum <- sum(TrainData[,"pb"]==1)
abssum <- sum(TrainData[,"pb"]==0)
Yweights1[which(TrainData[, "pb"] == 1)] <- 1
Yweights1[which(TrainData[, "pb"] == 0)] <- pressum/abssum
}
if (Yweights == "equal") {
Yweights1 <- numeric(length = nrow(TrainData))
Yweights1[] <- 1
}
assign("Yweights1", Yweights1, envir=.BiodiversityR)
#
# prepare for calculation of deviance
obs1 <- TrainData[, "pb"]
#
# count models
mc <- 0
#
# Different modelling algorithms
#
if(ws["MAXENT"] > 0 && length(names(MAXENT.TrainData)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "MAXENT model can therefore not be calibrated", "\n", sep = ""))
ws["MAXENT"] <- weights["MAXENT"] <- 0
}
if (ws["MAXENT"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Maximum entropy algorithm (package: dismo)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
# Put the file 'maxent.jar' in the 'java' folder of dismo
# the file 'maxent.jar' can be obtained from from http://www.cs.princeton.edu/~schapire/maxent/.
jar <- paste(system.file(package="dismo"), "/java/maxent.jar", sep='')
if(is.null(MAXENT.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- dismo::maxent(x=MAXENT.TrainData, p=MAXENT.pa, factors=factors, path=MAXENT.path),
error= function(err) {print(paste("MAXENT calibration failed"))},
silent=F)
}else{
results <- dismo::maxent(x=MAXENT.TrainData, p=MAXENT.pa, factors=factors, path=MAXENT.path)
}
}else{
results <- MAXENT.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data of other algorithms","\n\n", sep = ""))
TrainData[,"MAXENT"] <- dismo::predict(object=results, x=TrainData.vars)
if (PROBIT == T) {
if(is.null(MAXENT.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ MAXENT"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- MAXENT.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n", sep = ""))
TrainData[,"MAXENT.step1"] <- TrainData[,"MAXENT"]
TrainData[,"MAXENT"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "MAXENT"] < 0), "MAXENT"] <- 0
TrainData[which(TrainData[, "MAXENT"] > 1), "MAXENT"] <- 1
}
pred1 <- TrainData[, "MAXENT"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"MAXENT"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"MAXENT"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
# thresholds["MAXENT"] <- threshold(eval1, sensitivity=threshold.sensitivity)[[threshold.method]]
thresholds["MAXENT"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["MAXENT"]))
weights["MAXENT"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["MAXENT"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n", sep = ""))
TestData[,"MAXENT"] <- dismo::predict(object=results, x=TestData.vars)
if (PROBIT == T) {
TestData[,"MAXENT.step1"] <- TestData[,"MAXENT"]
TestData[,"MAXENT"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "MAXENT"] < 0), "MAXENT"] <- 0
TestData[which(TestData[, "MAXENT"] > 1), "MAXENT"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"MAXENT"]
TestAbs <- TestData[TestData[,"pb"]==0,"MAXENT"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["MAXENT"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["MAXENT"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "MAXENT"
}else{
cat(paste("\n", "WARNING: MAXENT evaluation failed","\n\n",sep = ""))
ws["MAXENT"] <- 0
weights["MAXENT"] <- 0
TrainData[,"MAXENT"] <- 0
TestData[,"MAXENT"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$MAXENT.C <- eval1
evaluations$MAXENT.T <- eval2
}
if (models.keep==T) {
models$MAXENT <- results
models$MAXENT.PROBIT <- results2
}
}else{
cat(paste("\n", "WARNING: MAXENT calibration failed", "\n", "\n"))
ws["MAXENT"] <- weights["MAXENT"] <- 0
TrainData[,"MAXENT"] <- 0
if (no.tests == F) {TestData[,"MAXENT"] <- 0}
}
}
if(ws["MAXNET"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "MAXNET model can therefore not be calibrated", "\n", sep = ""))
ws["MAXNET"] <- weights["MAXNET"] <- 0
}
if (ws["MAXNET"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Maximum entropy algorithm (package: maxnet)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(MAXNET.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- maxnet::maxnet(p=TrainData.pa, data=TrainData.vars, f=maxnet::maxnet.formula(p=TrainData.pa, data=TrainData.vars, classes=MAXNET.classes)),
error= function(err) {print(paste("MAXNET calibration failed"))},
silent=F)
}else{
results <- maxnet::maxnet(p=TrainData.pa, data=TrainData.vars, f=maxnet::maxnet.formula(p=TrainData.pa, data=TrainData.vars, classes=MAXNET.classes))
}
}else{
results <- MAXNET.OLD
}
if (is.null(results) == F) {
TrainData[,"MAXNET"] <- predict.maxnet2(object=results, newdata=TrainData.vars, clamp=MAXNET.clamp, type=MAXNET.type)
if (PROBIT == T) {
if(is.null(MAXNET.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ MAXNET"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- MAXNET.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n", sep = ""))
TrainData[,"MAXNET.step1"] <- TrainData[,"MAXNET"]
TrainData[,"MAXNET"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "MAXNET"] < 0), "MAXNET"] <- 0
TrainData[which(TrainData[, "MAXNET"] > 1), "MAXNET"] <- 1
}
pred1 <- TrainData[, "MAXNET"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"MAXNET"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"MAXNET"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
# thresholds["MAXNET"] <- threshold(eval1, sensitivity=threshold.sensitivity)[[threshold.method]]
thresholds["MAXNET"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["MAXNET"]))
weights["MAXNET"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["MAXNET"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n", sep = ""))
TestData[,"MAXNET"] <- predict.maxnet2(object=results, newdata=TestData.vars, clamp=MAXNET.clamp, type=MAXNET.type)
if (PROBIT == T) {
TestData[,"MAXNET.step1"] <- TestData[,"MAXNET"]
TestData[,"MAXNET"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "MAXNET"] < 0), "MAXNET"] <- 0
TestData[which(TestData[, "MAXNET"] > 1), "MAXNET"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"MAXNET"]
TestAbs <- TestData[TestData[,"pb"]==0,"MAXNET"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["MAXNET"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["MAXNET"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "MAXNET"
}else{
cat(paste("\n", "WARNING: MAXNET evaluation failed","\n\n",sep = ""))
ws["MAXNET"] <- 0
weights["MAXNET"] <- 0
TrainData[,"MAXNET"] <- 0
TestData[,"MAXNET"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$MAXNET.C <- eval1
evaluations$MAXNET.T <- eval2
}
if (models.keep==T) {
models$MAXNET <- results
models$MAXNET.PROBIT <- results2
models$formulae$MAXNET.clamp <- MAXNET.clamp
models$formulae$MAXNET.type <- MAXNET.type
}
}else{
cat(paste("\n", "WARNING: MAXNET calibration failed", "\n", "\n"))
ws["MAXNET"] <- weights["MAXNET"] <- 0
TrainData[,"MAXNET"] <- 0
if (no.tests == F) {TestData[,"MAXNET"] <- 0}
}
}
if(ws["MAXLIKE"] > 0 && length(names(MAXENT.TrainData)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "MAXLIKE model can therefore not be calibrated", "\n", sep = ""))
ws["MAXLIKE"] <- weights["MAXLIKE"] <- 0
}
if (ws["MAXLIKE"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Maxlike algorithm (package: maxlike)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
MAXLIKE.x <- MAXENT.TrainData[MAXENT.pa == 1, ]
MAXLIKE.z <- MAXENT.TrainData[MAXENT.pa == 0, ]
if(is.null(MAXLIKE.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- maxlike::maxlike(formula=MAXLIKE.formula, rasters=NULL, points=NULL, x=MAXLIKE.x, z=MAXLIKE.z,
method=MAXLIKE.method, control=list(maxit=maxit)),
error= function(err) {print(paste("MAXLIKE calibration failed"))},
silent=F)
}else{
results <- maxlike::maxlike(formula=MAXLIKE.formula, rasters=NULL, points=NULL, x=MAXLIKE.x, z=MAXLIKE.z,
method=MAXLIKE.method, control=list(maxit=maxit))
}
}else{
results <- MAXLIKE.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data of other algorithms", "\n\n", sep = ""))
TrainData[, "MAXLIKE"] <- predict(results, newdata=TrainData.numvars)
if (PROBIT == T) {
if(is.null(MAXLIKE.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ MAXLIKE"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- MAXLIKE.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n", sep = ""))
TrainData[,"MAXLIKE.step1"] <- TrainData[,"MAXLIKE"]
TrainData[,"MAXLIKE"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "MAXLIKE"] < 0), "MAXLIKE"] <- 0
TrainData[which(TrainData[, "MAXLIKE"] > 1), "MAXLIKE"] <- 1
}
pred1 <- TrainData[, "MAXLIKE"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1, "MAXLIKE"]
TrainAbs <- TrainData[TrainData[,"pb"]==0, "MAXLIKE"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
# thresholds["MAXLIKE"] <- threshold(eval1, sensitivity=threshold.sensitivity)[[threshold.method]]
thresholds["MAXLIKE"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["MAXLIKE"]))
weights["MAXLIKE"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["MAXLIKE"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n", sep = ""))
TestData[, "MAXLIKE"] <- predict(results, newdata=TestData.numvars)
if (PROBIT == T) {
TestData[,"MAXLIKE.step1"] <- TestData[,"MAXLIKE"]
TestData[,"MAXLIKE"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "MAXLIKE"] < 0), "MAXLIKE"] <- 0
TestData[which(TestData[, "MAXLIKE"] > 1), "MAXLIKE"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1, "MAXLIKE"]
TestAbs <- TestData[TestData[,"pb"]==0, "MAXLIKE"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["MAXLIKE"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["MAXLIKE"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "MAXLIKE"
}else{
cat(paste("\n", "WARNING: MAXLIKE evaluation failed","\n\n",sep = ""))
ws["MAXLIKE"] <- 0
weights["MAXLIKE"] <- 0
TrainData[,"MAXLIKE"] <- 0
TestData[,"MAXLIKE"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$MAXLIKE.C <- eval1
evaluations$MAXLIKE.T <- eval2
}
if (models.keep==T) {
models$MAXLIKE <- results
models$MAXLIKE.PROBIT <- results2
models$formulae$MAXLIKE.formula <- MAXLIKE.formula
}
}else{
cat(paste("\n", "WARNING: MAXLIKE calibration failed", "\n", "\n"))
ws["MAXLIKE"] <- weights["MAXLIKE"] <- 0
TrainData[,"MAXLIKE"] <- 0
if (no.tests == F) {TestData[,"MAXLIKE"] <- 0}
}
}
if(ws["GBM"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GBM model can therefore not be calibrated", "\n", sep = ""))
ws["GBM"] <- weights["GBM"] <- 0
}
if (ws["GBM"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Generalized boosted regression modeling (package: gbm) \n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GBM.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- gbm::gbm(formula=GBM.formula, data=TrainData, weights=Yweights1, distribution="bernoulli",
interaction.depth=7, shrinkage=0.001, bag.fraction=0.5, train.fraction=1,
n.trees=GBM.n.trees, verbose=F, cv.folds=5),
error= function(err) {print(paste("GBM calibration failed"))},
silent=F)
}else{
results <- gbm::gbm(formula=GBM.formula, data=TrainData, weights=Yweights1, distribution="bernoulli",
interaction.depth=7, shrinkage=0.001, bag.fraction=0.5, train.fraction=1,
n.trees=GBM.n.trees, verbose=F, cv.folds=5)
}
}else{
results <- GBM.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GBM"] <- gbm::predict.gbm(object=results, newdata=TrainData.vars, n.trees=GBM.n.trees, type="response")
if (PROBIT == T) {
if(is.null(GBM.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GBM"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GBM.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)", "\n\n", sep = ""))
TrainData[,"GBM.step1"] <- TrainData[,"GBM"]
TrainData[,"GBM"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "GBM"] < 0), "GBM"] <- 0
TrainData[which(TrainData[, "GBM"] > 1), "GBM"] <- 1
}
pred1 <- TrainData[, "GBM"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"GBM"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"GBM"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["GBM"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["GBM"]))
weights["GBM"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["GBM"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"GBM"] <- gbm::predict.gbm(object=results, newdata=TestData.vars, n.trees=GBM.n.trees, type="response")
if (PROBIT == T) {
TestData[,"GBM.step1"] <- TestData[,"GBM"]
TestData[,"GBM"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "GBM"] < 0), "GBM"] <- 0
TestData[which(TestData[, "GBM"] > 1), "GBM"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"GBM"]
TestAbs <- TestData[TestData[,"pb"]==0,"GBM"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["GBM"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["GBM"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "GBM"
}else{
cat(paste("\n", "WARNING: GBM evaluation failed","\n\n",sep = ""))
ws["GBM"] <- 0
weights["GBM"] <- 0
TrainData[,"GBM"] <- 0
TestData[,"GBM"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$GBM.trees <- results$n.trees
evaluations$GBM.C <- eval1
evaluations$GBM.T <- eval2
}
if (models.keep==T) {
models$GBM <- results
models$GBM.PROBIT <- results2
models$formulae$GBM.formula <- GBM.formula
}
}else{
cat(paste("\n", "WARNING: GBM calibration failed", "\n", "\n"))
ws["GBM"] <- weights["GBM"] <- 0
TrainData[,"GBM"] <- 0
if (no.tests == F) {TestData[,"GBM"] <- 0}
}
}
if(ws["GBMSTEP"] > 0 && length(names(TrainData.orig)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GBMSTEP model can therefore not be calibrated", "\n", sep = ""))
ws["GBMSTEP"] <- weights["GBMSTEP"]<- 0
}
if (ws["GBMSTEP"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". gbm step algorithm (package: dismo)\n", sep=""))
# require(gbm, quietly=T)
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GBMSTEP.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- dismo::gbm.step(data=TrainData.orig, gbm.y=1, gbm.x=GBMSTEP.gbm.x, family="bernoulli",
site.weights=Yweights1, tree.complexity=GBMSTEP.tree.complexity, learning.rate = GBMSTEP.learning.rate,
bag.fraction=GBMSTEP.bag.fraction, step.size=GBMSTEP.step.size, verbose=F, silent=F, plot.main=F),
error= function(err) {print(paste("stepwise GBM calibration failed"))},
silent=F)
}else{
results <- dismo::gbm.step(data=TrainData.orig, gbm.y=1, gbm.x=GBMSTEP.gbm.x, family="bernoulli",
site.weights=Yweights1, tree.complexity=GBMSTEP.tree.complexity, learning.rate = GBMSTEP.learning.rate,
bag.fraction=GBMSTEP.bag.fraction, step.size=GBMSTEP.step.size, verbose=F, silent=F, plot.main=F)
}
}else{
results <- GBMSTEP.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "stepwise GBM trees (target > 1000)", "\n", sep = ""))
print(results$n.trees)
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GBMSTEP"] <- gbm::predict.gbm(object=results, newdata=TrainData.vars, n.trees=GBM.n.trees, type="response")
if (PROBIT == T) {
if(is.null(GBMSTEP.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GBMSTEP"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GBMSTEP.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"GBMSTEP.step1"] <- TrainData[,"GBMSTEP"]
TrainData[,"GBMSTEP"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "GBMSTEP"] < 0), "GBMSTEP"] <- 0
TrainData[which(TrainData[, "GBMSTEP"] > 1), "GBMSTEP"] <- 1
}
pred1 <- TrainData[, "GBMSTEP"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"GBMSTEP"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"GBMSTEP"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["GBMSTEP"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["GBMSTEP"]))
weights["GBMSTEP"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["GBMSTEP"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"GBMSTEP"] <- gbm::predict.gbm(object=results, newdata=TestData.vars, n.trees=GBM.n.trees, type="response")
if (PROBIT == T) {
TestData[,"GBMSTEP.step1"] <- TestData[,"GBMSTEP"]
TestData[,"GBMSTEP"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "GBMSTEP"] < 0), "GBMSTEP"] <- 0
TestData[which(TestData[, "GBMSTEP"] > 1), "GBMSTEP"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"GBMSTEP"]
TestAbs <- TestData[TestData[,"pb"]==0,"GBMSTEP"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["GBMSTEP"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["GBMSTEP"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "GBMSTEP"
}else{
cat(paste("\n", "WARNING: stepwise GBM evaluation failed","\n\n",sep = ""))
ws["GBMSTEP"] <- 0
weights["GBMSTEP"] <- 0
TrainData[,"GBMSTEP"] <- 0
TestData[,"GBMSTEP"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$GBMSTEP.trees <- results$n.trees
evaluations$GBMSTEP.C <- eval1
evaluations$GBMSTEP.T <- eval2
}
if (models.keep==T) {
models$GBMSTEP <- results
models$GBMSTEP.PROBIT <- results2
}
}else{
cat(paste("\n", "WARNING: stepwise GBM calibration failed", "\n", "\n"))
ws["GBMSTEP"] <- weights["GBMSTEP"] <- 0
TrainData[,"GBMSTEP"] <- 0
if (no.tests == F) {TestData[,"GBMSTEP"] <- 0}
}
}
if(ws["RF"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "RF model can therefore not be calibrated", "\n", sep = ""))
ws["RF"] <- weights["RF"] <- 0
}
if (ws["RF"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Random forest algorithm (package: randomForest)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(RF.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- randomForest::randomForest(formula=RF.formula, ntree=RF.ntree, mtry=RF.mtry, data=TrainData, na.action=na.omit),
error= function(err) {print(paste("random forest calibration failed"))},
silent=F)
}else{
results <- randomForest::randomForest(formula=RF.formula, ntree=RF.ntree, mtry=RF.mtry, data=TrainData, na.action=na.omit)
}
}else{
results <- RF.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"RF"] <- predict.RF(object=results, newdata=TrainData.vars)
if (PROBIT == T) {
if(is.null(RF.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ RF"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- RF.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"RF.step1"] <- TrainData[,"RF"]
TrainData[,"RF"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "RF"] < 0), "RF"] <- 0
TrainData[which(TrainData[, "RF"] > 1), "RF"] <- 1
}
pred1 <- TrainData[, "RF"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"RF"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"RF"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["RF"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["RF"]))
weights["RF"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["RF"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"RF"] <- predict.RF(object=results, newdata=TestData.vars)
if (PROBIT == T) {
TestData[,"RF.step1"] <- TestData[,"RF"]
TestData[,"RF"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "RF"] < 0), "RF"] <- 0
TestData[which(TestData[, "RF"] > 1), "RF"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"RF"]
TestAbs <- TestData[TestData[,"pb"]==0,"RF"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["RF"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["RF"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "RF"
}else{
cat(paste("\n", "WARNING: random forest evaluation failed","\n\n",sep = ""))
ws["RF"] <- 0
weights["RF"] <- 0
TrainData[,"RF"] <- 0
TestData[,"RF"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$RF.C <- eval1
evaluations$RF.T <- eval2
}
if (models.keep==T) {
models$RF <- results
models$RF.PROBIT <- results2
models$formulae$RF.formula <- RF.formula
}
}else{
cat(paste("\n", "WARNING: random forest calibration failed", "\n", "\n"))
ws["RF"] <- weights["RF"] <- 0
TrainData[,"RF"] <- 0
if (no.tests == F) {TestData[,"RF"] <- 0}
}
}
if(ws["CF"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "CF model can therefore not be calibrated", "\n", sep = ""))
ws["CF"] <- weights["CF"] <- 0
}
if (ws["CF"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Random forest algorithm (package: party)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(CF.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- party::cforest(formula=CF.formula, data=TrainData, weights=Yweights1, control=party::cforest_unbiased(ntree=CF.ntree, mtry=CF.mtry)),
error= function(err) {print(paste("random forest calibration failed"))},
silent=F)
}else{
results <- party::cforest(formula=CF.formula, data=TrainData, weights=Yweights1, control=party::cforest_unbiased(ntree=CF.ntree, mtry=CF.mtry))
}
}else{
results <- CF.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"CF"] <- predict.CF(object=results, newdata=TrainData.vars)
if (PROBIT == T) {
if(is.null(CF.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ CF"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- CF.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"CF.step1"] <- TrainData[,"CF"]
TrainData[,"CF"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "CF"] < 0), "CF"] <- 0
TrainData[which(TrainData[, "CF"] > 1), "CF"] <- 1
}
pred1 <- TrainData[, "CF"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"CF"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"CF"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["CF"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["CF"]))
weights["CF"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["CF"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"CF"] <- predict.CF(object=results, newdata=TestData.vars)
if (PROBIT == T) {
TestData[,"CF.step1"] <- TestData[,"CF"]
TestData[,"CF"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "CF"] < 0), "CF"] <- 0
TestData[which(TestData[, "CF"] > 1), "CF"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"CF"]
TestAbs <- TestData[TestData[,"pb"]==0,"CF"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["CF"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["CF"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "CF"
}else{
cat(paste("\n", "WARNING: random forest evaluation failed","\n\n",sep = ""))
ws["CF"] <- 0
weights["CF"] <- 0
TrainData[,"CF"] <- 0
TestData[,"CF"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$CF.C <- eval1
evaluations$CF.T <- eval2
}
if (models.keep==T) {
models$CF <- results
models$CF.PROBIT <- results2
models$formulae$CF.formula <- CF.formula
}
}else{
cat(paste("\n", "WARNING: random forest calibration failed", "\n", "\n"))
ws["CF"] <- weights["CF"] <- 0
TrainData[,"CF"] <- 0
if (no.tests == F) {TestData[,"CF"] <- 0}
}
}
if(ws["GLM"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GLM model can therefore not be calibrated", "\n", sep = ""))
ws["GLM"] <- weights["GLM"] <- 0
}
if (ws["GLM"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Generalized Linear Model \n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GLM.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- glm(formula=GLM.formula, family=GLM.family, data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit)),
error= function(err) {print(paste("GLM calibration failed"))},
silent=F)
}else{
results <- glm(formula=GLM.formula, family=GLM.family, data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}
}else{
results <- GLM.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GLM"] <- predict.glm(object=results, newdata=TrainData.vars, type="response")
if (PROBIT == T) {
if(is.null(GLM.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GLM"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GLM.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"GLM.step1"] <- TrainData[,"GLM"]
TrainData[,"GLM"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "GLM"] < 0), "GLM"] <- 0
TrainData[which(TrainData[, "GLM"] > 1), "GLM"] <- 1
}
pred1 <- TrainData[, "GLM"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"GLM"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"GLM"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["GLM"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["GLM"]))
weights["GLM"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["GLM"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"GLM"] <- predict.glm(object=results, newdata=TestData.vars, type="response")
if (PROBIT == T) {
TestData[,"GLM.step1"] <- TestData[,"GLM"]
TestData[,"GLM"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "GLM"] < 0), "GLM"] <- 0
TestData[which(TestData[, "GLM"] > 1), "GLM"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"GLM"]
TestAbs <- TestData[TestData[,"pb"]==0,"GLM"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["GLM"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["GLM"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "GLM"
}else{
cat(paste("\n", "WARNING: GLM evaluation failed","\n\n",sep = ""))
ws["GLM"] <- 0
weights["GLM"] <- 0
TrainData[,"GLM"] <- 0
TestData[,"GLM"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$GLM.C <- eval1
evaluations$GLM.T <- eval2
}
if (models.keep==T) {
models$GLM <- results
models$GLM.PROBIT <- results2
models$formulae$GLM.formula <- GLM.formula
}
}else{
cat(paste("\n", "WARNING: GLM calibration failed", "\n", "\n"))
ws["GLM"] <- weights["GLM"] <- 0
TrainData[,"GLM"] <- 0
if (no.tests == F) {TestData[,"GLM"] <- 0}
}
}
if(ws["GLMSTEP"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GLMSTEP model can therefore not be calibrated", "\n", sep = ""))
ws["GLMSTEP"] <- weights["GLMSTEP"] <-0
}
if (ws["GLMSTEP"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Stepwise Generalized Linear Model \n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GLMSTEP.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- glm(formula=STEP.formula, family=GLM.family, data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit)),
error= function(err) {print(paste("first step of stepwise GLM calibration failed"))},
silent=F)
}else{
results <- glm(formula=STEP.formula, family=GLM.family, data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}
if (CATCH.OFF == F) {
tryCatch(results2 <- MASS::stepAIC(results, scope=GLMSTEP.scope, direction="both", trace=F, steps=GLMSTEP.steps, k=GLMSTEP.k),
error= function(err) {print(paste("stepwise GLM calibration failed"))},
silent=F)
}else{
results2 <- MASS::stepAIC(results, scope=GLMSTEP.scope, direction="both", trace=F, steps=GLMSTEP.steps, k=GLMSTEP.k)
}
}else{
results2 <- GLMSTEP.OLD
}
if (is.null(results2) == F) {
results <- results2
results2 <- NULL
cat(paste("\n", "stepwise GLM formula","\n\n",sep = ""))
print(formula(results))
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GLMSTEP"] <- predict.glm(object=results, newdata=TrainData.vars, type="response")
if (PROBIT == T) {
if(is.null(GLMSTEP.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GLMSTEP"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GLMSTEP.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"GLMSTEP.step1"] <- TrainData[,"GLMSTEP"]
TrainData[,"GLMSTEP"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "GLMSTEP"] < 0), "GLMSTEP"] <- 0
TrainData[which(TrainData[, "GLMSTEP"] > 1), "GLMSTEP"] <- 1
}
pred1 <- TrainData[, "GLMSTEP"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"GLMSTEP"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"GLMSTEP"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["GLMSTEP"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["GLMSTEP"]))
weights["GLMSTEP"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["GLMSTEP"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"GLMSTEP"] <- predict.glm(object=results, newdata=TestData.vars, type="response")
if (PROBIT == T) {
TestData[,"GLMSTEP.step1"] <- TestData[,"GLMSTEP"]
TestData[,"GLMSTEP"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "GLMSTEP"] < 0), "GLMSTEP"] <- 0
TestData[which(TestData[, "GLMSTEP"] > 1), "GLMSTEP"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"GLMSTEP"]
TestAbs <- TestData[TestData[,"pb"]==0,"GLMSTEP"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["GLMSTEP"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["GLMSTEP"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "GLMSTEP"
}else{
cat(paste("\n", "WARNING: stepwise GLM evaluation failed","\n\n",sep = ""))
ws["GLMSTEP"] <- 0
weights["GLMSTEP"] <- 0
TrainData[,"GLMSTEP"] <- 0
TestData[,"GLMSTEP"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$GLMS.C <- eval1
evaluations$GLMS.T <- eval2
}
if (models.keep==T) {
models$GLMSTEP <- results
models$GLMSTEP.PROBIT <- results2
models$formulae$STEP.formula <- STEP.formula
models$formulae$GLMSTEP.scope <- GLMSTEP.scope
}
}else{
cat(paste("\n", "WARNING: stepwise GLM calibration failed", "\n", "\n"))
ws["GLMSTEP"] <- weights["GLMSTEP"] <- 0
TrainData[,"GLMSTEP"] <- 0
if (no.tests == F) {TestData[,"GLMSTEP"] <- 0}
}
}
if(ws["GAM"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GAM model can therefore not be calibrated", "\n", sep = ""))
ws["GAM"] <- weights["GAM"] <- 0
}
if (ws["GAM"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Generalized Additive Model (package: gam)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GAM.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- gam::gam(formula=GAM.formula, family=GAM.family, data=TrainData, weights=Yweights1, control=gam::gam.control(maxit=maxit, bf.maxit=50)),
error= function(err) {print(paste("GAM (package: gam) calibration failed"))},
silent=F)
}else{
results <- gam::gam(formula=GAM.formula, family=GAM.family, data=TrainData, weights=Yweights1, control=gam::gam.control(maxit=maxit, bf.maxit=50))
}
}else{
results <- GAM.OLD
}
if (is.null(results) == F) {
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GAM"] <- gam::predict.Gam(object=results, newdata=TrainData.vars, type="response")
if (PROBIT == T) {
if(is.null(GAM.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GAM"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GAM.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"GAM.step1"] <- TrainData[,"GAM"]
TrainData[,"GAM"] <- predict.glm(object=results2, newdata=TrainData, type="response")
}else{
TrainData[which(TrainData[, "GAM"] < 0), "GAM"] <- 0
TrainData[which(TrainData[, "GAM"] > 1), "GAM"] <- 1
}
pred1 <- TrainData[, "GAM"]
pred1[pred1 < 0.0000000001] <- 0.0000000001
pred1[pred1 > 0.9999999999] <- 0.9999999999
cat(paste("Residual deviance (dismo package): ", dismo::calc.deviance(obs=obs1, pred=pred1, calc.mean=F), "\n\n", sep = ""))
TrainPres <- TrainData[TrainData[,"pb"]==1,"GAM"]
TrainAbs <- TrainData[TrainData[,"pb"]==0,"GAM"]
eval1 <- dismo::evaluate(p=TrainPres, a=TrainAbs)
print(eval1)
thresholds["GAM"] <- ensemble.threshold(eval1, threshold.method=threshold.method, threshold.sensitivity=threshold.sensitivity, threshold.PresenceAbsence=threshold.PresenceAbsence, Pres=TrainPres, Abs=TrainAbs)
cat(paste("\n", "Threshold (method: ", threshold.method, ") \n", sep = ""))
print(as.numeric(thresholds["GAM"]))
weights["GAM"] <- max(c(eval1@auc, 0), na.rm=T)
AUC.calibration["GAM"] <- max(c(eval1@auc, 0), na.rm=T)
if (no.tests == F) {
cat(paste("\n", "Evaluation with test data","\n\n",sep = ""))
TestData[,"GAM"] <- gam::predict.Gam(object=results, newdata=TestData.vars, type="response")
if (PROBIT == T) {
TestData[,"GAM.step1"] <- TestData[,"GAM"]
TestData[,"GAM"] <- predict.glm(object=results2, newdata=TestData, type="response")
}else{
TestData[which(TestData[, "GAM"] < 0), "GAM"] <- 0
TestData[which(TestData[, "GAM"] > 1), "GAM"] <- 1
}
TestPres <- TestData[TestData[,"pb"]==1,"GAM"]
TestAbs <- TestData[TestData[,"pb"]==0,"GAM"]
eval2 <- dismo::evaluate(p=TestPres, a=TestAbs)
if (is.null(eval2) == F) {
print(eval2)
weights["GAM"] <- max(c(eval2@auc, 0), na.rm=T)
AUC.testing["GAM"] <- max(c(eval2@auc, 0), na.rm=T)
cat(paste("\n", "Results with ensemble.evaluate", "\n\n", sep = ""))
eval3 <- ensemble.evaluate(eval=eval2, eval.train=eval1)
print(eval3)
eval.table <- data.frame(rbind(eval.table, t(eval3)))
rownames(eval.table)[nrow(eval.table)] <- "GAM"
}else{
cat(paste("\n", "WARNING: GAM (package: gam) evaluation failed","\n\n",sep = ""))
ws["GAM"] <- 0
weights["GAM"] <- 0
TrainData[,"GAM"] <- 0
TestData[,"GAM"] <- 0
}
}
if(evaluations.keep ==T) {
evaluations$GAM.C <- eval1
evaluations$GAM.T <- eval2
}
if (models.keep==T) {
models$GAM <- results
models$GAM.PROBIT <- results2
models$formulae$GAM.formula <- GAM.formula
}
}else{
cat(paste("\n", "WARNING: GAM (package: gam) calibration failed", "\n", "\n"))
ws["GAM"] <- weights["GAM"] <- 0
TrainData[,"GAM"] <- 0
if (no.tests == F) {TestData[,"GAM"] <- 0}
}
}
if(ws["GAMSTEP"] > 0 && length(names(TrainData.vars)) == 0) {
cat(paste("\n", "WARNING: no explanatory variables available", sep = ""))
cat(paste("\n", "GAMSTEP model can therefore not be calibrated", "\n", sep = ""))
ws["GAMSTEP"] <- weights["GAMSTEP"] <- 0
}
if (ws["GAMSTEP"] > 0) {
mc <- mc+1
cat(paste("\n", mc, ". Stepwise Generalized Additive Model (package: gam)\n", sep=""))
eval1 <- eval2 <- results <- results2 <- TrainPres <- TrainAbs <- TestPres <- TestAbs <- NULL
if(is.null(GAMSTEP.OLD) == T) {
if (CATCH.OFF == F) {
tryCatch(results <- gam::gam(formula=STEP.formula, family=GAM.family, data=TrainData, weights=Yweights1, control=gam::gam.control(maxit=maxit, bf.maxit=50)),
error= function(err) {print(paste("first step of stepwise GAM (package: gam) calibration failed"))},
silent=F)
}else{
results <- gam::gam(formula=STEP.formula, family=GAM.family, data=TrainData, weights=Yweights1, control=gam::gam.control(maxit=maxit, bf.maxit=50))
}
assign("TrainData", TrainData, pos=GAMSTEP.pos)
assign("GAM.family", GAM.family, pos=GAMSTEP.pos)
assign("maxit", maxit, pos=GAMSTEP.pos)
if (CATCH.OFF == F) {
tryCatch(results2 <- gam::step.Gam(results, scope=GAMSTEP.scope, direction="both", trace=F, steps=GAMSTEP.steps),
error= function(err) {print(paste("stepwise GAM (package: gam) calibration failed"))},
silent=F)
}else{
results2 <- gam::step.Gam(results, scope=GAMSTEP.scope, direction="both", trace=F, steps=GAMSTEP.steps)
}
remove(TrainData, pos=GAMSTEP.pos)
remove(GAM.family, pos=GAMSTEP.pos)
remove(maxit, pos=GAMSTEP.pos)
}else{
results2 <- GAMSTEP.OLD
}
if (is.null(results2) == F) {
results <- results2
results2 <- NULL
cat(paste("\n", "stepwise GAM formula (gam package)","\n\n",sep = ""))
print(formula(results))
cat(paste("\n", "Evaluation with calibration data","\n",sep = ""))
TrainData[,"GAMSTEP"] <- gam::predict.Gam(object=results, newdata=TrainData.vars, type="response")
if (PROBIT == T) {
if(is.null(GAMSTEP.PROBIT.OLD) == T) {
probit.formula <- as.formula(paste("pb ~ GAMSTEP"))
results2 <- glm(probit.formula, family=binomial(link="probit"), data=TrainData, weights=Yweights1, control=glm.control(maxit=maxit))
}else{
results2 <- GAMSTEP.PROBIT.OLD
}
cat(paste("(Predictions transformed with probit link)","\n\n", sep = ""))
TrainData[,"GAMSTEP.step1"] <- TrainData[,"GAMSTEP"]
TrainData[,"GAMSTEP"] <- predict.glm(object=results2,