##############################################################################
# #
# GENETIC ALGORITHMS in R #
# modified version for eicm #
# adapted from https://cran.r-project.org/package=GA version 3.1.1. #
##############################################################################
my.ga <- function(
fitness, criterion, ...,
lower, upper, nBits,
population = "gabin_Population",
selection = "gabin_lrSelection",
crossover = "gabin_spCrossover",
mutation,
popSize = 50,
pcrossover = 0.8,
pmutation = 0.1,
elitism = base::max(1, round(popSize*0.05)),
updatePop = FALSE,
postFitness = NULL,
maxiter = 100,
run = maxiter,
maxFitness = Inf,
names = NULL,
suggestions = NULL,
optim = FALSE,
optimArgs = list(method = "L-BFGS-B",
poptim = 0.05,
pressel = 0.5,
control = list(fnscale = -1, maxit = 100)),
keepBest = FALSE,
parallel = FALSE,
monitor = NULL,
############## START CHANGED CODE #########################
max.cached = 15000,
############### END CHANGED CODE ##########################
seed = NULL)
{
call <- match.call()
if(!is.function(population)) population <- get(population, envir=environment(GA::ga))
if(!is.function(selection)) selection <- get(selection, envir=environment(GA::ga))
if(!is.function(crossover)) crossover <- get(crossover, envir=environment(GA::ga))
if(!is.function(mutation)) mutation <- get(mutation, envir=environment(GA::ga))
if(missing(fitness))
{ stop("A fitness function must be provided") }
if(!is.function(fitness))
{ stop("A fitness function must be provided") }
if(popSize < 10)
{ warning("The population size is less than 10.") }
if(maxiter < 1)
{ stop("The maximum number of iterations must be at least 1.") }
if(elitism > popSize)
{ stop("The elitism cannot be larger that population size.") }
if(pcrossover < 0 | pcrossover > 1)
{ stop("Probability of crossover must be between 0 and 1.") }
if(is.numeric(pmutation))
{
if(pmutation < 0 | pmutation > 1)
{ stop("If numeric probability of mutation must be between 0 and 1.") }
else if(!is.function(population))
{ stop("pmutation must be a numeric value in (0,1) or a function.") }
}
# check for min and max arguments instead of lower and upper
callArgs <- list(...)
penalty <- callArgs$penalty
callArgs$penalty <- NULL
modelobject <- callArgs$modelobject
callArgs$modelobject <- NULL
if(any("min" %in% names(callArgs)))
{
lower <- callArgs$min
callArgs$min <- NULL
warning("'min' arg is deprecated. Use 'lower' instead.")
}
if(any("max" %in% names(callArgs)))
{
upper <- callArgs$max
callArgs$max <- NULL
warning("'max' arg is deprecated. Use 'upper' instead.")
}
if(missing(lower) & missing(upper) & missing(nBits))
{ stop("A lower and upper range of values (for 'real-valued' or 'permutation' GA) or nBits (for 'binary' GA) must be provided!") }
nBits <- as.vector(nBits)[1]
lower <- upper <- NA
nvars <- nBits
if(is.null(names))
names <- paste0("x", 1:nvars)
# check suggestions
if(is.null(suggestions))
{ suggestions <- matrix(nrow = 0, ncol = nvars) }
else
{ if(is.vector(suggestions))
{ if(nvars > 1) suggestions <- matrix(suggestions, nrow = 1)
else suggestions <- matrix(suggestions, ncol = 1) }
else
{ suggestions <- as.matrix(suggestions) }
if(nvars != ncol(suggestions))
stop("Provided suggestions (ncol) matrix do not match number of variables of the problem!")
}
###########################################################
############## START CHANGED CODE #########################
###########################################################
if(is.numeric(parallel) && parallel > 1) {
cls <- parallel::makeCluster(parallel)
# doParallel::registerDoParallel(cls) # doParallel does not support progress bar!
doSNOW::registerDoSNOW(cls)
`%DO%` <- foreach::`%dopar%`
} else {
if(!is.numeric(parallel)) stop("parallel must be the number of cores.")
cls <- NULL
`%DO%` <- foreach::`%do%`
}
###########################################################
############### END CHANGED CODE ##########################
###########################################################
# set seed for reproducibility
if(!is.null(seed)) set.seed(seed)
i. <- NULL # dummy to trick R CMD check
fitnessSummary <- matrix(as.double(NA), nrow = maxiter, ncol = 6)
colnames(fitnessSummary) <- names(GA::gaSummary(stats::rnorm(10)))
bestSol <- if(keepBest) vector(mode = "list", length = maxiter)
else list()
Fitness <- rep(NA, popSize)
object <- methods::new("ga.eicm",
call = call,
lower = lower,
upper = upper,
nBits = nBits,
names = if(is.null(names)) character() else names,
popSize = popSize,
iter = 0,
run = 1,
maxiter = maxiter,
suggestions = suggestions,
population = matrix(),
elitism = elitism,
pcrossover = pcrossover,
pmutation = if(is.numeric(pmutation)) pmutation else NA,
fitness = Fitness,
summary = fitnessSummary,
bestSol = bestSol)
if(maxiter == 0)
return(object)
# generate beginning population
Pop <- matrix(as.double(NA), nrow = popSize, ncol = nvars)
ng <- min(nrow(suggestions), popSize)
if(ng > 0) # use suggestion if provided
{ Pop[1:ng,] <- suggestions }
# fill the rest with a random population
if(popSize > ng)
{ Pop[(ng+1):popSize,] <- population(object)[1:(popSize-ng),] }
object@population <- Pop
############## START CHANGED CODE #########################
evaluation.history <- list() # that's the fitted model cache
message(sprintf("Percentage of mutation: %.3f; poisson mean: %.3f", pmutation, attr(mutation, "mean")))
############### END CHANGED CODE ##########################
# start iterations
for(iter in seq_len(maxiter))
{
object@iter <- iter
############ CHANGED LINE
start.time <- Sys.time()
# evalute fitness function (when needed)
if(!parallel) # TODO DEPRECATED
{ for(i in seq_len(popSize))
if(is.na(Fitness[i]))
{ fit <- do.call(fitness, c(list(Pop[i,]), callArgs))
if(updatePop)
Pop[i,] <- attributes(fit)[[1]]
Fitness[i] <- fit
}
}
###########################################################
############## START CHANGED CODE #########################
###########################################################
else {
# if(gc()["Vcells", 2] > 1000) {
if((iter %% 25) == 0 && !is.null(cls)) {
message("Restarting cluster to free memory.")
parallel::stopCluster(cls)
cls <- parallel::makeCluster(parallel)
# doParallel::registerDoParallel(cls)
doSNOW::registerDoSNOW(cls)
}
current.pop.strings <- apply(Pop, 1, paste, collapse="")
# returns fitness if it has survived or if it's cached, NA otherwise.
tmp.fit <- vapply(seq_len(popSize), FUN=function(i.) {
if(is.na(Fitness[i.])) { # individual has changed, recalculate
if(!is.null(evaluation.history[[current.pop.strings[i.]]]) # result is cached
) {
return(attr(evaluation.history[[current.pop.strings[i.]]], "criterion")) # there's an equal model, return fitness
}
return(NA)
} else
return(Fitness[i.])
}, FUN.VALUE=1)
object@survived <- 0
object@cached <- sum(!sapply(tmp.fit, is.na))
individuals.to.eval <- sapply(tmp.fit, is.na)
# the unique bit strings to evaluate fitness
bitstrings.to.eval <- unique(Pop[individuals.to.eval, , drop=FALSE])
# returns a similar model or NA is there is none
if(length(evaluation.history) == 0)
similar.models <- rep(NA, nrow(bitstrings.to.eval))
else {
similar.models <- .Call(SR__getMostSimilarModel, bitstrings.to.eval, evaluation.history)
similar.models <- lapply(similar.models, function(i) {
if(is.na(i))
return(NA)
else
return(evaluation.history[[i]])
})
}
object@informed <- sum(!sapply(similar.models, identical, NA))
object@evaluated <- length(similar.models)
if(interactive()) {
message(sprintf("\rFitting %d models... ", length(similar.models)), appendLF=is.null(cls))
# cat(sprintf("\rFitting %d models... ", length(similar.models)))
progress <- function(n) {
message(sprintf("\rFitting %d models... %d complete", length(similar.models), n), appendLF=FALSE)
# cat(sprintf("\rFitting %d models... %d complete", length(similar.models), n))
# writeLines(sprintf("\rFitting %d models... %d complete", length(similar.models), n), conn, sep="")
# flush(conn)
}
} else
progress <- function(n) {}
# conn <- base::stdout()
# parallel::clusterExport(cls, c("progress", "conn"), envir=environment())
# no duplicates are evaluated
# NOTE we need the following loop (a workaround) because, for some reason, sometimes a
# corrupted occurence matrix is passed to the parallel workers.
# TODO investigate whether this is due to a memory leak, or an external bug of doParallel.
tmpEval <- rep(list(NA), length(similar.models))
while(any(sapply(tmpEval, identical, NA))) {
NAs <- sapply(tmpEval, identical, NA)
if(sum(NAs) < length(tmpEval))
message(sprintf("Repeating %d models...", sum(NAs)))
tmpEval[NAs] <- foreach::foreach(i. = seq_along(similar.models[NAs]), .options.multicore=list(preschedule = FALSE)
, .options.snow=list(progress=progress)) %DO% {
# tmpEval <- lapply(seq_along(similar.models), function(i.) {
if(!all(sort(unique(as.vector(modelobject$data$occurrences))) == c(0, 1))) {
# corrupted matrix! TODO: why does it happen?
# saveRDS(modelobject$data$occurrences, file="debug-GA2error.rds")
return(NA)
}
if(identical(similar.models[[i.]], NA)) {
suppressMessages(
tmp <- do.call(fitness, args=c(string=list(bitstrings.to.eval[i., ]),
modelobject=list(modelobject),
callArgs)) # no similar models, that's for the first evaluation
)
} else {
suppressMessages(
tmp <- do.call(fitness, args=c(string=list(bitstrings.to.eval[i., ]),
modelobject=list(modelobject),
similar.model=list(similar.models[[i.]]),
callArgs)) # similar model informs fitting
)
}
if(inherits(tmp, "CV.models")) {
# CROSS VALIDATION TODO
rmse <- sapply(tmp, function(m) {
if(!attr(m, "possible")) return(NA)
testdata <- attr(m, "testingdata")
m$data$env <- testdata$env
return(logLik.eicm(m, testdata$presences))
#preds <- predict(m, 1000)
#sum((preds - testdata$presences) ^ 2)
})
criterion.value <- mean(rmse)
if(is.na(criterion.value))
out <- NA
else
out <- tmp[[1]]$model
} else {
# AIC-like
#if(!attr(tmp, "possible")) {
# stop("Fitting not possible or not improved", list(i., bitstrings.to.eval, similar.models, tmp))
# out <- NA
# criterion.value <- NA
#} else {
out <- tmp$model
criterion.value <- -criterion(tmp) -
penalty * sum(tmp$model$options$mask$sp != 0)
#}
}
if(identical(out, NA) || identical(criterion.value, NA))
stop("This should never happen")
else
attr(out, "criterion") <- criterion.value
rm(tmp)
gc()
# progress(i., conn)
return(out)
}
}
# )
# now merge with fitness vector
string.eval <- apply(bitstrings.to.eval, 1, paste, collapse="")
if(any(individuals.to.eval))
tmp.fit[individuals.to.eval] <- sapply(tmpEval, attr, "criterion")[match(current.pop.strings[individuals.to.eval], string.eval)]
Fitness <- tmp.fit
# update cached
for(i in seq_along(tmpEval)) {
cached.copy <- evaluation.history[[string.eval[i]]]
if(is.null(cached.copy)) {
# clean up cache to avoid out of memory
if(length(evaluation.history) > max.cached) {
tokeep <- which(names(evaluation.history) %in% current.pop.strings)
evaluation.history[setdiff(1:5000, tokeep)] <- NULL
gc()
}
evaluation.history[[string.eval[i]]] <- tmpEval[[i]]
}
}
# splitted.bits <- strsplit(names(evaluation.history), NULL)
}
###########################################################
############### END CHANGED CODE ##########################
###########################################################
# update object
object@population <- Pop
object@fitness <- Fitness
# update iterations summary
fitnessSummary[iter,] <- GA::gaSummary(object@fitness)
object@summary <- fitnessSummary
############ CHANGED CODE
object@time.took <- as.numeric(difftime(Sys.time(), start.time, units="secs"))
object@cache.size <- length(evaluation.history)
if(is.function(monitor)) {
monitor(object, evaluation.history[[current.pop.strings[which.max(Fitness)]]], evaluation.history[[current.pop.strings[which.min(Fitness)]]])
} else {
gaMonitor.eicm(object, evaluation.history[[current.pop.strings[which.max(Fitness)]]], evaluation.history[[current.pop.strings[which.min(Fitness)]]])
}
############ END CHANGED CODE
if(keepBest)
{ object@bestSol[[iter]] <- unique(Pop[Fitness == max(Fitness, na.rm = TRUE),,drop=FALSE]) }
# apply a user's defined function to update the GA object
if(is.function(postFitness))
{
# object <- postFitness(object, ...)
object <- do.call(postFitness, c(object, callArgs))
Fitness <- object@fitness
Pop <- object@population
}
# check stopping criteria
if(iter > 1)
object@run <- GA::garun(fitnessSummary[seq(iter),1])
if(object@run >= run) break
if(max(Fitness, na.rm = TRUE) >= maxFitness) break
if(object@iter == maxiter) break
ord <- order(Fitness, decreasing = TRUE)
PopSorted <- Pop[ord,,drop=FALSE]
FitnessSorted <- Fitness[ord]
# selection
if(is.function(selection))
{ sel <- selection(object)
Pop <- sel$population
Fitness <- sel$fitness
}
else
{ sel <- sample(1:popSize, size = popSize, replace = TRUE)
Pop <- object@population[sel,]
Fitness <- object@fitness[sel]
}
object@population <- Pop
object@fitness <- Fitness
# crossover
if(is.function(crossover) & pcrossover > 0)
{ nmating <- floor(popSize/2)
mating <- matrix(sample(1:(2*nmating), size = (2*nmating)), ncol = 2)
for(i in seq_len(nmating))
{ if(pcrossover > stats::runif(1))
{ parents <- mating[i,]
Crossover <- crossover(object, parents)
Pop[parents,] <- Crossover$children
Fitness[parents] <- Crossover$fitness
}
}
object@population <- Pop
object@fitness <- Fitness
}
# mutation
pm <- if(is.function(pmutation)) pmutation(object) else pmutation
if(is.function(mutation) & pm > 0)
{ for(i in seq_len(popSize))
{ if(pm > stats::runif(1))
{ Mutation <- mutation(object, i)
Pop[i,] <- Mutation
Fitness[i] <- NA
}
}
object@population <- Pop
object@fitness <- Fitness
}
# elitism
if(elitism > 0) # (elitism > 0 & iter > 1)
{ ord <- order(object@fitness, na.last = TRUE)
u <- which(!duplicated(PopSorted, margin = 1))
possibleElitism <- min(elitism, length(u))
Pop[ord[1:possibleElitism],] <- PopSorted[u[1:possibleElitism],]
Fitness[ord[1:possibleElitism]] <- FitnessSorted[u[1:possibleElitism]]
object@population <- Pop
object@fitness <- Fitness
}
if(is.function(monitor))
############ CHANGED LINE
{ utils::flush.console() }
}
if(is.function(monitor))
{ utils::flush.console() }
# in case of premature convergence remove NAs from summary
# fitness evalutations
object@summary <- stats::na.exclude(object@summary)
attr(object@summary, "na.action") <- NULL
# get solution(s)
object@fitnessValue <- max(object@fitness, na.rm = TRUE)
valueAt <- which(object@fitness == object@fitnessValue)
solution <- object@population[valueAt,,drop=FALSE]
if(nrow(solution) > 1)
{ # find unique solutions to precision given by default tolerance
eps <- GA::gaControl("eps")
solution <- unique(round(solution/eps)*eps, margin = 1)
}
#colnames(solution) <- GA:::parNames(object)
object@solution <- solution
if(keepBest)
object@bestSol <- object@bestSol[!sapply(object@bestSol, is.null)]
object@allEvaluations <- evaluation.history
if(!is.null(cls)) parallel::stopCluster(cls)
# return an object of class 'ga'
return(object)
}
# This is a mutator for circular integer variables, that only mutates for the contiguous class,
# and wraps if overflow.
make.mutator <- function(poissonmean=0.8, num.classes) {
out <- function (object, parent, ...) {
mutate <- parent <- as.vector(object@population[parent, ])
n <- length(parent)
nbitstomutate <- stats::rpois(1, poissonmean) + 1
j <- sample(1:n, size = nbitstomutate)
mutate[j] <- (mutate[j] + sample(c(-1,1), length(j), replace=TRUE)) %% (num.classes[j] + 1)
return(mutate)
}
attr(out, "mean") <- poissonmean
return(out)
}
setClass(Class = "ga.eicm",
representation(allEvaluations = "list"
, time.took="numeric"
, cached="numeric"
, informed="numeric"
, survived="numeric"
, evaluated="numeric"
, cache.size="numeric"
)
, contains="ga", package = "eicm")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.