#' Creates ensemble models from several algorithms
#'
#' This function reads the output of \code{final_model} for each species and
#' multiple algorithms and builds a simple ensemble model by calculating the
#' mean of the final models in order to obtain one model per species. It also
#' calculates median, standard deviation and range (maximum - minimum)
#'
#' @inheritParams setup_sdmdata
#' @inheritParams final_model
#' @param ensemble_dir Character string, name of the folder to save the output
#' files. A subfolder will be created. Defaults to "\code{ensemble}"
#' @param algorithms Character vector specifying which algorithms will be
#' processed. Note that it can have length > 1, ex. \code{c("bioclim", "rf")}.
#' Defaults to NULL: it no name is given it will process all algorithms present
#' in the final_models folder
#' @param which_ensemble Which method to apply consensus between algorithms will
#' be used? Current options are:
#' \describe{
#' \item{\code{best}}{Selects models from the best-performing algorithm. A
#' performance metric must be specified (\code{performance_metric}). Parameter
#' \code{which_final} indicates which model will be returned}
#' \item{\code{average}}{Computes the means between models. Parameter
#' \code{which_final} indicates which model will be returned}
#' \item{\code{weighted_average}}{Computes a weighted mean between models. A
#' performance metric must be specified. Parameter \code{which_final} indicates
#' which model will be returned}
#' \item{\code{median}}{Computes the median between models. Parameter
#' \code{which_final} indicates which model will be returned}
#' \item{\code{frequency}}{Computes the mean between binary models, which is
#' analogous to calculating a relative consensus}
#' \item{\code{consensus}}{Computes a binary model with the final consensus area.
#' A \code{consensus_level} must be specified}
#' \item{\code{pca}}{Computes a PCA between the models for each algorithm and extract the first axis, that summarizes variation between them}
#' }
#' @param which_final Which \code{final_model} will be used to calculate the
#' average, weighted average or median ensembles? See \code{\link{final_model}}
#' @param performance_metric Which performance metric will be used to define
#' the \code{"best"} algorithm any in \code{c("AUC", "pROC", "TSSmax",
#' "KAPPAmax", "CCR", "F_score", "Jaccard")}
#' @param dismo_threshold Character string indicating threshold (cut-off) to
#' transform raw_mean final models to binary for frequency and consensus methods.
#' The options are from \code{\link[dismo]{threshold}}:
#' "\code{kappa}", "\code{spec_sens}", "\code{no_omission}",
#' "\code{prevalence}", "\code{equal_sens_spec}",
#' "\code{sensitivity}". Default value is "\code{spec_sens}"
#' @param png_ensemble Logical. If \code{TRUE} writes png files of the
#' ensemble models
#' @param write_map Logical. If \code{TRUE} adds a map contour to the png file
#' of the ensemble models
#' @param write_occs Logical. If \code{TRUE} writes the occurrence points on the
#' png file of the ensemble model
#' @param ... Other parameters from \code{\link[raster]{writeRaster}}
#' @param uncertainty Calculates the uncertainty between models, as a range
#' (maximum - minimum)
#' @import raster
#' @importFrom scales alpha
#' @import graphics
#' @importFrom stats sd
#' @importFrom stats prcomp
#' @export
#' @seealso \code{\link{final_model}}
#' @return Retuns a RasterStack with all generated statistics written in the
#' \code{ensemble_dir} subfolder
#' @return Writes on disk raster files with the median, mean and standard
#' deviation and range of the assembled models
#' @return If \code{png_ensemble = TRUE} writes .png figures
#' in the \code{ensemble_dir} subfolder
#' @examples
#' \dontrun{
#' # run setup_sdmdata
#' sp <- names(example_occs)[1]
#' sp_coord <- example_occs[[1]]
#' sp_setup <- setup_sdmdata(species_name = sp,
#' occurrences = sp_coord,
#' predictors = example_vars,
#' clean_uni = TRUE)
#'
#' # run do_many
#' sp_many <- do_many(species_name = sp,
#' predictors = example_vars,
#' bioclim = TRUE)
#'
#' # run final_model
#' sp_final <- final_model(species_name = sp,
#' algorithms = c("bioclim"),
#' select_partitions = TRUE,
#' select_par = "TSSmax",
#' select_par_val = 0,
#' which_models = c("raw_mean"),
#' consensus_level = 0.5,
#' overwrite = TRUE)
#'
#' # run ensemble model
#' sp_ensemble <- ensemble_model(species_name = sp,
#' occurrences = sp_coord,
#' overwrite = TRUE)
#' }
ensemble_model <- function(species_name,
occurrences,
lon = "lon",
lat = "lat",
models_dir = "./models",
final_dir = "final_models",
ensemble_dir = "ensemble",
proj_dir = "present",
algorithms = NULL,
which_ensemble = c("average"),
which_final = c("raw_mean"),
performance_metric = "TSSmax",
dismo_threshold = "spec_sens",
consensus_level = 0.5,
png_ensemble = TRUE,
write_occs = FALSE,
write_map = FALSE,
scale_models = TRUE,
uncertainty = TRUE,
...) {
## output folder
present_folder <- paste(models_dir, species_name, "present", sep = "/")
final_folder <- paste(models_dir, species_name, proj_dir, final_dir,
sep = "/")
ensemble_folder <- paste(models_dir, species_name, proj_dir, ensemble_dir,
sep = "/")
if (!file.exists(ensemble_folder)) {
dir.create(ensemble_folder)
}
print(date())
message(species_name)
message(paste("Reading mean evaluation files for", species_name, "in", proj_dir))
## reads mean statistics table per algorithm
stats_summary <- read.csv(paste0(present_folder, "/",
final_dir, "/", species_name,
"_mean_statistics.csv"), header = TRUE,
row.names = 1)
#specify algorithms or not
if (is.null(algorithms)) {
algorithms <- unique(stats_summary$algorithm)
}
stats_summary <- stats_summary[, stats_summary$algorithm %in% algorithms]
ensemble_mods <- raster::stack()
if ("best" %in% which_ensemble) {
if (is.null(performance_metric))
stop("A performance metric must be specified to select the 'best' algorithm")
best <- which.max(stats_summary[,performance_metric])
message(paste("The best performing algorithm was",
stats_summary$algorithm[best], "according to",
performance_metric, "values"))
if (is.null(which_final))
stop("A which_final model must be specified to return the 'best' algorithm")
best_mod_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern =
paste0(stats_summary$algorithm[best],
"_", which_final, ".tif$"))
if (length(best_mod_files) == 0)
stop(paste("No", which_final, "models to ensemble from for", species_name))
best_mod <- raster(best_mod_files)
names(best_mod) <- "best"
writeRaster(best_mod,
filename = paste0(ensemble_folder, "/", species_name,
"_", which_final,
"_ensemble", "_best",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, best_mod)
}
if ("average" %in% which_ensemble) {
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern = paste0("_raw_mean.tif$"))
if (length(raw_mean_files) == 0)
stop(paste("No models to assemble from for", species_name))
raw_mean_models <- raster::stack(raw_mean_files)
average_ensemble <- mean(raw_mean_models)
names(average_ensemble) <- "average"
writeRaster(average_ensemble,
filename = paste0(ensemble_folder, "/", species_name,
"_ensemble", "_average",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, average_ensemble)
}
if ("weighted_average" %in% which_ensemble) {
if (is.null(performance_metric))
stop("A performance metric must be specified to compute the weighted average")
w_coefs <- stats_summary[,performance_metric]
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern = "_raw_mean.tif$")
if (length(raw_mean_files) == 0)
stop(paste("No models to ensemble from for", species_name))
raw_mean_models <- raster::stack(raw_mean_files)
weighted_average <- raster::weighted.mean(raw_mean_models, w_coefs)
names(weighted_average) <- "weighted_average"
writeRaster(weighted_average,
filename = paste0(ensemble_folder, "/", species_name,
"_", performance_metric,
"_ensemble_weighted_average",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, weighted_average)
}
if ("median" %in% which_ensemble) {
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern = paste0("_raw_mean.tif$"))
if (length(raw_mean_files) == 0)
stop(paste("No models to ensemble from for", species_name))
raw_mean_models <- raster::stack(raw_mean_files)
median_ensemble <- raster::calc(raw_mean_models,
fun = function(x) {
stats::median(x, na.rm = TRUE)
}
)
names(median_ensemble) <- "median"
writeRaster(median_ensemble,
filename = paste0(ensemble_folder, "/", species_name,
"_ensemble", "_median",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, median_ensemble)
}
if (any(c("frequency", "consensus") %in% which_ensemble)) {
#reads raw
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern =
paste0(
"_raw_mean.tif$"))
if (length(raw_mean_files) == 0)
stop(paste("No models to ensemble from for", species_name))
raw_mean_models <- raster::stack(raw_mean_files)
if (is.null(dismo_threshold))
stop("A dismo_threshold must be specified to create binary models")
#cuts the models by the mean threshold for each algorithm
th <- stats_summary[,dismo_threshold]
bin_mean_models <- raw_mean_models > th
#calculates the mean
frequency_ensemble <- mean(bin_mean_models)
names(frequency_ensemble) <- "frequency"
if ("frequency" %in% which_ensemble) {
writeRaster(frequency_ensemble,
filename = paste0(ensemble_folder, "/", species_name,
"_ensemble", "_frequency",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, frequency_ensemble)
}
if ("consensus" %in% which_ensemble) {
if (missing("consensus_level"))
stop("Parameter consensus_level must be specified to calculate consensus average")
consensus_ensemble <- frequency_ensemble > consensus_level
names(consensus_ensemble) <- "consensus"
writeRaster(consensus_ensemble,
filename = paste0(ensemble_folder, "/", species_name,
"_ensemble_", consensus_level,
"_consensus",
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, consensus_ensemble)
}
}
if ("pca" %in% which_ensemble) {
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern =
paste0(
"_raw_mean.tif$"))
if (length(raw_mean_files) == 0)
stop(paste("No models to ensemble from for", species_name))
raw_mean_models <- raster::stack(raw_mean_files)
vals <- raster::getValues(raw_mean_models)
vals <- vals[!is.na(rowSums(vals)),]
vals.st <- scale(vals)
pca_mod <- prcomp(vals.st)
summary_pca <- summary(pca_mod)
#axis_nb <- which(summary_pca$importance["Cumulative Proportion",] >= 0.95)[1]
expl <- summary_pca$importance["Cumulative Proportion",1]
first_axis <- predict(raw_mean_models, pca_mod, index = 1)
pca_vals <- rescale_layer(first_axis)
pca1 <- raster(pca_vals)
values(pca1) <- values(pca_vals)
names(pca1) <- "pca"
raster::writeRaster(pca1,
filename = paste0(ensemble_folder, "/", species_name,
"_ensemble_pca_", as.character(round(expl,3)),
".tif"), ...)
ensemble_mods <- raster::addLayer(ensemble_mods, pca1)
}
if (uncertainty == TRUE) {
raw_mean_files <- list.files(final_folder,
recursive = TRUE,
full.names = TRUE,
pattern =
paste0(
"_raw_mean.tif$"))
raw_mean_models <- raster::stack(raw_mean_files)
message("Calculating range")
ensemble_inctz <- raster::calc(raw_mean_models,
fun = function(x) {
max(x) - min(x)
}
)
names(ensemble_inctz) <- "uncertainty"
writeRaster(ensemble_inctz,
filename = paste0(models_dir, "/", species_name,
"/", proj_dir, "/",
ensemble_dir, "/", species_name,
"_uncertainty.tif"),
...
)
ensemble_mods <- raster::addLayer(ensemble_mods, ensemble_inctz)
}
# #scale models to 0-1#รถ escalar??
# if (scale_models == TRUE) {
# mod2 <- rescale_layer(mod2)
# }
if (png_ensemble) {
message("Writing pngs")
for (i in 1:dim(ensemble_mods)[3]) {
png(filename = paste0(models_dir, "/",
species_name, "/",
proj_dir, "/",
ensemble_dir, "/",
species_name, "_",
names(ensemble_mods)[i], ".png"),
res = 300, width = 410 * 300 / 72, height = 480 * 300 / 72)
par(mfrow = c(1, 1), mar = c(4, 4, 0, 0))
raster::plot(ensemble_mods[[i]])
if (write_map) {
maps::map("world",
add = TRUE,
col = "grey")
}
if (write_occs) {
if (is.null(occurrences))
stop("occurrences must be provided to write the png if write_occs = TRUE")
coord <- occurrences[, c(lon, lat)]
points(coord, pch = 21, cex = 0.6,
bg = scales::alpha("cyan", 0.6))
}
dev.off()
}
}
# creating and writing ensemble_model metadata
metadata <- data.frame(
species_name = as.character(species_name),
algorithms = paste(algorithms, collapse = "-"),
which_ensemble = paste(which_ensemble, collapse = "-"),
which_final = paste(which_final, collapse = "-"),
performance_metric = as.character(performance_metric),
dismo_threshold = as.character(dismo_threshold),
consensus_level = ifelse("consensus" %in% which_ensemble,
consensus_level, NA),
scale_models = ifelse(scale_models, "yes", "no"),
uncertainty = ifelse(uncertainty, "yes", "no")
)
message("writing metadata")
write.csv(metadata, file = paste0(ensemble_folder,
"/metadata.csv"))
#writes session info
write_session_info(ensemble_folder)
print("DONE!")
print(date())
return(ensemble_mods)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.