#' @title EN MODELLING internal FUNCTION
#'
#' @description Samples pseudo-absences from a environmental extent based on euclidean distances to the observations in a environmental PCA
#'
#' @param env Environmental data frame
#' @param occ Occurrence dataset
#' @param sample.pseudoabsences Boolean to indicate if pseudoabsences are to be sampled
#' @param extrapolate.niche Boolean whether to allow niche extrapolation if needed
#' @param nstart Burn up start for clustering estimation
#' @param k.max Maximum number of clusters
#' @param B Number of runs
#' @param combine.clusters Boolean whether to combine regions into global models
#' @param cluster data frame with clustered regions as levels
#' @param n.clus number of clusters to perform
#' @param R niche space grid
#' @param eval Boolean whether to evaluate the model
#' @param split.data Boolean whether to split data in case of model bootstrapping
#' @param split.percentage Split percentage, Default is 0.25
#' @param split.method String indicating the method to split occurrence data. Default is kmeans
#' @param res grid resolution of the spatial extent
#' @param crs CRS object or a character string describing a projection and datum in PROJ.4 format
#' @param relative.niche logical. Only in case of using clustering method. If TRUE, computes the relative species niche density over the overall species niche clusters.
#' @param cor Logical
#' @param h smoothing parameter for the kernel estimation. Default is 'href'. Altenrtanively can be set to 'LSCV' or any given numeric value
#' @param mask raster mask to resample the created kernel densiity grid raster
#' @param th.o numeric threshold to filter density values of occurrences
#' @param th.s numeric threshold to filter density values of environment
#' @param density.method "epanechnikov" or "bivnorm"
#' @param th threshold to perform cut off for model evaluation
#' @param ras raster to constrain pseudoabsences sampling in model evalluation
#' @param plot.eval Logical to whether plot the evaluation
#' @param sample.pseudoabsences Boolean to whether sample pseudo-absences
#' @param rep number of randomzation tests
#' @param best.th method to select the best thresholt. Default is "similarity"
#'
#' @return List of elements
#'
#' @details
#'
#' @examples
#' \dontrun{
#' EN <- EN_model_(env_data, occ_data2, cluster = "env", n.clus = 5)
#' }
#'
#' @importFrom raster stack rasterFromXYZ maxValue res crs
#' @importFrom stats na.exclude
#' @importFrom ade4 dudi.pca
#' @importFrom ecospat ecospat.sample.envar ecospat.grid.clim.dyn
#' @importFrom tidyr spread
#' @importFrom plyr ldply
#'
#' @keywords internal
#' @noRd
#'
EN_model_ <- function(env, occ, res = NULL, sample.pseudoabsences = TRUE, crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0",
extrapolate.niche = FALSE, nstart = 25, k.max = NULL, B = 100, cor = T, h = "href", mask = NULL,
th.o = NULL, th.s = NULL, density.method = c("epa", "bivnorm"),
combine.clusters = FALSE, cluster = NULL, n.clus = NULL, R = 100, relative.niche = T,
eval = FALSE, split.data = FALSE, split.percentage = 0.25, split.method = c("kmeans", "Euclidean"),
plot.eval = FALSE, rep = 100, th = NULL, ras = NULL,
best.th = c("accuracy", "similarity") ){
split.method = split.method[1]
method = density.method[1]
# check if there are enough occurrences per species
###################
rm.sp = names(table(occ[,3]))[table(occ[,3]) < 4]
spsNames = names(table(occ[,3]))[!names(table(occ[,3])) %in% rm.sp]
occ = occ[occ[,3] %in% spsNames,]
if (ncol(occ) == 3){
occ$PA = 1
}
if (is.character(cluster) && cluster == "obs") { obs = occ }
if (split.data == TRUE){
split.df <- split_data(occ, method = split.method, split.percentage = split.percentage)
occ <- split.df$occ.train
occ.test <- split.df$occ.test
}
#occ = occ[!duplicated(occ),]
occ.df = tidyr::spread(occ, 3, 4, fill = 0)
occ.df[is.na(occ.df)] = 0
if (length(rm.sp) > 0){
warning(length(rm.sp), " species removed from the analysis... Not enough observations.")
message("Following species were dropped out of the analyses")
print(rm.sp)
}
message("Occurrence dataset ... OK")
###################
# check env argument
###################
if (is.data.frame(env)){
env = stats::na.exclude(env)
env.var = colnames(env[,-c(1:2)]) # environmental variables
env.stack = raster_projection(env, crs = crs)
}
if (any(class(env) %in% c("raster", "RasterBrick", "RasterStack"))){
env.stack = env
crs = raster::crs(env.stack)
env <- stats::na.exclude(raster::as.data.frame(env.stack, xy = T))
env.var = colnames(env[,-c(1:2)]) # environmental variables
}
message("Environmental dataset ... OK")
###################
# check res argument
###################
if (is.null(res)){
res = max(raster::res(env.stack))
}
###################
### inner elements
output = list()
sps.scores = NULL
z.mod = list()
mod.Val = list()
fail.m <- NULL
### error messages
### function
message("\t- Conforming environmental space...")
pca.cal <- dudi.pca(stats::na.exclude(env[,env.var]), center = T, scale = T, scannf = F, nf = 2) # the pca is calibrated on all the sites of the study area
env.scores = cbind(env[,1:2], pca.cal$li) # environmental PCA scores
occ.df <- ecospat::ecospat.sample.envar(dfsp=env,colspxy=1:2,colspkept=1:2,dfvar=occ.df,colvarxy=1:2,colvar= 3:ncol(occ.df) ,resolution=res) # match occ.df coordinates to env coordinates
occ.df[is.na(occ.df)] = 0
## ENN method
### Whole niche
###################
if (is.null(cluster)){
message("\t- Carrying out species EN models...")
sps.scores = lapply(spsNames, function(i) env.scores[occ.df[,i] != 0,])
names(sps.scores) <- spsNames
for(i in spsNames) {
message("\t\t- Modelling ", i, " Environmental Niche...")
sp.scores <- sps.scores[[i]][,3:4]
if (nrow(sp.scores) > 4) {
z.mod[[i]] <- estimate_niche(env.scores[,3:4],env.scores[,3:4],sp.scores,R,
h = h, mask = mask,
th.o = th.o, th.s = th.s, method = method)
class(z.mod[[i]]) <- c("NINA", "niche")
if (cor) {
mod.Val[[i]] <- cbind(env.scores[,1:2], response = raster::extract(z.mod[[i]]$z/z.mod[[i]]$Z, z.mod[[i]]$glob))
} else {
mod.Val[[i]] <- cbind(env.scores[,1:2], response = raster::extract(z.mod[[i]]$z, z.mod[[i]]$glob))
}
}
else {
fail.m <- c(fail.m, i)
}
}
if (length(mod.Val) != 0) {
tab = table(names(mod.Val))
mod.Val <- ldply(mod.Val, data.frame, .id = "species")
sps.scores <- ldply(sps.scores, data.frame, .id = "species")
output$tab <-tab
if (length(fail.m) > 0){
output$fail <- fail.m
warning("Model failed to predict the following species due to lack of observations", immediate. = T)
print(fail.m)
}
}
else {
stop("Modelling has failed to predict any species selected. \nProbable cause not enough scores inside the niche space.")
}
}
###################
### Cluster niche
###################
if (!is.null(cluster)) {
if (is.character(cluster) && cluster == "obs") {
message("\t- Clustering species observations...")
obs = spread(obs, 3, 4)
obs <- ecospat.sample.envar(dfsp=env,colspxy=1:2,colspkept=1:2,dfvar=obs,colvarxy=1:2,colvar= 3:ncol(obs) ,resolution=res) # match occ.df coordinates to env coordinates
obs[is.na(obs)] = 0
clus.df = cluster_regions(obs, plot = F, n.clus = n.clus, nstart = nstart, K.max = k.max, B = B)
clus.df[,3] = as.factor(clus.df[,3])
regions <- LETTERS[1:length(levels(clus.df[,3]))]
levels(clus.df[,3]) = regions
}
if (is.character(cluster) && cluster == "env") {
message("\t- Clustering environmental variables...")
clus.df = cluster_regions(env, plot = F, n.clus = n.clus, nstart = nstart, K.max = k.max, B = B)
clus.df[,3] = as.factor(clus.df[,3])
regions <- LETTERS[1:length(levels(clus.df[,3]))]
levels(clus.df[,3]) = regions
}
if (is.data.frame(cluster)) {
message("\t- Clustering based on provided regions...")
cluster[,3] <- as.factor(cluster[,3])
regions <- levels(cluster[,3])
clus.df = na.exclude(ecospat.sample.envar(dfsp=env,colspxy=1:2,colspkept=1:2,dfvar=cluster,colvarxy=1:2,colvar= 3 ,resolution=res))
clus.df[,3] = as.factor(clus.df[,3])
levels(clus.df[,3]) <- regions
}
if (is.vector(cluster) && length(cluster) == nrow(env)) {
message("\t- Clustering based on provided regions...")
clus.df <- cbind(env[,1:2], cluster)
clus.df[,3] = as.factor(clus.df[,3])
regions <-levels(clus.df[,3])
}
message("\t- Regions:")
print(regions)
fail.m <- data.frame()
for (e in regions) {
message("\t- Carrying out species EN models in region ", e, "...")
reg = rownames(clus.df)[which(clus.df[,3] == e)]
occ.subset <- occ.df[reg,]
# subseting environmental grid allows to have better resolution on each region
env.scores.subset <- env.scores[reg,]
if (nrow(occ.subset) > 0) {
sps.subset <- names(which(sapply(spsNames, function(x) sum(occ.subset[,x]) > 0)))
z.mod[[e]] = list()
mod.Val[[e]] <- list()
#fail.m[[e]] <- list()
for(i in sps.subset) {
message("\t\t- Modelling ", i, " Environmental Niche...")
sp.scores <- merge(occ.df[occ.df[reg,i] != 0,1:2], env.scores.subset, by= 1:2)
if (nrow(sp.scores) <= 4){
if (extrapolate.niche == TRUE){
sp.scores <- merge(occ.df[occ.df[,i] != 0,1:2], env.scores, by= 1:2)
#sp.scores <- merge(sp.scores, env.scores.subset, by= 3:4)
#sp.scores <- sp.scores[,c(5:6,1:2)]
#colnames(sp.scores) <- colnames(env.scores.subset)
}
}
if (nrow(sp.scores) > 4) {
message("\t\t- Estimating ", i, " niche response in region ", e, "...")
z.mod[[e]][[i]] <- estimate_niche(env.scores.subset[,3:4],env.scores.subset[,3:4],sp.scores[,3:4], R,
h = h, mask = mask,
th.o = th.o, th.s = th.s, method = method)
class(z.mod[[e]][[i]]) <- c("NINA", "niche")
sps.scores <- rbind(sps.scores, cbind(region = e, species = i, sp.scores))
if (cor) {
mod.Val[[e]][[i]] <- cbind(env.scores.subset[,1:2], response = raster::extract(z.mod[[e]][[i]]$z/z.mod[[e]][[i]]$Z, z.mod[[e]][[i]]$glob))
} else {
mod.Val[[e]][[i]] <- cbind(env.scores.subset[,1:2], response = raster::extract(z.mod[[e]][[i]]$z, z.mod[[e]][[i]]$glob))
}
# z.mod[[e]][[i]]$z/z.mod[[e]][[i]]$Z to standardize by environment densitiies
}
else {
warning("Not enough observations of ", i , " in region ", e, ".", immediate. = T)
fail.m <- rbind(fail.m, cbind( region = e , species = i))
}
}
z.mod[[e]] <- z.mod[[e]][unlist(lapply(z.mod[[e]], length) != 0)]
z.mod[[e]] <- z.mod[[e]][unlist(lapply(z.mod[[e]], function(x) !is.na(maxValue(x$z.uncor))))]
mod.Val[[e]] <- ldply(mod.Val[[e]], data.frame, .id = "species")
}
else {
warning("There are no observations in region ", e, ".", immediate. = T)
}
}
if (relative.niche){
z.rev <- reverse_list(z.mod)
for (ii in names(z.rev)){
max.zdens = max(sapply(z.rev[[ii]], function(i) raster::cellStats(i$z, "max")))
max.Zdens = max(sapply(z.rev[[ii]], function(i) raster::cellStats(i$Z, "max")))
for (jj in names(z.rev[[ii]])){
z = z.rev[[ii]][[jj]]
z$z.uncor = z$z / max.zdens
z$z.uncor[is.na(z$z.uncor)] <- 0
z$w <- z$z.uncor
z$w[z$w > 0] <- 1
z$z.cor <- z$z/z$Z
z$z.cor[is.na(z$z.cor)] <- 0
z$z.cor <- z$z.cor/max.Zdens
z.rev[[ii]][[jj]] = z
}
}
z.mod <- reverse_list(z.rev)
}
z.mod <- z.mod[unlist(lapply(z.mod, length) != 0)]
if (length(mod.Val) != 0) {
if (combine.clusters == TRUE){
message("\t- Assembling regions into global model...")
output$z.mod.global = combine_regions(z.mod, env.scores = env.scores[,3:4], R = R)
}
tab = cbind(ldply(sapply(z.mod, function(x) names(x)), data.frame, .id = "region"), P = 1)
tab = spread(tab, "region", "P")
rownames(tab) <- tab[,1]
tab <- tab[,-1]
tab[is.na(tab)] = 0
output$tab <-t(tab)
mod.Val <- ldply(mod.Val, data.frame, .id = "region")
mod.Val = mod.Val[,-1]
output$clus <- clus.df
if (nrow(fail.m) > 0){
#fail.m <- plyr::ldply(fail.m, data.frame, .id = "region")
#colnames(fail.m)[2] = "species"
output$fail <- fail.m
warning("Model failed to predict the following species due to lack of observations", immediate. = T)
print(fail.m)
}
else {
message("All species have been modelled correctly.")
}
}
else {
stop("Modelling has failed to predict any species selected. \nProbable cause not enough scores inside the niche space.")
}
}
###############
mod.Val <- spread(mod.Val, "species", "response")
mod.Val[is.na(mod.Val)] = 0
if ( ncol(mod.Val) == 3){
mod.Val[,3] <- mod.Val[,3] / max(mod.Val[,3])
} else {
mod.Val[,-c(1:2)] <- apply(mod.Val[,-c(1:2)], 2, function(i) i/max(i, na.rm = T))
}
output$pca <- pca.cal
output$sp.scores <- sps.scores
output$env.scores <- env.scores
output$z.mod <- z.mod
output$pred.dis <- mod.Val
if (split.data == TRUE){output$obs <- list(train = occ, test = occ.test)} else { output$obs <- occ}
## Model evaluation
###############
if (eval == TRUE){
message("\t- Carrying out models evaluations...")
output$eval <- models_evaluation(mod.Val, if (split.data == TRUE){occ.test} else {occ}, predictors = env, sample.pseudoabsences = sample.pseudoabsences, res = res, plot = plot.eval,
rep = rep, th = th, best.th = best.th)
}
###############
output$maps = raster_projection(mod.Val, ras = env.stack[[1]], crs = crs)
output$predictors = env.var
output$crs = crs
#output$type = "EN"
attr(output, "class") <- c("NINA", "ENmodel")
message("Species EN models succesfully completed!")
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.