Nothing
`Ensemble.Forecasting` <-
function(ANN=TRUE,CTA=TRUE,GAM=TRUE,GBM=TRUE,GLM=TRUE,MARS=TRUE,FDA=TRUE,RF=TRUE,SRE=TRUE, Proj.name, weight.method, decay=1.6, PCA.median=TRUE, binary=TRUE, bin.method='Roc', Test=FALSE, repetition.models=TRUE, final.model.out=FALSE, qual.th=0, compress="xz")
{
#Verify user input is correct
Th <- c('Roc', 'Kappa', 'TSS')
if(!any(Th == bin.method)) stop("\n bin.method should be one of 'Roc', 'Kappa' or 'TSS' \n")
if(!any(Th == weight.method)) stop("\n weight.method should be one of 'Roc', 'Kappa', or 'TSS' \n")
if(sum(weight.method==names(Biomod.material$evaluation.choice))!=1) stop("\n the weight.method selected was not run in Models \n")
if(is.null(Biomod.material[[paste("proj.", Proj.name, ".length", sep="")]])) stop("unknown Projection name \n")
Comp <- c(FALSE, 'gzip', 'xz')
if(!any(Comp == compress)) stop("\n compress should be one of FALSE, 'gzip' or 'xz' \n")
#store the models wanted and shut down the models that cannot run (not trained)
ens.choice <- p.choice <- Biomod.material[[paste("proj.", Proj.name, ".choice", sep="")]]
for(j in Biomod.material$algo) if(!eval(parse(text=j))) ens.choice[j] <- FALSE
#create storing list of outputs
list.out <- vector('list', Biomod.material$NbSpecies)
names(list.out) <- Biomod.material$species.names
#final consensus array using all repetitions into a single output (3D array: D1=projected records, D2=species,D3=ensemble forecasting methods)
ARRAY.tot <- ARRAY.tot.bin <- array(NA, c(Biomod.material[[paste("proj.", Proj.name, ".length", sep="")]], Biomod.material$NbSpecies, 6),
dimnames=list(1:Biomod.material[[paste("proj.", Proj.name, ".length", sep="")]], Biomod.material$species.names, c('prob.mean','prob.mean.weighted','median','Roc.mean','Kappa.mean','TSS.mean')))
#turn repetition models off if were not used for projections
if(Biomod.material[[paste("proj.", Proj.name, ".repetition.models", sep="")]]==FALSE){
repetition.models <- FALSE
cat("repetition models cannot be used for consensus : they have not been used to render projections \n")
}
#--------- start species loop ---------
for(i in 1:Biomod.material$NbSpecies){
cat(paste(Biomod.material$species.names[i], " \n"))
if(Test) nbrun <- 2 else nbrun <- 1
for(ii in 1:nbrun){
#load the projection data (data generated by the "Projection()" function)
if(ii==1){
load(paste(getwd(), "/proj.", Proj.name, "/Proj_", Proj.name, "_", Biomod.material$species.names[i], sep=""))
sp.data <- eval(parse(text=paste("Proj_", Proj.name, "_", Biomod.material$species.names[i], sep="")))
} else {
load(paste(getwd(), "/pred/Pred_", Biomod.material$species.names[i], sep=""))
sp.data <- eval(parse(text=paste("Pred_", Biomod.material$species.names[i], sep="")))
}
#create storing array (3D Array: 1D=projected records, 2D=Evaluation*PAs replicates, 3D=ensemble forecasting method)
NbPA <- Biomod.material$NbRun[i] / (Biomod.material$NbRunEval+1) #considering the number of PA runs that were actually done
nbrep <- 1
if(repetition.models) nbrep <- nbrep + Biomod.material$NbRunEval
ARRAY <- ARRAY.bin <- array(NA, c(dim(sp.data)[1], nbrep*NbPA, 6), dimnames=list(dimnames(sp.data)[1][[1]],rep("NA", nbrep*NbPA), c('prob.mean','prob.mean.weighted','median','Roc.mean','Kappa.mean','TSS.mean')))
#store the thresholds produced by the ensemble forecasts
ths <- vector('list', 6)
#create a list "out" that will be used for storing the information on weights awarded, evaluation results, pca.median model selected, etc.
out <- list()
out[["weights"]] <- matrix(NA, nr=nbrep*NbPA, nc=9, dimnames=list(rep("rep", nbrep*NbPA), Biomod.material$algo))
out[["PCA.median"]] <- matrix(NA, nr=nbrep*NbPA, nc=1, dimnames=list(rep("rep", nbrep*NbPA), "model.selected"))
out[["thresholds"]] <- matrix(NA, nr=6, nc=nbrep*NbPA, dimnames=list(c('prob.mean','prob.mean.weighted','median','Roc.mean','Kappa.mean','TSS.mean'), rep("rep", nbrep*NbPA)))
#--------- PAs and reps loops ---------
for(j in 1:NbPA){ #Loop through PAs replicates
for(k in 1:nbrep){ #Loop through Eval replicates
#writing the name to use for getting the right info in Evaluation.results lists
if(Biomod.material$NbRepPA==0) nam <- "full" else nam <- paste("PA", j, sep="")
if(k!=1) nam <- paste(nam, "_rep", k-1, sep="")
dimnames(ARRAY)[[2]][(j-1)*nbrep+k] <- nam #assign name to column of array
#Check if there was model fails in Models() -> turn this model off for that run
RUNens.choice <- ens.choice
for(a in Biomod.material$algo) if(ens.choice[a])
if(!file.exists(paste(getwd(), "/models/", Biomod.material$species.names[i], "_", a, "_", nam, sep=""))) RUNens.choice[a] <- FALSE
#adding species name after check of model fails for convenience
nam <- paste(Biomod.material$species.names[i], nam, sep="_")
#set models to false if under the quality threshold
if(Biomod.material$NbRunEval!=0) whichEval <- 1 else whichEval <- 3
for(a in Biomod.material$algo) if(RUNens.choice[a]) if(as.numeric(eval(parse(text=paste("Evaluation.results.", weight.method, sep='')))[[nam]][a,whichEval]) < qual.th) RUNens.choice[a] <- FALSE #Weights are based on the Cross-validated evaluation values.
#If all models are set to false, skip to next rep
if(sum(RUNens.choice)!=0){
if(sum(RUNens.choice)>1){ #if more than 1 model is wanted for ensemble forecating
#defining the data to use as a 2d matrix
cons.data <- sp.data[,RUNens.choice,k,j]
#-------- Mean and Median ensemble forecasting ---------#
ARRAY[, (j-1)*nbrep+k, 'prob.mean'] <- apply(cons.data, 1, mean)
ARRAY[, (j-1)*nbrep+k, 'median'] <- apply(cons.data, 1, median)
#-------- binary results means ensemble forecating ---------# #mean of the binary projections accross all selected techniques
for(jj in 1:3){ if(Biomod.material$evaluation.choice[Th[jj]]){
#create a vector to accumulate the binary prediction for each model successively
kdata <- rep(0, dim(sp.data)[1])
for(kk in Biomod.material$algo[RUNens.choice]) if(kk!='SRE') kdata <- kdata + BinaryTransformation(as.data.frame(cons.data[,kk]), as.numeric(eval(parse(text=paste("Evaluation.results.", Th[jj], sep="")))[[nam]][kk,4]))
if(RUNens.choice['SRE']) kdata <- kdata + cons.data[,'SRE']/1000 #because SRE already in binary
ARRAY[, (j-1)*nbrep+k, paste(Th[jj],'.mean',sep="")] <- kdata / sum(RUNens.choice) *1000
}}
#---------- Weighted Average Ensemble Forecasting --------#
#This is like a mean accross all selected methods but with a weight associated to each technique depending on its score during evaluation
#Recover the weights (depending on the chosen "weight.method" from the "Evaluation.results.weightMethod" object)
wk <- ens.choice
if(Biomod.material$NbRunEval!=0) whichEval <- 1 else whichEval <- 3
for(a in Biomod.material$algo) if(RUNens.choice[a]) wk[a] <- as.numeric(eval(parse(text=paste("Evaluation.results.", weight.method, sep='')))[[nam]][a,whichEval]) else wk[a] <- NA # Weights are based on the Cross-validated evaluation values.
if(weight.method=='Roc') wk['SRE'] <- NA
#deal with cases where scores only=0, or rep model failed
if(sum(wk!=0, na.rm=TRUE)==0) wk[wk==0] <- 0.1 # 0.1 = arbitrary value > 0 -> those models will be used and not set to NA by next line
wk[wk==0] <- NA
# Calculate and attribute Weights to each modelling techniques
if(decay=="proportional"){ # proportional: the weights are proportional to the chosen evaluation value
wk[is.na(wk)] <- 0
if(weight.method=='Roc') wk[wk!=0] <- (wk[wk!=0]-0.5)*2
W <- wk/sum(wk)
}
if(is.numeric(decay)){ # weights are "decay" times decreased for each subsequent model in model quality order.
if(sum(is.na(wk))<8){
z <-rep(1,sum(!is.na(wk)))
for(wj in 2:sum(!is.na(wk))) z[wj] <- z[wj-1]*decay
z <- c(rep(0,(length(wk)-sum(!is.na(wk)))), z/sum(z))
#determine which weight for which model
wk[is.na(wk)] <- 0
W <- rep(0,9)
for(m in 1:9) {
if(sum(wk[m]==wk)!=1){ #if 2 or more score are identical -> make a mean weight between the ones concerned
if(!is.na(wk[m])){
for(nbm in 1:sum(wk[m]==wk)) W[m] <- W[m] + z[sum(wk[m]>wk)+nbm]
W[m] <- W[m] / sum(wk[m]==wk)
}
} else W[m] <- z[sum(wk[m]>wk)+1]
}
} else if(sum(is.na(wk))==8) { wk <- is.na(wk) ; wk[TRUE] <- 1 }
}
#applying weights to projections
ARRAY[, (j-1)*nbrep+k, 'prob.mean.weighted'] <- apply((cons.data*rep(W[RUNens.choice], each=dim(sp.data)[1])), 1, sum)
#calculating the weighted threshold to convert the weighted probabilities to binary and/or filtered values
thmi <- thpondi <- c()
for(a in Biomod.material$algo[RUNens.choice]) {
thmi <- c(thmi, eval(parse(text=paste("Evaluation.results.", bin.method, sep="")))[[nam]][a,4])
thpondi <- c(thpondi, eval(parse(text=paste("Evaluation.results.", weight.method, sep="")))[[nam]][a,4])
}
ths[[1]] <- c(ths[[1]], mean(as.numeric(thmi), na.rm=TRUE))
thpondi[is.na(thpondi)] <- 0
ths[[2]] <- c(ths[[2]], sum(as.numeric(thpondi)*W[RUNens.choice]))
ths[[3]] <- c(ths[[3]], median(as.numeric(thmi), na.rm=TRUE))
#-----------------------end weights------------------------#
#determine the model selected by the PCA consensus approach
if(PCA.median){
if(sum(search()=="package:ade4")==0) library(ade4)
cons <- dudi.pca(cons.data, scale=TRUE, scannf = FALSE, nf=2)
pca.select <- colnames(cons.data)[which.min(abs(cons$co[,2]))]
#x11() #plotting the pca
#s.corcircle(cons$co, lab = colnames(sp.data[,RUNens.choice]), full = FALSE, box = FALSE, sub=Biomod.material$species.names[i])
}
#store the information for each run
out[["thresholds"]][,(j-1)*nbrep+k] <- c(mean(as.numeric(thmi), na.rm=TRUE), sum(as.numeric(thpondi)*W[RUNens.choice]) ,median(as.numeric(thmi), na.rm=TRUE),500,500,500)
out[["weights"]][(j-1)*nbrep+k, ] <- round(W,digits=4)
if(PCA.median) out[["PCA.median"]][(j-1)*nbrep+k, ] <- pca.select
} else { #only one model is available
if(Biomod.material$algo[RUNens.choice] != 'SRE'){
ARRAY[, (j-1)*nbrep+k, 1:3] <- sp.data[,RUNens.choice,k,j]
#binary values
for(jj in 1:3){ if(Biomod.material$evaluation.choice[Th[jj]]){
thresh <- as.numeric(eval(parse(text=paste("Evaluation.results.", Th[jj], sep="")))[[nam]][Biomod.material$algo[RUNens.choice],4])
# cat("\n*** tresh =", thresh)
# cat("\n*** dim(data.frame()) =",dim(as.data.frame(sp.data[,RUNens.choice,k,j])))
ARRAY[, (j-1)*nbrep+k, paste(Th[jj],'.mean',sep="")] <- BinaryTransformation(as.data.frame(sp.data[,RUNens.choice,k,j]), thresh ) * 1000
#store thresholds
if(Th[[jj]] == weight.method){
ths[[1]] <- c(ths[[1]], thresh)
ths[[2]] <- c(ths[[2]], thresh)
}
}}
} else ARRAY[, (j-1)*nbrep+k, 1:6] <- sp.data[,RUNens.choice,k,j]
out[["thresholds"]][,(j-1)*nbrep+k] <- c(ths[[1]][(j-1)*nbrep+k], ths[[1]][(j-1)*nbrep+k],ths[[1]][(j-1)*nbrep+k],500,500,500)
out[["weights"]][(j-1)*nbrep+k, ] <- rep(0,9) ; out[["weights"]][(j-1)*nbrep+k, RUNens.choice] <- 1
}
} else{ #if RUNens.choice==0 -> count it for no bin conversion (else bin conv = fail)
ths[[1]] <- c(ths[[1]], NA) ; ths[[2]] <- c(ths[[2]], NA); ths[[3]] <- c(ths[[3]], NA) }
} #Evaluation replicates k loop
} #PAs replicates j loop
ths[[4]] <- ths[[5]] <- ths[[6]] <- rep(500, nbrep*NbPA)
rownames(out[["weights"]]) <- colnames(out[["thresholds"]]) <- rownames(out[["PCA.median"]]) <-dimnames(ARRAY)[[2]]
list.out[[i]] <- out
EnsRun <- !is.na(ths[[1]])
assign("ARRAY", ARRAY, pos=1)
assign("ths", ths, pos=1)
#----------------------If normal ensFor run or Test run-------------------------#
if(ii==1){ #normal ensFor run --> saving on disk, transform in binary, total consensus
#save results on hard disk per species
# assign(paste("consensus_", Biomod.material$species.names[i], "_", Proj.name, sep=""), ARRAY)
eval(parse(text=paste("consensus_", Biomod.material$species.names[i], "_", Proj.name, " <- ARRAY", sep="")))
if(compress=="xz")
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"', compress='xz')", sep="")))
if(compress=="gzip")
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"', compress='gzip')", sep="")))
if(compress==FALSE)
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"')", sep="")))
eval(parse(text=paste("rm(consensus_", Biomod.material$species.names[i], "_", Proj.name,")", sep="")))
# if binary=TRUE, then we transform the ensemble forecasting values into binary ones
if(binary){
for(j in c(1,2,4,5,6)){ #no conversuion for 3 -> median = no associated threshold
if(j<4){ARRAY.bin[,EnsRun,j] <- BinaryTransformation(as.data.frame(ARRAY[,EnsRun,j]), as.numeric(unlist(ths[[j]][EnsRun])))
} else if(Biomod.material$evaluation.choice[Th[j-3]]) ARRAY.bin[,EnsRun,j] <- BinaryTransformation(as.data.frame(ARRAY[,EnsRun,j]), as.numeric(unlist(ths[[j]][EnsRun]))) #to check if method was chosen
}
# assign(paste("consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin", sep=""), ARRAY.bin)
eval(parse(text=paste("consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin <- ARRAY.bin", sep="")))
if(compress=="xz")
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin, file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"_Bin', compress='xz')", sep="")))
if(compress=="gzip")
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin, file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"_Bin', compress='gzip')", sep="")))
if(compress==FALSE)
eval(parse(text=paste("save(consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin, file='", getwd(),"/proj.", Proj.name, "/consensus_", Biomod.material$species.names[i], "_", Proj.name,"_Bin')", sep="")))
eval(parse(text=paste("rm(consensus_", Biomod.material$species.names[i], "_", Proj.name, "_Bin)", sep="")))
}
#Final consensus on all data available (average across all Evaluation Replicates and All PAs replicates)
if(dim(ARRAY)[2] != 1){
#define which models to take into account i.e. if final.model.out=FALSE
TotalModels <- rep(TRUE, dim(ARRAY)[2])
if(final.model.out){
if(nbrep!=1){ #if ==1 -> no repetitions are wanted, full models (ony ones available) are left as True
FinMod <- c()
for(dbc in 1:NbPA) FinMod <- c(FinMod, ((dbc-1)*nbrep)+1)
TotalModels[FinMod] <- FALSE
}
}
TotalModels[!EnsRun] <- FALSE
#calculate total consensus for first 3 methods
ARRAY.tot[, i, 'prob.mean'] <- apply(ARRAY[,TotalModels,1], 1, mean)
ARRAY.tot[, i, 'prob.mean.weighted'] <- apply(ARRAY[,TotalModels,2], 1, mean)
ARRAY.tot[, i, 'median'] <- apply(ARRAY[,TotalModels,3], 1, median)
#convert those first 3 methods final consensus in binary
ARRAY.tot.bin[, i, 'prob.mean'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'prob.mean']), as.numeric(mean(ths[[1]][TotalModels])))
ARRAY.tot.bin[, i, 'prob.mean.weighted'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'prob.mean.weighted']), as.numeric(mean(ths[[2]][TotalModels])))
ARRAY.tot.bin[, i, 'median'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'median']), as.numeric(median(ths[[3]][TotalModels])))
if(Biomod.material$evaluation.choice[["Roc"]]){
ARRAY.tot[, i, 'Roc.mean'] <- apply(ARRAY[,TotalModels,4], 1, mean)
ARRAY.tot.bin[, i, 'Roc.mean'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'Roc.mean']), 500)
}
if(Biomod.material$evaluation.choice[["Kappa"]]){
ARRAY.tot[, i, 'Kappa.mean'] <- apply(ARRAY[,TotalModels,5], 1, mean)
ARRAY.tot.bin[, i, 'Kappa.mean'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'Roc.mean']), 500)
}
if(Biomod.material$evaluation.choice[["TSS"]]){
ARRAY.tot[, i, 'TSS.mean'] <- apply(ARRAY[,TotalModels,6], 1, mean)
ARRAY.tot.bin[, i, 'TSS.mean'] <- BinaryTransformation(as.data.frame(ARRAY.tot[, i, 'TSS.mean']), 500)
}
} else { #if only one run was done there is no further calculation possible
ARRAY.tot[,i,] <- ARRAY[,1,]
for(j in c(1,2,4,5,6)){
if(!is.na(ARRAY[1,1,j]))
ARRAY.tot.bin[,i,j] <- BinaryTransformation(as.data.frame(ARRAY.tot[,i,j]), as.numeric(ths[[j]]))
}
}
}#if ii==1
if(ii==2){ # Test run : consensus methods done on current data
#test the methods with AUC
test <- matrix(nc=nbrep*NbPA, nr=6, dimnames=list(c('prob.mean','prob.mean.weighted','median','Roc.mean','Kappa.mean','TSS.mean'),dimnames(ARRAY)[[2]]))
for(j in 1:NbPA){
if(Biomod.material$NbRepPA != 0) lin <- Biomod.PA.sample[[Biomod.material$species.names[i]]][[j]]
else lin <- 1:dim(ARRAY)[1]
for(k in 1:nbrep){ # Loop through Model Evaluation replicates
for(m in 1:6){ # Loop through ensemble forecasting methods
if(m<4) {test[m,(j-1)*nbrep+k] <- .somers2(ARRAY[,(j-1)*nbrep+k,m], DataBIOMOD[lin, Biomod.material$NbVar+i])["C"] #to check if method was chosen
} else if(Biomod.material$evaluation.choice[Th[m-3]]) test[m,(j-1)*nbrep+k] <- .somers2(ARRAY[,(j-1)*nbrep+k,m], DataBIOMOD[lin, Biomod.material$NbVar+i])["C"]
}
}
}
list.out[[i]][["test.results"]] <- test
}
} #end of ii loop (test run (ensemble forecast calculated on calibration data) or normal ensFor run)
} #end of species i loop
#assign objects to the name they should have and store them on disk
#assign(paste("Total_consensus_", Proj.name, sep=""), ARRAY.tot)
eval(parse(text=paste("Total_consensus_", Proj.name,"<- ARRAY.tot", sep="")))
#assign(paste("Total_consensus_", Proj.name, "_Bin", sep=""), ARRAY.tot.bin)
eval(parse(text=paste("Total_consensus_", Proj.name, "_Bin <- ARRAY.tot.bin", sep="")))
#assign(paste("consensus_", Proj.name,"_results", sep=""), list.out, pos=1)
eval(parse(text=paste("consensus_", Proj.name,"_results <- list.out", sep="")))
if(compress=="xz"){
eval(parse(text=paste("save(Total_consensus_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name,"', compress='xz')", sep="")))
eval(parse(text=paste("save(Total_consensus_", Proj.name, "_Bin , file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name, "_Bin', compress='xz')", sep="")))
eval(parse(text=paste("save(consensus_", Proj.name,"_results, file='", getwd(),"/proj.", Proj.name, "/consensus_", Proj.name,"_results', compress='xz')", sep="")))
}
if(compress=="gzip"){
eval(parse(text=paste("save(Total_consensus_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name,"', compress='gzip')", sep="")))
eval(parse(text=paste("save(Total_consensus_", Proj.name, "_Bin , file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name, "_Bin', compress='gzip')", sep="")))
eval(parse(text=paste("save(consensus_", Proj.name,"_results, file='", getwd(),"/proj.", Proj.name, "/consensus_", Proj.name,"_results', compress='gzip')", sep="")))
}
if(compress==FALSE){
eval(parse(text=paste("save(Total_consensus_", Proj.name, ", file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name,"')", sep="")))
eval(parse(text=paste("save(Total_consensus_", Proj.name, "_Bin , file='", getwd(),"/proj.", Proj.name, "/Total_consensus_", Proj.name, "_Bin')", sep="")))
eval(parse(text=paste("save(consensus_", Proj.name,"_results, file='", getwd(),"/proj.", Proj.name, "/consensus_", Proj.name,"_results')", sep="")))
}
rm(ARRAY, ths, pos=1)
cat(paste("\n consensus_", Proj.name,"_results \n", sep=""))
return(list.out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.