#' performs SFS to identify combination of input variables maximizing a criterion
#'
#' @param input_raster SpatRaster or list of SpatRaster
#' @param obs_vect SpatVector or SpatVectorCollection
#' @param obs2optimize numeric .list of ground obs diversity metrics
#' corresponding to obs_vect.
#' Expected values: richness, shannon, simpson, hill, BC
#' @param obs_criterion character. richness, shannon, simpson or BC
#' @param corrMethod character. select between method available for cor.test
#' @param input_mask SpatRaster corresponding to mask
#' @param Hill_order numeric.
#' @param nbclusters numeric.
#' @param MinSun numeric.
#' @param nbPix numeric.
#' @param nbIter numeric.
#' @param pcelim numeric.
#' @param verbose boolean.
#' @param nbWorkers numeric.
#' @param nbCPU numeric.
#'
#' @return list including performances (correlation) of SFS with additional
#' features and assessed diversity metrics corresponding to each step
#' @importFrom doFuture registerDoFuture
#' @importFrom future plan multisession sequential
#' @importFrom foreach foreach %dopar%
#' @importFrom dplyr group_split
#' @importFrom vegan mantel
#' @importFrom progress progress_bar
#' @importFrom stats cor.test
#' @importFrom parallel makeCluster stopCluster
#'
#' @export
biodivMapR_SFS <- function(input_raster, obs_vect, obs2optimize,
obs_criterion = 'shannon', corrMethod = 'spearman',
input_mask = NULL,
Hill_order = 1, nbclusters = 50, MinSun = 0.25,
nbPix = 1e5, nbIter = 20, pcelim = 0.02, verbose = T,
nbWorkers = 1, nbCPU = 1){
FullListIndices <- c("richness", "shannon", "simpson", "hill", "BC", "FRic", "FEve", "FDiv")
#### Which diversity metrics should be computed?
alphamet <- c('richness', 'shannon', 'simpson', 'hill')
betamet <- 'BC'
fmet <- c('FRic', 'FEve', 'FDiv')
# if computation of functional metrics required
alphametrics <- alphamet[which(alphamet %in% names(obs2optimize))]
if (length(alphametrics)==0) alphametrics <- NULL
# computation of beta diversity required?
betametrics <- betamet[which(betamet %in% names(obs2optimize))]
if (length(betametrics)==0) getBeta <- F
if (length(betametrics)>0) getBeta <- T
functionalmetrics <- fmet[which(fmet %in% names(obs2optimize))]
if (length(functionalmetrics)==0) Functional <- NULL
if (length(functionalmetrics)>0) Functional <- functionalmetrics
# 1- prepare for kmeans over the full spatial extent
if (verbose ==T) message('sampling pixels to compute spectral species')
Pix_Per_Iter <- define_pixels_per_iter(input_rast = input_raster,
input_mask = input_mask,
nbPix = nbPix,
nbIter = nbIter)
extent_area <- get_raster_extent(input_raster[[1]])
nbSamples <- Pix_Per_Iter*nbIter
rast_sample <- sample_from_raster(extent_area = extent_area,
nbSamples = nbSamples,
input_rast = input_raster,
input_mask = input_mask)
if (verbose ==T) message('sampling done')
# 2- extract information from field plots
if (inherits(obs_vect, what = 'SpatVectorCollection')){
# extract from list of SpatVectors in collection
rastext <- lapply(X = obs_vect,
FUN = extract_vect_from_rast,
input_rast = input_raster,
input_mask = input_mask,
MinSun = MinSun, prog = F)
# update plot ID in collection
nbPlots_total <- 0
for (ind_vect in seq_len(length(obs_vect))){
AttributeTable <- rastext[[ind_vect]]$AttributeTable
rast_sample_vect <- rastext[[ind_vect]]$rast_sample_vect
AttributeTable$ID_biodivMapR <- AttributeTable$ID_biodivMapR + nbPlots_total
rast_sample_vect$ID <- rast_sample_vect$ID + nbPlots_total
nbPlots_total <- max(AttributeTable$ID_biodivMapR)
rastext[[ind_vect]]$AttributeTable <- AttributeTable
rastext[[ind_vect]]$rast_sample_vect <- rast_sample_vect
}
rast_sample_vect <- lapply(rastext,'[[','rast_sample_vect')
AttributeTable <- lapply(rastext,'[[','AttributeTable')
rast_val <- do.call(rbind,rast_sample_vect)
Attributes <- do.call(rbind,AttributeTable)
} else if (inherits(obs_vect, what = 'SpatVector')){
# extract from list of SpatVectors in collection
rastext <- extract_vect_from_rast(SpatVector = obs_vect,
input_rast = input_raster,
input_mask = input_mask,
MinSun = MinSun)
# update plot ID in collection
# rast_sample_vect <- lapply(rastext,'[[','rast_sample_vect')
# AttributeTable <- lapply(rastext,'[[','AttributeTable')
# rast_sample_vect <- rastext$rast_sample_vect
# AttributeTable <- rastext$AttributeTable
rast_val <- rastext$rast_sample_vect
Attributes <- rastext$AttributeTable
nbPlots_total <- length(Attributes$id)
}
if (verbose ==T) message('plot extraction done')
IDplot <- rast_val$ID
rast_val$ID <- NULL
# 3- perform SFS
if (verbose ==T) message('perform SFS')
# component visual pre-selection
# AllVars <- seq_len(ncol(rast_sample))
AllVars <- names(rast_sample)
NbPCs_To_Keep <- length(AllVars)
# multi-thread feature selection (SFS)
registerDoFuture()
# plan(multisession, workers = nbWorkers)
cl <- parallel::makeCluster(nbWorkers)
plan("cluster", workers = cl) ## same as plan(multisession, workers = nbCPU)
Corr_criterion <- EvolCorr <- CorrSFS <- AssessSFS <- list()
SelectedVars <- EvolCorr$richness <- EvolCorr$shannon <- EvolCorr$simpson <-
EvolCorr$hill <- EvolCorr$FRic <- EvolCorr$FEve <- EvolCorr$FDiv <-
EvolCorr$BC <- c()
pb <- progress::progress_bar$new(
format = "Perform feature selection [:bar] :percent in :elapsedfull",
total = NbPCs_To_Keep, clear = FALSE, width= 100)
for (nbvars2select in seq_len(NbPCs_To_Keep)){
NumVar_list <- as.list(seq_len(length(AllVars)))
numvar <- win_ID <- NULL
subfeatures_SFS <- function() {
foreach(numvar = NumVar_list) %dopar% {
CorrVal <- Assess <- list()
SelFeat_tmp <- c(SelectedVars,AllVars[[numvar]])
# select features to compute kmeans
Kmeans_info <- get_kmeans(rast_sample = rast_sample[SelFeat_tmp],
nbIter = nbIter,
nbclusters = nbclusters,
nbCPU = 1, progressbar = F)
if (!is.null(Functional)){
# center reduce data
inputdata_cr <- center_reduce(X = rast_val[SelFeat_tmp],
m = Kmeans_info$MinVal,
sig = Kmeans_info$Range)
inputdata_cr$ID <- IDplot
inputdata_cr <- inputdata_cr %>% split(.$ID)
inputdata_cr <- lapply(inputdata_cr, function(x) data.frame(x[ , !(names(x) %in% "ID")]))
FunctDiv <- lapply(X = inputdata_cr,
FUN = get_functional_diversity,
FDmetric = Functional)
FunctDiv <- data.frame('FRic' = unlist(lapply(FunctDiv, '[[',1)),
'FEve' = unlist(lapply(FunctDiv, '[[',2)),
'FDiv' = unlist(lapply(FunctDiv, '[[',3)))
for (idx in Functional){
Assess[[idx]] <- FunctDiv[[idx]]
CorrVal[[idx]] <- cor.test(obs2optimize[[idx]],
Assess[[idx]],
method = corrMethod)$estimate
}
}
# if (obs_criterion %in% c('richness', 'shannon', 'simpson', 'hill', 'BC')){
SSValid <- get_spectralSpecies(inputdata = rast_val[SelFeat_tmp],
Kmeans_info = Kmeans_info)
SSValid$win_ID <- IDplot
windows_per_plot <- split_chunk(SSchunk = SSValid, nbCPU = 1)
windows_per_plot$win_ID <- list(SSValid$win_ID)
alphabetaIdx_CPU <- lapply(X = windows_per_plot$SSwindow_perCPU,
FUN = alphabeta_window_list,
nbclusters = nbclusters,
alphametrics = c('richness', 'shannon', 'simpson', 'hill'),
Hill_order = Hill_order, pcelim = pcelim)
alphabetaIdx <- unlist(alphabetaIdx_CPU,recursive = F)
rm(alphabetaIdx_CPU)
gc()
# 7- reshape alpha diversity metrics
IDwindow <- unlist(windows_per_plot$IDwindow_perCPU)
selcrit <- list('richness'= 1,
'shannon' = 3,
'simpson' = 5,
'hill' = 9)
for (crit in names(selcrit)){
if (!is.null(obs2optimize[[crit]])){
Assess[[crit]] <- rep(x = NA,nbPlots_total)
Assess[[crit]][IDwindow] <- unlist(lapply(alphabetaIdx,'[[',
selcrit[[crit]]))
CorrVal[[crit]] <- cor.test(obs2optimize[[crit]],
Assess[[crit]],
method = corrMethod)$estimate
} else {
CorrVal[[crit]] <- Assess[[crit]] <- NA
}
}
if (!is.null(obs2optimize$BC)){
# compute BC matrix from spectral species
SSValid_win <- SSValid %>% group_split(win_ID, .keep = F)
# spectral species distribution
SSdist <- list()
for (iter in names(SSValid_win[[1]])) SSdist[[iter]] <- lapply(SSValid_win, '[[',iter)
# compute spectral species distribution for each cluster & BC dissimilarity
SSD_BCval <- lapply(SSdist,
FUN = get_BCdiss_from_SSD,
nbclusters = nbclusters,
pcelim = pcelim)
MatBC_iter <- lapply(SSD_BCval, '[[','MatBC')
SSD <- lapply(SSD_BCval, '[[','SSD')
MatBC <- Reduce('+', MatBC_iter)/nbIter
MatBC_Full <- matrix(data = NA, nrow = nbPlots_total, ncol = nbPlots_total)
MatBC_Full[IDwindow,IDwindow] <- MatBC
colnames(MatBC_Full) <- rownames(MatBC_Full) <- Attributes$ID_biodivMapR
# Corr_val <- cor.test(c(obs2optimize[['BC']]),c(MatBC_Full),
# method = 'pearson')$estimate
Assess[['BC']] <- MatBC_Full
mantelVal <- vegan::mantel(xdis = as.dist(MatBC_Full),
ydis = as.dist(obs2optimize[['BC']]),
na.rm = T, method = corrMethod)
CorrVal[['BC']] <- mantelVal$statistic
# CorrVal[['BC']] <- cor.test(obs2optimize[['BC']],
# c(Assess[['BC']]), method = 'pearson')$estimate
} else {
Assess[['BC']] <- CorrVal[['BC']] <- NA
}
# }
return(list('crit2Opt' = CorrVal,
'AssessedVal' = Assess))
}
}
subSFS <- subfeatures_SFS()
pb$tick()
CorrSFS[[nbvars2select]] <- list()
for (ind in FullListIndices) {
CorrSFS[[nbvars2select]][[ind]] <- unlist(lapply(lapply(subSFS,'[[','crit2Opt'),'[[',ind))
}
CorrSFS[[nbvars2select]] <- data.frame(CorrSFS[[nbvars2select]])
rownames(CorrSFS[[nbvars2select]]) <- AllVars
criterion <- CorrSFS[[nbvars2select]][[obs_criterion]]
SelVar <- which(criterion == max(criterion,na.rm = T))
AssessSFS[[nbvars2select]] <- list()
for (ind in FullListIndices) {
AssessSFS[[nbvars2select]][[ind]] <- unlist((lapply(lapply(subSFS,'[[','AssessedVal'),'[[',ind))[SelVar[1]])
}
# which criterion to maximize with SFS?
WhichVar <- rownames(CorrSFS[[nbvars2select]])[SelVar[1]]
# add selected component to selected vars
SelectedVars <- c(SelectedVars,WhichVar)
# delete selected component from AllVars
AllVars <- AllVars[-which(AllVars==WhichVar)]
EvolCorr$richness <- c(EvolCorr$richness,CorrSFS[[nbvars2select]]$richness[SelVar[1]])
EvolCorr$shannon <- c(EvolCorr$shannon,CorrSFS[[nbvars2select]]$shannon[SelVar[1]])
EvolCorr$simpson <- c(EvolCorr$simpson,CorrSFS[[nbvars2select]]$simpson[SelVar[1]])
EvolCorr$hill <- c(EvolCorr$hill,CorrSFS[[nbvars2select]]$hill[SelVar[1]])
EvolCorr$BC <- c(EvolCorr$BC,CorrSFS[[nbvars2select]]$BC[SelVar[1]])
EvolCorr$FRic <- c(EvolCorr$FRic, CorrSFS[[nbvars2select]]$FRic[SelVar[1]])
EvolCorr$FEve <- c(EvolCorr$FEve, CorrSFS[[nbvars2select]]$FEve[SelVar[1]])
EvolCorr$FDiv <- c(EvolCorr$FDiv, CorrSFS[[nbvars2select]]$FDiv[SelVar[1]])
}
parallel::stopCluster(cl)
plan(sequential)
EvolCorr <- data.frame(EvolCorr)
rownames(EvolCorr) <- SelectedVars
AssessSFS_comp <- list()
for (ind in FullListIndices) {
if (!is.null(AssessSFS[[1]][[ind]])) {
AssessSFS_comp[[ind]] <- data.frame(lapply(AssessSFS,'[[',ind))
names(AssessSFS_comp[[ind]]) <- SelectedVars
AssessSFS_comp[[ind]]$obs <- c(obs2optimize[[ind]])
} else {
AssessSFS_comp[[ind]] <- NA
}
}
return(list('SFS_perf' = EvolCorr,
'AssessDiv' = AssessSFS_comp))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.