Nothing
#### Ecosystem Functional Types: Clustering
#'
#' @author Xavier Rotllan-Puig
#' @title EFT_clust
#' @description EFT_clust derives the Ecosystem Functional Types using K-means to perform a clustering on the
#' pixels of the SpatRaster object
#' @details \code{\link[stats]{kmeans}} does not optimize the final number of clusters. It needs to be set by means of 'n_clust'
#' (default = 20). There are several methods and statistics to determine the optimal number. \code{\link{clust_optim}}
#' produces a scree plot to help the user to decide the optimal number of clusters.
#'
#' EFT_clust passes as default to \code{\link[stats]{kmeans}} iter.max = 500 and algorithm = "MacQueen", but these can be
#' modified passing these arguments through '...'
#'
#' Please note that the variables are standardised (mean = 0; sd = 1) before running the clustering
#'
#' An evaluation of the clustering is provided together with the SpatRaster object.
#' It is calculated as model$betweenss / model$totss * 100;
#' where 'betweenss' and 'totss' are generated by \code{\link[stats]{kmeans}}
#'
#' @rawNamespace import(data.table, except = shift)
#' @importFrom stats complete.cases kmeans na.omit
#' @importFrom dplyr bind_rows
#' @rawNamespace import(terra, except = na.omit)
#' @param obj2clust SpatRaster object (or its file name). Each layer is one variable
#' @param n_clust Numeric. Number of total clusters. Optional. Default = 20
#' @param standardise_vars Logical. Optional. If TRUE (default), variables are standardised (mean = 0; sd = 1)
#' @param filename Character. Output filename. Optional
#' @param ... Arguments for \code{\link[stats]{kmeans}}. Optional
#' @return A list with two components: (1) a SpatRaster object with the clusters and (2) a vector with the clustering evaluation in percentage
#' @seealso \code{\link{PCAs4clust}}; \code{\link{clust_optim}}; \code{\link[stats]{kmeans}}
#' @name EFT_clust
#' @export
#' @examples
#' \donttest{
#' dirctry <- paste0(system.file(package='LPDynR'), "/extdata")
#' variables_noCor <- rm_multicol(dir2process = dirctry,
#' multicol_cutoff = 0.7)
#' EFT_clust(obj2clust = variables_noCor,
#' n_clust = 10)
#'}
EFT_clust <- function(obj2clust = NULL,
n_clust = 20,
standardise_vars = TRUE,
filename = "",
...){
## Reading in data
if(is.null(obj2clust)) stop("Please provide objects of classe SpatRaster (or file names to read in some)")
if(is.character(obj2clust)){
obj2clust <- rast(obj2clust)
}else if(!class(obj2clust) %in% c("SpatRaster")){
stop("Please provide objects of classe SpatRaster (or a file name to read in from)")
}
if(!is.numeric(n_clust) | is.na(n_clust) | is.null(n_clust))
stop("Please provide a number of clusters")
obj2clust_ini <- as.data.frame(obj2clust)
obj2clust_ini$rn <- 1:nrow(obj2clust_ini)
setDT(obj2clust_ini)
clstr <- NULL
obj2clust_ini_NA <- obj2clust_ini[!complete.cases(obj2clust_ini), ] #to be used at the end to fill the raster
obj2clust_ini_NA[, clstr := NA]
obj2clust_ini_NA[, clstr := as.integer(clstr)]
obj2clust_ini_NA <- obj2clust_ini_NA[, .SD, .SDcols = c("clstr", "rn")]
obj2clust_ini <- stats::na.omit(obj2clust_ini)
## Standardising
if(standardise_vars == TRUE){
cols2scale <- colnames(obj2clust_ini)
cols2scale <- cols2scale[!cols2scale %in% c("rn")]
cols2keep <- paste0(cols2scale, "_scld")
obj2clust_ini[, (cols2keep) := lapply(.SD, function(x) as.vector(scale(x))), .SDcols = cols2scale]
obj2clust_ini <- obj2clust_ini[, .SD, .SDcols = c(cols2keep, "rn")]
}
## Clustering using optimal number of clusters
dts <- list(...)
if(is.null(dts$nstart)) dts$nstart <- 1 # kmeans makes 'nstart' initial configurations and reports the best one
if(is.null(dts$iter.max)) dts$iter.max <- 500
if(is.null(dts$algorithm)) dts$algorithm <- "MacQueen"
# Hartigan-Wong algorithm generally does a better job than either of those, but trying several random
# starts (‘nstart’> 1) is often recommended. In rare cases, when some of the points (rows of ‘x’) are
# extremely close, the algorithm may not converge in the “Quick-Transfer” stage, signalling a warning
#(and returning ‘ifault = 4’). Slight rounding of the data may be advisable in that case. (from ?kmeans)
kmeans_clustring <- kmeans(obj2clust_ini[, - c("rn")],
centers = n_clust,
nstart = dts$nstart,
iter.max = dts$iter.max,
algorithm = dts$algorithm
)
clust_eval <- kmeans_clustring$betweenss / kmeans_clustring$totss * 100
## Binding NA data and adding back spatial information ####
obj2clust_ini[, clstr := kmeans_clustring$cluster]
obj2clust_ini <- obj2clust_ini[, .SD, .SDcols = c("clstr", "rn")]
all_data <- bind_rows(obj2clust_ini, obj2clust_ini_NA)
#rm(obj2clust_ini, obj2clust_ini_NA)
#gc()
setorder(all_data, rn)
#obj2clust1 <- obj2clust[[1]]
EFTs_raster <- suppressWarnings(rast(nrows = nrow(obj2clust), ncols = ncol(obj2clust),
crs = crs(obj2clust),
ext = ext(obj2clust),
#resolution,
vals = all_data$clstr))
names(EFTs_raster) <- "clusterNum"
## Saving results
if (filename != "") writeRaster(EFTs_raster, filename = filename, overwrite = TRUE)
return(list(EFTs_raster, clust_eval))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.