Nothing
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##### Fonctions de visualisation #####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Mapping the clusters
#'
#' @description Build some maps to visualize the results of the clustering
#'
#' @param geodata An object of class features collection from sf /
#' ordered like the original data used for the clustering. Can be Null if object is
#' a FCMres and has been created with rasters.
#' @param object A FCMres object, typically obtained from functions CMeans,
#' GCMeans, SFCMeans, SGFCMeans. Can also be a simple membership matrix.
#' @param undecided A float between 0 and 1 giving the minimum value that an
#' observation must get in the membership matrix to not be considered as
#' uncertain (default = NULL)
#' @return A named list with :
#' \itemize{
#' \item ProbaMaps : a list of tmap maps showing for each group the
#' probability of the observations to belong to that group
#' \item ClusterMap : a tmap map showing the most likely group for
#' observation
#' }
#' @export
#' @examples
#' \dontrun{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' result <- SFCMeans(dataset, Wqueen,k = 5, m = 1.5, alpha = 1.5, standardize = TRUE)
#' MyMaps <- mapClusters(LyonIris, result$Belongings)
#' }
mapClusters <- function(geodata = NULL, object, undecided = NULL) {# nocov start
#cls <- class(object)[[1]]
isRaster <- FALSE
if(inherits(object,"FCMres")){
belongmatrix <- object$Belongings
isRaster <- object$isRaster
}else if(inherits(object,"matrix")){
belongmatrix <- object
}else{
stop("object must be one of matrix of FCMres")
}
if(isRaster){
return(mapRasters(object, undecided))
}else{
geodata$OID <- as.character(1:nrow(geodata))
sf_type <- sf::st_geometry_type(geodata,by_geometry = FALSE)
if(sf_type %in% c("POLYGON","MULTIPOLYGON")){
geom_type <- "polygon"
}else if(sf_type %in% c("POINT","MULTIPOINT")){
geom_type <- "point"
}else if(sf_type %in% c("LINESTRING","MULTILINESTRING")){
geom_type <- "line"
}else{
stop("The object passed in geodata argument is not supported.
Supported classes are feature collections from sf with geometry type in
POLYGON, MULTIPOLYGON, POINT, MULTIPOINT, LINESTRING, MULTILINESTRING")
}
return(mapThis(geodata, belongmatrix, undecided, geom_type))
# if(sf::st_geometry_type(geodata,by_geometry = FALSE) %in% c("POLYGON","MULTIPOLYGON")){
# return(mapPolygons(geodata, belongmatrix, undecided))
# }else if(sf::st_geometry_type(geodata,by_geometry = FALSE) %in% c("POINT","MULTIPOINT")){
# return(mapPoints(geodata, belongmatrix, undecided))
# else if(sf::st_geometry_type(geodata,by_geometry = FALSE) %in% c("LINESTRING","MULTILINESTRING")){
# return(mapLines(geodata, belongmatrix, undecided))
# }else {
# stop("The object passed in geodata argument is not supported.
# Supported classes are feature collections from sf with geometry type in
# POLYGON, MULTIPOLYGON, POINT, MULTIPOINT, LINESTRING, MULTILINESTRING")
# }
}
}
#' @title Mapping the clusters (rasters)
#'
#' @description Internal function to realize maps based on rasters
#'
#' @param object A FCMres object
#' @param undecided A float between 0 and 1 giving the minimum value that an
#' observation must get in the membership matrix to not be considered as
#' uncertain (default = NULL)
#' @return A named list with :
#' \itemize{
#' \item ProbaMaps : a list of ggplot maps showing for each group the
#' probability of the observations to belong to that group
#' \item ClusterMap : a ggplot map showing the most likely group for each observation
#' }
#' @importFrom methods as
#' @keywords internal
#' @examples
#' #No example provided, this is an internal function, use the general wrapper function mapClusters
mapRasters <- function(object, undecided){
# realisation des cartes de probabilites
ProbaPlots <- lapply(1:ncol(object$Belongings), function(i) {
rast <- object$rasters[[i]]
#spdf <- as(rast, "SpatialPixelsDataFrame")
#df <- as.data.frame(spdf)
df <- terra::as.data.frame(rast, xy = TRUE)
colnames(df) <- c("x", "y", "value")
Plot <- ggplot2::ggplot(df) +
ggplot2::geom_raster(ggplot2::aes_string(x="x", y="y", fill="value"), alpha=0.8) +
ggplot2::scale_fill_gradient(low = "white", high = "blue") +
ggplot2::coord_fixed(ratio = 1)+
ggplot2::theme(
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank()
) +
ggplot2::labs(title = paste("membership values for group ",i,sep=""))
return(Plot)
})
#finding the uncertain pixels
if(is.null(undecided)){
undecided <- 0
}
#finding for each pixel the max probability
uncertain_vec <- undecidedUnits(object$Belongings, tol = undecided, out = "numeric")
rast <- object$rasters[[1]]
vec <- rep(NA, times = terra::ncell(rast))
vec[object$missing] <- uncertain_vec
terra::values(rast) <- vec
#spdf <- as(rast, "SpatialPixelsDataFrame")
#df <- as.data.frame(spdf)
df <- terra::as.data.frame(rast, xy = TRUE)
colnames(df) <- c("x", "y","value")
df$value <- as.character(df$value)
df$value <- ifelse(df$value == "-1", "undecided", paste("group", df$value, sep = " "))
colors <- RColorBrewer::brewer.pal(ncol(object$Belongings),"Set3")
if("undecided" %in% df$values){
colors <- c(colors, "black")
}
ClusterMap <- ggplot2::ggplot(df) +
ggplot2::geom_raster(ggplot2::aes_string(x = "x", y = "y", fill = "value")) +
ggplot2::scale_fill_discrete(type = colors) +
ggplot2::coord_fixed(ratio = 1)+
ggplot2::theme(
legend.title = ggplot2::element_blank(),
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
axis.ticks = ggplot2::element_blank()
)
return(list(ProbaMaps = ProbaPlots, ClusterPlot = ClusterMap))
}
#' @title Mapping the clusters
#'
#' @description Internal function to realize maps
#'
#' @param geodata feature collections ordered like the original data used
#' for the clustering
#' @param belongmatrix The membership matrix obtained at the end of the algorithm
#' @param undecided A float between 0 and 1 giving the minimum value that an
#' observation must get in the membership matrix to not be considered as
#' uncertain (default = NULL)
#' @param geom_type A string indicating the type of geometry (polygon, string or point)
#' @return A named list with :
#' \itemize{
#' \item ProbaMaps : a list of ggplot maps showing for each group the
#' probability of the observations to belong to that group
#' \item ClusterMap : a ggplot map showing the most likely group for each observation
#' }
#' @importFrom tmap tm_shape tm_fill tm_borders tm_dots tm_layout tm_lines
#' @keywords internal
#' @examples
#' #No example provided, this is an internal function, use the general wrapper function mapClusters
mapThis <- function(geodata, belongmatrix, undecided = NULL, geom_type = "polygon"){
belongmatrix <- as.data.frame(belongmatrix)
names(belongmatrix) <- paste0("cluster",1:ncol(belongmatrix))
geodata <- cbind(geodata,belongmatrix)
# attribution des groupes
Groups <- names(belongmatrix)[max.col(belongmatrix, ties.method = "first")]
if (is.null(undecided) == FALSE) {
Maximums <- do.call(pmax, belongmatrix)
Undecided <- ifelse(Maximums < undecided, "Undecided", "Ok")
} else {
Undecided <- rep("Ok", length(Groups))
}
geodata$Cluster <- Groups
geodata$Undecided <- Undecided
# realisation des cartes de probabilites
ProbaPlots <- lapply(names(belongmatrix), function(Name) {
if(geom_type == "polygon"){
this_map <- tm_shape(geodata) +
tm_fill(col = Name, palette = "Blues", style = "cont") +
tm_borders("black") +
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if(geom_type == "line"){
this_map <- tm_shape(geodata) +
tm_lines(col = Name, palette = "Blues", style = "cont") +
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if(geom_type == "point"){
this_map <- tm_shape(geodata) +
tm_dots(col = Name, palette = "Blues", style = "cont", size = 0.1) +
tm_layout(legend.outside = TRUE, frame = FALSE)
}
return(this_map)
})
# realisation de la carte des groupes
if(geom_type == "polygon"){
ClusterMap <- tm_shape(geodata) +
tm_fill("Cluster", palette = "Set1") +
tm_borders("black")+
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if (geom_type == "line"){
ClusterMap <- tm_shape(geodata) +
tm_lines("Cluster", palette = "Set1") +
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if(geom_type == "point"){
ClusterMap <- tm_shape(geodata) +
tm_dots("Cluster", palette = "Set1", size = 0.1) +
tm_layout(legend.outside = TRUE, frame = FALSE)
}
if(is.null(undecided) == FALSE){
if(geom_type == "polygon"){
ClusterMap <- ClusterMap +
tm_shape(subset(geodata, geodata$Undecided == "Undecided")) +
tm_fill("white", alpha = 0.7)+
tm_borders("black") +
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if (geom_type == "line"){
ClusterMap <- ClusterMap +
tm_shape(subset(geodata, geodata$Undecided == "Undecided")) +
tm_lines("white", alpha = 0.7)+
tm_layout(legend.outside = TRUE, frame = FALSE)
}else if (geom_type == "point"){
ClusterMap <- ClusterMap +
tm_shape(subset(geodata, geodata$Undecided == "Undecided")) +
tm_dots("white", alpha = 0.7, size = 0.1)+
tm_layout(legend.outside = TRUE, frame = FALSE)
}
}
return(list(ProbaMaps = ProbaPlots, ClusterPlot = ClusterMap))
}
#' @title Spider chart
#'
#' @description Display spider charts to quickly compare values between groups
#'
#' @details For each group, the weighted mean of each variable in data is calculated
#' based on the probability of belonging to this group of each observation.
#' On the chart the exterior ring represents the maximum value obtained for
#' all the groups and the interior ring the minimum. The groups are located
#' between these two limits in a linear way.
#'
#' @param data A dataframe with numeric columns
#' @param belongmatrix A membership matrix
#' @param chartcolors A vector of color names used for the spider plot
#' @return NULL, the plots are displayed directly by the function (see fmsb::radarchart)
#' @importFrom grDevices rgb colors col2rgb
#' @importFrom graphics legend
#' @importFrom stats weighted.mean
#' @export
#' @examples
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' result <- SFCMeans(dataset, Wqueen,k = 5, m = 1.5, alpha = 1.5, standardize = TRUE)
#' spiderPlots(dataset,result$Belongings)
spiderPlots<- function(data, belongmatrix, chartcolors=NULL){
Groups <- ncol(belongmatrix)
Values <- do.call(rbind, lapply(1:Groups, function(i) {
W <- belongmatrix[,i]
return(apply(data,2, function(row){return(weighted.mean(row,W))}))
}))
Mins <- apply(Values, 2, min)
Maxs <- apply(Values, 2, max)
for (Gp in 1:Groups) {
Scores <- Values[Gp,]
names(Scores) <- names(data)
datam <- data.frame(rbind(Maxs, Mins, Scores))
fmsb::radarchart(datam, axistype = 1, pcol = rgb(0.2, 0.5, 0.5, 0.9),
pfcol = rgb(0.2, 0.5, 0.5, 0.5),
plwd = 4, cglcol = "grey", cglty = 1,
axislabcol = "grey", cglwd = 0.8, vlcex = 0.8,
title = paste("Group number : ",Gp))
}
if (is.null(chartcolors)){
selcolors <- sample(colors(),size = ncol(belongmatrix))
}else{
selcolors <- chartcolors
}
tocolors <- sapply(selcolors,function(i){
v1 <- as.list(col2rgb(i,alpha=TRUE))
v1[[length(v1)]] <- 100
v1$maxColorValue <- 255
new_color <- do.call(rgb,v1)
return(new_color)
})
alldata <- data.frame(rbind(Maxs, Mins,Values))
fmsb::radarchart(alldata,pcol=selcolors,pfcol = tocolors,
axislabcol = "grey",plwd = 2, cglcol = "grey",
cglty = 1,vlcex = 0.8,
plty = rep(1,ncol(belongmatrix)))
legend("right",legend = levels(as.factor(paste("Group ",1:ncol(belongmatrix),sep=""))),
fill=tocolors)
}
#' @title Violin plots
#'
#' @description Return violin plots to compare the distribution of each variable for each
#' group.
#'
#' @param data A dataframe with numeric columns
#' @param groups A vector indicating the group of each observation
#' @return A list of plots created with ggplot2
#' @importFrom dplyr %>%
#' @importFrom grDevices rgb
#' @export
#' @examples
#' \dontrun{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometrie(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' result <- SFCMeans(dataset, Wqueen,k = 5, m = 1.5, alpha = 1.5, standardize = TRUE)
#' violinPlots(dataset, result$Groups)
#' }
violinPlots <- function(data,groups){
data <- as.data.frame(data)
data$groups <- groups
groupvar <- "groups"
Plots <- list()
vars <- names(data)
for (Var in vars) {
if(Var!="groups"){
Plot <- ggplot2::ggplot(data, ggplot2::aes_string(x = groupvar, y = Var, fill = groupvar)) +
ggplot2::geom_violin(show.legend = FALSE) +
ggplot2::geom_boxplot(width = 0.1, fill = "white", show.legend = FALSE) +
ggplot2::scale_color_brewer(palette = "Dark2")
Plots[[length(Plots) + 1]] <- Plot
}
}
return(Plots)
}
#' @title Bar plots
#'
#' @description Return bar plots to compare groups
#'
#' @param data A dataframe with numeric columns
#' @param belongmatrix A membership matrix
#' @param what Can be "mean" (default) or "median"
#' @param ncol An integer indicating the number of columns for the bar plot
#' @return a barplot created with ggplot2
#' @export
#' @examples
#' \dontrun{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' result <- SFCMeans(dataset, Wqueen,k = 5, m = 1.5, alpha = 1.5, standardize = TRUE)
#' barPlots(dataset, result$Belongings)
#' }
barPlots <- function(data,belongmatrix, ncol = 3, what = "mean"){
datasummary <- summarizeClusters(data, belongmatrix)
if (what == "mean"){
values <- lapply(datasummary, function(i){as.numeric(i[8,])})
} else if (what == "median"){
values <- lapply(datasummary, function(i){as.numeric(i[4,])})
}else{
warning('The parameter what is invalid, using what = "mean"')
values <- lapply(datasummary, function(i){as.numeric(i[8,])})
}
values <- data.frame(do.call(rbind, values))
names(values) <- colnames(datasummary$Cluster_1)
values$Cluster <- rownames(values)
values <- reshape2::melt(values, id.vars = "Cluster")
faceplot <- ggplot2::ggplot(values) +
ggplot2::geom_bar(ggplot2::aes_string(x = "Cluster", weight = "value", fill = "Cluster"), width = 0.7) +
ggplot2::theme(panel.background = ggplot2::element_blank(),
panel.grid = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank()
) +
ggplot2::facet_wrap(ggplot2::vars(values$variable), ncol=ncol, scales="free_y")
return(faceplot)
}
#force_sp_sample <- function(spatial, n, type){
# result <- tryCatch(
# {sp::spsample(spatial,n,type,iter = 10)},
# error = function(e){
# tryCatch(
# {sp::spsample(spatial,n,type = "regular",iter = 10)},
# error = function(ee){
# stop(sprintf('Error when drawing random points: ',ee$message))
# }
# )
# }
# )
# return(result)
#}
#' @title Uncertainty map
#'
#' @description Return a map to visualize membership matrix
#'
#' @details This function maps the membership matrix by plotting
#' random points in polygons, along lines or around points representing the
#' original observations. Each cluster is associated with a color and each
#' random point has a probability to be of that color equal to the membership
#' value of the feature it belongs itself. Thus, it is possible to
#' visualize regions with uncertainty and to identify the strongest clusters.
#'
#' @param geodata An object of class feature collection from sf ordered
#' like the original data used for the clustering.
#' @param belongmatrix A membership matrix
#' @param njit The number of points to map on each feature.
#' @param radius When mapping points, the radius indicates how far random
#' points will be plotted around the original features.
#' @param colors A vector of colors to use for the groups.
#' @param pt_size A float giving the size of the random points on the final
#' map (default is 0.05)
#' @return a map created with tmap
#' @export
#' @examples
#' \dontrun{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' result <- SFCMeans(dataset, Wqueen,k = 5, m = 1.5, alpha = 1.5, standardize = TRUE)
#' uncertaintyMap(LyonIris, result$Belongings)
#' }
uncertaintyMap <- function(geodata, belongmatrix, njit = 150, radius = NULL, colors = NULL, pt_size = 0.05){
geodata$tmpOID <- 1:nrow(geodata)
groups <- 1:ncol(belongmatrix)
#cls <- class(geodata)[[1]]
geom_type <- sf::st_geometry_type(geodata, by_geometry = FALSE)
if(geom_type %in% c("POLYGON","MULTIPOLYGON")){
geodata$area <- sf::st_area(geodata)
}else if(geom_type %in% c("LINESTRING", "MULTILINESTRING")){
geodata$area <- sf::st_length(geodata)
}else if(geom_type %in% c("POINT","MULTIPOINT")){
geodata$area <- 1
coords <- sf::st_coordinates(geodata)
geodata$X <- coords[,1]
geodata$Y <- coords[,2]
if(is.null(radius)){
stop("When mapping points, the parameter radius can not be NULL")
}
}else{
stop("geodata must be a feature collection (POINT, MULTIPOINT, LINESTRING, MULTILINESTRING, POLYGON or MULTIPOLYGON)")
}
maxA <- max(geodata$area)
rt <- as.numeric(njit / maxA)
if((geom_type %in% c("POINT","MULTIPOINT"))==FALSE){
n_obs <- as.integer(as.numeric(geodata$area) * rt)
jitted <- sf::st_sample(geodata, n_obs)
coords <- sf::st_coordinates(jitted)
}else{
jitted <- sf::st_sample(
sf::st_buffer(geodata, dist = radius),
njit)
coords <- sf::st_coordinates(jitted)
n_obs <- rep(njit, nrow(geodata))
}
clusts <- lapply(1:nrow(geodata), function(i){
probs <- belongmatrix[i,]
clusters <- sample(groups, size = n_obs[[i]], prob = probs, replace = TRUE)
return(clusters)
})
clusts <- do.call(c, clusts)
all_pts <- sf::st_sf(data.frame(
"oid" = 1:length(jitted),
"cluster" = as.character(clusts)
),jitted)
if(is.null(colors)){
colors <- c("#1F77B4","#FF7F0E","#2CA02C","#D62728","#9467BD","#8C564B",
"#E377C2","#7F7F7F","#BCBD22","#17BECF","#AEC7E8","#FFBB78",
"#98DF8A","#FF9896","#C5B0D5","#C49C94","#F7B6D2","#C7C7C7",
"#DBDB8D","#9EDAE5")[1:ncol(belongmatrix)]
}
names(colors) <- as.character(groups)
uncertain_map <- tm_shape(all_pts) +
tm_dots("cluster", palette = colors, size = 0.01) +
tm_layout(frame = FALSE, legend.outside = TRUE)
if(geom_type %in% c("POLYGON","MULTIPOLYGON")){
uncertain_map <- uncertain_map +
tm_shape(geodata) +
tm_borders("black")
}
return(uncertain_map)
} # nocov end
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##### Functions to select parameters #####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Worker function
#'
#' @description Worker function for select_parameters and select_parameters.mc
#'
#' @param algo A string indicating which method to use (FCM, GFCM, SFCM, SGFCM)
#' @param parameters A dataframe of parameters with columns k,m and alpha
#' @param data A dataframe with numeric columns
#' @template nblistw-arg
#' @template window-arg
#' @param standardize A boolean to specify if the variable must be centered and
#' reduce (default = True)
#' @param spconsist A boolean indicating if the spatial consistency must be
#' calculated
#' @param classidx A boolean indicating if the quality of classification
#' indices must be calculated
#' @param nrep An integer indicating the number of permutation to do to simulate
#' the random distribution of the spatial inconsistency. Only used if spconsist
#' is TRUE.
#' @param indices A character vector with the names of the indices to calculate, to
#' evaluate clustering quality. default is :c("Silhouette.index", "Partition.entropy",
#' "Partition.coeff", "XieBeni.index", "FukuyamaSugeno.index", "Explained.inertia").
#' Other available indices are : "DaviesBoulin.index", "CalinskiHarabasz.index",
#' "GD43.index", "GD53.index" and "Negentropy.index".
#' @param maxiter An integer for the maximum number of iteration
#' @param tol The tolerance criterion used in the evaluateMatrices function for
#' convergence assessment
#' @param seed An integer used for random number generation. It ensures that the
#' start centers will be the same if the same integer is selected.
#' @param init A string indicating how the initial centers must be selected. "random"
#' indicates that random observations are used as centers. "kpp" use a distance based method
#' resulting in more dispersed centers at the beginning. Both of them are heuristic.
#' @param verbose A boolean indicating if a progressbar should be displayed
#' @param wrapped A boolean indicating if the data passed is wrapped or not (see wrap function of terra)
#' @return a DataFrame containing for each combinations of parameters several clustering
#' quality indexes.
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @keywords internal
#' @examples
#' #No example provided, this is an internal function
eval_parameters <- function(algo, parameters, data, nblistw = NULL, window = NULL, standardize = TRUE,
robust = FALSE, noise_cluster = FALSE,
spconsist = FALSE, classidx = TRUE, nrep = 30, indices = NULL,
tol, maxiter, seed = NULL, init = "random", verbose = TRUE,
wrapped = FALSE){
if(wrapped){
data <- lapply(data, terra::unwrap)
}
if(algo == "FCM"){
exefun <- function(data,x, ...){
return(CMeans(data, x$k, x$m, maxiter = maxiter, tol = tol, standardize = standardize,
robust = robust, noise_cluster = noise_cluster, delta = x$delta,
verbose = FALSE, seed = seed, init = init))
}
}else if(algo == "GFCM"){
exefun <- function(data,x,... ){
return(GCMeans(data, x$k, x$m, x$beta, maxiter = maxiter, tol = tol, standardize = standardize,
robust = robust, noise_cluster = noise_cluster, delta = x$delta,
verbose = FALSE, seed = seed, init = init))
}
}else if(algo == "SFCM"){
exefun <- function(data,x,...){
dots <- list(...)
return(SFCMeans(data, dots$lw, x$k, x$m, x$alpha, x$lag_method, window = dots$wd,
maxiter = maxiter, tol = tol, standardize = standardize,
robust = robust, noise_cluster = noise_cluster, delta = x$delta,
verbose = FALSE, seed = seed, init = init))
}
}else if(algo == "SGFCM"){
exefun <- function(data,x,...){
dots <- list(...)
return(SGFCMeans(data, dots$lw, x$k, x$m, x$alpha, x$beta, x$lag_method, window = dots$wd,
maxiter = maxiter, tol = tol, standardize = standardize,
robust = robust, noise_cluster = noise_cluster, delta = x$delta,
verbose = FALSE, seed = seed, init = init))
}
}else{
stop("The algo selected must be one in FCM, GFCM, SFCM, SGFCM")
}
if(verbose){
pb <- txtProgressBar(min = 0, max = nrow(parameters), style = 3)
}
cnt <- 1
allIndices <- lapply(1:nrow(parameters), function(i){
row <- parameters[i,]
cnt <<- cnt+1
templistw <- nblistw[[row$listsw]]
tempwindow <- window[[row$window]]
result <- exefun(data, row, lw = templistw, wd = tempwindow)
#calculating the quality indexes
indices_values <- list()
if(classidx){
indices_values <- calcqualityIndexes(result$Data,result$Belongings,as.numeric(row[[2]]),
indices = indices)
}
if(spconsist){
#calculating spatial diag
consist <- spConsistency(result, nrep = nrep)
indices_values$spConsistency <- consist$Mean
indices_values$spConsistency_05 <- consist$prt05
indices_values$spConsistency_95 <- consist$prt95
}
if(verbose){
setTxtProgressBar(pb, cnt)
}
return(unlist(indices_values))
})
dfIndices <- data.frame(do.call(rbind,allIndices))
dfIndices$k <- parameters$k
dfIndices$m <- parameters$m
if(algo %in% c("SFCM","SGFCM")){
dfIndices$alpha <- parameters$alpha
dfIndices$listw <- parameters$listsw
dfIndices$window <- parameters$window
dfIndices$lag_method <- parameters$lag_method
}
if(algo %in% c("GFCM","SGFCM")){
dfIndices$beta <- parameters$beta
}
return(dfIndices)
}
#' @title Select parameters for a clustering algorithm
#'
#' @description Function to select the parameters for a clustering algorithm.
#'
#' @param algo A string indicating which method to use (FCM, GFCM, SFCM, SGFCM)
#' @param data A dataframe with numeric columns or a list of rasters.
#' @param k A sequence of values for k to test (>=2)
#' @param m A sequence of values for m to test
#' @param alpha A sequence of values for alpha to test (NULL if not required)
#' @param beta A sequence of values for beta to test (NULL if not required)
#' @param nblistw A list of list.w objects describing the neighbours typically
#' produced by the spdep package (NULL if not required)
#' @param lag_method A string indicating if a classical lag must be used
#' ("mean") or if a weighted median must be used ("median"). Both can be
#' tested by specifying a vector : c("mean","median"). When working with rasters,
#' the string must be parsable to a function like mean, min, max, sum, etc. and will
#' be applied to all the pixels values in the window designated by the parameter window
#' and weighted according to the values of this matrix.
#' @param window A list of windows to use to calculate neighbouring values if
#' rasters are used.
#' @param robust A boolean indicating if the "robust" version of the algorithm must be used (see details)
#' @param noise_cluster A boolean indicatong if a noise cluster must be added to the solution (see details)
#' @param delta A float giving the distance of the noise cluster to each observation
#' @param standardize A boolean to specify if the variable must be centered and
#' reduce (default = True)
#' @param spconsist A boolean indicating if the spatial consistency must be
#' calculated
#' @param classidx A boolean indicating if the quality of classification
#' indices must be calculated
#' @param nrep An integer indicating the number of permutation to do to simulate
#' the random distribution of the spatial inconsistency. Only used if spconsist
#' is TRUE.
#' @param indices A character vector with the names of the indices to calculate, to
#' evaluate clustering quality. default is :c("Silhouette.index", "Partition.entropy",
#' "Partition.coeff", "XieBeni.index", "FukuyamaSugeno.index", "Explained.inertia").
#' Other available indices are : "DaviesBoulin.index", "CalinskiHarabasz.index",
#' "GD43.index", "GD53.index" and "Negentropy.index".
#' @param maxiter An integer for the maximum number of iteration
#' @param tol The tolerance criterion used in the evaluateMatrices function for
#' convergence assessment
#' @param seed An integer used for random number generation. It ensures that the
#' start centers will be the same if the same integer is selected.
#' @param init A string indicating how the initial centers must be selected. "random"
#' indicates that random observations are used as centers. "kpp" use a distance based method
#' resulting in more dispersed centers at the beginning. Both of them are heuristic.
#' @param verbose A boolean indicating if a progressbar should be displayed
#' @return A dataframe with indicators assessing the quality of classifications
#' @export
#' @examples
#' \donttest{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' #set spconsist to TRUE to calculate the spatial consistency indicator
#' #FALSE here to reduce the time during package check
#' values <- select_parameters(algo = "SFCM", dataset, k = 5, m = seq(2,3,0.1),
#' alpha = seq(0,2,0.1), nblistw = Wqueen, spconsist=FALSE)
#' }
select_parameters <- function(algo,data,k,m,alpha = NA, beta = NA, nblistw=NULL, lag_method="mean", window = NULL,
spconsist = TRUE, classidx = TRUE, nrep = 30, indices = NULL,
standardize = TRUE,
robust = FALSE, noise_cluster = FALSE, delta = NA,
maxiter = 500, tol = 0.01,
seed=NULL, init = "random", verbose = TRUE){
if(spconsist==FALSE & classidx==FALSE){
stop("one of spconsist and classidx must be TRUE")
}
if(inherits(nblistw,"list") == FALSE){
nblistw <- list(nblistw)
}
if(inherits(window,"list") == FALSE ){
window <- list(window)
}
if(is.null(indices)){
indices <- c("Silhouette.index", "Partition.entropy", "Partition.coeff", "XieBeni.index", "FukuyamaSugeno.index", "Explained.inertia")
}
allcombinaisons <- expand.grid(k=k,m=m,alpha=alpha,beta=beta,
listsw=1:length(nblistw),lag_method=lag_method,window = 1:length(window),
delta = delta
)
print(paste("number of combinaisons to estimate : ",nrow(allcombinaisons)))
dfIndices <- eval_parameters(algo, allcombinaisons, data, nblistw, window, standardize,
robust, noise_cluster,
spconsist, classidx, nrep, indices,
tol, maxiter, seed, init = init, verbose = verbose)
return(dfIndices)
}
#' @rdname select_parameters
#' @examples
#' \donttest{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' #set spconsist to TRUE to calculate the spatial consistency indicator
#' #FALSE here to reduce the time during package check
#' values <- selectParameters(algo = "SFCM", dataset, k = 5, m = seq(2,3,0.1),
#' alpha = seq(0,2,0.1), nblistw = Wqueen, spconsist=FALSE)
#' }
#' @export
selectParameters <- select_parameters
#' @title Select parameters for clustering algorithm (multicore)
#'
#' @description Function to select the parameters for a clustering algorithm.
#' This version of the function allows to use a plan defined with the package
#' future to reduce calculation time.
#'
#' @param algo A string indicating which method to use (FCM, GFCM, SFCM, SGFCM)
#' @param data A dataframe with numeric columns
#' @param k A sequence of values for k to test (>=2)
#' @param m A sequence of values for m to test
#' @param alpha A sequence of values for alpha to test (NULL if not required)
#' @param beta A sequence of values for beta to test (NULL if not required)
#' @param nblistw A list of list.w objects describing the neighbours typically
#' produced by the spdep package (NULL if not required)
#' @param lag_method A string indicating if a classical lag must be used
#' ("mean") or if a weighted median must be used ("median"). Both can be
#' tested by specifying a vector : c("mean","median"). When working with rasters,
#' the string must be parsable to a function like mean, min, max, sum, etc. and will
#' be applied to all the pixels values in the window designated by the parameter window
#' and weighted according to the values of this matrix.
#' @param window A list of windows to use to calculate neighbouring values if
#' rasters are used.
#' @param robust A boolean indicating if the "robust" version of the algorithm must be used (see details)
#' @param noise_cluster A boolean indicatong if a noise cluster must be added to the solution (see details)
#' @param delta A float giving the distance of the noise cluster to each observation
#' @param spconsist A boolean indicating if the spatial consistency must be
#' calculated
#' @param classidx A boolean indicating if the quality of classification
#' indices must be calculated
#' @param nrep An integer indicating the number of permutation to do to simulate
#' the random distribution of the spatial inconsistency. Only used if spconsist
#' is TRUE.
#' @param indices A character vector with the names of the indices to calculate, to
#' evaluate clustering quality. default is :c("Silhouette.index", "Partition.entropy",
#' "Partition.coeff", "XieBeni.index", "FukuyamaSugeno.index", "Explained.inertia").
#' Other available indices are : "DaviesBoulin.index", "CalinskiHarabasz.index",
#' "GD43.index", "GD53.index" and "Negentropy.index".
#' @param standardize A boolean to specify if the variable must be centered and
#' reduce (default = True)
#' @param maxiter An integer for the maximum number of iteration
#' @param tol The tolerance criterion used in the evaluateMatrices function for
#' convergence assessment
#' @param seed An integer used for random number generation. It ensures that the
#' start centers will be the same if the same integer is selected.
#' @param init A string indicating how the initial centers must be selected. "random"
#' indicates that random observations are used as centers. "kpp" use a distance based method
#' resulting in more dispersed centers at the beginning. Both of them are heuristic.
#' @param chunk_size The size of a chunk used for multiprocessing. Default is 100.
#' @param verbose A boolean indicating if a progressbar should be displayed
#' @return A dataframe with indicators assessing the quality of classifications
#' @export
#' @importFrom utils setTxtProgressBar txtProgressBar capture.output
#' @examples
#' \donttest{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' future::plan(future::multisession(workers=2))
#' #set spconsist to TRUE to calculate the spatial consistency indicator
#' #FALSE here to reduce the time during package check
#' values <- select_parameters.mc("SFCM", dataset, k = 5, m = seq(1,2.5,0.1),
#' alpha = seq(0,2,0.1), nblistw = Wqueen, spconsist=FALSE)
#' ## make sure any open connections are closed afterward
#' if (!inherits(future::plan(), "sequential")) future::plan(future::sequential)
#'}
select_parameters.mc <- function(algo,data,k,m,alpha = NA, beta = NA, nblistw=NULL, lag_method="mean", window = NULL,
spconsist = TRUE, classidx = TRUE, nrep = 30, indices = NULL,
standardize = TRUE,
robust = FALSE, noise_cluster = FALSE, delta = NA,
maxiter = 500, tol = 0.01, chunk_size = 5,
seed=NULL, init = "random", verbose = TRUE){
if(spconsist==FALSE & classidx==FALSE){
stop("one of spconsist and classidx must be TRUE")
}
if(inherits(nblistw, "list")== FALSE){
nblistw <- list(nblistw)
}
if(inherits(window, "list") == FALSE ){
window <- list(window)
}
if(is.null(indices)){
indices <- c("Silhouette.index", "Partition.entropy", "Partition.coeff", "XieBeni.index", "FukuyamaSugeno.index", "Explained.inertia")
}
allcombinaisons <- expand.grid(k=k,m=m,alpha=alpha,beta=beta,delta = delta,
listsw=1:length(nblistw),lag_method=lag_method, window = 1:length(window))
if (verbose){
print(paste("number of combinaisons to estimate : ",nrow(allcombinaisons)))
}
chunks <- split(1:nrow(allcombinaisons), rep(1:ceiling(nrow(allcombinaisons) / chunk_size),
each = chunk_size, length.out = nrow(allcombinaisons)))
chunks <- lapply(chunks,function(x){return(allcombinaisons[x,])})
if(inherits(data, "data.frame") == FALSE){
## NOTE HERE : the rasters from terra must be read here befond send to clusters...
dataw <- lapply(data, terra::wrap)
wrapped <- TRUE
}else{
dataw <- data
wrapped <- FALSE
}
# step2 : starting the function
iseq <- 1:length(chunks)
if(verbose){
progressr::with_progress({
p <- progressr::progressor(along = iseq)
values <- future.apply::future_lapply(iseq, function(i) {
sprintf(algo)
parameters <- chunks[[i]]
indices <- eval_parameters(algo, parameters, dataw, nblistw, window, standardize,
robust = robust, noise_cluster = noise_cluster,
spconsist, classidx, nrep, indices,
tol, maxiter, init = init, verbose = FALSE, wrapped = wrapped)
p(sprintf("i=%g", i))
return(indices)
}, future.seed = seed)
})
}else{
values <- future.apply::future_lapply(iseq, function(i) {
parameters <- chunks[[i]]
indices <- eval_parameters(algo, parameters, dataw, nblistw, window, standardize,
robust = robust, noise_cluster = noise_cluster,
spconsist, classidx, nrep, indices,
tol, maxiter, init = init, verbose = FALSE, wrapped = wrapped)
return(indices)
},future.seed = seed)
}
dfIndices <- do.call(rbind,values)
return(dfIndices)
}
#' @rdname select_parameters.mc
#' @examples
#' \donttest{
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' future::plan(future::multisession(workers=2))
#' #set spconsist to TRUE to calculate the spatial consistency indicator
#' #FALSE here to reduce the time during package check
#' values <- select_parameters.mc("SFCM", dataset, k = 5, m = seq(1,2.5,0.1),
#' alpha = seq(0,2,0.1), nblistw = Wqueen, spconsist=FALSE)
#' \dontshow{
#' ## R CMD check: make sure any open connections are closed afterward
#' if (!inherits(future::plan(), "sequential")) future::plan(future::sequential)
#' }
#'}
#' @export
selectParameters.mc <- select_parameters.mc
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##### Functions to recalculate spatial weights #####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Semantic adjusted spatial weights
#'
#' @description Function to adjust the spatial weights so that they represent semantic
#' distances between neighbours
#'
#' @param data A dataframe with numeric columns
#' @param listw A nb object from spdep
#' @param style A letter indicating the weighting scheme (see spdep doc)
#' @param mindist A minimum value for distance between two observations. If two
#' neighbours have exactly the same values, then the euclidean distance between
#' them is 0, leading to an infinite spatial weight. In that case, the minimum
#' distance is used instead of 0.
#'
#' @return A listw object (spdep like)
#' @export
#' @examples
#' data(LyonIris)
#' AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14","Pct_65","Pct_Img",
#' "TxChom1564","Pct_brevet","NivVieMed")
#' dataset <- sf::st_drop_geometry(LyonIris[AnalysisFields])
#' queen <- spdep::poly2nb(LyonIris,queen=TRUE)
#' Wqueen <- spdep::nb2listw(queen,style="W")
#' Wqueen2 <- adjustSpatialWeights(dataset,queen,style="C")
adjustSpatialWeights <- function(data,listw,style, mindist = 0.00000000001){
data <- as.matrix(data)
new_weights <- lapply(1:nrow(data),function(i){
row <- data[i,]
neighbours <- data[listw[[i]],]
dists <- calcEuclideanDistance2(neighbours,row)
err <- dists == 0
if(any(err)){
dists[err] <- mindist
warning("Some observartions have exactly the same values as one of their neighbours, leading to
an euclidean distance of 0 and an infinite weight. The mindist value is applied instead")
}
indists <- 1/dists
weights <- dists / sum(dists)
return(weights)
})
new_listw <- spdep::nb2listw(listw,glist=new_weights,style = style)
return(new_listw)
}
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##### Utilitary functions #####
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#' @title Convert categories to membership matrix
#'
#' @description Function to convert a character vector to a membership matrix (binary matrix).
#' The columns of the matrix are ordered with the order function.
#'
#' @param categories A vector with the categories of each observation
#'
#' @return A binary matrix
#' @export
cat_to_belongings <- function(categories){
cats <- unique(categories)
cats <- cats[order(cats)]
cols <- lapply(cats, function(i){
return (ifelse(categories == i, 1, 0))
})
mat <- do.call(cbind, cols)
colnames(mat) <- cats
return(mat)
}
#' @rdname cat_to_belongings
#' @export
catToBelongings <- cat_to_belongings
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.