Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----message=FALSE, warning=FALSE---------------------------------------------
#charging packages and data
library(geocmeans)
library(ggplot2)
library(ggpubr)
library(dplyr)
library(viridis)
library(spdep)
library(tmap)
spdep::set.mcOption(FALSE)
spdep::set.coresOption(1L)
data(LyonIris)
# selecting the columns for the analysis
AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14",
"Pct_65","Pct_Img","TxChom1564","Pct_brevet","NivVieMed")
# rescaling the columns
Data <- sf::st_drop_geometry(LyonIris[AnalysisFields])
for (Col in names(Data)){
Data[[Col]] <- scale(Data[[Col]])
}
## ----include=FALSE------------------------------------------------------------
# loading the pre-calculated results
load(system.file("extdata", "results_vignette_intro.rda",
package = "geocmeans", mustWork = TRUE))
## ----warning=FALSE, fig.cap = "Impact of the number of groups on the explained variance", out.width = "50%", fig.pos="H", fig.align="center"----
# finding the best k by using the r2 of the classification
# trying for k from 2 to 10
R2s <- sapply(2:10,function(k){
Clust <- kmeans(Data,centers=k,iter.max = 150)
R2 <- Clust$betweenss / Clust$totss
return(R2)
})
Df <- data.frame(K=2:10,
R2 = R2s)
ggplot(Df)+
geom_line(aes(x=K,y=R2s))+
geom_point(aes(x=K,y=R2s),color="red")+
xlab("Number of groups")+
ylab("R2 of classification")
## ----warning=FALSE, fig.cap="Clusters identified by classical k-means", fig.pos="H", fig.align="center"----
KMeanClust <- kmeans(Data,centers=4,iter.max = 150)
LyonIris$Cluster <-paste("cluster",KMeanClust$cluster,sep="_")
# mapping the groups
tm_shape(LyonIris) +
tm_fill("Cluster", palette = c("cluster_1"="palegreen3",
"cluster_2"="firebrick",
"cluster_3"="lightyellow2",
"cluster_4"="steelblue")) +
tm_borders("black") +
tm_layout(legend.outside = TRUE, frame = FALSE)
## ----warning=FALSE------------------------------------------------------------
Cmean <- CMeans(Data,4,1.5,500,standardize = FALSE, seed = 456, tol = 0.00001, verbose = FALSE)
## ----message=FALSE, warning=FALSE---------------------------------------------
calcqualityIndexes(Data, Cmean$Belongings, m = 1.5)
## ----warning=FALSE, eval = FALSE----------------------------------------------
# beta_values <- selectParameters("GFCM",data = Data, k = 4, m = 1.5,
# beta = seq(0,1,0.05), spconsist = FALSE,
# tol = 0.00001, seed = 456)
## ----warning=FALSE------------------------------------------------------------
knitr::kable(beta_values[c("beta","Silhouette.index","XieBeni.index","Explained.inertia")],
col.names = c("beta", "silhouette index",
"Xie and Beni index", "explained inertia"),digits = 3)
## ----warning=FALSE------------------------------------------------------------
GCmean <- GCMeans(Data,k = 4,m = 1.5, beta = 0.7,500,standardize = FALSE, seed=456,
tol = 0.00001, verbose = FALSE)
r1 <- calcqualityIndexes(Data,GCmean$Belongings,m=1.5)
r2 <- calcqualityIndexes(Data,Cmean$Belongings,m=1.5)
df <- cbind(unlist(r1), unlist(r2))
knitr::kable(df,
digits = 3,col.names = c("GFCM", "FCM"))
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 1", out.width="80%", fig.pos="H", fig.align="center"----
cmeansMaps<- mapClusters(LyonIris,Cmean$Belongings,undecided = 0.45)
GcmeansMaps<- mapClusters(LyonIris,GCmean$Belongings,undecided = 0.45)
tmap_arrange(cmeansMaps$ProbaMaps[[1]],GcmeansMaps$ProbaMaps[[1]], nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 2", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[2]],GcmeansMaps$ProbaMaps[[2]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 3", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[3]],GcmeansMaps$ProbaMaps[[3]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 4", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[4]],GcmeansMaps$ProbaMaps[[4]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Most likely clusters and undecided units", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ClusterPlot,GcmeansMaps$ClusterPlot,
nrow = 1, ncol = 2)
## ----message=FALSE, warning=FALSE---------------------------------------------
Neighbours <- poly2nb(LyonIris,queen = TRUE)
WMat <- nb2listw(Neighbours,style="W",zero.policy = TRUE)
## ----warning=FALSE, message=FALSE, eval = FALSE------------------------------
# DFindices_SFCM <- selectParameters(algo = "SFCM", data = Data,
# k = 4, m = 1.5, alpha = seq(0,2,0.05),
# nblistw = WMat, standardize = FALSE,
# tol = 0.0001, verbose = FALSE, seed = 456)
#
## ----warning=FALSE, fig.cap = "Link between alpha and spatial inconsistency", out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
geom_smooth(aes(x=alpha,y=spConsistency), color = "black")+
geom_point(aes(x=alpha,y=spConsistency), color = "red")
## ----warning=FALSE, fig.cap = "Link between alpha and explained inertia", out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
geom_smooth(aes(x=alpha,y=Explained.inertia), color = "black")+
geom_point(aes(x=alpha,y=Explained.inertia), color = "red")
## ----fig.cap="Link between alpha and silhouette index", message=FALSE, warning=FALSE, fig.pos="H", fig.align="center", out.width="50%"----
ggplot(DFindices_SFCM)+
geom_smooth(aes(x=alpha,y=Silhouette.index), color = "black")+
geom_point(aes(x=alpha,y=Silhouette.index), color = "red")
## ----fig.cap="Link between alpha and Xie and Beni index", message=FALSE, warning=FALSE, out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
geom_smooth(aes(x=alpha,y=XieBeni.index), color = "black")+
geom_point(aes(x=alpha,y=XieBeni.index), color = "red")
## ----warning=FALSE------------------------------------------------------------
SFCM <- SFCMeans(Data, WMat, k = 4, m = 1.5, alpha = 0.7,
tol = 0.0001, standardize = FALSE,
verbose = FALSE, seed = 456)
## ----warning=FALSE, eval = FALSE----------------------------------------------
# future::plan(future::multisession(workers=2))
# DFindices_SFGCM <- selectParameters.mc(algo = "SGFCM", data = Data,
# k = 4, m = 1.5, alpha = seq(0,2,0.05),
# beta = seq(0,0.85,0.05),
# nblistw = WMat, standardize = FALSE, chunk_size = 50,
# tol = 0.0001, verbose = FALSE, seed = 456)
## ----warning=FALSE, fig.cap = "Impact of beta and alpha on silhouette index", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) +
geom_raster(aes(x = alpha, y = beta, fill = Silhouette.index), size = 5) +
scale_fill_viridis() +
coord_fixed(ratio=1)
## ----warning=FALSE, fig.cap = "Impact of beta and alpha on Xie and Beni index", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) +
geom_raster(aes(x = alpha, y = beta, fill = XieBeni.index), size = 5) +
scale_fill_viridis() +
coord_fixed(ratio=1)
## ----warning=FALSE, fig.cap = "Impact of beta and alpha on spatial inconsistency", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) +
geom_raster(aes(x = alpha, y = beta, fill = spConsistency), size = 5) +
scale_fill_viridis() +
coord_fixed(ratio=1)
## ----warning=FALSE------------------------------------------------------------
SGFCM <- SGFCMeans(Data,WMat,k = 4,m=1.5, alpha=0.95, beta = 0.65,
tol=0.0001, standardize = FALSE, verbose = FALSE, seed = 456)
## ----warning=FALSE------------------------------------------------------------
r1 <- calcqualityIndexes(Data, SFCM$Belongings,m = 1.5)
r2 <- calcqualityIndexes(Data, SGFCM$Belongings,m = 1.5)
diagSFCM <- spatialDiag(SFCM$Belongings, nblistw = WMat,
undecided = 0.45,nrep = 500)
diagSGFCM <- spatialDiag(SGFCM$Belongings, nblistw = WMat,
undecided = 0.45,nrep = 500)
df <- cbind(
c(unlist(r1),diagSFCM$SpConsist),
c(unlist(r2),diagSGFCM$SpConsist)
)
row.names(df)[length(row.names(df))] <- "sp.consistency"
knitr::kable(df,digits = 3,col.names = c("SFCM","SGFCM"))
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 1", out.width="80%", fig.pos="H", fig.align="center"----
SFCMMaps <- mapClusters(geodata = LyonIris, object = SFCM$Belongings,undecided = 0.45)
SGFCMMaps <- mapClusters(geodata = LyonIris, object = SGFCM$Belongings,undecided = 0.45)
tmap_arrange(SFCMMaps$ProbaMaps[[1]],SGFCMMaps$ProbaMaps[[1]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 2", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ProbaMaps[[2]],SGFCMMaps$ProbaMaps[[2]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 3", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ProbaMaps[[3]],SGFCMMaps$ProbaMaps[[3]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 4", out.width="80%"----
tmap_arrange(SFCMMaps$ProbaMaps[[4]],SGFCMMaps$ProbaMaps[[4]],
nrow = 1, ncol = 2)
## ----warning=FALSE, fig.cap = "Most likely cluster and undecided units", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ClusterPlot,SGFCMMaps$ClusterPlot,
nrow = 1, ncol = 2)
## ----warning=FALSE------------------------------------------------------------
spdiag_1 <- spatialDiag(Cmean$Belongings, nblistw = WMat, nrep=250)
spdiag_2 <- spatialDiag(GCmean$Belongings, nblistw = WMat, nrep=250)
spdiag_3 <- spatialDiag(SFCM$Belongings, nblistw = WMat, nrep=250)
spdiag_4 <- spatialDiag(SGFCM$Belongings, nblistw = WMat, nrep=250)
#looking at the moran I values for each group
moran_table <- data.frame(cbind(spdiag_1$MoranValues$MoranI,
spdiag_2$MoranValues$MoranI,
spdiag_3$MoranValues$MoranI,
spdiag_4$MoranValues$MoranI
))
row.names(moran_table) <- paste("cluster ",1:4,sep="")
knitr::kable(moran_table, digits = 3,
col.names = c("FCM","GFCM","SFCM","SGFCM"),
caption = "Moran I index for the columns of the membership matrix"
)
## ----warning=FALSE------------------------------------------------------------
print(c(spdiag_1$SpConsist, spdiag_2$SpConsist,spdiag_3$SpConsist,spdiag_4$SpConsist))
## ----warning=FALSE------------------------------------------------------------
sum(spdiag_4$SpConsist > spdiag_3$SpConsistSamples) / length(spdiag_3$SpConsistSamples)
## ----warning=FALSE------------------------------------------------------------
Undecided <- undecidedUnits(SGFCM$Belongings,0.45)
LyonIris$FinalCluster <- ifelse(Undecided=="Undecided",
"Undecided",paste("cluster",Undecided,sep="_"))
# mapping the groups
tm_shape(LyonIris) +
tm_fill("FinalCluster", palette = c("cluster_V1"="palegreen3",
"cluster_V2"="firebrick",
"cluster_V3"="lightyellow2",
"cluster_V4"="steelblue",
"cluster_V5"="pink",
"Undecided"=rgb(0,0,0,0.4))) +
tm_borders("black") +
tm_layout(frame = FALSE, legend.outside = TRUE)
## ----warning=FALSE, eval=FALSE------------------------------------------------
# colors <- c("palegreen3","firebrick","lightyellow2","steelblue","pink")
# uncertaintyMap(LyonIris, SGFCM$Belongings, color = colors)
## ----echo=FALSE, warning=FALSE------------------------------------------------
uncertainMap
## ----warning=FALSE------------------------------------------------------------
LyonIris$entropyidx <- calcUncertaintyIndex(SGFCM$Belongings)
# mapping the uncertainty
tm_shape(LyonIris) +
tm_fill("entropyidx", palette = "Greys", style = "cont") +
tm_borders("black") +
tm_layout(frame = FALSE, legend.outside = TRUE)
## ----warning=FALSE------------------------------------------------------------
Data <- sf::st_drop_geometry(LyonIris[AnalysisFields])
summarizeClusters(Data,
belongmatrix = SGFCM$Belongings,
weighted = TRUE, dec = 3)
# equivalent to :
# summary(SGFCM, Data)
## ----warning=FALSE------------------------------------------------------------
spiderPlots(Data, SGFCM$Belongings,
chartcolors = c("darkorange3","grey4","darkgreen","royalblue"))
violinPlots(Data, SGFCM$Groups)
## ----warning=FALSE, eval = FALSE----------------------------------------------
# bootvalues <- boot_group_validation(SGFCM, nsim = 1000, maxiter = 1000,
# tol = 0.0001, verbose = FALSE)
## ----warning=FALSE------------------------------------------------------------
melted_df <- reshape2::melt(bootvalues$group_consistency)
melted_df$variable <- as.factor(melted_df$variable)
ggplot() +
geom_histogram(mapping = aes(x = value), data = melted_df, bins = 30) +
labs(title = "stability of clusters", subtitle = "for 1000 iterations",
x = "Jaccard index") +
facet_wrap(vars(variable), ncol=2)
## ----warning=FALSE------------------------------------------------------------
df_gp3 <- bootvalues$group_centers[["group3"]]
melted_df <- reshape2::melt(df_gp3)
melted_df$variable <- as.factor(melted_df$variable)
ggplot() +
geom_histogram(mapping = aes(x = value), data = melted_df, bins = 30) +
labs(title = "stability of group 3 centers", subtitle = "for 1000 iterations") +
xlim(-3,3)+
facet_wrap(vars(variable), ncol=3)
## ----warning=FALSE, eval = FALSE----------------------------------------------
# # create the modified weight matrix
# WMat_adj <- adjustSpatialWeights(Data, WMat$neighbours, style = "W")
#
# # calculate the modified version of FCM with non-local information
# nl_SFCM <- SFCMeans(Data, WMat_adj, k = 4, m = 1.5, alpha = 0.7,
# tol = 0.0001, standardize = FALSE,
# verbose = FALSE, seed = 456)
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.