inst/doc/introduction.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----message=FALSE, warning=FALSE---------------------------------------------
#charging packages and data
library(geocmeans)
library(ggplot2)
library(ggpubr)
library(dplyr)
library(viridis)
library(spdep)
library(tmap)

spdep::set.mcOption(FALSE)
spdep::set.coresOption(1L)

data(LyonIris)

# selecting the columns for the analysis
AnalysisFields <-c("Lden","NO2","PM25","VegHautPrt","Pct0_14",
                   "Pct_65","Pct_Img","TxChom1564","Pct_brevet","NivVieMed")

# rescaling the columns
Data <- sf::st_drop_geometry(LyonIris[AnalysisFields])
for (Col in names(Data)){
  Data[[Col]] <- scale(Data[[Col]])
}


## ----include=FALSE------------------------------------------------------------
# loading the pre-calculated results
load(system.file("extdata", "results_vignette_intro.rda",
                           package = "geocmeans", mustWork = TRUE))

## ----warning=FALSE, fig.cap = "Impact of the number of groups on the explained variance", out.width = "50%", fig.pos="H", fig.align="center"----
# finding the best k by using the r2 of the classification
# trying for k from 2 to 10
R2s <- sapply(2:10,function(k){
  Clust <- kmeans(Data,centers=k,iter.max = 150)
  R2 <- Clust$betweenss / Clust$totss
  return(R2)
})

Df <- data.frame(K=2:10,
                 R2 = R2s)

ggplot(Df)+
  geom_line(aes(x=K,y=R2s))+
  geom_point(aes(x=K,y=R2s),color="red")+
  xlab("Number of groups")+
  ylab("R2 of classification")

## ----warning=FALSE, fig.cap="Clusters identified by classical k-means", fig.pos="H", fig.align="center"----
KMeanClust <-  kmeans(Data,centers=4,iter.max = 150)
LyonIris$Cluster <-paste("cluster",KMeanClust$cluster,sep="_")

# mapping the groups
tm_shape(LyonIris) + 
  tm_fill("Cluster", palette = c("cluster_1"="palegreen3",
                                 "cluster_2"="firebrick",
                                 "cluster_3"="lightyellow2",
                                 "cluster_4"="steelblue")) +
  tm_borders("black") +
  tm_layout(legend.outside = TRUE, frame = FALSE)


## ----warning=FALSE------------------------------------------------------------
Cmean <- CMeans(Data,4,1.5,500,standardize = FALSE, seed = 456, tol = 0.00001, verbose = FALSE)

## ----message=FALSE, warning=FALSE---------------------------------------------
calcqualityIndexes(Data, Cmean$Belongings, m = 1.5)

## ----warning=FALSE, eval = FALSE----------------------------------------------
#  beta_values <- selectParameters("GFCM",data = Data, k = 4, m = 1.5,
#                                   beta = seq(0,1,0.05), spconsist = FALSE,
#                                   tol = 0.00001, seed = 456)

## ----warning=FALSE------------------------------------------------------------
knitr::kable(beta_values[c("beta","Silhouette.index","XieBeni.index","Explained.inertia")],
             col.names = c("beta", "silhouette index",
                           "Xie and Beni index", "explained inertia"),digits = 3)

## ----warning=FALSE------------------------------------------------------------
GCmean <- GCMeans(Data,k = 4,m = 1.5, beta = 0.7,500,standardize = FALSE, seed=456,
                  tol = 0.00001, verbose = FALSE)
r1 <- calcqualityIndexes(Data,GCmean$Belongings,m=1.5)
r2 <- calcqualityIndexes(Data,Cmean$Belongings,m=1.5)
df <- cbind(unlist(r1), unlist(r2))

knitr::kable(df,
             digits = 3,col.names = c("GFCM", "FCM"))

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 1", out.width="80%", fig.pos="H", fig.align="center"----
cmeansMaps<- mapClusters(LyonIris,Cmean$Belongings,undecided = 0.45)
GcmeansMaps<- mapClusters(LyonIris,GCmean$Belongings,undecided = 0.45)

tmap_arrange(cmeansMaps$ProbaMaps[[1]],GcmeansMaps$ProbaMaps[[1]], nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 2", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[2]],GcmeansMaps$ProbaMaps[[2]], 
          nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 3", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[3]],GcmeansMaps$ProbaMaps[[3]], 
          nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 4", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ProbaMaps[[4]],GcmeansMaps$ProbaMaps[[4]], 
          nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Most likely clusters and undecided units", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(cmeansMaps$ClusterPlot,GcmeansMaps$ClusterPlot,
          nrow = 1, ncol = 2)

## ----message=FALSE, warning=FALSE---------------------------------------------
Neighbours <- poly2nb(LyonIris,queen = TRUE)
WMat <- nb2listw(Neighbours,style="W",zero.policy = TRUE)

## ----warning=FALSE,  message=FALSE, eval = FALSE------------------------------
#  DFindices_SFCM <- selectParameters(algo = "SFCM", data = Data,
#                                 k = 4, m = 1.5, alpha = seq(0,2,0.05),
#                                 nblistw = WMat, standardize = FALSE,
#                                 tol = 0.0001, verbose = FALSE, seed = 456)
#  

## ----warning=FALSE, fig.cap = "Link between alpha and spatial inconsistency", out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
  geom_smooth(aes(x=alpha,y=spConsistency), color = "black")+
  geom_point(aes(x=alpha,y=spConsistency), color = "red")


## ----warning=FALSE, fig.cap = "Link between alpha and explained inertia", out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
  geom_smooth(aes(x=alpha,y=Explained.inertia), color = "black")+
  geom_point(aes(x=alpha,y=Explained.inertia), color = "red")

## ----fig.cap="Link between alpha and silhouette index", message=FALSE, warning=FALSE, fig.pos="H", fig.align="center", out.width="50%"----
ggplot(DFindices_SFCM)+
  geom_smooth(aes(x=alpha,y=Silhouette.index), color = "black")+
  geom_point(aes(x=alpha,y=Silhouette.index), color = "red")

## ----fig.cap="Link between alpha and Xie and Beni index", message=FALSE, warning=FALSE, out.width="50%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFCM)+
  geom_smooth(aes(x=alpha,y=XieBeni.index), color = "black")+
  geom_point(aes(x=alpha,y=XieBeni.index), color = "red")


## ----warning=FALSE------------------------------------------------------------
SFCM <- SFCMeans(Data, WMat, k = 4, m = 1.5, alpha = 0.7,
                 tol = 0.0001, standardize = FALSE,
                 verbose = FALSE, seed = 456)

## ----warning=FALSE, eval = FALSE----------------------------------------------
#  future::plan(future::multisession(workers=2))
#  DFindices_SFGCM <- selectParameters.mc(algo = "SGFCM", data = Data,
#                                 k = 4, m = 1.5, alpha = seq(0,2,0.05),
#                                 beta = seq(0,0.85,0.05),
#                                 nblistw = WMat, standardize = FALSE, chunk_size = 50,
#                                 tol = 0.0001, verbose = FALSE, seed = 456)

## ----warning=FALSE, fig.cap = "Impact of beta and alpha on silhouette index", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) + 
  geom_raster(aes(x = alpha, y = beta, fill = Silhouette.index),  size = 5) + 
  scale_fill_viridis() +
  coord_fixed(ratio=1)

## ----warning=FALSE, fig.cap = "Impact of beta and alpha on Xie and Beni index", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) + 
  geom_raster(aes(x = alpha, y = beta, fill = XieBeni.index),  size = 5) + 
  scale_fill_viridis() +
  coord_fixed(ratio=1)

## ----warning=FALSE, fig.cap = "Impact of beta and alpha on spatial inconsistency", out.width = "80%", fig.pos="H", fig.align="center"----
ggplot(DFindices_SFGCM) + 
  geom_raster(aes(x = alpha, y = beta, fill = spConsistency),  size = 5) + 
  scale_fill_viridis() +
  coord_fixed(ratio=1)


## ----warning=FALSE------------------------------------------------------------
SGFCM <- SGFCMeans(Data,WMat,k = 4,m=1.5, alpha=0.95, beta = 0.65,
                    tol=0.0001, standardize = FALSE, verbose = FALSE, seed = 456)

## ----warning=FALSE------------------------------------------------------------
r1 <- calcqualityIndexes(Data, SFCM$Belongings,m = 1.5)
r2 <- calcqualityIndexes(Data, SGFCM$Belongings,m = 1.5)

diagSFCM <- spatialDiag(SFCM$Belongings, nblistw = WMat,
                        undecided = 0.45,nrep = 500)
diagSGFCM <- spatialDiag(SGFCM$Belongings, nblistw = WMat,
                         undecided = 0.45,nrep = 500)

df <- cbind(
  c(unlist(r1),diagSFCM$SpConsist),
  c(unlist(r2),diagSGFCM$SpConsist)
)
row.names(df)[length(row.names(df))] <- "sp.consistency"

knitr::kable(df,digits = 3,col.names = c("SFCM","SGFCM"))

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 1", out.width="80%", fig.pos="H", fig.align="center"----
SFCMMaps <- mapClusters(geodata = LyonIris, object = SFCM$Belongings,undecided = 0.45)
SGFCMMaps <- mapClusters(geodata = LyonIris, object = SGFCM$Belongings,undecided = 0.45)

tmap_arrange(SFCMMaps$ProbaMaps[[1]],SGFCMMaps$ProbaMaps[[1]],
             nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 2", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ProbaMaps[[2]],SGFCMMaps$ProbaMaps[[2]],
             nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 3", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ProbaMaps[[3]],SGFCMMaps$ProbaMaps[[3]],
             nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Probability of belonging to cluster 4", out.width="80%"----
tmap_arrange(SFCMMaps$ProbaMaps[[4]],SGFCMMaps$ProbaMaps[[4]],
             nrow = 1, ncol = 2)

## ----warning=FALSE, fig.cap = "Most likely cluster and undecided units", out.width="80%", fig.pos="H", fig.align="center"----
tmap_arrange(SFCMMaps$ClusterPlot,SGFCMMaps$ClusterPlot,
             nrow = 1, ncol = 2)

## ----warning=FALSE------------------------------------------------------------
spdiag_1 <- spatialDiag(Cmean$Belongings, nblistw = WMat, nrep=250)
spdiag_2 <- spatialDiag(GCmean$Belongings, nblistw = WMat, nrep=250)
spdiag_3 <- spatialDiag(SFCM$Belongings, nblistw = WMat, nrep=250)
spdiag_4 <- spatialDiag(SGFCM$Belongings, nblistw = WMat, nrep=250)

#looking at the moran I values for each group
moran_table <- data.frame(cbind(spdiag_1$MoranValues$MoranI,
                     spdiag_2$MoranValues$MoranI,
                     spdiag_3$MoranValues$MoranI,
                     spdiag_4$MoranValues$MoranI
                     ))
row.names(moran_table) <- paste("cluster ",1:4,sep="")
knitr::kable(moran_table, digits = 3,
             col.names = c("FCM","GFCM","SFCM","SGFCM"),
             caption = "Moran I index for the columns of the membership matrix"
             )

## ----warning=FALSE------------------------------------------------------------
print(c(spdiag_1$SpConsist, spdiag_2$SpConsist,spdiag_3$SpConsist,spdiag_4$SpConsist))

## ----warning=FALSE------------------------------------------------------------
sum(spdiag_4$SpConsist > spdiag_3$SpConsistSamples) / length(spdiag_3$SpConsistSamples)

## ----warning=FALSE------------------------------------------------------------
Undecided <- undecidedUnits(SGFCM$Belongings,0.45)
LyonIris$FinalCluster <- ifelse(Undecided=="Undecided",
                                "Undecided",paste("cluster",Undecided,sep="_"))

# mapping the groups
tm_shape(LyonIris) + 
  tm_fill("FinalCluster", palette = c("cluster_V1"="palegreen3",
                                 "cluster_V2"="firebrick",
                                 "cluster_V3"="lightyellow2",
                                 "cluster_V4"="steelblue",
                                 "cluster_V5"="pink",
                                 "Undecided"=rgb(0,0,0,0.4))) + 
  tm_borders("black") + 
  tm_layout(frame = FALSE, legend.outside = TRUE)


## ----warning=FALSE, eval=FALSE------------------------------------------------
#  colors <- c("palegreen3","firebrick","lightyellow2","steelblue","pink")
#  uncertaintyMap(LyonIris, SGFCM$Belongings, color = colors)

## ----echo=FALSE, warning=FALSE------------------------------------------------
uncertainMap

## ----warning=FALSE------------------------------------------------------------
LyonIris$entropyidx  <- calcUncertaintyIndex(SGFCM$Belongings)

# mapping the uncertainty
tm_shape(LyonIris) + 
  tm_fill("entropyidx", palette = "Greys", style = "cont") + 
  tm_borders("black") + 
  tm_layout(frame = FALSE, legend.outside = TRUE)


## ----warning=FALSE------------------------------------------------------------
Data <- sf::st_drop_geometry(LyonIris[AnalysisFields])
summarizeClusters(Data,
                  belongmatrix = SGFCM$Belongings,
                  weighted = TRUE, dec = 3)
# equivalent to : 
# summary(SGFCM, Data)

## ----warning=FALSE------------------------------------------------------------
spiderPlots(Data, SGFCM$Belongings,
            chartcolors = c("darkorange3","grey4","darkgreen","royalblue"))
violinPlots(Data, SGFCM$Groups)

## ----warning=FALSE, eval = FALSE----------------------------------------------
#  bootvalues <- boot_group_validation(SGFCM, nsim = 1000, maxiter = 1000,
#                                       tol = 0.0001, verbose = FALSE)

## ----warning=FALSE------------------------------------------------------------
melted_df <- reshape2::melt(bootvalues$group_consistency)
melted_df$variable <- as.factor(melted_df$variable)

ggplot() +
  geom_histogram(mapping = aes(x = value), data = melted_df, bins = 30) +
  labs(title = "stability of clusters", subtitle = "for 1000 iterations",
       x = "Jaccard index") +
  facet_wrap(vars(variable), ncol=2)

## ----warning=FALSE------------------------------------------------------------
df_gp3 <- bootvalues$group_centers[["group3"]]

melted_df <- reshape2::melt(df_gp3)
melted_df$variable <- as.factor(melted_df$variable)

ggplot() +
  geom_histogram(mapping = aes(x = value), data = melted_df, bins = 30) +
  labs(title = "stability of group 3 centers", subtitle = "for 1000 iterations") +
  xlim(-3,3)+
  facet_wrap(vars(variable), ncol=3)

## ----warning=FALSE, eval = FALSE----------------------------------------------
#  # create the modified weight matrix
#  WMat_adj <- adjustSpatialWeights(Data, WMat$neighbours, style = "W")
#  
#  # calculate the modified version of FCM with non-local information
#  nl_SFCM <- SFCMeans(Data, WMat_adj, k = 4, m = 1.5, alpha = 0.7,
#                   tol = 0.0001, standardize = FALSE,
#                   verbose = FALSE, seed = 456)
#  

Try the geocmeans package in your browser

Any scripts or data that you put into this service are public.

geocmeans documentation built on Sept. 12, 2023, 9:06 a.m.