inst/doc/PrimarySchool.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----load data----------------------------------------------------------------
library(Matrix)
library(gsbm)
library(igraph)
library(RColorBrewer)
library(combinat)

data(PrimarySchool)
A <- PrimarySchool$A
class <- PrimarySchool$class
n <- dim(A)[1]
class.names <- levels(class)
class.effectifs <- table(class)

## ----compute frequency of interaction-----------------------------------------
# Compute frequency of interactions
K = length(class.names)
Q = matrix(rep(0, K*K), ncol = K, nrow = K)
for (k1 in 1:K){
  com1 = class.names[k1]
  for (k2 in 1:K){
    com2 = class.names[k2]
    Q[k1, k2] <- mean(A[class == com1, class == com2], na.rm = TRUE)
    Q[k2, k1] <- Q[k1, k2]
  }
}
Q <- round(Q, 2)
colnames(Q) <- class.names
rownames(Q) <- class.names
print(Q)

## ----Plot network-------------------------------------------------------------
# Vertex labels
v.label <- rep(NA, n)

# Graph without NA's
A.noNA <- matrix(0, ncol = n, nrow = n)
A.noNA[!is.na(A)] <- A[!is.na(A)]
g <- graph_from_adjacency_matrix(A.noNA, mode = "undirected")

# Layout
W<- matrix(0, n, n)
for (i in 1:n){
  for (j in 1:n){
      if ((!is.na(A[i,j])) && A[i,j]==1){
        if (class[i]==class[j] && class[i]!= "teachers"){
          W[i,j] <- 2
        }
        else{
          W[i,j] <- 1
        }
      }
  }
}
gw <- graph_from_adjacency_matrix(W, mode = "undirected")
lay <- layout_with_fr(gw)
pal <- brewer.pal(11, "Paired")

# Plot network
plot(g, vertex.label = v.label, vertex.size = (2 + colSums(A, na.rm =T)/5), vertex.color = class, layout = lay, palette = pal, edge.width = 0.3, edge.curved=0.1)

legend("bottomleft", legend = class.names, col = pal, pch = 1, bty = 'n', cex = 0.5, ncol = 1)

## ----run GSBM-----------------------------------------------------------------
# Investigate these outliers
T1<-Sys.time()
res <- gsbm_mcgd(A, 8.5, 8, maxit = 100)
T2<-Sys.time()
Tdiff= difftime(T1, T2)
print(paste0("running time : ", Tdiff))
outliers <- which(colSums(res$S)>0)
no <- length(outliers)
outliers.class <- class[outliers]
outliers.Q <- matrix(0,nrow = no, ncol = K)
for (o in 1:no){
  for (k in 1:K){
    comk = class.names[k]
    outliers.Q[o,k] <- mean(A[outliers[o],class == comk], na.rm = T)
  }
  print(paste0("node ", outliers[o], " is in class ", outliers.class[o]))
  print(outliers.Q[o,], 2)
  print("")
}

## ----recover the classes------------------------------------------------------
# Compute svd and run k-means
L.U <- svd(res$L[-outliers, -outliers], nu = 10)$u 
est_class <- kmeans(L.U, 10, nstart = 100)$cluster
length(est_class)

# Recover the class among the permutations of labels
equ_label = combinat::permn(1:10)
error <- rep(0, length(equ_label))
for (i in 1:length(equ_label)){
  error[i] <- sum(class[-outliers] != (class.names[unlist(equ_label[i])])[est_class])
}
error[which.min(error)]
class_est <- class.names[unlist(equ_label[which.min(error)])][est_class]
class_est_out <- rep(NA, n)
class_est_out[outliers] <- "outlier"
class_est_out[-outliers] <- class_est
class_est_out <- as.factor(class_est_out)

# Plot network
plot(g, vertex.label = v.label, vertex.size = (2 + colSums(A, na.rm =T)/5), vertex.color = class_est_out, layout = lay, palette = pal, edge.width = 0.3, edge.curved=0.1)

class_est_out <- as.factor(class_est_out)

Try the gsbm package in your browser

Any scripts or data that you put into this service are public.

gsbm documentation built on Sept. 20, 2022, 9:06 a.m.