chapter6.R
In sand: Statistical Analysis of Network Data with R, 2nd Edition

# SAND with R, chapter6.tex

# CHUNK 1
library(sand)
data(lazega)
A <- as_adjacency_matrix(lazega)
v.attrs <- as_data_frame(lazega, what="vertices")

# CHUNK 2
library(ergm)  # Will load package 'network' as well.
lazega.s <- network::as.network(as.matrix(A),
  directed=FALSE)
network::set.vertex.attribute(lazega.s, "Office",
   v.attrs$Office)
network::set.vertex.attribute(lazega.s, "Practice",
   v.attrs$Practice)
network::set.vertex.attribute(lazega.s, "Gender",
   v.attrs$Gender)
network::set.vertex.attribute(lazega.s, "Seniority",
   v.attrs$Seniority)

# CHUNK 3
my.ergm.bern <- formula(lazega.s ~ edges)
my.ergm.bern
# ---
## lazega.s ~ edges
# ---

# CHUNK 4
summary(my.ergm.bern)
# ---
## edges
##   115
# ---

# CHUNK 5
my.ergm <- formula(lazega.s ~ edges + kstar(2)
   + kstar(3) + triangle)
summary(my.ergm)
# ---
##    edges   kstar2   kstar3 triangle
##      115      926     2681      120
# ---
     
# CHUNK 6
my.ergm <- formula(lazega.s ~ edges
   + gwesp(1, fixed=TRUE))
summary(my.ergm)
# ---
##         edges gwesp.fixed.1
##      115.0000      213.1753
# ---

# CHUNK 7
lazega.ergm <- formula(lazega.s ~ edges
   + gwesp(log(3), fixed=TRUE)
   + nodemain("Seniority")
   + nodemain("Practice")
   + match("Practice")
   + match("Gender")
   + match("Office"))

# CHUNK 8
set.seed(42)
lazega.ergm.fit <- ergm(lazega.ergm)

# CHUNK 9
anova(lazega.ergm.fit)
# ---
## Analysis of Variance Table
##
## Model 1: lazega.s ~ edges + gwesp(log(3), fixed = TRUE) + 
##     nodemain("Seniority") + nodemain("Practice") + 
##     match("Practice") + match("Gender") + 
##     match("Office")
##          Df Deviance Resid. Df Resid. Dev Pr(>|Chisq|)    
## NULL                       630     873.37                 
## Model 1:  7   413.74       623     459.63    < 2.2e-16 ***
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# ---

# CHUNK 10
summary(lazega.ergm.fit)
# ---
## ==========================
## Summary of model fit
## ==========================
##
## Formula:   lazega.s ~ edges + gwesp(log(3), fixed = TRUE) + 
##     nodemain("Seniority") + nodemain("Practice") + 
##     match("Practice") + match("Gender") + 
##     match("Office")
##
## Iterations:  2 out of 20 
## 
## Monte Carlo MLE Results:
##                              Estimate Std. Error MCMC %      
## edges                        -7.00655    0.67114      0   
## gwesp.fixed.1.09861228866811  0.59166    0.08554      0     
## nodecov.Seniority             0.02456    0.00620      0     
## nodecov.Practice              0.39455    0.10218      0    
## nodematch.Practice            0.76966    0.19060      0     
## nodematch.Gender              0.73767    0.24362      0     
## nodematch.Office              1.16439    0.18753      0     
##
##                               z value     Pr(>|z|)
## edges                         -10.440    < 1e-04 ***
## gwesp.fixed.1.09861228866811    6.917    < 1e-04 ***
## nodecov.Seniority               3.962    < 1e-04 ***
## nodecov.Practice                3.861    0.000113 ***
## nodematch.Practice              4.038    < 1e-04 ***
## nodematch.Gender                3.028    0.002463 **
## nodematch.Office                6.209    < 1e-04 ***
## ---
## Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
##
##      Null Deviance: 873.4  on 630  degrees of freedom
##  Residual Deviance: 459.6  on 623  degrees of freedom
## 
## AIC: 473.6    BIC: 504.7    (Smaller is better.)
# ---

# CHUNK 11
gof.lazega.ergm <- gof(lazega.ergm.fit)

# CHUNK 12
plot(gof.lazega.ergm)

# CHUNK 13
library(blockmodels)
set.seed(42)
A.fblog <- as.matrix(as_adjacency_matrix(fblog))
fblog.sbm <- BM_bernoulli("SBM_sym", A.fblog, 
                           verbosity=0, plotting='')
fblog.sbm$estimate()

# CHUNK 14
ICLs <- fblog.sbm$ICL
Q <- which.max(ICLs)
Q
# ---
## [1] 10
# ---

# CHUNK 15
Z <- fblog.sbm$memberships[[Q]]$Z

# CHUNK 16
cl.labs <- apply(Z,1,which.max)

# CHUNK 17
nv <- vcount(fblog)
summary(Z[cbind(1:nv,cl.labs)])
# ---
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8586  0.9953  0.9953  0.9938  0.9953  0.9953
# ---

# CHUNK 18
cl.cnts <- as.vector(table(cl.labs))
alpha <- cl.cnts/nv
alpha
# ---
##  [1] 0.18229167 0.14062500 0.05729167 0.10937500 
##  [5] 0.12500000 0.13020833 0.03125000 0.03645833 
##  [9] 0.06770833 0.11979167
# ---

# CHUNK 19
Pi.mat <- fblog.sbm$model_parameters[[Q]]$pi
Pi.mat[3,]
# ---
##  [1] 0.0030340287 0.0073657690 0.9102251927 0.0009221811 
##  [5] 0.0009170384 0.0364593875 0.0177621832 0.0024976022 
##  [9] 0.0431732528 0.0012495852
# ---

# CHUNK 20
ntrials <- 1000
Pi.mat <- (t(Pi.mat)+Pi.mat)/2
deg.summ <- list(ntrials)
for(i in (1:ntrials)){
   blk.sz <- rmultinom(1,nv,alpha)
   g.sbm <- sample_sbm(nv,pref.matrix=Pi.mat,
                       block.sizes=blk.sz,
                       directed=FALSE)
   deg.summ[[i]] <- summary(degree(g.sbm))
 }
Reduce('+',deg.summ)/ntrials
# ---
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.931   9.165  13.127  15.183  18.896  49.484 
# ---
summary(degree(fblog))
# ---
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    8.00   13.00   14.91   18.00   56.00
# ---

# CHUNK 21
plot(fblog.sbm$ICL,xlab="Q",ylab="ICL",type="b")
lines(c(Q,Q),c(min(ICLs),max(ICLs)),col="red",lty=2)

# CHUNK 22
edges <- as_edgelist(fblog,names=FALSE)
neworder<-order(cl.labs)
m<-t(matrix(order(neworder)[as.numeric(edges)],2))
plot(1, 1, xlim = c(0, nv + 1), ylim = c(nv + 1, 0), 
      type = "n", axes= FALSE, xlab="Classes",
      ylab="Classes",
      main="Reorganized Adjacency matrix")
rect(m[,2]-0.5,m[,1]-0.5,m[,2]+0.5,m[,1]+0.5,col=1)
rect(m[,1]-0.5,m[,2]-0.5,m[,1]+0.5,m[,2]+0.5,col=1)
cl.lim <- cl.cnts 
cl.lim <- cumsum(cl.lim)[1:(length(cl.lim)-1)]+0.5
clip(0,nv+1,nv+1,0)
abline(v=c(0.5,cl.lim,nv+0.5),
        h=c(0.5,cl.lim,nv+0.5),col="red")

# CHUNK 23
g.cl <- graph_from_adjacency_matrix(Pi.mat,
                                     mode="undirected",
                                     weighted=TRUE)
# Set necessary parameters
vsize <- 100*sqrt(alpha)
ewidth <- 10*E(g.cl)$weight
PolP <- V(fblog)$PolParty
class.by.PolP <- as.matrix(table(cl.labs,PolP))
pie.vals <- lapply(1:Q, function(i) 
                    as.vector(class.by.PolP[i,]))
my.cols <- topo.colors(length(unique(PolP)))
# Plot 
plot(g.cl, edge.width=ewidth, 
      vertex.shape="pie", vertex.pie=pie.vals, 
      vertex.pie.color=list(my.cols),
      vertex.size=vsize, vertex.label.dist=0.1*vsize,
      vertex.label.degree=pi)
# Add a legend
my.names <- names(table(PolP))
my.names[2] <- "Comm. Anal."
my.names[5] <- "PR de G"
legend(x="topleft", my.names,
        fill=my.cols, bty="n")

# CHUNK 24
summary(lazega)
# ---
## IGRAPH NA UN-- 36 115 -- 
## + attr: name (v/c), Seniority (v/n), Status (v/n), 
##     Gender (v/n), Office (v/n), Years (v/n), Age (v/n), 
##     Practice (v/n), School (v/n)
# ---

# CHUNK 25
library(eigenmodel)
set.seed(42)
A <- as_adjacency_matrix(lazega, sparse=FALSE)
lazega.leig.fit1 <- eigenmodel_mcmc(A, R=2, S=11000,
   burn=10000)

# CHUNK 26
same.prac.op <- v.attr.lazega$Practice %o%
   v.attr.lazega$Practice
same.prac <- matrix(as.numeric(same.prac.op
   %in% c(1, 4, 9)), 36, 36)
same.prac <- array(same.prac,dim=c(36, 36, 1))

# CHUNK 27
lazega.leig.fit2 <- eigenmodel_mcmc(A, same.prac, R=2,
   S=11000,burn=10000)

# CHUNK 28
same.off.op <- v.attr.lazega$Office %o%
   v.attr.lazega$Office
same.off <- matrix(as.numeric(same.off.op %in%
   c(1, 4, 9)), 36, 36)
same.off <- array(same.off,dim=c(36, 36, 1))
lazega.leig.fit3 <- eigenmodel_mcmc(A, same.off,
    R=2, S=11000, burn=10000)

# CHUNK 29
lat.sp.1 <-
   eigen(lazega.leig.fit1$ULU_postmean)$vec[, 1:2]
lat.sp.2 <-
   eigen(lazega.leig.fit2$ULU_postmean)$vec[, 1:2]
lat.sp.3 <-
   eigen(lazega.leig.fit3$ULU_postmean)$vec[, 1:2]

# CHUNK 30
colbar <- c("red", "dodgerblue", "goldenrod")
v.colors <- colbar[V(lazega)$Office]
v.shapes <- c("circle", "square")[V(lazega)$Practice]
v.size <- 3.5*sqrt(V(lazega)$Years)
v.label <- V(lazega)$Seniority
plot(lazega, layout=lat.sp.1, vertex.color=v.colors,
      vertex.shape=v.shapes, vertex.size=v.size,
      vertex.label=(1:36))

# CHUNK 31
perm.index <- sample(1:630)
nfolds <- 5
nmiss <- 630/nfolds
Avec <- A[lower.tri(A)]
Avec.pred1 <- numeric(length(Avec))

# CHUNK 32
for(i in seq(1,nfolds)){
  # Index of missing values.
  miss.index <- seq(((i-1) * nmiss + 1),
     (i*nmiss), 1)
  A.miss.index <- perm.index[miss.index]

  # Fill a new Atemp appropriately with NA's.
  Avec.temp <- Avec
  Avec.temp[A.miss.index] <-
     rep("NA", length(A.miss.index))
  Avec.temp <- as.numeric(Avec.temp)
  Atemp <- matrix(0, 36, 36)
  Atemp[lower.tri(Atemp)] <- Avec.temp
  Atemp <- Atemp + t(Atemp)

  # Now fit model and predict.
  Y <- Atemp

  model1.fit <- eigenmodel_mcmc(Y, R=2,
     S=11000, burn=10000)
  model1.pred <- model1.fit$Y_postmean
  model1.pred.vec <-
     model1.pred[lower.tri(model1.pred)]
  Avec.pred1[A.miss.index] <-
     model1.pred.vec[A.miss.index]
}

# CHUNK 33
library(ROCR)
pred1 <- prediction(Avec.pred1, Avec)
perf1 <- performance(pred1, "tpr", "fpr")
plot(perf1, col="blue", lwd=3)

# CHUNK 34
perf1.auc <- performance(pred1, "auc")
slot(perf1.auc, "y.values")
# ---
## [[1]]
## [1] 0.8191811
# ---

Any scripts or data that you put into this service are public.

sand documentation built on July 8, 2020, 7:16 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

sand
Statistical Analysis of Network Data with R, 2nd Edition

inst/code/chapter6.R
In sand: Statistical Analysis of Network Data with R, 2nd Edition

Try the sand package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sand Statistical Analysis of Network Data with R, 2nd Edition

inst/code/chapter6.R In sand: Statistical Analysis of Network Data with R, 2nd Edition

Try the sand package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sand
Statistical Analysis of Network Data with R, 2nd Edition

inst/code/chapter6.R
In sand: Statistical Analysis of Network Data with R, 2nd Edition