demo/UserGuide15.R
In R2MLwiN: Running 'MLwiN' from Within R

############################################################################
#     MLwiN User Manual
#
# 15  Diagnostics for Multilevel Models . . . . . . . . . . . . . . . . .225
#
#     Rasbash, J., Steele, F., Browne, W. J. and Goldstein, H. (2012).
#     A User's Guide to MLwiN, v2.26. Centre for Multilevel Modelling,
#     University of Bristol.
############################################################################
#     R script to replicate all analyses using R2MLwiN
#
#     Zhang, Z., Charlton, C., Parker, R, Leckie, G., and Browne, W.J.
#     Centre for Multilevel Modelling, 2012
#     http://www.bristol.ac.uk/cmm/software/R2MLwiN/
############################################################################

library(R2MLwiN)
# MLwiN folder
mlwin <- getOption("MLwiN_path")
while (!file.access(mlwin, mode = 1) == 0) {
  cat("Please specify the root MLwiN folder or the full path to the MLwiN executable:\n")
  mlwin <- scan(what = character(0), sep = "\n")
  mlwin <- gsub("\\", "/", mlwin, fixed = TRUE)
}
options(MLwiN_path = mlwin)


# 15.1 Introduction . . . . . . . . . . . . . . . . . . . . . . . . . . .225

data(diag1, package = "R2MLwiN")
summary(diag1)

(mymodel1 <- runMLwiN(n_ilea ~ 1 + n_vrq + (1 + n_vrq | school) + (1 | pupil), estoptions = list(resi.store = TRUE, 
  resioptions = c("standardised", "leverage", "influence", "deletion")), data = diag1))

xb <- predict(mymodel1)

u0 <- mymodel1@residual$lev_2_resi_est_Intercept
u1 <- mymodel1@residual$lev_2_resi_est_n_vrq

yhat <- xb + u0[mymodel1@data$school] + u1[mymodel1@data$school] * mymodel1@data$n_vrq

plot(mymodel1@data$n_vrq, yhat, type = "n")
for (i in 1:18) {
  lines(mymodel1@data$n_vrq[mymodel1@data$school == i], yhat[mymodel1@data$school == i], col = 1)
}
lines(mymodel1@data$n_vrq[mymodel1@data$school == 17], yhat[mymodel1@data$school == 17], col = 2, lwd = 3)

yhatold <- yhat

plot(mymodel1@data$n_vrq[mymodel1@data$school != 17], mymodel1@data$n_ilea[mymodel1@data$school != 17], type = "p")
points(mymodel1@data$n_vrq[mymodel1@data$school == 17], mymodel1@data$n_ilea[mymodel1@data$school == 17], col = 2, 
  cex = 2)

u0se <- mymodel1@residual$lev_2_resi_se_Intercept
u1se <- mymodel1@residual$lev_2_resi_se_n_vrq

u0CI95 <- 1.96 * u0se
u0rank <- rank(u0)
u0rankhi <- u0 + u0CI95
u0ranklo <- u0 - u0CI95
u0rankno <- order(u0rank)

u1CI95 <- 1.96 * u0se
u1rank <- rank(u1)
u1rankhi <- u1 + u1CI95
u1ranklo <- u1 - u1CI95
u1rankno <- order(u1rank)

sch17 <- which(levels(as.factor(mymodel1@data$school)) == 17)

plot(1:18, u0[u0rankno], ylim = c(-0.3, 0.3), pch = 15, xlab = "Rank", ylab = "u0 residual estimate")
points(1:18, u0rankhi[u0rankno], pch = 24, bg = "grey")
points(1:18, u0ranklo[u0rankno], pch = 25, bg = "grey")
for (i in 1:18) lines(rep(i, 2), c(u0ranklo[u0rankno[i]], u0rankhi[u0rankno[i]]))
points(x = which(u0rankno == sch17), y = u0[u0rankno[which(u0rankno == sch17)]], pch = 22, bg = 2, cex = 2)

plot(1:18, u1[u1rankno], ylim = c(-0.3, 0.3), pch = 15, xlab = "Rank", ylab = "u1 residual estimate")
points(1:18, u1rankhi[u1rankno], pch = 24, bg = "grey")
points(1:18, u1ranklo[u1rankno], pch = 25, bg = "grey")
for (i in 1:18) lines(rep(i, 2), c(u1ranklo[u1rankno[i]], u1rankhi[u1rankno[i]]))
points(x = which(u1rankno == sch17), y = u1[u1rankno[which(u1rankno == sch17)]], pch = 22, bg = 2, cex = 2)

plot(u0[-17], u1[-17], ylim = c(-0.3, 0.3), xlim = c(-0.1, 0.2), type = "p", xlab = "u0 residual estimate", ylab = "u1 residual estimate")
points(u0[17], u1[17], bg = 2, col = 2, cex = 2)

# 15.2 Diagnostics plotting: Deletion residuals, influence and leverage . 231

hist(u0, breaks = seq(min(u0) - 0.01, max(u0) + 0.01, 0.01))

u0std <- mymodel1@residual$lev_2_std_resi_est_Intercept
hist(u0std, breaks = seq(min(u0std) - 0.1, max(u0std) + 0.1, 0.1))

u0lev <- mymodel1@residual$lev_2_resi_leverage_Intercept
hist(u0lev, breaks = seq(min(u0lev) - 0.01, max(u0lev) + 0.01, 0.01))

u0inf <- mymodel1@residual$lev_2_resi_influence_Intercept
hist(u0inf, breaks = seq(min(u0inf) - 0.025, max(u0inf) + 0.025, 0.025))

u0del <- mymodel1@residual$lev_2_resi_deletion_Intercept
hist(u0del, breaks = seq(min(u0del) - 0.1, max(u0del) + 0.1, 0.1))

plot(u0std[-17], u0lev[-17], ylim = c(0.15, 0.4), xlim = c(-0.2, 2.2), type = "p", xlab = "u0 standardised residual", 
  ylab = "u0 leverage residual")
points(u0std[17], u0lev[17], bg = 2, col = 2, cex = 2)

hist(u1, breaks = seq(min(u1) - 0.01, max(u1) + 0.01, 0.01))

u1std <- mymodel1@residual$lev_2_std_resi_est_n_vrq
hist(u1std, breaks = seq(min(u1std) - 0.1, max(u1std) + 0.1, 0.1))

u1lev <- mymodel1@residual$lev_2_resi_leverage_n_vrq
hist(u1lev, breaks = seq(min(u1lev) - 0.01, max(u1lev) + 0.01, 0.01))

u1inf <- mymodel1@residual$lev_2_resi_influence_n_vrq
hist(u1inf, breaks = seq(min(u1inf) - 0.025, max(u1inf) + 0.025, 0.025))

u1del <- mymodel1@residual$lev_2_resi_deletion_n_vrq
hist(u1del, breaks = seq(min(u1del) - 0.1, max(u1del) + 0.1, 0.1))

plot(u1std[-17], u1lev[-17], ylim = c(0.1, 0.35), xlim = c(-1.4, 2.4), type = "p", xlab = "u1 standardised residual", 
  ylab = "u1 leverage residual")
points(u1std[17], u1lev[17], bg = 2, col = 2, cex = 2)

e0 <- mymodel1@residual$lev_1_resi_est_Intercept
e0rank <- rank(e0)
e0std <- (e0 - mean(e0))/sd(e0)
e0uniform <- e0rank/(length(e0rank) + 1)
e0nscore <- qnorm(e0uniform)

plot(e0nscore[mymodel1@data$school != 17], e0std[mymodel1@data$school != 17], ylim = c(-4, 5), xlim = c(-4, 4), type = "p", 
  xlab = "e0nscore", ylab = "e0std")
points(e0nscore[mymodel1@data$school == 17], e0std[mymodel1@data$school == 17], bg = 2, col = 2, cex = 2)

diag1$pupilnumber <- unlist(by(diag1$school, diag1$school, function(x) 1:length(x)))

diag1[order(e0)[1], c("school", "pupil", "pupilnumber")]

diag1$s17p22 <- as.integer(diag1$school == 17 & diag1$pupilnumber == 22)

(mymodel2 <- runMLwiN(n_ilea ~ 1 + n_vrq + s17p22 + (1 + n_vrq | school) + (1 | pupil), estoptions = list(resi.store = TRUE, 
  startval = list(FP.b = mymodel1@FP, FP.v = mymodel1@FP.cov, RP.b = mymodel1@RP, RP.v = mymodel1@RP.cov)), data = diag1))

u0 <- mymodel2@residual$lev_2_resi_est_Intercept
u1 <- mymodel2@residual$lev_2_resi_est_n_vrq

u0se <- sqrt(mymodel2@residual$lev_2_resi_var_Intercept)
u1se <- sqrt(mymodel2@residual$lev_2_resi_var_n_vrq)

u0CI95 <- 1.96 * u0se
u0rank <- rank(u0)
u0rankhi <- u0 + u0CI95
u0ranklo <- u0 - u0CI95
u0rankno <- order(u0rank)

u1CI95 <- 1.96 * u1se
u1rank <- rank(u1)
u1rankhi <- u1 + u1CI95
u1ranklo <- u1 - u1CI95
u1rankno <- order(u1rank)

plot(1:18, u0[u0rankno], ylim = c(-0.3, 0.3), pch = 15, xlab = "Rank", ylab = "u0 residual estimate")
points(1:18, u0rankhi[u0rankno], pch = 24, bg = "grey")
points(1:18, u0ranklo[u0rankno], pch = 25, bg = "grey")
for (i in 1:18) lines(rep(i, 2), c(u0ranklo[u0rankno[i]], u0rankhi[u0rankno[i]]))
points(x = which(u0rankno == sch17), y = u0[u0rankno[which(u0rankno == sch17)]], pch = 22, bg = 2, cex = 2)

plot(1:18, u1[u1rankno], ylim = c(-0.3, 0.3), pch = 15, xlab = "Rank", ylab = "u1 residual estimate")
points(1:18, u1rankhi[u1rankno], pch = 24, bg = "grey")
points(1:18, u1ranklo[u1rankno], pch = 25, bg = "grey")
for (i in 1:18) lines(rep(i, 2), c(u1ranklo[u1rankno[i]], u1rankhi[u1rankno[i]]))
points(x = which(u1rankno == sch17), y = u1[u1rankno[which(u1rankno == sch17)]], pch = 22, bg = 2, cex = 2)

diag1$s17 <- as.integer(diag1$school == 17)
diag1$s17Xn_vrq <- diag1$s17 * diag1$n_vrq

(mymodel3 <- runMLwiN(n_ilea ~ 1 + n_vrq + s17p22 + s17 + s17Xn_vrq + (1 + n_vrq | school) + (1 | pupil), estoptions = list(startval = list(FP.b = mymodel2@FP, 
  FP.v = mymodel2@FP.cov, RP.b = mymodel2@RP, RP.v = mymodel2@RP.cov)), data = diag1))

(mymodel4 <- runMLwiN(n_ilea ~ 1 + n_vrq + s17p22 + s17 + (1 + n_vrq | school) + (1 | pupil), estoptions = list(resi.store = TRUE, 
  startval = list(FP.b = mymodel3@FP, FP.v = mymodel3@FP.cov, RP.b = mymodel3@RP, RP.v = mymodel3@RP.cov)), data = diag1))

xb <- predict(mymodel4)

u0 <- mymodel4@residual$lev_2_resi_est_Intercept
u1 <- mymodel4@residual$lev_2_resi_est_n_vrq

yhat <- xb + u0[mymodel4@data$school] + u1[mymodel4@data$school] * mymodel4@data$n_vrq

plot(mymodel4@data$n_vrq, yhat, type = "n")
for (i in 1:18) {
  lines(mymodel4@data$n_vrq[mymodel4@data$school == i], yhatold[mymodel4@data$school == i], col = 1)
}
lines(mymodel4@data$n_vrq[mymodel4@data$school == 17], yhatold[mymodel4@data$school == 17], col = 2, lwd = 3)

plot(mymodel4@data$n_vrq, yhat, type = "n")
for (i in 1:18) {
  points(mymodel4@data$n_vrq[mymodel4@data$school == i], yhat[mymodel4@data$school == i], col = 1)
}
points(mymodel4@data$n_vrq[mymodel4@data$school == 17], yhat[mymodel4@data$school == 17], col = 2, lwd = 3)

# 15.3 A general approach to data exploration . . . . . . . . . . . . . .240

# Chapter learning outcomes . . . . . . . . . . . . . . . . . . . . . . .240

############################################################################