Nothing
vp_cp <- function(Z_mat, B.mat, control) {
#throughout, Z_mat is the data frame containing test score data
#and B.mat holds the binary outcome-data and student links
if(control$cpp.benchmark){
cpp.rmstep.times=c()
R.rmstep.times=c()
}
fam.binom <- control$outcome.family
persistence <- control$persistence
# H.eta is the inverse of \tilde{v} from equation 18 of Karl, Yang, Lohr (2013),
#Efficient Maximum Likelihood Estimation of multiple membership linear mixed
#models, with an application to educational value-added assesments, Computational
#Statistics and Data Analysis.
H.eta <- function(G.inv, Z, R.full.inv) {
h.eta <- G.inv
h.y <- crossprod(Z, R.full.inv) %*% Z
as(symmpart(h.eta + h.y), "sparseMatrix")
}
#if x is a matrix, ltriangle extracts the lower triangle of x as a vector.
#if x is a vector, ltriangle(x) builds a symmetric matrix with x as the
#lower triangle.
ltriangle <- function(x) {
if (!is.null(dim(x)[2])) {
resA <- as.vector(suppressMessages(x[lower.tri(x, diag = TRUE)]))
resA
} else {
nx <- length(x)
d <- 0.5 * (-1 + sqrt(1 + 8 * nx))
resB <- .symDiagonal(d)
resB[lower.tri(resB, diag = TRUE)] <- x
if (nx > 1) {
resB <- resB + t(resB) - diag(diag(resB))
}
as(resB, "sparseMatrix")
}
}
#If G is a matrix, then reduce.G extracts the unique parametres from G (G is the
#block diagonal matrix from Equation (9) of Karl, Yang, Lohr (2013)
#If G is a vector, then reduce.G builds the full (sparse, block diagonal) matrix
#G from the list of uniqu parameters.
reduce.G <- function(G, nyear.score, nteacher) {
if (!is.null(dim(G)[2])) {
temp_mat <- G
index1 <- 0
resA <- c(NULL)
for (j in 1:nyear.score) {
temp_mat_j <- ltriangle(as.matrix(suppressMessages(temp_mat[(index1 +
1):(index1 + 2), (index1 + 1):(index1 + 2)])))
resA <- c(resA, temp_mat_j)
index1 <- index1 + 2 * nteacher[j]
}
if (control$school.effects) {
temp_mat_j <- ltriangle(as.matrix(suppressMessages(temp_mat[(index1 +
1):(index1 + 2), (index1 + 1):(index1 + 2)])))
resA <- c(resA, temp_mat_j)
}
resA
} else {
resB <- Matrix(0, 0, 0)
for (j in 1:nyear.score) {
resB <- bdiag(resB, suppressMessages(kronecker(Diagonal(nteacher[j]),
ltriangle(G[(3 * j - 2):(3 * j)]))))
}
if (control$school.effects) {
resB <- bdiag(resB, suppressMessages(kronecker(Diagonal(nschool_effects),
ltriangle(G[(3 * (j + 1) - 2):(3 * (j + 1))]))))
}
rm(j)
resB
}
}
#update.eta returns both H.inv (which is \tilde{v} from equation (18) of
#Karl, Yang, Lohr (2013)) and eta (which is eta from equation (19).
update.eta <- function(X, Y, Z, R.full.inv, ybetas, G, cons.logLik, Ny, nstudent,
n_eta) {
G.chol <- chol(G)
G.inv <- chol2inv(G.chol)
H <- H.eta(G.inv = G.inv, Z = Z, R.full.inv = R.full.inv)
chol.H <- chol(H)
H.inv <- chol2inv(chol.H)
c.temp <- crossprod(X, R.full.inv) %*% Z
c.1 <- rbind(crossprod(X, R.full.inv) %*% X, t(c.temp))
c.2 <- rbind(c.temp, H)
C_inv <- cbind(c.1, c.2)
chol.C_inv <- chol(forceSymmetric(symmpart(C_inv)))
cs <- chol2inv(chol.C_inv)
C12 <- as.matrix(cs[1:n_ybeta,(n_ybeta+1):ncol(cs)])
C.mat <- cs[-c(1:n_ybeta), -c(1:n_ybeta)]
betacov<-as.matrix(cs[c(1:n_ybeta), c(1:n_ybeta)])
if (control$REML) {
var.eta <- C.mat
} else {
var.eta <- H.inv
}
res <- var.eta
sign.mult <- function(det) {
det$modulus * det$sign
}
eta <- H.inv %*% as.vector(crossprod(Z, R.full.inv) %*% (Y - X %*% ybetas))
log.p.eta <- -(n_eta/2) * log(2 * pi) - sign.mult(determinant(G.chol)) -
0.5 * crossprod(eta, G.inv) %*% eta
log.p.y <- -(Ny/2) * log(2 * pi) + sign.mult(determinant(chol(R.full.inv))) -
0.5 * crossprod(Y - X %*% ybetas - Z %*% eta, R.full.inv) %*% (Y - X %*%
ybetas - Z %*% eta)
if (control$REML) {
attr(res, "likelihood") <- as.vector(cons.logLik + log.p.eta + log.p.y -
sign.mult(determinant(chol.C_inv)))
} else {
attr(res, "likelihood") <- as.vector(cons.logLik + log.p.eta + log.p.y -
sign.mult(determinant(chol.H)))
}
attr(res, "eta") <- eta
attr(res,"betacov")<-betacov
attr(res,"C12")<- C12
attr(res, "h.inv") <- H.inv
res
}
#Given a vector, thetas, of current parameter values, Scor() returns a vector
#of the first-derivative of the likelihood function at that vector
#This function is not used to produce the parameter estimates, only
#to calculate the Hessian (matrix of second derivatives). After a few EM steps
#to improve the initial value, it would be possible to use the Score function in a
#Newton-Rhaphson algorithm to obtain the parameter estimates. We do not do that,
#however, since that routine is unstable in the presence of potentially strong
#correlations in the G matrix
Score <- function(thetas, eta = eta.hat, ybetas, X, Y, Z, n_ybeta, n_eta, sqrt.W,
inv.sqrt.W, nstudent, nteacher, Kg, cons.logLik, con = control, mis.list,
pattern.parmlist2, pattern.count, pattern.length, pattern.Rtemplate, pattern.diag,
pattern.key, Ny, pattern.sum = pattern.sum, persistence, P, alpha.diag, nalpha,
alpha) {
n_Rparm <- nyear.pseudo * (nyear.pseudo + 1)/2 - 1
G <- thetas[(n_Rparm + 1):(n_Rparm + ncol(G.mat))]
G <- reduce.G(G = G, nyear.score = nyear.score, nteacher = nteacher)
if (persistence == "VP") {
alpha.parm <- thetas[(n_Rparm + ncol(G.mat) + 1):length(thetas)]
alpha[!((1:nalpha) %in% alpha.diag)] <- alpha.parm
if (!control$school.effects) {
Z <- Matrix(0, nrow(Z_mat), nteach_effects)
} else {
Z <- Matrix(0, nrow(Z_mat), nteach_effects + nschool_effects)
}
for (i in 1:nalpha) {
comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
Z <- Z + alpha[i] * P[[comp[1]]][[comp[2]]]
}
if (control$school.effects) {
colnames(Z) <- eta_effects
Z <- Z + Z.school.only
}
Z <- drop0(Matrix(Z))
# BZ structure
if (!control$school.effects) {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects)
Z.expand[, seq(1, 2 * nteach_effects, by = 2)] <- Z
} else {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects + 2 * nschool_effects)
Z.expand[, seq(1, 2 * nteach_effects + 2 * nschool_effects, by = 2)] <- Z
}
colnames(Z) <- eta_effects
J.Z <- rbind(Z.expand, B.Z.expand)
J.Z <- J.Z[order(J.mat.original$student, J.mat.original$year), ]
colnames(J.Z) <- interleave(colnames(Z), colnames(B.Z))
for (p in unique(J.mat$pat)) {
J.Z.p[[p]] <- J.Z[pat[[p]], , drop = FALSE]
}
Z <- J.Z
}
R_i <- ltriangle(c(as.vector(thetas[1:n_Rparm]), 1))
R_i.parm <- c(as.vector(thetas[1:n_Rparm]), 1)
LRI <- length(R_i.parm)
R_i.parm.constrained <- R_i.parm[-LRI]
if (length(mis.list) > 0) {
R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
R_i)[-mis.list, -mis.list]))
rinv <- Matrix(0, 0, 0)
R_i.inv <- chol2inv(chol(R_i))
for (i in 1:nstudent) {
if (!any(mis.list %in% seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo))) {
rinv <- bdiag(rinv, R_i.inv)
} else {
inv.indx <- which(!(seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo) %in%
mis.list))
rinv <- bdiag(rinv, chol2inv(chol(R_i[inv.indx, inv.indx])))
}
}
R_inv <- rinv
} else {
R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
R_i)))
R_inv <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
chol2inv(chol(R_i)))))
}
R.full <- drop0(symmpart(sqrt.W %*% R %*% sqrt.W))
R.full.inv <- drop0(symmpart(inv.sqrt.W %*% R_inv %*% inv.sqrt.W))
new.eta <- update.eta(X = X, Y = Y, Z = Z, R.full.inv = R.full.inv, ybetas = ybetas,
G = G, cons.logLik = cons.logLik, Ny = Ny, nstudent = nstudent, n_eta = n_eta)
eta.hat <- attr(new.eta, "eta")
C12 <- attr(new.eta, "C12")
betacov <- matrix(attr(new.eta, "betacov"),nrow=length(ybetas))
var.eta.hat <- new.eta
temp_mat <- var.eta.hat + tcrossprod(eta.hat, eta.hat)
temp_mat_R <- attr(new.eta, "h.inv") + tcrossprod(eta.hat, eta.hat)
####
pattern.sum <- list()
for (p in unique(J.mat$pat)) {
if(control$REML){
pattern.sum[[p]] <- Matrix(REML_Rm(invsqrtW_ = as.matrix(diag(inv.sqrt.W)), betacov_ = betacov, C12_ = C12,
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim)))
}else{
pattern.sum[[p]] <- Matrix(R_mstep2(invsqrtW_ = as.matrix(diag(inv.sqrt.W)),
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat_R),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim)))
}
}
####
score.R <- -pattern.f.score.constrained(R_i.parm.constrained = R_i.parm.constrained,
nyear = nyear.pseudo, pattern.parmlist2 = pattern.parmlist2, pattern.count = pattern.count,
pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
pattern.diag = pattern.diag, pattern.key = pattern.key, pattern.sum = pattern.sum)
G.originial <- G
gam_t <- list()
sv_gam_t <- list()
index1 <- 0
for (j in 1:nyear.score) {
gam_t[[j]] <- matrix(0, Kg[j], Kg[j])
G.originial_j <- G.originial[(index1 + 1):(index1 + nteacher[j] * Kg[j]),
(index1 + 1):(index1 + nteacher[j] * Kg[j])]
gam_t[[j]] <- G.originial_j[1:Kg[j], 1:Kg[j]]
sv_gam_t[[j]] <- chol2inv(chol(gam_t[[j]]))
index1 <- index1 + nteacher[j] * Kg[j]
}
if (control$school.effects) {
gam_t_school <- matrix(0, 2, 2)
G.originial_j <- G.originial[(index1 + 1):(index1 + nschool_effects *
2), (index1 + 1):(index1 + nschool_effects * 2)]
gam_t_school <- G.originial_j[1:2, 1:2]
sv_gam_t_school <- chol2inv(chol(gam_t_school))
index1 <- index1 + nschool_effects * 2
}
rm(j)
gam_t_sc <- list()
index1 <- 0
score.G <- Matrix(0, 0, 0)
for (j in 1:nyear.score) {
gam_t_sc[[j]] <- matrix(0, Kg[j], Kg[j])
temp_mat_j <- temp_mat[(index1 + 1):(index1 + nteacher[j] * Kg[j]), (index1 +
1):(index1 + nteacher[j] * Kg[j])]
index2 <- c(1)
for (k in 1:nteacher[j]) {
gam_t_sc[[j]] <- gam_t_sc[[j]] + temp_mat_j[(index2):(index2 + Kg[j] -
1), (index2):(index2 + Kg[j] - 1)]
index2 <- index2 + Kg[j]
}
index1 <- index1 + nteacher[j] * Kg[j]
der <- -0.5 * (nteacher[j] * sv_gam_t[[j]] - sv_gam_t[[j]] %*% gam_t_sc[[j]] %*%
sv_gam_t[[j]])
if (is.numeric(drop(sv_gam_t[[j]]))) {
score.eta.t <- der
} else {
score.eta.t <- 2 * der - diag(diag(der))
}
for (k in 1:nteacher[j]) {
score.G <- bdiag(score.G, score.eta.t)
}
}
if (control$school.effects) {
gam_t_school <- matrix(0, 2, 2)
temp_mat_j <- temp_mat[(index1 + 1):(index1 + nschool_effects * 2), (index1 +
1):(index1 + nschool_effects * 2)]
index2 <- c(1)
for (k in 1:nschool_effects) {
gam_t_school <- gam_t_school + temp_mat_j[(index2):(index2 + 2 -
1), (index2):(index2 + 2 - 1)]
index2 <- index2 + 2
}
index1 <- index1 + nschool_effects * 2
der <- -0.5 * (nschool_effects * sv_gam_t_school - sv_gam_t_school %*%
gam_t_school %*% sv_gam_t_school)
if (is.numeric(drop(sv_gam_t_school))) {
score.eta.t <- der
} else {
score.eta.t <- 2 * der - diag(diag(der))
}
for (k in 1:nschool_effects) {
score.G <- bdiag(score.G, score.eta.t)
}
}
rm(j, k)
if (persistence == "CP" | persistence == "ZP") {
-c(score.R, reduce.G(G = score.G, nyear.score = nyear.score, nteacher = nteacher))
} else if (persistence == "VP") {
alpha.parm <- alpha[!((1:nalpha) %in% alpha.diag)]
score.a <- alpha.score(alpha.parm, alpha = alpha, temp_mat = temp_mat_R[seq(1,
n_eta, 2), seq(1, n_eta, 2)], nalpha = nalpha, alpha.diag = alpha.diag,
P = P, R_inv = R.full.inv[J.mat$response == "score", J.mat$response ==
"score"], eta.hat = eta.hat[seq(1, n_eta, 2)], ybetas = ybetas,
X = X[J.mat$response == "score", ], Y = Y[J.mat$response == "score"])
-c(score.R, reduce.G(G = score.G, nyear.score = nyear.score, nteacher = nteacher),
score.a)
}
}
#hessian.f() is used to calculate the Hessian for the purpose of calculating
#standard errors. It is not used to obtain the parameter estimates.
hessian.f <- function() {
lgLik.hist <- lgLik
lgLik <- lgLik[it]
Hessian <- NA
std_errors <- c(rep(NA, length(thetas)))
if (control$hessian == TRUE) {
if (control$verbose)
# cat('Calculating the Hessian...\n')
flush.console()
if (control$hes.method == "richardson") {
Hessian <- ltriangle(ltriangle(jacobian(Score, thetas, eta = eta.hat,
ybetas = ybetas, X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W, inv.sqrt.W = inv.sqrt.W,
pattern.sum = pattern.sum, con = control, n_ybeta = n_ybeta, n_eta = n_eta,
nstudent = nstudent, nteacher = nteacher, Kg = Kg, cons.logLik = cons.logLik,
mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2, pattern.count = pattern.count,
pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
pattern.diag = pattern.diag, pattern.key = pattern.key, Ny = Ny,
persistence = persistence, P = P, alpha.diag = alpha.diag, nalpha = nalpha,
alpha = alpha)))
} else {
Hessian <- ltriangle(ltriangle(jacobian(Score, thetas, method = "simple",
eta = eta.hat, ybetas = ybetas, X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W,
inv.sqrt.W = inv.sqrt.W, pattern.sum = pattern.sum, con = control,
n_ybeta = n_ybeta, n_eta = n_eta, nstudent = nstudent, nteacher = nteacher,
Kg = Kg, cons.logLik = cons.logLik, mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2,
pattern.count = pattern.count, pattern.length = pattern.length,
pattern.Rtemplate = pattern.Rtemplate, pattern.diag = pattern.diag,
pattern.key = pattern.key, Ny = Ny, persistence = persistence,
P = P, alpha.diag = alpha.diag, nalpha = nalpha, alpha = alpha)))
}
std_errors <- try(c(sqrt(diag(solve(Hessian)))), silent = TRUE)
hes.warn <- FALSE
if (any(eigen(Hessian)$values <= 0)) {
if (control$verbose)
cat("Warning: Hessian not PD", "\n")
std_errors <- c(rep(NA, length(thetas)))
hes.warn <- TRUE
}
}
chol2inv(chol(Hessian))
}
#update.ybeta is only used to calculate the initial fixed effects vector
#given inital values for R and eta
#a different calculation is used throughout the EM algorithm
#The souce of this function is Equation (15) of Karl, Yang, Lohr (2013)
update.ybeta <- function(X, Y, Z, R_inv, eta.hat) {
A.ybeta <- crossprod(X, R_inv) %*% X
B.ybeta <- crossprod(X, R_inv) %*% (Y - Z %*% eta.hat)
as.vector(solve(A.ybeta, B.ybeta))
}
#bin2dec and dec2 bin are used to move between the columns of the
#base2 calculations shown in Table 1 of Karl, Yang, Lohr (2013)
bin2dec <- function(s) sum(s * 2^(rev(seq_along(s)) - 1))
dec2bin <- function(s) {
L <- length(s)
maxs <- max(s)
digits <- floor(logb(maxs, base = 2)) + 1
res <- array(NA, dim = c(L, digits))
for (i in 1:digits) {
res[, digits - i + 1] <- (s%%2)
s <- (s%/%2)
}
if (L == 1)
res[1, ] else res
}
#alpha.score gives the score vector of the persistence parameters
#See section 3.4 of Karl, Yang, Lohr (2013)
alpha.score <- function(alpha.parm, alpha, temp_mat_R, nalpha, alpha.diag, P,
R_inv, eta.hat, ybetas, X, Y) {
score.a <- c(NULL)
alpha.s <- alpha
alpha.s[!((1:nalpha) %in% alpha.diag)] <- alpha.parm
alpha.set <- (1:nalpha)[!((1:nalpha) %in% alpha.diag)]
if (!control$school.effects) {
Z.a <- Matrix(0, nrow(Z_mat), nteach_effects)
} else {
Z.a <- Matrix(0, nrow(Z_mat), nteach_effects + nschool_effects)
}
for (i in 1:nalpha) {
comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
Z.a <- Z.a + alpha.s[i] * P[[comp[1]]][[comp[2]]]
}
if (control$school.effects) {
colnames(Z.a) <- eta_effects
Z.a <- Z.a + Z.school.only
}
for (i in alpha.set) {
comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
score.a <- c(score.a, as.numeric((t(Y) - t(ybetas) %*% t(X)) %*% R_inv %*%
P[[comp[1]]][[comp[2]]] %*% eta.hat - sum(diag(crossprod(Z.a, R_inv %*%
P[[comp[1]]][[comp[2]]] %*% temp_mat_R)))))
}
score.a
}
#pattern.f.score.constrained calculates the matrices shown in Equation 16
#of Karl, Yang, Lohr (2013). The "constrained" refers to the fact that
#the bottom right element of R is fixed at 1.
pattern.f.score.constrained <- function(R_i.parm.constrained, nyear, pattern.parmlist2,
pattern.count, pattern.length, pattern.Rtemplate, pattern.diag, pattern.key,
pattern.sum) {
R_i <- as.matrix(ltriangle(c(as.vector(R_i.parm.constrained), 1)))
pattern.score <- numeric(nyear/2 * (nyear + 1) - 1)
for (p in nonempty.patterns) {
pattern.y <- solve(pattern.f.R(R_i, p, nyear, pattern.key))
YSY <- pattern.y %*% pattern.sum[[p]] %*% pattern.y
PCL <- pattern.countoverlength[[p]]
for (r.parm in 1:(nyear/2 * (nyear + 1) - 1)) {
if (is.null(pat.coord <- pat.coord.guide[[r.parm]][[p]]))
next
pattern.yc <- pattern.y[pat.coord]
pattern.score[r.parm] <- pattern.score[r.parm] - (PCL * pattern.yc -
YSY[pat.coord])
}
}
pattern.score[1:(nyear/2 * (nyear + 1) - 1) %in% pattern.diag] <- 0.5 * pattern.score[1:(nyear/2 *
(nyear + 1) - 1) %in% pattern.diag]
-pattern.score
}
#pattern.f.R takes a vector of unique parameters and build it into a
#symmetric matrix of appropriate size. Even though R is block diagonal,
#not all of the blocks are the same due to missing data. For example,
#if the year-2 observation is missing, then the second row and column
#need to be deleted from the particular R-block.
pattern.f.R <- function(R, p, nyear, pattern.key) {
R[pattern.key[p, ] * (1:nyear), pattern.key[p, ] * (1:nyear), drop = FALSE]
}
#mark the time
ptm.total <- proc.time()[3]
#make sure year is numeric
Z_mat$year <- as.numeric(Z_mat$year)
#student.delete.list records any students that appear in the data
#set that do not have any scores recorded
student.delete.list <- c(NULL)
for (s in unique(Z_mat$student)) {
if (sum(!is.na(Z_mat[Z_mat$student == s, "y"])) == 0)
student.delete.list <- c(student.delete.list, s)
}
#any students in student.delete.list are deleted
if (length(student.delete.list) > 0) {
Z_mat <- Z_mat[!Z_mat$student %in% student.delete.list, ]
B.mat <- B.mat[!B.mat$student %in% student.delete.list, ]
}
#how many years of score data are there?
nyear.score <- length(unique(Z_mat$year))
#In the program, the binary observations are just treated as the last
#year of the data set. For example, if there are 3 years of score
#data, then "year 4" will actually record the binary observations
#(Think about how the R matrix is structured to see why this makes
#sense)
B.year <- nyear.score + 1
#B.mat holds the binary outcome observations and student links
B.mat$year <- B.year
B.mat$mis <- 0
B.mat$y <- B.mat$r
B.mat$r <- NULL
B.mat <- B.mat[order(B.mat$student, decreasing = TRUE), ]
#Z_mat holds the test score data
#Z_mat$mis will hold indicators for whether or not the observation missing
#These will be needed to figure out which subset of the R-blocks are needed
#for each student (see pattern.f.r, etc).
Z_mat$mis <- rep(0, dim(Z_mat)[1])
student_list <- unique(Z_mat$student)
Z_mat[is.na(Z_mat$y), ]$mis <- rep(1, dim(Z_mat[is.na(Z_mat$y), ])[1])
#the following loop "fills in the gaps" of missing student scores,
#making sure that each student has a row in each year
for (g in 1:nyear.score) {
mis_stu <- student_list[!(student_list %in% unique(Z_mat[Z_mat$year == g,
]$student))]
le <- length(mis_stu)
if (le > 0) {
temp.exp <- Z_mat[1:le, ]
temp.exp$year <- rep(g, le)
temp.exp$mis <- rep(1, le)
temp.exp$student <- mis_stu
temp.exp$teacher <- rep(NA, le)
temp.exp$y <- rep(NA, le)
Z_mat <- rbind(Z_mat, temp.exp)
}
}
rm(g, le)
#Z_mat.full will be the current Z_mat matrix, while
#any missing observations will be deleted from Z_mat
Z_mat.full <- Z_mat
Z_mat <- Z_mat[!is.na(Z_mat$y), ]
Z_mat.full <- Z_mat.full[which((Z_mat.full$student %in% Z_mat$student)), ]
#create missing indicators for the binary observations
B.mat[is.na(B.mat$y), ]$mis <- rep(1, dim(B.mat[is.na(B.mat$y), ])[1])
#the following lines and loop "fill in the blanks" to make sure that
#each student has a row in B.mat, the data frame of binary outcomes
mis_stu <- student_list[!(student_list %in% unique(B.mat[B.mat$year == B.year,
]$student))]
le <- length(mis_stu)
if (le > 0) {
temp.exp <- B.mat[1:le, ]
temp.exp$year <- rep(B.year, le)
temp.exp$mis <- rep(1, le)
temp.exp$student <- mis_stu
temp.exp$y <- rep(NA, le)
B.mat <- rbind(B.mat, temp.exp)
}
rm(le)
B.mat.full <- B.mat
B.mat <- B.mat[!is.na(B.mat$y), ]
nstudent <- length(unique(Z_mat$student))
#Kg is the dimension of the square matrix of each block of the G matrix
#E.g. year 1 has a teacher-score variance component and a teacher-outcome
#variance compoenent (and an implied covariance). Same for each year.
Kg <- rep(2, nyear.score)
Z_mat <- Z_mat[order(Z_mat$year, Z_mat$teacher), ]
Z_mat.full <- Z_mat.full[order(Z_mat.full$year, Z_mat.full$teacher), ]
na_list <- grep("^NA", Z_mat$teacher)
#teachyearcomb tracks the unique teachers in the data set.
#if a teacher teaches multiple years in the data set, those are treated
#as different effects and count as different "teachers"
if (length(na_list) > 0) {
teachyearcomb <- unique(cbind(Z_mat[-na_list, ]$year, Z_mat[-na_list, ]$teacher))
} else {
teachyearcomb <- unique(cbind(Z_mat$year, Z_mat$teacher))
}
Z_mat <- Z_mat[order(Z_mat$student, Z_mat$year, Z_mat$teacher), , drop = FALSE]
Z_mat.full <- Z_mat.full[order(Z_mat.full$student, Z_mat.full$year, Z_mat.full$teacher),
]
nteach_effects <- dim(teachyearcomb)[1]
teacheffZ_mat <- Matrix(0, nrow = nrow(Z_mat), ncol = nteach_effects)
t_effects <- rep(NA, nteach_effects)
indx <- 1
eblup.tracker <- matrix(0, 0, 3)
#dP is used as a building block of indicators for the persistence parameters
dP <- list()
for (i in 1:nyear.score) {
dP[[i]] <- list()
for (j in 1:i) dP[[i]][[j]] <- matrix(0, 0, 2)
}
#nalpha is the number of persistence parameters
nalpha <- nyear.score/2 * (nyear.score + 1)
if (persistence == "ZP") {
alpha <- ltriangle(diag(nyear.score))
} else if (persistence == "CP" | persistence == "VP") {
#initial values of persistence parameters are all 1
alpha <- rep(1, nalpha)
}
#alpha_key is a lower trianglular matrix that indexes the alpha
#it tells us where in the matrix of alphas to find each particular alpha
#E.g. the alpha for the effect of year1 teachers on year2 scores appears in
#column 1, row 2
alpha_key <- tril(ltriangle(1:nalpha))
#which alpha are on the diagonal? E.g. with two years of observations, these
#these would be alpha 1 and alpha 3. But these are all assumed to equal 1
alpha.diag <- diag(alpha_key)
#alpha.parm are the unconstrained alpha (remove the diagonal elements equal
#to 1
alpha.parm <- alpha[!((1:nalpha) %in% alpha.diag)]
#populate dP with persistence information
for (k in 1:nrow(teachyearcomb)) {
student_subset <- Z_mat.full$student[Z_mat.full$year == teachyearcomb[k,
1] & Z_mat.full$teacher == teachyearcomb[k, 2]]
# t_effects[k] <- paste(teachyearcomb[k, 1], '_', teachyearcomb[k, 2], sep = '')
t_effects[k] <- paste(teachyearcomb[k, 2], sep = "")
eblup.tracker <- rbind(eblup.tracker, c(teachyearcomb[k, ], teachyearcomb[k,
1]))
for (yr in as.numeric(teachyearcomb[k, 1]):nyear.score) {
if (sum(is.element(Z_mat$student, student_subset) & Z_mat$year == yr) !=
0) {
q1 <- (1:nrow(Z_mat))[is.element(Z_mat$student, student_subset) &
Z_mat$year == yr & !is.na(Z_mat$y)]
q2 <- rep(k, length(q1))
dP[[yr]][[as.numeric(teachyearcomb[k, 1])]] <- rbind(dP[[yr]][[as.numeric(teachyearcomb[k,
1])]], cbind(q1, q2))
}
}
}
#school effects calculates an extra variance component for school-level
#variation. One new instance of this component will be added to G for each
#school. This information needs to be righ-concatonated to the Z matrix
if (control$school.effects) {
school.start <- nteach_effects + 1
z.school <- t(fac2sparse(Z_mat$schoolID))
# school.length<-ncol(z.school)
Z.school.only <- cbind(Matrix(0, nrow(Z_mat), nteach_effects), z.school)
nschool_effects <- ncol(z.school)
}
#P holds indicator matrices (built from dP) to show which persistence
#parameters are modeled against each row
P <- list()
for (i in 1:nyear.score) {
P[[i]] <- list()
for (j in 1:i) {
P[[i]][[j]] <- as(sparseMatrix(i = dP[[i]][[j]][, 1], j = dP[[i]][[j]][,
2], dims = c(nrow(Z_mat), nteach_effects)), "dMatrix")
if (control$school.effects)
P[[i]][[j]] <- cbind(P[[i]][[j]], Matrix(0, nrow(Z_mat), nschool_effects))
}
}
if (!control$school.effects) {
Z <- Matrix(0, nrow(Z_mat), nteach_effects)
} else {
Z <- Matrix(0, nrow(Z_mat), nteach_effects + nschool_effects)
}
#build persistence information into Z. See the discussion in section
#3.4 of Karl, Yang, Lohr 2013
for (i in 1:nalpha) {
comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
Z <- Z + alpha[i] * P[[comp[1]]][[comp[2]]]
}
if (control$school.effects) {
eta_effects <- colnames(Z) <- colnames(Z.school.only) <- c(t_effects, colnames(z.school))
colnames(Z) <- eta_effects
Z <- Z + Z.school.only
} else {
eta_effects <- colnames(Z) <- t_effects
}
#temp_Z is used to build the portion of the Z matrix (B.Z, below)
#corresponding to teacher random effects on the binary outcomes
temp_Z <- Z[order(Z_mat$year, Z_mat$student, decreasing = TRUE), ]
temp_Z_mat <- Z_mat[order(Z_mat$year, Z_mat$student, decreasing = TRUE), ]
temp_Z <- temp_Z[!(duplicated(temp_Z_mat$student)), ]
temp_Z_mat <- temp_Z_mat[!(duplicated(temp_Z_mat$student)), ]
temp_Z <- temp_Z[temp_Z_mat$student %in% B.mat$student, ]
temp_Z_mat <- temp_Z_mat[temp_Z_mat$student %in% B.mat$student, ]
temp_Z_mat <- temp_Z_mat[, "student", drop = FALSE]
B.Z <- temp_Z
colnames(B.Z) <- paste(colnames(temp_Z), "_outcome", sep = "")
B.mat <- merge(temp_Z_mat, B.mat, by = "student", sort = FALSE)
B.mat$response <- "outcome"
Z_mat$response <- "score"
B.mat.full$response <- "outcome"
Z_mat.full$response <- "score"
#J.mat is the "joint" maxtix, combining score and outcome data
J.mat <- J.mat.original <- rbind(Z_mat[, c("student", "year", "mis", "y", "response")],
B.mat[, c("student", "year", "mis", "y", "response")])
J.mat.full <- J.mat.full.original <- rbind(Z_mat.full[, c("student", "year",
"mis", "y", "response")], B.mat.full[, c("student", "year", "mis", "y", "response")])
#nyear.pseudo is nyear+1. The "pseudo" refers to the fact that we treat
#the binary observations as a pseudo additional-year. This is not related
#to the "pseudo-likelihood" routine that is used by this program to estimate
#GLMM
nyear.pseudo <- length(unique(J.mat$year))
#create X for the score submodel
X_mat <- sparse.model.matrix(control$score.fixed.effects, Z_mat, drop.unused.levels = TRUE)
X_mat <- X_mat[, !(colSums(abs(X_mat)) == 0), drop = FALSE]
if (rankMatrix(X_mat, method = "qrLINPACK")[1] != dim(X_mat)[2]) {
cat("WARNING: Fixed-effects design matrix for test scores is not full-rank",
"\n")
flush.console()
ANSWER <- readline("Continue anyway? (Y/N)")
if (substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "N")
stop("WARNING: Fixed-effects design matrix not full-rank")
}
#create the X matrix for the binary submodel
B_X_mat <- sparse.model.matrix(control$outcome.fixed.effects, B.mat, drop.unused.levels = TRUE)
B_X_mat <- B_X_mat[, !(colSums(abs(B_X_mat)) == 0), drop = FALSE]
if (rankMatrix(B_X_mat, method = "qrLINPACK")[1] != dim(B_X_mat)[2]) {
cat("WARNING: Fixed-effects design matrix for outcomes is not full-rank",
"\n")
flush.console()
ANSWER <- readline("Continue anyway? (Y/N)")
if (substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "N")
stop("WARNING: Fixed-effects design matrix not full-rank")
}
#combine the fixed effects design matrices for both models into a single
#matrix.
J.X <- bdiag(X_mat, B_X_mat)
colnames(J.X) <- c(paste(colnames(X_mat), "_score", sep = ""), paste(colnames(B_X_mat),
"_outcome", sep = ""))
#The ".expand" matrices are used to place the elements of Z and B.Z into
#appropriate places in matrices of equal dimension, so that they may
#be interleaved (Teacher-1 outcome effect needs to be next to Teacher-1
#score effect so that G can be block-diagonal
if (!control$school.effects) {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects)
Z.expand[, seq(1, 2 * nteach_effects, by = 2)] <- Z
} else {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects + 2 * nschool_effects)
Z.expand[, seq(1, 2 * nteach_effects + 2 * nschool_effects, by = 2)] <- Z
}
if (!control$school.effects) {
B.Z.expand <- Matrix(0, nrow(B.mat), 2 * nteach_effects)
B.Z.expand[, seq(2, 2 * nteach_effects, by = 2)] <- B.Z
} else {
B.Z.expand <- Matrix(0, nrow(B.mat), 2 * nteach_effects + 2 * nschool_effects)
B.Z.expand[, seq(2, 2 * nteach_effects + 2 * nschool_effects, by = 2)] <- B.Z
}
#J.Z will be the master Z matrix going forward. We are modelling
#J.Y = J.X * ybetas + J.Z * eta.hat
J.Z <- rbind(Z.expand, B.Z.expand)
J.Z <- J.Z[order(J.mat$student, J.mat$year), ]
J.X <- J.X[order(J.mat$student, J.mat$year), ]
interleave <- function(v1, v2) {
ord1 <- 2 * (1:length(v1)) - 1
ord2 <- 2 * (1:length(v2))
c(v1, v2)[order(c(ord1, ord2))]
}
colnames(J.Z) <- interleave(colnames(Z), colnames(B.Z))
J.mat.full <- J.mat.full[order(J.mat.full$student, J.mat.full$year), ]
J.mat <- J.mat[order(J.mat$student, J.mat$year), ]
mis.list <- which(J.mat.full$mis == 1)
#nteacher is a vector of length equal to the number of years of score data
#that records how many teachers taught in each year
nteacher <- as.vector(tapply(teachyearcomb[, 2], teachyearcomb[, 1], length))
n_eta <- ncol(J.Z)
#ybeta would be perhaps more approriately named "jbeta" since it contains
#both the score and the outcome fixed effects
n_ybeta <- ncol(J.X)
J.Y <- as.vector(J.mat$y)
#indicator for if observation present
J.mat.full$r <- 1 - J.mat.full$mis
#pattern.student: each student has a row of indicators showing which year present
#see Table1 of Karl, Yang, Lohr (2013)
pattern.student <- matrix(J.mat.full$r, nstudent, nyear.pseudo, byrow = TRUE)
#for each row of J.mat, what missingness pattern the student in that row
#belongs to
#all of the pattern variables below are used to track the attendence
#patterns discussed in Section 4 of Karl, Yang, Lohr (2013)
J.mat.full$pat <- rep(apply(pattern.student, 1, bin2dec), each = nyear.pseudo)
J.mat$pat <- J.mat.full[J.mat.full$r == 1, ]$pat
pat <- list()
pattern.count <- list()
pattern.length <- list()
pattern.countoverlength <- list()
J.Y.p <- list()
J.X.p <- list()
J.Z.p <- list()
J.Y.p.rownumber <- list()
J.mat$rownumber <- 1:nrow(J.mat)
pattern.key <- dec2bin(1:(2^nyear.pseudo - 1))
for (p in unique(J.mat$pat)) {
pat[[p]] <- which(J.mat$pat == p)
J.X.p[[p]] <- J.X[pat[[p]], , drop = FALSE]
J.Z.p[[p]] <- J.Z[pat[[p]], , drop = FALSE]
J.Y.p[[p]] <- J.Y[pat[[p]]]
J.Y.p.rownumber[[p]] <- J.mat$rownumber[pat[[p]]]
pattern.count[[p]] <- length(J.Y.p[[p]])
pattern.length[[p]] <- sum(pattern.key[p, ])
pattern.countoverlength[[p]] <- pattern.count[[p]]/pattern.length[[p]]
}
pattern.yguide <- list()
for (g in 1:nyear.pseudo) {
pattern.yguide[[g]] <- which(pattern.key[, g] == 1)
}
pattern.Rtemplate <- ltriangle(1:(nyear.pseudo/2 * (nyear.pseudo + 1)))
pattern.diag <- diag(pattern.Rtemplate)
pattern.parmlist1 <- list()
pattern.parmlist2 <- list()
for (p in unique(J.mat$pat)) {
pattern.parmlist1[[p]] <- sort(unique(as.vector(pattern.f.R(pattern.Rtemplate,
p, nyear.pseudo, pattern.key))))
}
for (r.parm in 1:(nyear.pseudo/2 * (nyear.pseudo + 1))) {
pattern.parmlist2[[r.parm]] <- which(sapply(pattern.parmlist1, f <- function(x) r.parm %in%
x))
}
#eta.hat is eta from Karl, Yang, Lohr (2013) (The vector of random effects)
eta.hat <- numeric(n_eta)
#var.eta.hat is the quantity defined in Equation (18) of
#Karl, Yang, Lohr (2013)
var.eta.hat <- Matrix(0, n_eta, n_eta)
#R.temp.comp are the initial estimates for the diagonal elements of R
R.temp.comp <- rep(1, nyear.pseudo)
for (g in 1:nyear.score) {
R.temp.comp[g] <- var(J.mat[J.mat$year == g, ]$y)/4
}
#R_i is the block of R corresponding to a student with no missing data.
#R is built from the blocks of R with appropriate rows and columns deleted
#where observations are missing
R_i <- diag(R.temp.comp)
if (length(mis.list) > 0) {
R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
R_i)[-mis.list, -mis.list]))
rinv <- Matrix(0, 0, 0)
R_i.inv <- chol2inv(chol(R_i))
for (i in 1:nstudent) {
if (!any(mis.list %in% seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo))) {
rinv <- bdiag(rinv, R_i.inv)
} else {
inv.indx <- which(!(seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo) %in%
mis.list))
rinv <- bdiag(rinv, chol2inv(chol(R_i[inv.indx, inv.indx])))
}
}
R_inv <- rinv
} else {
R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
R_i)))
R_inv <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
chol2inv(chol(R_i)))))
}
#the only use of update.ybeta() in the program. This calculates the initial
#values of the fixed effects (for both score and response submodels).
#calculation for score model fixed effects
ybetas <- update.ybeta(J.X, J.Y, J.Z, R_inv, eta.hat)
#calculation for binary model fixed effects
ybetas[(ncol(X_mat) + 1):n_ybeta] <- glm.fit(x = B_X_mat, y = B.mat$y, family = binomial("probit"))[[1]]
names(ybetas) <- colnames(J.X)
#initial value for G. A digaonal matrix with 100 on the diagonal
G <- 100 * .symDiagonal(n_eta)
#if(control$independent.responses) G <- 0.01 * .symDiagonal(n_eta)
#number of observations (both score and binary).
Ny <- sum(J.mat$mis == 0)
#pat.coord.guide tells where to find each of the R parameters in each of the
#possible blocks of R (across all possible missing data patterns).
pat.coord.guide <- list()
for (r.parm in 1:(nyear.pseudo/2 * (nyear.pseudo + 1) - 1)) {
pat.coord.guide[[r.parm]] <- list()
for (p in pattern.parmlist2[[r.parm]]) {
pat.coord.guide[[r.parm]][[p]] <- which(tril(pattern.f.R(pattern.Rtemplate,
p, nyear.pseudo, pattern.key)) == r.parm)
}
}
nonempty.patterns <- NULL
for (p in 1:length(pat.coord.guide[[1]])) {
if (is.null(retrieve.parm <- pattern.parmlist1[[p]]))
next
nonempty.patterns <- c(nonempty.patterns, p)
}
#the loglikelihood function. Do not use this for likelihood ratio tests,
#which are not valid for pseudo-likelihood estimation
if (control$REML) {
cons.logLik <- 0.5 * (n_eta + n_ybeta) * log(2 * pi)
} else {
cons.logLik <- 0.5 * (n_eta) * log(2 * pi)
}
J.mat$fit <- J.mat$mu <- J.mat$mu.eta.val <- J.mat$nu <- J.mat$sqrt.w <- NA
#J.mat$fit is the vector of estimated responses (and psuedoresponses)
J.mat[J.mat$response == "outcome", ]$fit <- as.vector(J.X[J.mat$response == "outcome",
] %*% ybetas + J.Z[J.mat$response == "outcome", ] %*% eta.hat)
#J.mat$mu is defined on page 236 of Wolfinger and O'Connell (1993)
J.mat[J.mat$response == "outcome", ]$mu <- as.vector(fam.binom$linkinv(J.mat[J.mat$response ==
"outcome", ]$fit))
#J.mat$mu.eta.val is the value of the first derivative of the link function
#at each predicted point. This is g' on page 236 of Wolfinger and O'Connell (1993)
J.mat[J.mat$response == "outcome", ]$mu.eta.val <- fam.binom$mu.eta(J.mat[J.mat$response ==
"outcome", ]$fit)
#J.mat$nu is nu from Step 2 in Section 3 of Wolfinger and O'Connell (1993)
J.mat[J.mat$response == "outcome", ]$nu <- as.vector(J.mat[J.mat$response ==
"outcome", ]$fit) + (J.mat[J.mat$response == "outcome", ]$y - J.mat[J.mat$response ==
"outcome", ]$mu)/J.mat[J.mat$response == "outcome", ]$mu.eta.val
#In Wolfinger and O'Connell (1993), diag(J.mat$sqrt.w) is called R_{\mu}
J.mat[J.mat$response == "outcome", ]$sqrt.w <- sqrt(fam.binom$variance(J.mat[J.mat$response ==
"outcome", ]$mu)/J.mat[J.mat$response == "outcome", ]$mu.eta.val^2)
#no transformation needed for the normal-distributed scores. The
#pseudoresponses are the responses
J.mat[J.mat$response == "score", ]$sqrt.w <- 1
J.mat[J.mat$response == "score", ]$nu <- J.mat[J.mat$response == "score", ]$y
sqrt.W <- Diagonal(x = J.mat$sqrt.w)
inv.sqrt.W <- Diagonal(x = 1/(J.mat$sqrt.w))
#R.full is the error covariance matrix, which is formed as the product
#diag(sqrt.w)%*%R%*%diag(sqrt.w). The matrix R assumes a variance of 1
#for all of the binomial responses, while R.full includes the variance from the
#binomial distribution (in Wolfinger (1993), diag(sqrt.w) is called R_mu).
R.full <- drop0(symmpart(sqrt.W %*% R %*% sqrt.W))
R.full.inv <- drop0(symmpart(inv.sqrt.W %*% R_inv %*% inv.sqrt.W))
#replace vector of responses with vector of updated pseudoresponses
J.Y <- J.mat$nu
for (p in unique(J.mat$pat)) {
J.Y.p[[p]] <- J.Y[pat[[p]]]
}
for (PQL.it in 1:control$max.PQL.it) {
#begin the outer pseudo-likelihood loop
# if (PQL.it <= control$EM.to.NR) {
EM.iter <- control$max.iter.EM[1]
# } else {
# EM.iter <- control$max.iter.EM[2]
# }
Y.mat <- Matrix(0, EM.iter + 1, n_ybeta)
G.mat <- Matrix(0, EM.iter + 1, length(reduce.G(G = G, nyear = nyear.score,
nteacher = nteacher)))
R.mat <- Matrix(0, EM.iter + 1, nyear.pseudo * (nyear.pseudo + 1)/2)
lgLik <- numeric(EM.iter + 1)
conv <- FALSE
if (control$verbose)
flush.console()
#There used to be a conditional test in place of if(TRUE): no longer
#needed
if (TRUE) {
#begin the (inner) EM algorithm loop
#note control$EM.to.NR is fixed at a large value and not relevant
cat("Beginning EM algorithm\n")
for (it in 1:1000) {
#loop will exit before 1000 reached due to stopping conditions
ptm <- proc.time()[3]
suppressWarnings(rm(var.eta.hat, temp_mat, temp_mat_R))
mresid <- as.numeric(J.Y - J.X %*% ybetas)
cresid <- as.numeric(mresid - J.Z %*% eta.hat)
yhat <- as.numeric(J.X %*% ybetas + J.Z %*% eta.hat)
yhat.m <- as.numeric(J.X %*% ybetas)
#The E-step. Calculate the new random effects
new.eta <- update.eta(X = J.X, Y = J.Y, Z = J.Z, R.full.inv = R.full.inv,
ybetas = ybetas, G = G, cons.logLik = cons.logLik, Ny = Ny, nstudent = nstudent,
n_eta = n_eta)
eta.hat <- attr(new.eta, "eta")
C12 <- attr(new.eta, "C12")
betacov <- matrix(attr(new.eta, "betacov"),nrow=length(ybetas))
var.eta.hat <- new.eta
#temp_mat is the value inside the summand of Equation (14) of
#Karl, Yang, Lohr (2013)
temp_mat <- var.eta.hat + tcrossprod(eta.hat, eta.hat)
temp_mat_R <- attr(new.eta, "h.inv") + tcrossprod(eta.hat, eta.hat)
loglikelihood <- attr(new.eta, "likelihood")
lgLik[it] <- attr(new.eta, "likelihood")
Y.mat[it, ] <- ybetas
R.mat[it, ] <- suppressMessages(ltriangle(R_i))
G.mat[it, ] <- suppressMessages(reduce.G(G, nyear.score, nteacher))
rm(new.eta)
thets1 <- c(Y.mat[it - 1, ], R.mat[it - 1, ], G.mat[it - 1, ])
thets2 <- c(Y.mat[it, ], R.mat[it, ], G.mat[it, ])
if (it >= 2) {
# if (persistence == "CP" | persistence == "ZP") {
# thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
# nteacher = nteacher))
# } else if (persistence == "VP") {
# thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
# nteacher = nteacher), alpha[!((1:nalpha) %in% alpha.diag)])
# }
# gradient<-Score(thetas,
# eta = eta.hat, ybetas = ybetas, X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W,
# inv.sqrt.W = inv.sqrt.W, pattern.sum = pattern.sum, con = control,
# n_ybeta = n_ybeta, n_eta = n_eta, nstudent = nstudent, nteacher = nteacher,
# Kg = Kg, cons.logLik = cons.logLik, mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2,
# pattern.count = pattern.count, pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
# pattern.diag = pattern.diag, pattern.key = pattern.key, Ny = Ny,
# persistence = persistence, P = P, alpha.diag = alpha.diag, nalpha = nalpha,
# alpha = alpha)
#stopping condition for EM (inner) loop
check.lik <- abs(lgLik[it] - lgLik[it - 1])/abs(lgLik[it] + control$tol1) <
control$tol1
if (check.lik | it == (EM.iter)) {
conv <- TRUE
EM.conv <- TRUE
if (control$verbose) {
cat("\n\n End EM Algorithm.\n")
cat("\n\niter:", it, "\n")
cat("log-likelihood:", sprintf("%.7f", lgLik[it]), "\n")
cat("change in loglik:", sprintf("%.7f", lgLik[it] - lgLik[it -
1]), "\n")
cat("fixed effects:", round(ybetas, 4), "\n")
cat("R_i:\n")
print(round(as.matrix(R_i), 4))
cat("\n")
print(round(cov2cor(as.matrix(R_i)), 4))
cat("\n")
for (j in 1:nyear.score) {
cat("\ngamma_teach_year", j, "\n")
print(round(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)]), 4))
cat("\n")
print(round(cov2cor(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)])), 4))
cat("\n")
flush.console()
}
if (control$school.effects) {
j <- j + 1
cat("\n")
cat("\ngamma_school_effects\n")
print(round(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)]), 4))
cat("\n")
print(round(cov2cor(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)])), 4))
cat("\n")
flush.console()
}
cat("\n")
cat("alphas (persistence parameters):\n")
print(round(alpha, 4))
rm(j)
}
break
}
}
if ((control$verbose) & (it > 1)) {
cat("\n\niter:", it, "\n")
cat("log-likelihood:", sprintf("%.7f", lgLik[it]), "\n")
cat("change in loglik:", sprintf("%.7f", lgLik[it] - lgLik[it -
1]), "\n")
cat("fixed effects:", round(ybetas, 4), "\n")
cat("R_i:\n")
print(round(as.matrix(R_i), 4))
cat("\n")
print(round(cov2cor(as.matrix(R_i)), 4))
cat("\n")
cat("G:\n")
for (j in 1:nyear.score) {
cat("\ngamma_teach_year", j, "\n")
print(round(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)]), 4))
cat("\n")
print(round(cov2cor(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)])), 4))
cat("\n")
flush.console()
}
if (control$school.effects) {
j <- j + 1
cat("\n")
cat("\ngamma_school_effects\n")
print(round(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)]), 4))
cat("\n")
print(round(cov2cor(ltriangle(reduce.G(G, nyear.score, nteacher)[(3 *
j - 2):(3 * j)])), 4))
cat("\n")
flush.console()
}
cat("\n")
cat("alphas (persistence parameters):\n")
print(round(alpha, 4))
rm(j)
}
flush.console()
#M step follows
#M step update for persistence parameters requies a single
#newton step
if (persistence == "VP") {
alpha.parm <- alpha[!((1:nalpha) %in% alpha.diag)]
alpha.parm.old <- alpha.parm
s.prev <- numeric(length(alpha.parm))
# one step is sufficient because score function is linear in alpha
s <- alpha.score(alpha.parm, alpha = alpha, temp_mat_R = temp_mat_R[seq(1,
n_eta, 2), seq(1, n_eta, 2)], nalpha = nalpha, alpha.diag = alpha.diag,
P = P, R_inv = R.full.inv[J.mat$response == "score", J.mat$response ==
"score"], eta.hat = eta.hat[seq(1, n_eta, 2)], ybetas = ybetas,
X = J.X[J.mat$response == "score", ], Y = J.Y[J.mat$response ==
"score"])
j <- symmpart(jacobian(alpha.score, alpha.parm, method = "simple", alpha = alpha,
temp_mat_R = temp_mat_R[seq(1, n_eta, 2), seq(1, n_eta, 2)],
nalpha = nalpha, alpha.diag = alpha.diag, P = P, R_inv = R.full.inv[J.mat$response ==
"score", J.mat$response == "score"], eta.hat = eta.hat[seq(1,
n_eta, 2)], ybetas = ybetas, X = J.X[J.mat$response == "score",
], Y = J.Y[J.mat$response == "score"]))
if(control$independent.responses){
indep.key=ltriangle(1:LRI)
coord.to.delete=indep.key[nrow(indep.key),-ncol(indep.key)]
for(CTD.ind in 1:length(coord.to.delete)){
j[coord.to.delete[CTD.ind],]<-j[,coord.to.delete[CTD.ind]]<-numeric(LRI-1)
j[coord.to.delete[CTD.ind],coord.to.delete[CTD.ind]]<-1
s[coord.to.delete[CTD.ind]]<-0
}
}
hesprod <- solve(j, s)
alpha.parm <- alpha.parm - hesprod
alphan <- alpha
alphan[!((1:nalpha) %in% alpha.diag)] <- alpha.parm
}
# end update alphas
#M-step update for G. See Equation (14) of Karl, Yang, Lohr(2013)
indx <- 1
Gn <- c(NULL)
gt <- list()
for (i in 1:nyear.score) {
gt[[i]] <- matrix(0, 2, 2)
for (j in 1:nteacher[i]) {
gt[[i]] <- gt[[i]] + temp_mat[(2 * (indx - 1) + 1):(2 * indx),
(2 * (indx - 1) + 1):(2 * indx)]
indx <- indx + 1
}
if(control$independent.responses) gt[[i]][2,1]<- gt[[i]][1,2]<-0
}
if (control$school.effects) {
gt.school <- matrix(0, 2, 2)
for (j in 1:nschool_effects) {
gt.school <- gt.school + temp_mat[(2 * (indx - 1) + 1):(2 * indx),
(2 * (indx - 1) + 1):(2 * indx)]
indx <- indx + 1
}
}
Gn <- Matrix(0, 0, 0)
for (i in 1:nyear.score) {
Gn <- bdiag(Gn, suppressMessages(kronecker(Diagonal(nteacher[i]),
gt[[i]]/nteacher[i])))
}
if (control$school.effects) {
Gn <- bdiag(Gn, suppressMessages(kronecker(Diagonal(nschool_effects),
gt.school/nschool_effects)))
}
################7_16_17
#The code in this if statement is not used in the calculations.
#This is older code that has been replaced by C++ code. This code is
#left in the program for running benchmarks against the C++ code.
if(control$cpp.benchmark){
pattern.sum <- list()
J.Y.p <- list()
t.J.X.p <- list()
t.J.Z.p <- list()
J.Y.p.rownumber <- list()
J.mat$rownumber <- 1:nrow(J.mat)
pattern.key <- dec2bin(1:(2^nyear.pseudo - 1))
for (p in unique(J.mat$pat)) {
pat[[p]] <- which(J.mat$pat == p)
t.J.X.p[[p]] <- t(J.X[pat[[p]], , drop = FALSE] )
t.J.Z.p[[p]] <- t(J.Z[pat[[p]], , drop = FALSE])
J.Y.p[[p]] <- J.Y[pat[[p]]]
J.Y.p.rownumber[[p]] <- J.mat$rownumber[pat[[p]]]
pattern.count[[p]] <- length(J.Y.p[[p]])
pattern.length[[p]] <- sum(pattern.key[p, ])
}
rtime<-proc.time()[3]
if(control$REML){
for (p in unique(J.mat$pat)) {
pattern.sum[[p]] <- matrix(0, pattern.length[[p]], pattern.length[[p]])
for (i in 1:(pattern.count[[p]]/pattern.length[[p]])) {
X.t <- t(t.J.X.p[[p]][,(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]]) , drop = FALSE])
Y.t <- J.Y.p[[p]][(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]])]
Z.t <- t(t.J.Z.p[[p]][,(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]]) , drop = FALSE])
rownumber.t <- J.Y.p.rownumber[[p]][(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]])]
temp.t <- Y.t - X.t %*% ybetas
pattern.sum[[p]] <- pattern.sum[[p]] + inv.sqrt.W[rownumber.t, rownumber.t] %*% (tcrossprod(temp.t) - tcrossprod(temp.t, Z.t %*% eta.hat) - tcrossprod(Z.t %*% eta.hat, temp.t) + Z.t %*%
tcrossprod(temp_mat, Z.t)+X.t %*%tcrossprod(betacov, X.t)+X.t %*%tcrossprod(C12, Z.t)+Z.t %*%tcrossprod(t(C12), X.t)) %*% inv.sqrt.W[rownumber.t, rownumber.t]
}
}
}else{
for (p in unique(J.mat$pat)) {
pattern.sum[[p]] <- matrix(0, pattern.length[[p]], pattern.length[[p]])
for (i in 1:(pattern.count[[p]]/pattern.length[[p]])) {
X.t <- t(t.J.X.p[[p]][,(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]]) , drop = FALSE])
Y.t <- J.Y.p[[p]][(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]])]
Z.t <- t(t.J.Z.p[[p]][,(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]]) , drop = FALSE])
rownumber.t <- J.Y.p.rownumber[[p]][(1 + (i - 1) * pattern.length[[p]]):(i * pattern.length[[p]])]
temp.t <- Y.t - X.t %*% ybetas
pattern.sum[[p]] <- pattern.sum[[p]] + inv.sqrt.W[rownumber.t, rownumber.t] %*% (tcrossprod(temp.t) - tcrossprod(temp.t, Z.t %*% eta.hat) - tcrossprod(Z.t %*% eta.hat, temp.t) + Z.t %*%
tcrossprod(temp_mat_R, Z.t)) %*% inv.sqrt.W[rownumber.t, rownumber.t]
}
}
}
cat("R pattern sum:\n")
print(pattern.sum)
r.block.runtime= proc.time()[3] - rtime
R.rmstep.times<-c(R.rmstep.times,r.block.runtime)
cat("R code time for R-matrix calculations: ", r.block.runtime, " seconds\n")
}
################7_16_17
#pattern.sum stores the sums shown in Equation (16) of Karl, Yang
#Lohr (2013). If no observations are missing, then there is only one
#element in the pattern.sum list, a matrix of size equal to one of the
#blocks of R. Otherwise, there is a different element (a matrix)
#in the list for each missing data pattern.
#This function needs to sum the blocks along the diagonal of a matrix
#of size equal to that of R. Since J.X and J.Z are stored in sparse
#(triplet) format in R, we need to extract the columns of those triplets
#and send them to C++, where they will again be built into sparse
#matrices. That is the function of the @i, @p, and @x. See the documentation
#for the R package Matrix for details.
pattern.sum <- list()
cpptime<-proc.time()[3]
for (p in unique(J.mat$pat)) {
if(control$REML){
pattern.sum[[p]] <- REML_Rm(invsqrtW_ = as.matrix(diag(inv.sqrt.W)), betacov_ = betacov, C12_ = C12,
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim))
}else{
pattern.sum[[p]] <- R_mstep2(invsqrtW_ = as.matrix(diag(inv.sqrt.W)),
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat_R),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim))
}
}
if(control$cpp.benchmark){
cat("C++ pattern sum:\n")
print(pattern.sum)
cpp.block.runtime= proc.time()[3] - cpptime
cat("C++ code time for R-matrix calculations: ", cpp.block.runtime, " seconds\n")
cpp.rmstep.times<-c(cpp.rmstep.times,cpp.block.runtime)
}
#extract the unique parameters in the (full) block of R, R_i.
#M step update for R
#a NR algorithm (with gradient ascent at the beginning for
#stability) is used to solve the score equations from
#Equation (16) of Karl, Yang, Lohr (2013)
R_i.parm <- ltriangle(as.matrix(R_i))
LRI <- length(R_i.parm)
R_i.parm.constrained <- R_i.parm[-LRI]
R.cc <- 1
hes.count <- 1
s.prev <- numeric(length(R_i.parm))
while (R.cc > 1e-04) {
s <- pattern.f.score.constrained(R_i.parm.constrained = R_i.parm.constrained,
nyear = nyear.pseudo, pattern.parmlist2 = pattern.parmlist2,
pattern.count = pattern.count, pattern.length = pattern.length,
pattern.Rtemplate = pattern.Rtemplate, pattern.diag = pattern.diag,
pattern.key = pattern.key, pattern.sum = pattern.sum)
j <- symmpart(jacobian(pattern.f.score.constrained, c(R_i.parm.constrained),
method = "simple", nyear = nyear.pseudo, pattern.parmlist2 = pattern.parmlist2,
pattern.count = pattern.count, pattern.length = pattern.length,
pattern.Rtemplate = pattern.Rtemplate, pattern.diag = pattern.diag,
pattern.key = pattern.key, pattern.sum = pattern.sum))
if(control$independent.responses){
indep.key=ltriangle(1:LRI)
coord.to.delete=indep.key[nrow(indep.key),-ncol(indep.key)]
for(CTD.ind in 1:length(coord.to.delete)){
j[coord.to.delete[CTD.ind],]<-j[,coord.to.delete[CTD.ind]]<-numeric(LRI-1)
j[coord.to.delete[CTD.ind],coord.to.delete[CTD.ind]]<-1
s[coord.to.delete[CTD.ind]]<-0
}
}
if (it == 1 & (hes.count < 10) & PQL.it <= 3) {
hesprod <- solve(j + max(c(diag(j), 5)) * diag(LRI - 1), s)
R.cc <- s %*% s
if (hes.count == 9)
R.cc <- 0
} else if ((it <= 4) & (hes.count < 30) & PQL.it == 1) {
hesprod <- solve(j + max(c(diag(j), 5) * ((1 - hes.count/31)^2)) *
diag(LRI - 1), s)
R.cc <- s %*% s
if (hes.count == 29)
R.cc <- 0
} else {
hesprod <- solve(j, s)
R.cc <- s %*% s
}
R_i.parm.constrained <- R_i.parm.constrained - hesprod
hes.count <- hes.count + 1
s.prev <- s
}
R_i.parm[-LRI] <- R_i.parm.constrained
R_i <- ltriangle(R_i.parm)
if(control$independent.responses){
R_i[nrow(R_i),]<-R_i[,nrow(R_i)]<-0
R_i[nrow(R_i),nrow(R_i)]<-1
}
rm(R_i.parm)
dimnames(R_i) <- list(NULL, NULL)
if (length(mis.list) > 0) {
R <- symmpart(suppressMessages(kronecker(Diagonal(nstudent), R_i))[-mis.list,
-mis.list])
rinv <- Matrix(0, 0, 0)
R_i.inv <- chol2inv(chol(R_i))
for (i in 1:nstudent) {
if (!any(mis.list %in% seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo))) {
rinv <- bdiag(rinv, R_i.inv)
} else {
inv.indx <- which(!(seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo) %in%
mis.list))
rinv <- bdiag(rinv, chol2inv(chol(R_i[inv.indx, inv.indx])))
}
}
R_inv <- rinv
} else {
R <- symmpart(suppressMessages(kronecker(Diagonal(nstudent), R_i)))
R_inv <- symmpart(suppressMessages(kronecker(Diagonal(nstudent),
chol2inv(chol(R_i)))))
}
R.full <- drop0(symmpart(sqrt.W %*% R %*% sqrt.W))
R.full.inv <- drop0(symmpart(inv.sqrt.W %*% R_inv %*% inv.sqrt.W))
names(ybetas) <- colnames(J.X)
G <- Gn
#if the VP model is used, then Z need to be updated with the
#new persistence parameters
#See the discussion in Section 3.4 of Karl, Yang, Lohr (2013)
if (persistence == "VP") {
alpha <- alphan
if (!control$school.effects) {
Z <- Matrix(0, nrow(Z_mat), nteach_effects)
} else {
Z <- Matrix(0, nrow(Z_mat), nteach_effects + nschool_effects)
}
for (i in 1:nalpha) {
comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
Z <- Z + alpha[i] * P[[comp[1]]][[comp[2]]]
}
if (control$school.effects) {
colnames(Z) <- eta_effects
Z <- Z + Z.school.only
}
if (!control$school.effects) {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects)
Z.expand[, seq(1, 2 * nteach_effects, by = 2)] <- Z
} else {
Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects + 2 * nschool_effects)
Z.expand[, seq(1, 2 * nteach_effects + 2 * nschool_effects, by = 2)] <- Z
}
colnames(Z) <- eta_effects
# B.Z.expand <- Matrix(0, nrow(B.mat), 2 * nteach_effects) B.Z.expand[, seq(2, 2
# * nteach_effects, by = 2)] <- B.Z
J.Z <- rbind(Z.expand, B.Z.expand)
J.Z <- J.Z[order(J.mat.original$student, J.mat.original$year),
]
colnames(J.Z) <- interleave(colnames(Z), colnames(B.Z))
for (p in unique(J.mat$pat)) {
J.Z.p[[p]] <- J.Z[pat[[p]], , drop = FALSE]
}
}
G.chol <- chol(G)
G.inv <- chol2inv(G.chol)
R.inv.Z <- R.full.inv %*% J.Z
V.1 <- chol2inv(chol(G.inv + t(J.Z) %*% R.inv.Z))
tX.Rinv.Z <- t(J.X) %*% R.inv.Z
tX.Rinv.X <- t(J.X) %*% R.full.inv %*% J.X
#M-step update for fixed effects vector (both those from the
#score and the binary submodels)
ybetas <- as.vector(chol2inv(chol(forceSymmetric(symmpart(tX.Rinv.X -
tX.Rinv.Z %*% V.1 %*% t(tX.Rinv.Z))))) %*% (t(J.X) %*% R.full.inv -
tX.Rinv.Z %*% V.1 %*% t(R.inv.Z)) %*% J.Y)
if (control$verbose)
cat("Iteration Time: ", proc.time()[3] - ptm, " seconds\n")
flush.console()
}
}
# em loop # End of EM
nalpha.free <- sum(!((1:nalpha) %in% alpha.diag))
#The code in this block is not used. If uncommented, it shows how
#a NR approach may be used to estimate the model instead of an EM
#algorithm. It is not used because the NR algorithm is unstable in
#the presence of strong correlations in G. See the discussion in
#Section 3 of Karl, Yang, Lohr (2013)
# if (control$NR == TRUE & PQL.it > control$EM.to.NR) {
# # NR loop
# cat("Beginning NR algorithm\n")
# convh <- abs(loglikelihood)
# nr.counter <- 0
# while (as.numeric(convh/abs(loglikelihood)) > 1e-09) {
# nr.counter <- nr.counter + 1
# new.eta <- update.eta(X = J.X, Y = J.Y, Z = J.Z, R.full.inv = R.full.inv,
# ybetas = ybetas, G = G, cons.logLik = cons.logLik, Ny = Ny, nstudent = nstudent,
# n_eta = n_eta)
# eta.hat <- attr(new.eta, "eta")
# C12 <- attr(new.eta, "C12")
# var.eta.hat <- new.eta
# temp_mat <- var.eta.hat + tcrossprod(eta.hat, eta.hat)
# temp_mat_R <- attr(new.eta, "h.inv") + tcrossprod(eta.hat, eta.hat)
# cat("\n", attr(new.eta, "likelihood") - loglikelihood, "\n")
# loglikelihood <- attr(new.eta, "likelihood")
# # if (nr.counter == 1 | persistence == 'VP') {
# if (TRUE) {
# pattern.sum <- list()
# for (p in unique(J.mat$pat)) {
# pattern.sum[[p]] <- Matrix(R_mstep2(invsqrtW_ = as.matrix(diag(inv.sqrt.W)),
# JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
# patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
# ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat),
# tempmatR_ = as.matrix(temp_mat_R), JXpi_ = as.matrix(J.X.p[[p]]@i),
# JXpp_ = as.matrix(J.X.p[[p]]@p), JXpx_ = as.matrix(J.X.p[[p]]@x),
# JXpdim_ = as.matrix(J.X.p[[p]]@Dim), JZpi_ = as.matrix(J.Z.p[[p]]@i),
# JZpp_ = as.matrix(J.Z.p[[p]]@p), JZpx_ = as.matrix(J.Z.p[[p]]@x),
# JZpdim_ = as.matrix(J.Z.p[[p]]@Dim)))
# }
#
# }
# names(ybetas) <- colnames(J.X)
# if (persistence == "CP" | persistence == "ZP") {
# thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
# nteacher = nteacher))
# } else if (persistence == "VP") {
# thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
# nteacher = nteacher), alpha[!((1:nalpha) %in% alpha.diag)])
# }
# thetas.old <- thetas
# ybetas.old <- ybetas
# Hessian.inv <- hessian.f()
# score.thetas <- Score(thetas = thetas, eta = eta.hat, ybetas = ybetas,
# X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W, inv.sqrt.W = inv.sqrt.W,
# pattern.sum = pattern.sum, con = control, n_ybeta = n_ybeta, n_eta = n_eta,
# nstudent = nstudent, nteacher = nteacher, Kg = Kg, cons.logLik = cons.logLik,
# mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2, pattern.count = pattern.count,
# pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
# pattern.diag = pattern.diag, pattern.key = pattern.key, Ny = Ny,
# persistence = persistence, P = P, alpha.diag = alpha.diag, nalpha = nalpha,
# alpha = alpha)
# thetas <- thetas - Hessian.inv %*% score.thetas
# convh <- t(score.thetas) %*% Hessian.inv %*% score.thetas
# # score.ybetas<-t(J.X)%*%R.full.inv%*%(J.Y-J.X%*%ybetas-J.Z%*%eta.hat)
# n_Rparm <- nyear.pseudo * (nyear.pseudo + 1)/2 - 1
# G <- thetas[(n_Rparm + 1):(n_Rparm + ncol(G.mat))]
# G <- reduce.G(G = G, nyear.score = nyear.score, nteacher = nteacher)
# if (persistence == "VP") {
# alpha.parm <- thetas[(n_Rparm + ncol(G.mat) + 1):length(thetas)]
# alpha[!((1:nalpha) %in% alpha.diag)] <- alpha.parm
# if (!control$school.effects) {
# Z <- Matrix(0, nrow(Z_mat), nteach_effects)
# } else {
# Z <- Matrix(0, nrow(Z_mat), nteach_effects + nschool_effects)
# }
# for (i in 1:nalpha) {
# comp <- which(tril(ltriangle(1:nalpha)) == i, arr.ind = TRUE)
# Z <- Z + alpha[i] * P[[comp[1]]][[comp[2]]]
# }
# if (control$school.effects) {
# colnames(Z) <- eta_effects
# Z <- Z + Z.school.only
# }
# if (!control$school.effects) {
# Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects)
# Z.expand[, seq(1, 2 * nteach_effects, by = 2)] <- Z
# } else {
# Z.expand <- Matrix(0, nrow(Z_mat), 2 * nteach_effects + 2 * nschool_effects)
# Z.expand[, seq(1, 2 * nteach_effects + 2 * nschool_effects, by = 2)] <- Z
# }
# colnames(Z) <- eta_effects
# J.Z <- rbind(Z.expand, B.Z.expand)
# J.Z <- J.Z[order(J.mat.original$student, J.mat.original$year),
# ]
# colnames(J.Z) <- interleave(colnames(Z), colnames(B.Z))
# for (p in unique(J.mat$pat)) {
# J.Z.p[[p]] <- J.Z[pat[[p]], , drop = FALSE]
# }
# # if(!huge.flag){Z.dense <- as.matrix(Z)} for (p in unique(Z_mat$pat)) {
# # if(!huge.flag){ Z.p[[p]] <- Z.dense[pat[[p]], , drop = FALSE]}else{ Z.p[[p]] <-
# # Z[pat[[p]], , drop = FALSE] } } rm(Z.dense)
# }
# R_i <- ltriangle(c(as.vector(thetas[1:n_Rparm]), 1))
# R_i.parm <- c(as.vector(thetas[1:n_Rparm]), 1)
# LRI <- length(R_i.parm)
# R_i.parm.constrained <- R_i.parm[-LRI]
# if (length(mis.list) > 0) {
# R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
# R_i)[-mis.list, -mis.list]))
# rinv <- Matrix(0, 0, 0)
# R_i.inv <- chol2inv(chol(R_i))
# for (i in 1:nstudent) {
# if (!any(mis.list %in% seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo))) {
# rinv <- bdiag(rinv, R_i.inv)
# } else {
# inv.indx <- which(!(seq((i - 1) * nyear.pseudo + 1, i * nyear.pseudo) %in%
# mis.list))
# rinv <- bdiag(rinv, chol2inv(chol(R_i[inv.indx, inv.indx])))
# }
# }
# R_inv <- rinv
# } else {
# R <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
# R_i)))
# R_inv <- symmpart(suppressMessages(kronecker(suppressMessages(Diagonal(nstudent)),
# chol2inv(chol(R_i)))))
# }
# R.full <- drop0(symmpart(sqrt.W %*% R %*% sqrt.W))
# R.full.inv <- drop0(symmpart(inv.sqrt.W %*% R_inv %*% inv.sqrt.W))
# G.chol <- chol(G)
# G.inv <- chol2inv(G.chol)
# R.inv.Z <- R.full.inv %*% J.Z
# V.1 <- chol2inv(chol(G.inv + t(J.Z) %*% R.inv.Z))
# tX.Rinv.Z <- t(J.X) %*% R.inv.Z
# tX.Rinv.X <- t(J.X) %*% R.full.inv %*% J.X
# ybetas <- as.vector(chol2inv(chol(symmpart(tX.Rinv.X - tX.Rinv.Z %*%
# V.1 %*% t(tX.Rinv.Z)))) %*% (t(J.X) %*% R.full.inv - tX.Rinv.Z %*%
# V.1 %*% t(R.inv.Z)) %*% J.Y)
# }
# new.eta <- update.eta(X = J.X, Y = J.Y, Z = J.Z, R.full.inv = R.full.inv,
# ybetas = ybetas, G = G, cons.logLik = cons.logLik, Ny = Ny, nstudent = nstudent,
# n_eta = n_eta)
# eta.hat <- attr(new.eta, "eta")
# C12 <- attr(new.eta, "C12")
# var.eta.hat <- new.eta
# temp_mat <- var.eta.hat + tcrossprod(eta.hat, eta.hat)
# temp_mat_R <- attr(new.eta, "h.inv") + tcrossprod(eta.hat, eta.hat)
# cat("\n", attr(new.eta, "likelihood") - loglikelihood, "\n")
# }
# end NR
#extract the unique model parameters and compare to those from the
#previous pseudolikihood iteration
#The convergence criterion is the same as one of the available
#options in SAS PROC GLIMMIX
if (persistence == "CP" | persistence == "ZP") {
thetas.pql <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
nteacher = nteacher))
} else if (persistence == "VP") {
thetas.pql <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
nteacher = nteacher), alpha[!((1:nalpha) %in% alpha.diag)])
}
parms.new <- c(ybetas, thetas.pql)
if (PQL.it > 1) {
max.change <- 2 * max(abs(parms.new - parms.old)/(abs(parms.new) + abs(parms.old) +
1e-06))
if (control$verbose) {
cat("\nMaximum relative parameter change between PQL iterations: ",
max.change, "\n")
}
if (max.change < control$pconv)
break
}
#Update the pseudo-responses after each pseudo-likelihood iteration
#This is direct out of Wolfinger and O'Connell (1993).
#Note: even though the variables are called "PQL", this is actually
#a pseudo-likelihood approach. They are identical under some circumstances
thetas.pql.old <- thetas.pql
ybetas.pql.old <- ybetas
parms.old <- c(ybetas.pql.old, thetas.pql.old)
# we use inverse of Wolfinger's W this part of the code transforms the responses
# (pseudo-likelihood approach) most of this code was taken from the R function
# glmmPQL
J.mat[J.mat$response == "outcome", ]$fit <- as.vector(J.X[J.mat$response ==
"outcome", ] %*% ybetas + J.Z[J.mat$response == "outcome", ] %*% eta.hat)
J.mat[J.mat$response == "outcome", ]$mu <- as.vector(fam.binom$linkinv(J.mat[J.mat$response ==
"outcome", ]$fit))
J.mat[J.mat$response == "outcome", ]$mu.eta.val <- fam.binom$mu.eta(J.mat[J.mat$response ==
"outcome", ]$fit)
J.mat[J.mat$response == "outcome", ]$nu <- as.vector(J.mat[J.mat$response ==
"outcome", ]$fit) + (J.mat[J.mat$response == "outcome", ]$y - J.mat[J.mat$response ==
"outcome", ]$mu)/J.mat[J.mat$response == "outcome", ]$mu.eta.val
J.mat[J.mat$response == "outcome", ]$sqrt.w <- sqrt(fam.binom$variance(J.mat[J.mat$response ==
"outcome", ]$mu)/J.mat[J.mat$response == "outcome", ]$mu.eta.val^2)
J.mat[J.mat$response == "score", ]$sqrt.w <- 1
J.mat[J.mat$response == "score", ]$nu <- J.mat[J.mat$response == "score",
]$y
sqrt.W <- Diagonal(x = J.mat$sqrt.w)
inv.sqrt.W <- Diagonal(x = 1/(J.mat$sqrt.w))
R.full <- drop0(symmpart(sqrt.W %*% R %*% sqrt.W))
R.full.inv <- drop0(symmpart(inv.sqrt.W %*% R_inv %*% inv.sqrt.W))
J.Y <- J.mat$nu
for (p in unique(J.mat$pat)) {
J.Y.p[[p]] <- J.Y[pat[[p]]]
}
}
# pql loop
#end of the outer pseduo-likelihood loop
#parameter estimates do not changes past this point
names(ybetas) <- colnames(J.X)
if (persistence == "CP" | persistence == "ZP") {
thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
nteacher = nteacher))
} else if (persistence == "VP") {
thetas <- c(ltriangle(as.matrix(R_i))[-LRI], reduce.G(G = G, nyear.score = nyear.score,
nteacher = nteacher), alpha[!((1:nalpha) %in% alpha.diag)])
}
lgLik.hist <- lgLik
lgLik <- lgLik[it]
Hessian <- NA
std_errors <- c(rep(NA, length(thetas)))
#hessian of the variance components (R and G) calculated with a central
#difference approximation using the Score() function
if (control$hessian == TRUE) {
if (control$verbose)
cat("Calculating Hessian of the variance components...")
flush.console()
pattern.sum <- list()
for (p in unique(J.mat$pat)) {
if(control$REML){
pattern.sum[[p]] <- Matrix(REML_Rm(invsqrtW_ = as.matrix(diag(inv.sqrt.W)), betacov_ = betacov, C12_ = C12,
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim)))
}else{
pattern.sum[[p]] <- Matrix(R_mstep2(invsqrtW_ = as.matrix(diag(inv.sqrt.W)),
JYp_ = as.matrix(J.Y.p[[p]]), loopsize_ = pattern.count[[p]]/pattern.length[[p]],
patternlength_ = pattern.length[[p]], rownumber_ = as.matrix(J.Y.p.rownumber[[p]]),
ybetas_ = as.matrix(ybetas), etahat_ = as.matrix(eta.hat), tempmatR_ = as.matrix(temp_mat_R),
JXpi_ = as.matrix(J.X.p[[p]]@i), JXpp_ = as.matrix(J.X.p[[p]]@p),
JXpx_ = as.matrix(J.X.p[[p]]@x), JXpdim_ = as.matrix(J.X.p[[p]]@Dim),
JZpi_ = as.matrix(J.Z.p[[p]]@i), JZpp_ = as.matrix(J.Z.p[[p]]@p),
JZpx_ = as.matrix(J.Z.p[[p]]@x), JZpdim_ = as.matrix(J.Z.p[[p]]@Dim)))
}
}
if (control$hes.method == "richardson") {
Hessian <- ltriangle(ltriangle(jacobian(Score, thetas, eta = eta.hat,
ybetas = ybetas, X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W, inv.sqrt.W = inv.sqrt.W,
pattern.sum = pattern.sum, con = control, n_ybeta = n_ybeta, n_eta = n_eta,
nstudent = nstudent, nteacher = nteacher, Kg = Kg, cons.logLik = cons.logLik,
mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2, pattern.count = pattern.count,
pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
pattern.diag = pattern.diag, pattern.key = pattern.key, Ny = Ny,
persistence = persistence, P = P, alpha.diag = alpha.diag, nalpha = nalpha,
alpha = alpha)))
} else {
Hessian <- ltriangle(ltriangle(jacobian(Score, thetas, method = "simple",
eta = eta.hat, ybetas = ybetas, X = J.X, Y = J.Y, Z = J.Z, sqrt.W = sqrt.W,
inv.sqrt.W = inv.sqrt.W, pattern.sum = pattern.sum, con = control,
n_ybeta = n_ybeta, n_eta = n_eta, nstudent = nstudent, nteacher = nteacher,
Kg = Kg, cons.logLik = cons.logLik, mis.list = mis.list, pattern.parmlist2 = pattern.parmlist2,
pattern.count = pattern.count, pattern.length = pattern.length, pattern.Rtemplate = pattern.Rtemplate,
pattern.diag = pattern.diag, pattern.key = pattern.key, Ny = Ny,
persistence = persistence, P = P, alpha.diag = alpha.diag, nalpha = nalpha,
alpha = alpha)))
}
std_errors <- try(c(sqrt(diag(solve(Hessian)))), silent = TRUE)
hes.warn <- FALSE
if (any(eigen(Hessian)$values <= 0)) {
if (control$verbose)
cat("Warning: Hessian not PD", "\n")
std_errors <- c(rep(NA, length(thetas)))
hes.warn <- TRUE
}
}
#Henderson mixed model equations produce standard errors for
#fixed effects and EBLUPs
c.temp <- crossprod(J.X, R.full.inv) %*% J.Z
c.1 <- rbind(crossprod(J.X, R.full.inv) %*% J.X, t(c.temp))
G.inv <- chol2inv(chol(G))
c.2 <- rbind(c.temp, H.eta(G.inv, J.Z, R.full.inv))
C_inv <- cbind(c.1, c.2)
C <- solve(C_inv)
eblup_stderror <- sqrt(diag(C)[-c(1:n_ybeta)])
ybetas_stderror <- sqrt(diag(C)[1:n_ybeta])
C<-as.matrix(C)
rownames(C)<-colnames(C)<-c(names(ybetas),colnames(J.Z))
rm( C_inv, c.2, c.1, c.temp)
eblup <- as.matrix(cbind(eta.hat, eblup_stderror))
eblup <- as.data.frame(eblup)
eblup <- as.data.frame(cbind(colnames(J.Z), eblup))
#remainder of the code is just formatting to return to user
colnames(eblup) <- c("effect", "EBLUP", "std_error")
t_lab <- as.vector(NULL)
r_lab <- as.vector(NULL)
for (j in 1:nyear.score) {
# ne <- (Kg[j] * (Kg[j] + 1))/2 y <- c(NULL) x <- c(NULL) for (k in 1:Kg[j]) { x
# <- c(x, k:Kg[j]) y <- c(y, rep(k, (Kg[j] - k + 1))) }
t_lab <- c(t_lab, paste("teacher effect from year", j, " [1,1] (score variance)",
sep = ""))
t_lab <- c(t_lab, paste("teacher effect from year", j, " [2,1] (score-outcome covariance)",
sep = ""))
t_lab <- c(t_lab, paste("teacher effect from year", j, " [2,2] (outcome variance)",
sep = ""))
}
if (control$school.effects) {
t_lab <- c(t_lab, rep("School Effect", 3))
}
y <- c(NULL)
x <- c(NULL)
for (k in 1:nyear.pseudo) {
x <- c(x, k:nyear.pseudo)
y <- c(y, rep(k, (nyear.pseudo - k + 1)))
}
r_lab <- paste("error covariance", ":[", x, ",", y, "]", sep = "")
# rm(j, ne)
alpha.label <- c(NULL)
for (i in 1:(nyear.score - 1)) {
for (j in (i + 1):(nyear.score)) {
alpha.label <- c(alpha.label, paste("alpha_", j, i, sep = ""))
}
}
if (persistence == "CP" | persistence == "ZP") {
effect_la <- c(names(ybetas), r_lab, t_lab)
} else if (persistence == "VP") {
effect_la <- c(names(ybetas), r_lab, t_lab, alpha.label)
}
constant.element <- which(effect_la == paste("error covariance", ":[", nyear.pseudo,
",", nyear.pseudo, "]", sep = ""))
if (control$hessian == TRUE) {
parameters <- round(cbind(append(c(ybetas, thetas), 1, after = constant.element -
1), append(c(ybetas_stderror, std_errors), NA, after = constant.element -
1)), 4)
colnames(parameters) <- c("Estimate", "Standard Error")
rownames(parameters) <- as.character(effect_la)
}
if (control$hessian == FALSE) {
parameters <- round(cbind(append(c(ybetas, thetas), 1, after = constant.element -
1), c(ybetas_stderror, rep(NA, length(thetas) + 1))), 4)
colnames(parameters) <- c("Estimate", "Standard Error")
rownames(parameters) <- as.character(effect_la)
}
if (control$verbose)
cat("done.\n")
mresid <- as.numeric(J.Y - J.X %*% ybetas)
cresid <- as.numeric(mresid - J.Z %*% eta.hat)
yhat <- as.numeric(J.X %*% ybetas + J.Z %*% eta.hat)
yhat.m <- as.numeric(J.X %*% ybetas)
# rchol <- try(chol(R.full.inv)) yhat.s <- try(as.vector(rchol %*% (yhat)))
# sresid <- try(as.vector(rchol %*% J.Y - yhat.s)) print(system.time(
# Vchol<-chol(J.Z%*%G%*%t(J.Z)+R.full))) print(system.time(
# sresid<-solve(Vchol,J.Y-res$yhat) ))
gam_t <- list()
for (i in 1:nyear.score) {
gam_t[[i]] <- as.matrix(ltriangle(reduce.G(G, nyear.score = nyear.score,
nteacher = nteacher)[(1 + 3 * (i - 1)):(3 * i)]))
colnames(gam_t[[i]]) <- rownames(gam_t[[i]]) <- c("score", "outcome")
}
persistence_parameters = ltriangle(alpha)
persistence_parameters[upper.tri(persistence_parameters)] <- NA
if (!control$school.effects) {
school.subset <- NULL
} else {
school.subset <- eblup[(2 * nteach_effects + 1):n_eta, ]
}
teach.cov <- lapply(gam_t, function(x) round(x, 4))
names(J.mat)[names(J.mat) == "y"] <- "y.combined.original"
R_i <- as.matrix(R_i)
rownames(R_i) <- colnames(R_i) <- c(paste("year", 1:(ncol(R_i) - 1), "score",
sep = "_"), "outcome")
parameters <- cbind(parameters, z = round(parameters[, 1]/parameters[, 2], 2))
parameters[, 3][grepl("variance", rownames(parameters)) & !grepl("covariance",
rownames(parameters))] <- NA
parameters <- cbind(parameters, pvalue = round(2 * (1 - pnorm(abs(parameters[,
3]))), 3))
parameters <- cbind(parameters, lower.95.CI = round(parameters[, 1] - parameters[,
2] * qnorm(0.975), 3))
parameters <- cbind(parameters, upper.95.CI = round(parameters[, 1] + parameters[,
2] * qnorm(0.975), 3))
parameters[, 5:6][grepl("variance", rownames(parameters)) & !grepl("covariance",
rownames(parameters))] <- NA
res <- list(loglik = lgLik, teach.effects = eblup[1:(2 * nteach_effects), ],
school.effects = school.subset, parameters = parameters, Hessian = Hessian,
R_i = R_i, teach.cov = gam_t, mresid = mresid, cresid = cresid, y.combined = J.Y,
y.combined.hat = yhat, y.response.type = J.mat$response, y.year = J.mat$year,
num.obs = Ny, num.student = nstudent, num.year = nyear.score, num.teach = nteacher,
persistence = control$persistence, persistence_parameters = persistence_parameters,
X = J.X, Z = J.Z, G = G, R = R,C=C, R.full = R.full, sqrt.W = J.mat$sqrt.w, eblup = eblup,
fixed.effects = ybetas, joined.table = J.mat)
try(res$teach.effects$teacher_year <- as.numeric(substr(gsub(".*year", "", res$teach.effects[,
1]), 1, 1)))
# res$teach.effects$teacher_year <- key[match(res$teach.effects$teacher_year,
# key[, 2]), 1] res$teach.effects$effect_year <-
# key[match(res$teach.effects$effect_year, key[, 2]), 1]
class(res) <- "RealVAMS"
cat("Total Time: ", proc.time()[3] - ptm.total, " seconds\n")
if(control$cpp.benchmark){
cat("Run times for identical R and C++ code during each iteration\n")
df.bench=data.frame(Cpp.times=cpp.rmstep.times,R.times=R.rmstep.times)
print(df.bench)
print(summary(df.bench))
}
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.