###############################
# reml()
###############################
#' REML optimization algorithms for mixed-effect models.
#'
#' Evaluate the REML likelihood and algorithms for iterating to find maximum
#' REML estimates.
#'
#' @aliases reml ai em gradFun
#' @param nu,nuvin A \code{list} or \code{vector} of (co)variance parameters to
#' estimate on the transformed, or nu, scale.
#' @param skel A skeleton for reconstructing the list of (co)variance parameters.
#' @param thetaG,thetaR \code{Integer} vectors indexing the G-structure or
#' R-structure of the (co)variance parameters.
#' @param sigma2e A \code{numeric} value for the residual variance estimate
#' when it has been factored out of the Coefficient matrix of the Mixed Model
#' Equations, thus converting the (co)variance components to ratios
#' (represented by the variable lambda).
#' @param conv A \code{character} vector of (co)variance parameter constraints.
#' @param sLc A sparse \code{Matrix} containing the symbolic Cholesky
#' factorization of the coefficient matrix of the Mixed Model Equations.
#' @param Cinv A sparse \code{Matrix} containing the inverse of the Coefficient
#' matrix to the Mixed Model Equations.
#' @param modMats A \code{list} of the model matrices used to construct the
#' mixed model equations.
#' @param W,tWW A sparse \code{Matrix} containing the design matrices for the fixed
#' and random effects (\code{W}) and the cross-product of this (\code{tWW}).
#' @param Bpinv A matrix inverse of the matrix containing the prior specification
#' for fixed effects.
#' @param nminffx,nminfrfx,rfxlvls \code{Integers} specifying: (1) the difference
#' between the number of observations and fixed effects (of the full rank fixed
#' effects design matrix (X), (2) \code{nminffx} minus the total number of
#' random effects, and (3) a \code{vector} of levels for each term in the
#' random effects.
#' @param rfxIncContrib2loglik A \code{numeric} indicating the sum of constraint
#' contributions to the log-likelihood across all terms in the random effects
#' that have non-diagonal generalized inverse matrices (\code{ginverse}).
#' associated with a generalized inverse (\code{ginverse}).
#' @param RHS A sparse \code{Matrix} containing the Right-Hand Side to the
#' Mixed Model Equations.
#' @param sln,r Sparse \code{Matrices} containing the solutions or residuals
#' of the Mixed Model Equations.
#'
#' @return A \code{list} or \code{matrix} containing any of the previous
#' parameters described above, or the following that are in addition to or
#' instead of parameters above:
#' \describe{
#' \item{loglik }{The REML log-likelihood.}
#' \item{tyPy,logDetC }{Components of the REML log-likelihood derived from the
#' Cholesky factor of the Coefficient matrix to the Mixed Model Equations.}
#' \item{Cinv_ii }{A vector containing the diagonal elements of the inverse
#' of the Coefficient matrix to the Mixed Model Equations (i.e., the
#' diagonal entries of \code{Cinv}).}
#' \item{AI }{A \code{matrix} of values containing the Average Information
#' matrix, or second partial derivatives of the likelihood with respect to
#' the transformed (co)variance components (nu). The inverse of this matrix
#' gives the sampling variances of these transformed (co)variance components.}
#' \item{dLdnu }{A single column \code{matrix} of first derivatives of
#' the transformed (co)variance parameters (nu) with respect to the
#' log-Likelihood.}
#' }
#'
#' @author \email{matthewwolak@@gmail.com}
#' @export
reml <- function(nu, skel, thetaG, sLc,
modMats, W, Bpinv, nminffx, nminfrfx, rfxlvls, rfxIncContrib2loglik,
thetaR = NULL, #<-- non-NULL if lambda==FALSE
tWW = NULL, RHS = NULL){ #<-- non-NULL if lambda==TRUE
lambda <- is.null(thetaR)
#FIXME `[[thetaG+1]]` is just a kluge below: check when/if have > R matrices
Rinv <- as(solve(nu[[length(thetaG)+1]]), "symmetricMatrix")
Ginv <- lapply(thetaG, FUN = function(x){as(solve(nu[[x]]), "symmetricMatrix")}) # Meyer 1991, p.77 (<-- also see MMA on p70/eqn4)
##1c Now make coefficient matrix of MME
if(lambda){
tWKRinvW <- tWW #<-- if lambda, data "quadratic" with R factored out
tyRinvy <- crossprod(modMats$y)
} else{
### Rinv Kronecker with Diagonal/I
KRinv <- kronecker(Rinv, Diagonal(x = 1, n = modMats$Zr@Dim[[2L]]))
tyRinvy <- as(crossprod(modMats$y, KRinv) %*% modMats$y, "sparseMatrix")
### Components of Meyer 1989 eqn 2
tWKRinv <- crossprod(W, KRinv)
tWKRinvW <- tWKRinv %*% W
RHS <- Matrix(tWKRinv %*% modMats$y, sparse = TRUE)
## Meyer '97 eqn 11
### (Note different order of RHS from Meyer '89 eqn 6; Meyer '91 eqn 4)
}
##`sapply()` to multiply inverse G_i with ginverse element (e.g., I or geninv)
if(modMats$nG > 0){
C <- as(tWKRinvW + bdiag(c(Bpinv,
sapply(1:modMats$nG, FUN = function(u){kronecker(modMats$listGeninv[[u]], Ginv[[u]])}))), "symmetricMatrix")
} else C <- as(tWKRinvW + Bpinv, "symmetricMatrix")
## Make/Update symbolic factorization with variance parameters from this iteration
if(is.null(sLc)){
##1d Find best order of MMA/C/pivots!
# Graser et al. 1987 (p1363) when singular C, |C| not invariant to order
##of C, therefore same order (of pivots) must be used in each loglik iteration
## Hadfield (2010, MCMCglmm paper App B): details on chol(), ordering, and updating
# supernodal decomposition FALSE
## ?Cholesky documentation demos simplicial smaller sized object/more sparse
### However, tested `warcolak` trait1 with VA and VD univariate model
#### `super = FALSE` faster and smaller than when `super = TRUE`
#if(any(eigen(C)$values < 0)) cat(which(eigen(C)$values < 0), "\n") #FIXME
#TODO test if super=TRUE better
## supernodal might be better as coefficient matrix (C) becomes more dense
#### e.g., with a genomic relatedness matrix (see Masuda et al. 2014 & 2015)
sLc <- Cholesky(C, perm = TRUE, LDL = FALSE, super = FALSE)
# original order obtained by: t(P) %*% L %*% P or `crossprod(P, L) %*% P`
} else sLc <- update(sLc, C)
# solve MME for BLUEs/BLUPs
## Do now, because needed as part of calculating the log-likelihood
#XXX Do I need to solve for BLUEs (see Knight 2008 for just getting BLUPs eqn 2.13-15)
# chol2inv: Cinv in same permutation as C (not permutation of sLc/sLm)
#Cinv <<- chol2inv(sLc) #<-- XXX ~10x slower than `solve(C)` atleast for warcolak
# Cinv <<- solve(a = sLc, b = Ic, system = "A") #<-- XXX comparable speed to `solve(C)` at least for warcolak
#Cinv <<- solve(C)
##XXX NOTE Above Cinv is in original order of C and NOT permutation of M
sln <- solve(a = sLc, b = RHS, system = "A")
## Cholesky is more efficient and computationally stable
### see Matrix::CHMfactor-class expand note about fill-in causing many more non-zeroes of very small magnitude to occur
#### see Matrix file "CHMfactor.R" method for "determinant" (note differences with half the logdet of original matrix) and the functions:
##### `ldetL2up` & `destructive_Chol_update`
# 5 record log-like, check convergence, & determine next varcomps to evaluate
##5a determine log(|C|) and y'Py
# Boldman and Van Vleck 1991 eqn. 6 (tyPy) and 7 (log|C|)
# Meyer 1997 eqn. 13 (tyPy)
tyPy <- tyRinvy - crossprod(sln, RHS)
logDetC <- 2 * sum(log(sLc@x[sLc@p+1][1:sLc@Dim[[1L]]]))
# alternatively see `determinant` method for CHMfactor
# Factored out residual variance (only for lambda scale)
sigma2e <- if(lambda) tyPy / nminffx else NA
# Construct the log-likelihood (Meyer 1997, eqn. 8)
## (first put together as `-2log-likelihood`)
## 'log(|R|)'
### assumes X of full rank
if(lambda){
loglik <- nminfrfx * log(sigma2e)
} else{
# Meyer 1997 eqn. 10
loglik <- modMats$ny * log(nu[[thetaR]])
}
## 'log(|G|)'
#FIXME: Only works for independent random effects right now!
if(lambda){
logDetGfun <- function(x){rfxlvls[x] * log(as.vector(nu[[x]]*sigma2e))}
} else{
# Meyer 1997 eqn. 9
logDetGfun <- function(x){rfxlvls[x] * log(as.vector(nu[[x]]))}
}
if(modMats$nG != 0){
loglik <- loglik + sum(sapply(seq(modMats$nG), FUN = logDetGfun)) + rfxIncContrib2loglik
}
## log(|C|) + tyPy
### if `lambda` then nminffx=tyPy/sigma2e, simplified because sigma2e=tyPy/nminffx
### and mulitply by -0.5 to calculate `loglik` from `-2loglik`
loglik <- -0.5 * (loglik + logDetC + if(lambda) nminffx else tyPy)
# calculate residuals
r <- modMats$y - W %*% sln
return(list(loglik = loglik@x,
sigma2e = if(lambda) sigma2e@x else NA,
tyPy = tyPy@x, logDetC = logDetC,
sln = sln, r = r, sLc = sLc))
} #<-- end `reml()`
################################################################################
################################################################################
## Meyer and Smith 1996 for algorithm using derivatives of loglik
### eqn 15-18 (+ eqn 33-42ish) for derivatives of tyPy and logDetC
### Smith 1995 for very technical details
# EM refs: Hofer 1998 eqn 10-12
## note Hofer eqn 12 has no sigma2e in last term of non-residual formula
### ?mistake? in Mrode 2005 (p. 241-245), which does include sigma2e
#' @rdname reml
#' @param tugug A list of numeric values for the $u_g' u_g$ products of the
#' solution vector for each of the \code{g} variance parameters.
#' @param trace A list of traces of the inverse coefficient matrix for each of
#' of the \code{g} variance parameters.
#' @param y The response vector.
#' @export
em <- function(nuvin, thetaG, thetaR, conv,
rfxlvls, tugug, trace, y = NULL, r = NULL, nminffx = NULL){
# if no variance components (except residual), skip to the end
if(length(thetaG) > 0){
for(g in thetaG){
if(conv[g] == "F") next #<-- skip if parameter Fixed
qi <- rfxlvls[g]
#TODO check `*tail(nuv,1)` correctly handles models with covariance matrices
nuvin[g] <- as(as(matrix((tugug[[g]] + trace[[g]]) / qi), #XXX was `trace*tail(nuvin,1)`
"symmetricMatrix"),
"dsCMatrix")
} #<-- end `for g`
} #<-- end if no variance components besides residuals
if(conv[thetaR] != "F") nuvin[thetaR] <- crossprod(y, r) / nminffx
return(nuvin)
} #<-- end `em()`
################################################################################
################################################################################
#' @rdname reml
#' @export
ai <- function(nuvin, skel, thetaG,
modMats, W, sLc, sln, r,
thetaR = NULL, #<-- non-NULL if lambda==FALSE
sigma2e = NULL){ #<-- non-NULL if lambda==TRUE
lambda <- is.null(thetaR)
p <- length(nuvin)
nuin <- vech2matlist(nuvin, skel)
if(lambda){
Rinv <- as(solve(matrix(sigma2e)), "symmetricMatrix")
} else{
Rinv <- as(solve(nuin[[thetaR]]), "symmetricMatrix")
}
B <- matrix(0, nrow = modMats$ny, ncol = p)
# if no variance components (except residual), skip to the end
if(length(thetaG) > 0){
Ginv <- lapply(thetaG, FUN = function(x){as(solve(nuin[[x]]), "symmetricMatrix")})
si <- modMats$nb+1
for(g in thetaG){ #FIXME assumes thetaG is same length as nuvin
qi <- ncol(modMats$Zg[[g]])
ei <- si - 1 + qi
# Meyer 1997: eqn. 20 [also f(theta) in Johnson & Thompson 1995 eqn 9c)]
# Knight 2008: eqn 2.32-2.35 and 3.11-3.13 (p.34)
#TODO for covariances see Johnson & Thompson 1995 eqn 11b
Bg <- modMats$Zg[[g]] %*% sln[si:ei, , drop = FALSE] %*% Ginv[[g]] #FIXME is order correct? See difference in order between Johnson & Thompson (e.g., eqn. 11b) and Meyer 1997 eqn 20
B[cbind(Bg@i+1, g)] <- Bg@x
si <- ei+1
} #<-- end `for g`
} else g <- p-1 #<-- end if no varcomps TODO: `g` assignment temporary until >1 residual
#FIXME TODO Check what to do if more than 1 residual variance parameter
if(g < p){
if(lambda){
B[, p] <- (modMats$y %*% Rinv)@x
} else{
B[, p] <- (r %*% Rinv)@x
}
}
# Set up modified MME like the MMA of Meyer 1997 eqn. 23
## Substitute `B` instead of `y`
if(lambda){
BRHS <- Matrix(crossprod(W, B), sparse = TRUE)
tBRinvB <- crossprod(B)
} else{
# could pass KRinv and tWKRinv (if lambda=FALSE) from reml() into ai()
KRinv <- kronecker(Rinv, Diagonal(x = 1, n = modMats$Zr@Dim[[2L]]))
tWKRinv <- crossprod(W, KRinv)
BRHS <- Matrix(tWKRinv %*% B, sparse = TRUE)
tBRinvB <- crossprod(B, KRinv) %*% B
}
## tBPB
### Johnson & Thompson 1995 eqns 8,9b,9c
#### (accomplishes same thing as Meyer 1997 eqns 22-23 for a Cholesky
#### factorization as Boldman & Van Vleck eqn 6 applied to AI calculation)
## how to do this in c++ with Csparse
# tS <- matrix(NA, nrow = ncol(BRHS), ncol = nrow(BRHS))
# for(c in 1:p){
# slv1P <- solve(sLc, BRHS[, c], system = "P")
# slv1L <- solve(sLc, slv1P, system = "L")
# slv1Lt <- solve(sLc, slv1L, system = "Lt")
# tS[c, ] <- solve(sLc, slv1Lt, system = "Pt")@x
# }
# tBPB <- tBRinvB - (tS %*% BRHS)
tBPB <- tBRinvB - crossprod(solve(sLc, BRHS, system = "A"), BRHS)
AI <- 0.5 * tBPB
if(lambda) AI <- AI / sigma2e
as(AI, "matrix")
} #<-- end `ai()`
################################################################################
################################################################################
#' @rdname reml
#' @param nb The number of columns in X.
#' @param listGeninv A list of generalized inverse matrices.
#' @param pinv An integer vector of the matrix permutation.
#' @export
tugug_trace <- function(thetaG, nb, rfxlvls, listGeninv, Cinv, sln, pinv = NULL){
if(!is.null(pinv)){
# do Cinv_ii before reorder Cinv
Cinv_ii <- Cinv@x[attr(Cinv, "Zdiagp")][invPerm(pinv)]
P <- as(pinv, "pMatrix")
Cinv <- crossprod(P, Cinv) %*% P
} else Cinv_ii <- NULL
# `trCinvGeninv_gg` = trace[Cinv_gg %*% geninv_gg]
# `tugug` = t(u_gg) %*% geninv_gg %*% u_gg
## `g` is the gth component of the G-structure to model
## `geninv` is the generalized inverse
### (not the inverse of the G-matrix/(co)variance components)
trCinvGeninv_gg <- tugug <- as.list(rep(0, length(thetaG)))
si <- nb + 1
for(g in thetaG){
qi <- rfxlvls[g]
ei <- si - 1 + qi
#TODO XXX for using covariance matrices, see Johnson & Thompson 1995 eqn 11a
## Johnson & Thompson 1995 equations 9 & 10
if(inherits(listGeninv[[g]], "ddiMatrix")){
### No generalized inverse associated with the random effects
### Johnson & Thompson 1995 eqn 10a
#### 3rd term in the equation
tugug[[g]] <- crossprod(sln[si:ei, , drop = FALSE])
#### 2nd term in the equation
trCinvGeninv_gg[[g]] <- ifelse(is.null(Cinv_ii),
tr(Cinv[si:ei, si:ei]), sum(Cinv_ii[si:ei]))
} else{
### Generalized inverse associated with the random effects
### Johnson & Thompson 1995 eqn 9a
#### 3rd term in the equation
tugug[[g]] <- crossprod(sln[si:ei, , drop = FALSE],
listGeninv[[g]]) %*% sln[si:ei, , drop = FALSE]
#### 2nd term in the equation
# the trace of a product = the sum of the element-by-element product
## trace(geninv[[g]] %*% Cinv[si:ei, si:ei]
### = sum((geninv[[g]] * Cinv[si:ei, si:ei])@x)
trCinvGeninv_gg[[g]] <- sum((listGeninv[[g]] * Cinv[si:ei, si:ei])@x)
} #<-- end if else whether a diagonal g-inverse associated with rfx
si <- ei+1
} #<-- end `for g in thetaG`
return(list(tugug = tugug,
trace = trCinvGeninv_gg))
} #<-- end tugug_trace
################################################################################
#' @rdname reml
#' @export
gradFun <- function(nuvin, thetaG, rfxlvls, sln,
tugug, trace,
sigma2e = NULL, #<-- non-NULL if lambda==TRUE
r = NULL, nminfrfx = NULL){ #<-- non-NULL if lambda==FALSE
lambda <- is.null(r) #<-- lambda scale model
p <- length(nuvin)
dLdnu <- matrix(0, nrow = p, ncol = 1, dimnames = list(names(nuvin), NULL))
# tee = e'e
if(!lambda) tee <- crossprod(r)
# Skip most of below if there are no variance components other than residual
if(length(thetaG) > 0){
# First derivatives (gradient/score)
if(lambda){
for(g in thetaG){
# Johnson and Thompson 1995 Appendix 2 eqn B3 and eqn 9a and 10a
dLdnu[g] <- (rfxlvls[g] / nuvin[g]) - (1 / nuvin[g]^2) * (trace[[g]] + tugug[[g]] / sigma2e)
} #<-- end for g
} else{ #<-- else when NOT lambda scale
## Johnson and Thompson 1995 eqn 9b
#FIXME change `[p]` below to be number of residual (co)variances
dLdnu[p] <- (nminfrfx / tail(nuvin, 1)) - (tee / tail(nuvin, 1)^2)
for(g in thetaG){
dLdnu[p] <- dLdnu[p] + (1 / tail(nuvin, 1)) * (trace[[g]] /nuvin[g])
# Johnson and Thompson 1995 eqn 9a and 10a
dLdnu[g] <- (rfxlvls[g] / nuvin[g]) - (1 / nuvin[g]^2) * (trace[[g]] + tugug[[g]])
} #<-- end for g
}
} else{ #<-- end if varcomps besides residuals
# First derivatives (gradient/score) if only residual variance in model
## if lambda==TRUE then don't do this at all and `dLdnu` is empty
###FIXME when >1 residual (co)variance need to change whether skip with lambda=TRUE
if(!lambda) dLdnu[p] <- (nminfrfx / tail(nuvin, 1)) - (tee / tail(nuvin, 1)^2)
} #<-- end if/else no varcomps besides residual
# Johnson and Thompson 1995 don't use -0.5, because likelihood is -2 log likelihood
## see `-2` on left-hand side of Johnson & Thompson eqn 3
-0.5 * dLdnu
} #<-- end `gradFun()`
##########################
# Changing ginverse elements to `dsCMatrix` doesn't speedup traces, since
## they end up more or less as dense matrices but in dgCMatrix from the product
##########################
################################################################################
#' @rdname reml
#' @param grObj An list of class \code{grMod}.
#' @param lL A numeric value for REML log-likelihood value.
#' @param fd A character indicating whether forward, combined, or backward finite
#' differences (\dQuote{fdiff}, \dQuote{cdiff}, or \dQuote{bdiff}, respectively)
#' are to be calculated.
#' @export
gradFun_fd <- function(nuvin, grObj, lL, fd = c("fdiff", "cdiff", "bdiff")){
fd <- match.arg(fd)
h <- grObj$h
denomSC <- ifelse(fd == "cdiff", 2, 1)
nuv_tmp <- nuvin
skel <- attr(nuvin, "skel")
lambda <- grObj$lambda
if(lambda){
thetaR <- NULL
tWW <- grObj$tWW
RHS <- grObj$RHS
} else{
thetaR <- grObj$thetaR
tWW <- RHS <- NULL
}
thetaG <- grObj$thetaG
p <- grObj$p
con <- grObj$con
fxL <- fxU <- matrix(lL, nrow = grObj$p, ncol = 1,
dimnames = list(names(nuvin), NULL))
# Skip most of below if there are no variance components other than residual
if(length(thetaG) > 0){
# `g` is the gth component of the G-structure to model
for(g in thetaG){
if(con[g] != "F"){
if(fd != "bdiff"){ # if either FORWARD or CENTRAL difference method
nuv_tmp[g] <- nuvin[g] + h
lL_fd <- reml(vech2matlist(nuv_tmp, skel), skel, thetaG,
grObj$sLc, grObj$modMats, grObj$W, grObj$Bpinv,
grObj$nminffx, grObj$nminfrfx, grObj$rfxlvls,
grObj$rfxIncContrib2loglik,
thetaR, tWW, RHS)$loglik
fxL[g, 1] <- lL_fd # either forward or central diff.
}
if(fd != "fdiff"){ # if either BACKWARD or CENTRAL difference method
nuv_tmp[g] <- nuvin[g] - h
lL_fd <- reml(vech2matlist(nuv_tmp, skel), skel, thetaG,
grObj$sLc, grObj$modMats, grObj$W, grObj$Bpinv,
grObj$nminffx, grObj$nminfrfx, grObj$rfxlvls,
grObj$rfxIncContrib2loglik,
thetaR, tWW, RHS)$loglik
fxU[g, 1] <- lL_fd # either backward or central diff.
}
nuv_tmp[g] <- nuvin[g] #<-- reset
} #<-- end if g is not fixed
} #<-- end `for g in thetaG`
} #<-- end if there are any G-structure elements
# Residual (co)variances when not on Lambda scale
#FIXME change `[p]` below to be number of residual (co)variances
if(!lambda){
if(con[p] != "F"){
if(fd != "bdiff"){ # if either FORWARD or CENTRAL difference method
nuv_tmp[p] <- nuvin[p] + h
lL_fd <- reml(vech2matlist(nuv_tmp, skel), skel, thetaG,
grObj$sLc, grObj$modMats, grObj$W, grObj$Bpinv,
grObj$nminffx, grObj$nminfrfx, grObj$rfxlvls,
grObj$rfxIncContrib2loglik,
thetaR, tWW, RHS)$loglik
# residual is backward hence `fxL` (instead of `fxU`)
fxL[p, 1] <- lL_fd # either forward or central diff.
}
if(fd != "fdiff"){ # if either BACKWARD or CENTRAL difference method
nuv_tmp[p] <- nuvin[p] - h
lL_fd <- reml(vech2matlist(nuv_tmp, skel), skel, thetaG,
grObj$sLc, grObj$modMats, grObj$W, grObj$Bpinv,
grObj$nminffx, grObj$nminfrfx, grObj$rfxlvls,
grObj$rfxIncContrib2loglik,
thetaR, tWW, RHS)$loglik
# residual is backward hence `fxU` (instead of `fxL`)
fxU[p, 1] <- lL_fd # either backward or central diff.
}
nuv_tmp[p] <- nuvin[p] #<-- reset
} #<-- end if g is not fixed
} #<-- end if lambda=FALSE
# First derivatives (gradient/score)
## forward = [f(x+h) - f(x)] / h
## backward = [f(x) - f(x-h)] / h
## central = [f(x+h) - f(x-h)] / 2h
dLdnu <- (fxU - fxL) / (denomSC * h)
-1 * dLdnu #<-- since optimizing the negative log-likelihood
} #<-- end `gradFun_fd()`
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.