#Function: ghap.lmm
#License: GPLv3 or later
#Modification date: 20 Oct 2022
#Written by: Yuri Tani Utsunomiya
#Contact: ytutsunomiya@gmail.com
#Description: mixed model fitting
ghap.lmm <- function(
formula,
data,
covmat = NULL,
weights = NULL,
vcp.initial = NULL,
vcp.estimate = TRUE,
vcp.conv = 1e-12,
errors = TRUE,
invcov = FALSE,
em.reml = 10,
tol = 1e-12,
extras = NULL,
verbose = TRUE
){
# Get formula information -------------------------------------------------
allterms <- unlist(strsplit(x = as.character(formula)[-1], split = "\\+"))
response <- gsub(pattern = "\\s+", replacement = "", x = allterms[1])
ranterms <- grep(pattern = "\\(", x = allterms)
fixterms <- gsub(pattern = "\\s+", replacement = "", allterms[-c(1,ranterms)])
ranterms <- gsub(pattern = "\\s+|\\(|\\)", replacement = "", x = allterms[ranterms])
if (verbose == TRUE) {
cat("\nAssembling design matrices... ")
}
# Sanity check for random effects -----------------------------------------
for(i in 1:length(ranterms)){
ranname <- unlist(strsplit(x = ranterms[i], split = "\\|"))
if(ranname[1] != "1"){
stop("This function does not support random regression yet.\n")
}else{
ranterms[i] <- gsub(pattern = "^.+\\|", replacement = "", x = ranterms[i])
}
}
# Assemble fixed effects -------------------------------------------------
form <- paste0(response, "~", paste(fixterms, collapse = "+"))
X <- sparse.model.matrix(as.formula(form), data=data)
fixnames <- colnames(X)
y <- data[,response]
n <- length(y)
s <- ncol(X)
rankX <- as.numeric(rankMatrix(X, method = "qr"))
# Sanity check for covariances --------------------------------------------
if(is.null(covmat) == FALSE){
for(i in names(covmat)){
if(i %in% ranterms == FALSE){
stop("Random effect ", i, " was not specified in the formula.")
}else if(is.null(covmat[[i]]) == TRUE){
lvl <- unique(data[,i])
data[,i] <- factor(data[,i], levels = sort(lvl))
covmat[[i]] <- Diagonal(nlevels(data[,i]))
}else{
if(identical(colnames(covmat[[i]]),rownames(covmat[[i]])) != TRUE){
stop("Factors do not match between columns and rows in the ", i," covariance matrix!")
}
if(any(data[,i] %in% colnames(covmat[[i]]) == F)){
stop("There are factors in ", i, " without covariance")
}
if(length(which(is.na(colnames(covmat[[i]])) == T)) != 0){
stop("NAs as factors in the ", i," convariance matrix")
}
cvr.dup <- table(colnames(covmat[[i]]))
cvr.dup <- length(which(cvr.dup > 1))
if(cvr.dup != 0){
stop("Duplicated factors declared in the ", i," covariance matrix!")
}
rm(cvr.dup)
data[,i] <- factor(data[,i], levels = colnames(covmat[[i]]))
}
}
}else{
covmat <- vector(mode = "list", length = length(ranterms))
names(covmat) <- ranterms
for(i in names(covmat)){
lvl <- unique(data[,i])
data[,i] <- factor(data[,i], levels = sort(lvl))
covmat[[i]] <- Diagonal(nlevels(data[,i]))
}
}
# Assemble random effects -------------------------------------------------
q <- NULL
Z <- vector(mode = "list", length = length(ranterms))
rannames <- NULL
for(i in 1:length(ranterms)){
form <- paste0("~ 0 + ", ranterms[i])
Z[[i]] <- sparse.model.matrix(as.formula(form), data=data, drop.unused.levels = FALSE)
rannames <- c(rannames, colnames(Z[[i]]))
colnames(Z[[i]]) <- gsub(pattern = ranterms[i], replacement = "", x = colnames(Z[[i]]))
q <- c(q,ncol(Z[[i]]))
}
names(Z) <- ranterms
names(q) <- ranterms
vcp.n <- length(ranterms)+1
# Sanity check for variance components ------------------------------------
if(is.null(vcp.initial) == FALSE){
if(length(vcp.initial) != vcp.n){
emsg <- paste0("Number of initial values for variance components (",
length(vcp.initial),
") does not match number of random effects (",
vcp.n,")\n")
stop(emsg)
}
if(identical(sort(names(vcp.initial)), sort(c(ranterms,"Residual"))) == FALSE){
emsg <- paste0("Labels of initial values for variance components (",
paste(names(vcp.initial), collapse=","),
") do not match labels of random effects (",
paste(c(names(ranterms),"Residual"), collapse=","), ")\n")
stop(emsg)
}
vcp.initial <- vcp.initial[c(ranterms,"Residual")]
}
# Check weights -----------------------------------------------------------
if(is.null(weights) == FALSE){
w <- sqrt(weights/mean(weights))
y <- w*y
X <- Diagonal(x = w)%*%X
for(i in 1:length(Z)){
Z[[i]] <- Diagonal(x = w)%*%Z[[i]]
}
}
# Pre-assemble mixed model components -------------------------------------
if(is.null(vcp.initial) == TRUE){
vcp.new <- rep(var(y)/vcp.n, times = vcp.n)
}else{
vcp.new <- unlist(vcp.initial)
}
names(vcp.new) <- c(ranterms, "Residual")
W <- X
XtX <- crossprod(X)
RHS <- crossprod(X, y)
ZtX <- vector(mode = "list", length = vcp.n-1)
ZtZ <- vector(mode = "list", length = vcp.n-1)
names(ZtX) <- ranterms
names(ZtZ) <- ranterms
for(i in ranterms){
RHS <- rbind(RHS, crossprod(Z[[i]], y))
ZtX[[i]] <- crossprod(Z[[i]], X)
ZtZ[[i]] <- vector(mode = "list", length = length(ranterms))
names(ZtZ[[i]]) <- ranterms
for(j in ranterms){
ZtZ[[i]][[j]] <- crossprod(Z[[i]],Z[[j]])
}
W <- cbind(W, Z[[i]])
}
# Initialize auxiliary functions ------------------------------------------
sparsesolve <- function(X){
Xchol <- cholPermute(X)
X <- Takahashi_Davis(X, cholQp = Xchol$Qpermchol, P = Xchol$P)
return(X)
}
pcgsolve <- function(A,b,criterion=1e-12){
m = 0
x <- rep(0, times=length(b))
r <- b
Minv <- (1/diag(A))
z <- Minv*r
p <- z
rss.new <- sum(r*z)
ss <- sum(b^2)
conv <- 100
while(conv > criterion){
m <- m + 1
g <- A%*%p
alpha <- as.numeric(rss.new/sum(p*g))
x <- x + alpha*p
if(m %% 50 == 0){
r <- b - A%*%x
}else{
r <- r - alpha*g
}
z <- Minv*r
rss.old <- rss.new
rss.new <- sum(r*z)
beta <- rss.new/rss.old
p <- z + beta*p
conv <- rss.new/ss
}
return(as.numeric(x))
}
# Log message -------------------------------------------------------------
if (verbose == TRUE) {
cat("Done.\n")
cat(n, " records will be fitted to\n ", s, " fixed effects\n ",
sum(q), " random effects\n", sep="")
}
# Check if covariance matrices should be inverted -------------------------
if(invcov == FALSE){
if(verbose == TRUE){
cat("Inverting covariance matrices...")
}
oricovmat <- covmat
for(i in names(covmat)){
icov <- try(solve(as.matrix(covmat[[i]])), silent = TRUE)
if(inherits(icov, "try-error")){
icov <- try(solve(as.matrix(covmat[[i]]) + Diagonal(nrow(covmat[[i]]))*tol), silent = TRUE)
if(inherits(icov, "try-error")){
emsg <- paste0("\nUnable to invert covariance matrix for effect ",
ranterms[[i]], " even after adding a tolerance of ", tol)
stop(emsg)
}
}
covmat[[i]] <- icov
}
if(verbose == TRUE){
cat(" Done.\n")
}
}
# Variance components estimation ------------------------------------------
if(vcp.estimate == TRUE){
convall <- 100
k <- 0
k.reml <- 0
if(verbose == TRUE){
cat("\nVariance components:\n\n",
"--------------------------------------------\n", sep="")
}
while(convall >= vcp.conv){
# Increment iteration
k <- k + 1
k.reml <- k.reml + 1
# Include variance components in mixed model equations
LHS <- XtX
for(i in ranterms){
LHS <- cbind(LHS, t(ZtX[[i]]))
}
for(i in ranterms){
tmp <- ZtX[[i]]
for(j in ranterms){
if(j == i){
tmp <- cbind(tmp, ZtZ[[i]][[i]] + as(covmat[[i]]*(vcp.new["Residual"]/vcp.new[i]),"dgCMatrix"))
}else{
tmp <- cbind(tmp,ZtZ[[i]][[j]])
}
}
LHS <- rbind(LHS, tmp)
rm(tmp)
}
LHS <- LHS/vcp.new["Residual"]
RHS <- RHS/vcp.new["Residual"]
# Solve mixed model equations
coef <- pcgsolve(LHS,RHS)
# Update fitted values
fit <- as.numeric(W%*%coef)
e <- y - fit
# Get sparse solution of left hand side
LHSi <- try(sparsesolve(LHS), silent = TRUE)
if(inherits(LHSi, "try-error")){
LHSi <- try(sparsesolve(LHS + Diagonal(nrow(LHS))*tol), silent = TRUE)
if(inherits(LHSi, "try-error")){
emsg <- paste0("\nUnable to solve LHS after adding a tolerance of ",
tol)
stop(emsg)
}
}
# Get REML updates of variance components
seconddev <- NULL
vcp.old <- vcp.new
if(k.reml <= em.reml){
itermethod <- "EM-REML"
vcp.new["Residual"] <- sum(y*e)/(n-rankX)
for(i in 1:length(ranterms)){
if(i == 1){
idx <- (s+1):(s+q[i])
}else{
idx <- which(1:length(q) < i)
idx <- (s+sum(q[idx])+1):(s+sum(q[1:i]))
}
su <- as.numeric(t(coef[idx])%*%covmat[[ranterms[i]]]%*%coef[idx])
tr <- sum(diag(covmat[[ranterms[i]]]%*%LHSi[idx,idx]))
vcp.new[i] <- as.numeric(su + tr)/q[i]
}
convall <- 100
}else{
itermethod <- "AI-REML"
Rinv <- Diagonal(length(y))/vcp.new["Residual"]
Py <- Rinv%*%e
firstdev <- NULL
Q <- NULL
D <- NULL
for(i in 1:length(ranterms)){
if(i == 1){
idx <- (s+1):(s+q[i])
}else{
idx <- which(1:length(q) < i)
idx <- (s+sum(q[idx])+1):(s+sum(q[1:i]))
}
su <- as.numeric(t(coef[idx])%*%covmat[[ranterms[i]]]%*%coef[idx])
tr <- sum(diag(covmat[[ranterms[i]]]%*%LHSi[idx,idx]))
firstdev <- c(firstdev, (q[i] - (su + tr)/vcp.new[i])/vcp.new[i])
Q <- cbind(Q, Z[[i]]%*%as.numeric((coef[idx]/vcp.new[i])))
RHSf <- crossprod(X, Q[,i])
for(j in 1:length(ranterms)){
RHSf <- rbind(RHSf, crossprod(Z[[ranterms[j]]], Q[,i]))
}
RHSf <- RHSf/vcp.new["Residual"]
D <- cbind(D, pcgsolve(LHS, RHSf))
}
Q <- cbind(Q, Py)
RHSf <- crossprod(X,Py)
for(i in ranterms){
RHSf <- rbind(RHSf, crossprod(Z[[i]], Py))
}
RHSf <- RHSf/vcp.new["Residual"]
D <- cbind(D, pcgsolve(LHS, RHSf))
trP <- sum(diag(Rinv)) - sum(diag(LHSi%*%crossprod(Rinv%*%W)))
firstdev <- c(firstdev, as.numeric(trP - crossprod(Py)))
RinvQ <- Rinv%*%Q
WtRinvQ <- crossprod(W, RinvQ)
seconddev <- solve(crossprod(Q, RinvQ) - crossprod(D,WtRinvQ))
if(isSymmetric(seconddev) == FALSE){
seconddevt <- t(seconddev)
seconddev[upper.tri(seconddev)] <- seconddevt[upper.tri(seconddevt)]
}
delta <- as.numeric(seconddev%*%firstdev)
vcp.new <- vcp.old - delta
if(any(vcp.new < 0)){
k.reml <- 0
vcp.new <- vcp.old
}
convall <- sum((vcp.new - vcp.old)^2)/(sum(vcp.new)^2)
}
# Remove residual variance from mixed model equations
LHS <- LHS*vcp.old["Residual"]
RHS <- RHS*vcp.old["Residual"]
# Log message for variance components update
if(verbose == TRUE){
if(itermethod == "EM-REML"){
cat(
"Variance components iteration: ", k, "\n",
"Method: ", itermethod, "\n ",
paste(paste(c(ranterms,"Residual"), "=", sprintf("%.12f", vcp.new)), collapse = "\n "),
"\n Convergence = ---",
"\n--------------------------------------------\n", sep="")
}else{
cat(
"Variance components iteration: ", k, "\n",
"Method: ", itermethod, "\n ",
paste(paste(c(ranterms,"Residual"), "=", sprintf("%.12f", vcp.new)), collapse = "\n "),
"\n Convergence = ", convall,
"\n--------------------------------------------\n", sep="")
}
}
}
if(verbose == TRUE & vcp.estimate == TRUE){
cat("\n\nVariance components converged.\n")
}
}else{
if(verbose == TRUE){
cat("Variance components assumed known\n ",
paste(paste(c(ranterms,"Residual"), "=", sprintf("%.12f", vcp.new)), collapse = "\n "), "\n", sep="")
}
seconddev <- NULL
}
# Get solutions -----------------------------------------------------------
LHS <- XtX
for(i in ranterms){
LHS <- cbind(LHS, t(ZtX[[i]]))
}
for(i in ranterms){
tmp <- ZtX[[i]]
for(j in ranterms){
if(j == i){
tmp <- cbind(tmp, ZtZ[[i]][[i]] + as(covmat[[i]]*(vcp.new["Residual"]/vcp.new[i]),"dgCMatrix"))
}else{
tmp <- cbind(tmp,ZtZ[[i]][[j]])
}
}
LHS <- rbind(LHS, tmp)
rm(tmp)
}
LHS <- LHS/vcp.new["Residual"]
RHS <- RHS/vcp.new["Residual"]
coef <- pcgsolve(LHS,RHS)
# Check extras ---------------------------------------------------------------
# Full LHSi
if("LHSi" %in% extras){
if(verbose == TRUE){
cat("Computing full inverse of coefficient matrix... ")
}
LHSi <- try(solve(LHS), silent = TRUE)
if(inherits(LHSi, "try-error")) {
LHSi <- solve(LHS + Diagonal(nrow(LHS))*tol)
}
colnames(LHSi) <- c(fixnames,rannames)
rownames(LHSi) <- colnames(LHSi)
if(verbose == TRUE){
cat("Done.\n")
}
}
# Phenotypic variance-covariance matrix
if("V" %in% extras){
if(verbose == TRUE){
cat("Computing phenotypic (co)variance matrix... ")
}
if(is.null(weights) == TRUE){
w <- rep(1, times = n)
}
V <- Diagonal(n)
diag(V) <- w*vcp.new["Residual"]
for(i in ranterms){
if(invcov == TRUE){
V <- V + tcrossprod(Z[[i]]%*%solve(covmat[[i]])*vcp.new[i],Z[[i]])
}else{
V <- V + tcrossprod(Z[[i]]%*%oricovmat[[i]]*vcp.new[i],Z[[i]])
}
}
if(verbose == TRUE){
cat("Done.\n")
}
}
# Calculate standard errors --------------------------------------------------
if(errors == TRUE){
if(verbose == TRUE){
cat("Computing standard errors... ")
}
if("LHSi" %in% extras == FALSE){
LHSi <- try(sparsesolve(LHS), silent = TRUE)
if(inherits(LHSi, "try-error")) {
LHSi <- sparsesolve(LHS + Diagonal(nrow(LHS))*tol)
}
}
stde <- sqrt(diag(LHSi))
if(is.null(seconddev) == FALSE){
vcp.stde <- sqrt(diag(seconddev))
}
if(verbose == TRUE){
cat("Done.")
}
}
# Return results ----------------------------------------------------------
results <- NULL
tmp <- as.data.frame(matrix(data = NA, nrow = s, ncol = 4))
colnames(tmp) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)")
rownames(tmp) <- colnames(X)
tmp[,1] <- coef[1:s]
if(errors == TRUE){
tmp[,2] <- stde[1:s]
tmp[,3] <- tmp[,1]/tmp[,2]
a <- pt(q = -abs(tmp[,3]), df = n - s, lower.tail = T, log.p = TRUE)
b <- pt(q = abs(tmp[,3]), df = n - s, lower.tail = F, log.p = TRUE)
tmp[,4] <- 10^((a + log(1 + exp(b - a)))/log(10))
}
results$fixed <- tmp
results$random <- vector(mode = "list", length = length(ranterms))
names(results$random) <- ranterms
for(i in 1:length(ranterms)){
if(i == 1){
idx <- (s+1):(s+q[i])
}else{
idx <- which(1:length(q) < i)
idx <- (s+sum(q[idx])+1):(s+sum(q[1:i]))
}
tmp <- as.data.frame(matrix(data = NA, nrow = length(idx), ncol = 3))
colnames(tmp) <- c("Estimate", "Std. Error", "Accuracy")
rownames(tmp) <- colnames(Z[[ranterms[i]]])
tmp[,1] <- coef[idx]
if(errors == TRUE){
tmp[,2] <- stde[idx]
tmp[,3] <- 1 - (stde[idx]^2)/vcp.new[i]
tmp[which(tmp[,3] <= 0),3] <- 1e-6
tmp[,3] <- sqrt(tmp[,3])
}
results$random[[ranterms[i]]] <- tmp
}
if(is.null(weights) == TRUE){
w <- rep(1, times = length(y))
}
tmp <- as.data.frame(matrix(data = NA, nrow = length(y), ncol = 3))
colnames(tmp) <- c("All", "Fixed", "Random")
tmp[,1] <- as.numeric(W%*%coef)/w
if(s == 1){
tmp[,2] <- as.numeric(W[,1]*coef[1])/w
}else{
tmp[,2] <- as.numeric(W[,1:s]%*%coef[1:s])/w
}
tmp[,3] <- as.numeric(W[,-c(1:s)]%*%coef[-c(1:s)])/w
results$fitted <- tmp
tmp <- as.data.frame(matrix(data = NA, nrow = length(y), ncol = 3))
colnames(tmp) <- c("All", "Fixed", "Random")
tmp[,1] <- (y/w) - results$fitted[,1]
tmp[,2] <- (y/w) - results$fitted[,2]
tmp[,3] <- (y/w) - results$fitted[,3]
results$residuals <- tmp
tmp <- as.data.frame(matrix(data = NA, nrow = vcp.n, ncol = 2))
colnames(tmp) <- c("Estimate", "Std. Error")
rownames(tmp) <- names(vcp.new)
tmp[,1] <- vcp.new
if(is.null(seconddev) == FALSE & errors == TRUE){
tmp[,2] <- vcp.stde
results$vcp <- tmp
results$AI <- seconddev
colnames(results$AI) <- names(vcp.new)
rownames(results$AI) <- names(vcp.new)
}else{
results$vcp <- tmp
}
results$extras <- NULL
if("LHSi" %in% extras){
results$extras$LHSi <- LHSi
}
if("V" %in% extras){
results$extras$V <- V
}
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.