#' Calculate point estimates from a linear mixed effects (LME) model for new data
#'
#' This function allows the user to make out-of-sample predictions from an LME model.
#'
#' @param model Object of class `lme` containing the fitted LME model
#' @param newdata Data frame containing data for which to make predictions.
#' The response variable should be set to NA for the rows of the data the user wishes to make predictions for.
#' The columns in the data should have the same names as those used to fit the model.
#' The variables should also be of the same type as in the data used to fit the mixed model (numeric, factor etc).
#' @return List containing `preddata` and `random`. Data frame `preddata` is a version of `newdata` updated to contain columns corresponding to the fixed effects values (`fixed`),
#' random effects values (`random`), and fitted values (`fitted`).
#' Data frame `random` contains the values of random effects components for each individual.
#' @author This code was originally written by Ruth Keogh (London School of Hygiene and Tropical Medicine) which can be viewed at
#' github.com/ruthkeogh/landmark_CF. There have been further contributions from Jessica Barrett (MRC Biostatistics Unit, University of Cambridge),
#' David Stevens (University of Liverpool), and Mike Sweeting (University of Leicester).
#' @export
mixoutsamp <- function(model, newdata) {
n = dim(newdata)[1]
#----error message
if (!(inherits(model,"lme"))) {
stop("Error: model is not of type lme")
}
#----end of error message
#----error message
if (is.null(newdata) == T) {
stop("Error: no data was provided in newdata")
}
#----end of error message
#name of the id (group) variable
id.name = names(model$groups)
#name of the response variable
y.name = as.character(attr(model$terms, "variables")[[2]])
#name of variable which distinguished between different response variables (for multivariate situation)
if (is.null(model$modelStruct$varStruct) == 0) {
ytype.name = as.character(stats::formula(model$modelStruct$varStruct)[[2]][[3]])
} else{
ytype.name = NULL
}
re.names = attr(model$modelStruct$reStr[[1]], "Dimnames")[[1]]
#type of within-person correlation structure
if (is.null(attributes(model$modelStruct$corStruct)$class[1]) == 1) {
corr.struct.type = "indep"
} else {
corr.struct.type = attributes(model$modelStruct$corStruct)$class[1]
}
#----error message
data.names <-
unique(c(all.vars(model$terms), all.vars(
attr(model$modelStruct$reStruct$patid, "formula")
), all.vars(
attr(model$modelStruct$varStruct, "formula")
)))
#names of all variables used in any part of the model
# if(is.null(ytype.name)==1){
# data.names=c(id.name,
# as.character(attributes(model$terms)$variables)[-1],#response variable and variables with fixed effects
# attributes(getVarCov(model))$dimnames[[1]])#variables with random effects
# }else{
# data.names=c(id.name,
# as.character(attributes(model$terms)$variables)[-1],#response variable and variables with fixed effects
# attributes(getVarCov(model))$dimnames[[1]]#variables with random effects
# )}
#
# data.names=data.names[data.names!="(Intercept)"]
# data.names=unique(data.names)
#data.names=data.names[-which(sapply(1:length(data.names),function(v)grepl(":",data.names[v]))==T)]
#this omits interactions between variables already listed (this is necessary to avoid an error occurring in the error message where I check whether newdata contains all the variables in the model)
#check that newdata contains all variable names in data.names
if (sum(names(newdata) %in% data.names) != length(data.names)) {
stop("Error: newdata does not contain the variables used in the model")
}
#----end of error message
#----error message
for (v in 1:length(data.names)) {
if (sum(is.na(newdata[, v])) == n) {
stop(paste0(
"Error: the variable ''",
data.names[v],
"'' has no non-missing values"
))
}
}
#----end of error message
#-------------------------------------------------------------
#model parameters
coef.fixed = matrix(model$coef$fixed,
nrow = length(model$coef$fixed),
ncol = 1) #vector of fixed coefficients
G = as.matrix(nlme::getVarCov(model)) #random effect variance-covariance matrix
#residual variance: this is a single value if the variance is homoscedastic but a vector of values in cases of heteroscedasticity (e.g. when we have a multivariable outcome)
if (is.null(attributes(model$modelStruct$varStruct)$weights) == 1) {
resid.var = (model$sigma) ^ 2
} else if (is.null(attributes(model$modelStruct$varStruct)$weights) ==
0) {
#resid.var= matrix((model$sigma/unique(attributes(model$modelStruct$varStruct)$weights))^2,nrow=1,byrow=TRUE)
resid.var = (model$sigma * stats::coef(
nlme::varFunc(model$modelStruct$varStruct),
unconstrained = F,
allCoef = T
)) ^ 2
}
observed <- !logical(dim(newdata)[1])
for (v in 1:length(data.names)) {
if (data.names[v] == y.name)
next
observed = observed & (!is.na(newdata[, data.names[v]]))
}
#-------------------------------------------------------------
#the observed data (rows of newdata where the response y is observed)
newdata <- newdata[observed, ]
ids <-
unique(newdata[, id.name]) #vector of unique id numbers (aka the grouping variable)
is.y.obs = !is.na(newdata[, y.name]) #indicator of whether y is observed (1: no, 0: yes)
x.mat = stats::model.matrix(model, data = newdata[is.y.obs, ]) #fixed effects design matrix
z.mat = stats::model.matrix(stats::formula(model$modelStruct$reStr)[[1]], data =
newdata[is.y.obs, ]) #random effects design matrix
y = newdata[, y.name][is.y.obs] #the outcome
xb = x.mat %*% coef.fixed #fitted values for the fixed part of the model
if (is.null(model$modelStruct$varStruct) == 0) {
ytype = newdata[, ytype.name][is.y.obs] #response variable type (for multivariate situation)
}
obs.ids = newdata[, id.name][is.y.obs] #id numbers
#vector of times used in the correlation structure
if (corr.struct.type != "indep") {
vec.corr.times = stats::model.matrix(stats::as.formula(paste(
"~", stats::formula(model$modelStruct$corStruct)[[2]][[2]]
)), data = newdata[is.y.obs == 0, ])[, -1]
}
#-------------------------------------------------------------
#within-person correlation parameters
num.raneff = dim(z.mat)[2] #number of random effects
#parameters associated with the within-person correlation
if (corr.struct.type == "corExp") {
range = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corLin") {
range = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corGaus") {
range = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corRatio") {
range = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corSpher") {
range = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corCompSymm") {
Rho = stats::coef(model$modelStruct$corStruct, unconstrained = F)
} else if (corr.struct.type == "corAR1" |
corr.struct.type == "corCAR1") {
Phi = stats::coef(model$modelStruct$corStruct, unconstrained = F)
}
#-------------------------------------------------------------
#the data for which predictions are required - this is for all individuals in newdata
xstar.mat = stats::model.matrix(model,
stats::model.frame( ~ ., newdata, na.action = stats::na.pass)) #fixed effects design matrix
zstar.mat = stats::model.matrix(stats::formula(model$modelStruct$reStr)[[1]], data =
newdata) #random effects design matrix
xbstar <-
xstar.mat %*% coef.fixed #fitted values for the fixed part of the model
pred.ids = newdata[, id.name] #id numbers
ids = unique(pred.ids) #unique id numbers
#-------------------------------------------------------------
#loop over individuals to give random effect part of the fitted values and individual random effects
reffects = matrix(nrow = length(pred.ids), ncol = 1)
reffects.individual = matrix(nrow = length(ids), ncol = num.raneff)
vec.zeros = as.vector(rep(0, num.raneff))
split.obs.ids <-
split(1:length(obs.ids), newdata[is.y.obs, id.name]) #list of ids with rows that contain observed values
split.pred.ids <-
split(1:length(pred.ids), newdata[, id.name]) #list of ids with all values
# pb<-txtProgressBar(0,length(split.obs.ids),style=3)
for (i in 1:length(split.obs.ids)) {
# setTxtProgressBar(pb,i)
oids.list <- split.obs.ids[[i]]
pids.list <- split.pred.ids[[i]]
if (corr.struct.type != "indep") {
t.rep = matrix(
rep(vec.corr.times[obs.ids %in% ids[i]], length(vec.corr.times[obs.ids %in%
ids[i]])),
nrow = length(vec.corr.times[obs.ids %in% ids[i]]),
ncol = length(vec.corr.times[obs.ids %in% ids[i]])
)
distances = abs(t.rep - t(t.rep))
distances.2 = t.rep - t(t.rep)
}
if (corr.struct.type == "indep") {
corr.mat = diag(1, length(oids.list))
} else if (corr.struct.type == "corExp") {
corr.mat = exp(-distances / range)
} else if (corr.struct.type == "corLin") {
corr.mat = ifelse(distances < range, 1 - distances / range, 0)
} else if (corr.struct.type == "corGaus") {
corr.mat = exp(-(distances / range) ^ 2)
} else if (corr.struct.type == "corRatio") {
corr.mat = 1 / (1 + (distances / range) ^ 2)
} else if (corr.struct.type == "corSpher") {
corr.mat = ifelse(distances < range,
1 - 1.5 * (distances / range) + 0.5 * (distances / range) ^ 3,
0)
} else if (corr.struct.type == "corCompSymm") {
corr.mat = matrix(Rho, nrow = nrow(t.rep), ncol = ncol(t.rep))
} else if (corr.struct.type == "corAR1" |
corr.struct.type == "corCAR1") {
corr.mat = Phi ^ distances
} #else if(corr.struct.type=="corARMA" & p==1 & q==0){
# corr.mat=Phi^distances
# }
if (is.null(model$modelStruct$varStruct) == 1) {
resid.var.mat = matrix(resid.var,
nrow = dim(corr.mat)[1],
ncol = dim(corr.mat)[1])
} else if (is.null(model$modelStruct$varStruct) == 0) {
resid.var.mat <-
as.matrix(diag(
resid.var[as.character(ytype[oids.list])],
nrow = dim(corr.mat)[1],
ncol = dim(corr.mat)[1]
))
}
z.mat.oids <- z.mat[oids.list, , drop = FALSE]
t.z.mat.oids <- t(z.mat.oids)
reffects.individual[i, ] <- (G %*% t.z.mat.oids) %*%
solve((z.mat.oids %*% G %*% t.z.mat.oids + resid.var.mat * corr.mat)) %*%
(y[oids.list] - xb[oids.list])
reffects[pids.list] <-
zstar.mat[pids.list, ] %*% t(reffects.individual[i, , drop = FALSE])
#the use of drop=FALSE in z.mat on the first and second lines handles the annoying fact that when we have a matrix with one row, R automatically converts it into a vector and we get non-conformability issues.
}
#-------------------------------------------------------------
#calculate fitted values
fitted = xbstar + reffects
#--------------
#things to output
preddata = cbind(newdata, xbstar, reffects, fitted)
names(preddata) = c(names(newdata), "fixed", "random", "fitted")
random = data.frame(names(split.obs.ids), reffects.individual)
names(random) = c(id.name, paste0("reff", re.names))
list(preddata = preddata, random = random)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.