Nothing
# Daniel J. Schad, Maximilian Rabe, Reinhold Kliegl, Feb. 22, 2020
#' @importFrom stats as.formula model.matrix runif terms
#' @importFrom lme4 fixef VarCorr
#' @importFrom MASS mvrnorm
NULL
#' Simulate data from a linear mixed-effects model
#'
#' This function simulates artificial data from a linear mixed-effects model. Parameters can either be set by hand using the parameters listed below *or* using a fitted \code{lmerMod} object.
#'
#' @param formula A formula as used in a call to the \code{lmer} function: a one-sided linear formula object describing both the fixed-effects and random-effects part of the model, with no response variable to the left of the \code{~} operator.
#' @param data a data frame containing the variables named in \code{formula}. Optional if \code{LMM} is provided.
#' @param Fixef a vector of all fixed-effect model parameters.
#' @param VC_sd standard deviations of the variance components for the random effects. This is a list of vectors, where different list entries reflect different grouping structures, and each vector contains standard deviations of variance components (random intercepts and random slopes) for one grouping factor. The last list entry is the standard deviation of the residual noise term (for \code{gaussian} or \code{lognormal} families only).
#' @param CP correlation parameters of the random effects. If \code{CP} is a single number, then all CP are set to this same value. If \code{CP} is a vector of length equal to the number of grouping factor, then each vector entry specifies one fixed value for all CP associated with this grouping factor. Otherwise, \code{CP} can be a list of correlation matrices, which specifies a full correlation matrix for each grouping structure.
#' @param LMM if a \code{lmerMod} object containing a fitted \code{lmer} model is provided, then \code{simLMM} uses the estimated model parameters for data simulation. Note: If \code{LMM} is provided, \code{formula}, \code{Fixef}, \code{VC_sd}, \code{CP}, and \code{family} are taken from the provided fitted model, the arguments are ignored in the model call, and a warning is given.
#' @param empirical logical. If true, \code{Fixef} specify the empirical not population fixed effects parameters. \code{empirical=TRUE} does not work for residual Bernoulli noise, and not if continuous covariates are used.
#' @param verbose logical. If \code{TRUE} (default), then information about the used model parameters is provided. If \code{FALSE}, then no output is generated.
#' @param family string specifying the response distribution: \code{"gaussian"} (default) assumes a normal distribution, \code{binomial} specifies a Bernoulli distribution with a logit link function, \code{"lognormal"} specifies a log-normal distribution; with \code{"lp"}, only the linear predictor is generated with no residual noise.
#'
#' @examples
#'
#' design <-
#' fixed.factor("X", levels=c("X1", "X2")) +
#' random.factor("Subj", instances=30)
#' dat <- design.codes(design)
#' contrasts(dat$X) <- c(-1, +1)
#'
#' dat$ysim <- simLMM(formula = ~ 1 + X + (1 + X | Subj),
#' data = dat,
#' Fixef = c(200, 10),
#' VC_sd = list(c(30,10), 50),
#' CP = 0.3,
#' empirical = TRUE)
#'
#' dat$Xn <- ifelse(dat$X=="X1",-1,1)
#' # lme4::lmer(ysim ~ Xn + (Xn || Subj), data=dat, control=lmerControl(calc.derivs=FALSE))
#'
#' @export
#==============================================================================
simLMM <- function(formula, data=NULL, Fixef, VC_sd, CP=0, LMM=NULL, empirical=FALSE, verbose=TRUE, family="gaussian") {
#------------------------------------------------------------------------
# simulate data from a linear mixed-effects model
#------------------------------------------------------------------------
if(family == "lognormal") {
cl <- sys.call()
cl$family <- "gaussian"
return(exp(eval(cl))) # rerun simLMM and exp-transform response variable
}
if (!is.null(LMM)) {
if(!missing(formula)||!missing(family)||!missing(Fixef)||!missing(VC_sd)||!missing(CP)) warning('You can either provide the values for the parameters as formula, Fixef, VC_sd, CP, and family, *or* a fitted model as LMM (see ?simLMM). To avoid this message, please decide for one specification. Otherwise, if you provide LMM, other parameters are ignored.')
if (!class(LMM)%in%c("lmerModLmerTest","lmerMod")) stop('LMM must be a model object from a fit with the lmer() function.')
EF <- extractLMM(LMM)
Fixef_ <- EF[[1]]
Ranef <- EF[[2]]
if (is.null(data)) data <- EF[[3]]
} else {
form <- formula
RE_sd <- VC_sd
RE_cor <- CP
detect_errors1(form, data, RE_sd, RE_cor, family)
# extract terms from formula
Terms <- extractTerms(form, data)
# collect fixed effects terms (+ fixed-effects estimates)
Fixef_ <- list(slopes=Terms[[1]], b=Fixef)
# obtain terms in random effects
Ranef <- list()
for (i in 1:length(Terms[[2]])) # i <- 1
Ranef[[i]] <- list(slopes=Terms[[2]][[i]])
# expand random effects correlations
if (!is.list(RE_cor)) {
RE_cor_list <- list()
for (i in 1:length(Ranef)) { # i <- 1
mm_ranef <- model.matrix(as.formula(paste0("~ ",paste0(Terms[[2]][[i]],collapse=" + "))), data=data )
nre <- dim(mm_ranef)[2]
#if (Terms[[2]][[i]][1]=="1") nre <- nre - 1
RE_cor_list[[i]] <- diag(rep(1,nre))
if (length(RE_cor)==length(Ranef))
RE_cor_list[[i]][RE_cor_list[[i]]==0] <- RE_cor[i]
if (length(RE_cor)==1)
RE_cor_list[[i]][RE_cor_list[[i]]==0] <- RE_cor
}
RE_cor <- RE_cor_list
}
# Store by, RE sd, and RE correlations
nvc <- length(Terms[[2]])
for (i in 1:nvc) { # i <- 1
Ranef[[i]]$by <- Terms[[3]][i]
Ranef[[i]]$sd <- RE_sd[[i]]
Ranef[[i]]$cor <- RE_cor[[i]]
}
if (family=="gaussian") Ranef[[nvc+1]] <- RE_sd[[nvc+1]]
}
detect_errors2(data, Fixef_, Ranef, family, empirical)
if (empirical==FALSE) resp <- simLMM_empF(data, Fixef_, Ranef, family)
if (empirical==TRUE ) resp <- simLMM_empT(data, Fixef_, Ranef, family)
if (verbose ==TRUE ) simLMM_verbose(data, Fixef_, Ranef, empirical, family)
return(resp)
}
#==============================================================================
detect_errors1 <- function(form, data, RE_sd, RE_cor, family) {
termlab <- attr(terms(as.formula(form)),"term.labels")
interc <- attr(terms(as.formula(form)),"intercept")
# determine random effects
idx <- c(); for (i in 1:length(termlab)) # i <- 1
idx[i] <- any(strsplit(termlab[i],"")[[1]] == "|")
# collect fixed effects terms
Fixterm <- termlab[!idx]
if (interc==1) Fixterm <- c("1", Fixterm)
if (interc==0) Fixterm <- c("0", Fixterm)
# obtain terms in random effects
ranterms <- which(idx)
if (is.list(RE_cor) & length(RE_cor)!=length(ranterms)) stop('Random effects correlations do not fit formula.')
if (!is.list(RE_cor) & !length(RE_cor)%in%c(1,length(ranterms))) stop('Random effects correlations do not fit formula.')
if (!is.list(RE_sd)) stop('RE_sd must be specified as a list.')
if (family=="gaussian") if (length(RE_sd)!=(length(ranterms)+1)) stop('Length of RE_sd does not correspond to formula.')
if (family!="gaussian") if (length(RE_sd)!= length(ranterms) ) stop('Length of RE_sd does not correspond to formula.')
#------ expand formula ------
for (i in 1:length(ranterms)) { # i <- 1
formRE <- termlab[ranterms[i]]
tmp <- which(strsplit(formRE,"")[[1]] == "|")
formRE1 <- substr(formRE, 1, tmp[1]-1)
REterms <- attr(terms(as.formula(paste0("~",formRE1))),"term.labels")
if (length(REterms)!=0) {
idx3 <- 1:length(REterms)
if (length(grep(":",REterms))>0) idx3 <- idx3[-grep(":",REterms)]
if (any(REterms=="0")) idx3 <- idx3[-which(REterms=="0")]
if (any(REterms=="1")) idx3 <- idx3[-which(REterms=="1")]
REterms1 <- REterms[idx3]
if (!all(REterms1 %in% names(data))) stop('Random effects predictor missing in data.')
}
}
}
#==============================================================================
detect_errors2 <- function(data, Fixef_, Ranef, family, empirical) {
data <- data.frame(data)
# family argument
if (!any(family%in%c("gaussian","binomial","lp"))) stop('The family argument must be one of "gaussian", "binomial", or "lp" (linear predictor).')
if( empirical==TRUE & family=="binomial" ) warning('The family argument only partially works with empirical=TRUE.')
# check whether variables exist in data frame
# Fixed effects
idx3 <- 1:length(Fixef_$slopes)
if (length(grep(":",Fixef_$slopes))>0) idx3 <- idx3[-grep(":",Fixef_$slopes)]
if (any(Fixef_$slopes=="0")) idx3 <- idx3[-which(Fixef_$slopes=="0")]
if (any(Fixef_$slopes=="1")) idx3 <- idx3[-which(Fixef_$slopes=="1")]
fixVars <- Fixef_$slopes[idx3]
if (!all(fixVars %in% names(data))) stop('Fixed effects predictor missing in data.')
# Random effects
if (family=="gaussian") nRanef <- length(Ranef)-1
if (family!="gaussian") nRanef <- length(Ranef)
for (i in 1:nRanef) { # i <- 1
if (!Ranef[[i]]$by %in% names(data)) stop('Grouping variable missing in data.')
idx3 <- 1:length(Ranef[[i]]$slopes)
if (length(grep(":",Ranef[[i]]$slopes))>0) idx3 <- idx3[-grep(":",Ranef[[i]]$slopes)]
if (any(Ranef[[i]]$slopes=="0")) idx3 <- idx3[-which(Ranef[[i]]$slopes=="0")]
if (any(Ranef[[i]]$slopes=="1")) idx3 <- idx3[-which(Ranef[[i]]$slopes=="1")]
ranVars <- Ranef[[i]]$slopes[idx3]
if (!all(ranVars %in% names(data))) stop('Random effects predictor missing in data.')
}
# check whether number of coefficients is correct
# Fixed effects
mm_fixef <- model.matrix(as.formula(paste0("~ ", paste0(Fixef_$slopes,collapse=" + "))), data=data)
if (ncol(mm_fixef)!=length(Fixef_$b)) stop('The number of fixed effects coefficients does not match the formula.')
# Random effects
if (family!="gaussian") ranef <- Ranef
if (family=="gaussian") {
ranef <- list()
for (i in 1:(length(Ranef)-1)) ranef[[i]] <- Ranef[[i]]
}
for (i in 1:length(ranef)) { # i <- 1
mm_ranef <- model.matrix(as.formula(paste0("~ ",paste0(ranef[[i]]$slopes,collapse=" + "))), data=data )
if (ncol(mm_ranef)!=length(ranef[[i]]$sd)) stop('The number of random effects coefficients (SDs) does not match the formula.')
if (ncol(mm_ranef)!=nrow(ranef[[i]]$cor)) stop('The random effects correlation matrix does not match the formula.')
if (ncol(mm_ranef)!=ncol(ranef[[i]]$cor)) stop('The random effects correlation matrix does not match the formula.')
if (nrow(ranef[[i]]$cor)!=nrow(ranef[[i]]$cor)) stop('The random effects correlation matrix must have the same number of columns as rows.')
as.numeric(ranef[[i]]$cor)
}
}
#==============================================================================
simLMM_verbose <- function(dat, Fixef_, Ranef, empirical, family) {
if (family=="gaussian")
cat("Data simulation from a linear mixed-effects model (LMM)\n")
if (family=="binomial")
cat("Data simulation from a logistic generalized linear mixed-effects model (GLMM)\n")
if (family=="lp")
cat("Data simulation from a linear mixed-effects model (LMM): linear predictor\n")
Sigma_res <- Ranef[[length(Ranef)]]
if (family=="gaussian") ranef <- Ranef[1:(length(Ranef)-1)]
if (family!="gaussian") ranef <- Ranef
dat <- data.frame(dat)
# fixed effects: obtain model matrix + write predictor names with coefficients
if (length(Fixef_$slopes)==1 & Fixef_$slopes[1]=="1") {
mm_fixef <- model.matrix(as.formula("~ 1"), data=dat)
} else {
mm_fixef <- model.matrix(as.formula(paste0("~ ", paste0(Fixef_$slopes,collapse=" + "))), data=dat)
}
fixNames <- colnames(mm_fixef)
bs <- Fixef_$b
names(bs) <- fixNames
if (fixNames[1]=="(Intercept)") fixNames[1] <- "1"
fix_form <- paste0(fixNames,collapse=" + ")
if (fixNames[1]!="1") fix_form <- paste0("0 + ",fix_form,collapse="")
#bs <- as.character(Fixef_$b, length=form_length)
# random effects: obtain model matrix + compute linear predictor
form_ranef <- ran_form <- list()
NC <- ranNames <- ranNames2 <- SDs <- list()
NCby <- c()
for (i in 1:length(ranef)) { # i <- 1
if (length(ranef[[i]]$slopes)==1 & ranef[[i]]$slopes[1]=="1") {
mm_ranef <- model.matrix(as.formula(paste0("~ 1")), data=dat)
} else {
mm_ranef <- model.matrix(as.formula(paste0("~ ",paste0(paste0(ranef[[i]]$slopes,collapse=" + ")))), data=dat )
}
ranNames[[i]] <- colnames(mm_ranef)
SDs[[i]] <- ranef[[i]]$sd
NC[[i]] <- nchar(ranNames[[i]])
NCby[i] <- nchar(ranef[[i]]$by)
ranNames2[[i]] <- ranNames[[i]]
if (ranNames2[[i]][1]=="(Intercept)") ranNames2[[i]][1] <- "1"
if (ranNames2[[i]][1]!="1") ranNames2[[i]][1] <- paste0("0 + ",ranNames2[[i]][1],collapse="")
ran_form[[i]] <- paste0("( ", paste0(ranNames2[[i]],collapse=" + "), " | ", ranef[[i]]$by," )")
}
FORM <- paste0(fix_form, " + ", paste0(ran_form, collapse=" + "), collapse="")
cat(paste0("Formula: ",family," ~ "))
cat(FORM)
cat("\n")
nx1 <- max(c(10,max(NCby))) # number of characters for by
nx2 <- max(c(7,max(sapply(NC,max)))) # number of characters for predictor variables
#x <- c(100.0001, 1e+1, 1e+0, 1e-1, 1e-2, 1e-3, 1e-10)
x <- unlist(SDs)
# number of digits after the comma
n0d <- floor( -log10( 1e-10 + abs(x) - floor( abs( x ) ) ) )
n0d[floor(x)!=0] <- 0
N0D <- min(c(7,max(n0d)+1))
# number of digits before the comma
N1D <- max(nchar(as.character(floor(x))))
if (empirical==TRUE ) cat("empirical = TRUE\n")
if (empirical==FALSE) cat("empirical = FALSE\n")
cat("Random effects:\n")
cat(paste0(" Groups", paste0(rep(" ",nx1-6),collapse=""),
"Name", paste0(rep(" ",nx2-3),collapse=""),
" Std.Dev.", paste0(rep(" ",N0D+N1D-2),collapse=""),
"Corr\n"))
for (i in 1:length(ranNames)) { # i <- 1
for (ii in 1:length(ranNames[[i]])) { # ii <- 1
# print by name
if (ii==1) {
tmp1 <- paste0(" ",ranef[[i]]$by, paste0(rep(" ",nx1-nchar(ranef[[i]]$by)-1),collapse="") )
nchar(tmp1)
} else {
tmp1 <- paste0(rep(" ",nx1),collapse="")
nchar(tmp1)
}
# print predictor variable
tmp2 <- paste0(" ",ranNames[[i]][ii], paste0(rep(" ",nx2-nchar(ranNames[[i]][ii])),collapse="") )
# print SD
tmp3 <- sprintf(paste0("%",N1D+N0D+1,".",N0D,"f"), SDs[[i]][ii])
# print correlation
if (ii==1) {
tmp4 <- ""
} else {
#tmp4 <- paste0(sprintf(paste0("%",N1D,".",N0D,"f"), ranef[[i]]$cor[ii,1:(ii-1)]), collapse=" ")
tmp4 <- paste0(sprintf(paste0("%",N1D,".",2,"f"), ranef[[i]]$cor[ii,1:(ii-1)]), collapse=" ")
}
cat(paste0(tmp1,tmp2," ",tmp3," ",tmp4,"\n",collapse=""))
}
}
if (family=="gaussian")
cat(paste0(" Residual", paste0(rep(" ",nx1+nx2-5),collapse=""), sprintf(paste0("%",N1D+N0D+1,".",N0D,"f"),Sigma_res),"\n"))
cat("Number of obs: ")
cat(nrow(dat))
cat(", groups: ")
nBY <- nRE <- c()
for (i in 1:length(ranef)) { # i <- 1
if ( (i==1) | (i>=2 & !ranef[[i]]$by %in% nBY)) {
nBY <- c(nBY, ranef[[i]]$by)
nRE <- c(nRE, length(unique(dat[,ranef[[i]]$by])))
}
}
for (i in 1:length(nBY)) {
if (i!=1) cat("; ")
cat(nBY[i])
cat(", ")
cat(nRE[i])
}
cat("\n")
#Number of obs: 60, groups: Subj, 15; Item, 4
cat("Fixed effects:\n")
print(bs)
}
#==============================================================================
simLMM_empF <- function(dat, Fixef, Ranef, family) {
#------------------------------------------------------------------------
# simulate data from a LMM using empirical=FALSE
#------------------------------------------------------------------------
Sigma_res <- Ranef[[length(Ranef)]]
if (family=="gaussian") ranef <- Ranef[1:(length(Ranef)-1)]
if (family!="gaussian") ranef <- Ranef
dat <- data.frame(dat)
# fixed effects: obtain model matrix + compute linear predictor
if (length(Fixef$slopes)==1 & Fixef$slopes[1]=="1") {
mm_fixef <- model.matrix(as.formula("~ 1"), data=dat)
} else {
mm_fixef <- model.matrix(as.formula(paste0("~ ", paste0(Fixef$slopes,collapse=" + "))), data=dat)
}
lp_fixef <- mm_fixef %*% t(t(Fixef$b))
# random effects: obtain model matrix + compute linear predictor
lp_ranef <- matrix(NA,nrow=nrow(dat),ncol=length(ranef))
for (i in 1:length(ranef)) { # i <- 1
if (length(ranef[[i]]$slopes)==1 & ranef[[i]]$slopes[1]=="1") {
mm_ranef <- model.matrix(as.formula("~ 1"), data=dat)
} else {
mm_ranef <- model.matrix(as.formula(paste0("~ ",paste0(ranef[[i]]$slopes,collapse=" + "))), data=dat )
}
# compute covariance matrix
if (length(ranef[[i]]$sd)==1) {
ranef[[i]]$cov <- ranef[[i]]$sd %*% ranef[[i]]$cor %*% ranef[[i]]$sd
} else {
ranef[[i]]$cov <- diag(ranef[[i]]$sd) %*% ranef[[i]]$cor %*% diag(ranef[[i]]$sd)
}
instances <- as.character(unique(dat[,ranef[[i]]$by]))
w <- mvrnorm(length(instances), rep(0,length(ranef[[i]]$sd)), ranef[[i]]$cov, empirical=FALSE)
for (j in 1:length(instances)) { # j <- 1
idx <- dat[,ranef[[i]]$by]==instances[j]
lp_ranef[idx,i] <- mm_ranef[idx,] %*% t(t(w[j,]))
}
}
# combine linear predictions
lp <- rowSums(cbind(lp_fixef,lp_ranef))
# LINK function
#link <- "logit"
#ff <- make.link(link)
#lp <- -2:2
#ff$linkinv(as.numeric(lp))
# simulate response
if (family=="gaussian")
resp <- lp + mvrnorm(nrow(dat), 0, Sigma_res^2, empirical=FALSE)
if (family=="lognormal")
resp <- exp(lp + mvrnorm(nrow(dat), 0, Sigma_res^2, empirical=FALSE))
if (family=="binomial")
resp <- as.numeric(1/(1+exp(-lp)) > runif(length(lp)))
if (family=="lp")
resp <- as.numeric(lp)
as.numeric(resp)
}
#------------------------------------------------------
#==============================================================================
simLMM_empT <- function(dat, Fixef, Ranef, family) {
#------------------------------------------------------------------------
# simulate data from a LMM using empirical=TRUE
#------------------------------------------------------------------------
Sigma_res <- Ranef[[length(Ranef)]]
if (family=="gaussian") ranef <- Ranef[1:(length(Ranef)-1)]
if (family!="gaussian") ranef <- Ranef
dat <- data.frame(dat)
# fixed effects: obtain model matrix + compute linear predictor
mm_fixef <- model.matrix(as.formula(paste0("~ ", paste0(Fixef$slopes,collapse=" + "))), data=dat)
lp_fixef <- mm_fixef %*% t(t(Fixef$b))
# random effects: obtain model matrix + compute linear predictor
lp_ranef <- matrix(NA,nrow=nrow(dat),ncol=length(ranef))
for (i in 1:length(ranef)) { # i <- 1
mm_ranef <- model.matrix(as.formula(paste0("~ ",paste0(ranef[[i]]$slopes,collapse=" + "))), data=dat )
idx3 <- 1:length(Fixef$slopes)
if (length(grep(":",Fixef$slopes))>0) idx3 <- idx3[-grep(":",Fixef$slopes)]
if (any(Fixef$slopes=="0")) idx3 <- idx3[-which(Fixef$slopes=="0")]
if (any(Fixef$slopes=="1")) idx3 <- idx3[-which(Fixef$slopes=="1")]
#------------------------------------------------------------------
if (length(idx3)>0) {
dTmp <- data.frame(matrix(NA,nrow=nrow(dat), ncol=length(idx3)))
for (m in 1:length(idx3)) # m <- 1
dTmp[,m] <- as.character(factor(dat[,Fixef$slopes[idx3[m]]]))
cell <- c()
for (l in 1:nrow(dat)) # l <- 1
cell[l] <- paste0(dTmp[l,], collapse=".")
} else {
cell <- rep(1,nrow(dat))
}
if (min(table(cell))<(length(ranef[[i]]$sd)+1))
stop('Empirical=TRUE is not possible if cell size is too small.')
#for (l in 1:nrow(dat)) { # l <- 1
# cell[l] <- paste0(dat[l,Fixef$slopes[idx3]], collapse=".")
#cell <- paste0(dat[,Fixef$slopes[idx3]],sep=".")
#------------------------------------------------------------------
cellu <- unique(cell)
table(cell)
besselFact <- (nrow(dat)-1)/(nrow(dat)-length(unique(cellu)))
for (k in 1:length(cellu)) { # k <- 1
idx2 <- cell==cellu[k]
instances <- as.character(unique(dat[idx2,ranef[[i]]$by]))
# compute covariance matrix
if (length(ranef[[i]]$sd)==1) {
ranef[[i]]$cov <- ranef[[i]]$sd*sqrt(besselFact) %*% ranef[[i]]$cor %*% ranef[[i]]$sd*sqrt(besselFact)
} else {
ranef[[i]]$cov <- diag(ranef[[i]]$sd*sqrt(besselFact)) %*% ranef[[i]]$cor %*% diag(ranef[[i]]$sd*sqrt(besselFact))
}
w <- mvrnorm(length(instances), rep(0,length(ranef[[i]]$sd)), ranef[[i]]$cov, empirical=TRUE)
for (j in 1:length(instances)) { # j <- 1
idx <- dat[,ranef[[i]]$by]==instances[j]
lp_ranef[idx,i] <- mm_ranef[idx,] %*% t(t(w[j,]))
}
}
}
# combine linear predictions
lp <- rowSums(cbind(lp_fixef,lp_ranef))
# simulate response
if (family=="gaussian" | family=="lognormal") {
resp <- lp
idx3 <- 1:length(Fixef$slopes)
if (length(grep(":",Fixef$slopes))>0) idx3 <- idx3[-grep(":",Fixef$slopes)]
if (any(Fixef$slopes=="0")) idx3 <- idx3[-which(Fixef$slopes=="0")]
if (any(Fixef$slopes=="1")) idx3 <- idx3[-which(Fixef$slopes=="1")]
if (length(idx3)>0) {
dTmp <- data.frame(matrix(NA,nrow=nrow(dat), ncol=length(idx3)))
for (m in 1:length(idx3)) # m <- 1
dTmp[,m] <- as.character(factor(dat[,Fixef$slopes[idx3[m]]]))
cell <- c()
for (l in 1:nrow(dat)) # l <- 1
cell[l] <- paste0(dTmp[l,], collapse=".")
} else {
cell <- rep(1,nrow(dat))
}
#if (min(table(cell))<3)
# stop('Empirical=TRUE is not possible if cell size is too small.')
#cell <- c()
#for (l in 1:nrow(dat)) # l <- 1
# cell[l] <- paste0(dat[l,Fixef$slopes[idx3]], collapse=".")
#cell <- paste0(dat[,Fixef$slopes],sep=".")
cellu <- unique(cell)
besselFact <- (nrow(dat)-1)/(nrow(dat)-length(unique(cellu)))
for (i in 1:length(cellu)) { # i <- 1
idx <- cell==cellu[i]
resp[idx] <- resp[idx] + mvrnorm(sum(idx), 0, Sigma_res^2*besselFact, empirical=TRUE)
}
}
if (family=="lognormal")
resp <- exp(resp)
if (family=="binomial")
resp <- as.numeric(1/(1+exp(-lp)) > runif(length(lp)))
if (family=="lp")
resp <- as.numeric(lp)
as.numeric(resp)
}
#------------------------------------------------------
#==============================================================================
extractLMM <- function(LMM) {
#------------------------------------------------------------------------
# Extract parameters (fixed and random effects) and model terms
# from a fitted linear mixed-effects model for use in model simulations
# using simLMM()
#------------------------------------------------------------------------
# extract formula from LMM
form <- as.character(LMM@call)[2]
data <- data.frame(LMM@frame)
# extract terms from formula
Terms <- extractTerms(form, data=data)
# collect fixed effects terms (+ fixed-effects estimates)
Fixef <- list(slopes=Terms[[1]], b=fixef(LMM))
# obtain terms in random effects
Ranef <- list()
for (i in 1:length(Terms[[2]])) # i <- 1
Ranef[[i]] <- list(slopes=Terms[[2]][[i]])
# Store by, RE sd, and RE correlations
VC <- VarCorr(LMM)
nvc <- length(VC)
#idx <- list(); for (j in 1:length(Terms[[3]])) # j <- 1
# idx[[j]] <- grep(Terms[[3]],names(VC)[j])
for (i in 1:nvc) { # i <- 1
Ranef[[i]]$by <- Terms[[3]][i] # names(VC)[i]
Ranef[[i]]$sd <- attr(VC[[i]],"stddev")
Ranef[[i]]$cor <- attr(VC[[i]],"correlation")
}
Ranef[[nvc+1]] <- attr(VarCorr(LMM),"sc")
# return
return(Terms <- list(Fixef, Ranef, data))
}
#==============================================================================
extractTerms <- function(form, data) {
#------------------------------------------------------------------------
# Extract model terms from a linear mixed-effects formula
#------------------------------------------------------------------------
# extract terms from formula
termlab <- attr(terms(as.formula(form)),"term.labels")
interc <- attr(terms(as.formula(form)),"intercept")
# determine random effects
idx <- c(); for (i in 1:length(termlab)) # i <- 1
idx[i] <- any(strsplit(termlab[i],"")[[1]] == "|")
# collect fixed effects terms
Fixterm <- termlab[!idx]
if (interc==1) Fixterm <- c("1", Fixterm)
if (interc==0) Fixterm <- c("0", Fixterm)
# obtain terms in random effects
ranterms <- which(idx)
#------ expand formula ------
Xterms <- c(Fixterm)
for (i in 1:length(ranterms)) { # i <- 1
formRE <- termlab[ranterms[i]]
tmp <- which(strsplit(formRE,"")[[1]] == "|")
if (length(tmp)==2) {
formRE1 <- substr(formRE, 1, tmp[1]-1)
xterms <- c()
# obtain information whether intercept is present
mm <- model.matrix(as.formula(paste0("~",formRE1)), data=data)
intercept <- apply(mm,2,function(x) all(x==1))
if (any(intercept))
xterms <- paste0("1", substr(formRE, tmp[length(tmp)], nchar(formRE)))
# collect random slope terms
REslopes <- attr(terms(as.formula(paste0("~",formRE1))),"term.labels")
xterms <- c(xterms, paste0("0 + ",REslopes,"|", substr(formRE, tmp[length(tmp)]+1, nchar(formRE))))
Xterms <- c(Xterms, xterms)
} else {
Xterms <- c(Xterms, formRE)
}
}
termlab <- Xterms
# determine random effects
idx <- c(); for (i in 1:length(termlab)) # i <- 1
idx[i] <- any(strsplit(termlab[i],"")[[1]] == "|")
ranterms <- which(idx)
#----------------------------
Ranterm <- list()
BY <- c()
for (i in 1:length(ranterms)) { # i <- 1
formRE <- termlab[ranterms[i]]
# extract random slopes
formRE1 <- substr(formRE,1,which(strsplit(formRE,"")[[1]] == "|")[1]-1)
# obtain information whether intercept is present
mm <- model.matrix(as.formula(paste0("~",formRE1)), data=data)
intercept <- apply(mm,2,function(x) all(x==1))
if (any(intercept)) {intercept <- "1"} else {intercept <- "0"}
# collect random slope terms
Ranterm[[i]] <- c(intercept, attr(terms(as.formula(paste0("~",formRE1))),"term.labels"))
if (length(Ranterm[[i]])==0) Ranterm[[i]] <- "1"
# extract BY argument
tmp <- which(strsplit(formRE,"")[[1]] == "|")
formRE2 <- substr(formRE,tmp[length(tmp)]+1,nchar(formRE))
BY[i] <- attr(terms(as.formula(paste0("~",formRE2))),"term.labels")
}
(Terms <- list(Fixterm, Ranterm, BY))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.