R/lmer_bs.R
In predint: Prediction Intervals

Documented in lmer_bs

#' Sampling of bootstrap data from a given random effects model
#'
#' \code{lmer_bs()} draws bootstrap samples based on the estimates for the mean
#' and the variance components drawn from a random effects model fit with \code{lme4::lmer()}.
#' Contrary to \code{lme4::bootMer()}, the number of observations for each random factor
#' can vary between the original data set and the bootstrapped data. Random effects
#' in \code{model} have to be specified as \code{(1|random effect)}.
#'
#' @param model a random effects model of class lmerMod
#' @param newdat a \code{data.frame} with the same column names as the historical data
#' on which \code{model} depends
#' @param futmat_list a list that contains design matrices for each random factor
#' @param nboot number of bootstrap samples
#'
#' @details
#' The data sampling is based on a list of design matrices (one for each random factor)
#' that can be obtained if \code{newdat} and the model formula are provided to
#' \code{lme4::lFormula()}. Hence, each random factor that is part of the initial
#' model must have at least two replicates in \code{newdat}. \cr
#' If a random factor in the future data set does not have any replicate, a list
#' that contains design matrices (one for each random factor) can be provided via
#' \code{futmat_list}.
#'
#' @return A list of length \code{nboot} containing the bootstrapped observations.
#'
#' @export
#'
#' @importFrom lme4 fixef VarCorr lFormula
#' @importFrom stats rnorm var
#' @importFrom methods is
#'
#' @examples
#'
#' # loading lme4
#' library(lme4)
#'
#' # Fitting a random effects model based on c2_dat1
#'
#' fit <- lmer(y_ijk~(1|a)+(1|b)+(1|a:b), c2_dat1)
#' \donttest{summary(fit)}
#'
#' #----------------------------------------------------------------------------
#'
#' ### Using c2_dat2 as newdat
#'
#' c2_dat2
#'
#' lmer_bs(model=fit, newdat=c2_dat2, nboot=100)
#'
#' #----------------------------------------------------------------------------
#'
#' ### Using futmat_list
#'
#' # c2_dat4 has no replication for b. Hence the list of design matrices can not be
#' # generated by lme4::lFormula() and have to be provided by hand via futmat_list.
#'
#' c2_dat4
#'
#' # Build a list containing the design matrices
#'
#' fml <- vector(length=4, "list")
#'
#' names(fml) <- c("a:b", "b", "a", "Residual")
#'
#' fml[["a:b"]] <- matrix(nrow=6, ncol=2, data=c(1,1,0,0,0,0, 0,0,1,1,1,1))
#'
#' fml[["b"]] <- matrix(nrow=6, ncol=1, data=c(1,1,1,1,1,1))
#'
#' fml[["a"]] <- matrix(nrow=6, ncol=2, data=c(1,1,0,0,0,0, 0,0,1,1,1,1))
#'
#' fml[["Residual"]] <- diag(6)
#'
#' fml
#'
#' lmer_bs(model=fit, futmat_list=fml, nboot=100)
#'
lmer_bs <- function(model,
                    newdat=NULL,
                    futmat_list=NULL,
                    nboot){

        # Model must be of class lmerMod
        if(!is(model, "lmerMod")){
                stop("class(model) != lmerMod")
        }

        # Model must be a random effect model
        if(length(fixef(model)) != 1){
                stop("length(fixef(model)) must be 1 (the model must be a random effects model)")

        }

        ### All random effects must be specified as (1|random_effect)
        # Get forumla object
        f <- model@call$formula

        # Right part of formula as a string
        fs <- as.character(f)[3]


        # First substitue all whitespace characters with nothing ("") to make shure they don't disturb.
        # '\\s' is regex for 'all whitespace characters like space, tab, line break, ...)'
        fs <- gsub("\\s", "", fs)

        # Are there any occurances where '|' is not preceded by a '1'?
        # '[^1]' is regex for 'not 1' and '\\|' is just regex for '|'.
        wrong_formula <- grepl('[^1]\\|', fs)

        if(wrong_formula){
                stop("Random effects must be specified as (1|random_effect)")
        }

        #-----------------------------------------------------------------------
        ### Actual data

        if(is.null(newdat) & is.null(futmat_list)){
                stop("newdat and futmat_list are both NULL")
        }

        if(!is.null(newdat) & !is.null(futmat_list)){
                stop("newdat and futmat_list are both defined")
        }

        ### newdat
        if(!is.null(newdat)){
                # newdat needs to be a data.frame or 1
                if(is.data.frame(newdat)==FALSE){

                        stop("newdat needs to be a data.frame")
                }

                # Check conditions if newdat is a data frame
                if(is.data.frame(newdat)){

                        # colnames of historical data and new data must be the same
                        if(all(colnames(model@frame) == colnames(newdat))==FALSE){
                                stop("columnames of historical data and newdat are not the same")
                        }
                }
        }

        ### futmat_list
        else if(!is.list(futmat_list)){
                stop("futmat_list needs to be a list that contains the design matrices for each random effect")
        }

        #-----------------------------------------------------------------------

        # Sampling if newdat is given
        if(!is.null(newdat)){

                # SD for the random factors
                sd_hist <- as.data.frame(VarCorr(model))[,c("grp", "sdcor")]

                # Model Frame for the new data without fitting the data
                modelframe_list <- lFormula(eval(model), data=newdat)

                # Intercept (fixed effect)
                mu <- matrix(unname(modelframe_list$X * fixef(model)))

                # number of future observations
                n_fut <- length(modelframe_list$X)

                # Random effects matrix
                Zt_list <- modelframe_list$reTrms$Ztlist

                # Names of Zt_list should be the same as for the SD in sd_hist
                names(Zt_list) <- gsub("\\s", "", gsub("[1|]", "", names(Zt_list)))

                # Design Matrix for random effects with residuals
                Zt_list <- lapply(X=Zt_list, as.matrix)
                Z_list <- c(lapply(X=Zt_list, FUN=t), Residual=list(diag(1, nrow=n_fut)))

                # Sampling of B bootstrap samples
                bsdat_list <- vector(length=nboot, "list")

                for(b in 1:length(bsdat_list)){

                        # Sampling of random effects
                        u_list <- vector("list",nrow(sd_hist))
                        names(u_list) <- names(Z_list)

                        for(c in 1:length(u_list)){
                                u_c <- rnorm(n=ncol(Z_list[[c]]), mean=0, sd=sd_hist[c, 2])
                                u_list[[c]] <- u_c
                        }

                        # Random effects times design matrix
                        ZU_list <- Map(function(x, y)  x%*%y, Z_list, u_list)
                        ZU_list[["mu"]] <- mu

                        # Bootstrap data
                        bsdat_list[[b]] <- rowSums(matrix(unlist(ZU_list), ncol = length(ZU_list)))
                }
        }


        #-----------------------------------------------------------------------

                ### Bootstrapping future observations if futmat_list is given

                if(!is.null(futmat_list) & is.null(newdat)){

                        # SD for the random factors
                        sd_hist <- as.data.frame(VarCorr(model))[,c("grp", "sdcor")]

                        if(length(names(futmat_list)) != length(sd_hist$grp)){
                                stop("length(names(futmat_list)) is not the same as the number of random factors plus the residuals")
                        }

                        if(!all(unlist(lapply(X=futmat_list, FUN=is.matrix)))){
                                stop("all elemts of futmat_list have to be a matrix")
                        }

                        # all matrices should contain only 1 and 0
                        ozfun <- function(mat){all(mat %in% c(0, 1))}
                        if(!all(unlist(lapply(futmat_list, ozfun)))){
                                stop("all matrices should contain only 1 and 0")
                        }

                        if(var(unname(unlist(lapply(X=futmat_list, FUN=nrow)))) != 0){
                                stop("all matrices need to have the same numbers of rows")
                        }

                        if(all(sd_hist$grp == names(futmat_list))==FALSE){
                                stop("futmat_list needs to have the same names as the random factors in the model. Maybe you forgot the residuals?")
                        }

                        else{
                                # Intercept (fixed effect)
                                mu <- matrix(1, nrow(futmat_list[[1]])) * fixef(model)

                                Z_list <- futmat_list

                                # Sampling of B bootstrap samples
                                bsdat_list <- vector(length=nboot, "list")

                                for(b in 1:length(bsdat_list)){

                                        # Sampling of random effects
                                        u_list <- vector("list",nrow(sd_hist))
                                        names(u_list) <- names(Z_list)

                                        for(c in 1:length(u_list)){

                                                u_c <- rnorm(n=ncol(Z_list[[c]]), mean=0,
                                                             sd=sd_hist[c, 2])
                                                u_list[[c]] <- u_c
                                        }

                                        # Random effects times design matrix
                                        ZU_list <- Map(function(x, y)  x%*%y, Z_list, u_list)
                                        ZU_list[["mu"]] <- mu

                                        # Bootstrap data
                                        bsdat_list[[b]] <- rowSums(matrix(unlist(ZU_list),
                                                                          ncol = length(ZU_list)))
                                }
                        }
                }

        return(bsdat_list)
}