### model.frame.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: Jun 7 2021 (14:57)
## Version:
## Last-Updated: aug 8 2024 (13:32)
## By: Brice Ozenne
## Update #: 253
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * model.frame.lmm (documentation)
##' @title Extracting the Model Frame from a Linear Mixed Model
##' @description Contruct a data frame containing all the variables involved in a Linear Mixed Model.
##' @param formula a \code{lmm} object.
##' @param newdata [data.frame] dataset relative to which the model frame should be constructed.
##' @param type [character] by default returns the processed dataset used to fit the Linear Mixed Model (\code{NULL}).
##' Can be used to add rows relative to missing repetitions (\code{"add.NA"})
##' or obtain a dataset with unique sets of covariates (\code{"unique"}) with respect to the mean structure.
##' @param add.index [logical] should columns indexing the row number from the original dataset, time variable, cluster variable, strata variable
##' be added to the output?
##' @param na.rm [logical] should rows containing missing values for the variables used in the linear mixed model be removed?
##' Not relevant when argument type is \code{"unique"}.
##' @param ... not used. For compatibility with the generic method.
##'
##' @details Column \code{"XXindexXX"} refers to the row of the original dataset (i.e. passed to argument \code{data} when calling \code{\link{lmm}}).
##' When adding rows relative to missing repetitions, since there is no row in the original dataset, a negative sign is used.
##'
##' @keywords methods
##'
##' @examples
##' data("armd.wide", package = "nlmeU")
##' e.lmH <- lmm(visual52 ~ lesion, structure = IND(~treat.f), data = armd.wide)
##' model.frame(e.lmH)
##' model.frame(e.lmH, add.index = TRUE)
##' model.frame(e.lmH, type = "unique")
##'
##' data("gastricbypassL", package = "LMMstar")
##' dfL.NNA <- na.omit(gastricbypassL)
##' e.lmm <- lmm(glucagonAUC ~ time, repetition = ~visit|id, data = dfL.NNA, df = FALSE)
##' model.frame(e.lmm, type = "unique")
##' model.frame(e.lmm, type = c("unique","correlation"))
##' model.frame(e.lmm, type = "add.NA", add.index = TRUE)
## * model.frame.lmm (code)
##' @export
model.frame.lmm <- function(formula, newdata = NULL, type = NULL, add.index = FALSE, na.rm = TRUE, ...){
mycall <- match.call()
## ** extract from object
var.manifest <- stats::variable.names(formula)
var.outcome <- formula$outcome$var
var.cluster <- formula$cluster$var
var.time <- formula$time$var
n.time <- formula$time$n
var.strata <- formula$strata$var
## ** normalize user imput
dots <- list(...)
if("options" %in% names(dots) && !is.null(dots$options)){
options <- dots$options
}else{
options <- LMMstar.options()
}
dots$options <- NULL
if(length(dots)>0){
stop("Unknown argument(s) \'",paste(names(dots),collapse="\' \'"),"\'. \n")
}
if(!is.null(type)){
type[1] <- match.arg(type[1], c("add.NA","unique"))
}
if(!is.null(type) && type[1]=="add.NA"){
if("na.rm" %in% names(mycall) && mycall$na.rm){
stop("Argument \'na.rm\' must be FALSE when arguement \'type\' equals \"add.NA\". \n")
}
na.rm <- FALSE
}
## ** generate data.frame
if(is.null(type) || type[1]=="add.NA"){
## *** reformat newdata
if(is.null(newdata) && (is.null(formula$index.na) || na.rm)){
out <- formula$data
index.na <- formula$index.na
index.data <- formula$design[c("index.cluster","index.clusterTime","index.clusterStrata")]
}else{ ## new data or obtain model frame without removing the row with missing values
if(is.null(newdata)){
newdata <- formula$data.original
}else if(any(setdiff(var.manifest,var.outcome) %in% names(newdata) == FALSE)){
missing.col <- setdiff(var.manifest,var.outcome)[setdiff(var.manifest,var.outcome) %in% names(newdata)]
stop("Incorrect argument \'newdata\' due to missing column(s): \"",paste(missing.col, collapse = "\", \""),"\". \n")
}
index.data <- stats::model.matrix(formula, newdata = newdata, effects = "index", na.rm = na.rm, simplify = FALSE)
out <- index.data$data
if(na.rm){
index.na <- index.data$index.na
}else{
index.na <- NULL
}
}
}else if(type[1]=="unique"){
if(length(type)==1){
effects <- "mean"
}else{
effects <- type[-1]
if(length(effects) == 1 && effects == "all"){
effects <- c("mean","variance","correlation")
}else{
effects <- match.arg(effects, c("mean","variance","correlation"), several.ok = TRUE)
}
}
if(is.null(newdata)){
design <- formula$design
}else{
design <- stats::model.matrix(formula, newdata = newdata, effects = "index")
}
var.cluster <- formula$cluster$var
var.time <- formula$time$var
## *** reorder data by cluster and time
newdata.index.cluster <- attr(design$index.cluster,"vectorwise")
newdata.index.time <- attr(design$index.clusterTime,"vectorwise")
vec.reorder <- order(newdata.index.cluster, newdata.index.time)
if(is.null(newdata)){
newdata <- formula$data[vec.reorder,,drop=FALSE]
}
## *** keep unique mean and/or covariance pattern
if("variance" %in% effects || "correlation" %in% effects){
keep.cluster <- sapply(attr(design$vcov$pattern,"list"),"[[",1)
newdata.row <- which(newdata$XXcluster.indexXX %in% keep.cluster)
var.vcov <- stats::variable.names(formula, effects = setdiff(effects, "mean"))
newdata.col <- stats::na.omit(unique(c(attr(var.cluster,"original"),unlist(var.vcov))))
}else{
newdata.row <- NULL
newdata.col <- NULL
}
if("mean" %in% effects){
var.mean <- stats::variable.names(formula, effects = "mean")
## but putting first clusters (possibly) kept for the covariance covariates
vec.reorder2 <- c(newdata.row,setdiff(vec.reorder,newdata.row))
## extract unique mean covariate sets
newdata.row <- sort(union(newdata.row, which(!duplicated(newdata[,names(newdata) %in% var.mean,drop=FALSE]))))
newdata.col <- union(newdata.col, var.mean)
}
## *** subset
out <- newdata[newdata.row,,drop=FALSE]
index.na <- NULL
}
## ** add missing row (NA outcome)
if(!is.null(type) && type[1]=="add.NA"){
n.clusterTime <- lengths(index.data$index.clusterTime)
incomplete.cluster <- which(n.clusterTime!=n.time)
if(length(incomplete.cluster)>0){
if(all(is.na(attr(var.time,"original")))){
stop("Cannot add rows relative to missing repetitions without time variable. \n",
"Consider specifying the argument \'repetiton\' when calling lmm() with something like ~time|id. \n")
}
## *** classify covariates
mean.type <- stats::variable.names(formula, effects = "mean.type")
var.meanBaseline <- names(mean.type)[mean.type=="baseline"] ## baseline covariate
var.meanTime <- names(mean.type)[mean.type=="time"] ## time covariate or simple transformation of time
var.meanTimeVar <- names(mean.type)[mean.type=="timevar"] ## time varying covariate
ls.tableTime.var <- attr(mean.type,"table") ## conversion from time variable to covariate
## *** augment dataset
var2.meanBaseline <- unique(stats::na.omit(c(var.meanBaseline, attr(var.cluster, "original"), attr(var.strata, "original"), "XXclusterXX", "XXstrataXX", "XXcluster.indexXX", "XXstrata.indexXX")))
var2.meanTime <- unique(stats::na.omit(c(var.meanTime, attr(var.time, "original"))))
if(any(var2.meanTime %in% names(ls.tableTime.var) == FALSE)){
var.newtime <- var2.meanTime[var2.meanTime %in% names(ls.tableTime.var) == FALSE]
ls.tableTime.var[var.newtime] <- lapply(var.newtime, function(iVar){ ## iVar <- var.newtime
attr(formula$time$level,"original")[,iVar]
})
}
ls.newrow <- lapply(incomplete.cluster, function(iC){ ## iC <- 1
iDF <- out[index.data$index.cluster[[iC]],,drop=FALSE]
iN.missingTime <- n.time - NROW(iDF)
iOut <- stats::setNames(vector(mode = "list", length = NCOL(iDF)), colnames(iDF))
iOut[[var.outcome]] <- rep(as.numeric(NA), iN.missingTime)
iOut$XXindexXX <- rep(as.numeric(NA), iN.missingTime)
iOut$XXtime.indexXX <- setdiff(1:n.time,iDF$XXtime.indexXX)
iOut$XXtimeXX <- formula$time$levels[iOut$XXtime.indexXX]
iOut[var2.meanBaseline] <- lapply(var2.meanBaseline, function(iVar){
rep(iDF[1,iVar], iN.missingTime)
})
iOut[var2.meanTime] <- lapply(var2.meanTime, function(iVar){ ## iVar <- meanTime.var
ls.tableTime.var[[iVar]][iOut$XXtime.indexXX]
})
iOut[var.meanTimeVar] <- lapply(var.meanTimeVar, function(iVar){
rep(NA, iN.missingTime)
})
return(as.data.frame(iOut))
})
out <- rbind(out, do.call(rbind,ls.newrow))
out[is.na(out$XXindexXX),"XXindexXX"] <- -(1:sum(is.na(out$XXindexXX)))
}
}
## ** export
if(add.index==FALSE){
out <- out[intersect(names(out),var.manifest)]
}
if(length(index.na)>0){
attr(out,"index.na") <- index.na
}
return(out)
}
## * model.frame.lmmCC
##' @export
model.frame.lmmCC <- model.frame.lmm
##----------------------------------------------------------------------
### model.frame.R ends here
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.