Nothing
### lmmCC.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar 27 2023 (17:33)
## Version:
## Last-Updated: jul 26 2023 (11:07)
## By: Brice Ozenne
## Update #: 184
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * lmmCC (documentation)
##' @title Fit Linear Mixed Model on Complete Case data
##' @description Fit a linear mixed model on the complete case data.
##' Mostly useful as a sanity check, to match the results of a univariate analysis on the change.
##' @name lmmCC
##'
##' @param object [formula] Specify the model for the mean.
##' On the left hand side the outcome and on the right hand side the covariates affecting the mean value.
##' E.g. Y ~ Gender + Gene.
##' @param repetition [formula] Specify the structure of the data: the time/repetition variable and the grouping variable, e.g. ~ time|id.
##' @param data [data.frame] dataset (in the long format) containing the observations.
##' @param lm.change [logical] Should a linear model on the change in outcome be estimated. Only possible with two repetitions.
##' Will match the mixed model if the later includes repetition-dependent effects for all covariates.
##' @param name.time [character] name of the time variable.
##' @param df [logical] Should the degree of freedom be computed using a Satterthwaite approximation?
##' @param trace [interger, >0] Show the progress of the execution of the function.
##' @param control [list] Control values for the optimization method.
##' The element \code{optimizer} indicates which optimizer to use and additional argument will be pass to the optimizer.
##' @param ... Not used. For compatibility with the generic method.
##'
##' @return A \code{lmmCC} object, which inherits from \code{lmm}.
##'
##' @keywords models
## * lmmCC (examples)
##' @examples
##' #### 1- simulate data in the wide format ####
##' set.seed(10)
##' dW <- sampleRem(100, n.times = 3, format = "wide")
##' dW$Y3[1:10] <- NA
##' dW$change2 <- dW$Y2 - dW$Y1
##' dW$change3 <- dW$Y3 - dW$Y1
##'
##' e.lm2 <- lm(change2 ~ X1 + X2, data = dW)
##' summary(e.lm2)$coef
##' e.lm3 <- lm(change3 ~ X1 + X2, data = dW)
##' summary(e.lm3)$coef
##'
##' #### 2- complete case LMM from LM ####
##' e.lmmCC2 <- lmmCC(e.lm2, repetition = change2~Y2-Y1)
##' model.tables(e.lmmCC2)
##' e.lmmCC3 <- lmmCC(e.lm3, repetition = change3~Y3-Y1)
##' model.tables(e.lmmCC3)
##'
##' #### 3- complete case LMM ####
##' dL <- reshape(dW[,c("id","X1","X2","Y1","Y2","Y3")],
##' direction = "long",
##' varying = c("Y1","Y2","Y3"), sep = "", idvar = "id")
##' dL$time <- as.character(dL$time)
##'
##' e.lmm2 <- lmmCC(Y ~ time + time*X1 + time*X2, repetition = ~time|id,
##' data = dL[dL$time %in% 1:2,])
##' model.tables(e.lmm2)
##' e.lmm3.bis <- lmmCC(Y ~ time + time*X1 + time*X2, repetition = ~time|id,
##' data = dL[dL$time %in% c(1,3),], lm.change = TRUE)
##' model.tables(e.lmm3.bis)
##' e.lmm3.bis$lm.change
##'
##' @export
`lmmCC` <-
function(object, ...) UseMethod("lmmCC")
## * lmmCC.formula (code)
##' @export
##' @rdname lmmCC
lmmCC.formula <- function(object, repetition, data,
lm.change = FALSE,
df = NULL, trace = TRUE, control = NULL, ...){
## ** normalize arguments
## repetition
if(!inherits(repetition,"formula")){
stop("Argument \'repetition\' must be of class formula, something like: ~ time|cluster or strata ~ time|cluster. \n")
}
res.split <- strsplit(deparse(repetition),"|", fixed = TRUE)[[1]]
if(length(res.split)!=2){
stop("Incorrect specification of argument \'repetition\'. \n",
"The symbol | should only exacly once, something like: ~ time|cluster or strata ~ time|cluster. \n")
}
var.cluster <- trimws(res.split[2], which = "both")
if(var.cluster %in% names(data) == FALSE){
stop("Argument \'repetition\' incompatible with argument \'data\'. \n",
"Could not find the cluster variable \"",var.cluster,"\" in argument \'data\'. \n")
}
var.time <- all.vars(stats::delete.response(stats::terms(stats::as.formula(res.split[1]))))
if(var.time %in% names(data) == FALSE){
stop("Argument \'repetition\' incompatible with argument \'data\'. \n",
"Could not find the time variable \"",var.time,"\" in argument \'data\'. \n")
}
## data
data <- as.data.frame(data)
if(is.factor(data[[var.time]])){
data[[var.time]] <- droplevels(data[[var.time]])
}
if(is.factor(data[[var.cluster]])){
data[[var.cluster]] <- droplevels(data[[var.cluster]])
}
## dots
dots <- list(...)
if(length(dots)>0){
stop("Unknown argument(s) \'",paste(names(dots),collapse="\' \'"),"\'. \n")
}
## ** complete case dataset
Uvars <- unique(c(all.vars(object),all.vars(repetition)))
## identify clusters with missing values
test.NA1 <- rowSums(is.na(data[,Uvars,drop=FALSE]))>0
id.NA1 <- unique(data[[var.cluster]][test.NA1])
## identify clusters with missing timepoints
test.NA2 <- rowSums(table(data[[var.cluster]],data[[var.time]])==0)>0
id.NA2 <- unique(data[[var.cluster]][test.NA2])
## remove missing values
id.NA <- union(id.NA1,id.NA2)
if(length(id.NA)>0){
if(trace>0){
cat("Remove ",length(id.NA)," clusters (",sum(data[[var.cluster]] %in% id.NA)," observations) \n",
" - ",sum(test.NA1)," observations with missing data (",length(id.NA1)," clusters) \n",
" - ",sum(test.NA2)," missing repetitions (",length(id.NA2)," clusters) \n",
sep="")
}
data.NNA <- data[data[[var.cluster]] %in% id.NA == FALSE,]
}else{
data.NNA <- data
}
## update factor levels
if(is.factor(data.NNA[[var.cluster]])){
data.NNA[[var.cluster]] <- droplevels(data.NNA[[var.cluster]])
}
## ** fit LMM
out <- lmm(formula = object, repetition = repetition, data = data.NNA,
structure = "UN", method.fit = "REML", df = df, type.information = "observed",
trace = trace-1, control = control)
## ** fit LM
if(lm.change){
var.outcome <- Uvars[1]
if(is.factor(data[[var.time]])){
Utime <- levels(data[[var.time]])
}else{
Utime <- sort(unique(data[[var.time]]))
}
if(length(Utime)!=2){
stop("Can only fit a univariate linear model on the change with two timepoint. \n",
"Consider setting argument \'lm.change\' to FALSE. \n")
}
var.X <- attr(out$design$mean,"variable")
n.level <- stats::setNames(rep(2, length(var.X)), var.X)
if(length(out$xfactor$mean)>0){
n.level[names(out$xfactor$mean)] <- (lengths(out$xfactor$mean)-1)*2
}
if(NCOL(out$design$mean) != sum(n.level)){
warning("Number of coefficients in the mixed model is not twice the number of variables. \n",
"Results will likely differ between lmmCC and lm due to missing interactions. \n")
}
Xvars <- all.vars(stats::delete.response(stats::terms(stats::as.formula(object))))
Xvars0 <- setdiff(Xvars,var.time)
if(length(Xvars0)>0){
test.cstX <- by(data.NNA[Xvars0],data.NNA[[var.cluster]],function(iData){NROW(unique(iData))})
if(any(test.cstX!=1)){
stop("Covariates should be constant within cluster to be able to fit a univariate linear model. \n",
"Consider setting argument \'lm.change\' to FALSE. \n")
}
}
dataW.NNA <- stats::reshape(data.NNA, direction = "wide", timevar = var.time,
idvar = var.cluster, v.names = var.outcome, sep = ".")
dataW.NNA$change <- dataW.NNA[[paste(var.outcome,Utime[2],sep=".")]] - dataW.NNA[[paste(var.outcome,Utime[1],sep=".")]]
if(length(Xvars0)>0){
formula.lm <- paste0("change~",paste(Xvars0, collapse = "+"))
}else{
formula.lm <- "change~1"
}
out$lm.change <- stats::lm(formula.lm, data = dataW.NNA)
out$lm.change$data <- dataW.NNA
}
## ** export
class(out) <- append("lmmCC",class(out))
return(out)
}
## * lmmCC.lm (code)
##' @export
##' @rdname lmmCC
lmmCC.lm <- function(object, repetition, data,
name.time = "time",
df = NULL, trace = TRUE, control = NULL, ...){
## ** normalize arguments
## data
if(missing(data)){
data <- try(eval(object$call$data), silent = TRUE)
if(inherits(data,"try-error")){
stop("Could not recover the original dataset. Consider specifying the argument \'data\'. \n")
}
}else{
test <- stats::update(object, data = data)
if(abs(logLik(test)-logLik(object))>1e-12){
warning("Argument \'data\' does not match argument \'object\'. \n",
"Difference in log-likelihood of the corresponding linear model of ",as.numeric(logLik(test)-logLik(object)),".\n")
}
}
data <- as.data.frame(data)
## repetition
if(!is.list(repetition)){
repetition <- list(repetition)
}
if(length(repetition)>2){
stop("Cannot handle more than 2 time-varying variables. \n")
}
if(any(sapply(repetition, inherits,"formula")==FALSE)){
stop("Argument \'repetition\' should be formula or list of formula, one for each time varying variable. \n",
"Typically Ychange ~ Yfollowup - Ybaseline. \n")
}
repetition.char <- lapply(repetition, deparse)
test.id <- sapply(repetition.char, grepl, pattern = "|", fixed = TRUE)
if(any(test.id)){
ls.var.cluster <- lapply(repetition.char[test.id], function(iRep){trimws(strsplit(iRep, split = "|", fixed = TRUE)[[1]][2])})
if(any(lengths(ls.var.cluster)!=1)){
stop("Incorrect argument \'formula\': there must be a single cluster variable per formula. \n",
"Typically Ychange ~ Yfollowup - Ybaseline|id. \n")
}
var.cluster <- unique(unlist(ls.var.cluster))
if(any(lengths(ls.var.cluster)!=1)){
stop("Incorrect argument \'formula\': the cluster variable must be the same for all formula. \n",
"Typically Ychange ~ Yfollowup - Ybaseline|id and Zchange ~ Zfollowup - Zbaseline|id. \n")
}
if(var.cluster %in% names(data) == FALSE){
stop("Inconsistency between argument \'data\' and argument \'formula\'. \n",
"Could not find a column named \"",var.cluster,"\" in \'data\'. \n")
}
repetition <- sapply(repetition.char, function(iRep){stats::as.formula(strsplit(iRep, split = "|", fixed = TRUE)[[1]][1])})
}else{
var.cluster <- "CCclusterCC"
if(var.cluster %in% names(data)){
stop("Argument \'data\' should not contain a column named \"",var.cluster,"\" as this name is used internally by the lmm function. \n")
}
data$CCclusterCC <- 1:NROW(data)
}
ls.XY <- lapply(repetition, all.vars)
if(any(lengths(ls.XY)!=3)){
stop("Argument \'repetition\' should be formula or list of formula, each containing three variables. \n",
"Typically Ychange ~ Yfollowup - Ybaseline. \n")
}
ls.X <- lapply(repetition, function(iFormula){all.vars(stats::delete.response(stats::terms(stats::as.formula(iFormula))))})
if(any(lengths(ls.X)!=2)){
stop("Argument \'repetition\' should be formula or list of formula, each containing three variables. \n",
"Typically Ychange ~ Yfollowup - Ybaseline. \n")
}
M.XY <- do.call(rbind,ls.XY)
if(any(as.character(M.XY) %in% names(data) == FALSE)){
stop.miss <- as.character(M.XY)[as.character(M.XY) %in% names(data) == FALSE]
stop("Inconsistency between argument \'repetition\' and argument \'data\'. \n",
"Could not find variable(s) \"",paste(stop.miss, collapse = "\", \""),"\" in \'data\'. \n")
}
object.vars <- all.vars(object$terms)
if(any(M.XY[,1] %in% object.vars == FALSE)){
stop.miss <- M.XY[,1][M.XY[,1] %in% object.vars == FALSE]
stop("Inconsistency between argument \'repetition\' and argument \'object\'. \n",
"Variable(s) \"",paste(stop.miss, collapse = "\", \""),"\" are not used in \'object\'. \n")
}
if(name.time %in% names(data)){
stop("Argument \'data\' should not contain a column named \"",name.time,"\" as this name is used internally by the lmm function. \n")
}
## dots
dots <- list(...)
if(length(dots)>0){
stop("Unknown argument(s) \'",paste(names(dots),collapse="\' \'"),"\'. \n")
}
## ** move from the wide to the long format
dataW <- data[,c(var.cluster,setdiff(object.vars,M.XY[,1]),M.XY[,2],M.XY[,3]),drop=FALSE]
## find unique names
vec.CS <- apply(M.XY,1,function(iVec){.commonString(iVec[2],iVec[3])})
if(any(is.na(vec.CS))){
stop.NA <- unlist(vec.CS)[unlist(vec.CS) %in% names(data)]
stop("Incorrect argument \'repetition\'. \n",
"Could not find a common name between the time varying variables. \n")
}
if(any(c(vec.CS,paste(vec.CS, collapse=".")) %in% names(data))){
stop.com <- c(vec.CS,paste(vec.CS, collapse="."))[c(vec.CS,paste(vec.CS, collapse=".")) %in% names(data)]
stop("Column name(s) \"",paste(stop.com, collapse ="\" \""),"\" already exist in argument \'data\'. \n",
"These name(s) will be used internally and may conflict with existing data.\n")
}
index.outcome <- which(M.XY[,1] %in% object.vars[1])
if(length(index.outcome)==0){
stop("Argument \'repetition\' should specify how the outcome (Ychange) can be split into time-specific variable (Ybaseline, Yfollowup) \n",
"Something like Ychange ~ Yfollowup - Ybaseline")
}
level.time <- rev(unlist(ls.X))
name.outcome <- paste(vec.CS,collapse="")
dataL <- stats::reshape(dataW, direction = "long",
varying = level.time,
v.names = name.outcome,
timevar = name.time,
idvar = var.cluster)
index.X <- setdiff(object.vars[-1], M.XY[,1])
if(length(repetition)==2){
dataL[[name.time]] <- factor(dataL[[name.time]], labels = level.time)
## dataL[[name.time3]] <- as.numeric(dataL[[name.time]]==level.time[3])
## dataL[[name.time4]] <- as.numeric(dataL[[name.time]]==level.time[4])
}
## ** fit LMM
formula.txt <- paste0(name.outcome,"~",name.time)
if(length(index.X)>0){
formula.txt <- paste0(formula.txt,"+",name.time,"*(",paste(index.X, collapse = "+"),")")
}
repetition.txt <- paste0("~",name.time,"|",var.cluster)
out <- lmmCC(object = stats::as.formula(formula.txt),
repetition = stats::as.formula(repetition.txt),
data = dataL,
lm.change = FALSE,
df = df, trace = trace, control = control)
out$lm.change <- object
## ** export
return(out)
}
## * .commonString
## find the common consecutive part between two strings
## copied from https://stackoverflow.com/questions/35381180/identify-a-common-pattern
## .commonString("aaachang2","aaabbb")
## .commonString("aaa235change2","aaachangebbb")
## .commonString("abcdef","xyz")
.commonString <- function(string1, string2){
if(is.na(string1) || is.na(string2)){return(NA)}
A <- strsplit(string1, "")[[1]]
B <- strsplit(string2, "")[[1]]
L <- matrix(0, length(A), length(B))
ones <- which(outer(A, B, "=="), arr.ind = TRUE)
ones <- ones[order(ones[, 1]), ,drop=FALSE]
if(NROW(ones)==0){return(NA)}
for(i in 1:NROW(ones)) {
v <- ones[i, , drop = FALSE]
L[v] <- ifelse(any(v == 1), 1, L[v - 1] + 1)
}
out <- paste0(A[(-max(L) + 1):0 + which(L == max(L), arr.ind = TRUE)[1]], collapse = "")
return(out)
}
##----------------------------------------------------------------------
### lmmCC.R ends here
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.