### permlme.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: maj 28 2021 (13:19)
## Version:
## Last-Updated: nov 7 2022 (18:26)
## By: Brice Ozenne
## Update #: 22
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * permlme (documentation)
##' @title Permutation test for fixed effects of a linear mixed model.
##' @description Permutation test for a single fixed effects of a linear mixed model.
##' This is essentially a re-implementation of the function \code{predictmeans::perlmer} limited to fixed effects and random intercept models.
##' Should be faster than the original function and output the test statistic relative to each permutation.
##' @name permlme
##'
##' @param lme0 [lme] model under the null.
##' @param lme1 [lme] model under the alternative.
##' @param data [data.frame] dataset used to fit the models. Normally not need as the data is extracted from the models.
##' @param seed [integer] specify a random number generator seed, for reproducible results.
##' @param statistic [character] type of test statistic: Wald test and/or likelihood ratio test.
##' @param perm.X [logical] should the covariates be also permuted?
##' @param perm.index.perm [logical] should the index used to permute of the normalized residuals be output?
##' @param nperm [integer] number of permutations.
##' @param cpus [cpus] number of cpus used to perform in parallel the permutations.
##' @param trace [logical] should a progress bar displayed to follow the progress of the permutations?
##'
##' @references Oliver E. Lee and Thomas M. Braun (2012), Permutation Tests for Random Effects in Linear Mixed Models. Biometrics, Journal 68(2).
##'
##' @return a list containing \itemize{
##' \item LRT: Likelihood ratio test for the coefficient. The p-value corresponding to the permutation test is denoted \code{p-value.perm}.
##' \item Wald: Wald test for the coefficient. The p-value corresponding to the permutation test is denoted \code{p-value.perm}.
##' \item n.perm: number of successful permutations
##' \item index.perm: index used to permute of the normalized residuals
##' }
## * permlme (examples)
##' @examples
##' n.perm <- 1000
##'
##' ## ** simulate data
##' library(lava)
##' library(data.table)
##' m <- lvm(Y1~1*eta+effect*conc,
##' Y2~1*eta+effect*conc,
##' Y3~1*eta+effect*conc,
##' Y4~1*eta+effect*conc,
##' Y5~1*eta+effect*conc)
##' latent(m) <- ~eta
##' transform(m, Id~eta) <- function(x,...){1:NROW(x)}
##'
##' set.seed(1)
##' dtW <- data.table::data.table(lava::sim(m, p = c("effect" = 0.1), n = 50, latent = FALSE))
##' dtL <- data.table::melt(dtW, id.vars = c("Id","conc"))
##' setkeyv(dtL,"Id")
##'
##' ## ** "homemade" permutation test
##' library(nlme)
##' e.lmeH0 <- nlme::lme(value ~ variable, random =~1|Id, data = dtL)
##' e.lmeH1 <- nlme::lme(value ~ variable + conc, random=~1|Id, data = dtL)
##' e.permlme <- permlme(e.lmeH0, e.lmeH1, perm.X = TRUE, nperm = n.perm, seed = 10, statistic = "LRT")
##' e.permlme ## p=0.2197802
##'
##' ## ** permutation test via the function permlmer
##' library(lme4)
##' library(predictmeans)
##' e.lmerH0 <- lme4::lmer(value ~ variable + (1|Id), data = dtL, REML = FALSE)
##' e.lmerH1 <- lme4::lmer(value ~ variable + conc + (1|Id), data = dtL, REML = FALSE)
##' e.permlmer <- predictmeans::permlmer(e.lmerH0, e.lmerH1, nperm = n.perm, ncore = 1, seed = 10)
##' e.permlmer
##'
##' ## ** compare values
##' e.permlme$LRT[2,"p.value.perm"]-e.permlmer[["Perm-p"]][2] ## same!!!
##'
##' ## ** compare timing
##' library(microbenchmark)
##' microbenchmark(manual0 = permlme(e.lmeH0, e.lmeH1, nperm = 100, seed = 10, trace = FALSE),
##' manual = permlme(e.lmeH0, e.lmeH1, nperm = 100, seed = 10, trace = FALSE, statistic = c("LRT","Wald")),
##' package = predictmeans::permlmer(e.lmerH0, e.lmerH1, nperm = 100, ncore = 1, seed = 10),
##' times = 10)
##'
##' ## Unit: milliseconds (n=5)
##' ## expr min lq mean median uq max neval cld
##' ## manual0 689.9769 695.0086 752.4042 749.6891 770.365 918.2436 10 a
##' ## manual 1963.7686 2053.5608 2111.0911 2135.7464 2147.678 2215.7608 10 b
##' ## package 3433.9876 3456.7219 3591.3811 3641.0649 3662.663 3688.3372 10 c
##'
##' ## Unit: milliseconds
##' ## expr min lq mean median uq max neval cld
##' ## manual0 912.5592 939.0943 1117.806 1110.615 1278.572 1378.842 10 a
##' ## manual 2646.7307 3291.1398 3368.645 3352.098 3466.784 3829.754 10 b
##' ## package 4022.4287 4479.0607 4795.183 4530.408 4903.027 6810.617 10 c
##'
##' ## ** multiple testing
##'
##' ## *** unadjusted p-value
##' p.value <- e.permlme$LRT[["p.value.perm"]][2]
##'
##' ## *** perfect correlation between tests
##' e.permlme2 <- permlme(e.lmeH0, e.lmeH1, perm.X = TRUE, nperm = n.perm, seed = 10, statistic = "LRT")
##' e.permMax <- pmax(e.permlme$stat.perm[,"LRT"],e.permlme2$stat.perm[,"LRT"])
##' e.obsMax <- pmax(e.permlme$LRT[["L.Ratio"]][2],e.permlme2$LRT[["L.Ratio"]][2])
##' (sum(e.permMax > e.obsMax)+1)/(n.perm+1) - p.value ## should be 0
##'
##' ## *** perfect independence between tests
##' set.seed(11)
##' dtW2 <- data.table::data.table(lava::sim(m, p = c("effect" = 0.1), n = 50, latent = FALSE))
##' dtL2 <- data.table::melt(dtW2, id.vars = c("Id","conc"))
##' setkeyv(dtL2,"Id")
##'
##' e.lmeH0.bis <- nlme::lme(value ~ variable, random =~1|Id, data = dtL2)
##' e.lmeH1.bis <- nlme::lme(value ~ variable + conc, random=~1|Id, data = dtL2)
##' e.permlme2 <- permlme(e.lmeH0.bis, e.lmeH1.bis, perm.X = TRUE, nperm = n.perm, seed = 10, statistic = "LRT")
##' p.value2 <- e.permlme2$LRT[["p.value.perm"]][2]
##'
##' e.permMax <- pmax(e.permlme$stat.perm[,"LRT"],e.permlme2$stat.perm[,"LRT"])
##' e.obsMax <- pmax(e.permlme$LRT[["L.Ratio"]][2],e.permlme2$LRT[["L.Ratio"]][2])
##' (sum(e.permMax > e.obsMax)+1)/(n.perm+1) - 2*min(p.value ,p.value2) ## should be approximately true
##'
##' ## *** partially correlated tests
##' e.lmeH0.bis <- nlme::lme(value ~ variable, random =~1|Id, data = rbind(dtL,dtL2))
##' e.lmeH1.bis <- nlme::lme(value ~ variable + conc, random=~1|Id, data = rbind(dtL,dtL2))
##' e.permlme2 <- permlme(e.lmeH0.bis, e.lmeH1.bis, perm.X = TRUE, nperm = n.perm, seed = 10, statistic = "LRT")
##' p.value2 <- e.permlme2$LRT[["p.value.perm"]][2]
##'
##' e.permMax <- pmax(e.permlme$stat.perm[,"LRT"],e.permlme2$stat.perm[,"LRT"])
##' e.obsMax <- pmax(e.permlme$LRT[["L.Ratio"]][2],e.permlme2$LRT[["L.Ratio"]][2])
##' (sum(e.permMax > e.obsMax)+1)/(n.perm+1)/min(p.value ,p.value2) ## should be between 1 and 2
##'
##' ## ** "homemade" permutation test in presence of missing values
##' set.seed(10)
##' dtL$valueMiss <- dtL$value
##' dtL$valueMiss[rbinom(NROW(dtL), size = 1, prob = 0.1)==1] <- NA
##'
##' e.lmeH0 <- nlme::lme(valueMiss ~ variable, random =~1|Id, data = dtL, na.action = na.omit)
##' e.lmeH1 <- nlme::lme(valueMiss ~ variable + conc, random=~1|Id, data = dtL, na.action = na.omit)
##' e.permlme <- permlme(e.lmeH0, e.lmeH1, perm.X = TRUE, nperm = n.perm, seed = 10, statistic = "LRT")
##' e.permlme
## * permlme (code)
##' @rdname permlme
##' @export
permlme <- function(lme0, lme1, data = NULL, seed = NULL,
statistic = "Wald", perm.X = FALSE, return.index.perm = FALSE,
nperm = 1000, cpus = 1, trace = TRUE){
statistic <- match.arg(statistic, c("LRT","Wald"), several.ok = TRUE)
if(is.null(data)){data <- nlme::getData(lme1)}
data <- as.data.frame(data)
## ** 0- normal test
name.coef <- setdiff(names(fixef(lme1)),names(fixef(lme0)))
if(length(name.coef)!=1 && "Wald" %in% statistic){
stop("There should be exactly one parameter more in lme1 than in lme0. \n")
}
lme0.ML <- update(lme0, data = data, method = "ML")
lme1.ML <- update(lme1, data = data, method = "ML")
out <- list(call = match.call(),
LRT = anova(lme0.ML, lme1.ML),
Wald = summary(update(lme1, data = data, method = "REML"))$tTable[name.coef,,drop=FALSE],
stat.perm = NULL,
n.perm = c(LRT = NA, Wald = NA))
## ** 1- extract key quantities from input
n.obs <- NROW(data) ## number of observations
cluster <- getGroups(lme0) ## to which cluster (patient) each observation belongs
U.cluster <- levels(cluster)
n.cluster <- length(U.cluster)
index.cluster <- lapply(U.cluster, function(iCluster){which(cluster==iCluster)}) ## used to restaure proper ordering after tapply
Y <- getResponse(lme1) ## response
name.Y <- all.vars(formula(lme1))[[1]] ## name of the response variable
X0 <- model.matrix(formula(lme0),data) ## design matrix
Omega0 <- getVarCov(lme0, type = "marginal", individuals = levels(cluster)) ## residual variance-covariance matrix
beta0 <- fixef(lme0) ## regression coefficients
## ** 2- compute residuals
Xbeta0 <- X0 %*% beta0
residuals0 <- as.double(Y - X0 %*% beta0)
## ** 3- compute normalized residuals
sqrtOmega0 <- lapply(Omega0,function(iOmega0){t(chol(iOmega0))})
sqrtOmega0M1 <- lapply(sqrtOmega0,solve)
residuals0N <- vector(length=n.obs, mode = "numeric")
for(iCluster in 1:n.cluster){ ## iCluster <- 1
residuals0N[index.cluster[[iCluster]]] <- sqrtOmega0M1[[iCluster]] %*% residuals0[index.cluster[[iCluster]]]
}
## ** 4- estimate the distribution of the test statistics under the null
warper <- function(iPerm){
data.perm <- data
## permute residuals and fixed effects
index.perm <- sample(1:n.obs)
residuals0N.perm <- residuals0N[index.perm]
## rescale residuals
for(iCluster in 1:n.cluster){ ## iCluster <- 1
data.perm[[name.Y]][index.cluster[[iCluster]]] <- sqrtOmega0[[iCluster]] %*% residuals0N.perm[index.cluster[[iCluster]]]
}
## add permuted fixed effects
if(perm.X){
data.perm[[name.Y]] <- data.perm[[name.Y]] + Xbeta0[index.perm,,drop=FALSE]
}
if("LRT" %in% statistic){
lme0.permML <- try(update(lme0, data = data.perm, method = "ML"), silent = TRUE)
lme1.permML <- try(update(lme1, data = data.perm, method = "ML"), silent = TRUE)
if(inherits(lme0.permML,"try-error")||inherits(lme0.permML,"try-error")){
LRT.stat <- NA
}else{
LRT.stat <- as.double(2*(logLik(lme1.permML)-logLik(lme0.permML)))
}
}else{
LRT.stat <- NA
}
if("Wald" %in% statistic){
lme1.permREML <- try(update(lme1, data = data.perm, method = "REML"), silent = TRUE)
if(inherits(lme1.permREML,"try-error")){
Wald.stat <- NA
}else{
Wald.stat <- summary(lme1.permREML)$tTable[name.coef,"t-value"]
}
}else{
Wald.stat <- NA
}
iOut <- c(LRT = LRT.stat, Wald = Wald.stat)
if(return.index.perm){
attr(iOut,"index.perm") <- index.perm
}
return(iOut)
}
if(cpus==1 || is.null(cpus)){
if(!is.null(seed)){set.seed(seed)}
if(trace){
requireNamespace("pbapply")
ls.perm <- pbapply::pblapply(1:nperm, function(i){warper(i)})
}else{
ls.perm <- lapply(1:nperm, function(i){warper(i)})
}
}else{
cl <- snow::makeSOCKcluster(cpus)
doSNOW::registerDoSNOW(cl)
pb <- txtProgressBar(max = nperm, style=3)
opts <- list(progress = function(n) setTxtProgressBar(pb, n))
if(is.null(seed)){
ls.perm <- foreach::`%dopar%`(
foreach::foreach(i=1:nperm, .options.snow=opts, .packages = "nlme"), {
warper(i)
})
}else{
requireNamespace(doRNG)
set.seed(seed)
ls.perm <- doRNG::`%dorng%`(
foreach::foreach(i=1:nperm, .options.snow=opts, .packages = "nlme"), {
warper(i)
})
}
parallel::stopCluster(cl)
}
out$stat.perm <- do.call(rbind,ls.perm)
if(return.index.perm){
out$index.perm <- do.call(cbind,lapply(ls.perm,attr,"index.perm"))
}
## ** 5- compare the observed statistic with its null distribution
n.permLRT <- sum(!is.na(out$stat.perm[,"LRT"]))
if(n.permLRT>0){
out$LRT[["p.value.perm"]] <- c(NA,(sum(pmax(out$stat.perm[,"LRT"],0) >= out$LRT$L.Ratio[2], na.rm = TRUE) + 1)/(n.permLRT+1))
}
out$n.perm["LRT"] <- n.permLRT
n.permWald <- sum(!is.na(out$stat.perm[,"Wald"]))
if(n.permWald>0){
out$Wald <- data.frame(out$Wald, "p.value.perm" = (sum(abs(out$stat.perm[,"Wald"]) >= abs(out$Wald[,"t-value"]), na.rm = TRUE) + 1)/(n.permWald+1))
}
out$n.perm["Wald"] <- n.permWald
## ** export
class(out) <- append("permlme", class(out))
return(out)
}
## * print.permlem
print.permlme <- function(x,...){
if("p.value.perm" %in% names(x$LRT)){
cat("Likelihood ratio test: \n")
print(x$LRT)
}
if("p.value.perm" %in% names(x$Wald)){
cat("Wald test: \n")
print(x$Wald)
}
return(NULL)
}
##----------------------------------------------------------------------
### permlme.R ends here
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.