### pool.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: Jul 28 2024 (19:14)
## Version:
## Last-Updated: sep 30 2024 (14:26)
## By: Brice Ozenne
## Update #: 203
##----------------------------------------------------------------------
##
### Commentary:
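## Pooling of Wald tests stacked across linear mixed models (rbindWald_lmm objects).
## Hedged usage sketch (illustrative only; dataset, variable names, and the exact user-level
## calls are assumptions about the usual LMMstar workflow, not part of this file):
##   e1 <- LMMstar::lmm(Y1 ~ X, repetition = ~visit|id, data = d)
##   e2 <- LMMstar::lmm(Y2 ~ X, repetition = ~visit|id, data = d)
##   eC <- rbind(anova(e1, effects = "X=0", simplify = FALSE),
##               anova(e2, effects = "X=0", simplify = FALSE))
##   model.tables(eC, method = "average") ## pooling presumably reaches pool.rbindWald_lmm internally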
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * pool.rbindWald_lmm
pool.rbindWald_lmm <- function(object, method, qt = NULL,
null, level, df, tol = 1e-10, options){
if(is.null(options)){
options <- LMMstar.options()
}
pool.method <- options$pool.method
adj.method <- options$adj.method
n.sample <- options$n.sampleCopula
## ** normalize user input
## *** level
alpha <- 1-level
if(!is.na(level) && object$args$simplify==TRUE && any(method!="pool.rubin")){
stop("Cannot evaluate the uncertainty of pooling estimator without the full variance-covariance matrix. \n",
"Consider setting the argument \'simplify\' to FALSE when calling anova. \n")
}
if(!is.na(level) && object$args$df==FALSE && any(method %in% c("pool.se","pool.gls","pool.gls1","p.rejection"))){
stop("Cannot evaluate the uncertainty of pooling estimator without the derivative of the variance-covariance matrix. \n",
"Consider setting the argument \'df\' to TRUE when calling lmm and anova. \n")
}
## *** qt (critical quantile)
if("p.rejection" %in% method){
if(!is.null(qt)){
if(is.character(qt) && length(qt)!=1){
stop("Argument \'qt\' should be have length 1 when character. \n")
}else if(is.numeric(qt) && (length(qt)!=1 && length(qt)!=NROW(object$univariate))){
stop("Argument \'qt\' should either have length 1 or the number of contrasts (here ",NROW(object$univariate),") when numeric. \n")
}
if(!is.numeric(qt) && (!is.character(qt) || (qt %in% c("none","bonferroni","single-step","single-step2")==FALSE))){
stop("Argument \'qt\' should either be numeric or one of \"none\", \"bonferroni\", \"single-step\", \"single-step2\". \n")
}
if(is.numeric(qt)){
critical.threshold <- qt
}else{
critical.threshold <- stats::confint(object, method = qt, columns = "quantile", options = options)$quantile
}
}else{
critical.threshold <- stats::confint(object, columns = "quantile", options = options)$quantile
}
}
## *** df
if(is.na(level)){
df <- FALSE
}
## ** prepare output
if(!is.null(names(method))){
pool.name <- stats::setNames(names(method),method)
}else{
pool.name <- stats::setNames(method,method)## stats::setNames(poolName.rbindWald_lmm(object, method = method),method)
}
out <- as.data.frame(matrix(NA, nrow = length(pool.name), ncol = NCOL(object$univariate),
dimnames = list(pool.name, colnames(object$univariate))))
out$model <- "all"
out$type <- ifelse(length(unique(object$univariate$type))==1, object$univariate$type[1], "all")
out$name <- method
out$term <- "pool"
out$n.param <- sum(object$univariate$n.param)
out$transformed <- ifelse(length(unique(object$univariate$type))==1, object$univariate$transformed[1], NA)
out$tobacktransform <- object$univariate$tobacktransform[1]
## ** extract information
## independence between models
independence <- object$object$independence
## estimates
tableUni <- stats::model.tables(object, method = "none", columns = c("estimate","se","statistic","df","null"), options = options)
n.test <- NROW(tableUni)
name.test <- rownames(tableUni)
## variance
if(any(method %in% c("pool.se","pool.gls","pool.gls1")) || ((!is.na(level) || null) && "p.rejection" %in% method)){
Wald.Sigma <- stats::vcov(object, effects = list("Wald",c("Wald","gradient"))[[(!is.na(level))+1]],
method = "none", transform.names = FALSE, options = options)
Wald.dSigma <- attr(Wald.Sigma,"gradient")
attr(Wald.dSigma,"gradient") <- NULL
}
if(!is.na(level) && any(method %in% c("average", "pool.se","pool.gls","pool.gls1","p.rejection"))){
contrast <- stats::model.tables(object, effects = "contrast", transform.names = FALSE, simplify = FALSE, options = options)
All.Sigma <- stats::vcov(object, effects = list("all",c("all","gradient"))[[df+1]],
method = "none", transform.names = FALSE, options = options)
All.dSigma <- attr(All.Sigma,"gradient")
attr(All.Sigma,"gradient") <- NULL
}
## ** point estimate
if("p.rejection" %in% method){
out[pool.name["p.rejection"],"estimate"] <- 1 - mean(stats::pt(critical.threshold - tableUni$statistic, df = tableUni$df) - stats::pt(-critical.threshold - tableUni$statistic, df = tableUni$df))
}
if(any(method != "p.rejection")){
pool.contrast <- matrix(NA, nrow = sum(method != "p.rejection"), ncol = n.test, dimnames = list(setdiff(method, "p.rejection"),name.test))
if("average" %in% method){
pool.contrast["average",] <- 1/n.test
}
if("pool.se" %in% method){
se.contrast <- 1/diag(Wald.Sigma)
pool.contrast["pool.se",] <- se.contrast/sum(se.contrast)
}
if("pool.gls" %in% method || "pool.gls1" %in% method){
if(is.invertible(Wald.Sigma, cov2cor = FALSE)){
Wald.SigmaM1 <- solve(Wald.Sigma)
gls.contrast <- rowSums(Wald.SigmaM1)/sum(Wald.SigmaM1)
}else{ ## use generalized inverse, same as MASS::ginv
Wald.Sigma.eigen <- eigen(Wald.Sigma, symmetric = TRUE)
index.subset <- which(abs(Wald.Sigma.eigen$values) > tol)
## truncate eigen value decomposition
if(length(index.subset)==0){
stop("All eigenvalues of the variance-covariance matrix are close to 0 (<",tol,"). \n",sep="")
}
attr(out,"message.estimate") <- paste(length(Wald.Sigma.eigen$values)-length(index.subset), " eigenvalues have been removed in the pseudo inverse")
lambda.k <- Wald.Sigma.eigen$values[index.subset]
q.kj <- Wald.Sigma.eigen$vector[,index.subset,drop=FALSE]
## (Fast) evaluation of the weights
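## with Sigma^+ = Q diag(1/lambda) Q' and qbar = Q'1: rowSums(Sigma^+) = Q (qbar/lambda)
## and sum(Sigma^+) = sum(qbar^2/lambda), so the weights are obtained without forming Sigma^+ explicitly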
if(is.na(level)){
qbar.k <- colSums(q.kj)
w.k <- qbar.k^2/lambda.k
gls.contrast <- rowSums(sweep(q.kj, FUN = "*", MARGIN = 2, STATS = w.k/qbar.k))/sum(w.k)
}
## (Slow but explicit) evaluation of the weights
if(!is.na(level)){
Wald.SigmaM1 <- q.kj %*% diag(1/lambda.k) %*% t(q.kj)
gls.contrast <- rowSums(Wald.SigmaM1)/sum(Wald.SigmaM1)
}
}
if("pool.gls" %in% method){
pool.contrast["pool.gls",] <- gls.contrast
}
if("pool.gls1" %in% method){ ## ensure no weight greater than 1
index.contrastMAX <- which.max(abs(gls.contrast))
xi.contrastMax <- sign(gls.contrast[index.contrastMAX])
value.contrastMax <- gls.contrast[index.contrastMAX]
kappaPwmax <- (1-n.test*xi.contrastMax*value.contrastMax)/(1-xi.contrastMax*n.test)
pool.contrast["pool.gls1",] <- (1-1/kappaPwmax)/n.test + gls.contrast/kappaPwmax
}
}
if("pool.rubin" %in% method){
pool.contrast["pool.rubin",] <- 1/n.test
}
out[pool.name[rownames(pool.contrast)],"estimate"] <- pool.contrast %*% tableUni$estimate
}
## ** variance
if(!is.na(level) && "pool.rubin" %in% method){
pool.U <- mean(diag(Wald.Sigma))
pool.B <- sum((tableUni$estimate - mean(tableUni$estimate))^2)/(n.test-1)
out[pool.name["pool.rubin"],"se"] <- sqrt(pool.U + (1 + 1/n.test) * pool.B)
}
if(!is.na(level) && any(method != "pool.rubin")){
grad.pool <- matrix(0, nrow = sum(method!="pool.rubin"), ncol = NCOL(All.Sigma), dimnames = list(setdiff(method,"pool.rubin"), colnames(contrast)))
if("average" %in% method){
grad.pool["average",] <- pool.contrast[rownames(pool.contrast)=="average",,drop=FALSE] %*% contrast
}
if("pool.se" %in% method){
grad.pool.seA <- pool.contrast[rownames(pool.contrast) == "pool.se",] %*% contrast
grad.pool.seB <- - tableUni$estimate %*% apply(Wald.dSigma, MARGIN = 3, FUN = function(iM){diag(iM)/(tableUni$se^4) })/sum(se.contrast)
grad.pool.seC <- out[pool.name["pool.se"],"estimate"]/sum(se.contrast) * apply(Wald.dSigma, MARGIN = 3, FUN = function(iM){ sum(diag(iM)/tableUni$se^4) })
grad.pool["pool.se",] <- grad.pool.seA[1,] + grad.pool.seB[1,] + grad.pool.seC
}
if("pool.gls" %in% method || "pool.gls1" %in% method){
if(is.invertible(Wald.Sigma, cov2cor = FALSE)){
Wald.dSigmaM1 <- - apply(Wald.dSigma, MARGIN = 3, FUN = function(iM){rowSums(Wald.SigmaM1 %*% iM %*% Wald.SigmaM1)})
}else{
## The Differentiation of Pseudo-Inverses and Nonlinear Least Squares Problems Whose Variables Separate. Author(s): G. H. Golub and V. Pereyra. Source: SIAM Journal on Numerical Analysis, Vol. 10, No. 2 (Apr., 1973), pp. 413-432 (formula 4.12)
## with simplification for symmetric matrix
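## for symmetric Sigma and dSigma: d(Sigma^+) = - Sigma^+ dSigma Sigma^+
##   + (Sigma^+ Sigma^+) dSigma (I - Sigma Sigma^+) + (I - Sigma^+ Sigma) dSigma (Sigma^+ Sigma^+)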
Wald.SigmaM1.SigmaM1 <- Wald.SigmaM1 %*% Wald.SigmaM1
Wald.Im.Sigma.SigmaM1 <- diag(1, n.test) - Wald.Sigma %*% Wald.SigmaM1
Wald.Im.SigmaM1.Sigma <- diag(1, n.test) - Wald.SigmaM1 %*% Wald.Sigma
Wald.dSigmaM1 <- apply(Wald.dSigma, MARGIN = 3, FUN = function(iM){
rowSums( - Wald.SigmaM1 %*% iM %*% Wald.SigmaM1 + Wald.SigmaM1.SigmaM1 %*% iM %*% Wald.Im.Sigma.SigmaM1 + Wald.Im.SigmaM1.Sigma %*% iM %*% Wald.SigmaM1.SigmaM1)
})
}
## d\psi/d\theta = d( s \beta / sum(s) )/d\theta with s = rowSums(Sigma^-1)
##              = s/sum(s) d\beta/d\theta + \beta ds/d\theta / sum(s) - \psi sum(ds/d\theta)/sum(s)
grad.pool.glsA <- pool.contrast[rownames(pool.contrast) == "pool.gls",] %*% contrast
grad.pool.glsB <- tableUni$estimate %*% Wald.dSigmaM1 /sum(Wald.SigmaM1)
grad.pool.glsC <- - out[pool.name["pool.gls"],"estimate"]/sum(Wald.SigmaM1) * colSums(Wald.dSigmaM1)
grad.pool.gls <- grad.pool.glsA[1,] + grad.pool.glsB[1,] + grad.pool.glsC
if("pool.gls" %in% method){
grad.pool["pool.gls",] <- grad.pool.gls
}
if("pool.gls1" %in% method){ ## ensure no weight greater than 1
if(kappaPwmax > 1){
## condense previous derivatives
grad.contrast.gls <- Wald.dSigmaM1/sum(Wald.SigmaM1) - cbind(rowSums(Wald.SigmaM1)) %*% rbind(colSums(Wald.dSigmaM1))/sum(Wald.SigmaM1)^2
## range(tableUni$estimate %*% grad.contrast.gls - grad.pool.glsB - grad.pool.glsC)
## d\psi/d\theta = d(w \beta)/d\theta = w d\beta/d\theta + \beta dw/d\theta
## where w = (1-1/kappaPwmax)/n.test + gls.contrast/kappaPwmax, i.e.
## dw/d\theta = d(gls.contrast)/kappaPwmax - gls.contrast d(kappaPwmax)/kappaPwmax^2 + d(kappaPwmax)/(kappaPwmax^2*n.test)
grad.pool.gls1A <- pool.contrast[rownames(pool.contrast) == "pool.gls1",] %*% contrast
grad.kappaPwmax <- -n.test*xi.contrastMax*grad.contrast.gls[index.contrastMAX,]/(1-xi.contrastMax*n.test)
grad.pool.gls1B <- tableUni$estimate %*% (grad.contrast.gls/kappaPwmax - cbind(gls.contrast) %*% rbind(grad.kappaPwmax)/kappaPwmax^2)
grad.pool.gls1C <- sum(tableUni$estimate) * grad.kappaPwmax/(kappaPwmax^2*n.test)
grad.pool["pool.gls1",] <- grad.pool.gls1A[1,] + grad.pool.gls1B[1,] + grad.pool.gls1C
}else{ ## kappaPwmax <= 1: only the direct contrast contribution
grad.pool["pool.gls1",] <- (pool.contrast[rownames(pool.contrast) == "pool.gls1",,drop=FALSE] %*% contrast)[1,]
}
}
}
if("p.rejection" %in% method){
## Approximation: neglect the variability of the degrees-of-freedom and of the critical threshold
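## chain rule: d/dtheta [1 - mean(F(c - t_i) - F(-c - t_i))] = mean( [f(c - t_i) - f(-c - t_i)] * dt_i/dtheta )
## with F/f the Student-t cdf/density and dt_i/dtheta = C_i/se_i - (beta_i - null_i) dVar_i/dtheta / (2 se_i^3)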
grad.statistic <- contrast / tableUni$se - (tableUni$estimate - tableUni$null) * apply(Wald.dSigma, MARGIN = 3, FUN = diag) / (2 * tableUni$se^3)
partial.integral <- stats::dt(critical.threshold - tableUni$statistic, df = tableUni$df) - stats::dt(-critical.threshold - tableUni$statistic, df = tableUni$df)
grad.pool[method == "p.rejection",] <- colMeans(grad.statistic * partial.integral)
}
out[pool.name[rownames(grad.pool)],"se"] <- sqrt(rowSums( (grad.pool %*% All.Sigma) * grad.pool))
}
## ** null hypothesis
if(null){
if("p.rejection" %in% method){
rho.linfct <- stats::cov2cor(Wald.Sigma)
myMvd <- copula::mvdc(copula = copula::normalCopula(param=rho.linfct[lower.tri(rho.linfct)], dim = NROW(rho.linfct), dispstr = "un"),
margins = rep("t", NROW(rho.linfct)),
paramMargins = as.list(stats::setNames(tableUni$df,rep("df",NROW(rho.linfct)))))
sample.copula <- copula::rMvdc(n.sample, myMvd)
null.integral <- do.call(cbind, lapply(1:n.test, function(iTest){
stats::pt(critical.threshold - sample.copula[,iTest], df = tableUni$df[iTest]) - stats::pt(-critical.threshold - sample.copula[,iTest], df = tableUni$df[iTest])
}))
out[pool.name["p.rejection"],"null"] <- mean(1 - rowMeans(null.integral))
}
if(any(method != "p.rejection")){
out[setdiff(pool.name, pool.name["p.rejection"]),"null"] <- (pool.contrast %*% tableUni$null)[,1]
}
}
## ** degrees-of-freedom
if(df == FALSE){
out$df <- Inf
}else{
if("pool.rubin" %in% method){
## MICE's approach: https://stefvanbuuren.name/fimd/sec-whyandwhen.html
pool.lambda <- (1+1/n.test)*pool.B / out[pool.name["pool.rubin"],"se"]^2 ## formula 2.24
pool.nu_old <- (n.test-1)/pool.lambda^2 ## formula 2.30 (Rubin 1987b eq 3.1.6)
pool.nu_obs <- (tableUni$df+1)/(tableUni$df+3)*tableUni$df*(1-pool.lambda) ## formula 2.31 (Barnard and Rubin (1999) )
out[pool.name["pool.rubin"],"df"] <- mean(pool.nu_old*pool.nu_obs/(pool.nu_old+pool.nu_obs)) ## formula 2.32
}
if(any(c("average","pool.se","pool.gls","pool.gls1","p.rejection") %in% method)){
out[pool.name[rownames(grad.pool)],"df"] <- .df_contrast(contrast = grad.pool[,rownames(All.dSigma),drop=FALSE], vcov.param = All.Sigma, dVcov.param = All.dSigma)
if(object$args$simplify!=FALSE){
if(all(out$type=="mu")){
attr(out,"message.df") <- "ignoring variability of the variance parameters"
}else{
attr(out,"message.df") <- "ignoring variability of some parameters"
}
}
## approximate df calculation
}
}
## ** export
out$statistic <- (out$estimate - out$null)/out$se
out$lower <- out$estimate + out$se * stats::qt(alpha/2, df = out$df)
out$upper <- out$estimate + out$se * stats::qt(1-alpha/2, df = out$df)
out$quantile <- stats::qt(1-alpha/2, df = out$df)
out$p.value <- 2*(1-stats::pt(abs(out$statistic), df = out$df))
if(any(method != "p.rejection")){
attr(out,"contrast") <- pool.contrast
}
if(!is.na(level) && any(method != "pool.rubin")){
attr(out,"gradient") <- grad.pool
}
return(out)
}
## * poolName.rbindWald_lmm
poolName.rbindWald_lmm <- function(object, method){
## ** extract
table <- object$univariate
## ** build name
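## heuristic for a compact label: a single test keeps its name; tests sharing a "variable=level"
## structure are collapsed into variable=<first:last>; otherwise the first and last test names are shown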
Wald.name <- rownames(table)
sep <- object$object$sep
pool.splitname <- strsplit(Wald.name, split = sep, fixed = TRUE)
if(length(Wald.name)==1){
pool.name <- paste0("<",Wald.name,">")
}else if(all(lengths(pool.splitname)==2)){
## ??
Mpool.splitname <- do.call(rbind,pool.splitname)
pool.splitname2 <- strsplit(Mpool.splitname[,1],split="=", fixed = TRUE)
if(all(lengths(pool.splitname2)==2)){
Mpool.splitname2 <- do.call(rbind,pool.splitname2)
if(length(unique(Mpool.splitname2[,1]))==1){
pool.name <- paste0(Mpool.splitname2[1,1],"=<",paste(Mpool.splitname2[1,2],Mpool.splitname2[NROW(Mpool.splitname2),2],sep=":"),">",sep,Mpool.splitname[1,2])
}else{
pool.name <- paste0("<",paste(Mpool.splitname[1,1],Mpool.splitname[NROW(Mpool.splitname),1],sep=":"),">",sep,Mpool.splitname[1,2])
}
}else if(length(unique(Mpool.splitname[,2]))==1){
pool.name <- paste0("<",paste(Mpool.splitname[1,1],Mpool.splitname[NROW(Mpool.splitname),1],sep=":"),">",sep,Mpool.splitname[1,2])
}else{
pool.name <- paste0("<",paste(Wald.name[1],Wald.name[length(Wald.name)],collapse=":"),">")
}
}else if(length(Wald.name)==2){
pool.name <- paste0("<",paste(Wald.name,collapse=", "),">")
}else{
## <first test, last test>
pool.name <- paste0("<",paste(Wald.name[1],"...",Wald.name[length(Wald.name)],sep=", "),">")
}
## ** export
return(paste0(method,pool.name))
}
##----------------------------------------------------------------------
### pool.R ends here