Nothing
### model.matrix.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar 5 2021 (21:50)
## Version:
## Last-Updated: aug 1 2023 (11:48)
## By: Brice Ozenne
## Update #: 2949
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * model.matrix.lmm (code)
##' @export
model.matrix.lmm <- function(object, data = NULL, effects = "mean", simplify = TRUE, drop.X = NULL, ...){
## ** normalize user imput
if(identical(effects,"all")){
effects <- c("mean","variance","correlation")
}
if(is.null(drop.X)){
drop.X <- LMMstar.options()$drop.X
}
if(!identical(effects,"index")){
effects <- match.arg(effects, c("mean","variance","correlation"), several.ok = TRUE)
}
dots <- list(...)
if(length(dots)>0){
stop("Unknown argument(s) \'",paste(names(dots),collapse="\' \'"),"\'. \n")
}
## ** update design matrix with new dataset
if(!is.null(data)){
data <- as.data.frame(data)
## *** prepare output
design <- object$design
design[setdiff(names(design),c("vcov","param","drop.X"))] <- list(NULL)
design$vcov[c("var","cor","pattern","Upattern")] <- NULL
var.outcome <- object$outcome$var
var.cluster <- attr(object$cluster$var,"original")
var.time <- attr(object$time$var,"original")
var.strata <- attr(object$strata$var,"original")
## *** outcome
if(!simplify){
if(any(object$outcome$var %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing outcome variable \"",object$outcome$var,"\".\n")
}
design$Y <- data[[object$outcome$var]]
}
## *** index
structure.var <- stats::na.omit(c(attr(object$cluster$var,"original"),attr(object$time$var,"original"),attr(object$strata$var,"original")))
if(("index" %in% effects) || ("variance" %in% effects) || ("correlation" %in% effects) || !simplify){
## check dataset
if(any(structure.var %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing variable(s) for the data structure \"",paste(structure.var[structure.var %in% names(data) == FALSE], collapse = "\" \""),"\".\n")
}
## generate cluster/time/strata variables
data.Nindex <- .lmmNormalizeData(data = data[stats::na.omit(c(var.cluster,var.time,var.strata))],
var.outcome = NA,
var.cluster = var.cluster,
var.time = var.time,
var.strata = var.strata,
droplevels = list(time = object$time$levels,
strata = object$strata$levels),
initialize.cluster = object$design$vcov$ranef$crossed,
initialize.time = setdiff(object$design$vcov$ranef$vars, object$design$vcov$ranef$var.cluster))$data
## extract indexes
outInit <- .extractIndexData(data = data.Nindex, structure = design$vcov)
if(identical(effects,"index")){
return(outInit)
}
design$index.cluster <- outInit$index.cluster
design$index.clusterStrata <- outInit$index.clusterStrata
design$index.clusterTime <- outInit$index.clusterTime
}
## *** weights
if(!simplify){
if(!is.na(object$weight$var[1])){
if(any(object$weight$var[1] %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing weight variable \"",object$weight$var[1],"\".\n")
}
design$weight <- data[[object$weight$var[1]]]
}
}
if(!simplify){
if(!is.na(object$weight$var[2])){
if(any(object$weight$var[2] %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing weight variable \"",object$weight$var[2],"\".\n")
}
design$scale.Omega <- data[[object$weight$var[2]]]
}
}
## *** mean
if("mean" %in% effects){
## check dataset
ff.allvars <- all.vars(object$formula$mean.design)
if(any(ff.allvars %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing variable(s) for the mean structure \"",paste(ff.allvars[ff.allvars %in% names(data) == FALSE], collapse = "\" \""),"\".\n")
}
## convert to factor with the right levels
data.mean <- .updateFactor(data, xfactor = object$xfactor$mean)
## update formula with attributes from the design matrix
ff.mean <- attr(object$design$mean,"terms") ## instead of object$formula$mean.design to handle spline (add attributes predvars with the position of the knots)
data.mean$XXindexXX <- 1 ## add latent variables used when terms where defined
data.mean$XXtimeXX <- 1
data.mean$XXclusterXX <- 1
data.mean$XXstrataXX <- 1
## use stats::model.frame to handle spline
data.mf.mean <- stats::model.frame(ff.mean, data = data.mean, na.action = stats::na.pass)
design$mean <- stats::model.matrix(ff.mean, data.mf.mean)[,colnames(object$design$mean),drop=FALSE]
}
## *** variance-covariance
if("variance" %in% effects){
## check dataset
ff.allvars <- c(all.vars(object$formula$var),all.vars(object$formula$cor))
if(any(ff.allvars %in% names(data) == FALSE)){
stop("Incorrect argument \'data\': missing variable(s) for the variance-covariance structure \"",paste(ff.allvars[ff.allvars %in% names(data) == FALSE], collapse = "\" \""),"\".\n")
}
## update levels
design$vcov$var <- list(data = .updateFactor(data, xfactor = object$xfactor$var),
lp2X = object$design$vcov$var$lp2X,
lp2data = object$design$vcov$var$lp2data,
X.attr = attributes(object$design$vcov$var$X))
design$vcov$cor <- list(data = .updateFactor(data, xfactor = object$xfactor$cor),
lp2X = object$design$vcov$cor$lp2X,
lp2data = object$design$vcov$cor$lp2data,
X.attr = attributes(object$design$vcov$cor$X))
## form design matrix
outDesign <- .vcov.matrix.lmm(structure = design$vcov, data = data.Nindex, index.cluster = outInit$index.cluster, drop.X = drop.X)
design$vcov$var <- outDesign$var
design$vcov$cor <- outDesign$cor
## handle the case where structure is UN even though each cluster contain a single observation
if(is.null(object$design$vcov$cor$X) && !is.null(outDesign$cor$X)){
stop("Cannot build the design matrix for the covariance structure of clusters containing several obserations. \n",
"When fitting the model there were not replicates within cluster, so the correlation structure is unknown. \n")
}
## find covariance patterns
design$vcov <- .findUpatterns(design$vcov,
index.clusterTime = outInit$index.clusterTime, U.time = outInit$U.time,
index.cluster = outInit$index.cluster, U.cluster = outInit$U.cluster,
index.clusterStrata = outInit$index.clusterStrata, U.strata = outInit$U.strata)
}
}else{
design <- object$design
}
## ** export
if(simplify){
design2 <- list(mean = design$mean,
variance = design$vcov$var$X,
correlation = design$vcov$cor$X)
attributes(design2$mean) <- attributes(design$mean)[c("dim","dimnames","assign","contrasts")]
attributes(design2$variance) <- attributes(design$variance)[c("dim","dimnames","assign","contrasts")]
attributes(design2$correlation) <- attributes(design$correlation)[c("dim","dimnames","assign","contrasts")]
if(length(effects)>1){
return(design2[effects])
}else{
return(design2[[effects]])
}
}else{
return(design)
}
}
## * .vcov.matrix.lmm
## output observation specific design matrix (but no covariance pattern)
.vcov.matrix.lmm <- function(structure, data, index.cluster, drop.X){
sep <- LMMstar.options()$sep["lp"]
var.cluster <- structure$name$cluster
var.time <- structure$name$time
var.strata <- structure$name$strata
U.cluster <- sort(unique(data[[var.cluster]]))
U.time <- sort(unique(data[[var.time]]))
structure.class <- structure$class
structure.type <- structure$type
formula <- structure$formula
out <- list(var = list(), cor = list(),
xfactor = list(var = NULL, cor = NULL))
test.newdata <- !is.null(structure$param) ## if TRUE then model.matrix call on newdata otherwise call from lmm->.model.matrix on original data
## ** normalize dataset
## time
n.time <- length(U.time)
## formula
out$var$formula <- structure$formula$var
out$cor$formula <- structure$formula$cor
## *** variance: convert variable to numeric/factor according to the structure
if(is.null(structure$var$data)){ ## from .model.matrix
out$var$data <- data
if(structure.class %in% c("ID","IND","CS","RE","UN","TOEPLITZ")){
col2factor.var <- all.vars(structure$formula$var)
}else if(!is.na(structure$name$strata)){
col2factor.var <- structure$name$strata
}else{
col2factor.var <- NULL
}
for(iVar in col2factor.var){
out$var$data[[iVar]] <- as.factor(data[[iVar]])
}
}else{ ## from model.matrix.lmm
out$var$data <- structure$var$data
}
## *** correlation: convert variable to numeric/factor according to the structure
if(is.null(structure$cor$data)){ ## from .model.matrix
out$cor$data <- data
if(structure.class == "UN" || (structure.class == "CS" && structure.type %in% "heterogeneous")){
col2factor.cor <- all.vars(structure$formula$cor)
}else if(!is.na(structure$name$strata)){
col2factor.cor <- structure$name$strata
}else{
col2factor.cor <- NULL
}
for(iVar in col2factor.cor){
out$cor$data[[iVar]] <- as.factor(data[[iVar]])
}
if(structure.class %in% c("RE","TOEPLITZ") || (structure.class == "CS" && structure.type %in% "homogeneous")){
col2num.cor <- setdiff(all.vars(structure$formula$cor), col2factor.cor)
}else{
col2num.cor <- NULL
}
for(iVar in col2num.cor){
if(is.logical(data[[iVar]])){
out$cor$data[[iVar]] <- as.numeric(data[[iVar]]) + 1
}else if(!is.numeric(data[[iVar]])){
out$cor$data[[iVar]] <- as.numeric(as.factor(data[[iVar]]))
}else if(is.numeric(data[[iVar]])){
out$cor$data[[iVar]] <- data[[iVar]] - min(data[[iVar]]) + 1
}
}
}else{ ## from model.matrix.lmm
out$cor$data <- structure$cor$data
}
## ** variance and correlation design lists
for(iMoment in c("var","cor")){ ## iMoment <- "var"
iMoment.txt <- c(var = "variance", cor = "correlation")[iMoment]
iData <- out[[iMoment]]$data
iU.strata <- levels(iData[[var.strata]])
iFormula <- out[[iMoment]]$formula
## *** design matrix
if(test.newdata == FALSE){ ## structure
if(!is.null(iFormula)){
if(structure.class == "CUSTOM"){
out[[iMoment]]$X <- iData[,all.vars(iFormula),drop=FALSE]
}else{
out[[iMoment]]$X <- .model.matrix_regularize(iFormula, data = iData, augmodel = TRUE, type = iMoment.txt, drop.X = drop.X)
}
attr(out[[iMoment]]$X, "original.colnames") <- colnames(out[[iMoment]]$X)
out$xfactor[[iMoment]] <- stats::.getXlevels(stats::terms(iFormula),stats::model.frame(iFormula,iData))
}
}else{ ## newdata
if(!is.null(iFormula)){
iAttr.X <- structure[[iMoment]]$X.attr
iKeep.attr <- setdiff(names(iAttr.X), c("dim","dimnames"))
out[[iMoment]]$X <- stats::model.matrix(iFormula, data = iData)[,iAttr.X$original.colnames,drop=FALSE]
colnames(out[[iMoment]]$X) <- iAttr.X$dimnames[[2]]
for(iAssign in iKeep.attr){
attr(out[[iMoment]]$X,iAssign) <- iAttr.X[[iAssign]]
}
}
}
## *** linear predictors & patterns
if(!is.null(iFormula)){
##------## linear predictor for each observation
iLp <- nlme::collapse(out[[iMoment]]$X, sep = sep, as.factor = !test.newdata)
if(!test.newdata){
##--## position of the observations with distinct linear predictors
iIndex.Ulp <- which(!duplicated(iLp))
##--## convert linear predictor to numeric, possible updating the order of the level to match "natural ordering"
if(NCOL(out[[iMoment]]$X)>1){
if(is.na(var.strata)){
iNewlevel <- orderLtoR(out[[iMoment]]$X[iIndex.Ulp,,drop=FALSE], strata = NULL)
}else if(structure.class == "CUSTOM"){
iNewOrder <- c(setdiff(colnames(out[[iMoment]]$X),var.strata),var.strata)
iNewlevel <- orderLtoR(out[[iMoment]]$X[iIndex.Ulp,iNewOrder,drop=FALSE], strata = NULL)
}else{
iNewlevel <- orderLtoR(out[[iMoment]]$X[iIndex.Ulp,,drop=FALSE], strata = factor(attr(out[[iMoment]]$X,"M.level")[[var.strata]], iU.strata))
## out[[iMoment]]$X[iIndex.Ulp[order(iNewlevel)],,drop=FALSE]
}
out[[iMoment]]$lp <- as.numeric(factor(iLp, levels = iLp[iIndex.Ulp][order(iNewlevel)]))
}else{
iNewlevel <- levels(iLp)
out[[iMoment]]$lp <- as.numeric(iLp)
}
##--## re-order position of the observations with distinct linear predictors to match increasing linear predictors
if(length(iIndex.Ulp)>1){
iIndex.Ulp <- iIndex.Ulp[order(out[[iMoment]]$lp[iIndex.Ulp], decreasing = FALSE)]
}
##--## re-express the linear predictor in term of design matrix or original data
if(structure.class == "CUSTOM"){
out[[iMoment]]$lp2data <- out[[iMoment]]$X[iIndex.Ulp,,drop=FALSE]
rownames(out[[iMoment]]$lp2data) <- iLp[iIndex.Ulp]
}else{
out[[iMoment]]$lp2X <- out[[iMoment]]$X[iIndex.Ulp,,drop=FALSE]
rownames(out[[iMoment]]$lp2X) <- iLp[iIndex.Ulp]
out[[iMoment]]$lp2data <- iData[iIndex.Ulp,attr(out[[iMoment]]$X,"variable"),drop=FALSE]
rownames(out[[iMoment]]$lp2data) <- iLp[iIndex.Ulp]
}
}else{
out[[iMoment]]$lp <- as.numeric(factor(iLp, levels = rownames(structure[[iMoment]]$lp2X)))
out[[iMoment]]$lp2X <- structure[[iMoment]]$lp2X
out[[iMoment]]$lp2data <- structure[[iMoment]]$lp2data
}
##------## pattern for each cluster
out[[iMoment]]$pattern <- as.numeric(as.factor(sapply(index.cluster, function(iC){paste(out[[iMoment]]$lp[iC], collapse = ".")})))
## Note: tapply(out[[iMoment]]$lp, attr(index.cluster,"vectorwise"), paste, collapse = ".")
## does not work when the time are not ordered in the initial dataset
if(is.null(structure[[iMoment]]$pattern2lp)){
## position of the cluster with distinct patterns
iIndex.pattern <- which(!duplicated(out[[iMoment]]$pattern))
iIndex.pattern <- iIndex.pattern[order(out[[iMoment]]$pattern[iIndex.pattern], decreasing = FALSE)] ## re-order
## re-express the pattern in term of linear predictors
out[[iMoment]]$pattern2lp <- unname(lapply(index.cluster[iIndex.pattern], function(iIndex){out[[iMoment]]$lp[iIndex]}))
}else{
out[[iMoment]]$pattern2lp <- structure[[iMoment]]$pattern2lp
}
}
}
## ** export
out$var$formula <- NULL
out$var$data <- NULL
out$cor$formula <- NULL
out$cor$data <- NULL
## out$var
## out$cor
return(out)
}
## * .model.matrix.lmm
.model.matrix.lmm <- function(formula.mean, structure,
data, var.outcome, var.weights,
drop.X, ## drop singular component of the design matrix
precompute.moments){
## ** indexes
outInit <- .extractIndexData(data = data, structure = structure)
U.cluster <- outInit$U.cluster
U.time <- outInit$U.time
U.strata <- outInit$U.strata
n.strata <- length(U.strata)
index.cluster <- outInit$index.cluster
index.clusterStrata <- outInit$index.clusterStrata
index.clusterTime <- outInit$index.clusterTime
## ** mean
## use stats::model.frame to handle splines
data.mf <- stats::model.frame(stats::update(formula.mean,~.+XXindexXX+XXtimeXX+XXclusterXX+XXstrataXX),data)
X.mean <- .model.matrix_regularize(formula.mean, data = data.mf, type = "mean", drop.X = drop.X)
attr(X.mean,"terms") <- attr(data.mf,"terms")
if(NCOL(X.mean)>0){
## gather all terms
all.terms <- c("(Intercept)",attr(stats::terms(formula.mean),"term.labels"))[attr(X.mean,"assign")+1]
## identify level
ls.sub <- sapply(all.terms, function(iSub){ ## iSub <- "X1:X2"
if(grepl("::",iSub,fixed = TRUE)){ ## wraper from a package, e.g. stats::poly()
return(unlist(strsplit(iSub,"::",fixed = TRUE))[-1])
}else if(grepl(":",iSub,fixed = TRUE)){ ## interaction
return(unlist(strsplit(iSub,":",fixed = TRUE)))
}else{
return(iSub)
}
}, simplify = FALSE)
mu.level <- mapply(sub = ls.sub,
name = colnames(X.mean),
FUN = function(sub,name){
for(iSub in sub){
name <- gsub(iSub,"",name, fixed = TRUE)
}
return(name)
})
## skeleton
skeleton.mu <- data.frame(name = colnames(X.mean),
index.strata = NA,
type = "mu",
constraint = NA,
level = gsub("^:","",gsub(":$","",mu.level)),
code = NA,
sigma = NA,
k.x = NA,
k.y = NA)
if(n.strata==1){
skeleton.mu$index.strata <- n.strata
}else if(structure$name$strata %in% attr(X.mean,"variable")){
## identify when stratifying the mean structure
X.mean2 <- .augmodel.matrix(stats::delete.response(stats::terms(formula.mean)),data.mf)
M.factors <- attr(attr(X.mean2,"formula"),"factors")
term.strata <- names(which(M.factors[structure$name$strata,]>0))
col.strata <- intersect(colnames(X.mean2)[attr(X.mean2,"term.labels") %in% term.strata],
colnames(X.mean))
skeleton.mu$index.strata[match(col.strata,skeleton.mu$name)] <- match(attr(X.mean2,"M.level")[col.strata,structure$name$strata], U.strata)
}
attr(X.mean,"strata") <- stats::setNames(skeleton.mu$index.strata, skeleton.mu$name)
}else{
skeleton.mu <- NULL
}
## ** variance
## *** design matrix
outDesign <- .vcov.matrix.lmm(structure = structure, data = data, index.cluster = outInit$index.cluster, drop.X = drop.X)
structure$xfactor <- outDesign$xfactor
structure$var <- outDesign$var
structure$cor <- outDesign$cor
## *** parametrization and patterns
structure <- .skeleton(structure = structure, data = data, indexData = outInit)
## *** covariance pattern
structure <- .findUpatterns(structure,
index.clusterTime = outInit$index.clusterTime, U.time = U.time,
index.cluster = outInit$index.cluster, U.cluster = U.cluster,
index.clusterStrata = outInit$index.clusterStrata, U.strata = U.strata)
## ** prepare calculation of the score
if(precompute.moments && NCOL(X.mean)>0){
if(is.na(var.weights[1])){
wX.mean <- X.mean
wY <- cbind(data[[var.outcome]])
}else{
wX.mean <- sweep(X.mean, FUN = "*", MARGIN = 1, STATS = sqrt(data[[var.weights[1]]]))
wY <- cbind(data[[var.outcome]]*sqrt(data[[var.weights[1]]]))
}
precompute.XX <- .precomputeXX(X = wX.mean, pattern = structure$Upattern$name,
pattern.ntime = stats::setNames(structure$Upattern$n.time, structure$Upattern$name),
pattern.cluster = attr(structure$pattern,"list"), index.cluster = index.cluster)
precompute.XY <- .precomputeXR(X = precompute.XX$Xpattern, residuals = wY, pattern = structure$Upattern$name,
pattern.ntime = stats::setNames(structure$Upattern$n.time, structure$Upattern$name),
pattern.cluster = attr(structure$pattern,"list"), index.cluster = index.cluster)
}else{
precompute.XX <- NULL
precompute.XY <- NULL
}
## ** find all pairs of coefficients
structure$pair.vcov <- stats::setNames(lapply(structure$Upattern$name, function(iName){## iName <- structure$Upattern$name[1]
iParamVcov <- structure$Upattern[structure$Upattern$name == iName,"param"][[1]]
if(length(iParamVcov)==0){return(NULL)}
iOut <- unorderedPairs(iParamVcov)
attr(iOut, "key") <- matrix(NA, nrow = length(iParamVcov), ncol = length(iParamVcov), dimnames = list(iParamVcov,iParamVcov))
for(iCol in 1:NCOL(iOut)){
attr(iOut, "key")[iOut[1,iCol],iOut[2,iCol]] <- iCol
attr(iOut, "key")[iOut[2,iCol],iOut[1,iCol]] <- iCol
}
return(iOut)
}),structure$Upattern$name)
structure$pair.meanvcov <- stats::setNames(lapply(structure$Upattern$name, function(iName){ ## iName <- structure$Upattern$name[1]
iPattern <- structure$Upattern[structure$Upattern$name == iName,,drop=FALSE]
if(length(iPattern$param[[1]])==0){return(NULL)}
iParamMu <- c(skeleton.mu$name[which(skeleton.mu$index.strata == iPattern$index.strata)],skeleton.mu$name[is.na(skeleton.mu$index.strata)>0])
iOut <- unname(t(expand.grid(iParamMu, iPattern$param[[1]])))
return(iOut)
}), structure$Upattern$name)
## ** param
skeleton.param <- rbind(skeleton.mu,
structure$param[is.na(structure$param$constraint),names(skeleton.mu),drop=FALSE] ## only keep free parameters
)
rownames(skeleton.param) <- NULL
order.type <- factor(skeleton.param$type, c("mu", "sigma", "k", "rho"))
if(is.unsorted(order.type)){ ## in presence of strata, typically need to reorder by type
skeleton.param <- skeleton.param[order(order.type),,drop=FALSE]
}
## ** gather and export
out <- list(mean = X.mean,
vcov = structure,
Y = data[[var.outcome]],
precompute.XX = precompute.XX,
precompute.XY = precompute.XY,
index.cluster = index.cluster,
index.clusterTime = index.clusterTime,
index.clusterStrata = index.clusterStrata,
param = skeleton.param,
drop.X = drop.X
)
if(!is.na(var.weights[1])){
out$weights <- data[[var.weights[1]]]
## NOTE: only take first weight for each cluster as weights should be constant within cluster
## out$weights <- sapply(index.cluster, function(iIndex){data[iIndex[1],var.weights[1]]})
}
if(!is.na(var.weights[2])){
out$scale.Omega <- data[[var.weights[2]]]
## NOTE: only take first weight for each cluster as weights should be constant within cluster
## out$scale.Omega <- sapply(index.cluster, function(iIndex){data[iIndex[1],var.weights[2]]})
}
return(out)
}
## * helpers
## ** .model.matrix_regularize
##' @description remove un-identifiable columns from the design matrix
##' @noRd
.model.matrix_regularize <- function(formula, data, augmodel = FALSE, type, drop.X){
## ## ** test 0: remove variable(s) with single level in the formula
test.1value <- sapply(all.vars(formula),function(iVar){length(unique(data[[iVar]]))})
if(any(test.1value==1)){
## no warning because this is normal behavior when stratifying
formula <- stats::update(formula, stats::as.formula(paste0("~.-",paste(names(test.1value)[test.1value==1],collapse="-"))))
}
## ** identify if there is an identifiability problem
X <- stats::model.matrix(formula, data)
attr(X,"variable") <- all.vars(formula)
X.qr <- qr(X)
if(X.qr$rank==NCOL(X)){
if(NCOL(X)==0){
return(X)
}else if(augmodel){
return(.augmodel.matrix(stats::delete.response(stats::terms(formula)),data))
}else{
return(X)
}
}else{
if(drop.X==FALSE){
stop("The design matrix for the ",type," structure does not have full rank according to the QR decomposition. \n", sep = "")
}
}
## ** prepare
tt <- stats::delete.response(stats::terms(formula))
tt.factors <- attr(tt,"factors")
tt.term.labels <- attr(tt,"term.labels")
tt.order <- attr(tt,"order")
name.var <- all.vars(tt)
test.factor <- sapply(name.var, function(iVar){is.character(data[[iVar]]) || is.factor(data[[iVar]])})
var.factor <- name.var[test.factor]
var.numeric <- name.var[!test.factor]
name.factor <- name.var[sapply(name.var,function(iVar){is.factor(data[[iVar]])})]
## ** test 1: remove factor with empty level
if(length(name.factor)>0){
test.factor <- sapply(name.factor, function(iVar){all(levels(data[[iVar]]) %in% data[[iVar]])})
if(any(test.factor==FALSE)){
message("Factor variable(s) with empty level: \"",paste(name.factor[test.factor==FALSE], collapse = "\" \""),"\"\n ",
"The empty level(s) will be remove internally for the ",type," structure.\n",
"Consider applying droplevels to avoid this warning. \n",
sep = "")
factor.drop <- name.factor[test.factor==FALSE]
for(iFactor in factor.drop){
data[[iFactor]] <- droplevels(data[[iFactor]])
}
}
}
## ** create "naive" design matrix
X <- .augmodel.matrix(tt,data)
## ** test 2: remove column(s) corresponding to the same level
index.keep <- which(!duplicated(attr(X,"M.level")))
Xsave <- X
X <- Xsave[,index.keep,drop=FALSE]
attrX <- attributes(Xsave)[setdiff(names(attributes(Xsave)),names(attributes(X)))]
attrX$assign <- attrX$assign[index.keep]
attrX$variable <- attrX$variable
attrX$term.labels <- attrX$term.labels[index.keep]
attrX$order <- attrX$order[index.keep]
attrX$ls.level <- attrX$ls.level[index.keep]
attrX$M.level <- attrX$M.level[index.keep,,drop=FALSE]
attributes(X) <- c(attributes(X),attrX)
X.Mlevel <- attr(X,"M.level")
X.reference <- attr(X,"reference")
## ** test 3: form interaction and identify columns of the design matrix that are constant
ls.rmX <- stats::setNames(lapply(which(tt.order>1), function(iInteraction){ ## iInteraction <- 2
## variables involved in the interactions
iVar <- names(which(tt.factors[,iInteraction]>0))
## identify coefficient relative to this interaction
iNoVar <- setdiff(name.var,iVar)
if(length(iNoVar)>0){ ## remove coefficients relative to other covariates
iMlevel <- X.Mlevel[apply(X.Mlevel[,iNoVar,drop=FALSE],1,function(iParam){all(iParam==X.reference[iNoVar])}),iVar,drop=FALSE]
}else{
iMlevel <- X.Mlevel
}
## create all possible values for the interaction and check if it is constant
test.constant <- sapply(1:NROW(iMlevel), FUN = function(iParam){ ## iParam <- 1
iValue <- do.call(cbind,lapply(var.factor, function(iFactor){data[[iFactor]]==iMlevel[iParam,iFactor]}))
iNumeric <- intersect(iVar, var.numeric)
if(length(iNumeric)>0){
iLs.ValueNum <- lapply(iNumeric, function(iN){if(iMlevel[iParam,iN]){data[[iN]]*iMlevel[iParam,iN]}else{rep(1,NROW(data))}})
iValue <- cbind(iValue,do.call(cbind,iLs.ValueNum))
}
return(length(unique(apply(iValue, MARGIN = 1, prod)))<2)
})
return(rownames(iMlevel)[which(test.constant)])
}), tt.term.labels[which(tt.order>1)])
rmX <- unlist(ls.rmX)
if(length(rmX)>0){
X.old <- X
test.keep <- colnames(X.old) %in% setdiff(colnames(X.old),rmX)
X <- X.old[,test.keep,drop=FALSE]
txt <- paste0("Constant values in the design matrix for the ",type," structure.\n")
if(length(unique(rmX))==1){
txt <- paste0(txt, "Coefficient \"",paste(unique(rmX), collapse = "\" \""),"\" relative to interaction \"",paste(names(ls.rmX), collapse = "\" \""),"\" has been removed. \n")
}else{
txt <- paste0(txt, "Coefficients \"",paste(unique(rmX), collapse = "\" \""),"\" relative to interactions \"",paste(names(ls.rmX), collapse = "\" \""),"\" have been removed. \n")
}
if(qr(X)$rank==X.qr$rank){
message(txt)
}else{
warning(txt)
}
attr(X,"assign") <- attr(X.old,"assign")[test.keep] ## as.numeric(as.factor(attr(X.old,"assign")[test.keep])) - "(Intercept)" %in% colnames(X)
attr(X,"contrasts") <- attr(X.old,"contrasts")
attr(X,"variable") <- attr(X.old,"variable")
if(augmodel || X.qr$rank!=NCOL(X.qr$qr)){
attr(X,"formula") <- attr(X.old,"formula")
attr(X,"term.labels") <- attr(X.old,"term.labels")[test.keep]
attr(X,"order") <- attr(X.old,"order")[test.keep]
attr(X,"ls.level") <- attr(X.old,"ls.level")[test.keep]
attr(X,"M.level") <- attr(X.old,"M.level")[test.keep,,drop=FALSE]
attr(X,"reference.level") <- attr(X.old,"reference.level")
}
}
## ** identify if there is still an identifiability problem
if(X.qr$rank==NCOL(X)){
return(X)
}
if(any(attr(X,"order")>2)){
stop("Cannot handle interaction involving more than two variables. \n")
}
## ** test 4: remove columns one at a time, starting from the higher order term (i.e. the last columns)
score.ref <- apply(attr(X,"M.level"),1,function(iRow){sum(iRow==attr(X,"reference"))})
test.name <- colnames(X)[order(attr(X,"order")+score.ref/(length(attr(X,"reference"))+1), decreasing = TRUE)]
rmX <- NULL
for(iCol in test.name){
iIndex <- colnames(X)!=iCol
X.test <- X[,iIndex,drop=FALSE]
if(X.qr$rank == qr(X.test)$rank){
rmX <- c(rmX, iCol)
keep.attr <- attributes(X)
X <- X.test
attr(X,"assign") <- keep.attr$assign[iIndex]
attr(X,"contrast") <- keep.attr$contrasts
attr(X,"variable") <- keep.attr$variable
if(augmodel){
attr(X,"formula") <- keep.attr$formula
attr(X,"term.labels") <- keep.attr$term.labels[iIndex]
attr(X,"order") <- keep.attr$order[iIndex]
attr(X,"M.level") <- keep.attr$M.level[iIndex,,drop=FALSE]
attr(X,"reference.level") <- keep.attr$reference.level
}
}
if(NCOL(X)==X.qr$rank){break}
}
if(length(rmX)>0){
message("Design matrix for the ",type," structure is singular. \n",
"Coefficient",if(length(rmX)>1){"s"}," \"",paste(rmX, collapse = "\" \""),"\" has been removed. \n")
} else if(!augmodel){
attr(X,"formula") <- NULL
attr(X,"term.labels") <- NULL
attr(X,"order") <- NULL
attr(X,"ls.level") <- NULL
attr(X,"M.level") <- NULL
attr(X,"reference.level") <- NULL
}
## ** export
return(X)
}
## ** .augmodel.matrix_match
##' @description add more information about which variable contribute to which column in the design matrix
##' @noRd
.augmodel.matrix <- function(formula, data){
## ** normalize formula
formula.terms <- stats::delete.response(stats::terms(formula))
var2term <- attr(formula.terms, "factors")
## if(NROW(var2term)>0){
## test1fac <- sapply(data[,rownames(var2term),drop=FALSE], function(iVar){length(unique(iVar))})
## if(any(test1fac==1)){
## stop("Cannot create the design matrix due to a factor variable with a single level. \n",
## "Variable: \"",paste(names(test1fac)[test1fac==1], collapse = "\" \""),"\". \n")
## }
## }
## ** create design matrix
## order(data[,all.vars(formula)])
X <- stats::model.matrix(formula,data)
p <- NCOL(X)
## ** identify factors and reference level
all.variable <- all.vars(formula.terms)
contrast.variable <- stats::setNames(lapply(all.variable, function(iVar){
if(is.character(data[[iVar]])){
out <- stats::contrasts(as.factor(data[[iVar]]))
if(any(colSums(abs(out)>1e-12)>1)){
if(any(options()$contrast!=c("contr.treatment","contr.poly"))){
stop("Cannot handle contrasts involving simultaneously several levels. \n",
"Could be because options()$contrast has been modified to non-standard contrasts. \n")
}else{
stop("Cannot handle contrasts involving simultaneously several levels. \n")
}
}
}else if(is.factor(data[[iVar]])){
out <- stats::contrasts(data[[iVar]])
if(any(colSums(abs(out)>1e-12)>1)){
if(any(options()$contrast!=c("contr.treatment","contr.poly"))){
stop("Cannot handle contrasts involving simultaneously several levels. \n",
"Could be because options()$contrast has been modified to non-standard contrasts. \n")
}else{
stop("Cannot handle contrasts involving simultaneously several levels. \n")
}
}
}else{
out <- NULL
}
return(out)
}),all.variable)
ls.reference <- stats::setNames(lapply(contrast.variable, function(iRef){
if(!is.null(iRef)){
return(rownames(iRef)[rowSums(abs(iRef)>0)==0])
}else{
return(NULL)
}
}),all.variable)
reference <- unlist(ls.reference[!sapply(ls.reference,is.null)])
reference2 <- data.frame(matrix(FALSE, nrow = 1, ncol = length(rownames(var2term)), dimnames = list(NULL, rownames(var2term))),stringsAsFactors = FALSE)
reference2[,names(reference)] <- reference
## ** addtional informations
X.names <- colnames(X)
X.assign <- attr(X,"assign")
X.order <- c(0,attr(formula.terms,"order"))[X.assign+1]
X.term <- c("(Intercept)",attr(formula.terms,"term.labels"))[X.assign+1]
X.level <- stats::setNames(vector(mode = "list", length = p), X.names)
X.level2 <- stats::setNames(lapply(X.names, function(iName){reference2}), X.names)
## ** loop for each element of the design matrix and identify the right level
for(iCol in 1:p){ ## iCol <- 1
if(X.order[iCol]==0){
## reference level for intercept
X.level[[iCol]] <- data.frame(reference,stringsAsFactors = FALSE)
rownames(X.level[[iCol]]) <- NULL
}else if(X.order[iCol]>0){
## variables involved
iVar <- rownames(var2term)[var2term[,X.term[iCol]]>=1]
## contrast for all factor/character variables involved
iContrast <- contrast.variable[iVar]
iContrast <- iContrast[!sapply(iContrast,is.null)]
if(!is.null(iContrast) && length(iContrast)>0){
## re-create all possible names and identify the one matching the column name
iLs.factor <- stats::setNames(lapply(iVar,function(iName){rownames(iContrast[[iName]])}), iVar)
iLs.grid <- expand.grid(iLs.factor[lengths(iLs.factor)>0])
iLs.allnameInteraction <- nlme::collapse(stats::setNames(lapply(iVar,function(iName){paste0(iName,iLs.grid[[iName]])}), iVar), sep = ":", as.factor = TRUE)
iIndex <- which(X.names[iCol] == iLs.allnameInteraction)
## deduce the factor variable
iValue.factor <- iLs.grid[iIndex,,drop=FALSE]
X.level[[iCol]] <- as.data.frame(stats::setNames(lapply(iVar, function(iName){
if(iName %in% names(iValue.factor) == FALSE){
as.numeric(NA)
}else{
as.character(iValue.factor[1,iName])
}}), iVar))
X.level2[[iCol]][,names(X.level[[iCol]])] <- X.level[[iCol]]
if(any(is.na(X.level[[iCol]]))){
X.level2[[iCol]][,is.na(X.level2[[iCol]])] <- TRUE
}
}else{
X.level[[iCol]] <- data.frame(matrix(as.numeric(NA), nrow = 1, ncol = length(iVar), dimnames = list(NULL, iVar)),stringsAsFactors = FALSE)
X.level2[[iCol]][,iVar] <- TRUE
}
}
}
## ** add attributes
attr(X,"formula") <- formula
attr(X,"variable") <- colnames(reference2)
attr(X,"term.labels") <- X.term
attr(X,"order") <- X.order
attr(X,"ls.level") <- X.level
attr(X,"M.level") <- do.call(rbind,X.level2)
attr(X,"reference.level") <- reference2
## ** export
return(X)
}
## ** .extractIndexData
.extractIndexData <- function(data, structure){
var.time <- structure$name$time
var.cluster <- structure$name$cluster
var.strata <- structure$name$strata
## *** find unique levels
U.cluster <- sort(unique(data[[var.cluster]]))
U.time <- sort(unique(data[[var.time]]))
if(!is.na(var.strata)){
U.strata <- sort(unique(data[[var.strata]]))
}else{
U.strata <- 1
}
## *** find position of each cluster and each time
data$XXindexXX <- 1:NROW(data)
data.order <- data[order(data[[var.time]]),c("XXindexXX",var.cluster,var.time)]
index.cluster <- unname(tapply(data.order$XXindexXX,data.order[[var.cluster]],base::identity, simplify = FALSE))
attr(index.cluster, "vectorwise") <- as.numeric(factor(data[[var.cluster]], levels = U.cluster))
index.clusterTime <- unname(tapply(data.order[[var.time]],data.order[[var.cluster]],base::identity, simplify = FALSE))
attr(index.clusterTime, "vectorwise") <- as.numeric(factor(data[[var.time]], levels = U.time))
## index.cluster <- unname(split(1:NROW(data),data[[var.cluster]]))
## index.clusterTime <- unname(split(as.numeric(factor(data[[var.time]],levels = U.time)),data[[var.cluster]]))
## order.clusterTime <- lapply(index.clusterTime, order)
## index.cluster <- mapply(x = index.cluster, y = order.clusterTime, function(x,y){x[y]}, SIMPLIFY = FALSE)
## index.clusterTime <- mapply(x = index.clusterTime, y = order.clusterTime, function(x,y){x[y]}, SIMPLIFY = FALSE)
## *** find strata corresponding to each cluster
if(!is.na(var.strata)){
iMatch <- tapply(as.character(data[[var.strata]]),data[[var.cluster]],unique)
index.clusterStrata <- match(iMatch, U.strata)
}else{
index.clusterStrata <- rep(1, length(U.cluster))
}
## *** export
out <- list(U.cluster = U.cluster,
U.time = U.time,
U.strata = U.strata,
index.cluster = index.cluster,
index.clusterTime = index.clusterTime,
index.clusterStrata = index.clusterStrata
)
return(out)
}
## ** .updatefactor
##' @description Update factor variables with previously defined levels
##' @noRd
.updateFactor <- function(data, xfactor){
if(length(xfactor)==0){return(data)}
## convert to factor with the right levels
ff.factor <- names(xfactor)
if(length(ff.factor)>0){
for(iVar in ff.factor){ ## iVar <- ff.factor[1]
iLevel <- xfactor[[iVar]]
if(any(data[[iVar]] %in% iLevel == FALSE)){
Wf <- setdiff(unique(data[[iVar]]), iLevel)
stop("Unknown factor(s) \"",paste0(Wf,collapse="\" \""),"\" for variable \"",iVar,"\".\n",
"Valid factors: \"",paste0(iLevel, collapse="\" \""),"\".\n")
}
data[[iVar]] <- factor(data[[iVar]], levels = iLevel)
}
}
return(data)
}
##----------------------------------------------------------------------
### model.matrix.R ends here
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.