#XXX RANDOM EFFECTS MUST ALREADY BE FACTORS IN DATA!!!!
################################################################################
#' @describeIn gremlin Generates model matrices.
#' @export
#' @importFrom stats na.pass model.response model.matrix lm reformulate na.omit
mkModMats <- function(formula, random = NULL, rcov = ~ units,
data = NULL, subset = NULL,
ginverse = NULL,
na.action = na.pass, offset = NULL, contrasts = NULL,
Xsparse = TRUE, # Should fxd effects design matrix be sparse
...)
{
cl <- match.call()
## keep only the arguments which should go into the model frame
#TODO: Need to build up 1 model frame with all of the components for the fit
## perhaps can add back X, y, Z, etc to 'mf' and return 'mf' - as list of fixed and random frames?
if(any(grepl("units", names(data)))){
stop("column names in data cannot be named 'units'")
}
if(!any("units" %in% all.vars(rcov))){
stop("'units' must be specified in 'rcov' argument")
}
##################################
##### Fixed effects ####
mf <- match.call(expand.dots = FALSE)
f <- match(c("formula", "data", "subset", "na.action", "offset"), names(mf), 0)
ff <- mf[c(1, f)]
#TODO: How does this handle missing data in fixed effects?
ff[[1]] <- as.name("model.frame")
rf <- gf <- ff # create rand effects model frames
ff$drop.unused.levels <- TRUE # do after assign gf so this is not set for rfx
ff <- eval.parent(ff) # fxd effects model frame
## 1) allow model.frame to update the terms object before saving it.
ft <- attr(ff, "terms")
resp <- model.response(ff, "numeric")
if(is.null(ncol(resp))) ncy <- 1 else ncy <- ncol(resp) #<--number of traits
if(ncy > 1) stop("gremlin isn't old enough to play with multivariate models") #FIXME!!!
#TODO: if multivar; order y as traits within individs (Meyer 1991, just b4 eqn 5)
y <- matrix(resp, ncol = 1)
if(ncy == 1) ny <- nrow(y) else ny <- NULL #FIXME # <-- sample size if univariate
## 2) retrieve the weights and offset from the model frame so
## they can be functions of columns in arg data.
#XXX offset <- model.offset(ff) # turned off for now
#TODO: Need to make list (e.g. rand fx) and create X for each response (in multivar)
## bdiag(lapply(seq(ncol(resp)), FUN = function(x){model.matrix(...
#TODO: Test if sparse or dense X is better/more efficient
if(Xsparse){
X <- sparse.model.matrix(ft, ff,
contrasts,
transpose = FALSE)
} else X <- model.matrix(ft, ff,
contrasts)
## if any subsetting is done, retrieve the "contrasts" attribute here.
#TODO: Figure out what this means in original context of lm and if applies
### use lm to find singularities in X (see `MCMCglmm()`, ~line 608 of v2.21)
sing.rm <- lm(y ~ as(X, "matrix") - 1)
sing.rm <- which(is.na(sing.rm$coef))
if(length(sing.rm) > 0){
warning(paste(length(sing.rm), "fixed effects are not estimable and have been removed (set to 0)"))
X <- X[, -sing.rm, drop = FALSE]
}
###END FIXME
#TODO OR created reduced rank X (see "./Wellham2008REML..." slide 15&16)
#TODO see how lme4 deals with rank-deficiencies:
## `rankMatrix()` test function and discussion https://github.com/lme4/lme4/pull/144
#####################################
###### R structure #######
#TODO: strip R formula of covariance functions (e.g., idh(), us(), etc.)
rForm <- rcov
#TODO: check below that when complex R structure this gets the formula right
rf$formula <- reformulate(deparse(rForm[[-1]]), cl$formula[[2]], intercept = FALSE)
#TODO: Need to add "units" to 'data'
Rdata <- data
Rdata$units <- factor(paste("units", seq(nrow(Rdata)), sep = "."),
levels = paste("units", seq(nrow(Rdata)), sep = "."))
rf$data <- Rdata
rf$drop.unused.levels <- TRUE
#TODO: how handle missing values in random effects?
rf <- eval.parent(rf) # R model frame
rt <- attr(rf, "terms")
Zr <- sparse.model.matrix(rt, rf,
transpose = FALSE,
row.names = TRUE)
# Placeholder for more complicated rcov: RENAME `Zr` to `Zinit` above
# emptCol <- which(diff(Zinit@p) == 0) #TODO handle missing values
# Zr <- sparseMatrix(i = Zinit@i, p = Zinit@p[-emptCol], x = Zinit@x,
# dimnames = list(NULL, Zinit@Dimnames[[2L]][-emptCol]),
# symmetric = FALSE, index1 = FALSE)
######################################
#### G structure ######
if(!is.null(random)){
if(random[[1]] != "~") stop("Tilde not first element in random formula")
#TODO: Make a more rigorous check of formula (and if a formula == is.formula)
# Split random formula up by terms "+" and pass, one by one to varStructures
#XXX: See what `terms(random)` does, particularly in a formula with multiple `us(trait):`
#XXX See how lme4 looks for terms in the formula, see `nobars`, but in full context of code
## what about `all.vars()` or `all.names()` for above questions?
#XXX (commented out 18 Oct 2015) varStructures(random[[2]])
# Check the ginverse
## taken from MCMCglmm.R
if(!is.null(ginverse)){
if(!is.list(ginverse)){
stop("ginverse must be an object of class 'list'")
}
if(any(duplicated(names(ginverse)))){
stop("duplicated names in ginverse. Must have only one unique name per ginverse entry")
}
for(i in 1:length(ginverse)){
if(is.null(rownames(ginverse[[i]]))){
stop(paste(names(ginverse)[i], "ginverse must have non-null rownames"))
}
if(names(ginverse)[i] %in% names(data) == FALSE){
stop(paste(names(ginverse)[i], "does not appear in data"))
}
dat_ilevels <- na.omit(unique(data[,names(ginverse)[i]]))
if(any(dat_ilevels %in% rownames(ginverse[[i]]) == FALSE)){
stop(cat("levels of", names(ginverse)[i], "do not have a row entry in ginverse:", as.character(dat_ilevels[dat_ilevels %in% rownames(ginverse[[i]]) == FALSE]), "\n"))
}
if(any(duplicated(rownames(ginverse[[i]])))){
stop(paste("rownames of ", names(ginverse)[i], "ginverse must be unique"))
}
# if(determinant(ginverse[[i]])$sign<=0){
# stop(paste(names(ginverse)[i], "ginverse is not positive definite"))
# }
#TODO: Now, check/turn ginverse list into sparse matrix (so that can allow ginverse elements to be triplet form
#XXX For now, make sure ginverse is "dgCMatrix" - consider dsCMatrix to save space?
if(class(ginverse[[i]]) != "dgCMatrix"){
stop(paste(names(ginverse)[i], "ginverse of", class(ginverse[[i]]), "must be a 'dgCMatrix' class"))
}
## Need to decide on "dgCMatrix" or "dsCMatrix"...
#FIXME: Make correspond to how previous function makes factors/checks levels
## Need to change data either in call/function environment of the random effects model frame (gf)
#below line: TEMPORARY to get factor levels in correct order so Z formed correctly
data[,names(ginverse)[i]] <- factor(data[,names(ginverse)[i]], levels=rownames(ginverse[[i]]))
}
}
#####
# Make list of G and R elements as well as list of Zs (just for G effects?).
## Construction of MMEs or likelihood function can go element by element in a list as the rows and columns
## May need/be able to get rid of much of what is below and the model frame parts...
#TODO: strip G formula of covariance functions (e.g., idh(), us(), etc.)
#TODO: how to deal with random regression
gForm <- random
gf$formula <- gForm
#TODO: how handle missing values in random effects?
# do I need the response variable so that the random effects below get cut to only
## the effects that have a response variable??? 21 Oct 2015, following Mrode ch. 3 YES
### INCLUDE levels in Z that do not have y observation as columns, but not rows
### see how asreml's na.omit.Y creates Z...
###XXX See MCMCglmm's `buildZ()`, line 115 of v2.21 - drop levels unless Va term.
#### Should I extend to any ginverse term (so V_dominance, etc.)
#TODO: See MCMCglmm's `buildZ()` - why create t(Z)Z directly??
Gnames <- strsplit(deparse(gForm[[-1]]), split = " \\+ ")[[1]]
nG <- length(Gnames)
#TODO: logDetG as a simple vector or scalar (1 element) that is added to!
Zg <- listGeninv <- logDetG <- vector("list", length = nG)
names(Zg) <- names(listGeninv) <- names(logDetG) <- Gnames
for(g in 1:nG){
ggf <- gf # temporary gf
#XXX Does including response in `reformulate()` work when multivariate?
ggf$formula <- reformulate(names(Zg)[g], response = cl$formula[[2]], intercept = FALSE)
# retain levels in ginverse if one is associated, else drop unused levels
if(!is.null(ginverse) && Gnames[g] %in% names(ginverse)){
Ggdata <- data
# index in `ginverse` when not every random term has a ginverse
ginvIndex <- which(names(ginverse) == Gnames[g])
levels(Ggdata[, Gnames[g]]) <- union(ginverse[[ginvIndex]]@Dimnames[[1]], levels(Ggdata[, Gnames[g]]))
ggf$data <- Ggdata
ggf$drop.unused.levels <- FALSE
ggf <- eval.parent(ggf) # G[g] model frame
gt <- attr(ggf, "terms")
Zg[[g]] <- sparse.model.matrix(gt, ggf,
transpose = FALSE,
drop.unused.levels = FALSE, #TODO correct?
row.names = TRUE)
listGeninv[[g]] <- ginverse[[ginvIndex]]
# calculate log(det(G)) from geninv; log(det(G)) = -1*log(det(G^-1))
#TODO: make check to see if ginverse[[ginvIndex]] has attribute=logdet (from nadiv)
logDetG[[g]] <- -1 * determinant(ginverse[[ginvIndex]], logarithm = TRUE)$modulus[1]
} else{
Zg[[g]] <- sparse.model.matrix(ggf$formula, eval.parent(ggf),
transpose = FALSE,
drop.unused.levels = TRUE, #TODO correct?
row.names = TRUE)
#XXX Might want to do this in c++, see cs_kroneckerI
#TODO check `n = ` value!?
## listGeninv need to be same dimensions as crossprod (in W) when making MMA
listGeninv[[g]] <- Diagonal(x = 1, n = ncol(Zg[[g]]))
logDetG[[g]] <- 0
}
}
#FIXME better check for missing values or handling of missing values
## e.g., What does MCMCglmm do?
nrowZi <- sapply(seq(length(Zg)), FUN = function(g){nrow(Zg[[g]])})
if(any(nrowZi != ny)){
missingZi <- which(nrowZi != ny)
stop(cat("Missing value(s) for", names(Zg)[missingZi], "\n"))
}
} else{
Zg <- listGeninv <- logDetG <- NULL
nG <- 0
}
structure(list(y = y, ny = ny, ncy = ncy,
X = X, nb = ncol(X),
Zr = Zr,
Zg = Zg, nG = nG, listGeninv = listGeninv, logDetG = logDetG),
class = "grModMats")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.