# Formula Manipulation ----
# Extract RHS of formula, also use for RHS of bar (e.g. 1 + x | u:v returns u:v)
getRHS <- function(formula) {
RHS <- formula[[3]]
return(RHS)
}
# Extract LHS of formula, also use for LHS of bar (in ex above returns 1 + x)
getLHS <- function(formula) {
LHS <- formula[[2]]
return(LHS)
}
# Recursive function for extracting fixed portion of RHS/formula, based on lme4 source code
RHSfixed <- function(RHS) {
if (!("|" %in% all.names(RHS))) return(RHS) #if no bars, return RHS as-is
if (RHS[[1]] == as.name("|")) return(NULL) #if it's an RE term, return as NULL
if (length(RHS) == 2) {
temp <- RHSfixed(RHS[[2]])
if (is.null(temp)) return(NULL)
RHS[[2]] <- temp
return(RHS)
}
rhs2 <- RHSfixed(RHS[[2]])
rhs3 <- RHSfixed(RHS[[3]])
if (is.null(rhs2)) return(rhs3)
if (is.null(rhs3)) return(rhs2)
RHS[[2]] <- rhs2
RHS[[3]] <- rhs3
return(RHS)
}
# Recursive function for extract RHS random intercept terms, based on lme4 source code. ONLY works on RHS
RHSrandom <- function(RHS) {
if (is.name(RHS)) return(NULL)
if (RHS[[1]] == as.name("(")) return(RHSrandom(RHS[[2]]))
if (RHS[[1]] == as.name("|")) return(RHS)
return(c(RHSrandom(RHS[[2]]), RHSrandom(RHS[[3]])))
}
# Data preparation ----
# Function for preparing FE component and DV
FEdat <- function(data, y=NULL, fe_formula) {
mf <- stats::model.frame(fe_formula, data) #need to add na.action at some point - understand how it intersects with missingness in RE groups
X <- stats::model.matrix(fe_formula, mf) #need to add contrasts at some point - also work out offsets and weights
if (is.null(y)) {
temp <- stats::model.response(mf)
n <- length(temp)
if (!is.factor(temp)) temp <- as.factor(temp)
lev <- levels(temp)
M <- length(lev)
y <- matrix(integer(n*M), nrow=n, ncol=M)
for (m in 1:M) {
y[,m] <- ifelse(temp == lev[m], 1L, 0L)
}
colnames(y) <- lev
} else {
y <- y[attr(mf, "na.action"), ] #need to understand how this will change for different na.actions
ymissing <- apply(y, 1, function(x) any(is.na(x)))
y <- y[ymissing,]
mf <- mf[ymissing,] # this will break the use of attr(mf, "na.action")
X <- X[ymissing,]
}
out <- list(mf, X, y)
names(out) <- c("mf", "X", "y")
return(out)
}
# Function for generating data blocks for REs
# - for now random intercepts only
# - next will add cross-classified REs
# - random slopes (correlated and uncorrelated) a long way off
REdat <- function(data, re_terms) {
# Currently assuming only random intercepts, so not processing LHS atm
# Extract RHS, convert to formula
groupNames <- as.character(sapply(re_terms, logitPlus:::getRHS))
groups <- paste0(groupNames, collapse=" + " )
# Convert desired variables to model frame
ref <- stats::model.frame(as.formula(paste0("~ ", groups)), data)
n <- nrow(ref)
# Column by column convert to block matrix
A <- ncol(ref) # will need to come back to this once nesting and slopes are added
Z <- vector("list", A)
for (i in 1:A) {
vec <- ref[[i]]
if (!is.factor(vec)) vec <- as.factor(vec)
lev <- levels(vec)
nlev <- length(lev)
Z[[i]] <- matrix(integer(n * nlev), nrow=n, ncol=nlev)
colnames(Z[[i]]) <- lev
rownames(Z[[i]]) <- rownames(ref)
for (j in 1:nlev) {
Z[[i]][,j] <- ifelse(vec == lev[j], 1L, 0L)
}
}
names(Z) <- groupNames
out <- list(ref, Z)
names(out) <- c("ref", "Z")
return(out) # in the future this will need to return meta-info such as whether it's an intercept or slope, etc
}
# Function for finalising missingness relations - need to come back to this once using na.action
data_filter <- function(fe_data, re_data) {
feindex <- rownames(fe_data$mf) %in% rownames(re_data$ref)
reindex <- rownames(re_data$ref) %in% rownames(fe_data$mf)
fe_data$mf <- fe_data$mf[feindex,]
fe_data$X <- fe_data$X[feindex,]
fe_data$y <- fe_data$y[feindex,]
re_data$ref <- re_data$ref[reindex,]
for (i in 1:length(re_data$Z)) re_data$Z[[i]] <- re_data$Z[[i]][reindex,]
model_data <- cbind(fe_data$mf, re_data$ref)
out <- list(fe_data, re_data, model_data)
names(out) <- c("fe_data", "re_data", "model_data")
return(out)
}
# Formula-data function ----
# Function that takes data, optional y matrix, formula, na.action, contrasts, weights, etc
# parses formula to return model frame(s) required for estimation using functions above
form_to_data <- function(data, y=NULL, formula) {
fe_formula <- logitPlus:::RHSfixed(formula)
re_terms <- logitPlus:::RHSrandom(logitPlus:::getRHS(formula))
fe_data <- logitPlus:::FEdat(data, y, fe_formula)
has_REs <- !is.null(re_terms)
if (has_REs) re_data <- logitPlus:::REdat(data, re_terms)
if (has_REs) {
mdata <- logitPlus:::data_filter(fe_data, re_data)
fe_data <- mdata$fe_data
re_data <- mdata$re_data
model_data <- mdata$model_data
y <- fe_data$y
X <- fe_data$X
Z <- re_data$Z
} else {
model_data <- fe_data$mf
y <- fe_data$y
X <- fe_data$X
}
out <- list(model_data, y, X)
if (has_REs) out <- append(out, list(Z))
onames <- c("model_data", "y", "X")
names(out) <- if (has_REs) c(onames, "Z") else onames
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.