# Internal helper functions for the art function
#
# Author: mjskay
###############################################################################
### Parses and validates model formula for art.
### Raises exception if formula does not validate (e.g. not factorial).
### Given a formula like y ~ a*b*c + (1|d) + Error(g * h) + Error(i),
### returns list with:
### fixed.only: formula with only fixed components (y ~ a+b+c)
### fixed.terms: additive formula with only variables from fixed terms and no response
### (for use with ddply) (e.g., ~ a + b + c)
### fixed.term.labels: character vector of term labels (e.g., c("a", "b", "c", "a:b", "a:c", "b:c", "a:b:c"))
### n.grouping.terms: number of grouping terms like (1|d) (e.g. 1)
### n.error.terms: number of error terms like Error(g) (e.g. 2)
### error.terms: formula with error terms extracted from within Error() (e.g. ~ g * h + i)
#' @importFrom stats terms
#' @importFrom plyr laply
parse.art.formula = function(formula) {
#extract terms from the formula
f.terms = terms(formula)
#ensure we have an independent variable and an intercept
if (attr(f.terms, "response") != 1) {
stop("Model must have exactly one dependent variable (got ", attr(f.terms, "response"), ")")
}
if (attr(f.terms, "intercept") == 0) {
stop("Model must have an intercept (got ", attr(f.terms, "intercept"), ")")
}
#unique variables in the rhs of the formula as list of quoted variables
#e.g. y ~ a*b*c + (1|d) + Error(g) -> list(quote(a), quote(b), quote(c), quote((1|d)), quote(Error(g))))
variables = as.list(attr(f.terms, "variables"))[c(-1,-2)]
#char vector of names of rhs terms and their interactions
#e.g. y ~ a*b*c + (1|d) + Error(g) -> c("a","b","c","1 | d","Error(g)","a:b","b:c","a:b:c"))
term.labels = attr(f.terms, "term.labels")
#vector of length(f.term.labels); value is the order of the interaction of the corresponding entry in term.labels
#e.g. y ~ a*b*c + (1|d) + Error(g) -> c(1,1,1,1,1,2,2,3)
term.order = attr(f.terms, "order")
#determine which variables on the rhs are grouping variables, error variables, or fixed variables
is.grouping.variable = laply(variables, function(term) as.list(term)[[1]] == quote(`|`))
is.error.variable = laply(variables, function(term) is.call(term) & as.list(term)[[1]] == quote(`Error`))
#all other variables that aren't grouping or error variables must be fixed variables
is.fixed.variable = !(is.grouping.variable | is.error.variable)
#ensure we have at least one fixed effect and are using either grouping terms or error terms but not both
if (sum(is.fixed.variable) == 0) {
stop("Model must have at least one fixed effect (0 given)")
}
if (any(is.grouping.variable) & any(is.error.variable)) {
stop("Model cannot contain both grouping terms, like (1|d), and error terms, like Error(d). Use one or the other.")
}
#get table with rows == rhs variables and cols == term labels, each cell == 1 if variable in term
variables.by.terms = attr(f.terms, "factors")[-1,,drop=FALSE] #prevent reducing to vector if only one cell
#make a version of the formula terms with only fixed effects
n.rhs.variables = length(variables)
n.rhs.terms = length(term.labels)
n.interaction.terms = n.rhs.terms - n.rhs.variables
if (n.interaction.terms < 0) {
#only happens when not factorial
stop("Model must include all combinations of interactions of fixed effects.")
}
is.fixed.term = c(is.fixed.variable, rep(TRUE, n.interaction.terms))
fixed.variables.by.terms = variables.by.terms[is.fixed.variable, is.fixed.term, drop=FALSE]
fixed.term.labels = term.labels[is.fixed.term]
fixed.term.order = term.order[is.fixed.term]
#ensure design of fixed effects portion of model has all interactions
#first, pull out the response and the main (i.e. order-1) fixed effect terms
response = formula[[2]]
#build a factorial model of all fixed effects
#e.g. y ~ a*b*c + (1|d) + Error(g) -> y ~ a*b*c
factorial.formula = eval(bquote(.(response) ~ .(Reduce(function(x,y) bquote(.(x) * .(y)), variables[is.fixed.variable]))))
environment(factorial.formula) = environment(formula)
#verify the factorial model is the same as the fixed effects in the supplied model
factorial.factors = attr(terms(factorial.formula), "factors")[-1,,drop=FALSE]
if (!all(dim(factorial.factors) == dim(fixed.variables.by.terms)) || !all(factorial.factors == fixed.variables.by.terms)) {
stop("Model must include all combinations of interactions of fixed effects.")
}
#build a formula with only fixed variables on the right-hand-side (added to each other)
#e.g. y ~ a*b*c + (1|d) + Error(g) -> ~ a + b + c
fixed.terms = eval(bquote(~ .(Reduce(function(x,y) bquote(.(x) + .(y)), variables[is.fixed.variable]))))
environment(fixed.terms) = environment(formula)
#build a formula with all Error terms extracted from Error() on the right-hand side (added to each other)
#e.g. y ~ a*b*c + (1|d) + Error(g * h) + Error(i) -> ~ g * h + i
error.terms = eval(bquote(~ .(Reduce(function(x,y) bquote(.(x) + .(y)), Map(function (v) v[[2]], variables[is.error.variable])))))
environment(error.terms) = environment(formula)
#return validated formulas
list(
fixed.only = factorial.formula,
fixed.terms = fixed.terms,
fixed.term.labels = fixed.term.labels,
n.grouping.terms = sum(is.grouping.variable),
n.error.terms = sum(is.error.variable),
error.terms = error.terms
)
}
### Given a factorial, fixed-effects-only formula,
### calculate the cell means and estimated effects
### for all responses. Returns a list of three
### data frames all indexed in parallel: data
### (the original data as determined by
### model.frame(formula, data)),
### cell.means, and estimated.effects
###
### Given some formula f and an input data frame df,
### parameters to art.estimated effects are:
### formula.terms = terms(f)
### data = model.frame(f, df)
#' @importFrom plyr ddply
art.estimated.effects = function(formula.terms, data) {
#N.B. in this method "interaction" refers to
#all 0 - n order interactions (i.e., grand mean,
#first-order/"main" effects, and 2+-order interactions)
#matrix with interactions as columns
#and the response + all first-order factors as rows,
#with each cell indicating if the first-order factor (row)
#contributes to the nth-order interaction (column)
interaction.matrix = cbind(data.frame(.grand=FALSE), attr(formula.terms, "factors") == 1)
interaction.names = colnames(interaction.matrix)
term.names = row.names(interaction.matrix)
#interaction order of each column in interaction.matrix
#(order of grand mean (first column) is 0, main effects
#are 1, n-way interactions are n)
interaction.order = c(0, attr(formula.terms, "order"))
#calculate cell means for each interaction
cell.means = data.frame(y=rep(mean(data[,1]), nrow(data)))
colnames(cell.means) = term.names[1]
data$.row = 1:nrow(data) #original row indices so we can keep rows in order when we split/combine
for (j in 2:ncol(interaction.matrix)) {
term.index = interaction.matrix[,j]
#calculate cell means
#must dervie term.formula as below (instead of just passing term.names[term.index] to ddply)
#because otherwise expressions like "factor(a)" would be converted to ~ factor(a) (instead of ~ `factor(a)`, which
#is what we want here because the expression has already been evaluated previously)
#A nicer way to do all this would be good to come up with eventually
term.formula = eval(bquote(~ .(Reduce(function(x,y) bquote(.(x) + .(y)), Map(as.name, term.names[term.index])))))
cell.mean.df = ddply(data, term.formula, function (df) {
df$.cell.mean = mean(df[,1]) #mean of response for this interaction
df
})
#put results into cell.means in the order of the original rows (so that they match up)
cell.means[[interaction.names[j]]] = cell.mean.df[order(cell.mean.df$.row), ".cell.mean"]
}
#calculate estimated effects for each interaction
estimated.effects = data.frame(y=cell.means[,1]) #estimated effect for grand mean == grand mean
colnames(estimated.effects) = term.names[1]
for (j in 2:ncol(interaction.matrix)) {
#index of which cell means (columns of cell.means)
#contribute to the estimated effect of this interaction.
#This will select any columns involving a subset of the same factors; e.g. for A:B:C it would select
#the grand mean, A, B, C, A:B, A:C, B:C, A:B:C; but not (say) A:D (since D is not in A:B:C).
cell.means.cols = colSums(interaction.matrix[interaction.matrix[,j],]) == interaction.order
#cell means contribute positively if they have the same
#order (mod 2) as this interaction and negatively otherwise
cell.means.multiplier = ifelse((interaction.order - interaction.order[j]) %% 2, -1, 1)
#calculate estimated effect
estimated.effects[[interaction.names[j]]] =
rowSums(t(t(cell.means[,cell.means.cols]) * cell.means.multiplier[cell.means.cols]))
}
estimated.effects[1] = NULL #drop first column (grand mean), not needed
list(
cell.means=cell.means,
estimated.effects=estimated.effects
)
}
## deparse1 backport
deparse0 = function(expr, collapse = " ", width.cutoff = 500L, ...) {
paste(deparse(expr, width.cutoff, ...), collapse = collapse)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.