#### Simulated dataset ####
# dataset size
n <- 1e3
# a covariate
x <- runif(n)
# a factor with 3 levels and associated values
f3 <- factor(sample(letters[1:3], n, replace = TRUE))
f3.val <- -1:1
# an ordered factor with 4 levels and associated values
# following a linear trend
o4 <- ordered(sample(month.abb[1:4], n, replace = TRUE),
levels = month.abb[1:4])
o4.val <- 1:4
# residual variance
sigma2 <- .5
dat <- data.frame(
y = x + f3.val[f3] + o4.val[o4] + rnorm(n, sd = sqrt(sigma2)),
x, f3, o4)
# considered as random effects, the factors f3 and o4 have variances
sigma2_f3 <- var(f3.val)
sigma2_o4 <- var(o4.val)
# # I recover the right coefficients with a standard lm
# lmres <- lm(y ~ 0 + x + f3 + o4, data = dat)
# summary(lmres)
#### Context: Linear Models ####
context("Linear Models")
lm_models <- list(y ~ 0 + f3,
y ~ 0 + x,
y ~ 0 + o4,
y ~ 0 + x + f3,
y ~ 0 + x + o4,
y ~ 0 + f3 + o4,
y ~ 0 + x + f3 + o4)
# Run REML and lm and save estimates and MLEs
run_model <- function(m, data = dat, method) {
res.reml <- try(suppressMessages(remlf90(m, data = data, method = method)))
res.lm <- lm(update(m, ~ . -1), data = data)
return(list(res.reml, res.lm))
}
# Compare progsf90 and lm results
run_expectations <- function(m, data = dat, method) {
res <- run_model(m, data, method)
# Expectations will depend on the number of covariates included in the model
# For one single covariate, progsf90 estimates should match exactly the
# MLE estimates from lm() with the intercept removed.
# For more than one covariate, the model paremeterizations are different
# and I can't compare the coefficients directly. Rather, I compare the
# fitted values.
# TODO: I can compare variance componens as well.
n.cov <- length(attr(terms(m), 'term.labels'))
if( n.cov == 1) {
# equal fixed effects estimates
pf90.beta <- coef(res[[1]])
lm.beta <- coef(res[[2]])
expect_that(pf90.beta,
equals(lm.beta, check.attributes = FALSE))
# equal standard errors (with more tolerance)
pf90.se <- drop(sapply(fixef(res[[1]]), attr, "se"))
lm.se <- coef(summary(res[[2]]))[, 'Std. Error']
expect_that(pf90.se,
equals(lm.se, check.attributes = FALSE, tolerance = 1e-05))
# (almost) equal residual variance estimates
pf90.sigmahat <- sqrt(as.numeric(res[[1]]$var[1, 1]))
lm.sigmahat <- summary(res[[2]])$sigma
expect_that(pf90.sigmahat, equals(lm.sigmahat, tolerance = 1e-03))
} else {
# equal fitted values
pf90.fitted <- fitted(res[[1]])
lm.fitted <- fitted(res[[2]])
expect_that(pf90.fitted, equals(lm.fitted))
}
}
test_that("Character variables are treated as factors", {
res_em.f3_char <- try(
suppressMessages(
remlf90(
y ~ f3,
data = within(dat, f3 <- as.character(f3)),
method = 'em')
)
)
res_ai.f3_char <- try(
suppressMessages(
remlf90(
y ~ f3,
data = within(dat, f3 <- as.character(f3)),
method = 'ai')
)
)
expect_that(is.factor(res_em.f3_char$mf$f3), is_true())
expect_that(is.factor(res_ai.f3_char$mf$f3), is_true())
})
test_that("remlf90() estimates matches lm()'s using EM", {
lapply(lm_models, run_expectations, method = 'em')
})
test_that("airemlf90() estimates matches lm()'s using AI", {
lapply(lm_models, run_expectations, method = 'ai')
})
# Standard Errors
# Notice that while the MLE estimates of beta are equal for continuous
# variables, the standard errors given by aireml and lm are quite different:
# 9.54 and 0.1048 respectively
#### Context: Linear Mixed Models ####
context("Linear Mixed Models")
suppressPackageStartupMessages(
require(lme4)
)
# Re-use lm_models terms as fixed and the available
# factors as random
lm2lmm <- function(x) {
fixed = x
random = NULL
isF <- sapply(dat, is.factor)
freeF <- names(isF)[which(isF)]
usedF <- match(attr(terms(x), 'term.labels'), names(isF)[which(isF)])
usedF <- usedF[!is.na(usedF)]
if(length(usedF)) freeF <- freeF[-usedF]
if(length(freeF))
random = as.formula(paste('~', paste(freeF, collapse = '+')),
env = parent.frame(2))
return(list(fixed = fixed, random = random))
}
lmm_models <- lapply(lm_models, lm2lmm)
# Don't run again models with NULL random terms
lmm_models <- lmm_models[!sapply(lmm_models, function(x) is.null(x$random))]
# Run REML and lm and save estimates and MLEs
run_lmm <- function(m, data = dat, method) {
res.reml <- try(
suppressMessages(
remlf90(
fixed = m$fixed,
random = m$random,
data = data,
method = method)
)
)
fml.lme4 <- breedR:::lme4_fml(m$fixed, m$random)
res.lmm <- lmer(fml.lme4, data = data)
return(list(res.reml, res.lmm))
}
# Compare progsf90 and lme4 results
run_lmmexpectations <- function(m, data = dat, method, tol = 1e-03) {
res <- run_lmm(m, data, method)
# progsf90 estimates should match exactly the
# MLE estimates from lmer() with the intercept removed.
# TODO: compare standard errors of effects and variance components.
# # equal standard errors (with more tolerance)
# pf90.se <- coef(res[[1]])[[1]]$'s.e.'
# lm.se <- coef(summary(res[[2]]))[, 'Std. Error']
# expect_that(pf90.se,
# equals(lm.se, check.attributes = FALSE, tolerance = 1e-05))
#
# equal fixed and random effects estimates
# note that the order of the components might differ
pf90.beta <- coef(res[[1]])
ranef_order <- names(ranef(res[[1]]))
lmm.beta <- c(fixef(res[[2]]),
do.call(rbind,
ranef(res[[2]])[ranef_order])[['(Intercept)']])
expect_that(pf90.beta,
equals(lmm.beta, check.attributes = FALSE, tolerance = tol))
# equal variance components estimations
# note that the order of the components might differ
pf90.var <- res[[1]]$var[, 1]
# exclude the residual term, which is extracted separately from lmer
varnames_order <- head(names(pf90.var), n = -1L)
lmm.var <- VarCorr(res[[2]])
lmm.var <- c(sapply(lmm.var, function(x) as.numeric(x))[varnames_order],
Residual = attr(lmm.var, 'sc')^2)
expect_that(as.numeric(pf90.var),
equals(lmm.var, check.attributes = FALSE, tolerance = tol))
# equal fitted values
pf90.fitted <- fitted(res[[1]])
lmm.fitted <- fitted(res[[2]])
expect_that(pf90.fitted,
equals(lmm.fitted, check.attributes = FALSE, tolerance = tol))
# qplot(pf90.fitted, lmm.fitted) + geom_abline(intercept = 0, slope = 1)
}
# Run character conversion test for each method
test_that("Character variables in random effects are treated as factors", {
res_em.f3_char <- try(
suppressMessages(
remlf90(fixed = y ~ x, random = ~ f3,
data = within(dat, f3 <- as.character(f3)),
method = 'em')
)
)
res_ai.f3_char <- try(
suppressMessages(
remlf90(fixed = y ~ x, random = ~ f3,
data = within(dat, f3 <- as.character(f3)),
method = 'em')
)
)
expect_that(is.factor(res_em.f3_char$mf$f3), is_true())
expect_that(is.factor(res_ai.f3_char$mf$f3), is_true())
})
# Run all test for each of the models and each method
for( m in lmm_models ) {
m_str <- paste(lapply(m, deparse), collapse = ' | ')
test_that(paste("remlf90() estimates matches lmer()'s for model", m_str), {
run_lmmexpectations(m, data = dat, method = 'em')
})
test_that(paste("airemlf90() estimates matches lmer()'s for model", m_str), {
run_lmmexpectations(m, data = dat, method = 'ai')
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.