## Tests of predict() and simulate() methods for lme4 models
library("testthat")
library("lme4")
library("lattice")
## test level: heavier tests below only run when the LME4_TEST_LEVEL
## environment variable is set to a value > 1
testLevel <- if (nzchar(s <- Sys.getenv("LME4_TEST_LEVEL"))) as.numeric(s) else 1
## use old (<=3.5.2) sample() algorithm if necessary
## (R >= 3.6.0 added the 'sample.kind' argument to RNGkind(); requesting
## "Rounding" keeps seeded draws reproducible across R versions)
if ("sample.kind" %in% names(formals(RNGkind))) {
suppressWarnings(RNGkind("Mersenne-Twister", "Inversion", "Rounding"))
}
## toggle for the (cheap) diagnostic plots made in several tests
do.plots <- TRUE
## load pre-fitted reference models shipped with the package
L <- load(system.file("testdata/lme-tst-fits.rda",
package="lme4", mustWork=TRUE))
if (getRversion() > "3.0.0") {
## saved fits are not safe with old R versions
gm1 <- fit_cbpp_1
fm1 <- fit_sleepstudy_1
fm2 <- fit_sleepstudy_2
fm3 <- fit_penicillin_1
fm4 <- fit_cake_1
} else {
## ... so refit the reference models from scratch on old R
gm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
data = cbpp, family = binomial)
fm1 <- lmer(Reaction ~ Days + (1|Subject), sleepstudy)
fm2 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
fm3 <- lmer(diameter ~ (1|plate) + (1|sample), Penicillin)
fm4 <- lmer(angle ~ temp + recipe + (1 | replicate), data=cake)
}
if (testLevel>1) { ## the bulk of the predict() tests only run at test level > 1
context("predict")
test_that("fitted values", {
  ## conditional (all-RE) fitted values; predicting from the original data
  ## frame should reproduce them
  fit_all <- predict(gm1)
  fit_all_nd <- predict(gm1, newdata = cbpp)
  ## ? not sure why high tolerance necessary ? works OK on Linux/R 3.5.0
  expect_equal(fit_all, fit_all_nd, tolerance = 2e-5)
  ## population-level (level-0) predictions vary only with 'period'
  fit_fixed <- predict(gm1, re.form = NA)
  expect_true(length(unique(fit_fixed)) == length(unique(cbpp$period)))
  ## random-effects-only predictions; fixed + random must equal full
  fit_ranef <- predict(gm1, random.only = TRUE)
  expect_equal(fit_fixed + fit_ranef, fit_all)
  if (do.plots) matplot(cbind(fit_all, fit_fixed), col = 1:2, type = "b")
  ## neither fixed nor random -> predictions are identically zero
  expect_equal(unique(predict(gm1, re.form = NA, random.only = TRUE)), 0)
})
test_that("predict with newdata", {
  ## full crossed grid of period x herd
  grid <- with(cbpp, expand.grid(period = unique(period), herd = unique(herd)))
  ## conditional predictions on new data (all REs)
  pred_full <- predict(gm1, grid)
  ## population-level and random-only components on the same grid
  pred_pop <- predict(gm1, grid, re.form = NA)
  pred_re <- predict(gm1, grid, random.only = TRUE)
  ## the two components must reassemble the full prediction
  expect_equal(pred_pop + pred_re, pred_full)
  if (do.plots) matplot(cbind(pred_full, pred_pop), col = 1:2, type = "b")
})
test_that("predict on response scale", {
  ## type = "response" must equal the inverse-link (logistic) transform of
  ## the default link-scale predictions for this binomial fit
  link_pred <- predict(gm1)
  resp_pred <- predict(gm1, type = "response")
  expect_equal(resp_pred, plogis(link_pred))
})
test_that("predict with newdata and RE", {
  grid <- with(cbpp, expand.grid(period = unique(period), herd = unique(herd)))
  ## spelling out the model's full RE structure explicitly should match the
  ## default (re.form = NULL, i.e. condition on all random effects)
  pred_default <- predict(gm1, grid)
  pred_explicit <- predict(gm1, grid, re.form = ~(1|herd))
  expect_equal(pred_default, pred_explicit)
})
test_that("effects of new RE levels", {
newdata <- with(cbpp, expand.grid(period=unique(period), herd=unique(herd)))
## append rows whose grouping-factor level ("new") was absent from the fit
newdata2 <- rbind(newdata,
data.frame(period=as.character(1:4), herd=rep("new",4)))
## default behaviour: unseen grouping levels are an error
expect_error(predict(gm1, newdata2), "new levels detected in newdata: new")
## many unseen levels: error message lists them (possibly truncated)
newdata3 <- rbind(newdata,
data.frame(period=as.character(1),
herd = paste0("new", 1:50)))
expect_error(predict(gm1, newdata3),
"new levels detected in newdata: new1, new2,")
p2 <- predict(gm1, newdata)
## allow.new.levels=TRUE: unseen levels get population-level predictions
p6 <- predict(gm1, newdata2, allow.new.levels=TRUE)
expect_equal(p2, p6[1:length(p2)]) ## original values should match
## last 4 values should match unconditional values
expect_true(all(tail(p6,4) ==
predict(gm1, newdata=data.frame(period=factor(1:4)), re.form=NA)))
})
test_that("multi-group model", {
  ## conditional fitted values for the crossed (plate + sample) model,
  ## checked against reference values from an apparently-working run
  fit_cond <- predict(fm3)
  expect_equal(head(round(fit_cond, 4)),
               c(`1` = 25.9638, `2` = 22.7663, `3` = 25.7147, `4` = 23.6799,
                 `5` = 23.7629, `6` = 20.773))
  ## population-level (level-0) predictions collapse to a single value,
  ## since the fixed-effect part is intercept-only
  fit_pop <- predict(fm3, re.form = NA)
  expect_equal(unique(fit_pop), 22.9722222222251)
  if (do.plots) matplot(cbind(fit_cond, fit_pop), col = 1:2, type = "b")
})
test_that("multi-group model with new data", {
  ## full crossed grid of plate x sample combinations
  newdata <- with(Penicillin,
                  expand.grid(plate = unique(plate), sample = unique(sample)))
  ## new data, conditioning on all random effects (default re.form = NULL)
  p2 <- predict(fm3, newdata)
  ## new data, level-0 (fixed effects only)
  p3 <- predict(fm3, newdata, re.form = NA)
  ## explicitly specifying the full RE structure must match the default;
  ## (fixed: the source had stray '~' typos inside the second terms,
  ##  '(~1|sample)' / '(~1|plate)' instead of '(1|sample)' / '(1|plate)')
  p4 <- predict(fm3, newdata, re.form = ~(1|plate) + (1|sample))
  ## ... and the order of the RE terms must not matter
  p4B <- predict(fm3, newdata, re.form = ~(1|sample) + (1|plate)) ## ****
  expect_equal(p2, p4)
  expect_equal(p4, p4B)
  ## conditioning on a single grouping factor at a time
  p5 <- predict(fm3, newdata, re.form = ~(1|sample))
  p6 <- predict(fm3, newdata, re.form = ~(1|plate))
  if (do.plots) matplot(cbind(p2, p3, p5, p6), type = "b", lty = 1, pch = 16)
})
test_that("random-slopes model", {
## conditional and population-level fitted values
p0 <- predict(fm2)
p1 <- predict(fm2, re.form=NA)
## linear model, so results should be identical patterns but smaller --
## not including intermediate days
newdata <- with(sleepstudy,expand.grid(Days=range(Days),Subject=unique(Subject)))
## predictions conditioning on: all REs / none / slopes only / intercepts only
newdata$p2 <- predict(fm2,newdata)
newdata$p3 <- predict(fm2,newdata, re.form=NA)
newdata$p4 <- predict(fm2,newdata, re.form=~(0+Days|Subject))
newdata$p5 <- predict(fm2,newdata, re.form=~(1|Subject))
## reference values from an apparently-working run
refval <- structure(
list(Days = c(0, 9, 0, 9, 0, 9),
Subject = structure(c(1L, 1L, 2L, 2L, 3L, 3L),
.Label = c("308", "309", "310", "330", "331", "332",
"333", "334", "335", "337", "349", "350",
"351", "352", "369", "370", "371", "372"), class = "factor"),
p2 = c(253.663652396798, 430.66001930835, 211.006415533628, 227.634788908917, 212.444742696829, 257.61053840953),
p3 = c(251.405104848485, 345.610678484848, 251.405104848485, 345.610678484848, 251.405104848485, 345.610678484848),
p4 = c(251.405104848485, 428.401471760037, 251.405104848485, 268.033478223774, 251.405104848485, 296.570900561186),
p5 = c(253.663652396798, 347.869226033161, 211.006415533628, 305.211989169991, 212.444742696829, 306.650316333193)),
out.attrs =
list(dim = c(Days = 2L, Subject = 18L),
dimnames = list(
Days = c("Days=0", "Days=9"),
Subject = c("Subject=308", "Subject=309", "Subject=310", "Subject=330", "Subject=331", "Subject=332",
"Subject=333", "Subject=334", "Subject=335", "Subject=337", "Subject=349", "Subject=350",
"Subject=351", "Subject=352", "Subject=369", "Subject=370", "Subject=371", "Subject=372")) ),
row.names = c(NA, 6L), class = "data.frame")
expect_equal(head(newdata), refval, tol=5e-7)
})
test_that("predict and plot random slopes", {
## helper: add predictions (under the re.form passed via ...) to a copy of
## the data, optionally plot them, and return the first six rounded values
tmpf <- function(data,...) {
data$Reaction <- predict(fm2,data,...)
if (do.plots) xyplot(Reaction~Days,group=Subject,data=data,type="l")
return(unname(head(round(data$Reaction,3))))
}
## all REs / fixed only / random slopes only / random intercepts only
expect_equal(tmpf(sleepstudy),c(253.664, 273.33, 292.996, 312.662, 332.329, 351.995))
expect_equal(tmpf(sleepstudy, re.form=NA), c(251.405, 261.872, 272.34, 282.807, 293.274, 303.742))
expect_equal(tmpf(sleepstudy, re.form= ~(0+Days|Subject)),
c(251.405, 271.071, 290.738, 310.404, 330.07, 349.736))
expect_equal(tmpf(sleepstudy, re.form= ~(1|Subject)),
c(253.664, 264.131, 274.598, 285.066, 295.533, 306))
})
test_that("fewer random effect levels than original", {
## from 'Colonel Triq'
summary(fm4)
## replicate 1 only appears in rows 1:18.
## rownames(cake[cake$replicate==1,])
## these should all run without error even though newdata contain fewer
## grouping-factor levels than the original fit
predict(fm4, newdata=cake[-1:-17,], re.form=~ (1 | replicate))
predict(fm4, newdata=cake[-1:-18,], re.form=NA)
predict(fm4, newdata=cake[-1:-18,], re.form=~ (1 | replicate))
predict(fm4, newdata=cake[-1:-18,], re.form=~ (1 | replicate), allow.new.levels=TRUE)
##
## recycling of a shorter column in newdata must not change predictions
p0 <- predict(fm1,newdata=data.frame(Days=6,Subject=c("308","309")))
p1 <- predict(fm1,newdata=data.frame(Days=rep(6,4),
Subject=c("308","309")))
expect_equal(rep(unname(p0),2),unname(p1))
## single-level character vs factor carrying the full original level set
p2 <- predict(fm1,newdata=data.frame(Days=6,Subject="308"))
nd <- data.frame(Days=6,
Subject=factor("308",levels=levels(sleepstudy$Subject)))
p3 <- predict(fm1,newdata=nd)
expect_equal(p2,p3)
expect_equal(p2,p0[1])
})
test_that("only drop columns when using new data", {
## Stack Overflow 34221564:
## should only drop columns from model matrix when using *new* data
## NB: Fit depends on optimizer somewhat: "nloptwrap" is really better than "bobyqa"
library(splines)
sleep <- sleepstudy #get the sleep data
set.seed(1234567)
## NOTE(review): length(sleep) is the number of *columns* of the data
## frame (recycled over the rows); presumably nrow(sleep) was intended,
## but the reference values below depend on this exact draw -- confirm
sleep$age <- as.factor(sample(1:3,length(sleep),rep=TRUE))
form1 <- Reaction ~ Days + ns(Days, df=4) +
age + Days:age + (Days | Subject)
m4 <- lmer(form1, sleep) # fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
expect_lte(REMLcrit(m4), 1713.171) # FIXME !? why this regression?? had 1700.6431; "bobyqa" gave 1713.171
expect_equal(unname(head(predict(m4, re.form=NA))),
c(255.203, 259.688, 265.71, 282.583, 294.784, 304.933),
tolerance = 0.008)
})
test_that("only look for columns that exist in re.form", {
## GH 457
set.seed(101)
n <- 200
dd <- data.frame(x=1:n,
f=factor(rep(1:10,n/10)),
g=factor(rep(1:20,each=n/20)),
h=factor(rep(1:5,n/5)),
y=rnorm(n))
## model with nested (h/f) and polynomial-slope (poly(x,2)|g) RE terms;
## predicting with a subset of terms should only need those columns
m1 <- lmer(y~1 + f + (1|h/f) + (poly(x,2)|g), data=dd, control=lmerControl(calc.derivs=FALSE))
expect_equal(unname(predict(m1,re.form= ~1 | h/f, newdata=dd[1,])), 0.14786, tolerance=1e-4)
expect_equal(unname(predict(m1,re.form= ~poly(x,2) | g, newdata=dd[1,])),
0.1533, tolerance=.001)
## *last* RE not included (off-by-one error)
m1B <- lmer(y~1 + f + (1|g) + (1|h), data=dd, control=lmerControl(calc.derivs=FALSE))
expect_equal(unname(predict(m1B,re.form=~(1|g),newdata=data.frame(f="1",g="2"))),0.1512895,tolerance=1e-5)
## binomial example with a nested RE term (School / Rank)
set.seed(101)
n <- 100
xx <- c("r1", "r2", "r3", "r4", "r5")
xxx <- c("e1", "e2", "e3")
p <- 0.3
School <- factor(sample(xxx, n, replace=TRUE), levels=xxx, ordered=FALSE)
Rank <- factor(sample(xx, n, replace=TRUE), levels=xx, ordered=FALSE)
df1 <- data.frame(
ID = as.integer(runif(n, min = 1, max = n/7)),
xx1 = runif(n, min = 0, max = 10),
xx2 = runif(n, min = 0, max = 10),
xx3 = runif(n, min = 0, max = 10),
School,
Rank,
yx = as.factor(rbinom(n, size = 1, prob = p))
)
df1 <- df1[order(df1$ID, decreasing=FALSE),]
mm2 <- glmer(yx ~ xx1 + xx2 + xx3 + Rank + (1 | ID) + (1 | School / Rank),
data = df1,
family = "binomial",control = glmerControl(calc.derivs =FALSE))
## single-row newdata with explicit factor levels matching the fit
n11 <- data.frame(School= factor("e1", levels = levels(df1$School),ordered=FALSE),
Rank = factor("r1", levels = levels(df1$Rank), ordered=FALSE),
xx1=8.58, xx2=8.75, xx3=7.92)
expect_equal(unname(predict(mm2, n11, type="response",re.form= ~(1 | School / Rank))),
0.1174628,tolerance=1e-5)
## bad factor levels
mm3 <- update(mm2, . ~ . - (1|ID))
n12 = data.frame(School="e3",Rank="r2",xx1=8.58,xx2=8.75,xx3=7.92)
expect_equal(unname(predict(mm3, n12, type="response")),0.1832894,tolerance=1e-5)
## GH #452
## FIXME: would like to find a smaller/faster example that would test the same warning (10+ seconds)
set.seed(101)
n <- 300
df2 <- data.frame(
xx1 = runif(n, min = 0, max = 10),
xx2 = runif(n, min = 0, max = 10),
xx3 = runif(n, min = 0, max = 10),
School = factor(sample(xxx, n,replace=TRUE)),
Rank = factor(sample(xx, n, replace=TRUE)),
yx = as.factor(rbinom(n, size = 1, prob = p))
)
mm4 <- suppressWarnings(glmer(yx ~ xx1 + xx2 + xx3 + Rank + (Rank|School),
data = df2,
family = "binomial",control = glmerControl(calc.derivs =FALSE)))
## set tolerance to 0.1 (!) to pass win-builder on R-devel/i386 (only:
## tolerance = 3e-5 is OK for other combinations of (R-release, R-devel) x (i386,x64)
expect_equal(unname(predict(mm4, n11, type="response")), 0.2675081, tolerance=0.1)
})
test_that("simulation works with non-factor", {
set.seed(12345)
## grouping variable 'a' is a factor but the RE covariate 'b' is numeric;
## de novo simulate() from a one-sided formula should still work
dd <- data.frame(a=gl(10,100), b = rnorm(1000))
test2 <- suppressMessages(simulate(~1+(b|a), newdata=dd, family=poisson,
newparams= list(beta = c("(Intercept)" = 1),
theta = c(1,1,1))))
expect_is(test2,"data.frame")
})
## set up an inverse-Gaussian fit shared by the next two tests
## NOTE(review): statmod::rinvgauss is used here unconditionally although a
## requireNamespace("statmod") guard only appears further below -- confirm
set.seed(666)
n <- 500
df <- data.frame(y=statmod::rinvgauss(n, mean=1, shape=2),
id=factor(1:20))
model_fit <- glmer(y ~ 1 + (1|id),
family = inverse.gaussian(link = "inverse"),
data = df,
control=glmerControl(check.conv.singular="ignore"))
test_that("simulation works for inverse gaussian", {
## reference mean from an apparently-working run
expect_equal(mean(simulate(model_fit)[[1]]), 1.02704392575914,
tolerance=1e-5)
})
test_that("simulation complains appropriately about bad family", {
## corrupt the family name so simulate() cannot find a simulation method
ig <- inverse.gaussian()
ig$family <- "junk"
model_fit2 <- glmer(y ~ 1 + (1|id),
family = ig,
data = df,
control=glmerControl(check.conv.singular="ignore"))
expect_error(simulate(model_fit2),"simulation not implemented for family")
})
test_that("prediction from large factors", {
set.seed(101)
## observation-level random effect with 50000 levels
N <- 50000
X <- data.frame(y=rpois(N, 5), obs=as.factor(1:N))
fm <- glmer(y ~ (1|obs), family="poisson", data=X,
control=glmerControl(check.conv.singular="ignore"))
## FIXME: weak tests. The main issue here is that these should
## be reasonably speedy and non-memory-hogging, but those are
## hard to test portably ...
expect_is(predict(fm, re.form=~(1|obs)), "numeric")
expect_is(predict(fm, newdata=X), "numeric")
})
test_that("prediction with gamm4", {
## only run when the (suggested) gamm4 package is available
if (suppressWarnings(requireNamespace("gamm4"))) {
## loading gamm4 warns "replacing previous import 'Matrix::update' by 'lme4::update' when loading 'gamm4'"
## from ?gamm4
set.seed(0)
## simulate 4 term additive truth
dat <- mgcv::gamSim(1,n=400,scale=2,verbose=FALSE)
## Now add 20 level random effect `fac'...
dat$fac <- fac <- as.factor(sample(1:20,400,replace=TRUE))
dat$y <- dat$y + model.matrix(~fac-1)%*%rnorm(20)*.5
br <- gamm4::gamm4(y~s(x0)+x1+s(x2),data=dat,random=~(1|fac))
## gamm4 modifies RE names; simulate() should warn but still work
expect_warning(ss <- simulate(br$mer), "modified RE names")
expect_equal(dim(ss), c(400,1))
}
})
test_that("prediction with spaces in variable names", {
  ## backtick-quoted names with embedded spaces must survive the round trip
  ## through the model frame / model matrix machinery inside predict()
  cbpp$`silly period` <- cbpp$period
  fit_spaces <- glmer(cbind(incidence,size-incidence) ~ `silly period` + (1|herd),
                      family=binomial, data=cbpp)
  ## reference values from an apparently-working run
  expect_equal(round(head(predict(fit_spaces)), 3),
               c(`1` = -0.809, `2` = -1.801,
                 `3` = -1.937, `4` = -2.388, `5` = -1.697, `6` = -2.689))
})
## 'statmod' provides the inverse-Gaussian random-number generator
if (requireNamespace("statmod")) {
test_that("simulate with rinvgauss", {
dd <- data.frame(f=factor(rep(1:20,each=10)))
## de novo simulation from a formula-only inverse-Gaussian specification
dd$y <- simulate(~1+(1|f),
seed=101,
family=inverse.gaussian,
newdata=dd,
## ?? gives NaN (sqrt(eta)) for low beta ?
newparams=list(beta=5,theta=1,sigma=1))[[1]]
suppressMessages(m <- glmer(y~1+(1|f),
family=inverse.gaussian,
data=dd))
set.seed(101)
## reference values from an apparently-working run
expect_equal(head(unlist(simulate(m))),
c(sim_11 = 0.451329390087728, sim_12 = 0.629516371309772,
sim_13 = 0.481236633500098,
sim_14 = 0.170060386109077, sim_15 = 0.258742371516342,
sim_16 = 0.949617440586848))
})
}
## GH 631
test_that("sparse contrasts don't mess up predict()", {
## many-level factor: model matrix may use sparse contrasts internally
dd <- expand.grid(f = factor(1:101), rep1 = factor(1:2), rep2 = 1:2)
dd$y <- suppressMessages(simulate(~1 + (rep1|f),
seed = 101,
newdata = dd,
newparams = list(beta = 1,
theta = rep(1,3),
sigma = 1),
family = gaussian)[[1]])
m1 <- lmer( y ~ 1 + (1|f), data = dd)
## predictions on the original data and on identical newdata must agree
p1 <- predict(m1)
p2 <- predict(m1, newdata = dd)
expect_identical(p1, p2)
})
} ## end if (testLevel > 1)
test_that("prediction with . in formula + newdata",
{
set.seed(101)
mydata <- data.frame(
groups = rep(1:3, each = 100),
x = rnorm(300),
dv = rnorm(300)
)
## 80/20 train/test split
train_subset <- sample(1:300, 300 * .8)
train <- mydata[train_subset,]
test <- mydata[-train_subset,]
## the '.' shorthand in the fixed-effect part should predict the same as
## the fully spelled-out equivalent formula
mod <- lmer(dv ~ . - groups + (1 | groups), data = train)
p1 <- predict(mod, newdata = test)
mod2 <- lmer(dv ~ x + (1 | groups), data = train)
p2 <- predict(mod2, newdata = test)
expect_identical(p1, p2)
})
test_that("simulate with a factor with one level", {
set.seed(1241)
## extremely rare events: fitted probabilities are essentially zero
y <- factor(c(rep(0,1000), 1, rep(0,1000), 1), levels = c("0","1"))
x <- rep(c("A","B"),each = 1001)
mod <- glmer(y ~ 1 + (1|x),family = binomial,
control = glmerControl(check.conv.singular = "ignore"))
## newdata has a single-level grouping factor; simulate() should cope
s <- simulate(mod,newdata = data.frame(x = "A"), nsim = 10)
## very low mean, all simulated values zero
expect_true(all(s == 0))
})
test_that("prediction standard error", {
# note that predict.lm returns a list with
# fit, se.fit, df, residual.scale
mod1 <- lmer(Petal.Width ~ Sepal.Length + (1 | Species), iris)
## se.fit under various combinations of newdata / re.form / random.only
p1 <- predict(mod1, se.fit = TRUE)
p2 <- predict(mod1, se.fit = TRUE, newdata = iris)
p3 <- predict(mod1, se.fit = TRUE, re.form = NA, newdata = iris)
p4 <- predict(mod1, se.fit = TRUE, re.form = NA)
p5 <- predict(mod1, se.fit = TRUE, re.form = ~(1 | Species))
p6 <- predict(mod1, se.fit = TRUE, re.form = ~(1 | Species), newdata = iris)
p7 <- predict(mod1, se.fit = TRUE, newdata = iris, random.only = TRUE)
p8 <- predict(mod1, se.fit = TRUE, re.form = ~(1 | Species), random.only = TRUE)
p9 <- predict(mod1, se.fit = TRUE, re.form = ~(1 | Species), newdata = iris, random.only = TRUE)
## reference values from an apparently-working run
expect_equal(unname(head(p1$se.fit)),
c(0.0271816400250223, 0.0272298862268211, 0.0286188379907626,
0.0297645467444413, 0.0270330515271627, 0.0295876265523127))
# re.form = NA
expect_equal(unname(head(p3$se.fit)),
c(0.451147865048879, 0.451497971849052, 0.451930732595154, 0.452178035807842,
0.451312573619068, 0.450778089845166))
# random.only may need checking -- tolerance maybe too high for this?
expect_equal(unname(p8$se.fit),
rep(0.451569712647126, 150), tolerance = 0.001)
## equivalent specifications must give identical results
expect_equal(p1, p2)
expect_equal(p3, p4)
expect_equal(p1, p5)
expect_equal(p1, p6)
expect_equal(p7, p8)
expect_equal(p7, p9)
})
test_that("NA + re.form = NULL + simulate OK (GH #737)", {
d <- lme4::sleepstudy
## introduce a missing response (dropped by the default na.action)
d$Reaction[1] <- NA
fm1 <- lmer(Reaction ~ Days + (Days | Subject), d)
ss <- simulate(fm1, seed = 101, re.form = NULL)[[1]]
## reference values from an apparently-working run
expect_equal(c(head(ss)),
c(266.139101412856, 308.148180398426,
296.081377893883, 338.367909016478,
360.294339946214, 401.91050930589))
## re.form = NA and re.form = NULL should give same-length results
ss0 <- simulate(fm1, seed = 101, re.form = NA)[[1]]
expect_equal(length(ss), length(ss0))
## correct dimensions with na.exclude as well ?
fm2 <- update(fm1, na.action = na.exclude)
ss2 <- simulate(fm2, seed = 101, re.form = NULL)[[1]]
ss3 <- simulate(fm2, seed = 101, re.form = NA)[[1]]
## na.exclude pads simulations back out to the full number of rows
expect_equal(length(ss2), nrow(d))
expect_equal(length(ss3), nrow(d))
})
## GH 691 parts 1 and 2
test_that("predict works with factors in left-out REs", {
set.seed(101)
df2 <- data.frame(yield = rnorm(100),
lc = factor(rep(1:2, 50)),
g1 = factor(rep(1:10, 10)),
g3 = factor(rep(1:10, each = 10)))
m1B <- lmer(yield ~ 1 + ( 1 | g1) + (lc |g3), data = df2,
control = lmerControl(check.conv.singular = "ignore"))
## re.form drops the (lc | g3) term; the factor 'lc' appearing only in
## that dropped term must not be required for prediction
expect_equal(head(predict(m1B, re.form = ~(1|g1)),1),
c(`1` = 0.146787496519465),
tolerance = 1e-4)
})
test_that("predict works with dummy() in left-out REs", {
set.seed(101)
df3 <- data.frame(v1 = rnorm(100),
v3 = factor(rep(1:10, each = 10)),
v4 = factor(rep(1:2, each = 50)),
v5 = factor(rep(1:10, 10)))
## dummy() appears only in the RE term that re.form drops below
m1C <- lmer(v1~(1|v3) + (0+dummy(v4,"1")|v5),
data = df3,
control=lmerControl(check.nobs.vs.nlev="ignore",
check.nobs.vs.nRE="ignore",
check.conv.singular = "ignore"))
expect_equal(head(predict(m1C, re.form = ~1|v3), 1),
c(`1` = -0.035719520719991))
})
## (end of tests)