#############################################################################
# scores: wrapper of predict to automatically use lm()'s data as newdata
#############################################################################
context("scores function")
test_that("Works like predict", {
data(nuclearplants)
pg1 <- lm(cost ~ . - pr, data=nuclearplants, subset=(pr==0))
### function as predict correctly
pred1 <- predict(pg1, newdata=nuclearplants)
scores1 <- scores(pg1, newdata=nuclearplants)
expect_equal(pred1, scores1)
### error if missing newdata without being in a model (unlike predict)
expect_error(scores(pg1))
pg2 <- lm(pr ~ cap*cost, data=nuclearplants)
pred2 <- predict(pg2, newdata=nuclearplants)
scores2 <- scores(pg2, newdata=nuclearplants)
expect_equal(pred2, scores2)
})
test_that("Works like predict with 'with' and 'attach'", {
data(nuclearplants)
pg <- lm(cost ~ . - pr, data=nuclearplants, subset=(pr==0))
### function as predict correctly
pred <- predict(pg, newdata=nuclearplants)
scores1 <- with(nuclearplants, scores(pg))
expect_equal(pred, scores1, check.attributes=FALSE)
})
test_that("Correct in a model", {
data(nuclearplants)
pg <- lm(cost ~ ., data=nuclearplants, subset=(pr==0))
suppressWarnings({
ps1 <- glm(pr ~ cap + date + t1 + bw + scores(pg), data=nuclearplants)
ps2 <- glm(pr ~ cap + date + t1 + bw + scores(pg, newdata=nuclearplants), data=nuclearplants)
ps3 <- glm(pr ~ cap + date + t1 + bw + predict(pg, newdata=nuclearplants), data=nuclearplants)
})
expect_equal(fitted(ps1), fitted(ps2), check.attributes=FALSE)
expect_equal(fitted(ps1), fitted(ps3), check.attributes=FALSE)
})
test_that("Correct in a model using 'with'", {
data(nuclearplants)
pg <- lm(cost~., data=nuclearplants, subset=(pr==0))
suppressWarnings({
ps1 <- glm(pr ~ cap + date + t1 + bw + predict(pg, newdata=nuclearplants), data=nuclearplants)
ps2 <- with(nuclearplants, glm(pr ~ cap + date + t1 + bw + scores(pg)))
})
expect_equal(fitted(ps1), fitted(ps2), check.attributes=FALSE)
})
test_that("Works in match_on", {
data(nuclearplants)
pg <- lm(cost~., data=nuclearplants, subset=(pr==0))
expect_warning({
m1 <- match_on(pr~cap + predict(pg, newdata=nuclearplants), data=nuclearplants)
m2 <- match_on(pr~cap + scores(pg), data=nuclearplants)
m3 <- with(match_on(pr~cap + scores(pg)), data=nuclearplants)
},
"rank-deficient"
)
expect_equal(m1@.Data, m2@.Data)
expect_true(all.equal(m1@.Data, m3@.Data, check.attributes=FALSE))
})
test_that("Finds variables", {
data(iris)
iris$Species <- as.numeric(iris$Species=="setosa")
mod <- lm(Sepal.Length ~ . - Species, data=iris, subset=(Species==1))
psa <- glm(Petal.Width ~ scores(mod), data=iris) # to build its own newdata, scores() is forced
#to find a variable (Petal.Length) not OW mentioned in containing formula
psb <- glm(Petal.Width ~ predict(mod, newdata=iris), data=iris)
expect_equal(fitted(psa), fitted(psb))
})
test_that("indep vars of class logical properly handled", {
data(iris)
iris$Species <- iris$Species=="setosa"
mod <- lm(Sepal.Length ~ Species + Sepal.Width, data=iris, subset=Species)
suppressWarnings({
psb <- glm(Petal.Width ~ Species + predict(mod, newdata=iris), data=iris)
psa <- glm(Petal.Width ~ Species + scores(mod), data=iris)
})
expect_equal(fitted(psa), fitted(psb))
})
test_that("Handles weights with no missingness", {
data(nuclearplants)
mod1 <- lm(pr ~ cost, weights=t1, data=nuclearplants)
expect_true(length(scores(mod1, newdata=nuclearplants)) == 32)
mod2 <- lm(pr ~ scores(mod1), data=nuclearplants)
expect_true(length(mod2$fitted) == 32)
})
test_that("NA imputation", {
data(nuclearplants)
nuclearplants[c(3,5,12,19),1] <- NA
nuclearplants$cap[7] <- NA
g1 <- glm(pr ~ cost + t1 + t2, data=nuclearplants, family=binomial)
l1 <- lm(pr ~ ne + scores(g1), data=nuclearplants)
# check that all cases are used
expect_equal(length(fitted(l1)), 32)
# Filling data first should be equivalent
# First, fill data using the formula, plus the extra column needed in the model
np2 <- fill.NAs(update(formula(g1), ~ . + ne), data=nuclearplants)
# Then refit, including the missingness indicator
g2 <- glm(pr ~ cost + t1 + t2+ cost.NA, data=np2, family=binomial)
l2 <- lm(pr ~ ne + scores(g2), data=np2)
l3 <- lm(pr ~ ne + predict(g2, newdata=np2), data=nuclearplants)
expect_equal(fitted(l2), fitted(l3))
expect_equal(fitted(l1), fitted(l3))
g3 <- glm(pr ~ cost*cap + t1, data=nuclearplants, family=binomial)
np3 <- fill.NAs(pr ~ cost*cap + t1 + ne, data=nuclearplants)
# R'll be upset if we try to include `cost:cap` as a variable
# in a formula, so lets just fix that up.
names(np3)[6] <- "costcap"
g4 <- glm(pr ~ cost + cap + costcap + t1 + cost.NA + cap.NA,
data=np3, family=binomial)
suppressWarnings(l4 <- lm(pr ~ ne + scores(g3), data=nuclearplants))
l4a <- lm(pr ~ ne + scores(g4), data=np3)
l4b <- lm(pr ~ ne + predict(g4, data=np3), data=nuclearplants)
expect_equal(fitted(l4), fitted(l4a))
expect_equal(fitted(l4), fitted(l4b))
# if we fill.NAs twice, should be the same.
g1.filled <- glm(pr ~ cost + t1 + t2 + cost.NA, data=np2, family=binomial)
l1.filled <- lm(pr ~ ne + scores(g1.filled), data=np2)
expect_equal(fitted(l1), fitted(l1.filled))
# Subset shouldn't affect imputation
pgscore <- lm(cost + t1 ~ cap + interaction(ct,bw) + t2, subset=(pr==0),
data=nuclearplants)
l5 <- lm(pr ~ cap + scores(pgscore), data=nuclearplants)
l6 <- lm(pr ~ cap + scores(pgscore, newdata=nuclearplants), data=nuclearplants)
l7 <- lm(pr ~ cap + predict(pgscore, newdata=nuclearplants), data=nuclearplants)
np3 <- fill.NAs(cost ~ cap + pr + bw + ct + t1 + t2, data=nuclearplants)
pgscore.filled <- lm(cost + t1 ~ cap + interaction(ct,bw) + t2 + cap.NA,
subset=(pr==0), data=np3)
l8 <- lm(pr ~ cap + scores(pgscore.filled), data=np3)
l9 <- lm(pr ~ cap + scores(pgscore.filled, newdata=np3), data=np3)
l10 <- lm(pr ~ cap + predict(pgscore.filled, newdata=np3), data=np3)
expect_equal(fitted(l5), fitted(l6))
expect_equal(fitted(l5), fitted(l7))
expect_equal(fitted(l8), fitted(l9))
expect_equal(fitted(l8), fitted(l10))
# we don't expect l5 and l8 to be the same because np3 mean imputed over the
# entire data set, whereas scores(pgscore) only mean imputed over the subset
expect_equal(length(fitted(l5)), 31)
expect_equal(length(fitted(l8)), 32)
nuclearplants$ct[1] <- NA
pgscore <- lm(cost~cap + interaction(ct,bw), subset=(pr==0),
weights=t1, data=nuclearplants)
suppressWarnings({
w1 <- lm(pr ~ t2 + scores(pgscore, newdata=nuclearplants), data=nuclearplants)
w2 <- lm(pr ~ t2 + scores(pgscore), data=nuclearplants)
})
w3 <- lm(pr ~ t2 + predict(pgscore, newdata=nuclearplants), data=nuclearplants)
expect_equal(length(fitted(w1)), 32)
expect_equal(fitted(w1), fitted(w2))
expect_equal(length(fitted(w3)), 30)
expect_false(fitted(w1)[1] == fitted(w3)[1])
pgscore <- lm(cost~cap + ct*bw, subset=(pr==0),
weights=t1, data=nuclearplants)
suppressWarnings({
w4 <- lm(pr ~ t2 + scores(pgscore, newdata=nuclearplants), data=nuclearplants)
w5 <- lm(pr ~ t2 + scores(pgscore), data=nuclearplants)
})
w6 <- lm(pr ~ t2 + predict(pgscore, newdata=nuclearplants), data=nuclearplants)
expect_equal(length(fitted(w4)), 32)
expect_equal(fitted(w4), fitted(w5))
expect_equal(length(fitted(w6)), 30)
expect_false(fitted(w4)[1] == fitted(w6)[1])
})
# test_that("scores with bigglm", {
# Moved to test.notforCRAN.R
# also placing scores with survey package tests there
test_that("scores works with *, interaction and strata", {
data(nuclearplants)
g1 <- glm(pr ~ cost + interaction(ne,ct), data=nuclearplants, family=binomial)
g2 <- glm(pr ~ cost + strata(ne,ct), data=nuclearplants, family=binomial)
g3 <- glm(pr ~ cost + ne*ct, data=nuclearplants, family=binomial)
expect_equal(predict(g1), predict(g2))
expect_equal(predict(g1), scores(g1, newdata=nuclearplants))
expect_equal(predict(g2), scores(g2, newdata=nuclearplants))
expect_equal(scores(g1, newdata=nuclearplants),
scores(g2, newdata=nuclearplants))
expect_equal(predict(g1), predict(g3))
expect_equal(predict(g1), scores(g1, newdata=nuclearplants))
expect_equal(predict(g3), scores(g3, newdata=nuclearplants))
expect_equal(scores(g1, newdata=nuclearplants),
scores(g3, newdata=nuclearplants))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.