tests/testthat/tests-find_data.R

library("datasets")

context("Test `find_data()` behavior")

test_that("Test find_data.default()", {
    expect_true(inherits(find_data(lm(mpg ~ cyl, data = mtcars)), "data.frame"), label = "find_data.default() works")
    
    m1 <- lm(mpg ~ cyl, data = mtcars, subset = am == 1)
    expect_true(nrow(find_data(m1)) == nrow(mtcars[mtcars$am == 1, ]), label = "find_data.default(data, subset) works")
    
    mtcars2 <- mtcars
    mtcars2[1:3,] <- NA_real_
    
    m2 <- lm(mpg ~ cyl, data = mtcars2)
    expect_true(nrow(find_data(m2)) == nrow(mtcars2[-c(1:3), ]), label = "find_data.default(data, na.action) works")
    
    m3 <- lm(mpg ~ cyl, data = mtcars2, subset = am == 1)
    expect_true(nrow(find_data(m3)) == nrow(na.omit(mtcars2[mtcars2$am == 1, ])), label = "find_data.default(data, subset, na.action) works")
    
    expect_error(find_data(StructTS(log10(UKgas), type = "BSM")), label = "find_data.default([no formula]) errors")
})

test_that("Test find_data.lm()", {
    expect_true(inherits(find_data(lm(mpg ~ cyl, data = mtcars)), "data.frame"), label = "find_data.lm() works")
})

test_that("Test find_data.glm()", {
    expect_true(inherits(find_data(glm(mpg ~ cyl, data = mtcars)), "data.frame"), label = "find_data.glm() works")
})

test_that("Test find_data.data.frame()", {
    expect_true(inherits(find_data(mtcars), "data.frame"), label = "find_data.data.frame() works")
})

test_that("Test find_data.lm() and prediction.lm() with missing data", {
    mtcars2 <- mtcars
    mtcars2$mpg[1:4] <- NA_real_
    
    # na.omit
    m1 <- lm(mpg ~ cyl, data = mtcars2, na.action = na.omit)
    expect_true(identical(dim(find_data(m1)), dim(na.omit(mtcars2))),
                label = "find_data.lm() drops missing data when 'na.action = na.omit'")
    expect_true(nrow(prediction(m1)) == nrow(na.omit(mtcars2)),
                label = "prediction.lm() returns correct rows when 'na.action = na.omit'")
    
    # na.exclude
    m2 <- lm(mpg ~ cyl, data = mtcars2, na.action = na.exclude)
    expect_true(identical(dim(find_data(m2)), dim(na.omit(mtcars2))),
                label = "find_data.lm() drops missing data when 'na.action = na.exclude'")
    expect_true(nrow(prediction(m2)) == nrow(na.omit(mtcars2)),
                label = "prediction.lm() returns correct rows when 'na.action = na.exclude'")
    
    # prediction with missing data passed explicitly
    m3 <- lm(mpg ~ cyl, data = mtcars) # missing outcome
    p3 <- prediction(m3, mtcars2, na.action = na.pass)
    expect_true(nrow(p3) == nrow(mtcars),
                label = "prediction.lm() returns correct rows when prediction(na.action = na.pass) for missing outcome")
    expect_true(all(!is.na(p3$fitted)[1:4]),
                label = "prediction.lm() returns numeric predictions when prediction(na.action = na.pass) for missing outcome")
    expect_true(nrow(prediction(m3, mtcars2, na.action = na.omit)) == nrow(mtcars2),
                label = "prediction.lm() returns correct rows when prediction(na.action = na.omit) for missing outcome")
    expect_true(nrow(prediction(m3, mtcars2, na.action = na.exclude)) == nrow(mtcars2),
                label = "prediction.lm() returns correct rows when prediction(na.action = na.exclude) for missing outcome")
    
    m4 <- lm(cyl ~ mpg, data = mtcars) # missing covariate
    p4 <- prediction(m4, mtcars2, na.action = na.pass)
    expect_true(nrow(p4) == nrow(mtcars),
                label = "prediction.lm() returns correct rows when prediction(na.action = na.pass) for missing covariate")
    expect_true(all(is.na(p4$fitted)[1:4]),
                label = "prediction.lm() returns NA predictions when prediction(na.action = na.pass) for missing covariate")
    expect_error(prediction(m4, mtcars2, na.action = na.omit),
                label = "prediction.lm() fails when prediction(na.action = na.omit) for missing covariate")
    expect_error(prediction(m4, mtcars2, na.action = na.exclude),
                label = "prediction.lm() fails when prediction(na.action = na.exclude) for missing covariate")
    
    rm(mtcars2)
})

test_that("Test find_data.lm() with subsetted data", {
    mtcars2 <- mtcars
    mtcars2$mpg[1:4] <- NA_real_
    m1 <- lm(mpg ~ cyl, data = mtcars2, subset = !is.na(mpg))
    expect_true(identical(dim(find_data(m1)), dim(na.omit(mtcars2))),
                label = "find_data.lm() has correct dimensions when subsetting")
    expect_true(nrow(prediction(m1)) == nrow(na.omit(mtcars2)),
                label = "prediction.lm() returns correct rows when subsetting")
    x <- c(rep(TRUE, 30), FALSE, FALSE)
    m2 <- lm(mpg ~ cyl, data = mtcars2, subset = x)
    expect_true(identical(nrow(find_data(m2)), nrow(na.omit(mtcars2))-2L),
                label = "find_data.lm() subsets correctly when subsetting variable is global")
    expect_true(identical(rownames(find_data(m2)), head(rownames(na.omit(mtcars2)), 26)),
                label = "find_data.lm() returns correct rows when subsetting and missing data are present")
    rm(mtcars2)
})

test_that("Test find_data.lm() with subsetted data", {
    skip_if_not_installed("survey")
    library("survey")
    data(api)
    dstrat <- svydesign(id=~1, strata=~stype, weights=~pw, data=apistrat, fpc=~fpc)
    m <- svyglm(growth ~ target, dstrat)
    f <- find_data(m, design = dstrat)

    expect_true(identical(nrow(f), length(predict(m))), label = "Survey design model has correct rows")
    expect_true(identical(nrow(prediction(m)), length(predict(m))), label = "Survey design model has correct rows")
})
leeper/prediction documentation built on Jan. 1, 2020, 6:10 p.m.