skip_on_os("mac")
test_that("retrieve from same environment", {
foo <- data.frame(x = 1:10, y = 2:11)
fit <- lm(y ~ x, data = foo)
expect_no_warning({
cols <- names(get_data(fit))
})
expect_setequal(cols, c("x", "y"))
})
test_that("retrieve from correct environment", {
foo <- function() {
foo <- data.frame(x = 1:10, y = 2:11)
return(lm(y ~ x, data = foo))
}
# There should be no warning about "Could not recover model data from
# environment"
expect_no_warning({
cols <- names(get_data(foo()))
})
expect_setequal(cols, c("x", "y"))
})
test_that("fetch from local, not global, environment", {
# See #760. If the local environment has a modified version of data also in
# the global environment, we should find the local version first, not the
# global version.
foo <- function() {
mtcars$cylinders <- factor(mtcars$cyl)
return(lm(mpg ~ cylinders + disp, data = mtcars))
}
expect_setequal(
names(get_data(foo())),
c("mpg", "disp", "cylinders")
)
})
test_that("retrieve from call formula environment", {
skip_if_not_installed("AER")
foo <- function() {
d <- data.frame(
y = rnorm(100),
x = rnorm(100)
)
# find_formula(fit)$conditional happens to not have an environment for tobit
# models, so get_data() should check environment(get_call(fit)$formula). See
# #666
return(AER::tobit(y ~ x, data = d, right = 1.5))
}
expect_setequal(
names(get_data(foo())),
c("x", "y")
)
})
test_that("lme", {
skip_if_not_installed("nlme")
data("Orthodont", package = "nlme")
m <- nlme::lme( # a model of variance only
distance ~ 1,
data = Orthodont, # grand mean
weights = nlme::varConstPower(form = ~ age | Sex)
)
expect_identical(dim(get_data(m, source = "mf")), c(108L, 3L))
expect_identical(colnames(get_data(m, source = "mf")), c("distance", "age", "Sex"))
})
test_that("lme4", {
skip_if_not_installed("lme4")
data("cbpp", package = "lme4")
set.seed(123)
cbpp$cont <- rnorm(nrow(cbpp))
m <- lme4::glmer(cbind(incidence, size - incidence) ~ poly(cont, 2) + (1 | herd),
data = cbpp, family = binomial
)
expect_s3_class(get_data(m), "data.frame")
})
test_that("additional_variables = TRUE", {
k <- mtcars
k$qsec[1:10] <- NA
k <- k
mod <- lm(mpg ~ hp, k)
n1 <- nrow(k)
n2 <- nrow(insight::get_data(mod))
n3 <- nrow(insight::get_data(mod, additional_variables = TRUE))
expect_identical(n1, n2)
expect_identical(n1, n3)
})
test_that("lm", {
set.seed(1023)
x <- rnorm(1000, sd = 4)
y <- cos(x) + rnorm(1000)
dat <- data.frame(x, y)
mod1 <- lm(y ~ x, data = dat)
mod2 <- lm(y ~ cos(x), data = dat)
expect_equal(get_data(mod1), get_data(mod2), ignore_attr = TRUE)
expect_equal(get_data(mod1)$x, dat$x, ignore_attr = TRUE)
expect_equal(get_data(mod2)$x, dat$x, ignore_attr = TRUE)
})
test_that("get_data lavaan", {
skip_if_not_installed("lavaan")
data(PoliticalDemocracy, package = "lavaan")
model <- "
# latent variable definitions
ind60 =~ x1 + x2 + x3
dem60 =~ y1 + a*y2 + b*y3 + c*y4
dem65 =~ y5 + a*y6 + b*y7 + c*y8
# regressions
dem60 ~ ind60
dem65 ~ ind60 + dem60
# residual correlations
y1 ~~ y5
y2 ~~ y4 + y6
y3 ~~ y7
y4 ~~ y8
y6 ~~ y8
"
m <- lavaan::sem(model, data = PoliticalDemocracy)
expect_s3_class(get_data(m, verbose = FALSE), "data.frame")
expect_equal(head(get_data(m, verbose = FALSE)), head(PoliticalDemocracy), ignore_attr = TRUE, tolerance = 1e-3)
# works when data not in environment
holz_data <<- lavaan::HolzingerSwineford1939
HS.model <- " visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6
speed =~ x7 + x8 + x9 "
m_holz <- lavaan::lavaan(HS.model,
data = holz_data, auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
skip_on_os(c("mac", "linux"))
out1 <- get_data(m_holz)
expect_named(
out1,
c(
"id", "sex", "ageyr", "agemo", "school", "grade", "x1", "x2",
"x3", "x4", "x5", "x6", "x7", "x8", "x9"
)
)
expect_identical(nrow(out1), 301L)
# rm(holz_data)
# out2 <- get_data(m_holz)
# expect_named(
# out2,
# c("x1", "x2","x3", "x4", "x5", "x6", "x7", "x8", "x9")
# )
# expect_identical(nrow(out2), 301L)
})
test_that("get_data include weights, even if ones", {
set.seed(123)
y <- rnorm(100)
x <- rnorm(100)
wn <- runif(100)
w1 <- rep(1, 100)
# Model with nonuniform weights
fn <- lm(y ~ x, weights = wn)
expect_identical(colnames(get_data(fn, verbose = FALSE)), c("y", "x", "(weights)", "wn"))
# Model with weights equal to 1
f1 <- lm(y ~ x, weights = w1)
expect_identical(colnames(get_data(f1, verbose = FALSE)), c("y", "x", "(weights)", "w1"))
# Model with no weights
f0 <- lm(y ~ x)
expect_identical(colnames(get_data(f0, verbose = FALSE)), c("y", "x"))
# check get_weights still works
expect_null(get_weights(f0))
expect_identical(get_weights(f0, null_as_ones = TRUE), w1)
})
test_that("lm with transformations", {
d <- data.frame(
time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)),
group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50)
)
m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d)
expect_identical(colnames(get_data(m)), c("sum", "time", "group"))
})
test_that("lm with poly and NA in response", {
d <- iris
d[1:25, "Sepal.Length"] <- NA
d2 <- d
m <- lm(Sepal.Length ~ Species / poly(Petal.Width, 2), data = d2)
expect_equal(get_data(m), iris[26:150, c("Sepal.Length", "Species", "Petal.Width")], ignore_attr = TRUE)
})
test_that("mgcv", {
skip_if_not_installed("mgcv")
# mgcv::gam() deliberately does not keep its environment, so get_data() has to
# fall back to the model frame. See
# https://github.com/cran/mgcv/blob/a4e69cf44a49c84a41a42e90c86995a843733968/R/mgcv.r#L2156-L2159
d <- iris
d$NewFac <- rep(c(1, 2), length.out = 150)
model <- mgcv::gam(Sepal.Length ~ s(Petal.Length, by = interaction(Species, NewFac)), data = d)
# There should be two warnings: One for failing to get the data from the
# environment, and one for not recovering interaction() accurately
expect_warning(expect_warning({
model_data <- get_data(model)
}))
expect_equal(
head(model_data),
head(d[c("Sepal.Length", "Petal.Length", "Species", "NewFac")]),
ignore_attr = TRUE
)
})
test_that("lm with poly and NA in response", {
s1 <- summary(iris$Sepal.Length)
model <- lm(Petal.Length ~ log(Sepal.Width) + Sepal.Length,
data = iris
)
# Same min-max
s2 <- summary(insight::get_data(model)$Sepal.Length)
model <- lm(Petal.Length ~ log(1 + Sepal.Width) + Sepal.Length,
data = iris
)
s3 <- summary(insight::get_data(model)$Sepal.Length)
model <- lm(Petal.Length ~ log(Sepal.Width + 1) + Sepal.Length,
data = iris
)
s4 <- summary(insight::get_data(model)$Sepal.Length)
model <- lm(Petal.Length ~ log1p(Sepal.Width) + Sepal.Length,
data = iris
)
s5 <- summary(insight::get_data(model)$Sepal.Length)
expect_equal(s1, s2, tolerance = 1e-4)
expect_equal(s1, s3, tolerance = 1e-4)
expect_equal(s1, s4, tolerance = 1e-4)
expect_equal(s1, s5, tolerance = 1e-4)
expect_equal(s2, s3, tolerance = 1e-4)
expect_equal(s2, s4, tolerance = 1e-4)
expect_equal(s2, s5, tolerance = 1e-4)
expect_equal(s3, s4, tolerance = 1e-4)
expect_equal(s3, s5, tolerance = 1e-4)
expect_equal(s4, s5, tolerance = 1e-4)
})
mod <- lm(mpg ~ as.logical(am) + factor(cyl) + as.factor(gear), mtcars)
out <- get_data(mod)
test_that("logicals", {
expect_equal(out$am, mtcars$am, ignore_attr = TRUE)
})
# See #689
test_that("get_data() log transform", {
set.seed(123)
x <- abs(rnorm(100, sd = 5)) + 5
y <- exp(2 + 0.3 * x + rnorm(100, sd = 0.4))
dat <- data.frame(y, x)
mod <- lm(log(y) ~ log(x), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
expect_identical(find_response(mod), "y")
expect_identical(find_response(mod, combine = FALSE), "y")
mod <- lm(log(y) ~ x, data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
expect_identical(find_response(mod), "y")
mod <- lm(y ~ log(x), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
expect_identical(find_response(mod), "y")
mod <- lm(y ~ log(1 + x), data = dat)
expect_equal(
head(insight::get_data(mod)[c("y", "x")]),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
expect_identical(find_response(mod), "y")
mod <- lm(y ~ log(x + 1), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
mod <- lm(log(y) ~ log(1 + x), data = dat)
expect_equal(
head(insight::get_data(mod)[c("y", "x")]),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
mod <- lm(log(y) ~ log(x + 1), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
mod <- lm(log(1 + y) ~ log(1 + x), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
expect_identical(find_response(mod), "y")
expect_identical(find_response(mod, combine = FALSE), "y")
mod <- lm(log(y + 1) ~ log(x + 1), data = dat)
expect_equal(
head(insight::get_data(mod)),
head(dat),
tolerance = 1e-3,
ignore_attr = TRUE
)
})
test_that("log-offset", {
skip_if_not_installed("MASS")
newdata <- data.frame(
y = c(602, 38, 616, 256, 21, 723, 245, 176, 89, 1614, 31, 27, 313, 251, 345),
x = c(31, 35, 21, 30, 37, 26, 45, 21, 74, 27, 37, 37, 31, 37, 25),
offset_1 = c(72, 50, 31, 30, 16, 25, 75, 16, 78, 40, 68, 25, 71, 52, 17)
)
moff <- MASS::glm.nb(y ~ x + offset(log(offset_1)), data = newdata)
out <- get_data(moff, source = "frame")
expect_equal(
out$offset_1,
c(72, 50, 31, 30, 16, 25, 75, 16, 78, 40, 68, 25, 71, 52, 17),
tolerance = 1e-3
)
})
skip_on_cran()
m <- lm(Sepal.Length ~ Sepal.Width, data = iris)
out <- get_data(m)
test_that("subsets", {
expect_identical(colnames(out), c("Sepal.Length", "Sepal.Width"))
expect_identical(nrow(out), 150L)
})
m <- lm(Sepal.Length ~ Sepal.Width, data = iris, subset = Species == "versicolor")
out <- get_data(m)
test_that("subsets", {
expect_identical(colnames(out), c("Sepal.Length", "Sepal.Width", "Species"))
expect_identical(nrow(out), 50L)
})
# d <- iris
# m <- lm(Petal.Length ~ poly(Sepal.Length), data = d)
# d <<- mtcars
# expect_warning(expect_warning(out <- get_data(m)))
# expect_equal(colnames(out), c("Petal.Length", "Sepal.Length"))
test_that("log", {
m <- lm(log(Sepal.Length) ~ sqrt(Sepal.Width), data = iris)
out <- get_data(m)
expect_equal(out, iris[c("Sepal.Length", "Sepal.Width")], ignore_attr = TRUE)
})
test_that("log II", {
m <- lm(log(Sepal.Length) ~ scale(Sepal.Width), data = iris)
out <- get_data(m)
expect_equal(out, iris[c("Sepal.Length", "Sepal.Width")], ignore_attr = TRUE)
})
test_that("workaround bug in estimatr", {
skip_if_not_installed("ivreg")
skip_if_not_installed("estimatr")
data("CigaretteDemand", package = "ivreg")
m <- estimatr::iv_robust(
log(packs) ~ log(rprice) + log(rincome) | salestax + log(rincome),
data = CigaretteDemand
)
out <- get_data(m)
expect_equal(
head(out$packs),
c(101.08543, 111.04297, 71.95417, 56.85931, 82.58292, 79.47219),
tolerance = 1e-3
)
expect_equal(
colnames(out),
c("packs", "rprice", "rincome", "salestax"),
tolerance = 1e-3
)
})
test_that("get_data colnames", {
skip_on_os("windows")
skip_if_not_installed("brms")
m <- suppressMessages(suppressWarnings(brms::brm(mpg ~ hp + mo(cyl), data = mtcars, refresh = 0, iter = 200, chains = 1)))
out <- get_data(m)
expect_type(out$cyl, "double")
expect_true(all(colnames(out) %in% c("mpg", "hp", "cyl")))
out <- get_data(m, additional_variables = TRUE)
expect_true("qsec" %in% colnames(out))
})
test_that("get_data works for fixest inside functions", {
skip_if_not_installed("fixest")
data(mtcars)
# fit within function
fixest_wrapper1 <- function(data) {
data$cylinders <- factor(data$cyl)
fit <- fixest::feglm(mpg ~ cylinders * disp + hp, data = data)
return(fit)
}
global_fixest1 <- fixest_wrapper1(data = mtcars)
data <- mtcars[, c("mpg", "disp")]
expect_named(
get_data(global_fixest1),
c("mpg", "cylinders", "disp", "hp")
)
# fit within function, subset
fixest_wrapper2 <- function(data) {
data$cylinders <- factor(data$cyl)
fit <- fixest::feglm(mpg ~ cylinders * disp + hp, data = data)
return(fit)
}
data <- mtcars
global_fixest2 <- fixest_wrapper2(data = data[1:20, ])
expect_identical(nrow(get_data(global_fixest2)), 20L)
expect_named(
get_data(global_fixest2),
c("mpg", "cylinders", "disp", "hp")
)
data(mtcars)
d_cyl <- mtcars
d_cyl$cylinders <- factor(d_cyl$cyl)
global_fixest3 <- fixest::feglm(mpg ~ cylinders * disp + hp, data = d_cyl)
expect_named(
get_data(global_fixest3),
c("mpg", "cylinders", "disp", "hp")
)
# regular example
data(iris)
res <- fixest::feglm(Sepal.Length ~ Sepal.Width + Petal.Length | Species, iris, "poisson")
expect_named(
get_data(res),
c("Sepal.Length", "Sepal.Width", "Petal.Length", "Species")
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.