tests/testthat/test-felm.R

#' srr_stats (tests)
#' @srrstats {RE2.1} Ensures that models throw meaningful error messages when input parameters or data are invalid.
#' @srrstats {RE3.1} Validates consistency between `felm` and base R `lm` in terms of coefficients, R-squared, and fitted values.
#' @srrstats {RE3.2} Compares model outputs against established benchmarks such as base R's `lm`.
#' @srrstats {RE5.1} Validates appropriate error handling for omitted arguments or missing data.
#' @srrstats {RE6.0} Implements robust testing for invalid or collinear regressors.
#' @srrstats {RE7.1} Validates that proportional regressors or collinear terms are detected and produce errors.
#' @srrstats {RE7.1a} Adding noise to the depending variable minimally affects the speed. I tested that explicitly.
#' @srrstats {RE7.2} Confirms that model computations remain consistent when small noise is added to data.
#' @srrstats {RE8.1} Ensures computational times remain consistent under similar model specifications.
#' @noRd
NULL

test_that("felm works", {
  # 1-FE ----

  m1 <- felm(formula = mpg ~ wt | cyl, data = mtcars)
  # m1_fixest <- fixest::feols(mpg ~ wt | cyl, mtcars)
  m2 <- lm(mpg ~ wt + as.factor(cyl), mtcars)

  # coef(m1)
  # coef(m1_fixest)
  #        wt
  # -3.205613

  # m1$fixed.effects
  # fixest::fixef(m1_fixest)
  # $cyl
  #        4        6        8
  # 33.99079 29.73521 27.91993

  expect_equal(coef(m1), coef(m2)[2], tolerance = 1e-2)

  n <- nrow(mtcars)
  expect_equal(length(fitted(m1)), n)
  expect_equal(length(predict(m1)), n)
  expect_equal(length(coef(m1)), 1)
  expect_equal(length(coef(summary(m1))), 4)

  # coef(felm(mpg ~ wt + qsec | cyl, mtcars))
  # coef(fixest::feols(mpg ~ wt + qsec | cyl, mtcars))

  m1 <- felm(mpg ~ wt + qsec | cyl, mtcars)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  # 2-FE ----

  m1 <- felm(mpg ~ wt + qsec | cyl + am, mtcars)

  # m1$coefficients
  # m1$fixed.effects

  # m1fixest <- fixest::feols(mpg ~ wt + qsec | cyl + am, mtcars)
  # m1fixest$coefficients
  # fixest::fixef(m1fixest)

  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)

  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)

  mtcars2 <- mtcars
  mtcars2$wt[2] <- NA

  m1 <- felm(mpg ~ wt + qsec | cyl + am, mtcars2)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am), mtcars2)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)

  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)

  m1 <- felm(mpg ~ wt + qsec | cyl + am | carb, mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  # 3-FE ----

  m1 <- felm(mpg ~ wt + qsec | cyl + am + gear, mtcars)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am) + as.factor(gear), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)
  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)
})

test_that("felm time is the minimally affected when adding noise to the data", {
  mtcars2 <- mtcars[, c("mpg", "wt", "cyl")]
  set.seed(200100)
  mtcars2$mpg <- mtcars2$mpg + rbinom(nrow(mtcars2), 1, 0.5) *
    .Machine$double.eps
  m1 <- felm(mpg ~ wt | cyl, mtcars)
  m2 <- felm(mpg ~ wt | cyl, mtcars2)
  expect_equal(coef(m1), coef(m2))
  expect_equal(m1$fixed.effects, m2$fixed.effects)

  t1 <- rep(NA, 10)
  t2 <- rep(NA, 10)
  for (i in 1:10) {
    a <- Sys.time()
    m1 <- felm(mpg ~ wt | cyl, mtcars)
    b <- Sys.time()
    t1[i] <- b - a

    a <- Sys.time()
    m2 <- felm(mpg ~ wt | cyl, mtcars2)
    b <- Sys.time()
    t2[i] <- b - a
  }
  expect_gte(0.05, abs(median(t1) - median(t2)))
})

test_that("proportional regressors return NA coefficients", {
  set.seed(200100)
  d <- data.frame(
    y = rnorm(100),
    x1 = rnorm(100),
    f = factor(sample(1:2, 100, replace = TRUE))
  )

  d$x2 <- 2 * d$x1
  fit1 <- lm(y ~ x1 + x2 + as.factor(f), data = d)
  fit2 <- felm(y ~ x1 + x2 | f, data = d)

  # fit1$coefficients
  # fit2$coefficients

  expect_equal(coef(fit2), coef(fit1)[2:3], tolerance = 1e-2)
  expect_equal(predict(fit2), predict(fit1), tolerance = 1e-2)
})

test_that("felm correctly predicts values outside the inter-quartile range", {
  # Helper function for MAPE calculation
  mape <- function(y, yhat) {
    mean(abs(y - yhat) / y)
  }

  # Create data subsets once
  d1 <- mtcars[mtcars$mpg >= quantile(mtcars$mpg, 0.25) & mtcars$mpg <= quantile(mtcars$mpg, 0.75), ]
  d2 <- mtcars[mtcars$mpg < quantile(mtcars$mpg, 0.25) | mtcars$mpg > quantile(mtcars$mpg, 0.75), ]

  m1_lm <- felm(mpg ~ wt + disp | cyl, mtcars)
  m2_lm <- lm(mpg ~ wt + disp + as.factor(cyl), mtcars)

  # print(coef(m1_lm))
  # print(coef(m2_lm)[2:3])

  # print(m1_lm$fixed.effects)
  # print(coef(m2_lm)[c(1,4:5)] + c(0, coef(m2_lm)[1], coef(m2_lm)[1]))

  pred1_lm <- predict(m1_lm, newdata = d1)
  pred2_lm <- predict(m1_lm, newdata = d2)

  mape1_lm <- mape(d1$mpg, pred1_lm)
  mape2_lm <- mape(d2$mpg, pred2_lm)

  expect_lt(mape1_lm, mape2_lm)

  # Compare with base R linear model
  pred1_base_lm <- predict(m2_lm, newdata = d1)
  pred2_base_lm <- predict(m2_lm, newdata = d2)

  # pred1_base_lm
  # pred2_base_lm

  expect_equal(pred1_lm, pred1_base_lm, tolerance = 1e-2)
  expect_equal(pred2_lm, pred2_base_lm, tolerance = 1e-2)
})

test_that("felm with weights works", {
  skip_on_cran()

  m1 <- felm(mpg ~ wt | am, weights = ~cyl, data = mtcars)
  m2 <- felm(mpg ~ wt | am, weights = mtcars$cyl, data = mtcars)

  w <- mtcars$cyl
  m3 <- felm(mpg ~ wt | am, weights = w, data = mtcars)

  expect_equal(coef(m2), coef(m1))
  expect_equal(coef(m3), coef(m1))

  w <- NULL
  m4 <- felm(mpg ~ wt | am, weights = w, data = mtcars)

  expect_gt(coef(m1), coef(m4))
})

Try the capybara package in your browser

Any scripts or data that you put into this service are public.

capybara documentation built on Aug. 27, 2025, 5:13 p.m.