test-felm.R
In capybara: Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

#' srr_stats (tests)
#' @srrstats {RE2.1} Ensures that models throw meaningful error messages when input parameters or data are invalid.
#' @srrstats {RE3.1} Validates consistency between `felm` and base R `lm` in terms of coefficients, R-squared, and fitted values.
#' @srrstats {RE3.2} Compares model outputs against established benchmarks such as base R's `lm`.
#' @srrstats {RE5.1} Validates appropriate error handling for omitted arguments or missing data.
#' @srrstats {RE6.0} Implements robust testing for invalid or collinear regressors.
#' @srrstats {RE7.1} Validates that proportional regressors or collinear terms are detected and produce errors.
#' @srrstats {RE7.1a} Adding noise to the depending variable minimally affects the speed. I tested that explicitly.
#' @srrstats {RE7.2} Confirms that model computations remain consistent when small noise is added to data.
#' @srrstats {RE8.1} Ensures computational times remain consistent under similar model specifications.
#' @noRd
NULL

test_that("felm works", {
  # 1-FE ----

  m1 <- felm(formula = mpg ~ wt | cyl, data = mtcars)
  # m1_fixest <- fixest::feols(mpg ~ wt | cyl, mtcars)
  m2 <- lm(mpg ~ wt + as.factor(cyl), mtcars)

  # coef(m1)
  # coef(m1_fixest)
  #        wt
  # -3.205613

  # m1$fixed.effects
  # fixest::fixef(m1_fixest)
  # $cyl
  #        4        6        8
  # 33.99079 29.73521 27.91993

  expect_equal(coef(m1), coef(m2)[2], tolerance = 1e-2)

  n <- nrow(mtcars)
  expect_equal(length(fitted(m1)), n)
  expect_equal(length(predict(m1)), n)
  expect_equal(length(coef(m1)), 1)
  expect_equal(length(coef(summary(m1))), 4)

  # coef(felm(mpg ~ wt + qsec | cyl, mtcars))
  # coef(fixest::feols(mpg ~ wt + qsec | cyl, mtcars))

  m1 <- felm(mpg ~ wt + qsec | cyl, mtcars)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  # 2-FE ----

  m1 <- felm(mpg ~ wt + qsec | cyl + am, mtcars)

  # m1$coefficients
  # m1$fixed.effects

  # m1fixest <- fixest::feols(mpg ~ wt + qsec | cyl + am, mtcars)
  # m1fixest$coefficients
  # fixest::fixef(m1fixest)

  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)

  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)

  mtcars2 <- mtcars
  mtcars2$wt[2] <- NA

  m1 <- felm(mpg ~ wt + qsec | cyl + am, mtcars2)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am), mtcars2)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)

  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)

  m1 <- felm(mpg ~ wt + qsec | cyl + am | carb, mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  # 3-FE ----

  m1 <- felm(mpg ~ wt + qsec | cyl + am + gear, mtcars)
  m2 <- lm(mpg ~ wt + qsec + as.factor(cyl) + as.factor(am) + as.factor(gear), mtcars)

  expect_equal(coef(m1), coef(m2)[c(2, 3)], tolerance = 1e-2)

  s1 <- summary(m1)
  s2 <- summary(m2)
  expect_equal(s1$r.squared, s2$r.squared, tolerance = 1e-2)
  expect_equal(s1$adj.r.squared, s2$adj.r.squared, tolerance = 1e-2)
})

test_that("felm time is the minimally affected when adding noise to the data", {
  mtcars2 <- mtcars[, c("mpg", "wt", "cyl")]
  set.seed(200100)
  mtcars2$mpg <- mtcars2$mpg + rbinom(nrow(mtcars2), 1, 0.5) *
    .Machine$double.eps
  m1 <- felm(mpg ~ wt | cyl, mtcars)
  m2 <- felm(mpg ~ wt | cyl, mtcars2)
  expect_equal(coef(m1), coef(m2))
  expect_equal(m1$fixed.effects, m2$fixed.effects)

  t1 <- rep(NA, 10)
  t2 <- rep(NA, 10)
  for (i in 1:10) {
    a <- Sys.time()
    m1 <- felm(mpg ~ wt | cyl, mtcars)
    b <- Sys.time()
    t1[i] <- b - a

    a <- Sys.time()
    m2 <- felm(mpg ~ wt | cyl, mtcars2)
    b <- Sys.time()
    t2[i] <- b - a
  }
  expect_gte(0.05, abs(median(t1) - median(t2)))
})

test_that("proportional regressors return NA coefficients", {
  set.seed(200100)
  d <- data.frame(
    y = rnorm(100),
    x1 = rnorm(100),
    f = factor(sample(1:2, 100, replace = TRUE))
  )

  d$x2 <- 2 * d$x1
  fit1 <- lm(y ~ x1 + x2 + as.factor(f), data = d)
  fit2 <- felm(y ~ x1 + x2 | f, data = d)

  # fit1$coefficients
  # fit2$coefficients

  expect_equal(coef(fit2), coef(fit1)[2:3], tolerance = 1e-2)
  expect_equal(predict(fit2), predict(fit1), tolerance = 1e-2)
})

test_that("felm correctly predicts values outside the inter-quartile range", {
  # Helper function for MAPE calculation
  mape <- function(y, yhat) {
    mean(abs(y - yhat) / y)
  }

  # Create data subsets once
  d1 <- mtcars[mtcars$mpg >= quantile(mtcars$mpg, 0.25) & mtcars$mpg <= quantile(mtcars$mpg, 0.75), ]
  d2 <- mtcars[mtcars$mpg < quantile(mtcars$mpg, 0.25) | mtcars$mpg > quantile(mtcars$mpg, 0.75), ]

  m1_lm <- felm(mpg ~ wt + disp | cyl, mtcars)
  m2_lm <- lm(mpg ~ wt + disp + as.factor(cyl), mtcars)

  # print(coef(m1_lm))
  # print(coef(m2_lm)[2:3])

  # print(m1_lm$fixed.effects)
  # print(coef(m2_lm)[c(1,4:5)] + c(0, coef(m2_lm)[1], coef(m2_lm)[1]))

  pred1_lm <- predict(m1_lm, newdata = d1)
  pred2_lm <- predict(m1_lm, newdata = d2)

  mape1_lm <- mape(d1$mpg, pred1_lm)
  mape2_lm <- mape(d2$mpg, pred2_lm)

  expect_lt(mape1_lm, mape2_lm)

  # Compare with base R linear model
  pred1_base_lm <- predict(m2_lm, newdata = d1)
  pred2_base_lm <- predict(m2_lm, newdata = d2)

  # pred1_base_lm
  # pred2_base_lm

  expect_equal(pred1_lm, pred1_base_lm, tolerance = 1e-2)
  expect_equal(pred2_lm, pred2_base_lm, tolerance = 1e-2)
})

test_that("felm with weights works", {
  skip_on_cran()

  m1 <- felm(mpg ~ wt | am, weights = ~cyl, data = mtcars)
  m2 <- felm(mpg ~ wt | am, weights = mtcars$cyl, data = mtcars)

  w <- mtcars$cyl
  m3 <- felm(mpg ~ wt | am, weights = w, data = mtcars)

  expect_equal(coef(m2), coef(m1))
  expect_equal(coef(m3), coef(m1))

  w <- NULL
  m4 <- felm(mpg ~ wt | am, weights = w, data = mtcars)

  expect_gt(coef(m1), coef(m4))
})

Any scripts or data that you put into this service are public.

capybara documentation built on Aug. 27, 2025, 5:13 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

capybara
Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

tests/testthat/test-felm.R
In capybara: Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

Try the capybara package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

capybara Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

tests/testthat/test-felm.R In capybara: Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

Try the capybara package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

capybara
Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects

tests/testthat/test-felm.R
In capybara: Fast and Memory Efficient Fitting of Linear Models with High-Dimensional Fixed Effects