tests/testthat/test-r-vs-stata.R

test_that("test against stata - CVR3 inference", {
  # note: minor discrepancies likely due to a) bug in my code or b)
  # different degrees of freedom in Stata vs R when computing t-stat
  # degrees of freedom (?)

  library(fixest)
  library(summclust)
  library(lmtest)
  library(haven)

  skip_on_cran()

  url <- "http://www.stata-press.com/data/r9/nlswork.dta"
  if (httr::http_error(url)) {
    stop("No internet connection or data source broken. Sorry about that!")
    return(NULL)
  } else {
    message("downloading the 'nlswork' dataset.")
    nlswork <- read_dta(url)
  }

  # drop NAs at the moment
  nlswork <- nlswork[, c("ln_wage",
                         "grade",
                         "age",
                         "birth_yr",
                         "union",
                         "race",
                         "msp",
                         "ind_code")]
  nlswork <- na.omit(nlswork)

  df2 <<- nlswork

  lm_fit <- lm(
    ln_wage ~ as.factor(grade) + as.factor(age) + as.factor(birth_yr) +
      union + race + msp,
    data = df2
  )

  summclust_res <- summclust(obj = lm_fit,
                             cluster = ~ ind_code,
                             params = c("msp","union"),
                             type = "CRV3")


  res <- tidy(
    summclust_res
  )

  # estimate
  expect_equal(res["msp", 1],-0.027515,
               ignore_attr = TRUE,
               tolerance = 1e-05)

  # t-stat
  expect_equal(round(res["msp", 2], 4),-1.9564,
               ignore_attr = TRUE)

  # standard error
  expect_equal(round(res["msp", 3], 6),
               0.014064,
               ignore_attr = TRUE)

  # p-value
  expect_equal(round(res["msp", 4], 4),
               0.0763,
               ignore_attr = TRUE)

  # conf int lower
  expect_equal(round(res["msp", "conf_int_l"], 6),-0.058470,
               ignore_attr = TRUE)

  # conf int upper
  expect_equal(round(res["msp", "conf_int_u"], 6),
               0.003440,
               ignore_attr = TRUE)

})



test_that("test against stata - leverage", {
  library(summclust)
  library(lmtest)
  library(haven)
  library(fixest)

  url <- "http://www.stata-press.com/data/r9/nlswork.dta"
  if (httr::http_error(url)) {
    stop("No internet connection or data source broken. Sorry about that!")
    return(NULL)
  } else {
    message("downloading the 'nlswork' dataset.")
    nlswork <- read_dta(url)
  }


  # drop NAs at the moment
  nlswork <- nlswork[, c("ln_wage",
                         "grade",
                         "age",
                         "birth_yr",
                         "union",
                         "race",
                         "msp",
                         "ind_code")]
  nlswork <- na.omit(nlswork)

  df2 <<- nlswork

  lm_fit <- lm(
    ln_wage ~ as.factor(grade) + as.factor(age) + as.factor(birth_yr) +
      union + race + msp,
    data = df2
  )

  summclust_res <- summclust(
    obj = lm_fit,
    cluster = ~ ind_code,
    params = c("msp")
  )

  # test leverage
  expect_equal(round(min(unlist(
    summclust_res$leverage_g
  )), 6), 0.093321)
  expect_equal(round(max(unlist(
    summclust_res$leverage_g
  )), 5), 20.28918)
  expect_equal(round(median(unlist(
    summclust_res$leverage_g
  )), 5), 3.51549)
  expect_equal(round(mean(unlist(
    summclust_res$leverage_g
  )), 6), 5.416667)

  # test beta no g
  expect_equal(
    round(min(summclust_res$beta_jack["msp",]), 6),-0.033200
  )
  expect_equal(
    round(max(summclust_res$beta_jack["msp",]), 6),-0.015835)
  expect_equal(
    round(median(summclust_res$beta_jack["msp",]), 6),-0.027765)
  expect_equal(
    round(mean(summclust_res$beta_jack["msp",]), 6),-0.026920)

  # test partial leverage
  expect_equal(
    round(min(summclust_res$partial_leverage["msp",]), 6), 0.001622)
  expect_equal(
    round(max(summclust_res$partial_leverage["msp",]), 6), 0.312995)
  expect_equal(
    round(median(summclust_res$partial_leverage["msp",]), 6), 0.056682)
  expect_equal(
    round(mean(summclust_res$partial_leverage["msp",]), 6), 0.083333)
})


test_that("test against stata - leverage, fixef absorb", {
  library(summclust)
  library(lmtest)
  library(haven)
  library(fixest)

  url <- "http://www.stata-press.com/data/r9/nlswork.dta"
  if (httr::http_error(url)) {
    stop("No internet connection or data source broken. Sorry about that!")
    return(NULL)
  } else {
    message("downloading the 'nlswork' dataset.")
    nlswork <- read_dta(url)
  }


  # drop NAs at the moment
  nlswork <-
    nlswork[, c("ln_wage",
                "grade",
                "age",
                "birth_yr",
                "union",
                "race",
                "msp",
                "ind_code")]
  nlswork <- na.omit(nlswork)

  df2 <<- nlswork

  feols_fit <- feols(ln_wage ~ union + race + msp |
                       grade + age + birth_yr + ind_code,
                     data = df2)

  lm_fit <- lm(
    ln_wage ~ union + race + msp + as.factor(grade) + as.factor(age) +
      as.factor(birth_yr) + as.factor(ind_code),
    data = df2
  )

  summclust_res <- summclust(obj = feols_fit,
                             cluster = ~ ind_code,
                             params = c("union", "race","msp"),
                             type = "CRV3")

  summclust_res_lm <- summclust(obj = lm_fit,
                                cluster = ~ ind_code,
                                params = c("union", "race","msp"),
                                type = "CRV3")

  # test leverage
  expect_equal(round(min(unlist(
    summclust_res$leverage_g
  )), 6), 0.087112)
  expect_equal(round(max(unlist(
    summclust_res$leverage_g
  )), 6), 20.011074)
  expect_equal(round(median(unlist(
    summclust_res$leverage_g
  )), 6), 3.442673)
  expect_equal(round(mean(unlist(
    summclust_res$leverage_g
  )), 6), 5.333333)

  expect_equal(round(min(unlist(
    summclust_res_lm$leverage_g
  )), 6), 0.087112 + 1)
  expect_equal(round(max(unlist(
    summclust_res_lm$leverage_g
  )), 6), 20.011074 + 1)
  expect_equal(round(median(unlist(
    summclust_res_lm$leverage_g
  )), 6), 3.442673 + 1)
  expect_equal(round(mean(unlist(
    summclust_res_lm$leverage_g
  )), 6), 5.333333 + 1)

  # test beta no g
  expect_equal(
    round(min(summclust_res$beta_jack["msp",]), 6),-0.023382)
  expect_equal(
    round(max(summclust_res$beta_jack["msp",]), 6),-0.015001)
  expect_equal(
    round(median(summclust_res$beta_jack["msp",]), 6),-0.021258)
  expect_equal(
    round(mean(summclust_res$beta_jack["msp",]), 6),-0.020770)

  expect_equal(
    round(min(summclust_res_lm$beta_jack["msp",]), 6),-0.023382)
  expect_equal(
    round(max(summclust_res_lm$beta_jack["msp",]), 6),-0.015001)
  expect_equal(
    round(median(summclust_res_lm$beta_jack["msp",]), 6),-0.021258)
  expect_equal(
    round(mean(summclust_res_lm$beta_jack["msp",]), 6),-0.020770)

  # test partial leverage
  expect_equal(
    round(min(summclust_res$partial_leverage["msp",]), 6), 0.001561)
  expect_equal(
    round(max(summclust_res$partial_leverage["msp",]), 6), 0.312377)
  expect_equal(
    round(median(summclust_res$partial_leverage["msp",]), 6), 0.056073)
  expect_equal(
    round(mean(summclust_res$partial_leverage["msp",]), 6), 0.083333)


  # coef of variation
  expect_equal(
    round(summclust_res$coef_var_leverage_g,6),
    1.155829
  )
  expect_equal(
    round(summclust_res$coef_var_N_G,2), 1.19
  )

  expect_equal(
    round(summclust_res$coef_var_partial_leverage["msp"],6),
    1.141658,
    ignore_attr = TRUE
  )
  expect_equal(
    round(summclust_res$coef_var_beta_jack["msp"],6),
    0.120094,
    ignore_attr = TRUE
  )

  expect_equal(
    round(summclust_res$coef_var_beta_jack["msp"],6),
    0.120094,
    ignore_attr = TRUE
  )


})

Try the summclust package in your browser

Any scripts or data that you put into this service are public.

summclust documentation built on Aug. 10, 2023, 9:07 a.m.