#### Testfile for pwaldtest()
#
# see also tests/test_pwaldtest_vcovG_attr_cluster.R for the attribute 'cluster' of the furnished vcovs
options(scipen = 999)
options(digits = 8)
library(plm)
data("Grunfeld", package="plm")
gp <- plm(inv ~ value + capital, data = Grunfeld, model = "pooling")
gi <- plm(inv ~ value + capital, data = Grunfeld,
effect = "individual", model = "within")
gt <- plm(inv ~ value + capital, data = Grunfeld,
effect = "time", model = "within")
gd <- plm(inv ~ value + capital, data = Grunfeld,
effect = "twoways", model = "within")
gre<- plm(inv ~ value + capital, data = Grunfeld,
effect = "individual", model = "random")
# Chisq
plm::pwaldtest(gp, test = "Chisq")
plm::pwaldtest(gi, test = "Chisq")
plm::pwaldtest(gt, test = "Chisq")
plm::pwaldtest(gd, test = "Chisq")
plm::pwaldtest(gre, test = "Chisq")
# F
plm::pwaldtest(gp, test = "F")
plm::pwaldtest(gi, test = "F")
plm::pwaldtest(gt, test = "F")
plm::pwaldtest(gd, test = "F")
plm::pwaldtest(gre, test = "F")
# Gretl uses Stata's small sample adjustment
g <- pdim(gi)$nT$n # no of individuals
n <- pdim(gi)$nT$N # no of total obs
k <- length(coefficients(gi))
adj_k1 <- (g/(g-1) * (n-1)/(n-k-1)) # k <- k + 1 because Stata and Gretl have the intercept in the FE model
adj <- (g/(g-1) * (n-1)/(n-k))
adj_gd <- (g/(g-1) * (n-1)/(n-k-1-19)) # Gretl has time dummies, not demeaning by time (20 periods for Grunfeld data)
# vcov with adjustment factors
vcov_mat_adj_gp <- adj_k1 * plm::vcovHC(gp)
vcov_mat_adj_gi <- adj_k1 * plm::vcovHC(gi)
vcov_mat_adj_gd <- adj_gd * plm::vcovHC(gd) # NB: adj_gd to be used here
vcov_mat_adj_gre <- adj_k1 * plm::vcovHC(gre)
vcov_mat_adj_gt <- adj_k1 * plm::vcovHC(gt)
# Chisq - robust - formula
plm::pwaldtest(gp, test = "Chisq", vcov = vcovHC)
plm::pwaldtest(gi, test = "Chisq", vcov = vcovHC)
plm::pwaldtest(gt, test = "Chisq", vcov = vcovHC)
plm::pwaldtest(gd, test = "Chisq", vcov = vcovHC)
plm::pwaldtest(gre, test = "Chisq", vcov = vcovHC)
# Chisq - robust - matrix
plm::pwaldtest(gp, test = "Chisq", vcov = vcovHC(gp))
plm::pwaldtest(gi, test = "Chisq", vcov = vcovHC(gi))
plm::pwaldtest(gt, test = "Chisq", vcov = vcovHC(gt))
plm::pwaldtest(gd, test = "Chisq", vcov = vcovHC(gd))
plm::pwaldtest(gre, test = "Chisq", vcov = vcov_mat_adj_gre) # replicates Gretl: Chi-square(2) = 70.1267
# F - robust
plm::pwaldtest(gp, test = "F", vcov = vcov_mat_adj_gp) # replicates Gretl: F(2, 9) = 51.59060
plm::pwaldtest(gi, test = "F", vcov = vcov_mat_adj_gi) # replicates Gretl: F(2, 9) = 28.3096
plm::pwaldtest(gi, test = "F", vcov = function(x) vcovHC(x, cluster = "time")) # cluster on time, df2 = 19
plm::pwaldtest(gt, test = "F", vcov = vcov_mat_adj_gt)
plm::pwaldtest(gd, test = "F", vcov = vcov_mat_adj_gd) # replicates Gretl: F(2, 9) = 60.0821
plm::pwaldtest(gre, test = "F", vcov = vcov_mat_adj_gre)
# F - robust - matrix
plm::pwaldtest(gp, test = "F", vcov = vcovHC(gp))
plm::pwaldtest(gi, test = "F", vcov = vcovHC(gi))
plm::pwaldtest(gi, test = "F", vcov = function(x) vcovHC(x, cluster = "time")) # cluster on time, df2 = 19
plm::pwaldtest(gt, test = "F", vcov = vcovHC(gt))
plm::pwaldtest(gd, test = "F", vcov = vcovHC(gd))
plm::pwaldtest(gre, test = "F", vcov = vcovHC(gre))
############### compare to other statistics packages:
## package 'lfe'
# library(lfe)
# data("Grunfeld", package = "plm")
# gi_lfe <- felm(inv ~ value + capital | firm, data = Grunfeld)
# gi_lfe_cluster <- felm(inv ~ value + capital | firm, data = Grunfeld, clustervar="firm")
# summary(gi_lfe)
# summary(gi_lfe_cluster)
# lfe::waldtest(gi_lfe, R = names(coef(gi_lfe))) # df1 = 2, df2 = 188
# lfe::waldtest(gi_lfe_cluster, R = names(coef(gi_lfe_cluster))) # chi2: 54.03250, F. 27.01625, df1 = 2, df2 = 9
# gi_lfe_cluster$clustervcv # # this vcov is not identical to vcovHC, so results do not match
### Stata ####
# See http://www.stata.com/manuals14/xtxtreg.pdf
# example 2 vs. example 3 (p 14 and 16):
# F(8, 23386) = 610.12 - normal
# F(8, 4696) = 273.86 - robust
# commented because it needs extra library 'foreign'
# library(plm)
# library(haven)
# nlswork <- read_dta("http://www.stata-press.com/data/r14/nlswork.dta") # large file
# nlswork$race <- factor(nlswork$race) # convert
# nlswork$race2 <- factor(ifelse(nlswork$race == 2, 1, 0)) # need this variable for example
# nlswork$grade <- as.numeric(nlswork$grade)
# pnlswork <- pdata.frame(nlswork, index=c("idcode", "year"), drop.index=F)
#
# form_nls_ex2 <- formula(ln_wage ~ grade + age + I(age^2) + ttl_exp + I(ttl_exp^2) + tenure + I(tenure^2) + race2 + not_smsa + south)
# plm_fe_nlswork <- plm(form_nls_ex2, data = pnlswork, model = "within")
#
# plm:::pwaldtest(plm_fe_nlswork, test = "F") # replicates Stata: F(8, 23386) = 610.12 - normal
# plm:::pwaldtest(plm_fe_nlswork, test = "F", vcov = vcovHC) # replicates Stata: F(8, 4696) = 273.86 - robust
### replicate Gretl ####
# library(foreign);library(plm)
# wagepan<-read.dta("http://fmwww.bc.edu/ec-p/data/wooldridge/wagepan.dta")
# pwagepan <- pdata.frame(wagepan, index = c("nr", "year"))
# pdim(pwagepan)
#
# mod_fe_ind <- plm(lwage ~ exper + hours + married + expersq, data = pwagepan, model = "within", effect = "individual")
#
# plm:::pwaldtest(mod_fe_ind, test="F")
# plm:::pwaldtest(mod_fe_ind, test="F", vcov = function(x) vcovHC(x)) # 121.4972
#
# # Gretl uses Stata's small sample adjustment
# g <- pdim(mod_fe_ind)$nT$n # no of individuals
# n <- pdim(mod_fe_ind)$nT$N # no of total obs
# k <- length(coefficients(mod_fe_ind))
# k <- k+1 # + 1 because Stata and Gretl have the intercept in the FE model
# adj <- (g/(g-1) * (n-1)/(n-k))
# vcov_mat_adj <- adj * plm::vcovHC(mod_fe_ind)
# print(plm:::pwaldtest(mod_fe_ind, test="F", vcov = vcov_mat_adj), digits = 12) # replicate Gretl: F(4, 544) = 121.163
# Reference: Gretl (2016b)
#
# Gretl, wagepan data, fixed effects (oneway, HAC SEs)
# Model 1: Fixed-effects, using 4360 observations
# Included 545 cross-sectional units
# Time-series length = 8
# Dependent variable: lwage
# Robust (HAC) standard errors
#
# coefficient std. error t-ratio p-value
# -----------------------------------------------------------
# const 1.30069 0.0550817 23.61 2.15e-085 ***
# exper 0.137331 0.0108430 12.67 2.12e-032 ***
# hours −0.000136467 2.13715e-05 −6.385 3.67e-010 ***
# married 0.0481248 0.0213232 2.257 0.0244 **
# expersq −0.00532076 0.000692182 −7.687 7.09e-014 ***
#
# Mean dependent var 1.649147 S.D. dependent var 0.532609
# Sum squared resid 459.8591 S.E. of regression 0.347371
# LSDV R-squared 0.628105 Within R-squared 0.196125
# Log-likelihood −1283.082 Akaike criterion 3664.165
# Schwarz criterion 7166.910 Hannan-Quinn 4900.376
# rho 0.065436 Durbin-Watson 1.546260
#
# Joint test on named regressors -
# Test statistic: F(4, 544) = 121.163
# with p-value = P(F(4, 544) > 121.163) = 7.19472e-074
#
# Robust test for differing group intercepts -
# Null hypothesis: The groups have a common intercept
# Test statistic: Welch F(544, 1276.3) = 26.9623
# with p-value = P(F(544, 1276.3) > 26.9623) = 0
# Model 1: Fixed-effects, using 200 observations
# Included 10 cross-sectional units
# Time-series length = 20
# Dependent variable: inv
# Robust (HAC) standard errors
#
# coefficient std. error t-ratio p-value
# --------------------------------------------------------
# const −58.7439 27.6029 −2.128 0.0622 *
# value 0.110124 0.0151945 7.248 4.83e-05 ***
# capital 0.310065 0.0527518 5.878 0.0002 ***
#
# Mean dependent var 145.9582 S.D. dependent var 216.8753
# Sum squared resid 523478.1 S.E. of regression 52.76797
# LSDV R-squared 0.944073 Within R-squared 0.766758
# Log-likelihood −1070.781 Akaike criterion 2165.562
# Schwarz criterion 2205.142 Hannan-Quinn 2181.579
# rho 0.663920 Durbin-Watson 0.684480
#
# Joint test on named regressors -
# Test statistic: F(2, 9) = 28.3096
# with p-value = P(F(2, 9) > 28.3096) = 0.000131055
#
# Robust test for differing group intercepts -
# Null hypothesis: The groups have a common intercept
# Test statistic: Welch F(9, 70.6) = 85.9578
# with p-value = P(F(9, 70.6) > 85.9578) = 1.90087e-034
# Model 6: Fixed-effects, using 200 observations
# Included 10 cross-sectional units
# Time-series length = 20
# Dependent variable: inv
# Robust (HAC) standard errors
#
# coefficient std. error t-ratio p-value
# --------------------------------------------------------
# const −32.8363 19.7826 −1.660 0.1313
# value 0.117716 0.0108244 10.88 1.77e-06 ***
# capital 0.357916 0.0478484 7.480 3.77e-05 ***
# dt_2 −19.1974 20.6986 −0.9275 0.3779
# dt_3 −40.6900 33.2832 −1.223 0.2526
# dt_4 −39.2264 15.7365 −2.493 0.0343 **
# dt_5 −69.4703 26.9988 −2.573 0.0300 **
# dt_6 −44.2351 17.3723 −2.546 0.0314 **
# dt_7 −18.8045 17.8475 −1.054 0.3195
# dt_8 −21.1398 14.1648 −1.492 0.1698
# dt_9 −42.9776 12.5441 −3.426 0.0076 ***
# dt_10 −43.0988 10.9959 −3.920 0.0035 ***
# dt_11 −55.6830 15.2019 −3.663 0.0052 ***
# dt_12 −31.1693 20.9169 −1.490 0.1704
# dt_13 −39.3922 26.4371 −1.490 0.1704
# dt_14 −43.7165 38.8786 −1.124 0.2899
# dt_15 −73.4951 38.2545 −1.921 0.0869 *
# dt_16 −75.8961 36.7985 −2.062 0.0692 *
# dt_17 −62.4809 49.4181 −1.264 0.2379
# dt_18 −64.6323 51.5621 −1.253 0.2416
# dt_19 −67.7180 43.7447 −1.548 0.1560
# dt_20 −93.5262 31.7263 −2.948 0.0163 **
#
# Mean dependent var 145.9582 S.D. dependent var 216.8753
# Sum squared resid 452147.1 S.E. of regression 51.72452
# LSDV R-squared 0.951693 Within R-squared 0.798540
# Log-likelihood −1056.132 Akaike criterion 2174.264
# Schwarz criterion 2276.512 Hannan-Quinn 2215.643
# rho 0.658860 Durbin-Watson 0.686728
#
# Joint test on named regressors -
# Test statistic: F(2, 9) = 60.0821
# with p-value = P(F(2, 9) > 60.0821) = 6.22231e-006
#
# Robust test for differing group intercepts -
# Null hypothesis: The groups have a common intercept
# Test statistic: Welch F(9, 76.7) = 53.1255
# with p-value = P(F(9, 76.7) > 53.1255) = 2.45306e-029
# Model 5: Pooled OLS, using 200 observations
# Included 10 cross-sectional units
# Time-series length = 20
# Dependent variable: inv
# Robust (HAC) standard errors
#
# coefficient std. error t-ratio p-value
# --------------------------------------------------------
# const −42.7144 20.4252 −2.091 0.0660 *
# value 0.115562 0.0158943 7.271 4.71e-05 ***
# capital 0.230678 0.0849671 2.715 0.0238 **
#
# Mean dependent var 145.9582 S.D. dependent var 216.8753
# Sum squared resid 1755850 S.E. of regression 94.40840
# R-squared 0.812408 Adjusted R-squared 0.810504
# F(2, 9) 51.59060 P-value(F) 0.000012
# Log-likelihood −1191.802 Akaike criterion 2389.605
# Schwarz criterion 2399.500 Hannan-Quinn 2393.609
# rho 0.956242 Durbin-Watson 0.209717
# Model 2: Random-effects (GLS), using 200 observations
# Included 10 cross-sectional units
# Time-series length = 20
# Dependent variable: inv
# Robust (HAC) standard errors
#
# coefficient std. error z p-value
# --------------------------------------------------------
# const −57.8344 24.8432 −2.328 0.0199 **
# value 0.109781 0.0137557 7.981 1.45e-015 ***
# capital 0.308113 0.0549728 5.605 2.08e-08 ***
#
# Mean dependent var 145.9582 S.D. dependent var 216.8753
# Sum squared resid 1841062 S.E. of regression 96.42765
# Log-likelihood −1196.541 Akaike criterion 2399.083
# Schwarz criterion 2408.978 Hannan-Quinn 2403.087
#
# 'Between' variance = 7089.8
# 'Within' variance = 2784.46
# theta used for quasi-demeaning = 0.861224
# corr(y,yhat)^2 = 0.806104
#
# Joint test on named regressors -
# Asymptotic test statistic: Chi-square(2) = 70.1267
# with p-value = 5.91814e-016
#
# Breusch-Pagan test -
# Null hypothesis: Variance of the unit-specific error = 0
# Asymptotic test statistic: Chi-square(1) = 798.162
# with p-value = 1.35448e-175
#
# Hausman test -
# Null hypothesis: GLS estimates are consistent
# Asymptotic test statistic: Chi-square(2) = 7.31971
# with p-value = 0.0257363
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.