# Test of new plmtest implementation (handling unbalanced panels)
#
# compare to grunfeld data example in Baltagi (2013), Econometric Analysis of Panel Data, 5th ed., p. 74-75 (Table 4.1/4.2)
# also Baltagi (2005), Econometric Analysis of Panel Data, 3rd ed., p. 65-66 (just Table 4.1,
# table 4.2 in Baltagi (2005) is only Stata's xttest0
# for Breusch-Pagan with chi2(1) = 798.16, Prob > chi2 = 0.0000)
#
# => statistics and p-values match => implementation of balanced tests is ok.
#
# The text book only supplies examples for the balanced Grunfeld data
# Thus, there are no reference values for an _un_balanced data set.
# -> compare calculation of EViews on an unbalanced data set (grunfeld without last observation)
# unbalanced formulas reduce in the case of a balanced panel to the formula for balanced panels:
#
# balanced panel: => test output as in the text book => implementation is ok.
# unbalanced panel: => test statistics for unbalanced panel differ from balanced panel
# => test matches calculation of EViews
# Tables from Baltagi
#
# Table 4.1
############ [statistic (critical values at 5% level)]
## note: SLM statistic is not implemented in plm
# ind time twoways
#---------------------------------
# [...]
##### Grunfeld data set - balanced ####
# Table 4.2 [Output from EViews], similar to above table but with p-values
##### EViews add-in BPTest for some older version of EViews needed:
##### http://www.eviews.com/Addins/addins.shtml#addins
##### http://forums.eviews.com/viewtopic.php?f=23&t=2228
##### In (at least) EViews 9, the LM tests are implemented, no need for add-in;
##### SLM is not outputted anymore but std. Honda and std. KW
##### and p-values for negative one-sided statistics are not
##### printed anymore (see unbalanced example below).
########### [statistic (p-values)]
# ind time twoways
#---------------------------------
## note: SLM statistic is not implemented in plm
# [...]
############ unbalanced ##########################################
##### Grunfeld unbalanced data set [see below]
##### (last observation deleted, i. e. first 199 obs)
# Own computation with EViews 9
##### In EViews 9, the LM tests are implemented, no need for add-in anymore;
##### SLM is not outputted but std. Honda and std. KW
##### p-values for the negative one-sided statistics
##### [in this example (std.) Honda, KW]
##### are not printed in EViews 9; from the help file:
########### [statistic (p-values)]
# ind time twoways
#---------------------------------
# [...]
#
## note: standardizised HO statistic is not implemented in plm
## note: standardizised KW statistic is not implemented in plm
options(digits = 10)
Sys.setenv(LANG = "en")
require(plm)
data("Grunfeld", package = "plm")
Grunfeldpdata <- pdata.frame(Grunfeld, index = c("firm", "year"), drop.index = FALSE, row.names = TRUE)
fe_grunfeld <- plm(inv ~ value + capital, data=Grunfeldpdata, model="within")
re_grunfeld <- plm(inv ~ value + capital, data=Grunfeldpdata, model="random")
pool_grunfeld <- plm(inv ~ value + capital, data=Grunfeldpdata, model="pooling")
# Make an unbalanced data set
Grunfeldpdata_unbalanced <- Grunfeld[1:(nrow(Grunfeld)-1), ]
Grunfeldpdata_unbalanced <- pdata.frame(Grunfeldpdata_unbalanced, index=c("firm"), drop.index = F)
fe_grunfeld_unbalanced <- plm(inv ~ value + capital, data=Grunfeldpdata_unbalanced, model="within")
re_grunfeld_unbalanced <- plm(inv ~ value + capital, data=Grunfeldpdata_unbalanced, model="random")
pool_grunfeld_unbalanced <- plm(inv ~ value + capital, data=Grunfeldpdata_unbalanced, model="pooling")
# Produc
# data("Produc", package = "plm")
# form_produc <- formula(gsp ~ log(pc) + log(pcap) + log(emp) + unemp)
# produc_pool <- plm(form_produc, data = Produc, model="pooling")
# Hedonic
# Stastics heavily differ for this unbalanced data, depending on one applies the
# balanced tests (v1.4-0) to this unbalanced data or the unbalanced test
#
# balanced test of v1.4-0: 849.45815 (individual effects) and 600.20821 (time effects)
# unbalanced test: 25.011274 (individual effects) and 1.5571417 (time effects)
data("Hedonic", package = "plm")
pHedonic <- pdata.frame(Hedonic, index = "townid", drop.index = F)
form_hedonic <- formula(mv ~ crim)
hedonic_pool <- plm(form_hedonic, data = pHedonic, model="pooling")
plmtest(hedonic_pool)
plmtest(hedonic_pool, effect = "time")
### generalized version of plmtest() to handle also unbalanced panels
# individual effect
print(honda_ind <- plmtest(pool_grunfeld, type="honda"))
print(honda_ind_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="honda"))
print(bp_ind <- plmtest(pool_grunfeld, type="bp"))
print(bp_ind_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="bp"))
print(kw_ind <- plmtest(pool_grunfeld, type="kw"))
print(kw_ind_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="kw"))
# Note: ghm is only for twoways, hence not in this section
# time effect
print(honda_time <- plmtest(pool_grunfeld, type="honda", effect="time"))
print(honda_time_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="honda", effect="time"))
print(bp_time <- plmtest(pool_grunfeld, type="bp", effect="time"))
print(bp_time_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="bp", effect="time"))
print(kw_time <- plmtest(pool_grunfeld, type="kw", effect="time"))
print(kw_time_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="kw", effect="time"))
# Note: ghm is only for twoways, hence not in this section
# twoways effect
print(honda_tw <- plmtest(pool_grunfeld, type="honda", effect="twoways"))
print(honda_tw_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="honda", effect="twoways"))
print(bp_tw <- plmtest(pool_grunfeld, type="bp", effect="twoways"))
print(bp_tw_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="bp", effect="twoways"))
print(kw_tw <- plmtest(pool_grunfeld, type="kw", effect="twoways"))
print(kw_tw_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="kw", effect="twoways"))
print(ghm_tw <- plmtest(pool_grunfeld, type="ghm", effect="twoways"))
print(ghm_tw_unbalanced <- plmtest(pool_grunfeld_unbalanced, type="ghm", effect="twoways"))
### Test of formula interface
# individual effect
print(honda_ind_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="honda"))
print(honda_ind_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="honda"))
print(bp_ind_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="bp"))
print(bp_ind_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="bp"))
print(kw_ind_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="kw"))
print(kw_ind_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="kw"))
# Note: ghm is only for twoways, hence not in this section
# time effect
print(honda_time_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="honda", effect="time"))
print(honda_time_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="honda", effect="time"))
print(bp_time_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="bp", effect="time"))
print(bp_time_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="bp", effect="time"))
print(kw_time_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="kw", effect="time"))
print(kw_time_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="kw", effect="time"))
# Note: ghm is only for twoways, hence not in this section
# twoways effect
print(honda_tw_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="honda", effect="twoways"))
print(honda_tw_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="honda", effect="twoways"))
print(bp_tw_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="bp", effect="twoways"))
print(bp_tw_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="bp", effect="twoways"))
print(kw_tw_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="kw", effect="twoways"))
print(kw_tw_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="kw", effect="twoways"))
print(ghm_tw_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata, type="ghm", effect="twoways"))
print(ghm_tw_unbalanced_form <- plmtest(inv ~ value + capital, data=Grunfeldpdata_unbalanced, type="ghm", effect="twoways"))
# Should all be TRUE
if(!all(
identical(honda_ind, honda_ind_form),
identical(honda_ind_unbalanced, honda_ind_unbalanced_form),
identical(bp_ind, bp_ind_form),
identical(bp_ind_unbalanced, bp_ind_unbalanced_form),
identical(kw_ind, kw_ind_form),
identical(kw_ind_unbalanced, kw_ind_unbalanced_form),
identical(honda_time, honda_time_form),
identical(honda_time_unbalanced, honda_time_unbalanced_form),
identical(bp_time, bp_time_form),
identical(bp_time_unbalanced, bp_time_unbalanced_form),
identical(kw_time, kw_time_form),
identical(kw_time_unbalanced, kw_time_unbalanced_form),
identical(honda_tw, honda_tw_form),
identical(honda_tw_unbalanced, honda_tw_unbalanced_form),
identical(bp_tw, bp_tw_form),
identical(bp_tw_unbalanced, bp_tw_unbalanced_form),
identical(kw_tw, kw_tw_form),
identical(kw_tw_unbalanced, kw_tw_unbalanced_form),
identical(ghm_tw, ghm_tw_form),
identical(ghm_tw_unbalanced, ghm_tw_unbalanced_form))) stop("results of plm and formula interface differ!")
# Tests - unbalanced - statistics should be "sufficiently different" from balanced statistics,
# thus results should be TRUE
# individual
abs(honda_ind_unbalanced$statistic - honda_ind$statistic) > 0.0001
abs(bp_ind_unbalanced$statistic - bp_ind$statistic) > 0.0001
abs(kw_ind_unbalanced$statistic - kw_ind$statistic) > 0.0001
# time
abs(honda_time_unbalanced$statistic - honda_time$statistic) > 0.0001
abs(bp_time_unbalanced$statistic - bp_time$statistic) > 0.0001
abs(kw_time_unbalanced$statistic - kw_time$statistic) > 0.0001
# twoways
abs(honda_tw_unbalanced$statistic - honda_tw$statistic) > 0.0001
abs(bp_tw_unbalanced$statistic - bp_tw$statistic) > 0.0001
abs(kw_tw_unbalanced$statistic - kw_tw$statistic) > 0.0001
abs(ghm_tw_unbalanced$statistic - ghm_tw$statistic) > 0.0001
########## resemble critical values at alpha = 0.05 from Table 4.1 (Baltagi (2013), p. 74)
alpha <- 0.05
#### honda and kw oneway and twoway -> 1.645
qnorm(alpha, lower.tail = F)
# =>
pnorm(qnorm(alpha, lower.tail = F), lower.tail = F)
# honda (kw) p-value implementation as in plm_v1.4-0 (CRAN as of 2015-11-08):
# leads to the 10% level (not 5%):
# see also above the table for the unbalanced Grunfeld data on how EViews handles negative statistics for Honda and KW
pnorm(abs(1.645), lower.tail = FALSE)*2 # CRAN v1.4-0
# correct is -> p=0.05
pnorm(abs(1.645), lower.tail = FALSE)
#### bp: df=1 (oneway) -> 3.841
#### df=2 (twoway) -> 5.991
qchisq(alpha, df=1, lower.tail = F) # H0_a, H0_b
qchisq(alpha, df=2, lower.tail = F) # H0_c
# =>
pchisq(qchisq(alpha, df = 1, lower.tail = F), df=1, lower.tail = F)
pchisq(qchisq(alpha, df = 2, lower.tail = F), df=2, lower.tail = F)
#### ghm test for p-value of mixed chi-square distribution (more often called chi-bar-square)
# as implemented in fixed version.
# (was simple chisquare in plm_v1.4-0 on CRAN -> wrong)
#
# Baltagi (2013), p. 88 (note 2), p. 209 (note 10) gives critical values for 0.01, 0.05, 0.10 levels
# 4.321 is a typo in the notes of Baltagi's textbook, should be 4.231 [confirmed by private email from Badi Baltagi]
crit <- c(7.289, 4.231, 2.952) # without typo
# crit <- c(7.289, 4.312, 2.952) # with typo from text book
p.vals <- (1/4)*pchisq(crit, df=0, lower.tail = F) + (1/2) * pchisq(crit, df=1, lower.tail = F) + (1/4) * pchisq(crit, df=2, lower.tail = F)
# Baltagi (2013), p. 73, 74 contains another example of the mixed chi-square (chi-bar-square) distribution of another statistic
# The p-values for that example is also reassembled here
crit_2 <- c(2.706) # for alpha=0.05
p.val_2 <- (1/2)*pchisq(crit_2, df=0, lower.tail = F) + (1/2) * pchisq(crit_2, df=1, lower.tail = F)
################# Replicate an example from Stata
## example 1 in this manual:
## http://www.stata.com/manuals/xtxtregpostestimation.pdf
## It is an unbalanced panel
# require(haven) # required to read Stata data file
# nlswork <- read_dta("http://www.stata-press.com/data/r14/nlswork.dta")
# nlswork$race <- factor(nlswork$race) # fix data
# nlswork$race2 <- factor(ifelse(nlswork$race == 2, 1, 0)) # need this variable for example
# pnlswork <- pdata.frame(nlswork, index=c("idcode", "year"), drop.index=F)
#
# # note STAT 14 uses by default a different method compared to plm's Swamy–Arora variance component estimator
# # This is why in comparison with web examples from Stata the random effects coefficients slightly differ
# plm_re_nlswork <- plm(ln_wage ~ grade + age + I(age^2) + ttl_exp + I(ttl_exp^2) + tenure + I(tenure^2) + race2 + not_smsa + south
# , data = pnlswork, model = "random")
#
# # reassembles the FE estimation by Stata in Example 2 of http://www.stata.com/manuals13/xtxtreg.pdf
# plm_fe_nlswork <- plm(ln_wage ~ grade + age + I(age^2) + ttl_exp + I(ttl_exp^2) + tenure + I(tenure^2) + race2 + not_smsa + south
# , data = pnlswork, model = "within")
#
# plm_pool_nlswork <- plm(ln_wage ~ grade + age + I(age^2) + ttl_exp + I(ttl_exp^2) + tenure + I(tenure^2) + race2 + not_smsa + south
# , data = pnlswork, model = "pooling")
#
#
# # Reassembles Example 1 in http://www.stata.com/manuals14/xtxtregpostestimation.pdf
# # use modified plmtest() as a wrapper
# options(digits = 10)
# plmtest(plm_pool_nlswork, type="bp")
#
# # ## Lagrange Multiplier Test - individual effects - Breusch-Pagan Test for unbalanced Panels as in Baltagi/Li (1990)
# # ## data: ln_wage ~ grade + age + I(age^2) + ttl_exp + I(ttl_exp^2) + tenure + ...
# # ## BP_unbalanced = 14779.984, df = 1, p-value < 2.2204e-16
# # ## alternative hypothesis: significant effects
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.