R/test_general.R

Defines functions pooltest.formula pooltest.plm pooltest trans_clubSandwich_vcov pwaldtest.default pwaldtest.pgmm pwaldtest.pvcm pwaldtest.plm pwaldtest pFtest.plm pFtest.formula pFtest plmtest.formula plmtest.plm plmtest pchibarsq phtest.panelmodel phtest.formula phtest

Documented in pFtest pFtest.formula pFtest.plm phtest phtest.formula phtest.panelmodel plmtest plmtest.formula plmtest.plm pooltest pooltest.formula pooltest.plm pwaldtest pwaldtest.pgmm pwaldtest.plm pwaldtest.pvcm

#' Hausman Test for Panel Models
#' 
#' Specification test for panel models.
#' 
#' The Hausman test (sometimes also called Durbin--Wu--Hausman test)
#' is based on the difference of the vectors of coefficients of two
#' different models.  The `panelmodel` method computes the original
#' version of the test based on a quadratic form
#' \insertCite{HAUS:78}{plm}. The `formula` method, if
#' `method = "chisq"` (default), computes the original version of the
#' test based on a quadratic form; if `method ="aux"` then the
#' auxiliary-regression-based version as in \insertCite{WOOL:10;textual}{plm},
#' Sec.10.7.3. Only the latter can be robustified by specifying a robust
#' covariance estimator as a function through the argument `vcov` (see
#' **Examples**).
#'
#' The `effect` argument is only relevant for the formula method/interface and 
#' is then applied to both models. For the panelmodel method/interface, the test 
#' is run with the effects of the already estimated models.
#' 
#' The equivalent tests in the **one-way** case using a between
#' model (either "within vs. between" or "random vs. between")
#' \insertCite{@see @HAUS:TAYL:81 or @BALT:13 Sec.4.3}{plm} can also
#' be performed by `phtest`, but only for `test = "chisq"`, not for
#' the regression-based test. NB: These equivalent tests using the
#' between model do not extend to the two-ways case.  There are,
#' however, some other equivalent tests,
#' \insertCite{@see @KANG:85 or @BALT:13 Sec.4.3.7}{plm},
#' but those are unsupported by `phtest`.
#' 
#' @aliases phtest
#' @param x an object of class `"panelmodel"` or `"formula"`,
#' @param x2 an object of class `"panelmodel"` (only for panelmodel method/interface),
#' @param model a character vector containing the names of two models
#' (length(model) must be 2),
#' @param effect a character specifying the effect to be introduced to both models,
#'  one of `"individual"`, `"time"`, or `"twoways"` (only for formula method),
#' @param data a `data.frame`,
#' @param method one of `"chisq"` or `"aux"`,
#' @param index an optional vector of index variables,
#' @param vcov an optional covariance function,
#' @param \dots further arguments to be passed on (currently none).
#' @return An object of class `"htest"`.
#' @export
#' @author Yves Croissant, Giovanni Millo
#' @references
#'
#' \insertRef{HAUS:78}{plm}
#'
#' \insertRef{HAUS:TAYL:81}{plm}
#'
#' \insertRef{KANG:85}{plm}
#'
#' \insertRef{WOOL:10}{plm}
#'
#' \insertRef{BALT:13}{plm}
#' 
#' @keywords htest
#' @examples
#' 
#' data("Gasoline", package = "plm")
#' form <- lgaspcar ~ lincomep + lrpmg + lcarpcap
#' wi <- plm(form, data = Gasoline, model = "within")
#' re <- plm(form, data = Gasoline, model = "random")
#' phtest(wi, re)
#' phtest(form, data = Gasoline)
#' phtest(form, data = Gasoline, effect = "time")
#' 
#' # Regression-based Hausman test
#' phtest(form, data = Gasoline, method = "aux")
#' 
#' # robust Hausman test with vcov supplied as a function and 
#' # with additional parameters
#' phtest(form, data = Gasoline, method = "aux", vcov = vcovHC)
#' phtest(form, data = Gasoline, method = "aux",
#'   vcov = function(x) vcovHC(x, method="white2", type="HC3"))
#' 
phtest <- function(x,...){
  UseMethod("phtest")
}

#' @rdname phtest
#' @export
phtest.formula <- function(x, data, model = c("within", "random"),
                            effect = c("individual", "time", "twoways"),
                            method = c("chisq", "aux"),
                            index = NULL, vcov = NULL, ...) {
  
    if (length(model) != 2) stop("two models should be indicated in argument 'model'")
    for (i in 1:2){
        model.name <- model[i]
        if(!(model.name %in% names(model.plm.list))){
            stop("model must be one of ", oneof(model.plm.list))
        }
    }
  
    effect <- match.arg(effect)
  
    switch(match.arg(method),
           "chisq" = {
               cl <- match.call(expand.dots = TRUE)
               cl$model <- model[1L]
               cl$effect <- effect
               names(cl)[2L] <- "formula"
               m <- match(plm.arg, names(cl), 0L)
               cl <- cl[c(1L, m)]
               cl[[1L]] <- as.name("plm")
               plm.model.1 <- eval(cl, parent.frame())
               plm.model.2 <- update(plm.model.1, model = model[2L])
               return(phtest(plm.model.1, plm.model.2)) # exit to phtest.panelmodel
           },
           "aux" = {
               ## some interface checks here
               if (model[1L] != "within") {
                   stop("Please supply 'within' as first model type")
               }
             
               if (!is.null(vcov) && !is.function(vcov)) stop("argument 'vcov' needs to be a function")
             
               ## set pdata.frame
               if (!inherits(data, "pdata.frame")) data <- pdata.frame(data, index = index) #, ...)

               # reset rownames of original data set (->numbers rownames in clean sequence) to make rownames
               # comparable for later comparison to obs used in estimation of models (get rid of NA values)
               # [needed because pmodel.response() and model.matrix() do not retain fancy rownames, but rownames]
               row.names(data) <- NULL
               
               # calculate FE and RE model
               fe_mod <- plm(formula = x, data = data, model = model[1L], effect = effect)
               re_mod <- plm(formula = x, data = data, model = model[2L], effect = effect)

                ## DEBUG printing:
                 # print(effect)
                 # print(model)
                 # print(paste0("mod1: ", describe(fe_mod, "effect")))
                 # print(paste0("mod2: ", describe(re_mod, "effect")))
                 # print(fe_mod)
                 # print(re_mod)
               
               reY <- pmodel.response(re_mod)
#               reX <- model.matrix(re_mod)[ , -1, drop = FALSE] # intercept not needed; drop=F needed to prevent matrix
#               feX <- model.matrix(fe_mod, cstcovar.rm = TRUE)  # from degenerating to vector if only one regressor
               reX <- model.matrix(re_mod, cstcovar.rm = "intercept")
               feX <- model.matrix(fe_mod, cstcovar.rm = "all")

               ## estimated models could have fewer obs (due dropping of NAs) compared to the original data
               ## => match original data and observations used in estimated models
               ## routine adapted from lmtest::bptest
               commonrownames <- intersect(intersect(intersect(row.names(data), names(reY)), row.names(reX)), row.names(feX))
               if(!(all(c(row.names(data) %in% commonrownames, commonrownames %in% row.names(data))))) {
                 data <- data[commonrownames, , drop = FALSE]
                 reY <- reY[commonrownames]
                 reX <- reX[commonrownames, , drop = FALSE]
                 feX <- feX[commonrownames, , drop = FALSE]
               }
               
               # Tests of correct matching of obs (just for safety ...)
               if(!all(c(isTRUE(all.equal(length(reY), nrow(data))), isTRUE(all.equal(nrow(reX), nrow(feX))), 
                         isTRUE(all.equal(length(reY), nrow(reX))))))
                  stop("number of cases/observations do not match, most likely due to NAs in \"data\"")
                if(any(c(is.na(names(reY)), is.na(row.names(data)), is.na(row.names(reX)), is.na(row.names(feX)))))
                    stop("one (or more) rowname(s) is (are) NA")
                if(!all(c(isTRUE(all.equal(names(reY), row.names(data))), isTRUE(all.equal(row.names(reX), row.names(feX))),
                          isTRUE(all.equal(names(reY), row.names(reX))))))
                  stop("row.names of cases/observations do not match, most likely due to NAs in \"data\"")

               ## construct data set and formula for auxiliary regression
               dimnames(feX)[[2L]] <- paste(dimnames(feX)[[2L]], "tilde", sep=".")
               auxdata <- pdata.frame(cbind(index(data), reY, reX, feX))
               auxfm <- as.formula(paste("reY~",
                                         paste(dimnames(reX)[[2L]], collapse="+"),
                                         "+",
                                         paste(dimnames(feX)[[2L]], collapse="+"), sep=""))
               auxmod <- plm(formula = auxfm, data = auxdata, model = "pooling")
               nvars <- dim(feX)[[2L]]
               R <- diag(1, nvars)
               r <- rep(0, nvars) # here just for clarity of illustration
               range <- (nvars+2L):(nvars*2L + 1L)
               omega0 <- vcov(auxmod)[range, range]
               Rbr <- crossprod(R, coef(auxmod)[range]) - r
               
               df <- nvars
               
               h2t <- as.numeric(crossprod(Rbr, solve(omega0, Rbr)))
               ph2t <- pchisq(h2t, df = df, lower.tail = FALSE)

               names(df) <- "df"
               names(h2t) <- "chisq"

               if(!is.null(vcov)) {
                   vcov <- paste(", vcov: ",
                                  paste(deparse(substitute(vcov))),
                                  sep="")
               }

               haus2 <- list(statistic   = h2t,
                             p.value     = ph2t,
                             parameter   = df,
                             method      = paste("Regression-based Hausman test",
                                                  vcov, sep=""),
                             alternative = "one model is inconsistent",
                             data.name   = paste(deparse(substitute(x))),
                             auxmod      = auxmod)
               class(haus2) <- "htest"
               return(haus2)
           })
}

#' @rdname phtest
#' @export
phtest.panelmodel <- function(x, x2, ...) {
  coef.wi <- coef(x)
  coef.re <- coef(x2)
  vcov.wi <- vcov(x)
  vcov.re <- vcov(x2)
  names.wi <- names(coef.wi)
  names.re <- names(coef.re)
  common_coef_names <- names.re[names.re %in% names.wi]
  coef.h <- common_coef_names[!(common_coef_names %in% "(Intercept)")] # drop intercept if included (relevant when between model input)
  if(length(coef.h) == 0L) stop("no common coefficients in models")
  dbeta <- coef.wi[coef.h] - coef.re[coef.h]
  df <- length(dbeta)
  dvcov <- vcov.wi[coef.h, coef.h] - vcov.re[coef.h, coef.h]
  
  #### BEGIN cater for equivalent test within vs. between
    # Baltagi (2013), Sec. 4.3, pp. 77, 81
    modx  <- describe(x,  what = "model")
    modx2 <- describe(x2, what = "model")
    effx  <- describe(x,  what = "effect")
    effx2 <- describe(x2, what = "effect")
    
    # Tests with between model do not extend to two-ways case -> give error
    # There are, however, some equiv. tests with the individual/time between 
    # model, but let's not support them (see Kang (1985), Baltagi (2013), Sec. 4.3.7)
    if (   (modx  == "between" || modx2 == "between")
        && (effx == "twoways" || effx2 == "twoways")) stop("tests with between model in twoways case not supported")
    
    # in case of one-way within vs. between (m3 in Baltagi (2013), pp. 77, 81)
    # the variances need to be added (not subtracted like in the other cases)
    if (   (modx  == "within" && modx2 == "between")
        || (modx2 == "within" && modx  == "between")) {
      dvcov <- vcov.wi[coef.h, coef.h] + vcov.re[coef.h, coef.h]
    }
  #### END cater for equivalent tests with between model

  stat <- as.numeric(abs(t(dbeta) %*% solve(dvcov) %*% dbeta))
  pval <- pchisq(stat, df = df, lower.tail = FALSE)
  names(stat) <- "chisq"
  parameter <- df
  names(parameter) <- "df"
  alternative <- "one model is inconsistent"
  
  ## DEBUG printing:
     # print(paste0("mod1: ", describe(x,  "effect")))
     # print(paste0("mod2: ", describe(x2, "effect")))

  res <- list(statistic    = stat,
              p.value      = pval,
              parameter    = parameter,
              method       = "Hausman Test",
              data.name    = data.name(x),
              alternative  = alternative)
  class(res) <- "htest"
  return(res)
}

############## plmtest() ############################################
# For a concise overview with original references, see
# Baltagi (2013), Econometric Analysis of Panel Data, 5th edition, pp. 68-76 (balanced), pp. 200-203 (unbalanced).
#
# balanced (original) version of Breusch-Pagan test:
#     T.S. Breusch & A.R. Pagan (1979),
#       A Simple Test for Heteroscedasticity and Random Coefficient Variation,
#       Econometrica 47, pp. 1287-1294
#
# unbalanced version:
#     Baltagi/Li (1990),
#       A lagrange multiplier test for the error components model with incomplete panels,
#       Econometric Reviews, 9, pp. 103-107,


# pchibarsq: helper function: "p-function" for mixed chisq (also called chi-bar-squared)
# used in plmtest(., type = "ghm"), see Baltagi (2013), pp. 71-72, 74, 88, 202-203, 209
#
# a reference for the distribution seems to be
# Dykstra, R./El Barmi, H., Chi-Bar-Square Distributions, in: Encyclopedia of Statistical Sciences, 
# DOI: 10.1002/0471667196.ess0265.pub2
pchibarsq <- function(q, df, weights, lower.tail = TRUE, ... ) {
  # NB: other parameters in dots (...): not checked if valid! (ncp, log, ...)
  sum(weights * pchisq(q, df = df, lower.tail = lower.tail, ...))
}




#' Lagrange FF Multiplier Tests for Panel Models
#' 
#' Test of individual and/or time effects for panel models.
#' 
#' These Lagrange multiplier tests use only the residuals of the
#' pooling model.  The first argument of this function may be either a
#' pooling model of class `plm` or an object of class `formula`
#' describing the model. For input within (fixed effects) or random
#' effects models, the corresponding pooling model is calculated
#' internally first as the tests are based on the residuals of the
#' pooling model.
#' 
#' The `"bp"` test for unbalanced panels was derived in
#' \insertCite{BALT:LI:90;textual}{plm}
#' (1990), the `"kw"` test for unbalanced panels in
#' \insertCite{BALT:CHAN:LI:98;textual}{plm}.
#'
#' The `"ghm"` test and the `"kw"` test were extended to two-way
#' effects in \insertCite{BALT:CHAN:LI:92;textual}{plm}.
#' 
#' For a concise overview of all these statistics see
#' \insertCite{BALT:03;textual}{plm}, Sec. 4.2, pp. 68--76 (for balanced
#' panels) and Sec. 9.5, pp. 200--203 (for unbalanced panels).
#' 
#' @aliases plmtest
#' @param x an object of class `"plm"` or a formula of class
#'     `"formula"`,
#' @param data a `data.frame`,
#' @param effect a character string indicating which effects are
#'     tested: individual effects (`"individual"`), time effects
#'     (`"time"`) or both (`"twoways"`),
#' @param type a character string indicating the test to be performed:
#'
#' - `"honda"` (default) for \insertCite{HOND:85;textual}{plm},
#' - `"bp"` for \insertCite{BREU:PAGA:80;textual}{plm},
#' - `"kw"` for \insertCite{KING:WU:97;textual}{plm}, or
#' - `"ghm"` for \insertCite{GOUR:HOLL:MONF:82;textual}{plm} for
#'     unbalanced panel data sets, the respective unbalanced version
#'     of the tests are computed,
#' 
#' @param \dots further arguments passed to `plmtest`.
#' @return An object of class `"htest"`.
#' @note For the King-Wu statistics (`"kw"`), the oneway statistics
#'     (`"individual"` and `"time"`) coincide with the respective
#'     Honda statistics (`"honda"`); twoway statistics of `"kw"` and
#'     `"honda"` differ.
#' @export
#' @author Yves Croissant (initial implementation), Kevin Tappe
#'     (generalization to unbalanced panels)
#' @seealso [pFtest()] for individual and/or time effects tests based
#'     on the within model.
#' @references
#'
#' \insertRef{BALT:13}{plm}
#'
#' \insertRef{BALT:LI:90}{plm}
#'
#' \insertRef{BALT:CHAN:LI:92}{plm}
#'
#' \insertRef{BALT:CHAN:LI:98}{plm}
#'
#' \insertRef{BREU:PAGA:80}{plm}
#'
#' \insertRef{GOUR:HOLL:MONF:82}{plm}
#'
#' \insertRef{HOND:85}{plm}
#'
#' \insertRef{KING:WU:97}{plm}
#' 
#' @keywords htest
#' @examples
#' 
#' data("Grunfeld", package = "plm")
#' g <- plm(inv ~ value + capital, data = Grunfeld, model = "pooling")
#' plmtest(g)
#' plmtest(g, effect="time")
#' plmtest(inv ~ value + capital, data = Grunfeld, type = "honda")
#' plmtest(inv ~ value + capital, data = Grunfeld, type = "bp")
#' plmtest(inv ~ value + capital, data = Grunfeld, type = "bp",  effect = "twoways")
#' plmtest(inv ~ value + capital, data = Grunfeld, type = "ghm", effect = "twoways")
#' plmtest(inv ~ value + capital, data = Grunfeld, type = "kw",  effect = "twoways")
#' 
#' Grunfeld_unbal <- Grunfeld[1:(nrow(Grunfeld)-1), ] # create an unbalanced panel data set
#' g_unbal <- plm(inv ~ value + capital, data = Grunfeld_unbal, model = "pooling")
#' plmtest(g_unbal) # unbalanced version of test is indicated in output
#' 
plmtest <- function(x, ...){
  UseMethod("plmtest")
}

#' @rdname plmtest
#' @export
plmtest.plm <- function(x,
                        effect = c("individual", "time", "twoways"),
                        type = c("honda", "bp", "ghm", "kw"),
                        ...) {
  
  effect <- match.arg(effect)
  type <- match.arg(type)
  if (describe(x, "model") != "pooling") x <- update(x, model = "pooling")
  pdim <- pdim(x)
  N_obs <- pdim$nT$N
  balanced <- pdim$balanced
  index <- unclass(attr(model.frame(x), "index")) # unclass for speed
  id <- index[[1L]]
  time <- index[[2L]]
  T_i <- pdim$Tint$Ti
  N_t <- pdim$Tint$nt
  res <- resid(x)
  
  ### calc of parts of test statistic ##
  # calc. is done w/o using matrix calculation, see, e.g., Baltagi/Li (1990), p. 106
  CP.res <- crossprod(res)
  A1 <- as.numeric(crossprod(tapply(res, id,   sum)) / CP.res - 1) # == A1 <- sum(tapply(res,id,sum)^2)   / sum(res^2) - 1
  A2 <- as.numeric(crossprod(tapply(res, time, sum)) / CP.res - 1) # == A2 <- sum(tapply(res,time,sum)^2) / sum(res^2) - 1
  
  M11 <- sum(T_i ^ 2)
  M22 <- sum(N_t ^ 2)
  
  LM1 <- N_obs * (1 / sqrt(2 * (M11 - N_obs))) * A1 # == sqrt( (((N_obs)^2) / 2) * ( A1^2 / (M11 - N_obs)) ) [except sign due to positive sqrt]
  LM2 <- N_obs * (1 / sqrt(2 * (M22 - N_obs))) * A2 # == sqrt( (((N_obs)^2) / 2) * ( A2^2 / (M22 - N_obs)) ) [except sign due to positive sqrt]
  ### END calc of parts of test statistic ##
  
  
  if (effect != "twoways"){
    # oneway
    if (!type %in% c("honda", "bp", "kw"))
      stop("type must be one of \"honda\", \"bp\" or \"kw\" for a one way model") # kw oneway coincides with honda
    
    stat <- if(effect == "individual") LM1 else LM2
    stat <- switch(type,
                     honda = c(normal = stat),
                     bp    = c(chisq  = stat ^ 2),
                     kw    = c(normal = stat))
    
    parameter <- switch(type,
                          honda = NULL,
                          bp = c(df = 1), # df = 1 in the oneway case (Baltagi (2013), p. 70)
                          kw = NULL)
    
    pval <- switch(type,
                     honda = pnorm(stat, lower.tail = FALSE), # honda oneway ~ N(0,1), alternative is one-sided (Baltagi (2013), p. 71/202)
                     bp    = pchisq(stat, df = parameter, lower.tail = FALSE), # df = 1 in the one-way case, alternative is two-sided (Baltagi (2013), p. 70/201)
                     kw    = pnorm(stat, lower.tail = FALSE)) # kw oneway ~ N(0,1), alternative is one-sided (Baltagi (2013), p. 71/202)
    # END oneway
  }
  else { # twoways
    stat <- switch(type,
                   honda = c(normal = (LM1 + LM2) / sqrt(2)),
                   bp    = c(chisq = LM1 ^ 2 + LM2 ^ 2),
                   kw    = c(normal = (sqrt(M11 - N_obs) / sqrt(M11 + M22 - 2 * N_obs)) * LM1 +
                                 (sqrt(M22 - N_obs) / sqrt(M11 + M22 - 2 * N_obs)) * LM2),
                   ghm   = c(chibarsq = max(0, LM1) ^ 2 + max(0, LM2) ^ 2))
    
    parameter <- switch(type,
                          honda = NULL,
                          bp    = c(df = 2), # df = 2 in the twoway case (Baltagi (2013), p. 70/201)
                          kw    = NULL,
                          ghm   = c(df0 = 0L, df1 = 1L, df2 = 2L, w0 = 1/4, w1 = 1/2, w2 = 1/4)) # chibarsquared (mixed chisq) has several dfs and weights (Baltagi (2013), p. 72/202)
    
    pval <- switch(type,
                     honda = pnorm(stat, lower.tail = FALSE), # honda two-ways ~ N(0,1), alternative is one-sided (Baltagi (2013), p. 71/202)
                     bp    = pchisq(stat, df = parameter, lower.tail = FALSE),  # is df = 2 in the twoway case, alternative is two-sided (Baltagi (2013), p. 70/201)
                     kw    = pnorm(stat, lower.tail = FALSE), # kw twoways ~ N(0,1), alternative is one-sided (Baltagi (2013), p. 71/202)
                     ghm   = pchibarsq(stat, df = c(0L, 1L, 2L), weights = c(1/4, 1/2, 1/4), lower.tail = FALSE)) # mixed chisq (also called chi-bar-square), see Baltagi (2013), pp. 71-72, 74, 88, 202-203, 209
  } # END twoways
  
  method.type <- switch(type,
                          honda  = "Honda",
                          bp     = "Breusch-Pagan",
                          ghm    = "Gourieroux, Holly and Monfort",
                          kw     = "King and Wu")
  
  method.effect <- switch(effect,
                            id      = "individual effects",
                            time    = "time effects",
                            twoways = "two-ways effects")
  
  method <- paste("Lagrange Multiplier Test - ", method.effect,
                  " (", method.type, ")", sep="")
  
  if (type %in% c("honda", "kw")) {
    RVAL <- list(statistic = stat,
                 p.value   = pval,
                 method    = method,
                 data.name = data.name(x))
  }
  else { # bp, ghm
    RVAL <- list(statistic = stat,
                 p.value   = pval,
                 method    = method,
                 parameter = parameter,
                 data.name = data.name(x))
  }
  
  RVAL$alternative <- "significant effects" # TODO: maybe distinguish b/w one-sided and two-sided alternatives?
                                            #       (bp: two-sided alt.; all others: one-sided alt.?)
  
  class(RVAL) <- "htest"
  return(RVAL)
}

#' @rdname plmtest
#' @export
plmtest.formula <- function(x, data, ...,
                            effect = c("individual", "time", "twoways"),
                            type = c("honda", "bp", "ghm", "kw")) {
  
  cl <- match.call(expand.dots = TRUE)
  cl$model <- "pooling" # plmtest is performed on the pooling model...
  cl$effect <- NULL     # ... and pooling model has no argument effect...
  cl$type <- NULL       # ... and no argument type => see below: pass on args effect and type to plmtest.plm()
  names(cl)[2L] <- "formula"
  m <- match(plm.arg, names(cl), 0L)
  cl <- cl[c(1L, m)]
  cl[[1L]] <- as.name("plm")
  plm.model <- eval(cl, parent.frame())
  plmtest(plm.model, effect = effect, type = type) # pass on args effect and type to plmtest.plm()
}


#' F Test for Individual and/or Time Effects
#' 
#' Test of individual and/or time effects based on the comparison of the
#' `within` and the `pooling` model.
#' 
#' For the `plm` method, the argument of this function is two `plm`
#' objects, the first being a within model, the second a pooling
#' model. The effects tested are either individual, time or twoways,
#' depending on the effects introduced in the within model.
#' 
#' @aliases pFtest
#' @param x an object of class `"plm"` or of class `"formula"`,
#' @param z an object of class `"plm"`,
#' @param data a `data.frame`,
#' @param \dots further arguments.
#' @return An object of class `"htest"`.
#' @export
#' @author Yves Croissant
#' @seealso [plmtest()] for Lagrange multiplier tests of individuals
#'     and/or time effects.
#' @keywords htest
#' @examples
#' 
#' data("Grunfeld", package="plm")
#' gp <- plm(inv ~ value + capital, data = Grunfeld, model = "pooling")
#' gi <- plm(inv ~ value + capital, data = Grunfeld,
#'           effect = "individual", model = "within")
#' gt <- plm(inv ~ value + capital, data = Grunfeld,
#'           effect = "time", model = "within")
#' gd <- plm(inv ~ value + capital, data = Grunfeld,
#'           effect = "twoways", model = "within")
#' pFtest(gi, gp)
#' pFtest(gt, gp)
#' pFtest(gd, gp)
#' pFtest(inv ~ value + capital, data = Grunfeld, effect = "twoways")
#' 
pFtest <- function(x, ...){
  UseMethod("pFtest")
}

#' @rdname pFtest
#' @export
pFtest.formula <- function(x, data, ...){
  cl <- match.call(expand.dots = TRUE)
  cl$model <- "within"
  names(cl)[2L] <- "formula"
  m <- match(plm.arg,names(cl), 0L)
  cl <- cl[c(1L, m)]
  cl[[1L]] <- as.name("plm")
  plm.within <- eval(cl,parent.frame())
  plm.pooling <- update(plm.within, model = "pooling")
  pFtest(plm.within, plm.pooling, ...)
}

#' @rdname pFtest
#' @export
pFtest.plm <- function(x, z, ...){
  within <- x
  pooling <- z
  ## leave this interface check commented because pkg AER (reverse dependency) has examples that
  ## use pFtest(within_twoway, within_time)
  # if (! (describe(x, "model") == "within" && describe(z, "model") == "pooling"))
  #  stop("the two arguments should be a 'within' and a 'pooling' model (in this order)")
  
  effect <- describe(x, "effect")
  df1 <- df.residual(pooling)-df.residual(within)
  df2 <- df.residual(within)
  ssrp <- as.numeric(crossprod(residuals(pooling)))
  ssrw <- as.numeric(crossprod(residuals(within)))
  stat <- (ssrp-ssrw)/ssrw/df1*df2
  names(stat) <- "F"
  parameter <- c(df1, df2)
  names(parameter) <- c("df1", "df2")
  pval <- pf(stat, df1, df2, lower.tail = FALSE)
  alternative <- "significant effects"
  res <- list(statistic   = stat,
              p.value     = pval,
              method      = paste("F test for ", effect, " effects", sep=""),
              parameter   = parameter,
              data.name   = data.name(x),
              alternative = alternative)
  class(res) <- "htest"
  res
}

############## pwaldtest() ############################################
# pwaldtest is used in summary.plm, summary.pht, summary.pgmm to compute the
# Chi-square or F statistic, but can be used as a stand-alone test of
# joint significance of all slopes
#
# Short intro (but see associated help file)
# arg 'vcov' non-NULL => the robust tests are carried out
# arg df2adj == TRUE does finite-sample/cluster adjustment for F tests's df2
# args .df1, .df2 are only there if user wants to do overwriting of dfs (user has final say)
#
# Chi-sq test for IV models as in Wooldridge (1990), A note on the Lagrange multiplier and F-statistics for two stage least
#                                                    squares regressions, Economics Letters 34: 151-155.

#' Wald-style Chi-square Test and F Test
#' 
#' Wald-style Chi-square test and F test of slope coefficients being
#' zero jointly, including robust versions of the tests.
#' 
#' 
#' `pwaldtest` can be used stand--alone with a plm object, a pvcm object,
#' and a pgmm object (for pvcm objects only the 'random' type is valid and no
#' further arguments are processed; for pgmm objects only arguments `param`
#' and `vcov` are valid). It is also used in
#' [summary.plm()] to produce the F statistic and the Chi-square
#' statistic for the joint test of coefficients and in [summary.pgmm()].
#' 
#' `pwaldtest` performs the test if the slope coefficients of a panel
#' regression are jointly zero. It does not perform general purpose
#' Wald-style tests (for those, see [lmtest::waldtest()] (from package
#' \CRANpkg{lmtest}) or [car::linearHypothesis()] (from package
#' \CRANpkg{car})).
#' 
#' If a user specified variance-covariance matrix/function is given in
#' argument `vcov`, the robust version of the tests are carried out.
#' In that case, if the F test is requested (`test = "F"`) and no
#' overwriting of the second degrees of freedom parameter is given (by
#' supplying argument (`.df2`)), the adjustment of the second degrees
#' of freedom parameter is performed by default. The second degrees of
#' freedom parameter is adjusted to be the number of unique elements
#' of the cluster variable - 1, e. g., the number of individuals minus 1.
#' For the degrees of freedom adjustment of the F test in general,
#' see e. g. \insertCite{CAME:MILL:15;textual}{plm}, section VII;
#' \insertCite{ANDR:GOLS:SCMI:13}{plm}, pp. 126, footnote 4.
#' 
#' The degrees of freedom adjustment requires the vcov object supplied
#' or created by a supplied function to carry an attribute called
#' "cluster" with a known clustering described as a character (for now
#' this could be either `"group"` or `"time"`). The vcovXX functions
#' of the package \pkg{plm} provide such an attribute for their
#' returned variance--covariance matrices. No adjustment is done for
#' unknown descriptions given in the attribute "cluster" or when the
#' attribute "cluster" is not present. Robust vcov objects/functions
#' from package \CRANpkg{clubSandwich} work as inputs to `pwaldtest`'s
#' F test because a they are translated internally to match the needs
#' described above.
#' 
#' @aliases pwaldtest
#' @param x an estimated model of which the coefficients should be
#'     tested (usually of class `"plm"`/`"pvcm"`/`"pgmm"`)`,
#' @param test a character, indicating the test to be performed, may
#'     be either `"Chisq"` or `"F"` for the Wald-style
#'     Chi-square test or F test, respectively,
#' @param vcov `NULL` by default; a `matrix` giving a
#'     variance--covariance matrix or a function which computes such;
#'     if supplied (non `NULL`), the test is carried out using
#'     the variance--covariance matrix indicated resulting in a robust
#'     test,
#' @param df2adj logical, only relevant for `test = "F"`,
#'     indicating whether the adjustment for clustered standard errors
#'     for the second degrees of freedom parameter should be performed
#'     (see **Details**, also for further requirements regarding
#'     the variance--covariance matrix in `vcov` for the
#'     adjustment to be performed),
#' @param .df1 a numeric, used if one wants to overwrite the first
#'     degrees of freedom parameter in the performed test (usually not
#'     used),
#' @param .df2 a numeric, used if one wants to overwrite the second
#'     degrees of freedom parameter for the F test (usually not used),
#' @param param (for pgmm method only): select the parameters to be tested:
#'     `"coef"`, `"time"`, or `"all"``.
#' @param \dots further arguments (currently none).
#' @return An object of class `"htest"`, except for pvcm's within model for which
#'         a data.frame with results of the Wald chi-square tests and F tests per
#'         regression is returned.
#' @export
#' @author Yves Croissant (initial implementation) and Kevin Tappe
#'     (extensions: vcov argument and F test's df2 adjustment)
#' @seealso
#' 
#' [vcovHC()] for an example of the vcovXX functions, a robust
#' estimation for the variance--covariance matrix; [summary.plm()]
#' @references
#'
#' \insertRef{WOOL:10}{plm}
#'
#' \insertRef{ANDR:GOLS:SCMI:13}{plm}
#'
#' \insertRef{CAME:MILL:15}{plm}
#' 
#' @keywords htest
#' @examples
#' 
#' data("Grunfeld", package = "plm")
#' mod_fe <- plm(inv ~ value + capital, data = Grunfeld, model = "within")
#' mod_re <- plm(inv ~ value + capital, data = Grunfeld, model = "random")
#' pwaldtest(mod_fe, test = "F")
#' pwaldtest(mod_re, test = "Chisq")
#' 
#' # with robust vcov (matrix, function)
#' pwaldtest(mod_fe, vcov = vcovHC(mod_fe))
#' pwaldtest(mod_fe, vcov = function(x) vcovHC(x, type = "HC3"))
#' 
#' pwaldtest(mod_fe, vcov = vcovHC(mod_fe), df2adj = FALSE) # w/o df2 adjustment
#' 
#' # example without attribute "cluster" in the vcov
#' vcov_mat <- vcovHC(mod_fe)
#' attr(vcov_mat, "cluster") <- NULL  # remove attribute
#' pwaldtest(mod_fe, vcov = vcov_mat) # no df2 adjustment performed
#' 
#' 
pwaldtest <- function(x, ...) {
  UseMethod("pwaldtest")
}

#' @rdname pwaldtest
#' @export
pwaldtest.plm <- function(x, test = c("Chisq", "F"), vcov = NULL,
                          df2adj = (test == "F" && !is.null(vcov) && missing(.df2)), .df1, .df2, ...) {
  model <- describe(x, "model")
  test <- match.arg(test)
  df1 <- if(model == "within") length(coef(x)) else { length(coef(x)) - has.intercept(x) }
  df2 <- df.residual(x)
#  tss <- tss(x)        # not good for models without intercept
#  ssr <- deviance(x)   # -- " --
  vcov_arg <- vcov
  int <- "(Intercept)"
  coefs_wo_int <- coef(x)[!(names(coef(x)) %in% int)]
  if(!length(coefs_wo_int)) stop(paste("No non-intercept regressors in input model 'x',",
                                       "cannot perform Wald joint significance test"))
  # sanity check
  if (df2adj == TRUE && (is.null(vcov_arg) || test != "F")) {
    stop("df2adj == TRUE sensible only for robust F test, i.e., test == \"F\" and !is.null(vcov) and missing(.df2)")
  }
  
  # if robust test: prepare robust vcov
  if (!is.null(vcov_arg)) {
    if (is.matrix(vcov_arg))   rvcov <- rvcov_orig <- vcov_arg
    if (is.function(vcov_arg)) rvcov <- rvcov_orig <- vcov_arg(x)
    
    rvcov_name <- paste0(", vcov: ", paste0(deparse(substitute(vcov)))) # save "name" for later
    
    if (int %in% names(coef(x))) { # drop intercept, if present
      rvcov <- rvcov_orig[!rownames(rvcov_orig) %in% int, !colnames(rvcov_orig) %in% int]
      attr(rvcov, which = "cluster") <- attr(rvcov_orig, which = "cluster") # restore dropped 'cluster' attribute
    }
    # if robust F test: by default, do finite-sample adjustment for df2
    if (df2adj == TRUE && test == "F") {
      # determine the variable that the clustering is done on by
      # attribute "cluster" in the vcov (matrix object)
      # if only one member in cluster: fall back to original df2
      if (!is.null(attr(rvcov, which = "cluster"))) {
        
        # if supplied vcov is from package "clubSandwich": translate attr "cluster" to fit our code
        # (use rvcov_orig here for the test as the above dropping of the intercept drops the special classes of rvcov)
        if (inherits(rvcov_orig, "vcovCR")) rvcov <- trans_clubSandwich_vcov(CSvcov = rvcov, index = attr(model.frame(x), "index"))
        
        cluster <- attr(rvcov, which = "cluster")
        pdim <- pdim(x)
        df2 <- switch(cluster,
                      group = { if(pdim$nT$n == 1L) df2 else (pdim$nT$n - 1L) },
                      time  = { if(pdim$nT$T == 1L) df2 else (pdim$nT$T - 1L) },
                      # TODO: what about double clustering? vcovDC? vcovDC identifies itself as attr(obj, "cluster")="group-time")
                      # default:
                      { # warning("unknown/not implemented clustering, no df2 adjustment for finite-samples")
                        df2}
        )
      } else {
        # no information on clustering found, do not adjust df2
        # (other options would be: assume cluster = "group", or fall-back to non robust statistics (set vcov_arg <- NULL))
        warning("no attribute 'cluster' in robust vcov found, no finite-sample adjustment for df2") # assuming cluster = \"group\"")
        # df2 <- as.integer(pdim(x)$nT$n - 1) # assume cluster = "group"
      }
    }
  }
  
  # final say: overwrite Dfs if especially supplied
  if (!missing(.df1)) df1 <- .df1
  if (!missing(.df2)) df2 <- .df2
  
  if (test == "Chisq"){
    # perform non-robust chisq test
    if (is.null(vcov_arg)) {
      names.coefs_wo_int <- names(coefs_wo_int)
      stat <- as.numeric(crossprod(solve(vcov(x)[names.coefs_wo_int, names.coefs_wo_int], coefs_wo_int), coefs_wo_int))
#     stat < - (tss-ssr)/(ssr/df2) # does not produce correct results for unbalanced RE models and (un)balanced IV models
      names(stat) <- "Chisq"
      pval <- pchisq(stat, df = df1, lower.tail = FALSE)
      parameter <- c(df = df1)
      method <- "Wald test for joint significance"
    } else {
      # perform robust chisq test
      stat <- as.numeric(crossprod(solve(rvcov, coefs_wo_int), coefs_wo_int))
      names(stat) <- "Chisq"
      pval <- pchisq(stat, df = df1, lower.tail = FALSE)
      parameter <- c(df = df1)
      method <- paste0("Wald test for joint significance (robust)", rvcov_name)
    }
  }
  if (test == "F"){
    if(length(formula(x))[2L] > 1L) stop("test = \"F\" not sensible for IV models")
    if (is.null(vcov_arg)) {
      # perform "normal" F test
      names.coefs_wo_int <- names(coefs_wo_int)
      stat <- as.numeric(crossprod(solve(vcov(x)[names.coefs_wo_int, names.coefs_wo_int], coefs_wo_int), coefs_wo_int)) / df1
#      stat <- (tss-ssr)/ssr*df2/df1 # does not produce correct results for unbalanced RE models
      names(stat) <- "F"
      pval <- pf(stat, df1 = df1, df2 = df2, lower.tail = FALSE)
      parameter <- c(df1 = df1, df2 = df2)
      method <- "F test for joint significance"
    } else {
      # perform robust F test
      stat <- as.numeric(crossprod(solve(rvcov, coefs_wo_int), coefs_wo_int) / df1)
      names(stat) <- "F"
      pval <- pf(stat, df1 = df1, df2 = df2, lower.tail = FALSE)
      parameter <- c(df1 = df1, df2 = df2)
      method  <- paste0("F test for joint significance (robust)", rvcov_name)
    }
  }
  res <- list(data.name = data.name(x),
              statistic = stat,
              parameter = parameter,
              p.value   = pval,
              method    = method,
              alternative = "at least one coefficient is not null"
  )
  class(res) <- "htest"
  return(res)
}

#' @rdname pwaldtest
#' @export
pwaldtest.pvcm <- function(x, ...) {
  model <- describe(x, "model")
  effect <- describe(x, "effect")

  coefs.no.int <- !names(x$coefficients) %in% "(Intercept)" # logical with non-intercept regressors set to TRUE
  if(!length(names(x$coefficients)[coefs.no.int])) {
    # error informatively if only-intercept model (no other regressors)
    stop(paste("No non-intercept regressors in model(s) of input 'x',",
               "cannot perform Wald joint significance test(s)"))
  }

  if(model == "within") {
    # for the within case, simply return a data.frame with all test results
    # of single estimations (per individual or per time period)
    
    ii <- switch(effect, "individual" = 1L, "time" = 2L)
    
    residl <- collapse::gsplit(x$residuals, unclass(index(x))[[ii]], use.names = FALSE)
    
    # vcovs and coefficients w/o intercept
    vcovl <- lapply(x$vcov, function(x) x[coefs.no.int, coefs.no.int])
    coefl <- as.list(data.frame(t(x$coefficients[ , coefs.no.int, drop = FALSE])))
    df1 <- ncol(x$coefficients[ , coefs.no.int, drop = FALSE]) # ncol is same df1 for all models (as all models estimate the same coefs)
    df2 <- lengths(residl) - ncol(x$coefficients) # (any intercept is subtracted)
    
    statChisqs <- mapply(FUN = function(v, c) as.numeric(crossprod(solve(v, c), c)),
                     vcovl, coefl)
    statFs <- statChisqs / df1
    
    pstatChisqs <- pchisq(statChisqs, df = df1, lower.tail = FALSE)
    pstatFs <- pf(statFs, df1 = df1, df2 = df2, lower.tail = FALSE)
    
    stats.pvcm.within <- as.data.frame(cbind("Chisq"    = statChisqs,
                                             "p(chisq)" = pstatChisqs,
                                             "F"        = statFs,
                                             "p(F)"     = pstatFs,
                                             "df1"      = rep(df1, length(residl)),
                                             "df2"      = df2))
    # early return
    return(stats.pvcm.within)
  }
  
  ## case: model == "random"
  coefs_wo_int <- x$coefficients[coefs.no.int]
  stat <- as.numeric(crossprod(solve(vcov(x)[coefs.no.int, coefs.no.int], coefs_wo_int), coefs_wo_int))
  names(stat) <- "Chisq"
  df1 <- length(coefs_wo_int)
  pval <- pchisq(stat, df = df1, lower.tail = FALSE)
  parameter <- c(df = df1)
  method <- "Wald test for joint significance"
  
  res <- list(data.name = data.name(x),
              statistic = stat,
              parameter = parameter,
              p.value   = pval,
              method    = method,
              alternative = "at least one coefficient is not null"
  )
  class(res) <- "htest"
  return(res)
}


#' @rdname pwaldtest
#' @export
pwaldtest.pgmm <- function(x, param = c("coef", "time", "all"), vcov = NULL, ...) {
  param <- match.arg(param)
  vcov_supplied <- !is.null(vcov)
  myvcov <- vcov
  if (is.null(vcov)) vv <- vcov(x)
  else if (is.function(vcov)) vv <- myvcov(x)
  else vv <- myvcov

  model <- describe(x, "model")
  effect <- describe(x, "effect")
  if (param == "time" && effect == "individual") stop("no time dummies in this model")
  transformation <- describe(x, "transformation")
  coefficients <- if(model == "onestep") x$coefficients else x$coefficients[[2L]]
  Ktot <- length(coefficients)
  Kt <- length(x$args$namest)
  
  switch(param,
         "time" = {
           start <- Ktot - Kt + if(transformation == "ld") 2 else 1
           end <- Ktot
         },
         "coef" = {
           start <- 1
           end <- if (effect == "twoways") Ktot - Kt else Ktot
         },
         "all" = {
           start <- 1
           end <- Ktot
         })
  coef <- coefficients[start:end]
  vv <- vv[start:end, start:end]
  stat <- as.numeric(crossprod(coef, crossprod(solve(vv), coef)))
  names(stat) <- "chisq"
  parameter <- length(coef)
  names(parameter) <- "df"
  pval <- pchisq(stat, df = parameter, lower.tail = FALSE)
  method <- "Wald test for joint significance"
  if (vcov_supplied) {
    rvcov_name <- paste0(", vcov: ", paste0(deparse(substitute(vcov))))
    method <- paste0(method, " (robust)", rvcov_name)
  }
  wald.pgmm <- list(statistic = stat,
                    p.value   = pval,
                    parameter = parameter,
                    method    = method,
                    alternative = "at least one coefficient is not null",
                    data.name = data.name(x))
  class(wald.pgmm) <- "htest"
  return(wald.pgmm)
}

#' @export
pwaldtest.default <- function(x, ...) {
  pwaldtest.plm(x, ...)
}


# trans_clubSandwich_vcov: helper function for pwaldtest()
# translate vcov object from package clubSandwich so it is suitable for summary.plm, plm's pwaldtest.
# Attribute "cluster" in clubSandwich's vcov objects contains the cluster variable itself.
# plm's vcov object also has attribute "cluster" but it contains a character as
# information about the cluster dimension (either "group" or "time")
#
# inputs:
#   * CSvcov: a vcov as returned by clubSandwich's vcovCR function [class c("vcovCR", "clubSandwich")]
#   * index: the index belonging to a plm object/model
# return value:
#   * modified CSvcov (substituted attribute "cluster" with suitable character or NULL)
trans_clubSandwich_vcov <- function(CSvcov, index) {
  clustervar <- attr(CSvcov, "cluster")
  if (!is.null(clustervar)) {
    if (isTRUE(all.equal(index[[1L]], clustervar))) {
      attr(CSvcov, "cluster") <- "group"
      return(CSvcov)
    }
    if (isTRUE(all.equal(index[[2L]], clustervar))) {
      attr(CSvcov, "cluster") <- "time"
      return(CSvcov)
    } else {
      attr(CSvcov, "cluster") <- NULL
      return(CSvcov)
    }
  }
  warning("no attribute \"cluster\" found in supplied vcov object")
  return(CSvcov)
}



#' Test of Poolability
#' 
#' A Chow test for the poolability of the data.
#' 
#' `pooltest` is a *F* test of stability (or Chow test) for the
#' coefficients of a panel model. For argument `x`, the estimated
#' `plm` object should be a `"pooling"` model or a `"within"` model
#' (the default); intercepts are assumed to be identical in the first
#' case and different in the second case.
#' 
#' @aliases pooltest
#' @param x an object of class `"plm"` for the plm method; an object of
#' class `"formula"` for the formula interface,
#' @param z an object of class `"pvcm"` obtained with
#' `model="within"`,
#' @param data a `data.frame`,
#' @param \dots further arguments passed to plm.
#' @return An object of class `"htest"`.
#' @export
#' @author Yves Croissant
#' @keywords htest
#' @examples
#' 
#' data("Gasoline", package = "plm")
#' form <- lgaspcar ~ lincomep + lrpmg + lcarpcap
#' gasw <- plm(form, data = Gasoline, model = "within")
#' gasp <- plm(form, data = Gasoline, model = "pooling")
#' gasnp <- pvcm(form, data = Gasoline, model = "within")
#' pooltest(gasw, gasnp)
#' pooltest(gasp, gasnp)
#' 
#' pooltest(form, data = Gasoline, effect = "individual", model = "within")
#' pooltest(form, data = Gasoline, effect = "individual", model = "pooling")
#' 
pooltest <- function(x,...){
  UseMethod("pooltest")
}


#' @rdname pooltest
#' @export
pooltest.plm <- function(x, z, ...){
  rss <- deviance(x)
  uss <- as.numeric(crossprod(residuals(z)))
  dlr <- df.residual(x)
  dlu <- df.residual(z)
  df1 <- dlr - dlu
  df2 <- dlu
  stat <- (rss-uss)/uss*df2/df1
  pval <- pf(stat, df1 = df1, df2 = df2, lower.tail = FALSE)
  parameter <- c(df1 = df1, df2 = df2)
  names(stat) <- "F"
  res <- list(statistic   = stat,
              parameter   = parameter,
              p.value     = pval,
              data.name   = data.name(x),
              alternative = "unstability",
              method      = "F statistic")
  class(res) <- "htest"
  res
}

#' @rdname pooltest
#' @export
pooltest.formula <- function(x, data, ...){
  cl <- match.call(expand.dots = TRUE)
  cl[[1L]] <- as.name("plm")
  names(cl)[[2L]] <- "formula"
  if (is.null(cl$effect)) cl$effect <- "individual"
  plm.model <- eval(cl, parent.frame())

  cl[[1L]] <- as.name("pvcm")
  names(cl)[[2L]] <- "formula"
  if (is.null(cl$effect)) cl$effect <- "individual"
  cl$model <- "within"
  pvcm.model <- eval(cl, parent.frame())
  
  pooltest(plm.model, pvcm.model)
}

Try the plm package in your browser

Any scripts or data that you put into this service are public.

plm documentation built on June 22, 2024, 6:54 p.m.