R/test_uroot.R
In plm: Linear Models for Panel Data

Documented in phansitest print.phansitest print.purtest print.summary.purtest purtest summary.purtest

padf <- function(x, exo = c("none", "intercept", "trend"), p.approx = NULL, ...){
 # p-value approximation for tau distribution of (augmented) Dickey-Fuller test
 # as used in some panel unit root tests in purtest().
 #
 # argument 'x' must be a numeric (can be length == 1 or >= 1)
 #  
 # p-values approximation is performed by the method of MacKinnon (1994) or
 # MacKinnon (1996), the latter yielding better approximated p-values but
 # requires package 'urca'.
 # Default is NULL: check for availability of 'urca' and, if available, perform
 # MacKinnon (1996); fall back to MacKinnon (1994) if 'urca' is not available.
 # User can demand a specific method by setting the argument 'p.approx' to either
 # "MacKinnon1994" or "MacKinnon1996".

  exo <- match.arg(exo)
  
  # check if ellipsis (dots) has p.approx (could be passed from purtest()'s dots)
  # and if so, use p.approx from ellipsis
  dots <- list(...)
  if (!is.null(dots$p.approx)) p.approx <- dots$p.approx
  
  if (!is.null(p.approx) && !p.approx %in% c("MacKinnon1994", "MacKinnon1996"))
    stop(paste0("unknown argument value: p.approx = \"", p.approx, "\""))
  
  # Check if package 'urca' is available on local machine. We placed 'urca' 
  # in 'Suggests' rather than 'Imports' so that it is not an absolutely 
  # required dependency.)
  ## Procedure for pkg check for pkg in 'Suggests' as recommended in 
  ## Wickham, R packages (http://r-pkgs.had.co.nz/description.html).
  urca <- if(!requireNamespace("urca", quietly = TRUE)) FALSE else TRUE
  
  # default: if no p.approx specified by input (NULL),
  # use MacKinnon (1996) if 'urca' is available, else MacKinnon (1994)
  p.approx <- if(is.null(p.approx)) { if(urca)  "MacKinnon1996" else "MacKinnon1994" } else p.approx
  
  if (!is.null(p.approx) && p.approx == "MacKinnon1996" && !urca) {
    # catch case when user demands MacKinnon (1996) per argument but 'urca' is unavailable
    warning("method MacKinnon (1996) requested via argument 'p.approx' but requires non-installed package 'urca'; falling back to MacKinnon (1994)")
    p.approx <- "MacKinnon1994"
  }
  
  if(p.approx == "MacKinnon1996") {
    # translate exo argument to what urca::punitroot expects
    punitroot.exo <- switch (exo,
                             "none"      = "nc",
                             "intercept" = "c",
                             "trend"     = "ct")

    res <- urca::punitroot(x, N = Inf, trend = punitroot.exo) # return asymptotic value
  }
  
  if(p.approx == "MacKinnon1994") {
    # values from MacKinnon (1994), table 3, 4
    small <- matrix(c(0.6344, 1.2378, 3.2496,
                      2.1659, 1.4412, 3.8269,
                      3.2512, 1.6047, 4.9588),
                    nrow = 3, byrow = TRUE)
    small <- t(t(small) / c(1, 1, 100))
    large <- matrix(c(0.4797, 9.3557, -0.6999,  3.3066,
                      1.7339, 9.3202, -1.2745, -1.0368,
                      2.5261, 6.1654, -3.7956, -6.0285),
                    nrow = 3, byrow = TRUE)
    large <- t(t(large) / c(1, 10, 10, 100))
    limit <- c(-1.04, -1.61, -2.89)
    rownames(small) <- rownames(large) <- names(limit) <- c("none", "intercept", "trend")

    c.x.x2 <- rbind(1, x, x ^ 2)
    psmall <- colSums(small[exo, ] * c.x.x2)
    plarge <- colSums(large[exo, ] * rbind(c.x.x2, x ^ 3))
    
    res <- as.numeric(pnorm(psmall * (x <= limit[exo]) + plarge * (x > limit[exo])))
  }
  attr(res, "p.approx") <- p.approx
  return(res)
} ## END padf


## IPS (2003), table 3 for Wtbar statistic
# x1: means without time trend from table 3 in IPS (2003)
adj.ips.wtbar.x1 <- c(
  -1.504,-1.514,-1.522,-1.520,-1.526,-1.523,-1.527,-1.519,-1.524,-1.532,
  -1.488,-1.503,-1.516,-1.514,-1.519,-1.520,-1.524,-1.519,-1.522,-1.530,
  -1.319,-1.387,-1.428,-1.443,-1.460,-1.476,-1.493,-1.490,-1.498,-1.514,
  -1.306,-1.366,-1.413,-1.433,-1.453,-1.471,-1.489,-1.486,-1.495,-1.512,
  -1.171,-1.260,-1.329,-1.363,-1.394,-1.428,-1.454,-1.458,-1.470,-1.495,
      NA,    NA,-1.313,-1.351,-1.384,-1.421,-1.451,-1.454,-1.467,-1.494,
      NA,    NA,    NA,-1.289,-1.331,-1.380,-1.418,-1.427,-1.444,-1.476,
      NA,    NA,    NA,-1.273,-1.319,-1.371,-1.411,-1.423,-1.441,-1.474,
      NA,    NA,    NA,-1.212,-1.266,-1.329,-1.377,-1.393,-1.415,-1.456
)
# x2: variances without time trend from table 3 in IPS (2003)
adj.ips.wtbar.x2 <- c(
  1.069,0.923,0.851,0.809,0.789,0.770,0.760,0.749,0.736,0.735,
  1.255,1.011,0.915,0.861,0.831,0.803,0.781,0.770,0.753,0.745,
  1.421,1.078,0.969,0.905,0.865,0.830,0.798,0.789,0.766,0.754,
  1.759,1.181,1.037,0.952,0.907,0.858,0.819,0.802,0.782,0.761,
  2.080,1.279,1.097,1.005,0.946,0.886,0.842,0.819,0.801,0.771,
     NA,   NA,1.171,1.055,0.980,0.912,0.863,0.839,0.814,0.781,
     NA,   NA,   NA,1.114,1.023,0.942,0.886,0.858,0.834,0.795,
     NA,   NA,   NA,1.164,1.062,0.968,0.910,0.875,0.851,0.806,
     NA,   NA,   NA,1.217,1.105,0.996,0.929,0.896,0.871,0.818
)

# x3: means with time trend from table 3 in IPS (2003)
adj.ips.wtbar.x3 <- c(
  -2.166,-2.167,-2.168,-2.167,-2.172,-2.173,-2.176,-2.174,-2.174,-2.177,
  -2.173,-2.169,-2.172,-2.172,-2.173,-2.177,-2.180,-2.178,-2.176,-2.179,
  -1.914,-1.999,-2.047,-2.074,-2.095,-2.120,-2.137,-2.143,-2.146,-2.158,
  -1.922,-1.977,-2.032,-2.065,-2.091,-2.117,-2.137,-2.142,-2.146,-2.158,
  -1.750,-1.823,-1.911,-1.968,-2.009,-2.057,-2.091,-2.103,-2.114,-2.135,
      NA,    NA,-1.888,-1.955,-1.998,-2.051,-2.087,-2.101,-2.111,-2.135,
      NA,    NA,    NA,-1.868,-1.923,-1.995,-2.042,-2.065,-2.081,-2.113,
      NA,    NA,    NA,-1.851,-1.912,-1.986,-2.036,-2.063,-2.079,-2.112,
      NA,    NA,    NA,-1.761,-1.835,-1.925,-1.987,-2.024,-2.046,-2.088
)

# x4: variances with time trend from table 3 in IPS (2003)
adj.ips.wtbar.x4 <- c(
  1.132,0.869,0.763,0.713,0.690,0.655,0.633,0.621,0.610,0.597,
  1.453,0.975,0.845,0.769,0.734,0.687,0.654,0.641,0.627,0.605,
  1.627,1.036,0.882,0.796,0.756,0.702,0.661,0.653,0.634,0.613,
  2.482,1.214,0.983,0.861,0.808,0.735,0.688,0.674,0.650,0.625,
  3.947,1.332,1.052,0.913,0.845,0.759,0.705,0.685,0.662,0.629,
     NA,   NA,1.165,0.991,0.899,0.792,0.730,0.705,0.673,0.638,
     NA,   NA,   NA,1.055,0.945,0.828,0.753,0.725,0.689,0.650,
     NA,   NA,   NA,1.145,1.009,0.872,0.786,0.747,0.713,0.661,
     NA,   NA,   NA,1.208,1.063,0.902,0.808,0.766,0.728,0.670
)

adj.ips.wtbar <- c(adj.ips.wtbar.x1, adj.ips.wtbar.x2,
                   adj.ips.wtbar.x3, adj.ips.wtbar.x4)

adj.ips.wtbar <- array(adj.ips.wtbar, dim = c(10, 9, 2, 2),
                       dimnames = list(
                         c(10, 15, 20, 25, 30, 40, 50, 60, 70, 100),
                         0:8,
                         c("mean", "var"),
                         c("intercept", "trend"))
)

adj.ips.wtbar <- aperm(adj.ips.wtbar, c(2, 1, 3, 4))



###############
## IPS (2003), table 2 (obvious typos (missing minus signs) corrected)

# intercept 1% critical values
critval.ips.tbar.int1 <- c(
  -3.79, -2.66, -2.54, -2.50, -2.46, -2.44, -2.43, -2.42, -2.42, -2.40, -2.40,
  -3.45, -2.47, -2.38, -2.33, -2.32, -2.31, -2.29, -2.28, -2.28, -2.28, -2.27,
  -3.06, -2.32, -2.24, -2.21, -2.19, -2.18, -2.16, -2.16, -2.16, -2.16, -2.15,
  -2.79, -2.14, -2.10, -2.08, -2.07, -2.05, -2.04, -2.05, -2.04, -2.04, -2.04,
  -2.61, -2.06, -2.02, -2.00, -1.99, -1.99, -1.98, -1.98, -1.98, -1.97, -1.97,
  -2.51, -2.01, -1.97, -1.95, -1.94, -1.94, -1.93, -1.93, -1.93, -1.93, -1.92,
  -2.20, -1.85, -1.83, -1.82, -1.82, -1.82, -1.81, -1.81, -1.81, -1.81, -1.81,
  -2.00, -1.75, -1.74, -1.73, -1.73, -1.73, -1.73, -1.73, -1.73, -1.73, -1.73)
# intercept 5% critical values
critval.ips.tbar.int5 <- c(
  -2.76, -2.28, -2.21, -2.19, -2.18, -2.16, -2.16, -2.15, -2.16, -2.15,-2.15,
  -2.57, -2.17, -2.11, -2.09, -2.08, -2.07, -2.07, -2.06, -2.06, -2.06,-2.05,
  -2.42, -2.06, -2.02, -1.99, -1.99, -1.99, -1.98, -1.98, -1.97, -1.98,-1.97,
  -2.28, -1.95, -1.92, -1.91, -1.90, -1.90, -1.90, -1.89, -1.89, -1.89,-1.89,
  -2.18, -1.89, -1.87, -1.86, -1.85, -1.85, -1.85, -1.85, -1.84, -1.84,-1.84,
  -2.11, -1.85, -1.83, -1.82, -1.82, -1.82, -1.81, -1.81, -1.81, -1.81,-1.81,
  -1.95, -1.75, -1.74, -1.73, -1.73, -1.73, -1.73, -1.73, -1.73, -1.73,-1.73,
  -1.84, -1.68, -1.67, -1.67, -1.67, -1.67, -1.67, -1.67, -1.67, -1.67,-1.67)
# intercept 10% critical values	
critval.ips.tbar.int10 <- c(
  -2.38, -2.10, -2.06, -2.04, -2.04, -2.02, -2.02, -2.02, -2.02, -2.02, -2.01,
  -2.27, -2.01, -1.98, -1.96, -1.95, -1.95, -1.95, -1.95, -1.94, -1.95, -1.94,
  -2.17, -1.93, -1.90, -1.89, -1.88, -1.88, -1.88, -1.88, -1.88, -1.88, -1.88,
  -2.06, -1.85, -1.83, -1.82, -1.82, -1.82, -1.81, -1.81, -1.81, -1.81, -1.81,
  -2.00, -1.80, -1.79, -1.78, -1.78, -1.78, -1.78, -1.78, -1.78, -1.77, -1.77,
  -1.96, -1.77, -1.76, -1.75, -1.75, -1.75, -1.75, -1.75, -1.75, -1.75, -1.75,
  -1.85, -1.70, -1.69, -1.69, -1.69, -1.69, -1.68, -1.68, -1.68, -1.68, -1.69,
  -1.77, -1.64, -1.64, -1.64, -1.64, -1.64, -1.64, -1.64, -1.64, -1.64, -1.64)
# trend 1% critical values
critval.ips.tbar.trend1 <- c(
  -8.12, -3.42, -3.21, -3.13, -3.09, -3.05, -3.03, -3.02, -3.00, -3.00, -2.99,
  -7.36, -3.20, -3.03, -2.97, -2.94, -2.93, -2.90, -2.88, -2.88, -2.87, -2.86,
  -6.44, -3.03, -2.88, -2.84, -2.82, -2.79, -2.78, -2.77, -2.76, -2.75, -2.75,
  -5.72, -2.86, -2.74, -2.71, -2.69, -2.68, -2.67, -2.65, -2.66, -2.65, -2.64,
  -5.54, -2.75, -2.67, -2.63, -2.62, -2.61, -2.59, -2.60, -2.59, -2.58, -2.58,
  -5.16, -2.69, -2.61, -2.58, -2.58, -2.56, -2.55, -2.55, -2.55, -2.54, -2.54,
  -4.50, -2.53, -2.48, -2.46, -2.45, -2.45, -2.44, -2.44, -2.44, -2.44, -2.43,
  -4.00, -2.42, -2.39, -2.38, -2.37, -2.37, -2.36, -2.36, -2.36, -2.36, -2.36)
# trend 5% critical values
critval.ips.tbar.trend5 <- c(
  -4.66, -2.98, -2.87, -2.82, -2.80, -2.79, -2.77, -2.76, -2.75, -2.75, -2.75,
  -4.38, -2.85, -2.76, -2.72, -2.70, -2.69, -2.68, -2.67, -2.67, -2.66, -2.66,
  -4.11, -2.74, -2.66, -2.63, -2.62, -2.60, -2.60, -2.59, -2.59, -2.58, -2.58,
  -3.88, -2.63, -2.57, -2.55, -2.53, -2.53, -2.52, -2.52, -2.52, -2.51, -2.51,
  -3.73, -2.56, -2.52, -2.49, -2.48, -2.48, -2.48, -2.47, -2.47, -2.46, -2.46,
  -3.62, -2.52, -2.48, -2.46, -2.45, -2.45, -2.44, -2.44, -2.44, -2.44, -2.43,
  -3.35, -2.42, -2.38, -2.38, -2.37, -2.37, -2.36, -2.36, -2.36, -2.36, -2.36,
  -3.13, -2.34, -2.32, -2.32, -2.31, -2.31, -2.31, -2.31, -2.31, -2.31, -2.31)
# trend 10% critical values
critval.ips.tbar.trend10 <- c(
  -3.73, -2.77, -2.70, -2.67, -2.65, -2.64, -2.63, -2.62, -2.63, -2.62, -2.62,
  -3.60, -2.68, -2.62, -2.59, -2.58, -2.57, -2.57, -2.56, -2.56, -2.55, -2.55,
  -3.45, -2.59, -2.54, -2.52, -2.51, -2.51, -2.50, -2.50, -2.50, -2.49, -2.49,
  -3.33, -2.52, -2.47, -2.46, -2.45, -2.45, -2.44, -2.44, -2.44, -2.44, -2.44,
  -3.26, -2.47, -2.44, -2.42, -2.41, -2.41, -2.41, -2.40, -2.40, -2.40, -2.40,
  -3.18, -2.44, -2.40, -2.39, -2.39, -2.38, -2.38, -2.38, -2.38, -2.38, -2.38,
  -3.02, -2.36, -2.33, -2.33, -2.33, -2.32, -2.32, -2.32, -2.32, -2.32, -2.32,
  -2.90, -2.30, -2.29, -2.28, -2.28, -2.28, -2.28, -2.28, -2.28, -2.28, -2.28)

critval.ips.tbar <- c(critval.ips.tbar.int1,
                      critval.ips.tbar.int5,
                      critval.ips.tbar.int10,
                      critval.ips.tbar.trend1,
                      critval.ips.tbar.trend5,
                      critval.ips.tbar.trend10)

critval.ips.tbar <- array(critval.ips.tbar, dim = c(11, 8, 3, 2),
                          dimnames = list(
                            c(5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 100),
                            c(5, 7, 10, 15, 20, 25, 50, 100),
                            c("1%", "5%", "10%"),
                            c("intercept", "trend"))
)

critval.ips.tbar <- aperm(critval.ips.tbar, c(2, 1, 3, 4))


###############

## IPS (2003), table 1
# right hand pane of table 1 for Ztbar statistic
adj.ips.zbar.time  <- c(6, 7, 8, 9, 10, 15, 20, 25, 30, 40, 50, 100, 500, 1000, 2000)
adj.ips.zbar.means <- c(-1.520, -1.514, -1.501, -1.501, -1.504, -1.514, -1.522, -1.520, -1.526, -1.523, -1.527, -1.532, -1.531, -1.529, -1.533)
adj.ips.zbar.vars  <- c(1.745, 1.414, 1.228, 1.132, 1.069, 0.923, 0.851, 0.809, 0.789, 0.770, 0.760, 0.735, 0.715, 0.707, 0.706) 
names(adj.ips.zbar.time) <- names(adj.ips.zbar.means) <- names(adj.ips.zbar.vars) <- adj.ips.zbar.time

# left pane of table 1 [not used]
adj.ips.zbarL.means <- c(-1.125, -1.178, -1.214, -1.244, -1.274, -1.349, -1.395, -1.423, -1.439, -1.463, -1.477, -1.504, -1.526, -1.526, -1.533)
adj.ips.zbarL.vars  <- c(0.497, 0.506, 0.506, 0.527, 0.521, 0.565, 0.592, 0.609, 0.623, 0.639, 0.656, 0.683, 0.704, 0.702, 0.706)

################

# table 2 in LLC (2002): mean and standard deviation adjustments
Tn <- c(  25,  30,  35,  40,  45,  50,  60,   70,   80,   90,  100,  250,   500)

v <- c(c( 0.004,  0.003,  0.002,  0.002,  0.001,  0.001,  0.001,  0.000,  0.000,  0.000,  0.000,  0.000,  0.000),
       c( 1.049,  1.035,  1.027,  1.021,  1.017,  1.014,  1.011,  1.008,  1.007,  1.006,  1.005,  1.001,  1.000),
       c(-0.554, -0.546, -0.541, -0.537, -0.533, -0.531, -0.527, -0.524, -0.521, -0.520, -0.518, -0.509, -0.500),
       c( 0.919,  0.889,  0.867,  0.850,  0.837,  0.826,  0.810,  0.798,  0.789,  0.782,  0.776,  0.742,  0.707),
       c(-0.703, -0.674, -0.653, -0.637, -0.624, -0.614, -0.598, -0.587, -0.578, -0.571, -0.566, -0.533, -0.500),
       c( 1.003,  0.949,  0.906,  0.871,  0.842,  0.818,  0.780,  0.751,  0.728,  0.710,  0.695,  0.603,  0.500)
)

adj.levinlin <- array(v, dim = c(13, 2, 3),
                      dimnames = list(Tn,
                                      c("mu", "sigma"),
                                      c("none", "intercept", "trend")))

purtest.names.exo <- c(none      = "None",
                       intercept = "Individual Intercepts",
                       trend     = "Individual Intercepts and Trend")

purtest.names.test <- c(levinlin  = "Levin-Lin-Chu Unit-Root Test",
                        ips       = "Im-Pesaran-Shin Unit-Root Test",
                        madwu     = "Maddala-Wu Unit-Root Test",
                        Pm        = "Choi's modified P Unit-Root Test",
                        invnormal = "Choi's Inverse Normal Unit-Root Test",
                        logit     = "Choi's Logit Unit-Root Test",
                        hadri     = "Hadri Test")


## General functions to transform series:

YClags <- function(object,  k = 3){
  n <- length(object)
  if(k > n) stop("lag value larger than length of series to lag")
  if(k > 0) {
    res <- sapply(seq_len(k), function(x) c(rep(NA, x), object[seq_len(n - x)]))
    if(is.null(dim(res))) res <- as.matrix(res) # guarantee to return a matrix
  } else res <-  NULL
  res
}

YCdiff <- function(object) {
  # NB/TODO: no sanity check here: for input of length(object) == 1, a result of 
  # length 3 is returned: c(NA, NA, 0)
  c(NA, object[seq_along(object)[-1L]] - object[seq_len(length(object)-1)])
}

selectT <- function(x, Ts){
  ## This function selects the length of the series as it is tabulated
  if (x %in% Ts) return(x)
  if (x < Ts[1L]){
    warning("the time series is short")
    return(Ts[1L])
  }
  if (x > Ts[length(Ts)]){
    warning("the time series is long")
    return(Ts[length(Ts)])
  }
  pos <- which((Ts - x) > 0)[1L]
  return(Ts[c(pos - 1, pos)])
}

lagsel <- function(object, exo = c("intercept", "none", "trend"),
                   method = c("Hall", "AIC", "SIC"), pmax = 10, 
                   dfcor = FALSE, fixedT = TRUE, ...){
  # select the optimal number of lags using Hall method, AIC, or SIC
  method <- match.arg(method)
  y <- object
  Dy <- YCdiff(object)
  Ly <- c(NA, object[seq_len(length(object)-1)])
  if (exo == "none")      m <- NULL
  if (exo == "intercept") m <- rep(1, length(object))
  if (exo == "trend")     m <- cbind(1, seq_along(object))
  LDy <- YClags(Dy, k = pmax)
  
  decreasei <- TRUE
  i <- 0L
  narow <- seq_len(pmax+1)
  if (method == "Hall"){
    while(decreasei){
      lags <- pmax - i
      if (!fixedT) narow <- seq_len(lags + 1L)
      X <- cbind(Ly, LDy[ , 0:lags], m)[-narow, , drop = FALSE]
      y <- Dy[-narow]
      sres <- my.lm.fit(X, y, dfcor = dfcor)
      tml <- sres$coef[lags + 1L]/sres$se[lags + 1L]
      if (abs(tml) < 1.96 && lags > 0L)
        i <- i + 1L
      else
        decreasei <- FALSE
    }
  }
  else{
    l <- c()
    while(i <= pmax){
      lags <- pmax - i
      if (!fixedT) narow <- seq_len(lags + 1L)
      X <- cbind(Ly, LDy[ , 0:lags], m)[-narow, , drop = FALSE]
      y <- Dy[-narow]
      sres <- my.lm.fit(X, y, dfcor = dfcor)
      AIC <- if (method == "AIC") {
        log(sres$rss / sres$n) + 2 * sres$K / sres$n # AIC
      } else {
        log(sres$rss / sres$n) + sres$K * log(sres$n) / sres$n # SIC
      }
      l <- c(l, AIC)
      i <- i + 1L
    }
    lags <- pmax + 1 - which.min(l)
  }
  lags
} ## END lagsel


adj.levinlin.value <- function(l, exo = c("intercept", "none", "trend")){
  ## extract the adjustment values for Levin-Lin-Chu test
  theTs <- as.numeric(dimnames(adj.levinlin)[[1L]])
  Ts <- selectT(l, theTs)
  res <- if (length(Ts) == 1L) {
    adj.levinlin[as.character(Ts), , exo]
  } else{
    low  <- adj.levinlin[as.character(Ts[1L]), , exo]
    high <- adj.levinlin[as.character(Ts[2L]), , exo]
    low + (l - Ts[1L]) / (Ts[2L] - Ts[1L]) * (high - low)
  }
  res
} ## END adj.levinlin.value

adj.ips.wtbar.value <- function(l = 30, lags = 2, exo = c("intercept", "trend")){
  ## extract the adjustment values for Im-Pesaran-Shin test for Wtbar statistic (table 3 in IPS (2003))
  if (!lags %in% 0:8) warning("lags should be an integer between 0 and 8")
  lags <- min(lags, 8)
  theTs <- as.numeric(dimnames(adj.ips.wtbar)[[2L]])
  Ts <- selectT(l, theTs)
  res <- if (length(Ts) == 1L) {
    # take value as in table
    adj.ips.wtbar[as.character(lags), as.character(Ts), , exo]
  }
  else{
    # interpolate value from table
    lags.char <- as.character(lags)
    low  <- adj.ips.wtbar[lags.char, as.character(Ts[1L]), , exo]
    high <- adj.ips.wtbar[lags.char, as.character(Ts[2L]), , exo]
    low + (l - Ts[1L]) / (Ts[2L] - Ts[1L]) * (high - low)
  }
  res
} ## END adj.ips.wtbar.value

adj.ips.ztbar.value <- function(l = 30L, time, means, vars){
  ## extract the adjustment values for Im-Pesaran-Shin test's Ztbar statistic
  ## from table 1, right hand pane in IPS (2003) fed by arguments means and vars
  Ts <- selectT(l, time)
  res <- if (length(Ts) == 1L){
    # take value as in table
    c("mean" = means[as.character(Ts)], "var" = vars[as.character(Ts)])
  }
  else{
    # interpolate value from table
    low  <- c("mean" = means[as.character(Ts[1L])], "var" = vars[as.character(Ts[1L])])
    high <- c("mean" = means[as.character(Ts[2L])], "var" = vars[as.character(Ts[2L])])
    low + (l - Ts[1L])/(Ts[2L] - Ts[1L]) * (high - low)
  }
  res
} ## END adj.ips.ztbar.value

critval.ips.tbar.value <- function(ind = 10L, time = 19L, critvals, exo = c("intercept", "trend")){
  ## extract and interpolate 1%, 5%, 10% critical values for Im-Pesaran-Shin test's
  ## tbar statistic (table 2 in IPS (2003))
  ##
  ## Interpolation is based on inverse distance weighting (IDW) of
  ## L1 distance (1d case) and L2 distance (euclidean distance) (2d case)
  ## (optical inspections shows this method is a good approximation)
  
  theInds <- as.numeric(dimnames(critvals)[[1L]])
  theTs <- as.numeric(dimnames(critvals)[[2L]])
  Inds <- selectT(ind, theInds)
  Ts <- selectT(time, theTs)
  
  exo <- match.arg(exo)
  
  ## check cases and early exit:
  if(length(Inds) == 1L && length(Ts) == 1L) {
    # exact hit for individual AND time: take value as in table
    return(critvals[as.character(Inds), as.character(Ts), , exo])
  }
  else{
    if(length(Inds) == 1L || length(Ts) == 1L) {
      # exact hit for individual (X)OR time: interpolate other dimension
      if(length(Inds) == 1L) {
        low  <- critvals[as.character(Inds), as.character(Ts[1L]), , exo]
        high <- critvals[as.character(Inds), as.character(Ts[2L]), , exo]
        # L1 distances and inverse weighting for time dimension
        dist1 <- abs(time - Ts[1L])
        dist2 <- abs(time - Ts[2L])
        weight1 <- 1/dist1
        weight2 <- 1/dist2
        return ((weight1 * low + weight2 * high ) / (weight1 + weight2))
      }
      if(length(Ts) == 1L) {
        # L1 distances and inverse weighting for individual dimension
        low  <- critvals[as.character(Inds[1L]), as.character(Ts), , exo]
        high <- critvals[as.character(Inds[2L]), as.character(Ts), , exo]
        dist1 <- abs(ind - Inds[1L])
        dist2 <- abs(ind - Inds[2L])
        weight1 <- 1/dist1
        weight2 <- 1/dist2
        return ((weight1 * low + weight2 * high ) / (weight1 + weight2))
      }
    } else {
      # only get to this part when both dimensions are not an exact hit:
      # 2d interpolate
      
      # extract the 4 critical values as basis of interpolation interpolate ("corners of box")
      crit4 <- critvals[as.character(Inds), as.character(Ts), , exo]
      dot <- c(ind, time) # point of interest
      m <- as.matrix(expand.grid(Inds, Ts))
      colnames(m) <- c("ind", "time")
      dist <- lapply(1:4, function(x) m[x, ] - dot)
      dist <- vapply(dist, function(x) sqrt(as.numeric(crossprod(x))), 0.0, USE.NAMES = FALSE)
      weight <- 1/dist
      
      res <- (
          crit4[as.character(Inds[1L]), as.character(Ts[1L]), ] * weight[1L] +
          crit4[as.character(Inds[2L]), as.character(Ts[1L]), ] * weight[2L] +
          crit4[as.character(Inds[1L]), as.character(Ts[2L]), ] * weight[3L] +
          crit4[as.character(Inds[2L]), as.character(Ts[2L]), ] * weight[4L]) / sum(weight)
      return(res)
    }
  }
} ## END critval.ips.tbar.value

tsadf <- function(object, exo = c("intercept", "none", "trend"),
                  lags = NULL, dfcor = FALSE, comp.aux.reg = FALSE, ...){
  # compute some ADF regressions for each time series
  y <- object
  L <- length(y)
  Dy <- YCdiff(object)
  Ly <- c(NA, object[seq_len(length(object)-1)])
  if(exo == "none")      m <- NULL
  if(exo == "intercept") m <- rep(1, length(object))
  if(exo == "trend")     m <- cbind(1, seq_along(object))
  narow <- seq_len(lags+1)
  LDy <- YClags(Dy, k = lags)
  X <- cbind(Ly, LDy, m)[-narow, , drop = FALSE]
  if(dim(X)[[1]] == 0L) stop("after lagging, no non-NA case left in a series")
  y <- Dy[- narow]
  result <- my.lm.fit(X, y, dfcor = dfcor)
  sigma <- result$sigma
  rho <- result$coef[1L]
  sdrho <- result$se[1L]
  trho <- rho/sdrho
  p.trho <- padf(trho, exo = exo, ...)
  result <- list(rho    = rho,
                 sdrho  = sdrho,
                 trho   = trho,
                 sigma  = sigma,
                 T      = L,
                 lags   = lags,
                 p.trho = p.trho)
  
  if(comp.aux.reg){
    # for Levin-Lin-Chu test only, compute the residuals of the auxiliary
    # regressions
    X <- cbind(LDy[ , 0:lags], m)[-narow, , drop = FALSE]
    if(lags == 0 && exo == "none"){
      resid.diff  <- Dy[-narow]/sigma
      resid.level <- Ly[-narow]/sigma
    }
    else{
      y <- Dy[-narow]
      resid.diff <- lm.fit(X, y)$residuals/sigma
      y <- Ly[-narow]
      resid.level <- lm.fit(X, y)$residuals/sigma
    }
    result$resid <- data.frame(resid.diff  = resid.diff,
                               resid.level = resid.level)
  }
  result
}


longrunvar <- function(x, exo = c("intercept", "none", "trend"), q = NULL){
  # compute the long run variance of the dependent variable
  
  # q: lag truncation parameter: default (q == NULL) as in LLC, p. 14
  # it can be seen from LLC, table 2, that round() was used to get an
  # integer from that formula (not, e.g., trunc)
  T <- length(x)
  if (is.null(q)) q <- round(3.21 * T^(1/3))
  dx <- x[2:T] - x[seq_len(T-1)]
  if(exo == "intercept") dx <- dx - mean(dx)
  if(exo == "trend")     dx <- lm.fit(cbind(1, seq_along(dx)), dx)$residuals
  dx <- c(NA, dx)
  res <- 1/(T-1)*sum(dx[-1]^2)+
    2*sum(
      sapply(seq_len(q),
             function(L){
               sum(dx[2:(T-L)] * dx[(L+2):T]) / (T-1) *
                 (1 - L / (q+1))
             }
      )
    )
  return(res)
}


hadritest <- function(object, exo, Hcons, dfcor, method,
                      cl, args, data.name, ...) {
  ## used by purtest(<.>, test = "hadri"); non-exported function
  ## Hadri's test is applicable to balanced data only
  ## input 'object' is a list with observations per individual
  if(!is.list(object)) stop("argument 'object' in hadritest is supposed to be a list")
  if(exo == "none") stop("exo = \"none\" (\"~0\" in the formula interface) is not a valid option for Hadri's test")
  # determine L (= time periods), unique for balanced panel and number of individuals (n)
  if(length(L <- unique(lengths(object, use.names = FALSE))) > 1L)
    stop("Hadri test is not applicable to unbalanced panels")
  n <- length(object)
  
  if(exo == "intercept"){
    # can use lm.fit here as NAs are dropped in beginning of 'purtest'
    resid <- lapply(object, function(x) lm.fit(matrix(1, nrow = length(x)), x)$residuals)
    adj <- c(1/6, 1/45) # xi, zeta^2 in eq. (17) in Hadri (2000)
  }
  if (exo == "trend"){
    resid <- lapply(object, function(x) {
                              lx <- length(x)
                              dmat <- matrix(c(rep(1, lx), seq_len(lx)), nrow = lx)
                              # can use lm.fit here as NAs are dropped in beginning of 'purtest'
                              lm.fit(dmat, x)$residuals
                              })
    adj <- c(1/15, 11/6300) # xi, zeta^2 in eq. (25) in Hadri (2000)
  }

  cumres2 <- lapply(resid, function(x) cumsum(x)^2)
  
  if (!dfcor) {
    sigma2  <- mean(unlist(resid, use.names = FALSE)^2)
    sigma2i <- vapply(resid, function(x) mean(x^2), FUN.VALUE = 0.0, USE.NAMES = FALSE)
  } else {
    # df correction as suggested in Hadri (2000), p. 157
    dfcorval <- switch(exo, "intercept" = (L-1), "trend" = (L-2))
    # -> apply to full length residuals over all individuals -> n*(L-1) or n*(L-2)
    sigma2 <- as.numeric(crossprod(unlist(resid, use.names = FALSE))) / (n * dfcorval)
    # -> apply to individual residuals' length, so just L -> L-1 or L-2
    sigma2i <- vapply(resid, function(x) crossprod(x)/dfcorval, FUN.VALUE = 0.0, USE.NAMES = FALSE)
  }
  
  Si2 <- vapply(cumres2, function(x) sum(x), FUN.VALUE = 0.0, USE.NAMES = FALSE)
  numerator <- 1/n * sum(1/(L^2) * Si2)
  LMi <- 1/(L^2) * Si2 / sigma2i # individual LM statistics
  
  LM <- if(!Hcons) {
                    numerator / sigma2 # non-het consistent case
                  } else {
                    method <- paste0(method, " (Heterosked. Consistent)")
                    mean(LMi) # het. consistent case
                  }
  
  stat <- c(z = sqrt(n) * (LM - adj[1L])  / sqrt(adj[2L])) # eq. (14), (22) in Hadri (2000)
  pvalue <- pnorm(stat, lower.tail = FALSE) # is one-sided!
  
  htest <- structure(list(statistic   = stat,
                          parameter   = NULL,
                          alternative = "at least one series has a unit root", # correct alternative (at least one unit root)
                          data.name   = data.name,
                          method      = method,
                          p.value     = pvalue),
                     class = "htest")
  
  idres <- mapply(list, LMi, sigma2i, SIMPLIFY = FALSE)
  idres <- lapply(idres, setNames, c("LM", "sigma2"))
  
  result <- list(statistic = htest,
                 call      = cl,
                 args      = args,
                 idres     = idres)
  
  class(result) <- "purtest"
  return(result)
} # END hadritest


#' Unit root tests for panel data
#' 
#' `purtest` implements several testing procedures that have been proposed
#' to test unit root hypotheses with panel data.
#' 
#' 
#' All these tests except `"hadri"` are based on the estimation of
#' augmented Dickey-Fuller (ADF) regressions for each time series. A
#' statistic is then computed using the t-statistics associated with
#' the lagged variable. The Hadri residual-based LM statistic is the
#' cross-sectional average of the individual KPSS statistics
#' \insertCite{KWIA:PHIL:SCHM:SHIN:92;textual}{plm}, standardized by their
#' asymptotic mean and standard deviation.
#' 
#' Several Fisher-type tests that combine p-values from tests based on
#' ADF regressions per individual are available:
#'
#' - `"madwu"` is the inverse chi-squared test
#' \insertCite{MADDA:WU:99;textual}{plm}, also called P test by
#' \insertCite{CHOI:01;textual}{plm}.
#'
#' - `"Pm"` is the modified P test proposed by
#' \insertCite{CHOI:01;textual}{plm} for large N,
#'
#' - `"invnormal"` is the inverse normal test by \insertCite{CHOI:01;textual}{plm}, and
#' 
#' - `"logit"` is the logit test by \insertCite{CHOI:01;textual}{plm}.
#'
#' The individual p-values for the Fisher-type tests are approximated
#' as described in \insertCite{MACK:96;textual}{plm} if the package \CRANpkg{urca}
#' (\insertCite{PFAFF:08;textual}{plm}) is available, otherwise as described in
#' \insertCite{MACK:94;textual}{plm}.
#' 
#' For the test statistic tbar of the test of Im/Pesaran/Shin (2003)
#' (`ips.stat = "tbar"`), no p-value is given but 1%, 5%, and 10% critical
#' values are interpolated from paper's tabulated values via inverse distance
#' weighting (printed and contained in the returned value's element
#' `statistic$ips.tbar.crit`).
#'
#' Hadri's test, the test of Levin/Lin/Chu, and the tbar statistic of
#' Im/Pesaran/Shin are not applicable to unbalanced panels; the tbar statistic
#' is not applicable when `lags > 0` is given.
#' 
#' The exogenous instruments of the tests (where applicable) can be specified
#' in several ways, depending on how the data is handed over to the function:
#' 
#' - For the `formula`/`data` interface (if `data` is a `data.frame`,
#' an additional `index` argument should be specified); the formula
#' should be of the form: `y ~ 0`, `y ~ 1`, or `y ~ trend` for a test
#' with no exogenous variables, with an intercept, or with individual
#' intercepts and time trend, respectively. The `exo` argument is
#' ignored in this case.
#' 
#' - For the `data.frame`, `matrix`, and `pseries` interfaces: in
#' these cases, the exogenous variables are specified using the `exo`
#' argument.
#' 
#' With the associated `summary` and `print` methods, additional
#' information can be extracted/displayed (see also Value).
#' 
#' @aliases purtest
#' @param object,x Either a `"data.frame"` or a matrix containing the
#'     time series (individuals as columns), a `"pseries"` object, a formula;
#'     a `"purtest"` object for the print and summary methods,
#' @param data a `"data.frame"` or a `"pdata.frame"` object (required for
#'     formula interface, see Details and Examples),
#' @param index the indexes,
#' @param test the test to be computed: one of `"levinlin"` for
#'     \insertCite{LEVIN:LIN:CHU:02;textual}{plm}, `"ips"` for
#'     \insertCite{IM:PESAR:SHIN:03;textual}{plm}, `"madwu"` for
#'     \insertCite{MADDA:WU:99;textual}{plm}, `"Pm"` , `"invnormal"`,
#'     or `"logit"` for various tests as in
#'     \insertCite{CHOI:01;textual}{plm}, or `"hadri"` for
#'     \insertCite{HADR:00;textual}{plm}, see Details,
#' @param exo the exogenous variables to introduce in the augmented
#'     Dickey--Fuller (ADF) regressions, one of: no exogenous
#'     variables (`"none"`), individual intercepts (`"intercept"`), or
#'     individual intercepts and trends (`"trend"`), but see Details,
#' @param lags the number of lags to be used for the augmented
#'     Dickey-Fuller regressions: either a single value integer (the number of
#'     lags for all time series), a vector of integers (one for each
#'     time series), or a character string for an automatic
#'     computation of the number of lags, based on the AIC
#'     (`"AIC"`), the SIC (`"SIC"`), or on the method by
#'     \insertCite{HALL:94;textual}{plm} (`"Hall"`); argument is irrelevant
#'     for `test = "hadri"`,
#' @param pmax maximum number of lags (irrelevant for `test = "hadri"`),
#' @param Hcons logical, only relevant for `test = "hadri"`,
#'     indicating whether the heteroskedasticity-consistent test of
#'     \insertCite{HADR:00;textual}{plm} should be computed,
#' @param q the bandwidth for the estimation of the long-run variance 
#'     (only relevant for `test = "levinlin"`, the default (`q = NULL`)
#'     gives the value as suggested by the authors as round(3.21 * T^(1/3))),
#' @param dfcor logical, indicating whether the standard deviation of
#'     the regressions is to be computed using a degrees-of-freedom
#'     correction,
#' @param fixedT logical, indicating whether the individual ADF
#'     regressions are to be computed using the same number of
#'     observations (irrelevant for `test = "hadri"`),
#' @param ips.stat `NULL` or character of length 1 to request a specific
#'     IPS statistic, one of `"Wtbar"` (also default if `ips.stat = NULL`),
#'     `"Ztbar"`, `"tbar"`,
#' @param \dots further arguments (can set argument `p.approx` to be passed on
#'  to non-exported function `padf` to either `"MacKinnon1994"` or `"MacKinnon1996"`
#'  to force a specific method for p-value approximation, the latter only being 
#'  possible if package 'urca' is installed).
#' @return For purtest: An object of class `"purtest"`: a list with the elements
#'   named:
#' - `"statistic"` (a `"htest"` object),
#' - `"call"`,
#' - `"args"`,
#' - `"idres"` (containing results from the individual regressions),
#' - `"adjval"` (containing the simulated means and variances needed to compute 
#'      the statistic, for `test = "levinlin"` and `"ips"`, otherwise `NULL`),
#' - `"sigma2"` (short-run and long-run variance for `test = "levinlin"`, 
#'       otherwise `NULL`).
#' @export
#' @importFrom stats setNames
#' @author Yves Croissant and for "Pm", "invnormal", and "logit" Kevin Tappe
#' @seealso [cipstest()], [phansitest()]

#' @references
#' \insertAllCited{}
#'  
#' @keywords htest
#
# TODO: add more examples / interfaces
#' @examples
#' 
#' data("Grunfeld", package = "plm")
#' y <- data.frame(split(Grunfeld$inv, Grunfeld$firm)) # individuals in columns
#' 
#' purtest(y, pmax = 4, exo = "intercept", test = "madwu")
#' 
#' ## same via pseries interface
#' pGrunfeld <- pdata.frame(Grunfeld, index = c("firm", "year"))
#' purtest(pGrunfeld$inv, pmax = 4, exo = "intercept", test = "madwu")
#' 
#' ## same via formula interface
#' purtest(inv ~ 1, data = Grunfeld, index = c("firm", "year"), pmax = 4, test = "madwu")
#' 
purtest <- function(object, data = NULL, index = NULL,
                    test = c("levinlin", "ips", "madwu", "Pm" , "invnormal", "logit", "hadri"),
                    exo = c("none", "intercept", "trend"),
                    lags = c("SIC", "AIC", "Hall"),
                    pmax = 10, Hcons = TRUE, q = NULL, dfcor = FALSE, 
                    fixedT = TRUE, ips.stat = NULL, ...) {
  
  data.name <- paste(deparse(substitute(object)))

  id <- NULL
  if (inherits(object, "formula")){
    # exo is derived from specified formula:
    terms <- terms(object)
    lab <- labels(terms)
    if(length(lab) == 0L){
      exo <- if(attr(terms, "intercept")) "intercept" else "none"
    }
    else{
      if(length(lab) > 1L || lab != "trend") stop("incorrect formula")
      exo <- "trend"
    }
    object <- paste(deparse(object[[2L]]))
    if(exists(object) && is.vector(get(object))){
      # is.vector because, eg, inv exists as a function
      object <- get(object)
    }
    else{
      if(is.null(data)) stop("unknown response")
      else{
        if(!inherits(data, "data.frame")) stop("'data' does not specify a data.frame/pdata.frame")
        if(object %in% names(data)){
          object <- data[[object]]
          if(!inherits(data, "pdata.frame")){
            if(is.null(index)) stop("the index attribute is required")
            else data <- pdata.frame(data, index)
          }
          id <- unclass(attr(data, "index"))[[1L]]
        }
        else{
          stop(paste0("unknown response (\"", object, "\" not in data)"))
        }
      }
    }
  } # END object is a formula
  else{
    exo <- match.arg(exo)
    if(is.null(dim(object))){
      if(inherits(object, "pseries")){
        id <- unclass(attr(object, "index"))[[1L]]
      }
      else stop("the individual dimension is undefined") # cannot derive individual dimension from a vector if not pseries
    }
    if(is.matrix(object) || is.data.frame(object)) {
      if(!is.null(data)) stop("object is data.frame or matrix but argument 'data' is not NULL")
      if(is.matrix(object)) object <- as.data.frame(object)
    }
  }
  
  # by now, object is either a pseries or a data.frame
  object <- na.omit(object)
  if(!is.null(attr(object, "na.action")))
    warning("NA value(s) encountered and dropped, results may not be reliable")
  
  if(!inherits(object, "data.frame")){
    if(is.null(id)) stop("the individual dimension is undefined")
    # adjust 'id' to correspond data in 'object' after NA dropping:
    if(!is.null(attr(object, "na.action"))) id <- id[-attr(object, "na.action")]
    object <- collapse::gsplit(object, id, use.g.names = TRUE) # was: object <- split(object, id)
  } else {
    if(!ncol(object) > 1L) warning("data.frame or matrix specified in argument object does not contain more than one individual (individuals are supposed to be in columns)")
    object <- as.list(object)
  }
  # by now, object is a list
  
  cl <- match.call()
  test <- match.arg(test)
  ips.stat <- if (is.null(ips.stat)) "Wtbar" else ips.stat # set default for IPS test
  if (is.character(lags)) lags <- match.arg(lags) # if character, match from list of possible values
  args <- list(test = test, exo = exo, pmax = pmax, lags = lags,
               dfcor = dfcor, fixedT = fixedT, ips.stat = ips.stat)
  n <- length(object) # number of individuals, assumes object is a list
  sigma2 <- NULL
  pvalues.trho <- NULL
  ips.tbar.crit <- NULL
  alternative <- "stationarity"
  method <- paste0(purtest.names.test[test], " (ex. var.: ",
                   purtest.names.exo[exo],")")
  
  # If Hadri test, call function and exit early
  if(test == "hadri") return(hadritest(object, exo, Hcons, dfcor,
                                        method, cl, args, data.name, ...)) 
  
  # compute the lags for each time series if necessary
  if(is.numeric(lags)){
    if(length(lags) == 1L) lags <- rep(lags, n)
    else{
      if(length(lags) != n) stop("lags should be of length 1 or n")
      else lags <- as.list(lags)
    }
  }
  else{ # one of the lag selection procedures SIC, AIC, or Hall
    lag.method <- match.arg(lags)
    lags <- sapply(object, function(x)
      lagsel(x, exo = exo, method = lag.method,
             pmax = pmax, dfcor = dfcor, fixedT = fixedT))
  }
  
  # compute the augmented Dickey-Fuller regressions for each time series
  comp.aux.reg <- (test == "levinlin")
  idres <- mapply(function(x, y)
                  tsadf(x, exo = exo, lags = y, dfcor = dfcor, comp.aux.reg = comp.aux.reg, ...),
                  object, as.list(lags), SIMPLIFY = FALSE)
  
  
  if(test == "levinlin"){
    if(length(T.levinlin <- unique(lengths(object, use.names = FALSE))) > 1L)
      stop("test = \"levinlin\" is not applicable to unbalanced panels")
    
    # get the adjustment parameters for the mean and the variance
    adjval <- adj.levinlin.value(T.levinlin, exo = exo)
    mymu  <- adjval[1L]
    mysig <- adjval[2L]
    # calculate the ratio of LT/ST variance
    sigmaST <- sapply(idres, function(x) x[["sigma"]])
    sigmaLT <- sqrt(sapply(object, longrunvar, exo = exo, q = q))
    si <- sigmaLT/sigmaST # LLC (2002), formula 6
    sbar <- mean(si) 
    
    # stack the residuals of each time series and perform the pooled
    # regression
    res.level <- unlist(lapply(idres, function(x) x$resid[["resid.level"]]), use.names = FALSE)
    res.diff  <- unlist(lapply(idres, function(x) x$resid[["resid.diff"]]), use.names = FALSE)
    z <- my.lm.fit(as.matrix(res.level), res.diff, dfcor = dfcor)
    # compute the Levin-Lin-Chu statistic
    tildeT <- T.levinlin - mean(lags) - 1
    sigmaeps2 <- z$rss / (n * tildeT)
    rho   <- z$coef
    sdrho <- z$se
    trho  <- rho/sdrho
    stat <- (trho - n * tildeT * sbar / sigmaeps2 * sdrho * mymu)/mysig # LLC (2002), formula 12
    names(stat) <- "z" # avoids a concatenated name like z.x1
    pvalue <- pnorm(stat, lower.tail = TRUE) # need lower.tail = TRUE (like ADF one-sided to the left)
    parameter <- NULL
    sigma2 <- cbind(sigmaST^2, sigmaLT^2)
    colnames(sigma2) <- c("sigma2ST", "sigma2LT")
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0)
  }
  
  if(test == "ips"){
    if(exo == "none") stop("exo = \"none\" (\"~0\" in the formula interface) is not a valid option for the Im-Pesaran-Shin test")
    if(!is.null(ips.stat) && !any(ips.stat %in% c("Wtbar", "Ztbar", "tbar"))) stop("argument 'ips.stat' must be one of \"Wtbar\", \"Ztbar\", \"tbar\"")
    lags  <- vapply(idres, function(x) x[["lags"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    L.ips <- vapply(idres, function(x) x[["T"]],    FUN.VALUE = 0.0, USE.NAMES = FALSE) - lags - 1
    trho  <- vapply(idres, function(x) x[["trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    tbar <- mean(trho)
    parameter <- NULL
    adjval <- NULL
    
    
    if(is.null(ips.stat) || ips.stat == "Wtbar") {
      # calc Wtbar - default
      adjval <- mapply(function(x, y) adj.ips.wtbar.value(x, y, exo = exo),
                       as.list(L.ips), as.list(lags))
      Etbar <- mean(adjval[1L, ])
      Vtbar <- mean(adjval[2L, ])
      stat <- c("Wtbar" = sqrt(n) * (tbar - Etbar) / sqrt(Vtbar)) # (3.13) = (4.10) in IPS (2003) [same generic formula for Ztbar and Wtbar]
      pvalue <- pnorm(stat, lower.tail = TRUE) # need lower.tail = TRUE (like ADF one-sided to the left)
    }
    
    if(!is.null(ips.stat) && ips.stat == "Ztbar") {
      # calc Ztbar
      adjval <- adjval.ztbar <- sapply(L.ips, adj.ips.ztbar.value, 
                                       adj.ips.zbar.time, adj.ips.zbar.means, adj.ips.zbar.vars)
      rownames(adjval) <- rownames(adjval.ztbar) <- c("mean", "var")
      Etbar.ztbar <- mean(adjval.ztbar[1L, ])
      Vtbar.ztbar <- mean(adjval.ztbar[2L, ])
      stat <- stat.ztbar <- c("Ztbar" = sqrt(n) * (tbar - Etbar.ztbar) / sqrt(Vtbar.ztbar)) # (3.13) = (4.10) in IPS (2003) [same generic formula for Ztbar and Wtbar]
      pvalue <- pvalue.ztbar <- pnorm(stat.ztbar, lower.tail = TRUE)
    }
    
    if(!is.null(ips.stat) && ips.stat == "tbar") {
      # give tbar
      T.tbar <- unique(lengths(object, use.names = FALSE))
      if(length(T.tbar) > 1L) stop("tbar statistic is not applicable to unbalanced panels")
      if(any(lags > 0L)) stop("tbar statistic is not applicable when 'lags' > 0 is specified")
      L.tbar <- T.tbar - 1
      stat <- tbar
      names(stat) <- "tbar"
      pvalue <- NA
      ips.tbar.crit <- critval.ips.tbar.value(ind = n, time = L.tbar, critval.ips.tbar, exo = exo)
      adjval <- NULL
    }
  }
  
  if(test == "madwu"){
    # Maddala/Wu (1999), pp. 636-637; Choi (2001), p. 253; Baltagi (2013), pp. 283-285
    ## does not require a balanced panel
    trho         <- vapply(idres, function(x) x[["trho"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    stat <- c(chisq = - 2 * sum(log(pvalues.trho)))
    n.madwu <- length(trho)
    parameter <- c(df = 2 * n.madwu)
    pvalue <- pchisq(stat, df = parameter, lower.tail = FALSE)
    adjval <- NULL
  }
  
  if(test == "Pm"){
    ## Choi Pm (modified P) [proposed for large N]
    trho         <- vapply(idres, function(x) x[["trho"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    n.Pm <- length(trho)
    # formula (18) in Choi (2001), p. 255:
    stat <- c( "Pm" = 1/(2 * sqrt(n.Pm)) * sum(-2 * log(pvalues.trho) - 2) ) # == -1/sqrt(n.Pm) * sum(log(pvalues.trho) +1)
    pvalue <- pnorm(stat, lower.tail = FALSE) # one-sided
    parameter <- NULL
    adjval <- NULL
  }
  
  if(test == "invnormal"){
    # inverse normal test as in Choi (2001)
    trho         <- vapply(idres, function(x) x[["trho"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    n.invnormal <- length(trho)
    stat <- c("z" = sum(qnorm(pvalues.trho)) / sqrt(n.invnormal)) # formula (9), Choi (2001), p. 253
    pvalue <- pnorm(stat, lower.tail = TRUE) # formula (12), Choi, p. 254
    parameter <- NULL
    adjval <- NULL
  }
  
  if(test == "logit"){
    # logit test as in Choi (2001)
    trho         <- vapply(idres, function(x) x[["trho"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    pvalues.trho <- vapply(idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    n.logit <- length(trho)
    l_stat <-  c("L*" = sum(log(pvalues.trho / (1 - pvalues.trho)))) # formula (10), Choi (2001), p. 253
    k <- (3 * (5 * n.logit + 4)) / (pi^2 * n.logit * (5 * n.logit + 2))
    stat <- sqrt(k) * l_stat  # formula (13), Choi (2001), p. 254
    parameter <- c("df" = 5 * n.logit + 4)
    pvalue <- pt(stat, df = parameter, lower.tail = TRUE)
    adjval <- NULL
  }
  
  htest <- structure(list(statistic     = stat,
                          parameter     = parameter,
                          alternative   = alternative,
                          data.name     = data.name,
                          method        = method,
                          p.value       = pvalue,
                          ips.tbar.crit = ips.tbar.crit),
                     class = "htest")
  
  result <- list(statistic = htest,
                 call      = cl,
                 args      = args,
                 idres     = idres,
                 adjval    = adjval,
                 sigma2    = sigma2)
  class(result) <- "purtest"
  result
}


#' @rdname purtest
#' @export
print.purtest <- function(x, ...){
  print(x$statistic, ...)
  if (x$args$test == "ips" && x$args$ips.stat == "tbar"){
    cat("tbar critival values:\n")
    print(x$statistic$ips.tbar.crit, ...)
  }
  invisible(x)
}

#' @rdname purtest
#' @export
summary.purtest <- function(object, ...){
  if(!object$args$test == "hadri"){
    lags   <- vapply(object$idres, function(x) x[["lags"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    L      <- vapply(object$idres, function(x) x[["T"]],      FUN.VALUE = 0.0, USE.NAMES = FALSE)
    rho    <- vapply(object$idres, function(x) x[["rho"]],    FUN.VALUE = 0.0, USE.NAMES = FALSE)
    trho   <- vapply(object$idres, function(x) x[["trho"]],   FUN.VALUE = 0.0, USE.NAMES = FALSE)
    p.trho <- vapply(object$idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    sumidres <- cbind("lags"   = lags,
                      "obs"    = L - lags - 1,
                      "rho"    = rho,
                      "trho"   = trho,
                      "p.trho" = p.trho)
    
    if (object$args$test == "ips" && !object$args$ips.stat == "tbar") {
      sumidres <- cbind(sumidres, t(object$adjval))
    }
    if (object$args$test == "levinlin") {
      sumidres <- cbind(sumidres, object$sigma2)
    }
  } else {
    # hadri case
    LM     <- vapply(object$idres, function(x) x[["LM"]],     FUN.VALUE = 0.0, USE.NAMES = FALSE)
    sigma2 <- vapply(object$idres, function(x) x[["sigma2"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    sumidres <- cbind("LM" = LM, "sigma2" = sigma2)
  }
  
  nam <- names(object$idres)
  rownames(sumidres) <- nam
  object$sumidres <- sumidres
  class(object) <- c("summary.purtest", "purtest")
  object
}

#' @rdname purtest
#' @export
print.summary.purtest <- function(x, ...){
  cat(paste(purtest.names.test[x$args$test], "\n"))
  cat(paste("Exogenous variables:", purtest.names.exo[x$args$exo], "\n"))
  if (x$args$test != "hadri") {
    thelags <- vapply(x$idres, function(x) x[["lags"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    if (is.character(x$args$lags)){
      lagselectionmethod <- if (x$args$lags == "Hall") "Hall's method" else x$args$lags
      cat(paste0("Automatic selection of lags using ", lagselectionmethod, ": ",
                 min(thelags), " - ", max(thelags), " lags (max: ", x$args$pmax, ")\n"))
    }
    else{
      cat("User-provided lags\n")
    }
  }
  
  if (x$args$test == "ips") {
    cat(paste(paste0("statistic (", x$args$ips.stat,"):"), round(x$statistic$statistic, 3), "\n"))
  } else {
    cat(paste("statistic:", round(x$statistic$statistic, 3), "\n"))
  }
  cat(paste("p-value:", round(x$statistic$p.value, 3),   "\n"))
  if (x$args$test == "ips" && x$args$ips.stat == "tbar"){
    cat("tbar critival values:\n")
    print(x$statistic$ips.tbar.crit, ...)
  }
  cat("\n")
  print(x$sumidres, ...)
  invisible(x)
}





#' Simes Test for unit roots in panel data
#' 
#' Simes' test of intersection of individual hypothesis tests
#' (\insertCite{SIMES:86;textual}{plm}) applied to panel unit root tests as 
#' suggested by \insertCite{HANCK:13;textual}{plm}.
#' 
#' Simes' approach to testing is combining p-values from single hypothesis tests
#' with a global (intersected) hypothesis. \insertCite{HANCK:13;textual}{plm}
#' mentions it can be applied to any panel unit root test which yields a p-value
#' for each individual series.
#' The test is robust versus general patterns of cross-sectional dependence.
#' 
#' Further, this approach allows to discriminate between individuals for which
#' the individual H0 (unit root present for individual series) is rejected/is
#' not rejected by Hommel's procedure (\insertCite{HOMM:88;textual}{plm}) for
#' family-wise error rate control (FWER) at a pre-specified significance level
#' \eqn{\alpha} via argument `alpha` (defaulting to `0.05`), i.e., it controls 
#' for the multiplicity in testing.
#' 
#' The function `phansitest` takes as main input `object` either a plain numeric
#' containing p-values of individual tests or a `purtest` object which holds
#' a suitable pre-computed panel unit root test (one that produces p-values per
#' individual series).
#' 
#' The function's return value (see section Value) is a list with detailed
#' evaluation of the applied Simes test.
#' 
#' The associated `print` method prints a verbal evaluation.
#' 
#' @aliases phansitest
#' @param object either a numeric containing p-values of individual unit root 
#' test results (does not need to be sorted) or a suitable `purtest` object
#' (as produced by `purtest()` for a test which gives p-values of the individuals
#' (Hadri's test in `purtest` is not suitable)),
#' @param alpha numeric, the pre-specified significance level (defaults to `0.05`),
#' @param x an object of class `c("phansitest", "list")` as produced by 
#'          `phansitest` to be printed,
#' @param cutoff integer, cutoff value for printing of enumeration of individuals 
#'          with rejected individual H0, for print method only,
#' @param \dots further arguments (currently not used).
#' 
#' @return For `phansitest`, an object of class `c("phansitest", "list")` which 
#'         is a list with the elements:
#' - `id`: integer, the identifier of the individual (integer sequence referring to
#' position in input),
#' - `name`: character, name of the input's individual (if it has a name,
#' otherwise "1", "2", "3", ...),
#' - `p`: numeric, p-values as input (either the numeric or extracted from
#' the purtest object),
#' - `p.hommel`: numeric, p-values after Hommel's transformation,
#' - `rejected`: logical, indicating for which individual the individual null
#' hypothesis is rejected (`TRUE`)/non-rejected (`FALSE`) (after controlling
#' for multiplicity),
#' - `rejected.no`: integer, giving the total number of rejected individual series,
#' - `alpha`: numeric, the input `alpha`.
#' 
#' @export
#' @importFrom stats p.adjust
#' 
#' @author Kevin Tappe
#' @seealso [purtest()], [cipstest()]
#' 
#' @references
#' \insertAllCited{}
#'  
#' @keywords htest
#
#' @examples
#' 
#' ### input is numeric (p-values)
#' #### example from Hanck (2013), Table 11 (left side)
#' pvals <- c(0.0001,0.0001,0.0001,0.0001,0.0001,0.0001,0.0050,0.0050,0.0050,
#'            0.0050,0.0175,0.0175,0.0200,0.0250,0.0400,0.0500,0.0575,0.2375,0.2475)
#'
#' countries <- c("Argentina","Sweden","Norway","Mexico","Italy","Finland","France",
#'               "Germany","Belgium","U.K.","Brazil","Australia","Netherlands",
#'               "Portugal","Canada", "Spain","Denmark","Switzerland","Japan")
#' names(pvals) <- countries
#' 
#' h <- phansitest(pvals)
#' print(h)              # (explicitly) prints test's evaluation
#' print(h, cutoff = 3L) # print only first 3 rejected ids 
#' h$rejected # logical indicating the individuals with rejected individual H0
#' 
#' 
#' ### input is a (suitable) purtest object
#' data("Grunfeld", package = "plm")
#' y <- data.frame(split(Grunfeld$inv, Grunfeld$firm))
#' obj <- purtest(y, pmax = 4, exo = "intercept", test = "madwu")
#' 
#' phansitest(obj)
#' 
phansitest <- function(object, alpha = 0.05) {
  
  is.purtest <- if(inherits(object, "purtest")) TRUE else FALSE
  if(!is.purtest) {
    if(is.numeric(object)) {
      if(anyNA(object)) stop("input p-values in 'object' contain at least one NA/NaN value")
      n <- length(object)
      p <- object
    } else {
      stop("argument 'object' needs to specify either a 'purtest' object or a numeric")
    }
  } else {
    # purtest object
    if(object$args$test == "hadri") stop("phansitest() [Hanck/Simes' test] not possible for purtest objects based on Hadri's test")
    p <- vapply(object$idres, function(x) x[["p.trho"]], FUN.VALUE = 0.0, USE.NAMES = FALSE)
    n <- length(p)
  }
  
  id <- seq_len(n)
  names(id) <- if(!is.null(names(p))) names(p) else id
  
  p.hommel <- p.adjust(p, method = "hommel")
  rejected.ind <- p.hommel <= alpha    # is TRUE for individual-H0-rejected individuals
  rejected.ind.no <- sum(rejected.ind) # number of rejected individuals
  
  res <- structure(list(id           = id,
                        name         = names(id),
                        p            = p,
                        p.hommel     = p.hommel,
                        rejected     = rejected.ind,
                        rejected.no  = rejected.ind.no,
                        alpha        = alpha),
                   class = c("phansitest", "list"))
  return(res)
}


#' @rdname phansitest
#' @export
print.phansitest <- function(x, cutoff = 10L, ...) {
  if(round(cutoff) != cutoff) stop("Argument 'cutoff' has to be an integer")
  id         <- x$id
  alpha      <- x$alpha
  rej.ind    <- x$rejected
  rej.ind.no <- x$rejected.no
  n <- length(rej.ind)
  H0.txt <- "H0: All individual series have a unit root\n"
  HA.txt <- "HA: Stationarity for at least some individuals\n"
  H0.rej.txt     <- "H0 rejected (globally)"
  H0.not.rej.txt <- "H0 not rejected (globally)"
  test.txt <- "    Simes Test as Panel Unit Root Test (Hanck (2013))"
  
  cat("\n")
  cat(paste0("    ", test.txt, "\n"))
  cat("\n")
  cat(H0.txt)
  cat(HA.txt)
  cat("\n")
  cat(paste0("Alpha: ", alpha, "\n"))
  cat(paste0("Number of individuals: ", n, "\n"))
  
  cat("\n")
  cat("Evaluation:\n")
  if(rej.ind.no > 0L) {
    cat(paste0(" ", H0.rej.txt, "\n"))
    cat("\n")
    
    if(rej.ind.no <= cutoff && cutoff >= 0L) {
      ind.cutoff <- paste0(paste0(id[rej.ind], collapse = ", "))
      ind.txt <- paste0("Individual H0 rejected for ", rej.ind.no, " individual(s) (integer id(s)):\n")
      cat(paste0(" ", ind.txt))
      cat(paste0("  ", ind.cutoff, "\n"))
    }
    else { # cut off enumeration of individuals if more than specified in cutoff
      if(cutoff > 0L) {
        ind.cutoff <- paste0(paste0(id[rej.ind][seq_len(cutoff)], collapse = ", "), ", ...")
        ind.txt <- paste0("Individual H0 rejected for ", rej.ind.no ," individuals, only first ", cutoff, " printed (integer id(s)):\n")
        cat(paste0(" ", ind.txt))
        cat(paste0("  ", ind.cutoff, "\n"))
      } else cat(paste0(" Individual H0 rejected for ", rej.ind.no ," individuals. None printed as 'cutoff' set to ", cutoff, ".\n"))
    }
  } else {
    cat(paste0(" ", H0.rej.txt, "\n"))
  }
  invisible(x)
}
Any scripts or data that you put into this service are public.
plm documentation built on April 11, 2025, 5:52 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
plm
Linear Models for Panel Data

R/test_uroot.R
In plm: Linear Models for Panel Data

Defines functions print.phansitest phansitest print.summary.purtest summary.purtest print.purtest purtest hadritest longrunvar tsadf critval.ips.tbar.value adj.ips.ztbar.value adj.ips.wtbar.value adj.levinlin.value lagsel selectT YCdiff YClags padf

Documented in phansitest print.phansitest print.purtest print.summary.purtest purtest summary.purtest

Try the plm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plm Linear Models for Panel Data

R/test_uroot.R In plm: Linear Models for Panel Data

Defines functions print.phansitest phansitest print.summary.purtest summary.purtest print.purtest purtest hadritest longrunvar tsadf critval.ips.tbar.value adj.ips.ztbar.value adj.ips.wtbar.value adj.levinlin.value lagsel selectT YCdiff YClags padf

Documented in phansitest print.phansitest print.purtest print.summary.purtest purtest summary.purtest

Try the plm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plm
Linear Models for Panel Data

R/test_uroot.R
In plm: Linear Models for Panel Data