R/ndvuong.R
In micsr: Microeconometrics with R

Documented in ndvuong vuong_sim

#' Non-degenerate Vuong test
#' 
#' An unhanced version of the Vuong test with a small-sample bias
#' correction
#' 
#' @aliases ndvuong
#' @param x a first fitted model
#' @param y a second fitted model
#' @param size the size of the test
#' @param pval should the p-value be computed ?
#' @param nested a boolean, `TRUE` for nested models
#' @param vartest a boolean, if `TRUE`, the variance test is computed
#' @param ndraws the number of draws for the simulations
#' @param diffnorm a creuser
#' @param seed the seed
#' @param numbers a user provided matrix of random numbers
#' @param nd a boolean, if `TRUE` (the default) the non-degenarate Vuong test is computed
#' @param print.level the level of details to be printed
#' @return an object of class `"htest"`.
#' @importFrom Rdpack reprompt
#' @importFrom CompQuadForm davies
#' @importFrom stats pnorm
#' @seealso the classical Vuong test is implemented in `pscl::vuong` and `nonnest2::vuongtest`.
#' @references
#'
#' \insertRef{VUON:89}{micsr}
#'
#' \insertRef{SHI:15}{micsr}
#'
#' @importFrom stats ecdf logLik optimize qnorm quantile rnorm uniroot
#' @keywords htest
## #' @examples
## #' ll <- lm(dist ~ log(speed), cars)
## #' loglin <- loglm(dist ~ log(speed), cars)
## #' ndvuong(ll, loglin)
#' @export
ndvuong <- function(x, y, size = 0.05, pval = TRUE,
                    nested = FALSE, vartest = FALSE,
                    ndraws = 1E04, diffnorm = 0.1, seed = 1,
                    numbers = NULL, nd = TRUE,
                    print.level = 0){
    
    # if pval is TRUE, size is adjusted, otherwise it is fixed
    data.name <- c(
        paste(deparse(substitute(x))),
        paste(deparse(substitute(y)))
    )
    data.name <- paste(data.name, collapse = "-")
    set.seed(seed)

    # the two models are renamed f and g like in Vuong paper, the
    # generics llobs, estfun and bread are used to extract the
    # relevant features of the log-likelihood
    f <- x                    ;  g  <- y    
    N <- length(llobs(f))
    K <- ncol(estfun(f)) + ncol(estfun(g))
    
    # Compute the LR statistic as an average and its variance
    LR <- as.numeric(logLik(f) - logLik(g)) / N
    w2 <- sum((llobs(f)- llobs(g)) ^ 2) / N - LR ^ 2#(LR / N) ^ 2
    
    # solveA is a block-diagonal matrix containing (H_f / N) ^ -1 and
    # - (H_g / N) ^ - 1. The bread function returns - (H / N) ^ - 1 =
    # - N x H ^ -1 = - N x vcov
    solveA <- bdiag(- bread(f), bread(g))
    # B binds the column of G_f and - G_g
    B <- cbind(estfun(f), - estfun(g))
    # substract the mean of the gradient (usefull if the gradient is
    # not null at the optimum)
    B <- t(t(B) - apply(B, 2, mean))
    # B is the estimation of the variance of the gradient
    B <- crossprod(B) / N
    eigen_B <- eigen(B)
    sqrtB <- eigen_B$vectors %*% diag(sqrt(abs(eigen_B$values))) %*% t(eigen_B$vectors)
    V <- sqrtB %*% solveA %*% sqrtB
    V <- eigen(V, symmetric = TRUE)$values
    # Compute the 4 matrices that are used to compute empirically the
    # distribution of the Vuong statistic
    if (! nested){
        # rho is a K vector containing 0 except at the position of the
        # higher eigen value of V (in absolute value), where a 1 is
        # inserted
        rho <- as.numeric((abs(V) - max(abs(V))) == 0)
        rho <- rho / sqrt(sum(rho))
    }
    else{
        # rho is irrelevant for nested models (a vector of K 0)
        rho <- rep(0, length(V))
    }
    # Sigma is the covariance matrix of the K + 1 normal random normal
    # draws
    Sigma <- rbind(c(1, rho),
                   cbind(rho, diag(K)))
    # To get the square root of Sigma, just insert a line of 0 at the
    # position of the highest eigen value
    Sigma[c(FALSE, as.logical(rho)), ] <- 0

    # if numbers is not null, it is a user provided matrix of random
    # draws. Otherwise rnorm is used.
    if (is.null(numbers)) Z <- matrix(rnorm(ndraws * (K + 1)), ndraws)
    else Z <- numbers

    # then transform the random numbers according to the relevant
    # covariance matrix
    Z <- Z %*% Sigma
    ZL <- Z[,   1]
    ZP <- Z[, - 1]

    # the scaled sum of the eigen values
    trVsq <- sum(V ^ 2)
    Vnmlzd <- V / sqrt(trVsq)

    # the 4 scalars used to compute random draws from the same
    # distribution as the one of the modified Vuong statistic
    A1 <- ZL
    A2 <- as.numeric(apply(ZP, 1, function(x) sum(Vnmlzd * x ^ 2) / 2) -
                     sum(Vnmlzd) / 2)
    A3 <- as.numeric(apply(ZP, 1, function(x) sum(Vnmlzd * rho * x)))
    A4 <- as.numeric(apply(ZP, 1, function(x) sum(x ^ 2 * Vnmlzd ^ 2)))

    Tmod <- function(sigma, cst){
        # R draws in the distribution of the Vuong statistic
        num <- sigma * A1 - A2
        denom <- sigma ^ 2 - 2 * sigma * A3 + A4 + cst
        num / sqrt(denom)
    }

    quant <- function(sigma, cst, size)
        # compute the empirical quantile of level 1 - size in the
        # distribution of the Vuong statistic
        as.numeric(quantile(abs(Tmod(sigma, cst)), 1 - size))
        
    sigstar <- function(cst, size)
        # compute the value of sigma which maximize the empirical
        # quantile of level 1 - size of the distribution of the Vuong
        # statistic
        optimize(function(x) quant(x, cst, size), c(0, 5), maximum = TRUE)$maximum
    
    seekpval <- function(size){
        # for a given size, compute the constant so that the critical
        # value equals the target
        c.value <- 0
        cv.value <- quant(sigstar(0, size), 0, size)
        # what is the interest of that equation
        cv.normal <- max(qnorm(1 - size / 2), quantile(abs(ZL), 1 - size))
        cv.normal <- qnorm(size / 2, lower.tail = FALSE)
        cv.target <- cv.normal + diffnorm
        if (cv.value < cv.target){
            cv.value <- max(cv.value, cv.normal)
        }
        else {
            froot <- function(cst) quant(sigstar(cst, size), cst, size) - cv.target
            zo <- uniroot(froot, c(0, 10))
            c.value <- zo$root
            cv.value <- quant(sigstar(c.value, size), c.value, size)
        }
        LRmod <- LR + sum(V) / (2 * N)
        w2mod <- w2 + c.value * sum(V ^ 2) / N
        Tnd <-  sqrt(N) * LRmod / sqrt(w2mod)
        pvalue <- 1 - ecdf(abs(Tmod(sigstar(c.value, size), c.value)))(abs(Tnd))
        list(pvalue = pvalue, stat = Tnd,
             constant = c.value, cv = cv.value)
    }

    if (vartest){
        # compute the variance test and quit"
        W <- eigen(crossprod(B, - solveA), only.values = TRUE)$values
        stat <- N * w2
#        pvalue <- mydavies(stat, W ^ 2)$Qq
        pvalue <- davies(stat, W ^ 2)$Qq
        results <- list(statistic = c(w2 = w2),
                        p.value = pvalue,
                        data.name = data.name,
                        alternative = "positive variance",
                        method = "variance test")
    }
    else{
        if (nested){
            # for the nested model, the statistic is the classical
            # log-likelihood ratio and the distribution is a weighted
            # chi^2
            Tvuong <- 2 * LR * N
            W <- eigen(crossprod(B, - solveA), only.values = TRUE)$values
#            pval_vuong <- mydavies(Tvuong, W)$Qq
            pval_vuong <- davies(Tvuong, W)$Qq
            if (nd){
                # for the non-degenarate version, just modify the
                # numerator, and compute the one sided p-value
                LRmod <- LR + sum(V) / (2 * N)
                Tnd <- sqrt(N) * (LRmod) / sqrt(w2)
            if (Tnd < 0) pvalue <- ecdf(Tmod(0, 0))(Tnd)
                else pvalue <- 1 - ecdf(Tmod(0, 0))(Tnd)
                results <- list(statistic = c(z = Tnd),
                                method = "Non-degenerate Vuong test for nested models",
                                p.value = pvalue,
                                data.name = data.name,
                                alternative = "different models",
                                parameters = c(
                                    vuong_stat = Tvuong,
                                    vuong_p.value = pval_vuong))
            }
            else{
                results <- list(statistic = c(wchisq = Tvuong),
                                method = "Vuong test for nested models",
                                p.value = pval_vuong,
                                data.name = data.name,
                                alternative = "different models")
            }
        }
        else{
            Tvuong <-sqrt(N) * LR / sqrt(w2)
            pval_vuong <- pnorm(abs(Tvuong), lower.tail = FALSE)# * 2 unilaterale
            if (nd){
                if (! pval){
                    results <- seekpval(size)
                    results <- list(statistic = c(z = as.numeric(results$stat)),
                                    method = "Non-degenerate Vuong test for non-nested models",
                                    data.name = data.name,
                                    alternative = "different models",
                                    parameters = c(
                                        size = size,
                                        vuong_stat = Tvuong,
                                        constant = results$constant,
                                        'crit-value' = results$cv,
                                        'sum e.v.' = sum(V),                                        
                                        'vuong_p.value' = pval_vuong))
                }
                else{
                    froot <- function(alpha) seekpval(alpha)$pvalue - alpha
                    if (froot(1E-100) < 0) results <- list(pvalue = 0, stat = Inf, constant = 0, cv = 0)
                    else{
                        pvalue <- uniroot(froot, c(1E-100, 1-1E-100))
                        results <- seekpval(pvalue$root)
                    }
                    results <- list(statistic = c(z = as.numeric(results$stat)),
                                    method = "Non-degenerate Vuong test for non-nested models",
                                    p.value = results$pvalue,
                                    data.name = data.name,
                                    alternative = "different models",
                                    parameters = c(
                                        vuong_stat = Tvuong,
                                        constant = results$constant,
                                        'sum e.v.' = sum(V),                                        
                                        'vuong_p.value' = pval_vuong))
                }
            }
            else{
                results <- list(statistic = c(z = Tvuong),
                                method = "Vuong test for non-nested models",
                                p.value = pval_vuong,
                                data.name = data.name,
                                alternative = "different models")
            }
        }
    }
    structure(results, class = "htest")
}

# a function to construct block-diagonal matrix (can't remember from
# which package it is borrowed)
bdiag <- function(...){
  if (nargs() == 1)
    x <- as.list(...)
  else
    x <- list(...)
  n <- length(x)
  if(n == 0) return(NULL)
  x <- lapply(x, function(y) if(length(y)) as.matrix(y) else
              stop("Zero-length component in x"))
  d <- array(unlist(lapply(x, dim)), c(2, n))
  rr <- d[1,]
  cc <- d[2,]
  rsum <- sum(rr)
  csum <- sum(cc)
  out <- array(0, c(rsum, csum))
  ind <- array(0, c(4, n))
  rcum <- cumsum(rr)
  ccum <- cumsum(cc)
  ind[1,-1] <- rcum[-n]
  ind[2,] <- rcum
  ind[3,-1] <- ccum[-n]
  ind[4,] <- ccum
  imat <- array(1:(rsum * csum), c(rsum, csum))
  iuse <- apply(ind, 2, function(y, imat) imat[(y[1]+1):y[2],
                                               (y[3]+1):y[4]], imat=imat)
  iuse <- as.vector(unlist(iuse))
  out[iuse] <- unlist(x)
  return(out)
} 


#' @importFrom sandwich estfun
#' @export
sandwich::estfun

#' @importFrom sandwich bread
#' @export
sandwich::bread

#' @importFrom sandwich meat
#' @export
sandwich::meat

#' Simulated pdfs for the Vuong statistics using linear models
#'
#' This function can be used to reproduce the examples given by Shi
#' (2015) which illustrate the fact that the distribution of the Vuong
#' statistic may be very different from a standard normal
#' 
#' @aliases vuong_sim
#' @param N sample size
#' @param R the number of replications
#' @param Kf the number of covariates for the first model
#' @param Kg the number of covariates for the second model
#' @param a the share of the variance of `y` explained by the two
#'     competing models
#' @return a numeric of length `N` containing the values of the Vuong statistic
#'
#' @importFrom stats lm.fit
#' 
#' @references
#'
#' \insertRef{SHI:15}{micsr}
#'
#' @examples
#' vuong_sim(N = 100, R = 10, Kf = 10, Kg = 2, a = 0.5)
#' @export
vuong_sim <- function(N = 1E03, R = 1E03, Kf = 15, Kg = 1, a = 0.125){
    Zf <- array(rnorm(R * Kf * N, sd = a / sqrt(Kf)), dim = c(N, Kf, R))
    Zg <- array(rnorm(R * Kg * N, sd = a / sqrt(Kg)), dim = c(N, Kg, R))
    eps <- matrix(rnorm(R * N, sd = sqrt(1 - a ^ 2)), N, R)
    y <- apply(Zf, c(1, 3), sum) + apply(Zg, c(1, 3), sum) + eps
    get_lnl <- function(x){
        res <- x$residuals
        N <- length(res)
        s2 <- sum(res ^ 2) / N
        - 1 / 2 * log(2 * pi) - 1 / 2 * log(s2) - 1 / 2 * res ^ 2 / s2
    }
    res_f <- sapply(1:R, function(i) get_lnl(lm.fit(Zf[,, i], y[, i])))
    res_g <- sapply(1:R, function(i) get_lnl(lm.fit(Zg[,, i, drop = FALSE], y[, i])))
    LR <- apply(res_f - res_g, 2, mean)
    w2 <- apply( (res_f - res_g) ^ 2, 2, mean) - LR ^ 2
    Vuong <- sqrt(N) * LR / sqrt(w2)
    Vuong
}



#' Turnout
#'
#' Turnout in Texas liquor referenda
#'
#' @name turnout
#' @docType data
#' @keywords dataset
#'
#' @format a list of three fitted models:
#' - group: the group-rule-utilitarian model
#' - intens: the intensity model
#' - sur: the reduced form SUR model
#'
#' @description these three models are replication in R of stata's
#'     code available on the web site of the American Economic
#'     Association. The estimation is complicated by the fact that
#'     some linear constraints are imposed.
#' @source
#' [American Economic Association data archive](https://www.aeaweb.org/aer/).
#' @references
#' \insertRef{COAT:CONL:04}{micsr}
#'
#' @examples
#' ndvuong(turnout$group, turnout$intens)
#' ndvuong(turnout$group, turnout$sur)
#' ndvuong(turnout$intens, turnout$sur)
NULL


## #' @rdname ndvuong
## #' @method bread maxLik2
## #' @export
## bread.maxLik2 <- function(x, ...) - solve(x$hessian) * length(x$lnl)

## #' @rdname ndvuong
## #' @method estfun maxLik2
## #' @export
## estfun.maxLik2 <- function(x, ...) x$gradient

## #' @rdname ndvtest
## #' @method logLik maxLik2
## #' @export
## logLik.maxLik2 <- function(object, ...) sum(object$lnl)