R/testGL.R

#' @title Test for Lorenz and Generalized Lorenz dominance
#'
#' @author A. Berihuete, C.D. Ramos and M.A. Sordo
#'
#' @description Statistical test procedure given by Xu (1997) to study Generalized Lorenz dominance from sample Generalized Lorenz curve estimates.  Lorenz dominance from sample Lorenz curve estimates can also be studied (Beach and Kaliski, 1986).
#'
#'
#' @param dataset1 a data.frame containing the variables.
#' @param dataset2 a data.frame containing the variables.
#' @param ipuc a character string indicating the variable name of the income per unit of consumption. Default is "ipuc".
#' @param hhcsw a character string indicating the variable name of the household cross-sectional weight. Default is "DB090".
#' @param hhsize a character string indicating the variable name of the household size. Default is "HX040".
#' @param generalized logical; if FALSE the test will be applied to compare two Lorenz curves. Otherwise Generalized Lorenz curves will be compared.
#' @param samplesize an integer which represents the number of Lorenz (Generalized Lorenz) curve ordinates to be estimated for comparison. The default is 10.
#' @param alpha a scalar indicating the significance level. Default is 0.05.
#'
#' @details The null hypothesis to be tested is that the  Lorenz (Generalized Lorenz) curve calculated from dataset1 dominates the one calculated from dataset2.
#'
#'
#' @return A list with the following components:
#'
#' \itemize{
#' \item Tvalue the value of the test-statistic
#' \item p.value simulated p-value of the test-statistic Tvalue (Wolak, 1989). It is calculated only when the Tvalue falls into an inconclusive region.
#' \item decision if the Tvalue is less than the lower-bound of the critical value at the \eqn{alpha} significance level the decision is "Do not reject null hypothesis". If the Tvalue is greater than the upper-bound of the critical value at the \eqn{alpha} significance level the decision is "Reject null hypothesis". Lower and upper-bounds critical values are obtained from Kodde and Palm (1986). If Tvalue falls into an inconclusive region (between the lower- and upper-bounds) the p-value will be estimated following Wolak (1989).
#' }
#'
#'
#' @references C. M. Beach and R. Davidson (1983) Distribution-free statistical inference with Lorenz curves
#' @references C. M. Beach and S. F. Kaliski (1986) Curve inference with sample weights: and application to the distribution of unemployment experience, Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 35, No. 1, 38--45.
#' @references D.A. Kodde and F.C. Palm (1986) Wald criteria for jointly testing equality and inequality restrictions, Econometrica, 50, 1243--1248.
#' @references F.A. Wolak (1989), Testing inequality constrains in linear econometric models, Journal of Econometrics, 41, 205--235.
#' @references K. Xu (1997) Asymptotically distribution-free statistical test for generalized Lorenz curves: An alternative approach, Journal of Income Distribution, 7(1), 45--62.
#'
#' @examples
#' data(eusilc2)
#' ATdataset1 <- setupDataset(eusilc2, country = "AT", region = "Burgenland")
#' ATdataset2 <- setupDataset(eusilc2, country = "AT", region = "Carinthia")
#' testGL(ATdataset1, ATdataset2, generalized = TRUE, samplesize = 10, alpha = 0.05)
#'
#' @seealso OmegaGL, setupDataset
#' @import plyr
#' @import mvtnorm
#' @import rootSolve
#' @export


testGL <- function(dataset1, dataset2,
                   ipuc = "ipuc", # The income per unit of consumption
                   hhcsw = "DB090", # Household cross-sectional weight
                   hhsize = "HX040", # Household size
                   generalized = TRUE, samplesize = 10, alpha = 0.05){

  list1 <- OmegaGL(dataset1,
                   ipuc = ipuc, # The income per unit of consumption
                   hhcsw = hhcsw, # Household cross-sectional weight
                   hhsize = hhsize, # Household size
                   samplesize = samplesize, generalized = generalized)

  list2 <- OmegaGL(dataset2,
                   ipuc = ipuc, # The income per unit of consumption
                   hhcsw = hhcsw, # Household cross-sectional weight
                   hhsize = hhsize, # Household size
                   samplesize = samplesize, generalized = generalized)

  if(!generalized){
    estim.gl <- list1$gl.curve[-samplesize] - list2$gl.curve[-samplesize]
  }else{
    estim.gl <- list1$gl.curve - list2$gl.curve
    }

  Omega1 <- list1$Omega
  Omega2 <- list2$Omega
  OmegaTotal <-  Omega1 + Omega2
  chol.OmegaTotal <- chol(OmegaTotal)
  M <- chol2inv(chol.OmegaTotal)


  fr <- function(x){
    (estim.gl - x) %*% M %*% (estim.gl-x)
  }

  gr <- function(x){
    -2*M %*% (estim.gl - x)
  }

  res <- constrOptim(rep(0.5, length(estim.gl)), fr, gr,
                     ui = diag(1,length(estim.gl)),
                     ci = rep(0, length = length(estim.gl)))

  #phi.tilde <- sol$solution
  #Tvalue <- t(as.matrix(estim.phi-phi.tilde)) %*% M %*% t(t(as.matrix(estim.phi-phi.tilde)))
  gl.tilde <- res$par
  Tvalue <- res$value

  # Upper and Lower bounds for the critical value for jointly testing equality and inequality restrictions (David & Palm). alpha = 0.05, K = 1 to 17
  # bounds4critical.values <- c(2.706, 5.138, 7.045, 8.761, 10.371,
  #                             11.911, 13.401, 14.853, 16.274, 17.670,
  #                             19.045, 20.410, 21.742, 23.069, 24.384,
  #                             25.689, 26.983, 28.268, 29.545, 30.814,
  #                             32.077, 33.333, 34.583, 35.827, 37.066,
  #                             38.301, 39.531, 40.756, 41.977, 43.194,
  #                             44.408, 45.618, 46.825, 48.029, 49.229)

  # --- Thanks to the suggestions made by the referees in Rjournal ---
  f <- function(x, alpha, K) 1 - 0.5 * pchisq(x, K)-0.5 * pchisq(x, K-1)-alpha
  valInf <- rootSolve::uniroot.all(f, c(0, 10 * samplesize), alpha = alpha, K = 1)
  valSup <- rootSolve::uniroot.all(f, c(0, 10 * samplesize), alpha = alpha, K = samplesize)
  # ---

  if(Tvalue < valInf){
    p.value <- NA
    return(list(Tvalue = Tvalue,
                p.value = p.value,
                decision = "Do not reject null hypothesis"))
  }else if(Tvalue > valSup){
    p.value <- NA
    return(list(Tvalue = Tvalue,
                p.value = p.value,
                decision = "Reject null hypothesis"))
  }else{
    print("Inconclusive region... calculating p-value (10000 simulations)")
    vec.solved <- matrix(NA, 1000, length(estim.gl))
    i <- 1
    iterations <- 1
    while(i < 1001){
      estim.gl <- as.numeric(mvtnorm::rmvnorm(n=1, sigma=OmegaTotal))

      res <- try(constrOptim(rep(0.5, length(estim.gl)), fr, gr,
                             ui = diag(1, length(estim.gl)),
                             ci = rep(0, length = length(estim.gl)))$par, silent = TRUE)

      if(is.numeric(res)){
        vec.solved[i,] <- res
        i <- i + 1
      }
      iterations <- iterations + 1
      stopifnot(iterations < 3000)
    }

    diff.gl <- vec.solved

    count.pos <- function(diff.gl.vec){
      positv <- length(which(diff.gl.vec > 1e-15))
      return(positv)
      }

    n.positiv <- plyr::aaply(diff.gl,.margins=1, count.pos)
    props.positive <- table(n.positiv)/length(n.positiv)
    prob.chi <- rev(pchisq(Tvalue, df=0:length(estim.gl), lower.tail = FALSE))

    pos.weights <- as.numeric(names(props.positive)) + 1

    p.value <- sum(props.positive*prob.chi[pos.weights])

    return(list(Tvalue = Tvalue,
                p.value = p.value,
                decision = NA))
  }
}

Try the rtip package in your browser

Any scripts or data that you put into this service are public.

rtip documentation built on May 2, 2019, 7:55 a.m.