R/test_stat.R

Defines functions test_stat_freq_D

Documented in test_stat_freq_D

#' Return the results of inverse quantile regression without the \eqn{j}th endogenous variable
#'
#' @param j
#' @param Beta_j_null
#' @param B
#' @param Phi
#' @param alpha
#' @param Y
#' @param D data for endogenous variable D
#' @param X data for exogenous variable X
#' @param Z data for instrumental variable Z
#' @param tau tau in quantile regression
#' @param O_pos index for residuals whose sign are fixed as positive
#' @param O_neg index for residuals whose sign are fixed as negative
#' @param eps a small number
#' @param M a large number
#' @param TimeLimit gurobi parameter
#' @param homoskedastic
#' @param kernel
#' @param lpsolver "gurobi","cplexapi","lpsolveapi"
#' @return A list containing the result of short regression, p value and corresponding L_n
#' @examples
#' n = 100
#' pD = 3
#' angrist_data = angrist(n, pD)
#' Y = angrist_data$Y
#' X = matrix(1, n, 1)
#' Z = angrist_data$Z
#' D = angrist_data$D
#' homoskedastic = FALSE 
#' kernel = "Powell" 
#' XZ = as.matrix(cbind(X, Z))
#' PI_XZ = XZ %*% solve(t(XZ) %*% XZ) %*% t(XZ)
#' Phi_CH = PI_XZ %*% D
#' J = 2
#' test_stat_freq_D(
#'  j = J,
#'  Beta_j_null = 0.1,
#'  B = Phi_CH[, J, drop = FALSE],
#'  Phi = Phi_CH[, -J, drop = FALSE],
#'  Y = Y,
#'  D = D,
#'  X = X,
#'  Z = Z,
#'  tau = 0.5,
#'  homoskedastic = homoskedastic,
#'  kernel = kernel,
#'  lpsolver = "gurobi"
#'  )
#' @description This function is an internal function called inside function "ivqr_fit()"

test_stat_freq_D <-
  function(j,
           Beta_j_null,
           B = NULL,
           Phi = NULL,
           alpha = 0.1,
           Y,
           D,
           X,
           Z,
           tau = 0.5,
           O_pos = NULL,
           O_neg = NULL,
           eps = 1e-14,
           M = 10,
           TimeLimit = 300,
           homoskedastic = FALSE,
           kernel = "Powell",
           lpsolver = NULL) {
    out=list()
    n = length(Y)
    S_n = NA
    n = dim(Z)[1]
    p_Z = dim(Z)[2]
    
    Y_tilde = Y - D[, j, drop = FALSE] * c(Beta_j_null)
    
    
    XZ = cbind(X, Z)
    PI_XZ = XZ %*% solve(t(XZ) %*% XZ) %*% t(XZ)  #PI = X%*%solve(t(X)%*%X)%*%t(X)
    Phi_CH = PI_XZ %*% D #phi of Chernozhukov Hansen
    
    if (is.null(Phi)) {
      Phi <-
        Phi_CH[, -j, drop = FALSE]
      if (dim(Phi_CH)[2] == 2) {
        Phi <- as.matrix(Phi)
      }
    }
    if (is.null(dim(Phi)) &
        dim(as.matrix(Phi))[2] == 1) {
      Phi <- as.matrix(Phi)
    }
    
    short_reg = iqr_milp(
      Y = Y_tilde,
      D = D[, -j, drop = FALSE],
      X = X,
      Z = Phi,
      tau = tau,
      O_pos = O_pos,
      O_neg = O_neg,
      eps = 1e-14,
      M = M + max(abs(D[, j, drop = FALSE] * c(Beta_j_null))),
      TimeLimit = 300,
      FeasibilityTol = 1e-6,
      lpsolver = lpsolver
    ) #for succinct version from "code for roger koenker"
    
    out$status=short_reg$status
    out$obj=short_reg$objval
    #print(paste(out$status,out$obj))
    if(is.null(out$obj)){
      print(paste(out$status,out$obj)) 
      return(out)}
    else if(out$status=="TIME_LIMIT"|out$obj!=0){
      print(paste(out$status,out$obj))
      return(out)}
    
    ####build statistic
    
    if (is.null(B)) {
      B <-
        Phi_CH[, j, drop = FALSE]
    } #could use D_j_orth, could also proj D on Z and X (C-H 2006 p499)
    
    a_hat = short_reg$a
    b_hat = a_hat - (1 - tau)
    S_n = n ^ {
      -1 / 2
    } * t(B) %*% b_hat #S_n = n^{-1/2}*t(B)%*%b_hat
    #M_n =  t(B)%*%B/n
    
    #Q_n = t(S_n)%*%solve(M_n)%*%S_n / (tau*(1-tau))
    XD = cbind(X, D[, -j, drop = FALSE])
    XPhi = cbind(X, Phi)
    
    if (homoskedastic) {
      B_orth = (diag(n) - XPhi %*% solve(t(XD) %*% XPhi) %*% t(XD)) %*% B #make sure is correct
    }
    if (homoskedastic == F) {
      if (dim(D[, -j, drop = FALSE])[2] == 0) {
        res = Y_tilde - X %*% short_reg$B_X
      } else{
        res = Y_tilde - D[, -j, drop = FALSE] %*% short_reg$B_D - X %*% short_reg$B_X
      }
      #Hall and Sheather bandwidth
      hs <-
        n ^ (-1 / 3) * qnorm(1 - 0.5 * alpha) ^ (2 / 3) * (1.5 * dnorm(qnorm(tau)) ^
                                                             2 / (1 + 2 * qnorm(tau) ^ 2)) ^ (1 / 3)
      
      if (kernel == "Powell") {
        bdwt <- (qnorm(tau + hs) - qnorm(tau - hs)) *
            min(sd(res), (quantile(res, .75) - quantile(res, .25)) / 1.34)
        XDB = 0
        for (i in 1:n) {
          XDB <-
            XDB + 1 / (n * 2 * bdwt) * as.numeric(abs(res[i]) < bdwt) * t(t(XD[i, ])) %*%
            B[i]
        }
        XDXPhi = 0
        for (i in 1:n) {
          XDXPhi <-
            XDXPhi + 1 / (n * 2 * bdwt) * as.numeric(abs(res[i]) < bdwt) * t(t(XD[i, ])) %*%
            XPhi[i, ]
        }
        B_orth = (B - XPhi %*% solve(XDXPhi) %*% XDB)
      } else if (kernel == "Gaussian") {
        #if kernel
        bdwt <- hs
        XDB = 0
        for (i in 1:n) {
          XDB <-
            XDB + 1 / (n * bdwt) * dnorm(res[i] / bdwt) * t(t(XD[i, ])) %*% B[i]
        }
        XDXPhi = 0
        for (i in 1:n) {
          XDXPhi <-
            XDXPhi + 1 / (n * bdwt) * dnorm(res[i] / bdwt) * t(t(XD[i, ])) %*% XPhi[i, ]
        }
        B_orth = (B - XPhi %*% solve(XDXPhi) %*% XDB)
        #### XDXPhi can be singular ####
      }#if kernel
    }#if homoskedastic=F
    
    
    L_n = S_n / sqrt(tau * (1 - tau) * t(B_orth) %*% B_orth / n) #wrong - proj involves Z,X,D
    
    
    ####get p-value
    p_val = 2 * (1 - pnorm(abs(L_n)))
    
    out$p_val = p_val
    out$L_n = L_n
    print(paste(out$status,out$obj,out$p_val,out$L_n))
    return(out)
    
  }
#' Return the results of inverse quantile regression without the \eqn{j}th exogenous variable
#'
#' @param j
#' @param Beta_j_null
#' @param B
#' @param Phi
#' @param alpha
#' @param Y
#' @param D data for endogenous variable D
#' @param X data for exogenous variable X
#' @param Z data for instrumental variable Z
#' @param tau tau in quantile regression
#' @param O_pos index for residuals whose sign are fixed as positive
#' @param O_neg index for residuals whose sign are fixed as negative
#' @param eps a small number
#' @param M a large number
#' @param TimeLimit gurobi parameter
#' @param homoskedastic
#' @param kernel
#' @param lpsolver "gurobi","cplexapi","lpsolveapi"
#' @return A list containing the result of short regression, p value and corresponding L_n
#' @examples
#' n = 100
#' pD = 3
#' angrist_data = angrist(n, pD)
#' Y = angrist_data$Y
#' X = matrix(1, n, 1)
#' Z = angrist_data$Z
#' D = angrist_data$D
#' homoskedastic = FALSE 
#' kernel = "Powell" 
#' XZ = as.matrix(cbind(X, Z))
#' PI_XZ = XZ %*% solve(t(XZ) %*% XZ) %*% t(XZ)
#' Phi_CH = PI_XZ %*% D
#' J = 1
#' test_stat_freq_X(
#'  j = J,
#'  Beta_j_null = 0.1,
#'  B = NULL,
#'  Phi = Phi_CH[, -J, drop = FALSE],
#'  Y = Y,
#'  D = D,
#'  X = X,
#'  Z = Z,
#'  tau = 0.5,
#'  homoskedastic = homoskedastic,
#'  kernel = kernel,
#'  lpsolver = "gurobi"
#'  )
#' @description This function is an internal function called inside function "ivqr_fit()"

test_stat_freq_X <-
  #has to have the same param list as test_stat_freq_X in order to be passed into ivqr_confint
  function(j,
           Beta_j_null,
           B = NULL,
           Phi = NULL,
           alpha = 0.1,
           Y,
           D,
           X,
           Z,
           tau = 0.5,
           O_pos = NULL,
           O_neg = NULL,
           eps = 1e-14,
           M = 10,
           TimeLimit = 300,
           homoskedastic = FALSE,
           kernel = "Powell",
           lpsolver = lpsolver) {
    out=list()
    n = length(Y)
    
    S_n = NA
    n = dim(Z)[1]
    p_Z = dim(Z)[2]
    
    Y_tilde = Y - X[, j, drop = FALSE] * c(Beta_j_null) #20200120 changed
    #print(paste0("Y_tilde",Y_tilde))
    
    XZ = cbind(X, Z)
    PI_XZ = XZ %*% solve(t(XZ) %*% XZ) %*% t(XZ)  #PI = X%*%solve(t(X)%*%X)%*%t(X)
    Phi_CH = PI_XZ %*% D #phi of Chernozhukov Hansen
    
    if (is.null(Phi)) {
      Phi <-
        Phi_CH #20200120 changed
      if (dim(Phi_CH)[2] == 2) {
        Phi <- as.matrix(Phi)
      }
    }
    if (is.null(dim(Phi)) &
        dim(as.matrix(Phi))[2] == 1) {
      Phi <- as.matrix(Phi)
    }
    
    short_reg = iqr_milp(
      Y = Y_tilde,
      D = D,
      X = X[, -j, drop = FALSE],
      #20200120 changed, X passed into iqr_milp can be a matrix with no col dim
      Z = Phi,
      tau = tau,
      O_pos = O_pos,
      O_neg = O_neg,
      eps = 1e-14,
      M = M + max(abs(X[, j, drop = FALSE] * c(Beta_j_null))),
      TimeLimit = 300,
      FeasibilityTol = 1e-6,
      lpsolver = lpsolver
    ) #for succinct version from "code for roger koenker"
    
    out$status=short_reg$status
    out$obj=short_reg$objval
    if(is.null(out$obj)){return(out)}
    else if(is.null(out$obj)|out$status=="TIME_LIMIT"|out$obj!=0) {return(out)}
    
    ####build statistic
    
    X_j <- X[, j, drop = FALSE] #20200120 changed
    
    a_hat = short_reg$a
    b_hat = a_hat - (1 - tau)
    S_n = n ^ {
      -1 / 2
    } * t(X_j) %*% b_hat #20200120 changed
    
    
    #20200120 changed
    if (homoskedastic) {
      L_n = S_n / sqrt(tau * (1 - tau) * t(X_j) %*% X_j / n)
      print(paste0("S_n is", S_n))
      print(paste0("L_n is", L_n))
      print("----")
      #the scaling in the codes are correct, while that in the paper is not
    } else{
      if (dim(X[, -j, drop = FALSE])[2] == 0) {
        #in case when X consists of one variable, so after dropped X consists of 0 variables
        #a matrix containing 0 cols multiply by 0 is still a matrix containing 0 cols
        #and will raise non-conformable arrays when deducted from a matrix of the same row but with cols
        res = Y_tilde - D %*% short_reg$B_D
      } else{
        res = Y_tilde - D %*% short_reg$B_D - X[, -j, drop = FALSE] %*% short_reg$B_X
      }
      
      #Hall and Sheather bandwidth
      hs <-
        n ^ (-1 / 3) * qnorm(1 - 0.5 * alpha) ^ (2 / 3) * (1.5 * dnorm(qnorm(tau)) ^
                                                             2 / (1 + 2 * qnorm(tau) ^ 2)) ^ (1 / 3)
      
      if (kernel == "Powell") {
        bdwt <- (qnorm(tau + hs) - qnorm(tau - hs)) *
            min(sd(res), (quantile(res, .75) - quantile(res, .25)) / 1.34)
        XX = 0
        for (i in 1:n) {
          XX <-
            XX + 1 / (2 * bdwt) * as.numeric(abs(res[i]) < bdwt) * X_j[i] ^ 2
        }
        
        L_n = S_n / sqrt(tau * (1 - tau) * XX / n)
        print(paste0("S_n is", S_n))
        print(paste0("L_n is", L_n))
        print("----")
      } else if (kernel == "Gaussian") {
        bdwt <- hs
        XX = 0
        for (i in 1:n) {
          XX <- XX + 1 / (bdwt) * dnorm(res[i] / bdwt) * X_j[i] ^ 2
        }
        L_n = S_n / sqrt(tau * (1 - tau) * XX / n)
      }
    }
    #20200120 changed
    
    ####get p-value
    p_val = 2 * (1 - pnorm(abs(L_n)))
    
    out$p_val = p_val
    out$L_n = L_n
    
    return(out)
    
  }
ChenyueLiu/ivqr documentation built on Aug. 9, 2020, 7:49 p.m.