R/preprocess_ivqr_confint.R

Defines functions preprocess_ivqr_confint

Documented in preprocess_ivqr_confint

#' Compute the confidence interval for \eqn{\Betaj}
#'
#' @param test_stat
#' @param j
#' @param B
#' @param Phi
#' @param Beta_j_hat
#' @param B_tol
#' @param Srate
#' @param alpha
#' @param Y
#' @param D data for endogenous variable D
#' @param X data for exogenous variable X
#' @param Z data for instrumental variable Z
#' @param tau tau in quantile regression
#' @param width_ratio
#' @param M a large number
#' @param homoskedastic
#' @param kernel
#' @param prop_fixed_start
#' @param r
#' @param lpsolver "gurobi","cplexapi","lpsolveapi"
#' @return the estimates, upper bound and lower bound
#' @examples 
#' n=50
#' pD=3
#' sample_data<-chen_lee(n,pD)
#' Y=sample_data$Y
#' D=sample_data$D
#' Z=sample_data$Z
#' X = matrix(1, n, 1)
#' XZ = cbind(X, Z)
#' PI_XZ = XZ %*% solve(t(XZ) %*% XZ) %*% t(XZ)
#' Phi_CH = PI_XZ %*% D
#' lpsolver="gurobi"
#' tau = 0.5
#' iqr_milp_fit = iqr_milp(Y, D, X, Z, tau = tau, lpsolver = lpsolver)
#' B_D_hat = iqr_milp_fit$B_D
#' J=1
#' preprocess_ivqr_confint(
#' test_stat = test_stat_freq_D,
#' j = J,
#' B = Phi_CH[, J, drop = FALSE],
#' Phi = Phi_CH[, -J, drop = FALSE],
#' Beta_j_hat = B_D_hat[J],
#' Srate = 1 / 2,
#' alpha = 0.1,
#' Y=Y,
#' D=D,
#' X=X,
#' Z=Z,
#' tau = tau,
#' M = 10,
#' homoskedastic = TRUE,
#' kernel = "Gaussian",
#' prop_fixed_start = 0.7,
#' r=1.1,
#' lpsolver = lpsolver
#' )
preprocess_ivqr_confint <-
  function(test_stat = test_stat_freq_D,
           j = 1,
           B = NULL,
           Phi = NULL,
           Beta_j_hat = 1,
           Srate = 1 / 2,
           alpha = 0.1,
           Y,
           D,
           X,
           Z,
           tau = 0.5,
           width_ratio = 1,
           M = 10,
           homoskedastic = FALSE,
           kernel = "Powell",
           prop_fixed_start = 0.8,
           r=1.1,
           lpsolver = NULL) {
    start_time<-Sys.time()
    #get order of magnitude of confidence region
    if (identical(test_stat, test_stat_freq_D)) {
      #added 20200120
      bounds = summary(quantreg::rq(Y ~ D + X - 1))$coef[j, 2:3, drop = FALSE]
    } else if (identical(test_stat, test_stat_freq_X)) {
      #added 20200120
      bounds = summary(quantreg::rq(Y ~ D + X - 1))$coef[j + dim(D)[2], 2:3, drop = FALSE]
    }
    
    step_width = width_ratio * (bounds[2] - bounds[1])
    B_tol = log10_ceiling(step_width * 0.01)
    
    O_pos = NULL
    O_neg = NULL   
    p_D = dim(D)[2] ; if(is.null(p_D)){p_D<-0}
    n = dim(X)[1]
    XX = cbind(D[, -j, drop = FALSE], X)
    Y_tilde = Y - D[, j, drop = FALSE] * c(Beta_j_hat)
    qr = quantreg::rq(Y_tilde~XX-1)
    fit = qr$fitted.values
    start_band_width = quantile(abs(qr$res), prop_fixed_start)
    status = "TIME_LIMIT"
    obj     = 0.5
    band_width = start_band_width
    while(((status=="TIME_LIMIT")|(obj!=0))){ 
      #print(paste("1 lenOf O_pos ",length(O_pos),"; lenOf O_neg ",length(O_neg)))
      lb = fit - band_width
      ub = fit + band_width
      O_pos = which(Y_tilde > ub)
      O_neg = which(Y_tilde < lb)
      O = c(O_pos, O_neg)
      if(is.null(O)==F){O = sort(O)}
      I = setdiff(1:n, O)
      n_I = length(I)
      TT  = exp(n_I/200 + p_D/5 + n_I*p_D/1000)*4 #heuristic for time limit
      if(length(O)==0){TT=Inf}
      
      #check that starting null value is not rejected
      test_stat_start <-
        test_stat(
          j=j,
          Beta_j_null = Beta_j_hat,
          B=B ,
          Phi=Phi,
          alpha=alpha,
          Y=Y,
          D=D,
          X=X,
          Z=Z,
          tau=tau,
          O_pos = O_pos,
          O_neg = O_neg,
          eps = 1e-14,
          M=M ,
          TimeLimit = TT,
          homoskedastic=homoskedastic,
          kernel=kernel,
          lpsolver = lpsolver
        )
      status = test_stat_start$status
      obj   = test_stat_start$obj
      if(is.null(obj)){
        obj = 0.5
      }
      band_width = band_width*r
    }#end of while loop for preprocessing
    
    
    #### starting null value rejected ####
    if (test_stat_start$p_val < alpha) {
      stop("starting null value rejected")
    }
    start_O_pos=O_pos
    start_O_neg=O_neg
    start_band_width=band_width #update start_band_width which is previously determined by prop_fixed_start
    
    
    ####get upper bound
    Beta_j <- Beta_j_hat #start at point estimate
    S <- step_width / 2
    Sign <- +1
    O_pos=start_O_pos
    O_neg=start_O_neg
    band_width=start_band_width
    #O_pos ,O_neg, band_width reused for different Beta_j? 

    p_val = NULL
    T_n = NULL  #add code that takes x from full problem and creates starting solution for the short problem
    #print(paste0("Beta_j",Beta_j," width",width))
    
    #### S>B_tol ####
    while (S > B_tol) {
      old_Beta_j = Beta_j
      old_p_val = p_val
      old_T_n = T_n
      Beta_j <- Beta_j + Sign * S
      #print(paste("2 lenOf O_pos ",length(O_pos),"; lenOf O_neg ",length(O_neg)))
      status = "TIME_LIMIT"
      obj     = 0.5
      while(((status=="TIME_LIMIT")|(obj!=0))){
        #print("enter")
        O = c(O_pos, O_neg)
        if(is.null(O)==F){O = sort(O)}
        I = setdiff(1:n, O)
        n_I = length(I)
        TT  = exp(n_I/200 + p_D/5 + n_I*p_D/1000)*4 #heuristic for time limit
        if(length(O)==0){TT=Inf}
        #check that starting null value is not rejected
        test_stat_out <-
          test_stat(
            j=j,
            Beta_j_null = Beta_j,
            B=B ,
            Phi=Phi,
            alpha=alpha,
            Y=Y,
            D=D,
            X=X,
            Z=Z,
            tau=tau,
            O_pos = O_pos,
            O_neg = O_neg,
            eps = 1e-14,
            M=M ,
            TimeLimit = TT,
            homoskedastic=homoskedastic,
            kernel=kernel,
            lpsolver = lpsolver
          )
        status = test_stat_out$status
        obj     = test_stat_out$obj
        #print(paste("status ",status," obj ",obj))
        if(is.null(obj)){
          obj = 0.5
        }
        band_width = band_width*r
        lb = fit - band_width
        ub = fit + band_width
        O_pos = which(Y_tilde > ub)
        O_neg = which(Y_tilde < lb)
      }#end of while loop for preprocessing
      
      p_val = test_stat_out$p_val
      #print(paste0("old Beta j",old_Beta_j," Beta_j",Beta_j," p value",p_val))
      old_Sign = Sign
      Sign <- sign(p_val - alpha)
      if (old_Sign != Sign) {
        S <- S * Srate
      }
    }
    
    
    ####interpolate####
    pair_p_val = c(old_p_val, p_val)
    order_ppv = order(pair_p_val)
    pi = (alpha - pair_p_val[order_ppv[1]]) / (pair_p_val[order_ppv[2]] - pair_p_val[order_ppv[1]])
    pair_Beta_j = c(old_Beta_j, Beta_j)
    Beta_ub_int = (1 - pi) * pair_Beta_j[order_ppv[1]] + pi * pair_Beta_j[order_ppv[2]]
    
    
    print("-----lower bound-------")
    ####get lower bound
    Beta_j <- Beta_j_hat #start at point estimate
    S <- step_width / 2
    Sign <- -1
    O_pos=start_O_pos
    O_neg=start_O_neg
    band_width=start_band_width
    p_val = NULL
    T_n = NULL
    
    #### S>B_tol ####
    while (S > B_tol) {
      old_Beta_j = Beta_j
      old_p_val = p_val
      old_T_n = T_n
      Beta_j <- Beta_j + Sign * S
      status = "TIME_LIMIT"
      obj     = 0.5
      #print(paste("3 lenOf O_pos ",length(O_pos),"; lenOf O_neg ",length(O_neg)))
      while(((status=="TIME_LIMIT")|(obj!=0))){
        #print("enter")
        O = c(O_pos, O_neg)
        if(is.null(O)==F){O = sort(O)}
        I = setdiff(1:n, O)
        n_I = length(I)
        TT  = exp(n_I/200 + p_D/5 + n_I*p_D/1000)*4 #heuristic for time limit
        if(length(O)==0){TT=Inf}
        #check that starting null value is not rejected
        test_stat_out <-
          test_stat(
            j=j,
            Beta_j_null = Beta_j,
            B=B ,
            Phi=Phi,
            alpha=alpha,
            Y=Y,
            D=D,
            X=X,
            Z=Z,
            tau=tau,
            O_pos = O_pos,
            O_neg = O_neg,
            eps = 1e-14,
            M=M ,
            TimeLimit = TT,
            homoskedastic=homoskedastic,
            kernel=kernel,
            lpsolver = lpsolver
          )
        status = test_stat_out$status
        obj     = test_stat_out$obj
        #print(paste("status ",status," obj ",obj))
        if(is.null(obj)){
          obj = 0.5
        }
        band_width = band_width*r
        lb = fit - band_width
        ub = fit + band_width
        O_pos = which(Y_tilde > ub)
        O_neg = which(Y_tilde < lb)
      }#end of while loop for preprocessing
      
      p_val = test_stat_out$p_val
      old_Sign = Sign
      Sign <- -sign(p_val - alpha)
      if (old_Sign != Sign) {
        S <- S * Srate
      }
    }
    
    ####interpolate####
    pair_p_val = c(old_p_val, p_val)
    order_ppv = order(pair_p_val)
    pi = (alpha - pair_p_val[order_ppv[1]]) / (pair_p_val[order_ppv[2]] - pair_p_val[order_ppv[1]])
    pair_Beta_j = c(old_Beta_j, Beta_j)
    Beta_lb_int = (1 - pi) * pair_Beta_j[order_ppv[1]] + pi * pair_Beta_j[order_ppv[2]]
    
    end_time<-Sys.time()
    return(c(Beta_j_hat, Beta_lb_int, Beta_ub_int))
  }
ChenyueLiu/ivqr documentation built on Aug. 9, 2020, 7:49 p.m.