R/SamplingUncertainty.R

#' Get moments for sampling reduced-form parameters
#'
#' @param y_name
#' @param T_name
#' @param z_name
#' @param controls
#' @param data
#'
#' @return
#' @export
#'
#' @examples
getMomentsForSampler <- function(y_name, T_name, z_name, controls = NULL, data){
  # Extract data for named columns
  y <- get(y_name, data)
  Tobs <- get(T_name, data)
  z <- get(z_name, data)
  n <- length(y)

  ybar <- c(mean(y[Tobs == 0 & z == 0]),
            mean(y[Tobs == 0 & z == 1]),
            mean(y[Tobs == 1 & z == 0]),
            mean(y[Tobs == 1 & z == 1]))
  ynames <- c("yb00", "yb01", "yb10", "yb11")
  names(ybar) <- ynames

  s2y <- c(var(y[Tobs == 0 & z == 0]),
           var(y[Tobs == 0 & z == 1]),
           var(y[Tobs == 1 & z == 0]),
           var(y[Tobs == 1 & z == 1]))

  ny <- c(sum(Tobs == 0 & z == 0),
          sum(Tobs == 0 & z == 1),
          sum(Tobs == 1 & z == 0),
          sum(Tobs == 1 & z == 1))

  Syy <- diag(n * s2y / ny)
  rownames(Syy) <- colnames(Syy) <- ynames
  out_var <- Syy / n
  out_mean <- ybar

  if(!is.null(controls)){

    xiRegFormula <- y ~ Tobs + z + Tobs:z
    omega <- lm(xiRegFormula)$residuals
    Q <- matrix(c(1, 0, 0, 0,
                  1, 0, 1, 0,
                  1, 1, 0, 0,
                  1, 1, 1, 1), 4, 4, byrow = TRUE)
    A <- model.matrix(xiRegFormula)
    Sigma_AA_inv <- solve(crossprod(A) / n)

    second_stage <- reformulate(c(T_name, controls), response = y_name)
    first_stage <- reformulate(c(z_name, controls))
    bReg <- AER::ivreg(second_stage, first_stage, data)
    b_iv <- coefficients(bReg)[-1] # Don't need the constant
    names(b_iv)[1] <- "Treatment" # The IV estimate of the treatment effect
    rho <- residuals(bReg)
    R <- model.matrix(first_stage, data)
    W <- model.matrix(second_stage, data)
    Sigma_RW_inv <- solve(crossprod(R, W) / n)

    Xi_rr <- t(R) %*% diag(rho^2) %*%  R / (n - 1)
    Xi_rw <- t(R) %*% diag(rho * omega) %*% A / (n - 1)

    H_yy <- Syy
    H_bb <- Sigma_RW_inv %*% Xi_rr %*% t(Sigma_RW_inv)
    H_by <- Sigma_RW_inv %*% Xi_rw %*% t(Q %*% Sigma_AA_inv)
    H <- rbind(cbind(H_bb, H_by),
               cbind(t(H_by), H_yy))
    S <- H[-1, -1]

    out_mean <- c(b_iv, ybar)
    out_var <- S / n
  }
  return(list(mean = out_mean, var = out_var))
}


#' Make draws of reduced form parameters
#'
#' @param y_name
#' @param T_name
#' @param z_name
#' @param controls
#' @param data
#' @param nDraws
#'
#' @return Dataframe, each row of which is one draw for the reduced form parameters with column names that match the names of the list generated by the function \code{getObs}
#' @export
#'
#' @examples
drawObs <- function(y_name, T_name, z_name, controls = NULL, data,
                    nDraws = 1000){

  # Extract data for named columns
  y <- get(y_name, data)
  Tobs <- get(T_name, data)
  z <- get(z_name, data)

  # Quantities for which we do not propagate sampling uncertainty
  n <- length(Tobs)
  p <- mean(Tobs)
  q <- mean(z)
  p0 <- mean(Tobs[z == 0])
  p1 <- mean(Tobs[z == 1])
  p00 <- mean(Tobs == 0 & z == 0)
  p01 <- mean(Tobs == 0 & z == 1)
  p10 <- mean(Tobs == 1 & z == 0)
  p11 <- mean(Tobs == 1 & z == 1)
  s2_00 <- var(y[Tobs == 0 & z == 0])
  s2_01 <- var(y[Tobs == 0 & z == 1])
  s2_10 <- var(y[Tobs == 1 & z == 0])
  s2_11 <- var(y[Tobs == 1 & z == 1])

  # Draw reduced form parameters
  moments <- getMomentsForSampler(y_name, T_name, z_name, controls, data)
  RFdraws <- MASS::mvrnorm(nDraws, moments$mean, moments$var)

  yb00 <- RFdraws[, colnames(RFdraws) == "yb00"]
  yb01 <- RFdraws[, colnames(RFdraws) == "yb01"]
  yb10 <- RFdraws[, colnames(RFdraws) == "yb10"]
  yb11 <- RFdraws[, colnames(RFdraws) == "yb11"]

  # These are now vectors since yb00 etc are vectors
  yt00 <- (1 - p0) * yb00
  yt01 <- (1 - p1) * yb01
  yt10 <- p0 * yb10
  yt11 <- p1 * yb11


  if(!is.null(controls)){

    gamma_iv <- RFdraws[,-which(colnames(RFdraws) %in%
                                c("Treatment", "yb00", "yb01", "yb10", "yb11"))]
    beta_iv <- RFdraws[, colnames(RFdraws) == "Treatment"]

    # No intercept since we "project it out" by working with Cov matrix below
    x <- model.matrix(reformulate(controls, intercept = FALSE), data)
    Sigma <- rbind(cbind(cov(z, Tobs), cov(z, x)),
                       cbind(cov(x, Tobs), cov(x)))
    Sigma_inv <- solve(Sigma)
    s_zT_upper <- Sigma_inv[1,1] # We'll need this to back out the implied beta
    s_xT_upper <- matrix(Sigma_inv[-1,1], ncol(x), 1)

    # These don't vary across draws
    C2 <- drop(cov(z, x) %*% s_xT_upper)
    C4 <- drop(var(z)*cov(Tobs, x) %*% s_xT_upper)

    # These will be vectors: they vary across draws
    C1 <- drop(cov(z, x) %*% t(gamma_iv) / var(z))
    C3 <- drop(cov(Tobs, x) %*% t(gamma_iv))

  }else{
    yb1 <- (1 - p1) * yb01 + p1 * yb11
    yb0 <- (1 - p0) * yb00 + p0 * yb10
    beta_iv <- (yb1 - yb0) / (p1 - p0)

    s_zT_upper <- 1 / cov(z, Tobs) # Used to compute beta, Doesn't vary across draws
    C2 <- C4 <- 0 # Don't vary across draws
    C1 <- C3 <- rep(0, nDraws) # Varies across draws
  }

  # Pack things up and return
  obsDraws <- data.frame(n = rep(n, nDraws),
                         p = rep(p, nDraws),
                         q = rep(q, nDraws),
                         p0 = rep(p0, nDraws),
                         p1 = rep(p1, nDraws),
                         p00 = rep(p00, nDraws),
                         p01 = rep(p01, nDraws),
                         p10 = rep(p10, nDraws),
                         p11 = rep(p11, nDraws),
                         yb00 = yb00,
                         yb01 = yb01,
                         yb10 = yb10,
                         yb11 = yb11,
                         yt00 = yt00,
                         yt01 = yt01,
                         yt10 = yt10,
                         yt11 = yt11,
                         s2_00 = rep(s2_00, nDraws),
                         s2_01 = rep(s2_01, nDraws),
                         s2_10 = rep(s2_10, nDraws),
                         s2_11 = rep(s2_11, nDraws),
                         s_zT_upper = rep(s_zT_upper, nDraws),
                         C1 = C1,
                         C2 = rep(C2, nDraws),
                         C3 = C3,
                         C4 = rep(C4, nDraws),
                         beta_iv = beta_iv)
  return(obsDraws)
}


#' Make draws for dz_tilde
#'
#' @param y_name
#' @param T_name
#' @param z_name
#' @param controls
#' @param data
#' @param dTstar_tilde_range
#' @param a0bounds
#' @param a1bounds
#' @param IJ
#' @param nRF
#' @param nIS
#' @param PequalPstar
#'
#' @return
#' @export
#'
#' @examples
#' afghanControls <- c("headchild", "age",  "yrsvill",  "farsi",  "tajik",
#'                    "farmers",  "agehead",  "educhead",  "nhh",  "land",
#'                    "sheep", "distschool", "chagcharan")

#' foo <- draw_dz_tilde(y_name = "testscore", T_name = "enrolled",
#'                      z_name = "buildschool", controls = afghanControls,
#'                      data = afghan, dTstar_tilde_range = c(0, 1))
draw_dz_tilde <- function(y_name, T_name, z_name, controls = NULL, data,
                          dTstar_tilde_range, a0bound = NULL, a1bound = NULL,
                          nRF = 500, nIS = 500,
                          option = NULL){

  RFdraws <- drawObs(y_name, T_name, z_name, controls, data, nDraws = nRF)
  alpha_bounds <- t(sapply(1:nrow(RFdraws),
                           function(i) get_alpha_bounds(RFdraws[i,])))
  if(!is.null(a0bound)){
    if((a0bound <= 1) & (a0bound >= 0)){
      alpha_bounds[, 1] <- pmin(alpha_bounds[, 1], a0bound)
    }
  }
  if(!is.null(a1bound)){
    if((a1bound <= 1) & (a1bound >= 0)){
      alpha_bounds[, 2] <- pmin(alpha_bounds[, 2], a1bound)
    }
  }
  RFdraws <- cbind(RFdraws, alpha_bounds)

  emptySlice <- data.frame(matrix(NA_real_, nIS, 5))
  names(emptySlice) <- c("a0", "a1", "dTstar_tilde", "dz_tilde", "beta")
  ISdraws <- vector("list", nRF)

  # Loop over reduced form draws
  for(i in 1:nrow(RFdraws)){

    tempSlice <- emptySlice
    obs <- as.list(RFdraws[i,])
    dTstar_tilde <- runif(nIS, dTstar_tilde_range[1], dTstar_tilde_range[2])

    if(!is.null(option)){
      # Option that constrains a0 and a1 so that pstar equals p
      if(option == "PequalPstar"){
        slope <- with(obs, (1 - p) / p)
        if(slope * obs$a0upper <= obs$a1upper){
          a0 <- runif(nIS, 0, obs$a0upper)
        }else{
          a0 <- runif(nIS, 0, obs$a1upper / slope)
        }
        a1 <- slope * a0
      # Option that constrains a0 = a1
      }else if(option == "A0equalA1"){
        if(obs$a0upper <= obs$a1upper){
          a0 <- runif(nIS, 0, obs$a0upper)
          a1 <- a0
        }else{
          a1 <- runif(nIS, 0, obs$a1upper)
          a0 <- a1
        }
      }else{
        stop("Invalid option specified.")
      }
    }else{
      a0 <- runif(nIS, 0, obs$a0upper)
      a1 <- runif(nIS, 0, obs$a1upper)
    }

    dz_tilde <- get_dz_tilde(dTstar_tilde, a0, a1, obs)

    tempSlice$a0 <- a0
    tempSlice$a1 <- a1
    tempSlice$dTstar_tilde <- dTstar_tilde
    tempSlice$dz_tilde <- dz_tilde

    BBS <- (1 - tempSlice$a0 - tempSlice$a1)
    s_ze <- obs$q * (1 - obs$q) * tempSlice$dz_tilde
    tempSlice$beta <- BBS * (obs$beta_iv - s_ze * obs$s_zT_upper)
    ISdraws[[i]] <- tempSlice
  }
  return(list(IS = ISdraws, RF = RFdraws))
}

#' Make draws for dTstar_tilde under the assumption that dz_tilde = 0
#'
#' @param y_name
#' @param T_name
#' @param z_name
#' @param controls
#' @param data
#' @param a0bound
#' @param a1bound
#' @param nRF
#' @param nIS
#'
#' @return
#' @export
#'
#' @examples
#' afghanControls <- c("headchild", "age",  "yrsvill",  "farsi",  "tajik",
#'                    "farmers",  "agehead",  "educhead",  "nhh",  "land",
#'                    "sheep", "distschool", "chagcharan")
#' foo <- draw_dTstar_tilde_valid(y_name = "testscore", T_name = "enrolled",
#'                                z_name = "buildschool",
#'                                controls = afghanControls, data = afghan)
draw_dTstar_tilde_valid <- function(y_name, T_name, z_name, controls = NULL,
                                    data, a0bound = NULL, a1bound = NULL,
                                    nRF = 500, nIS = 500){

  RFdraws <- drawObs(y_name , T_name, z_name, controls, data , nDraws  = nRF)
  alpha_bounds <- t(sapply(1:nrow(RFdraws), function(i) get_alpha_bounds(RFdraws[i,])))
  if(!is.null(a0bound)){
    if((a0bound <= 1) & (a0bound >= 0)){
      alpha_bounds[, 1] <- pmin(alpha_bounds[, 1], a0bound)
    }
  }
  if(!is.null(a1bound)){
    if((a1bound <= 1) & (a1bound >= 0)){
      alpha_bounds[, 2] <- pmin(alpha_bounds[, 2], a1bound)
    }
  }
  RFdraws <- cbind(RFdraws, alpha_bounds)

  emptySlice <- data.frame(matrix(NA_real_, nIS, 4))
  names(emptySlice) <- c("a0", "a1", "dTstar_tilde", "beta")
  ISdraws <- vector("list", nRF)

  # Loop over reduced form draws
  for(i in 1:nrow(RFdraws)){

    tempSlice <- emptySlice
    obs <- as.list(RFdraws[i,])

    a0 <- runif(nIS, 0, obs$a0upper)
    a1 <- runif(nIS, 0, obs$a1upper)
    dTstar_tilde <- get_dTstar_tilde(0, a0, a1, obs) #Assume valid instrument

    tempSlice$a0 <- a0
    tempSlice$a1 <- a1
    tempSlice$dTstar_tilde <- dTstar_tilde

    BBS <- (1 - tempSlice$a0 - tempSlice$a1)
    tempSlice$beta <- BBS * obs$beta_iv
    ISdraws[[i]] <- tempSlice
  }
  return(list(IS = ISdraws, RF = RFdraws))
}
fditraglia/binivdoctr documentation built on May 16, 2019, 12:10 p.m.