bate: Computes Bias-Adjusted Treatment Effect

Documented in ovbias

#' Compute bias adjusted treatment effect taking parameter vector as input.
#'
#' @param parameters A vector of parameters (real numbers) that is generated by estimating the short, intermediate and auxiliary regressions.
#' @param deltalow The lower limit of delta.
#' @param deltahigh The upper limit of delta.
#' @param Rhigh The upper limit of Rmax.
#' @param e The step size.
#'
#' @return List with three elements:
#' 
#' \item{Data}{Data frame containing the bias ($bias) and bias-adjusted treatment effect ($bstar) for each point on the grid}
#' \item{bias_Distribution}{Quantiles (2.5,5.0,50,95,97.5) of the empirical distribution of bias}
#' \item{bstar_Distribution}{Quantiles (2.5,5.0,50,95,97.5) of the empirical distribution of the bias-adjusted treatment effect}
#' 
#' @export
#'
#' @examples 
#' ## Load data set
#' data("NLSY_IQ")
#'  
#' ## Set age and race as factor variables
#' NLSY_IQ$age <- factor(NLSY_IQ$age)
#' NLSY_IQ$race <- factor(NLSY_IQ$race)
#'    
#' ## Collect parameters from the short, intermediate and auxiliary regressions
#' parameters <- collect_par(
#' data = NLSY_IQ, outcome = "iq_std", 
#' treatment = "BF_months", 
#' control = c("age","sex","income","motherAge","motherEDU","mom_married","race"),
#' other_regressors = c("sex","age"))
#' 
#' ## Set limits for the bounded box
#' Rlow <- parameters$Rtilde
#' Rhigh <- 0.61
#' deltalow <- 0.01
#' deltahigh <- 0.99
#' e <- 0.01
#' 
#' \dontrun{
#' ## Compute bias and bias-adjusted treatment effect
#' OVB <- ovbias(
#' parameters = parameters, 
#' deltalow=deltalow, 
#' deltahigh=deltahigh, Rhigh=Rhigh, 
#' e=e)
#' 
#' ## Default quantiles of bias
#' (OVB$bias_Distribution)
#' 
#' ## Chosen quantilesof bias
#' quantile(OVB$Data$bias, c(0.01,0.05,0.1,0.9,0.95,0.975))
#' 
#' ## Default quantiles of bias-adjusted treatment effect
#' (OVB$bstar_Distribution)
#' 
#' ## Chosen quantiles of bias-adjusted treatment effect
#' quantile(OVB$Data$bstar, c(0.01,0.05,0.1,0.9,0.95,0.975))
#' }
#' 
ovbias <- function(parameters,deltalow,deltahigh,Rhigh,e){
  
  # Check if delta parameters are 1
  if((deltalow==1) | (deltahigh==1)) stop("Values for delta cannot be exactly equal to 1")
  
  # Set Rlow
  Rlow <- parameters$Rtilde
  
  # Create a grid of (delta,Rmax) ranges
  df <- expand.grid(delta = seq(deltalow,deltahigh,by=e),
                    Rmax = seq(Rlow,Rhigh,by=e))
  
  # Check for whether there are any values for where delta = 1
  if(nrow(filter(df,delta==1))>0){
    while(nrow(filter(df,delta==1))>0){
      e <- 0.99*e
      df <- expand.grid(delta = seq(deltalow,deltahigh,by=e),
                        Rmax = seq(Rlow,Rhigh,by=e))
      df.1 <- df %>% filter(delta==1)
    }
    warning(paste0("The grid includes delta = 1. e has been modified to ",e))
  }
  
  # Determine whether each coordinate has one (D=1) or three (D=0) real roots
  df <- df %>% 
    mutate(D = mydisc(parameters,delta,Rmax))
  
  # Case 1: the entire grid exists in URR region 
  # --> Simply use the URR as bias for all points
  if(sum(df$D)==nrow(df)){
    
    message("All points on the grid are in the URR region. Calculating roots...")
    
    #df <- df %>% mutate(bias = pmap(list(mydelta=delta,Rmax=Rmax),mycubic,parameters=parameters),
    #bstar = parameters$betatilde - bias) # Can either use rowwise or pmap. Leaving this here for now, delete later
    output <- df %>% 
      rowwise() %>% # rowwise() is necessary to call mycubic() one row at a time
      mutate(bias = Re(mycubic(parameters,delta,Rmax)[1]),
             bstar = parameters$betatilde - bias) %>%
      select(-"D")
  }
  
  # Cases 2 and 3
  else{
    # Case 2: part of the grid exists in URR region, part in NURR
    # --> Define URR and NURR
    if(sum(df$D)>0){
      
      message(paste0("Some points in the grid are in the NURR region. Calculating roots for points in the URR..."))
      
      # Define epsilon
      epsilon <- e*1.1
      
      # Define URR and calculate bias and bstar for all points in URR region
      urr <- df %>% filter(D==1) %>%
        rowwise() %>% # rowwise() is necessary to call mycubic() one row at a time
        mutate(bias = Re(mycubic(parameters,delta,Rmax)[1])) %>%
        select(-"D") %>%
        as.data.frame()
      
      # Define NURR region
      nurr <- df %>% 
        filter(D==0) %>%
        select(-"D") %>%
        as.data.frame()
      
    }
    
    # Case 3: the entire grid exists in NURR region
    # --> Expand grid outward until the URR is found, then define URR and NURR
    else if(sum(df$D)==0){
      
      message("All points on the grid are in the NURR region. Finding nearest points in the URR region...")
      
      # Initialize extendedRegion
      extendedRegion <- expand_border(parameters,deltalow,deltahigh,Rlow,Rhigh,e)
      
      # Repeatedly expand extendedRegion until a point within the URR has been found
      while(sum(extendedRegion$D)==0){
        extendedRegion <- rbind(
          extendedRegion,
          expand_border(parameters,
                        min(extendedRegion$delta),
                        max(extendedRegion$delta),
                        Rlow,Rhigh,e)
        )
      }
      
      # Remove points on the opposite side of the grid where the URR was found 
      if(filter(extendedRegion,D==1)$delta[1] < deltalow){
        extendedRegion <- filter(extendedRegion, delta < deltalow)
      } else{
        extendedRegion <- filter(extendedRegion, delta > deltahigh)
      }
      
      # Remove points which do not contain the same Rmax value as those in the URR
      extendedRegion <- filter(extendedRegion, Rmax %in% filter(extendedRegion,D==1)$Rmax)
      
      # Define URR and calculate bias and bstar for all points in URR region
      urr <- extendedRegion %>% 
        filter(D==1) %>%
        rowwise() %>% # rowwise() is necessary to call mycubic() one row at a time
        mutate(bias = Re(mycubic(parameters,delta,Rmax)[1])) %>%
        select(-"D") %>%
        as.data.frame()
      
      # Define NURR region
      nurr <- df %>%
        rbind(extendedRegion %>% filter(D==0)) %>%
        select(-"D") %>%
        as.data.frame()
    }
    
    # Define epsilon
    epsilon <- e*1.1
    
    # Number of coordinates in the entire grid
    n <- nrow(urr) + nrow(nurr)
    
    # Initialize output
    output <- urr
    
    message(paste0("\rUsing points in the URR region to select roots in the NURR region..."))
    
    # Run a loop which continuously calls split_nurr() until all points in the nurr
    # have had their roots selected (i.e. when output contains the same number of 
    # coordinates in the entire grid)
    regions.list <- list(get_border(urr,e),nurr)
    while(nrow(output)!=n){
      
      message(paste0("\rSelecting roots for ",
                     nrow(regions.list[[2]]), 
                     " points remaining in the NURR region..."),
              appendLF=FALSE)
      
      # Call split_nurr()
      regions.list <- split_nurr(regions.list[[1]],regions.list[[2]],epsilon,parameters,e)
      
      if(is.null(regions.list[[1]])){
        regions.list <- split_nurr(output,regions.list[[2]],epsilon,parameters,e)
      }
      
      # Combine output with the first element returned from split_nurr()
      output <- output %>% 
        rbind(regions.list[[1]])
    }
    
    message("")
    
    # For case 3, only return points within the bounded box defined by the user
    if(n != nrow(df)){
      output <- filter(output, delta >= deltalow, delta <= deltahigh)
    }
    
    # Calculate bstar from the bias
    output <- as.data.frame(output) %>% 
      mutate(bstar = parameters$betatilde - bias)
  }
  
  # For all three cases, return a list of the data and the quantiles of bias and bstar
  output.list <- list("Data" = output,
                      "bias_Distribution" = c(
                        round(
                          quantile(output$bias,
                                   probs = c(0.025,0.05,0.5,0.95,0.975)),
                          digits = 3)
                      ),
                      "bstar_Distribution" = c(
                        round(
                          quantile(output$bstar,
                                   probs = c(0.025,0.05,0.5,0.95,0.975)),
                          digits = 3)
                      ))
  
  return(output.list)
}