R/cfsens_pac.R

Defines functions predict.itepac cfsens_pac

Documented in cfsens_pac predict.itepac

#' Sensitivity analysis of individual treatment effects with PAC-type guarantees
#'
#' \code{cfsens_pac} conducts sensitivity analysis of individual treatment effects
#'  with PAC-type guarantee.
#' It currently supports testing the sharp null and the directional nulls.
#'
#' @details When \code{null_type = "sharp"}, the null hypothesis is H_0: Y(1) - Y(0) = 0.
#' When \code{null_type = "negative"}, the null hypothesis is H_0: Y(1) - Y(0) <= 0.
#' When \code{null_type = "positive"}, the null hypothesis is H_0: Y(1) - Y(0) >= 0.
#'
#'
#' @param X covariates.
#' @param Y the observed outcome vector.
#' @param T the vector of treatment assignments.
#' @param alpha the target confidence level.
#' @param delta the confidence level over the randomness over the calibration set.
#' @param null_type the null to be tested that takes value in \{"sharp", "negative", "positive"\}. See Details. 
#' @param score_type the type of nonconformity scores. The default is "cqr".
#' @param ps_fun a function that models the treatment assignment mechanism. The default is "regression_forest".
#' @param ps a vector of propensity score. The default is \code{NULL}. 
#' @param pred_fun a function that models the potential outcome conditional on the covariates. The default is "quantile_forest".
#' @param train_prop proportion of units used for training. The default is 75\%. 
#' @param train_id The index of the units used for training. The default is \code{NULL}.
#'
#' @return an \code{itepac} object.
#'
#' @seealso 
#' \code{\link{cfsens_mgn}}
#'
#' @examples
#' \donttest{
#' ## Data generating model
#'  data_model <- function(n, p, Gamma, seed){
#'  set.seed(seed)
#'  beta <- matrix(c(-0.531,0.126,-0.312,0.018,rep(0,p-4)), nrow=p)
#'  X <- matrix(runif(n * p), n, p)
#'  U <- rnorm(n) * abs(1 + 0.5 * sin(2.5 * X[,1]))
#'  Y1 <-  X %*% beta + U
#'  Y0 <- X %*% beta - U
#'  prop.x <- exp(X %*% beta) / (1 + exp(X %*% beta))
#'  p.x <- 1-(1/(prop.x + (1-prop.x)/Gamma ) -1)/( 1/(prop.x + (1-prop.x)/Gamma) - 1/(prop.x + Gamma*(1-prop.x)))
#'  t.x <- qnorm(1-p.x/2) * abs(1+0.5*sin(2.5*X[,1]))
#'  prop.xu <- (prop.x/(prop.x+Gamma*(1-prop.x)))*(abs(U)<=t.x) + (prop.x/(prop.x+ (1-prop.x)/Gamma))*(abs(U)>t.x)
#'  TT <- rbinom(n, size=1, prob=prop.xu)
#'  Y <- TT * Y1 + (1 - TT) * Y0
#'  
#'  return(list(Y1 = Y1, Y0 = Y0, Y = Y, X = X, T = TT))
#'}
#'
#' ## Generate confounded training data
#'  n <- 1000
#' n_test <- 5
#' p <- 10
#' Gamma <- 1.5
#' data <- data_model(n, p, Gamma, 2021)
#'
#' ## Generate confounded test data
#' test <- data_model(n_test, p, Gamma, 2022)
#' }
#'
#' ## Run sensitivity analysis with PAC-type guarantee
#' ## grf package needs to be installed
#' alpha <- 0.2
#' delta <- 0.1
#' res <- cfsens_pac(data$X, data$Y, data$T, 
#'                   alpha = alpha, delta = delta, null_type = "negative",
#'                   ps_fun = regression_forest, 
#'                   pred_fun = quantile_forest)
#' 
#' out_res <- predict(res, test$X, Y1_test = test$Y1, type = "att")
#' out_res
#' @export

cfsens_pac <- function(X, Y, T, 
                       alpha, delta,
                       null_type = c("sharp", "negative", "positive"),
                       score_type = c("cqr"),
                       ps_fun = regression_forest, 
                       ps = NULL, 
                       pred_fun = quantile_forest, 
                       train_prop = 0.75, train_id = NULL){
  
  ## Process the input
  score_type <- score_type[1]
  stopifnot(score_type %in% c("cqr"))
  null_type <- null_type[1]
  stopifnot(null_type %in% c("sharp", "negative", "positive"))

  ## Split the data into a training fold and a calibration fold
  n <- length(Y)
  if(is.null(train_id)){
    ntrain <- floor(n * train_prop)
    train_id <- sample(1:n, ntrain, replace = FALSE)
  }
  calib_id <- (1:n)[-train_id]

  X_train <- X[train_id,]
  T_train <- T[train_id]
  Y_train <- Y[train_id]
  ids1_train <- which(T_train == 1)

  X_calib <- X[calib_id,]
  T_calib <- T[calib_id]
  Y_calib <- Y[calib_id]
  ids1_calib <- which(T_calib == 1)

  ## Train the model of propensity scores on the training fold
  ps_mdl <- NULL
  if(is.null(ps)){
    ps_mdl <- ps_fun(X_train, T_train, num.threads = 1)
    ps <- predict(ps_mdl, X_calib)
  }

  ps0 <- ps[-ids1_calib,1]
  ps1 <- ps[ids1_calib,1]

  ## Compute P(T=1) and P(T=0) 
  p1 <- mean(T_train == 1)
  p0 <- mean(T_train == 0)

  ## Train the prediction function on the training fold
  pred_mdl0 <- pred_fun(X_train[-ids1_train,], Y_train[-ids1_train], num.threads = 1)
  pred_mdl1 <- pred_fun(X_train[ids1_train,], Y_train[ids1_train], num.threads = 1)

  ## Compute the nonconformity scores on the calibration fold
  if(score_type == "cqr"){

    q_lo0 <- predict(pred_mdl0, X_calib[-ids1_calib,], alpha)
    q_hi0 <- predict(pred_mdl0, X_calib[-ids1_calib,], 1 - alpha)
    q_lo1 <- predict(pred_mdl1, X_calib[ids1_calib,], alpha)
    q_hi1 <- predict(pred_mdl1, X_calib[ids1_calib,], 1 - alpha)
    
    ## H_0: Y(1) - Y(0) = 0
    if(null_type == "sharp"){
      score0 <- pmax(q_lo0 - Y_calib[-ids1_calib], Y_calib[-ids1_calib] - q_hi0)
      score1 <- pmax(q_lo1 - Y_calib[ids1_calib], Y_calib[ids1_calib] - q_hi1)
    }

    ## H0: Y(1) - Y(0) <= 0
    if(null_type == "negative"){
      score0 <-  Y_calib[-ids1_calib] - q_hi0
      score1 <- q_lo1 - Y_calib[ids1_calib]
    }

    ## H0: Y(1) - Y(0) >=0
    if(null_type == "positive"){
      score0 <- q_lo0 - Y_calib[-ids1_calib]
      score1 <- Y_calib[ids1_calib] - q_hi1
    }

  }
  
  ## Attach the results to the output object
  obj <- list(ps_mdl = ps_mdl, 
              pred_mdl0 = pred_mdl0, 
              pred_mdl1 = pred_mdl1, 
              ps0 = ps0, ps1 = ps1,
              score0 = score0,
              score1 = score1,
              alpha = alpha, delta = delta,
              p0 = p0, p1 = p1,
              score_type = score_type, 
              null_type = null_type)

  class(obj) <- "itepac"
  return(obj)

}

#' Prediction method for itepac objects
#'
#' Obtains gamma-values on a test dataset based on 
#' an \code{itepac} object from \code{link{cfsens_pac}}.
#'
#' @details When \code{type = "ate"}, the inference is valid unconditionally;
#' when \code{type = "att"}, the inference is valid conditional on T=1, and 
#' \code{Y1_test} should be provided;
#' when \code{type = "atc"}, the inference is valid conditional on T=0, and
#' \code{Y0_test} should be provided.
#'
#' 
#' @param obj an object of class \code{itepac}.
#' @param X_test testing covariates.
#' @param Y1_test the potential outcome when treated. 
#'                The default is \code{NULL}. See details.
#' @param Y0_test the potential outcome when not treated. 
#'                The default is \code{NULL}. See details.
#' @param type the type of inference target. Takes value in \{"ate", "att", "atc"\}. See details.
#' @param bnd_type the method for constructing lower bound for the CDFs. 
#'                 The default is "wsr".
#' @param Gamma_max the maximum value of Gamma to be considered for sensitivity analysis. The default is 5.
#' @param gamma_length the number of Gamma to be considered for sensitivity analysis. The default is 51.
#'
#' @return a vector of gamma-values.
#'
#' @export

predict.itepac <- function(obj, X_test, Y1_test = NULL, Y0_test = NULL,
                           type = c("ate", "att", "atc"),
                           bnd_type = c("wsr"), 
                           num_grids = 1000, rand_ind = NULL,
                           wsr_seed = 24601, max_gamma = 5, 
                           gamma_length = 51){
  ## Process the input
  type <- type[1]
  stopifnot(type %in% c("ate", "att", "atc"))
  bnd_type <- bnd_type[1]
  stopifnot(bnd_type %in% c("wsr"))

  ps_mdl <- obj$ps_mdl
  pred_mdl0 <- obj$pred_mdl0
  pred_mdl1 <- obj$pred_mdl1
  ps0 <- obj$ps0
  ps1 <- obj$ps1
  score0 <- obj$score0
  score1 <- obj$score1
  alpha <- obj$alpha
  delta <- obj$delta
  p0 <- obj$p0
  p1 <- obj$p1
  score_type <- obj$score_type
  null_type <- obj$null_type

  Gamma_grid <- seq(1, max_gamma, length.out = gamma_length)
  gamma_obj <- list(ps_mdl = ps_mdl, 
                    pred_mdl0 = pred_mdl0,
                    pred_mdl1 = pred_mdl1,
                    ps0 = ps0, ps1 = ps1,
                    score0 = score0, 
                    score1 = score1,
                    Gamma = NULL,
                    alpha = alpha,
                    delta = delta,
                    p0 = p0, p1 = p1,
                    score_type = score_type)  
  class(gamma_obj) <- "cfpac"
  
  ## Generate the predictions
  if(score_type == "cqr"){

    Y_pred_lo0 <- predict(pred_mdl0, X_test, alpha)
    Y_pred_hi0 <- predict(pred_mdl0, X_test, 1-alpha)
    Y_pred_lo1 <- predict(pred_mdl1, X_test, alpha)
    Y_pred_hi1 <- predict(pred_mdl1, X_test, 1-alpha)
    
  }

  ## Compute the likelihood ratio bounds for the calibration fold
  ind_cover <- c()
  if(type == "ate"){
    gamma_obj$alpha <- alpha / 2
    gamma_obj$delta <- delta / 2
  }
  for(Gamma in Gamma_grid){

    gamma_obj$Gamma <- Gamma

    ## Testing H0: Y(1) - Y(0) = 0
    if(null_type == "sharp"){
      gamma_obj$side <- "two"
      res_treated <- predict(gamma_obj, X_test, 
                             estimand = "treated", 
                             type = type,  bnd_type = bnd_type)
      res_control <- predict(gamma_obj, X_test, 
                             estimand = "control",
                             type = type, bnd_type = bnd_type)
    
      score_cutoff1 <- res_treated$score_cutoff
      score_cutoff0 <- res_control$score_cutoff
      Y0_lo <- Y_pred_lo0 - score_cutoff0
      Y0_hi <- Y_pred_hi0 + score_cutoff0
      Y1_lo <- Y_pred_lo1 - score_cutoff1
      Y1_hi <- Y_pred_hi1 + score_cutoff1
    
      if(type == "ate"){
        ite_lo <- Y1_lo - Y0_hi
        ite_hi <- Y1_hi - Y0_lo
      }

      if(type == "att"){
        ite_lo <- Y1_test - Y0_hi
        ite_hi <- Y1_test - Y0_lo
      }

      if(type == "atc"){
        ite_lo <- Y1_lo - Y0_test
        ite_hi <- Y1_hi - Y0_test
      }

      ind_cover <- cbind(ind_cover, (ite_lo <=0) * (ite_hi >=0))
    }

    ## Testing H0: Y(1) - Y(0) <= 0
    if(null_type == "negative"){
      gamma_obj$side <- "above"
      res_treated <- predict(gamma_obj, X_test, 
                             estimand = "treated", 
                             type = type,  bnd_type = bnd_type)
      gamma_obj$side <- "below"
      res_control <- predict(gamma_obj, X_test, 
                             estimand = "control",
                             type = type, bnd_type = bnd_type)
    
      score_cutoff1 <- res_treated$score_cutoff
      score_cutoff0 <- res_control$score_cutoff
      Y0_hi <- Y_pred_hi0 + score_cutoff0
      Y1_lo <- Y_pred_lo1 - score_cutoff1
    
      if(type == "ate"){
        ite_lo <- Y1_lo - Y0_hi
      }

      if(type == "att"){
        ite_lo <- Y1_test - Y0_hi
      }

      if(type == "atc"){
        ite_lo <- Y1_lo - Y0_test
      }

      ind_cover <- cbind(ind_cover, ite_lo <=0)
    }

  ## Testing H0: Y(1) - Y(0) >= 0
    if(null_type == "positive"){
      gamma_obj$side <- "below"
      res_treated <- predict(gamma_obj, X_test, 
                             estimand = "treated", 
                             type = type,  bnd_type = bnd_type)
      gamma_obj$side <- "above"
      res_control <- predict(gamma_obj, X_test, 
                             estimand = "control",
                             type = type, bnd_type = bnd_type)
    
      score_cutoff1 <- res_treated$score_cutoff
      score_cutoff0 <- res_control$score_cutoff
      Y0_lo <- Y_pred_lo0 - score_cutoff0
      Y1_hi <- Y_pred_hi1 + score_cutoff1
    
      if(type == "ate"){
        ite_hi <- Y1_hi - Y0_lo
      }

      if(type == "att"){
        ite_hi <- Y1_test - Y0_lo
      }

      if(type == "atc"){
        ite_lo <- Y1_hi - Y0_test
      }

      ind_cover <- cbind(ind_cover, ite_lo >=0)
    }
    
  }

  ## Generating the predictive interval
  hat_gamma <- apply(ind_cover, 1, get_hat_gamma, Gamma_grid)
  return(hat_gamma)
}
zhimeir/cfsensitivity documentation built on Feb. 10, 2022, 12:38 a.m.