R/generate_data_psr.R

Defines functions generate_data_psr

Documented in generate_data_psr

#' Generate data for compliance under pseudo-randomization.
#'
#' @param n sample size
#' @param p dimension of covariates
#' @param delta complier average causal effect
#' @param compliance_p compliance proportion
#'
#' @return data.frame including \code{id}, treatment assignment indicator \code{z}, complier indicator \code{c}, treatment receipt indicator \code{s}, outcome \code{y}, and covariates \code{x1, ..., xp}.
#' @importFrom withr with_seed
#' @importFrom tidyr spread
#' @importFrom tibble tibble
#' @export
#'
#' @examples
#' generate_data_psr(n = 100, p = 8)

generate_data_psr <- function(n, 
                             p,
                             delta = 0.5, 
                             compliance_p = 0.75
                             ) {
  # browser()
  x <- matrix(rnorm(n*p), n, p) ## covariates
  # w <- rnorm(n) ## additional covariate that only determines compliance
  theta <- withr::with_seed(0, rnorm(p))
  eta <- withr::with_seed(1, rnorm(p))
  gamma_1 <- withr::with_seed(2, rnorm(p))
  gamma_0 <- withr::with_seed(3, rnorm(p))
  
  pi_z <- plogis(x %*% theta)
  pi_c <- plogis(x %*% eta + qlogis(compliance_p)) ## probability of compliance
  z <- rbinom(n, prob = pi_z, size = 1) ## assignment indicator
  c <- rbinom(n, prob = pi_c, size = 1) ## compliance indicator
  epsilon <- rnorm(n)
  
  
  y_1c <- -1 + x %*% gamma_1 + delta + epsilon
  y_0c <- -1 + x %*% gamma_1 + epsilon
  y_1n <- x %*% gamma_0 + epsilon
  y_0n <- x %*% gamma_0 + epsilon
  
  sim_data <- tibble::tibble(id = rep(1:n, p),
                     x = c(x),
                     x_n = rep(paste('x', 1:p, sep = ''), each = n),
                     z = rep(z, p),
                     c = rep(c, p),
                     s = 1*(z == 1 & c == 1), ## receipt of treatment
                     y1c = rep(y_1c,p),
                     y0c = rep(y_0c,p),
                     y1n = rep(y_1n,p),
                     y0n = rep(y_0n,p),
                     y = dplyr::case_when(
                       s == 1 & c == 1 ~ y1c,
                       s == 0 & c == 1 ~ y0c,
                       s == 1 & c == 0 ~ y1n,
                       s == 0 & c == 0 ~ y0n)
  )
  sim_data <- tidyr::spread(sim_data, x_n, x)
  sim_data
}
denisagniel/synthate documentation built on April 16, 2020, 12:45 a.m.