inst/doc/intro-vignette.R

## ---- message=FALSE, warning=FALSE--------------------------------------------
gen_lr_dat <- function(n, seed=123)
{
set.seed(seed)
X_1 <- rnorm(n, sd = 1); X_2 <- rnorm(n, sd = 3) # generate x_1 and x_2
true_betas <- c(4,5) # generate true parameters
X <- model.matrix(~-1+X_1+X_2) # build the design matrix
Y <- rbinom(n, 1, 1/(1 + exp(-X %*% true_betas)) ) # generate Y from X and true_betas
dat  <-  data.frame(X_1=X_1, X_2=X_2, Y=Y) # build a simulated dataset
return(dat)
}

## ---- message=FALSE, warning=FALSE--------------------------------------------
dat <- gen_lr_dat(5000)
head(dat)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
mod <- glm(Y~-1 + X_1 + X_2, data=dat, family = "binomial")
thetas_hat <- list(thetas_1=coef(mod)[1], thetas_2=coef(mod)[2])

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
psi_1 <- expression( ((1/(1+exp(-(thetas_1 * X_1 + thetas_2 * X_2)))) - Y) * X_1 )
psi_2 <- expression( ((1/(1+exp(-(thetas_1 * X_1 + thetas_2 * X_2)))) - Y) * X_2 )
psi <- list(psi_1, psi_2)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
library(Mestim)
res <- get_vcov(data=dat, thetas=thetas_hat, M=psi)

## ---- message=FALSE, warning=FALSE--------------------------------------------
res$vcov

## ---- message=FALSE, warning=FALSE--------------------------------------------
vcov(mod)

## ---- message=FALSE, warning=FALSE--------------------------------------------
res$se

## ---- message=FALSE, warning=FALSE--------------------------------------------
gen_obs_dat <- function(n, seed=123)
{
set.seed(seed)
X <- rnorm(n) # generate X
A <- rbinom(n, 1, 1/(1 + exp(- 2 * X)) )  # generate treatment allocation A
X_mat <- model.matrix(~ -1 + X + A + A:X) # build the design matrix
true_gammas <- c(4,3,2) 
epsi <- rnorm(n,0,20) # generate gaussian noise 
Y <- (X_mat %*% true_gammas) + epsi # generate observed outcomes 
dat  <-  data.frame(X=X, A=A, Y=Y) # build a simulated dataset
return(dat)
}

## ---- message=FALSE, warning=FALSE--------------------------------------------
dat <- gen_obs_dat(5000)
head(dat)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
m <- lm(Y~ -1 + X + A + A:X, data = dat)
gamma_1_hat <- coef(m)[1]
gamma_2_hat <- coef(m)[2]
gamma_3_hat <- coef(m)[3]

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
delta_hat <- mean(gamma_2_hat + gamma_3_hat*dat$X)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
psi_1 <- expression( (Y - gamma_1*X - gamma_2*A - gamma_3*A*X) * X )
psi_2 <- expression( (Y - gamma_1*X - gamma_2*A - gamma_3*A*X) * A )
psi_3 <- expression( (Y - gamma_1*X - gamma_2*A - gamma_3*A*X) * A*X )
psi_4 <- expression( gamma_2 + gamma_3 * X - delta )
psi <- list(psi_1, psi_2, psi_3, psi_4)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
thetas_hat <- list(gamma_1=gamma_1_hat,
                  gamma_2=gamma_2_hat,
                  gamma_3=gamma_3_hat,
                  delta=delta_hat)

## ---- message=FALSE, warning=FALSE--------------------------------------------
res <- get_vcov(data=dat, thetas=thetas_hat, M=psi)
res$vcov

## ---- message=FALSE, warning=FALSE--------------------------------------------
boot_fun <- function(d, i=1:nrow(d)) {
  z<-d[i,]
  mod <- lm(Y~ -1 + X + A + A:X, data = z)
  gamma_1_hat <- coef(mod)[1]
  gamma_2_hat <- coef(mod)[2]
  gamma_3_hat <- coef(mod)[3]
  delta_hat <- mean(gamma_2_hat*1 + gamma_3_hat*1*z$X)
  return( c(gamma_1_hat, gamma_2_hat, gamma_3_hat, delta_hat) )
}
boot_start_time <- Sys.time()
res_boot <- boot::boot(dat, boot_fun, R=999)
boot_end_time <- Sys.time()
paste("Bootstrapping took", round(as.numeric(boot_end_time - boot_start_time), 2), "seconds.")

## ---- message=FALSE, warning=FALSE, echo=FALSE--------------------------------
cov(res_boot$t)
Mestim_start_time <- Sys.time()
res <- get_vcov(data=dat, thetas=thetas_hat, M=psi)
Mestim_end_time <- Sys.time()
#paste("Mestim took", round(as.numeric(Mestim_end_time - Mestim_start_time), 2), "seconds.")

## ---- message=FALSE, warning=FALSE--------------------------------------------
gen_dtr_dat <- function(n, seed=456)
{
set.seed(seed)
expit <- function(x) 1/(1+exp(-x))

X_1 <- rnorm(n, sd=.1)
S_1 <- exp(rnorm(n, mean = X_1, sd = .1))
A_1 <- rbinom(n, size = 1, prob = expit(-.1+log(S_1)))

X_2 <- (X_1>0) * rnorm(n, mean = 1.1*X_1 - .5 * A_1, sd=.05) + (X_1<0) * X_1
S_2 <- exp(rnorm(n, mean = X_2, sd = .1))
A_2 <- rbinom(n, size = 1, prob = expit(.1+log(S_2)+3*A_1))

X_3 <- (X_2>0) * rnorm(n, mean = 1.1*X_2 - .5 * A_2, sd=.05) + (X_2<0) * X_2
Y <- exp(rnorm(n, mean = X_3 + .1*(A_1 + A_2), sd = .1)) #0.1 penalty for treating

dat <- data.frame(S_1=S_1, A_1=A_1, S_2=S_2, A_2=A_2, Y)  
return(dat)
}

## ---- message=FALSE, warning=FALSE--------------------------------------------
dat <- gen_dtr_dat(5000)
head(dat)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
e_1 <- glm(A_1~I(log(S_1)), data=dat, family = "binomial")
delta_1_hat <- coef(e_1)[1]
delta_2_hat <- coef(e_1)[2]

e_2 <- glm(A_2~I(log(S_2)) + A_1 , data=dat, family = "binomial")
phi_1_hat <- coef(e_2)[1]
phi_2_hat <- coef(e_2)[2]
phi_3_hat <- coef(e_2)[3]

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
dat$log_S_1 <- log(dat$S_1) ; dat$log_S_2 <- log(dat$S_2) # For ease of programming

psi_1 <- expression( ((1/(1+exp(-(delta_1 + delta_2 * log_S_1)))) - A_1) * 1 )
psi_2 <- expression( ((1/(1+exp(-(delta_1 + delta_2 * log_S_1)))) - A_1) * log_S_1)

psi_3 <- expression( ((1/(1+exp(-(phi_1 + phi_2 * log_S_2 + phi_3 * A_1)))) - A_2) * 1 )
psi_4 <- expression( ((1/(1+exp(-(phi_1 + phi_2 * log_S_2 + phi_3 * A_1)))) - A_2) * log_S_2)
psi_5 <- expression( ((1/(1+exp(-(phi_1 + phi_2 * log_S_2 + phi_3 * A_1)))) - A_2) * A_1)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
# The regime we are interested in
dat$d_1 <- dat$S_1>1
dat$d_2 <- dat$S_2>1

# For ease of programming
dat$C_d <- with(dat, d_1==A_1 & d_2==A_2)

# Store the last element of psi
psi_6 <- expression( Y * C_d *
          (  (1+exp(-(delta_1 + delta_2 * log_S_1)))^d_1 * # numerator
             (1+exp(-(phi_1 + phi_2 * log_S_2 + phi_3 * A_1)))^d_2  ) 
          
      /   (  (1-(1+exp(-(delta_1 + delta_2 * log_S_1)))^(-1))^(1-d_1) * # denominator
             (1-(1+exp(-(phi_1 + phi_2 * log_S_2 + phi_3 * A_1)))^(-1))^(1-d_2)  ) 
- V
)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
psi <- list(psi_1, psi_2, psi_3, psi_4, psi_5, psi_6)

## ---- message=FALSE, warning=FALSE, results='hide'----------------------------
# Just delete "- V" from in the previous expression
# add _hat as appropriate
ipw_estimator <- expression( Y * C_d *
          (  (1+exp(-(delta_1_hat + delta_2_hat * log_S_1)))^d_1 * # numerator
             (1+exp(-(phi_1_hat + phi_2_hat * log_S_2 + phi_3_hat * A_1)))^d_2  ) 
          
      /   (  (1-(1+exp(-(delta_1_hat + delta_2_hat * log_S_1)))^(-1))^(1-d_1) * # denominator
             (1-(1+exp(-(phi_1_hat + phi_2_hat * log_S_2 + phi_3_hat * A_1)))^(-1))^(1-d_2)  )  
)

# Compute individual contribution and take the average
V_hat <- with(dat, mean(eval(ipw_estimator))) # Other ways to compute this quantity are OK too.

thetas_hat <- list(delta_1=delta_1_hat,
                   delta_2=delta_2_hat,
                   phi_1=phi_1_hat,
                   phi_2=phi_2_hat,
                   phi_3=phi_3_hat,
                   V=V_hat)

## ---- message=FALSE, warning=FALSE--------------------------------------------
res <- get_vcov(data=dat, thetas=thetas_hat, M=psi)
res$se

## ---- message=FALSE, warning=FALSE--------------------------------------------
boot_fun <- function(d, i=1:nrow(d)) {
  z<-d[i,]
  e_1 <- glm(A_1~I(log(S_1)), data=z, family = "binomial")
  e_2 <- glm(A_2~I(log(S_2)) + A_1 , data=z, family = "binomial")

  delta_1_hat <- coef(e_1)[1]
  delta_2_hat <- coef(e_1)[2]
  phi_1_hat <- coef(e_2)[1]
  phi_2_hat <- coef(e_2)[2]
  phi_3_hat <- coef(e_2)[3]

  ipw_estimator <- expression( z$Y * z$C_d *
            (  (1+exp(-(delta_1_hat + delta_2_hat * z$log_S_1)))^z$d_1 * # numerator
               (1+exp(-(phi_1_hat + phi_2_hat * z$log_S_2 + phi_3_hat * z$A_1)))^z$d_2  ) 
            
        /   (  (1-(1+exp(-(delta_1_hat + delta_2_hat * z$log_S_1)))^(-1))^(1-z$d_1) * # denominator
               (1-(1+exp(-(phi_1_hat + phi_2_hat * z$log_S_2 + phi_3_hat * z$A_1)))^(-1))^(1-z$d_2)  )  
  )

  V_hat <- mean(eval(ipw_estimator))
  return( c(delta_1_hat, delta_2_hat, phi_1_hat, phi_2_hat, phi_3_hat, V_hat) )
}
boot_start_time <- Sys.time()
res_boot <- boot::boot(dat, boot_fun, R=999)
boot_end_time <- Sys.time()
paste("Bootstrapping took", round(as.numeric(boot_end_time - boot_start_time), 2), "seconds.")

## ---- message=FALSE, warning=FALSE, echo=FALSE--------------------------------
res_boot
Mestim_start_time <- Sys.time()
res <- get_vcov(data=dat, thetas=thetas_hat, M=psi)
Mestim_end_time <- Sys.time()
#paste("Mestim took", round(as.numeric(Mestim_end_time - Mestim_start_time), 2), "seconds.")

Try the Mestim package in your browser

Any scripts or data that you put into this service are public.

Mestim documentation built on Dec. 28, 2022, 1:40 a.m.