Nothing
## ---- include = FALSE---------------------------------------------------------
library(drcarlate)
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
# Parameter dgpflag declares three ways to generate random data.
# FuncDGP will generate random data according to the method specified in (i), (ii) or (iii) when dgpflag = 1, 2 or 3 respectively.
# Parameter rndflag declares four ways to randomly assign treatment effects.
# rndflag = 1 - SRS; rndflag = 2 - WEI; rndflag = 3 - BCD; rndflag = 4 - SBR
# Note that CovadpRnd is built into FuncDGP, so it is not necessary to use CovadpRnd alone in the actual operation of generating random data
# Let's take dgpflag=1 and rndflag=2 for example
random_dgp <- FuncDGP(dgptype = 1, rndflag = 2, n = 100, g = 4, pi = c(0.5, 0.5, 0.5, 0.5))
# We can see that the return value of FuncDGP is a list of nine matrices, We can easily extract what we need from it
Y <- random_dgp$Y
X <- random_dgp$X
S <- random_dgp$S
A <- random_dgp$A
Y1 <- random_dgp$Y1
Y0 <- random_dgp$Y0
D1 <- random_dgp$D1
D0 <- random_dgp$D0
D <- random_dgp$D
## -----------------------------------------------------------------------------
# compute estimated LATE
tauhat <- tau(muY1 = Y1, muY0 = Y0, muD1 = D1, muD0 = D0, A = A, S = S, Y = Y, D = D)
#compute estimated treatment assignment probabilities
pihat(A = A, S = S)
# compute estimated standard deviation
stanE(muY1 = Y1, muY0 = Y0, muD1 = D1, muD0 = D0, A = A, S = S, Y = Y, D = D, tauhat = tauhat)
## ---- message=FALSE,results='hide'--------------------------------------------
# let's take dgpflag = 1 for example.
true_value <- TrueValue(dgptype = 1, vIdx = 1:4, n = 100, g = 4, pi = c(0.5, 0.5, 0.5, 0.5))
true_tau <- true_value$tau
## -----------------------------------------------------------------------------
# SRS - WEI - BCD - SBR
true_tau
## -----------------------------------------------------------------------------
# remember that we set dgpflag = 1 and rndflag = 2 before
LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = 4, modelflag = 1, iridge = 0.001)
## -----------------------------------------------------------------------------
# set random seed
set.seed(1)
## ---- results='hide'----------------------------------------------------------
# get true tau
true_tau <- TrueValue(dgptype = 1, vIdx = 1:4, n = 1000, g = 4, pi = c(0.5, 0.5, 0.5, 0.5))
## ---- warning=FALSE-----------------------------------------------------------
# see the output: size, iPert = 0
Output(ii = 1, tau = tauhat[1], dgptype = 1, rndflag = 1, n = 1000, g = 4,
pi = c(0.5, 0.5, 0.5, 0.5),
iPert = 0, iq = 0.05, iridge = 0.001)
# see the output: power, iPert = 1
Output(ii = 1, tau = tauhat[1], dgptype = 1, rndflag = 1, n = 1000, g = 4,
pi = c(0.5, 0.5, 0.5, 0.5),
iPert = 1, iq = 0.05, iridge = 0.001)
## -----------------------------------------------------------------------------
# For example, if we wanted to get the results shown in Panel A in Table 1, we could run the following two functions separately.
# First of all, There are four strata data (g = 4), the probability of data being treated at each strata is equal to 0.5 (pi = c(0.5, 0.5, 0.5, 0.5)), the random data generation process follows the data generation process 1 (DGP = 1), the total sample size is 200 (n = 200), run 10,000 Monte Carlo simulations (iMonte = 10000), the confidence level for hypothesis testing is 5%, and finally,get the size of the Monte Carlo simulation (iPert = 0).
# Time Consumeing. This command will take about 1.4 hours to execute, so do not run it unless necessary.
# JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 0, iq = 0.05, iridge = 0.001)
# Second of all, There are four strata data (g = 4), the probability of data being treated at each strata is equal to 0.5 (pi = c(0.5, 0.5, 0.5, 0.5)), the random data generation process follows the digital generation process 1 (DGP = 1), the total sample size was 200 (n = 200), run 10,000 Monte Carlo simulations (iMonte = 10000), the confidence level for hypothesis testing is 5%, and finally,get the power of the Monte Carlo simulation (iPert = 1).
# Time Consumeing. This command will take about 1.4 hours to execute, so do not run it unless necessary.
# JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5, 0.5, 0.5, 0.5), iPert = 1, iq = 0.05, iridge = 0.001)
## -----------------------------------------------------------------------------
# With the above Settings in mind, we get the following JLTZ function.
# JLTZ(iMonte = 10000, dgptype = 1, n = 200, g = 4, pi = c(0.5,0.5,0.5,0.5), iPert = 0,iq = 0.05, iridge = 0.001)
# Since it takes about 1.4 hours to run this function, we'll give you the results directly that users can check them out themselves.
# vProb_d1 vProb_d2 vProb_d3 vProb_d4
# [1,] 0.03139898 0.03548546 0.03071509 0.03139898
# [2,] 0.04356403 0.04189256 0.03951368 0.04356403
# [3,] 0.04307085 0.04238541 0.03919373 0.04307085
# [4,] 0.05622226 0.04632824 0.05327148 0.05622226
# [5,] 0.10833470 0.08854937 0.09486482 0.10833470
# [6,] NaN NaN NaN NaN
# [7,] 0.03435805 0.03334976 0.03455447 0.03435805
# [8,] 0.04586553 0.05076392 0.05087186 0.04586553
## -----------------------------------------------------------------------------
# Set up ------------------------------------------------------------------
library(drcarlate)
library(pracma)
# Load data ---------------------------------------------------------------
# data_for_final.csv add covariates b_total_income log_b_total_income b_exp_total_30days
# edu_index b_asset_index b_health_index to data_for_tab4.csv
data_table <- drcarlate::data_table
data1 <- as.matrix(data_table)
colnames(data1) <- NULL
# create S for wave 1: number of stratnum, total 41 stratnum
n <- size(data1,1)
S <- zeros(n,1)
for (i in 1:41) {
S[data1[, size(data1,2)-i+1] == 1] <- 41 - i + 1
}
data1 <- cbind(data1, S)
# create Y, X, S, D: number of outcomes considered is 9:
# save the results for NA, TSLS, L, NL, F
vtauhat <- NaN * ones(8,5)
vsighat <- NaN * ones(8,5)
n_vec <- NaN * ones(8,1)
iridge = 0.01 #tuning parameter for in ridge regression
for (i in 1:8) {
if (i <= 4) {
# data_used: 1st col- Y, 2nd col- A, 3rd col- D, 4~(end-1)th col- X,
# last col is strata number
data_used <- cbind(data1[, 4+(i-1)*3], data1[, 2:3], data1[, 5:6],
data1[, size(data1,2)-1-41], data1[, size(data1,2)])
} else {
data_used <- cbind(data1[, 4+(i-1)*3], data1[,2:3], data1[, (4+(i-1)*3+1):(4+(i-1)*3+2)],
data1[, size(data1,2)-1-41], data1[, size(data1,2)])
}
# delete the outcome variables with NaN
data_used <- data_used[!is.nan(data_used[,1]), ]
# check whether a strata has obs less than 10
for (s in 1:41) {
if (length(data_used[data_used[, size(data_used,2)] == s,1]) < 10) {
# delete strata has obs less than 10
data_used[data_used[, size(data_used,2)] ==s, ] <- matrix()
data_used <- data_used[!is.na(data_used[,1]),]
}
}
# update n
n <- size(data_used,1)
n_vec[i] <- n
# find the unique value for stratnum
stratnum <- sort(base::unique(data_used[, size(data_used,2)]))
# create A
A <- matrix(data_used[, 2])
# create D
D <- matrix(data_used[, 3])
# create Y
Y <- matrix(data_used[, 1])
# create S
S <- matrix(data_used[, size(data_used,2)])
# create X
# use baseline total income
X <- data_used[,4:6]
# make a index
print(stringr::str_c("Now i equals to ", i, " !"))
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# %% No adjustment (z) NA
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
muY0_z <- zeros(n,1)
muY1_z <- zeros(n,1)
muD0_z <- zeros(n,1)
muD1_z <- zeros(n,1)
vtauhat[i,1] <- tau(muY1 = muY1_z, muY0 = muY0_z, muD1 = muD1_z, muD0 = muD0_z,
A = A, S = S, Y = Y, D = D, stratnum = stratnum)
vsighat[i,1] <- stanE(muY1 = muY1_z, muY0 = muY0_z, muD1 = muD1_z, muD0 = muD0_z,
A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,1], stratnum = stratnum)/sqrt(n)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# %% TSLS
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
mS_iv <- zeros(n, length(stratnum))
for (j in 1:length(stratnum)) {
s <- stratnum[j]
mS_iv[S==s, j] <- 1
}
mX_iv <- cbind(D, X, mS_iv)
mZ_iv <- cbind(A, X, mS_iv)
K <- size(mX_iv,2)
vPara_iv <- inv(t(mZ_iv) %*% mX_iv) %*% t(mZ_iv) %*% Y
mE_iv2 <- diag(((Y - mX_iv %*% vPara_iv)^2)[,])
m0_iv <- inv(t(mZ_iv) %*% mX_iv/n) %*% (t(mZ_iv) %*% mE_iv2 %*% mZ_iv/n) %*% inv(t(mX_iv) %*% mZ_iv/n)
vtauhat[i,2] <- vPara_iv[1]
vsighat[i,2] <- sqrt(m0_iv[1,1])/sqrt(n)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# %% Linear+linear model (L)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
muY0_a <- NaN*ones(n,1)
muY1_a <- NaN*ones(n,1)
muD0_a <- NaN*ones(n,1)
muD1_a <- NaN*ones(n,1)
for (j in 1:length(stratnum)) {
s = stratnum[j]
result <- LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = s, modelflag = 1, iridge = iridge)
theta_0s_a <- result[["theta_0s"]]
theta_1s_a <- result[["theta_1s"]]
beta_0s_a <- result[["beta_0s"]]
beta_1s_a <- result[["beta_1s"]]
muY0_a[S==s] <- X[S==s,] %*% theta_0s_a
muY1_a[S==s] <- X[S==s,] %*% theta_1s_a
muD0_a[S==s] <- X[S==s,] %*% beta_0s_a
muD1_a[S==s] <- X[S==s,] %*% beta_1s_a
}
vtauhat[i,3] <- tau(muY1 = muY1_a, muY0 = muY0_a, muD1 = muD1_a, muD0 = muD0_a,
A = A, S = S, Y = Y, D = D, stratnum = stratnum)
vsighat[i,3] <- stanE(muY1 = muY1_a, muY0 = muY0_a, muD1 = muD1_a, muD0 = muD0_a,
A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,3], stratnum = stratnum)/sqrt(n)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# %% Linear+logistic model (NL)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
muY0_b <- NaN*ones(n,1)
muY1_b <- NaN*ones(n,1)
muD0_b <- NaN*ones(n,1)
muD1_b <- NaN*ones(n,1)
for (j in 1:length(stratnum)) {
s = stratnum[j]
result <- LinearLogit(Y = Y, D = D, A = A, X = X, S = S, s = s, modelflag = 2, iridge = iridge)
theta_0s_b <- result[["theta_0s"]]
theta_1s_b <- result[["theta_1s"]]
beta_0s_b <- result[["beta_0s"]]
beta_1s_b <- result[["beta_1s"]]
muY0_b[S==s] <- X[S==s,] %*% matrix(theta_0s_b[2:length(theta_0s_b)]) + theta_0s_b[1]
muY1_b[S==s] <- X[S==s,] %*% matrix(theta_1s_b[2:length(theta_0s_b)]) + theta_1s_b[1]
muD0_b[S==s] <- LogisticReg(x = (X[S==s,] %*% matrix(beta_0s_b[-1]) + beta_0s_b[1]))
muD1_b[S==s] <- LogisticReg(x = (X[S==s,] %*% matrix(beta_1s_b[-1]) + beta_1s_b[1]))
}
vtauhat[i,4] <- tau(muY1 = muY1_b, muY0 = muY0_b, muD1 = muD1_b, muD0 = muD0_b,
A = A, S = S, Y = Y, D = D, stratnum = stratnum)
vsighat[i,4] <- stanE(muY1 = muY1_b, muY0 = muY0_b, muD1 = muD1_b, muD0 = muD0_b,
A = A, S = S, Y = Y, D = D,tauhat = vtauhat[i,4], stratnum = stratnum)/sqrt(n)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# %% Further Efficiency Improvement
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
X_c <- cbind(X, muD1_b, muD0_b)
muY0_c <- NaN*ones(n,1)
muY1_c <- NaN*ones(n,1)
muD0_c <- NaN*ones(n,1)
muD1_c <- NaN*ones(n,1)
for (j in 1:length(stratnum)) {
s = stratnum[j]
result <- LinearLogit(Y = Y, D = D, A = A, X = X_c, S = S, s = s, modelflag = 1, iridge = iridge)
theta_0s_c <- result[["theta_0s"]]
theta_1s_c <- result[["theta_1s"]]
beta_0s_c <- result[["beta_0s"]]
beta_1s_c <- result[["beta_1s"]]
muY0_c[S==s] <- X_c[S==s,] %*% theta_0s_c
muY1_c[S==s] <- X_c[S==s,] %*% theta_1s_c
muD0_c[S==s] <- X_c[S==s,] %*% beta_0s_c
muD1_c[S==s] <- X_c[S==s,] %*%beta_1s_c
}
vtauhat[i,5] <- tau(muY1 = muY1_c, muY0 = muY0_c, muD1 = muD1_c, muD0 = muD0_c,
A = A, S = S, Y = Y, D = D, stratnum = stratnum)
vsighat[i,5] <- stanE(muY1 = muY1_c, muY0 = muY0_c, muD1 = muD1_c, muD0 = muD0_c,
A = A, S = S, Y = Y, D = D, tauhat = vtauhat[i,5], stratnum = stratnum)/sqrt(n)
}
# show the LATE estimates and standard errors in Table 5 of Jiang et al.(2022).
# LATE Estimates
vtauhat
# Standard Errors
vsighat
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.