Description Usage Format Details References
A dataset simulated as in Sun and Tan (2020), Section 4.
data(simu.iv.data)
A data matrix with 800 rows and 203 columns.
The dataset is generated as follows, where `y`, `iv`, `tr`, and `x` represent an outcome, an instrumental variable, a treatment, and covariates, respectively.
# Integrand used when computing v2 by numerical integration: the factor
# 1 / (1 + exp(z / b))^2 weighted by the standard normal density at z.
#
# z: numeric vector of evaluation points (the function is vectorized, as
#    integrate() requires).
# Relies on the global scale constant `b`, which must be defined before the
# function is called.
# Returns a numeric vector of the same length as z.
g <- function(z) {
  1 / (1 + exp(z / b))^2 * dnorm(z)
}
# Draw n values from a normal distribution truncated to (lft, rgt] by
# rejection sampling.
#
# n:   number of draws (a non-negative count).
# mu:  mean of the untruncated normal.
# sig: standard deviation of the untruncated normal.
# lft: lower bound (exclusive).
# rgt: upper bound (inclusive).
# Returns a numeric vector of length n.
#
# Draws are made one at a time on purpose: this keeps the order in which
# random numbers are consumed fixed, so a given seed always gives the same
# result.
rnorm.trunct <- function(n, mu, sig, lft, rgt) {
  # An empty interval can never be hit, so the loop would not terminate.
  stopifnot(lft < rgt)
  x <- numeric(n)
  # seq_len() gives an empty loop for n = 0, where 1:n would iterate c(1, 0).
  for (i in seq_len(n)) {
    x[i] <- rnorm(1, mu, sig)
    # The condition is scalar, so use the short-circuit operator.
    while (x[i] <= lft || x[i] > rgt) {
      x[i] <- rnorm(1, mu, sig)
    }
  }
  x
}
### Means and variances of the transformed covariates, computed as in the
### preprint of Tan (2020); they centre and scale the transformations below.

# Truncation point, and the standard normal mass on (-a, a).
a <- 2.5
c <- 2 * pnorm(a) - 1
# Standard deviation of a standard normal truncated to (-a, a).
b <- sqrt(1 - 2 * a * dnorm(a) / c)

# First transformation: mean and variance.
m1 <- exp(1 / (8 * b^2)) *
  (pnorm(a - 1 / (2 * b)) - pnorm(-a - 1 / (2 * b))) / c
v1 <- exp(1 / (2 * b^2)) * (pnorm(a - 1 / b) - pnorm(-a - 1 / b)) / c - m1^2

# Second transformation: the variance has no closed form here, so g is
# integrated numerically over (-a, a).
m2 <- 10
v2 <- 1 / c * integrate(g, -a, a)$value

# Third transformation: needs the fourth and sixth moments of a covariate.
m3 <- 3 / (25^2) * 0.6 + (0.6)^3
mu4 <- local({
  at_upper <- 3 / 2 * (2 * pnorm(a) - 1) - a * (a^2 + 3) * dnorm(a)
  at_lower <- 3 / 2 * (2 * pnorm(-a) - 1) - (-a) * ((-a)^2 + 3) * dnorm(-a)
  (1 / (b^4 * c)) * (at_upper - at_lower)
})
mu6 <- local({
  at_upper <- 15 / 2 * (2 * pnorm(a) - 1) -
    a * (a^4 + 5 * a^2 + 15) * dnorm(a)
  at_lower <- 15 / 2 * (2 * pnorm(-a) - 1) -
    (-a) * ((-a)^4 + 5 * (-a)^2 + 15) * dnorm(-a)
  (1 / (b^6 * c)) * (at_upper - at_lower)
})
v3 <- mu6^2 / 25^6 + 15 * mu4^2 / 25^4 * 0.6^2 + 15 / 25^2 * 0.6^4 + 0.6^6 -
  m3^2

# Fourth transformation: mean and variance.
m4 <- 2 + 20^2
v4 <- (2 * mu4 + 6) + 6 * 2 * 20^2 + 20^4 - m4^2
###
# Fix the RNG seed so that the simulated dataset is reproducible.
set.seed(120)
# Number of rows (n) and number of covariates (p).
n<- 800
p<- 200
# covariates: n-by-p matrix of normal draws truncated to (-a, a], divided by b
x<- matrix(rnorm.trunct(p*n, 0, 1, -a, a),n,p)/b
# transformation: z starts as a copy of x; its first four columns are then
# replaced by nonlinear functions of x, each centred by m1..m4 and scaled by
# sqrt(v1)..sqrt(v4)
z<- x
z[,1] <- (exp(0.5*x[,1])-m1)/sqrt(v1);
z[,2] <- (10+x[,2]/(1+exp(x[,1]))-m2)/sqrt(v2);
z[,3] <- ((0.04*x[,1]*x[,3]+0.6)^3-m3)/sqrt(v3);
z[,4] <- ((x[,2]+x[,4]+20)^2-m4)/sqrt(v4);
# instrumental variable: Bernoulli draws with probability expit(eta)
# NOTE(review): as written, eta is the n-by-4 matrix z[,1:4] rather than a
# length-n linear predictor; a `%*%` product with a coefficient vector may
# have been lost from this listing -- confirm against the package source.
# NOTE(review): expit() is not defined in the code shown here; presumably it
# is the inverse-logit function -- confirm where it comes from.
eta<- z[,1:4]
iv<- rbinom(n,1,prob=expit(eta));
# unmeasured confounder in latent index model: standard logistic draws
u<- rlogis(n, location = 0, scale = 1);
# treatment: indicator that the index eta.d is at least u
# NOTE(review): as written, eta.d is an n-by-5 matrix, so tr would have
# length 5*n; presumably a coefficient vector is missing here too -- confirm.
eta.d<- 1+cbind(iv,z[,1:4])
tr<- as.numeric(eta.d >=u);
# outcome: normal draws with mean eta.y and unit standard deviation; `late`
# is the coefficient on the treatment in the outcome mean
# NOTE(review): as written, eta.y adds a vector to the n-by-4 matrix z[,1:4];
# presumably a coefficient vector is missing here as well -- confirm.
late <- 1
eta.y <- late*tr +z[,1:4]
y <- rnorm(n, mean=eta.y, sd=1)
# save; if using main effects of x, then both the instrument propensity score
# and outcome models are misspecified
# Columns are y, tr, iv, followed by the p columns of x.
simu.iv.data <- cbind(y,tr,iv,x)
save(simu.iv.data, file="simu.iv.data.rda")
|
Tan, Z. (2020) Model-assisted inference for treatment effects using regularized calibrated estimation with high-dimensional data, Annals of Statistics, 48, 811–837.
Sun, B. and Tan, Z. (2020) High-dimensional model-assisted inference for local average treatment effects with instrumental variables, arXiv:2009.09286.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.