simu.iv.data: Simulated instrumental variable data

Description Usage Format Details References

Description

A dataset simulated as in Sun and Tan (2020), Section 4.

Usage

data(simu.iv.data)

Format

A data matrix with 800 rows and 203 columns.

Details

The dataset is generated as follows, where y, iv, tr and x represent an outcome, an instrumental variable, a treatment, and covariates respectively.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Integrand passed to integrate() when computing v2: the squared logistic-type
# weight 1 / (1 + exp(z / b))^2 multiplied by the standard normal density at z.
# Vectorised in z. Reads the scale `b` from the enclosing (global) environment
# at call time.
g <- function(z) {
  sq_weight <- 1 / (1 + exp(z / b))^2
  sq_weight * dnorm(z)
}

# Draw `n` values from a normal distribution with mean `mu` and standard
# deviation `sig`, truncated to the half-open interval (lft, rgt], by
# rejection sampling.
#
# Arguments:
#   n    number of values to return; 0 yields an empty numeric vector.
#   mu   mean of the untruncated normal.
#   sig  standard deviation of the untruncated normal.
#   lft  lower bound (exclusive).
#   rgt  upper bound (inclusive).
#
# Returns a numeric vector of length `n`.
#
# Candidates are drawn one at a time with rnorm(1, mu, sig), so for a given
# seed the accepted values are the same as with a plain sequential
# draw-and-retry loop.
rnorm.trunct <- function(n, mu, sig, lft, rgt) {
  # An empty interval can never accept a draw; fail instead of looping forever.
  if (!isTRUE(lft < rgt)) {
    stop("'lft' must be strictly less than 'rgt'", call. = FALSE)
  }

  x <- numeric(n)
  # seq_len() gives an empty loop for n == 0, whereas 1:n would run over c(1, 0).
  for (i in seq_len(n)) {
    repeat {
      draw <- rnorm(1, mu, sig)
      if (draw > lft && draw <= rgt) {
        break
      }
    }
    x[i] <- draw
  }
  x
}

### Means (m1..m4) and variances (v1..v4) of the four nonlinear covariate
### transformations, computed as in the preprint of Tan (2020). They are used
### further down to centre and scale z[,1] .. z[,4].

# a: truncation point; c: mass of N(0,1) on (-a, a);
# b: standard deviation of N(0,1) truncated to (-a, a).
a <- 2.5
c <- 2 * pnorm(a) - 1
b <- sqrt(1 - 2 * a * dnorm(a) / c)

# Transformation 1: exp(0.5 * x1)
m1 <- local({
  half_shift <- 1 / (2 * b)
  exp(1 / (8 * b^2)) * (pnorm(a - half_shift) - pnorm(-a - half_shift)) / c
})
v1 <- exp(1 / (2 * b^2)) * (pnorm(a - 1 / b) - pnorm(-a - 1 / b)) / c - m1^2

# Transformation 2: 10 + x2 / (1 + exp(x1)); variance by numerical integration
m2 <- 10
v2 <- 1 / c * integrate(g, lower = -a, upper = a)$value

# Transformation 3: (0.04 * x1 * x3 + 0.6)^3; mu4 and mu6 are the fourth and
# sixth moments of a single covariate, obtained as a difference of the same
# closed-form expression evaluated at a and at -a.
m3 <- 3 / (25^2) * 0.6 + (0.6)^3
mu4 <- local({
  part4 <- function(w) 3/2 * (2 * pnorm(w) - 1) - w * (w^2 + 3) * dnorm(w)
  (1 / (b^4 * c)) * (part4(a) - part4(-a))
})
mu6 <- local({
  part6 <- function(w) {
    15/2 * (2 * pnorm(w) - 1) - w * (w^4 + 5 * w^2 + 15) * dnorm(w)
  }
  (1 / (b^6 * c)) * (part6(a) - part6(-a))
})
v3 <- mu6^2 / 25^6 + 15 * mu4^2 / 25^4 * 0.6^2 + 15 / 25^2 * 0.6^4 + 0.6^6 - m3^2

# Transformation 4: (x2 + x4 + 20)^2
m4 <- 2 + 20^2
v4 <- (2 * mu4 + 6) + 6 * 2 * 20^2 + 20^4 - m4^2

###

# Fix the random number generator state so the dataset can be regenerated.
set.seed(120)

# Sample size and number of covariates.
n <- 800
p <- 200

# Covariates: n * p draws from rnorm.trunct(mean 0, sd 1, bounds -a and a),
# arranged column by column into an n x p matrix and divided by b.
x <- matrix(rnorm.trunct(p * n, 0, 1, -a, a), nrow = n, ncol = p) / b

# Transformed covariates: z starts as a copy of x; its first four columns are
# replaced by nonlinear functions of x, each centred by its mean (m1..m4) and
# divided by the square root of its variance (v1..v4).
z <- x
z[, 1] <- (exp(0.5 * x[, 1]) - m1) / sqrt(v1)
z[, 2] <- (10 + x[, 2] / (1 + exp(x[, 1])) - m2) / sqrt(v2)
z[, 3] <- ((0.04 * x[, 1] * x[, 3] + 0.6)^3 - m3) / sqrt(v3)
z[, 4] <- ((x[, 2] + x[, 4] + 20)^2 - m4) / sqrt(v4)

# instrumental variable
# Binary instrument drawn with probability expit(eta).
# NOTE(review): as written, `eta` is the n x 4 matrix z[,1:4], so `prob` is a
# matrix rather than one probability per observation. The line looks
# truncated -- possibly a `%*%` product with a coefficient vector was lost
# when the help page was rendered (an unescaped `%` starts a comment in Rd
# sources). Confirm against the package source / Sun and Tan (2020), Section 4.
# NOTE(review): `expit` is not defined anywhere in this listing; presumably
# the inverse logit 1/(1+exp(-x)) -- confirm.

eta<- z[,1:4]
iv<- rbinom(n,1,prob=expit(eta));

# unmeasured confounder in latent index model
# One standard logistic draw per observation.
u<- rlogis(n, location = 0, scale = 1);

# treatment
# Treatment indicator: 1 where the index eta.d is at least u, else 0.
# NOTE(review): as written, `eta.d` is an n x 5 matrix (1 + cbind(iv, z[,1:4])),
# so `tr` would have 5*n entries; a trailing `%*%` coefficient vector appears
# to be missing here as well -- confirm.
eta.d<- 1+cbind(iv,z[,1:4])
tr<- as.numeric(eta.d >=u);

# outcome
# Normal outcome with unit standard deviation; `late` multiplies the treatment
# indicator in the mean.
# NOTE(review): as written, `eta.y` is an n x 4 matrix; the same suspected
# `%*%` truncation applies -- confirm.
late  <- 1
eta.y <- late*tr +z[,1:4]
y <- rnorm(n, mean=eta.y, sd=1)

# save; if using main effects of x, then both the instrument propensity score
# and outcome models are misspecified
# Columns of the saved matrix: y, tr, iv, followed by the p columns of the
# untransformed covariates x.

simu.iv.data <- cbind(y,tr,iv,x)

save(simu.iv.data, file="simu.iv.data.rda")

References

Tan, Z. (2020) Model-assisted inference for treatment effects using regularized calibrated estimation with high-dimensional data, Annals of Statistics, 48, 811–837.

Sun, B. and Tan, Z. (2020) High-dimensional model-assisted inference for local average treatment effects with instrumental variables, arXiv:2009.09286.


RCAL documentation built on Nov. 8, 2020, 4:22 p.m.