WEE-package: Weighted Estimated Equation (WEE) Approaches in Genetic...

Description Author(s) References Examples

Description

Secondary analysis of case-control studies using a weighted estimating equation (WEE) approach: logistic regression for binary secondary outcomes, linear regression and quantile regression for continuous secondary outcomes.

Author(s)

Xiaoyu Song, Iuliana Ionita-Laza, Mengling Liu, Joan Reitman, Ying Wei

Maintainer: Wodan Ling <wl2459@columbia.edu>

References

[1] Ying Wei, Xiaoyu Song, Mengling Liu, Iuliana Ionita-Laza and Joan Reibman (2016). Quantile Regression in the Secondary Analysis of Case Control Data. Journal of the American Statistical Association, 111:513, 344-354; DOI: 10.1080/01621459.2015.1008101

[2] Xiaoyu Song, Iuliana Ionita-Laza, Mengling Liu, Joan Reibman, Ying Wei (2016). A General and Robust Framework for Secondary Traits Analysis. Genetics, vol. 202 no. 4 1329-1343; DOI: 10.1534/genetics.115.181073

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#---------------------- WEE logistic regression ----------------------#
## Generate simulated data
# set population size as 500000
n = 500000 

# set parameters
beta = c(0.2, 0.1) # P(Y|X,Z)
gamma = c(0.3, log(2), log(2)) #P(D|X,Y,Z)

# generate the genetic variant X
x = rbinom(n,size=2,prob=0.3)

# generate the standardized continuous covariate Z correlated with X
z = rnorm(n, mean=0.5*x-0.3, sd=1)

# generate the binary secondary trait Y
py = exp(-1+beta[1]*x+beta[2]*z)/
        (1+exp(-1+beta[1]*x+beta[2]*z))
y = rbinom(n,1, py)

# generate the primary disease D 
# (alpha changes to make sure the disease prevalence = 0.1 )
alpha = -2.88
pd = exp(alpha+x*gamma[1]+y*log(2)+z*log(2))/
(1+exp(alpha+x*gamma[1]+y*log(2)+z*log(2)))
d = rbinom(n,size=1,prob=pd)

# form the population dataset
dat = as.data.frame(cbind(d, y, z, x))

# generate sample dataset with 200 cases and 200 controls
dat_cases = dat[which(dat$d==1),]
dat_controls= dat[which(dat$d==0),]
dat_cases_sample = dat_cases[sample(sum(dat$d==1),
                             200,replace=FALSE),]
dat_controls_sample = dat_controls[sample(sum(dat$d==0), 
                                   200,replace=FALSE),]

dat_logistic = rbind(dat_cases_sample,dat_controls_sample)	
colnames(dat_logistic) = c("D", "y", "z","x")
D = dat_logistic$D # Disease status
pD = sum(dat$d==1)/500000 # Population disease prevalence

## WEE logsitic regression
WEE.logistic(y ~ x + z, D, 
             data = dat_logistic, pD)	  

WEE.logistic(y ~ x + z, D, 
             data = dat_logistic, pD, boot = 500)	 



#---------------------- WEE linear regression ----------------------#
## Generate simulated data
# set population size as 500000
n = 500000

# set parameters
beta = c(0.2, 0.1) # P(Y|X,Z)
gamma = c(0.3, log(2), log(2)) #P(D|X,Y,Z)

# generate the genetic variant X
x = rbinom(n,size=2,prob=0.3)

# generate the standardized continuous covariate Z correlated with X
z = rnorm(n, mean=0.5*x-0.3, sd=1)

# generate the continuous secondary trait Y
y = 1+beta[1]*x+beta[2]*z+rnorm(n)

# generate the primary disease D
alpha = -3.62
pd = exp(alpha + x*gamma[1] + y*log(2) + z*log(2))/
(1 + exp(alpha + x*gamma[1] + y*log(2) + z*log(2)))
d = rbinom(n,size=1,prob=pd)

# form population data set
dat=as.data.frame(cbind(d, y, z, x))

# generate sample dataset with 200 cases and 200 controls
dat_cases = dat[which(dat$d==1),]
dat_controls= dat[which(dat$d==0),]
dat_cases_sample = dat_cases[sample(sum(dat$d==1),
                             200, replace=FALSE),]
dat_controls_sample = dat_controls[sample(sum(dat$d==0),
                                   200, replace=FALSE),]
  
dat_linear=rbind(dat_cases_sample,dat_controls_sample)
colnames(dat_linear)=c("D", "y", "z","x")
D = dat_linear$D # Disease status
pD = sum(dat$d == 1)/500000 # Population disease prevalence

## WEE linear regresssion
WEE.linear(y ~ x + z, D, 
           data = dat_linear, pD)   

WEE.linear(y ~ x + z, D, 
          data = dat_linear, pD, boot = 500) 



#---------------------- WEE quantile regression ----------------------#
## Generate simulated data
# set population size as 500000
n = 500000 

# set parameters
beta = c(0.12, 0.1) # P(Y|X,Z)
gamma = c(-4, log(1.5), log(1.5), log(2) ) #P(D|X,Y,Z)

# generate the genetic variant X
x = rbinom(n,size=2,prob=0.3)
  
# generate the continuous covariate Z
z = rnorm(n)
  
# generate the continuous secondary trait Y
y= 1 + beta[1]*x + beta[2]*z + (1+0.02*x)*rnorm(n)

# generate disease status D
p = exp(gamma[1]+x*gamma[2]+z*gamma[3]+y*gamma[4])/
(1+exp(gamma[1]+x*gamma[2]+z*gamma[3]+y*gamma[4]))
d = rbinom(n,size=1,prob=p)

# form population data dataset
dat = as.data.frame(cbind(x,y,z,d))
colnames(dat) = c("x","y","z","d")

# Generate sample dataset with 200 cases and 200 controls
dat_cases = dat[which(dat$d==1),]
dat_controls= dat[which(dat$d==0),]
dat_cases_sample = dat_cases[sample(sum(dat$d==1),
                            200, replace=FALSE),]
dat_controls_sample = dat_controls[sample(sum(dat$d==0),
                                   200, replace=FALSE),]

dat_quantile = as.data.frame(rbind(dat_cases_sample,
                                   dat_controls_sample))
colnames(dat_quantile) = c("x","y","z","D")
D = dat_quantile$D # Disease status
pd = sum(d==1)/n # population disease prevalence

# WEE quantile regressions:
WEE.quantile(y ~ x, D, tau = 0.5, 
             data = dat_quantile, pd_pop = pd)	  				

WEE.quantile(y ~ x + z, D, tau = 1:9/10, 
             data = dat_quantile, pd_pop = pd, boot = 500)

WEE documentation built on May 2, 2019, 6:43 a.m.