gentmle: gentmle

Description Usage Arguments Examples

Description

General TMLE function that takes care of the bookkeeping of estimation and update steps.

Usage

1
2
3
gentmle(initdata, params, submodel = submodel_logit, loss = loss_loglik,
  depsilon = 1e-04, approach = "full", max_iter = 100, g.trunc = 1e-04,
  Q.trunc = 1e-04, simultaneous.inference = FALSE, ...)

Arguments

initdata,

dataframe with the following names: A is the treatment vector, Y is the outcome Qk is the initial prediction for the outcome, Q1k is the initial prediction setting A to 1, Q0k is the initial prediction for the outcome setting A = 0. gk is the initial fit for the treatment mechanism.

params,

named list of parameters to estimate. See define_param for details

submodel,

submodel along which to fluctuate

loss,

loss function to optimize

depsilon,

small epsilon, used for the recurisve approach only

approach,

One of initial, recursive (small delta), line, full

max_iter,

Maximum number of iteration steps. 100 is almost always more than sufficient for full or line approach. Try 10000 for recursive approach and very occasionally you might need more.

g.trunc,

To keep treatment mechanism probs between [g.trunc, 1 - g.trunc]. Prevents practical positivity violations from making highly variant estimates

Q.trunc,

To keep outcome prediction probs between [Q.trunc, 1 - Q.trunc]. This prevents infinite loss in case of log-likelihood loss

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
###### Example of TMLE for the treatment-specific mean E[Y_1]

Qbar0 <- function(A, W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    Qbar <- plogis(A + A * W1 + W2)
    return(Qbar)
}

g0 <- function(W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    # rep(0.5, nrow(W))
    plogis(0.25 * W1 - 0.1 * W2)
}

gen_data <- function(n = 1000, p = 2) {
    W <- matrix(rnorm(n * p), nrow = n)
    colnames(W) <- paste("W", seq_len(p), sep = "")
    A <- rbinom(n, 1, g0(W))
    u <- runif(n)
    Y <- as.numeric(u < Qbar0(A, W))
    data.frame(W, A, Y)
}

data <- gen_data(1000)
Wnodes <- grep("^W", names(data), value = T)
gk <- g0(data[, Wnodes])
Qk <- Qbar0(data$A, data[, Wnodes])
Q1k <- Qbar0(1, data[, Wnodes])

initdata <- data.frame(A = data$A, Y = data$Y, gk = gk, Qk = Qk, Q1k = Q1k)
result <- gentmle(initdata = initdata, params = list(param_EY1), approach = "recursive")
print(result)

###### Example of TMLE for the average treatment effect
library(gentmle2)
Qbar0 <- function(A, W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    Qbar <- plogis(A + A * W1 + W2)
    return(Qbar)
}

g0 <- function(W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    # rep(0.5, nrow(W))
    plogis(0.4 * W1 - 0.8 * W2
           +(W1>0) * 1+1
           )
}

gen_data <- function(n = 1000, p = 2) {
    W <- matrix(rnorm(n * p), nrow = n)
    colnames(W) <- paste("W", seq_len(p), sep = "")
    A <- rbinom(n, 1, g0(W))
    u <- runif(n)
    Y <- as.numeric(u < Qbar0(A, W))
    data.frame(W, A, Y)
}

# truth
data <- gen_data(5000000)
Wnodes <- grep("^W", names(data), value = T)
hist(gk <- g0(data[, Wnodes]), breaks = 200)
Qk <- Qbar0(data$A, data[, Wnodes])
Q1k <- Qbar0(1, data[, Wnodes])
Q0k <- Qbar0(0, data[, Wnodes])
ATE0 = mean(Q1k - Q0k)

# sample
data <- gen_data(300)
Wnodes <- grep("^W", names(data), value = T)
gk <- g0(data[, Wnodes])
hist(gk)
Qk <- Qbar0(data$A, data[, Wnodes])
Q1k <- Qbar0(1, data[, Wnodes])
Q0k <- Qbar0(0, data[, Wnodes])

# for 1 step TMLE a la Mark van der Laan's new work choose recursive approach with a lot
# of iterations
initdata <- data.frame(A = data$A, Y = data$Y, gk = gk, Qk = Qk, Q1k = Q1k, Q0k = Q0k)
# result <- gentmle(initdata = initdata, params = list(param_ATE),
#                   approach = "recursive", max_iter = 1)
# print(result)

# for iterative TMLE, choose full or line if 1-d parameter
result <- gentmle(initdata = initdata, params = list(param_ATE),
                  submodel = submodel_logit,
                  loss = loss_loglik,
                  approach = "line",
                  max_iter = 100)

# for iterative TMLE, choose full or line if 1-d parameter, weighted loss with intercept model
result1 <- gentmle(initdata = initdata, params = list(param_ATE),
                  submodel = submodel_logit_intercept,
                  loss = loss_loglik_wts,
                  approach = "line",
                  max_iter = 10000)
print(result)
print(result1)
ATE0
# initial ests will be the same
print(result1$initests)
colMeans(result1$Dstar)
colMeans(result$Dstar)
result1$converge
result$converge
###### Example of TMLE for variance of conditional average treatment effect or blip variance
###### var(E[Y|A=1, W] - E[Y|A=0, W])

Qbar0 <- function(A, W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    Qbar <- plogis(A + A * W1 + W2)
    return(Qbar)
}

g0 <- function(W) {
    W1 <- W[, 1]
    W2 <- W[, 2]
    # rep(0.5, nrow(W))
    plogis(0.25 * W1 - 0.1 * W2)
}

gen_data <- function(n = 1000, p = 2) {
    W <- matrix(rnorm(n * p), nrow = n)
    colnames(W) <- paste("W", seq_len(p), sep = "")
    A <- rbinom(n, 1, g0(W))
    u <- runif(n)
    Y <- as.numeric(u < Qbar0(A, W))
    data.frame(W, A, Y)
}

data <- gen_data(1000)
Wnodes <- grep("^W", names(data), value = T)
gk <- g0(data[, Wnodes])
Qk <- Qbar0(data$A, data[, Wnodes])
Q1k <- Qbar0(1, data[, Wnodes])
Q0k <- Qbar0(0, data[, Wnodes])

initdata <- data.frame(A = data$A, Y = data$Y, gk = gk, Qk = Qk, Q1k = Q1k, Q0k = Q0k)
result <- gentmle(initdata = initdata, params = list(param_sigmaATE), approach = "recursive")
print(result)

# for iterative TMLE, choose full--different approaches sometimes give slightly
# different answers
result <- gentmle(initdata = initdata, params = list(param_sigmaATE), approach = "full")
print(result)

###### Example of TMLE for simultaneous estimation of two parameters,
###### ATE and blip variance

# notice here we specify two parameters to be simultaneously estimated
initdata <- data.frame(A = data$A, Y = data$Y, gk = gk, Qk = Qk, Q1k = Q1k, Q0k = Q0k)
result <- gentmle(initdata = initdata, params = list(param_ATE, param_sigmaATE),
                  approach = "recursive", max_iter = 10000)
print(result)

# for iterative TMLE, choose full--different approaches sometimes give slightly
# different answers

# full approach computes a separate epsilon for each parameter
result <- gentmle(initdata = initdata, params = list(param_ATE, param_sigmaATE),
                  approach = "full")
print(result)

# line approach is the iterative analog of the 1 step TMLE
result <- gentmle(initdata = initdata, params = list(param_ATE, param_sigmaATE),
                  approach = "line")
print(result)

# recursive is the 1 step tmle as in Mark van der Laan's recent work
result <- gentmle(initdata = initdata, params = list(param_ATE, param_sigmaATE),
                  approach = "recursive", max_iter = 10000)

# One can also form simultaneous confidence bounds for numerous params using the
# influence curves by specifying simultaneous.inference = TRUE

result <- gentmle(initdata = initdata, params = list(param_ATE, param_sigmaATE),
                  approach = "recursive", max_iter = 10000, simultaneous.inference = TRUE)
print(result)

jeremyrcoyle/gentmle2 documentation built on May 19, 2019, 5:08 a.m.