set.DAG: Create and Lock DAG Object

Description Usage Arguments Value References Examples

View source: R/simcausal.r

Description

Check current DAG (created with node) for errors and consistency of its node distributions, set the observed data generating distribution. Attempts to simulates several observations to catch any errors in DAG definition. New nodes cannot be added after function set.DAG has been applied.

Usage

1
2
set.DAG(DAG, vecfun, latent.v, n.test = 10,
  verbose = getOption("simcausal.verbose"))

Arguments

DAG

Named list of node objects that together will form a DAG. Temporal ordering of nodes is either determined by the order in which the nodes were added to the DAG (using +node(...) interface) or with an optional "order" argument to node().

vecfun

A character vector with names of the vectorized user-defined node formula functions. See examples and the vignette for more information.

latent.v

The names of the unobserved (latent) DAG node names. These variables will be hidden from the observed simulated data and will be marked differently on the DAG plot.

n.test

Non-negative simulation sample size used for testing the consistency of the DAG object. A larger n.test may be useful when simulating a network of a fixed size (see ?network) or when testing DAG for rare-event errors. A smaller n.test can be better for performance (faster validation of the DAG). Set n.test=0 to completely skip the simulation test (use at your own risk, since calling sim() on such un-tested DAG may lead to uninterpretable errors). Note that when using n.test=0, the plotDAG function will not draw any child-parent relationships, since the formula parsing is not performed.

verbose

Set to TRUE to print messages on status and information to the console. Turn this off by default using options(simcausal.verbose=FALSE).

Value

A DAG (S3) object, which is a list consisting of node object(s) sorted by their temporal order.

References

Sofrygin O, van der Laan MJ, Neugebauer R (2017). "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#---------------------------------------------------------------------------------------
# EXAMPLE 1A: Define some Bernoulli nodes, survival outcome Y and put it together in a
# DAG object
#---------------------------------------------------------------------------------------
W1node <- node(name = "W1", distr = "rbern",
	prob = plogis(-0.5), order = 1)
W2node <- node(name = "W2", distr = "rbern",
	prob = plogis(-0.5 + 0.5 * W1), order = 2)
Anode <- node(name = "A", distr = "rbern",
	prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2), order = 3)
Ynode <- node(name = "Y", distr = "rbern",
	prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2), order = 4)
D1Aset <- set.DAG(c(W1node,W2node,Anode,Ynode))

#---------------------------------------------------------------------------------------
# EXAMPLE 1B: Same as 1A using +node interface and no order argument
#---------------------------------------------------------------------------------------
D1B <- DAG.empty()
D1B <- D1B +
  node(name = "W1", distr = "rbern", prob = plogis(-0.5)) +
  node(name = "W2", distr = "rbern", prob = plogis(-0.5 + 0.5 * W1)) +
  node(name = "A", distr = "rbern", prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2)) +
  node(name = "Y", distr = "rbern", prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2))
D1Bset <- set.DAG(D1B)

#---------------------------------------------------------------------------------------
# EXAMPLE 1C: Same as 1A and 1B using add.nodes interface and no order argument
#---------------------------------------------------------------------------------------
D1C <- DAG.empty()
D1C <- add.nodes(D1C, node(name = "W1", distr = "rbern", prob = plogis(-0.5)))
D1C <- add.nodes(D1C, node(name = "W2", distr = "rbern",
	prob = plogis(-0.5 + 0.5 * W1)))
D1C <- add.nodes(D1C, node(name = "A", distr = "rbern",
	prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2)))
D1C <- add.nodes(D1C, node(name = "Y", distr = "rbern",
	prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2)))
D1C <- set.DAG(D1C)

#---------------------------------------------------------------------------------------
# EXAMPLE 1D: Add a uniformly distributed node and redefine outcome Y as categorical
#---------------------------------------------------------------------------------------
D_unif <- DAG.empty()
D_unif <- D_unif +
 node("W1", distr = "rbern", prob = plogis(-0.5)) +
 node("W2", distr = "rbern", prob = plogis(-0.5 + 0.5 * W1)) +
 node("W3", distr = "runif", min = plogis(-0.5 + 0.7 * W1 + 0.3 * W2), max = 10) +
 node("An", distr = "rbern", prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2 - 0.2 * sin(W3)))
# Categorical syntax 1 (probabilities as values):
D_cat_1 <- D_unif + node("Y", distr = "rcat.b1", probs = {0.3; 0.4})
D_cat_1 <- set.DAG(D_cat_1)
# Categorical syntax 2 (probabilities as formulas):
D_cat_2 <- D_unif +
node("Y", distr = "rcat.b1",
	probs={plogis(-0.1 + 1.2 * An + 0.3 * W1 + 0.3 * W2 + 0.2 * cos(W3));
			   plogis(-0.5 + 0.7 * W1)})
D_cat_2 <- set.DAG(D_cat_2)

#---------------------------------------------------------------------------------------
# EXAMPLE 2A: Define Bernoulli nodes using R rbinom() function, defining prob argument
# for L2 as a function of node L1
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D +
node("L1", t = 0, distr = "rbinom", prob = 0.05, size = 1) +
node("L2", t = 0, distr = "rbinom", prob = ifelse(L1[0] == 1, 0.5, 0.1), size = 1)
Dset <- set.DAG(D)

#---------------------------------------------------------------------------------------
# EXAMPLE 2B: Equivalent to 2A, passing argument size to rbinom inside a named list
# params
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D +
node("L1", t = 0, distr = "rbinom", prob = 0.05, params = list(size = 1)) +
node("L2", t = 0, distr = "rbinom",
	prob = ifelse(L1[0] == 1,0.5,0.1), params = list(size = 1))
Dset <- set.DAG(D)

#---------------------------------------------------------------------------------------
# EXAMPLE 2C: Equivalent to 2A and 2B, define Bernoulli nodes using a wrapper "rbern"
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D +
node("L1", t = 0, distr = "rbern", prob = 0.05) +
node("L2", t = 0, distr = "rbern", prob = ifelse(L1[0] == 1, 0.5, 0.1))
Dset <- set.DAG(D)

#---------------------------------------------------------------------------------------
# EXAMPLE 3: Define node with normal distribution using rnorm() R function
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D + node("L2", t = 0, distr = "rnorm", mean = 10, sd = 5)
Dset <- set.DAG(D)

#---------------------------------------------------------------------------------------
# EXAMPLE 4: Define 34 Bernoulli nodes, or 2 Bernoulli nodes over 17 time points,
#---------------------------------------------------------------------------------------
t_end <- 16
D <- DAG.empty()
D <- D +
node("L2", t = 0:t_end, distr = "rbinom",
	prob = ifelse(t == t_end, 0.5, 0.1), size = 1) +
node("L1", t = 0:t_end, distr = "rbinom",
	prob = ifelse(L2[0] == 1, 0.5, 0.1), size = 1)
Dset <- set.DAG(D)
sim(Dset, n=10)

#---------------------------------------------------------------------------------------
# EXAMPLE 5: Defining new distribution function 'rbern', defining and passing a custom
# vectorized node function 'customfun'
#---------------------------------------------------------------------------------------
rbern <- function(n, prob) { # defining a bernoulli wrapper based on R rbinom function
  rbinom(n = n, prob = prob, size = 1)
}
customfun <- function(arg, lambda) {
  res <- ifelse(arg == 1, lambda, 0.1)
  res
}
D <- DAG.empty()
D <- D +
node("W1", distr = "rbern", prob = 0.05) +
node("W2", distr = "rbern", prob = customfun(W1, 0.5)) +
node("W3", distr = "rbern", prob = ifelse(W1 == 1, 0.5, 0.1))
D1d <- set.DAG(D, vecfun = c("customfun"))
sim(D1d, n = 10, rndseed = 1)

#---------------------------------------------------------------------------------------
# EXAMPLE 6: Defining latent variables I and U.Y (will be hidden from simulated data)
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D +
  node("I",
    distr = "rcat.b1",
    probs = c(0.1, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1)) +
  node("W1",
    distr = "rnorm",
    mean = ifelse(I == 1, 0, ifelse(I == 2, 3, 10)) + 0.6 * I,
    sd = 1) +
  node("W2",
    distr = "runif",
    min = 0.025*I, max = 0.7*I) +
  node("W3",
    distr = "rbern",
    prob = plogis(-0.5 + 0.7*W1 + 0.3*W2 - 0.2*I)) +
  node("A",
    distr = "rbern",
    prob = plogis(+4.2 - 0.5*W1 + 0.2*W2/2 + 0.2*W3)) +
	node("U.Y", distr = "rnorm", mean = 0, sd = 1) +
  node("Y",
    distr = "rconst",
    const = -0.5 + 1.2*A + 0.1*W1 + 0.3*W2 + 0.2*W3 + 0.2*I + U.Y)
Dset1 <- set.DAG(D, latent.v = c("I", "U.Y"))
sim(Dset1, n = 10, rndseed = 1)

#---------------------------------------------------------------------------------------
# EXAMPLE 7: Multivariate random variables
#---------------------------------------------------------------------------------------
if (requireNamespace("mvtnorm", quietly = TRUE)) {
  D <- DAG.empty()
  # 2 dimensional normal (uncorrelated), using rmvnorm function from rmvnorm package:
  D <- D +
    node(c("X1","X2"), distr = "mvtnorm::rmvnorm",
      asis.params = list(mean = "c(0,1)")) +
  # Can define a degenerate (rconst) multivariate node:
    node(c("X1.copy", "X2.copy"), distr = "rconst", const = c(X1, X2))
  Dset1 <- set.DAG(D, verbose = TRUE)
  sim(Dset1, n = 10)
}

# On the other hand this syntax wont work,
# since simcausal will parse c(0,1) into a two column matrix:
## Not run: 
D <- DAG.empty()
D <- D + node(c("X1","X2"), distr = "mvtnorm::rmvnorm", mean = c(0,1))
Dset1 <- set.DAG(D, verbose = TRUE)

## End(Not run)

if (requireNamespace("mvtnorm", quietly = TRUE)) {
  D <- DAG.empty()
  # Bivariate normal (correlation coef 0.75):
  D <- D +
    node(c("X1","X2"), distr = "mvtnorm::rmvnorm",
      asis.params = list(mean = "c(0,1)",
        sigma = "matrix(c(1,0.75,0.75,1), ncol=2)")) +
  # Can use any component of such multivariate nodes when defining new nodes:
    node("A", distr = "rconst", const = 1 - X1)
  Dset2 <- set.DAG(D, verbose = TRUE)
  sim(Dset2, n = 10)
}

# Time-varying multivar node (3 time-points, 2 dimensional normal)
# plus changing the mean over time, as as function of t:
if (requireNamespace("mvtnorm", quietly = TRUE)) {
  D <- DAG.empty()
  D <- D +
    node(c("X1", "X2"), t = 0:2, distr = "mvtnorm::rmvnorm",
      asis.params = list(
        mean = "c(0,1) + t",
        sigma = "matrix(rep(0.75,4), ncol=2)"))
  Dset5b <- set.DAG(D)
  sim(Dset5b, n = 10)
}

# Two ways to define the same bivariate uniform copula:
if (requireNamespace("copula", quietly = TRUE)) {
  D <- DAG.empty()
  D <- D +
  # with a warning since normalCopula() returns an object unknown to simcausal:
    node(c("X1","X2"), distr = "copula::rCopula",
      copula = eval(copula::normalCopula(0.75, dim = 2))) +
  # same, as above:
    node(c("X3","X4"), distr = "copula::rCopula",
      asis.params = list(copula = "copula::normalCopula(0.75, dim = 2)"))
  vecfun.add("qbinom")
  # Bivariate binomial derived from previous copula, with same correlation:
  D <- D +
    node(c("A.Bin1", "A.Bin2"), distr = "rconst",
      const = c(qbinom(X1, 10, 0.5), qbinom(X2, 15, 0.7)))
  Dset3 <- set.DAG(D)
  sim(Dset3, n = 10)
}

# Same as "A.Bin1" and "A.Bin2", but directly using rmvbin function in bindata package:
if (requireNamespace("bindata", quietly = TRUE)) {
  D <- DAG.empty()
  D <- D +
    node(c("B.Bin1","B.Bin2"), distr = "bindata::rmvbin",
      asis.params = list(
        margprob = "c(0.5, 0.5)",
        bincorr = "matrix(c(1,0.75,0.75,1), ncol=2)"))
  Dset4b <- set.DAG(D)
  sim(Dset4b, n = 10)
}

#---------------------------------------------------------------------------------------
# EXAMPLE 8: Combining simcausal non-standard evaluation with eval() forced evaluation
#---------------------------------------------------------------------------------------
coefAi <- 1:10
D <- DAG.empty()
D <- D +
  node("A", t = 1, distr = "rbern", prob = 0.7) +
  node("A", t = 2:10, distr = "rconst", const = eval(coefAi[t]) * A[t-1])
Dset8 <- set.DAG(D)
sim(Dset8, n = 10)

#---------------------------------------------------------------------------------------
# TWO equivalent ways of creating a multivariate node (combining nodes W1 and W2):
#---------------------------------------------------------------------------------------
D <- DAG.empty()
D <- D + node("W1", distr = "rbern", prob = 0.45)
D <- D + node("W2", distr = "rconst", const = 1)

# option 1:
D <- D + node(c("W1.copy1", "W2.copy1"), distr = "rconst", const = c(W1, W2))

# equivalent option 2:
create_mat <- function(W1, W2) cbind(W1, W2)
vecfun.add("create_mat")
D <- D + node(c("W1.copy2", "W2.copy2"), distr = "rconst", const = create_mat(W1, W2))

Dset <- set.DAG(D)
sim(Dset, n=10, rndseed=1)

simcausal documentation built on Oct. 9, 2017, 1:03 a.m.