tests/testthat/test_node_cox.r

test_that("with censoring", {

  set.seed(3245)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", parents=c("A", "B"), betas=c(0.2, 1),
         lambda=2, gamma=1, surv_dist="weibull", cens_dist=runif,
         cens_args=list(min=0, max=10000))

  out <- sim_from_dag(dag=dag, n_sim=100)

  expect_equal(colnames(out), c("A", "B", "C_time", "C_status"))
  expect_true(all(out$C_status==1))
  expect_equal(mean(out$C_time), 0.051396, tolerance=0.0001)
})

test_that("calling the function directly", {

  set.seed(3245)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5)
  data <- as.data.frame(sim_from_dag(dag=dag, n_sim=100))

  out <- node_cox(data=data, parents=c("A", "B"), betas=c(0.2, 1),
                  lambda=2, gamma=1, surv_dist="weibull", cens_dist="runif",
                  cens_args=list(min=0, max=10000), name="C")

  expect_equal(colnames(out), c("C_time", "C_status"))
  expect_true(all(out$C_status==1))
  expect_equal(mean(out$C_time), 0.0514, tolerance=0.0001)
})

test_that("without censoring", {

  set.seed(324545)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", parents=c("A", "B"), betas=c(0.2, 1),
         lambda=2, gamma=1, surv_dist="weibull", cens_dist=NULL)

  out <- sim_from_dag(dag=dag, n_sim=100)

  expect_equal(colnames(out), c("A", "B", "C_time", "C_status"))
  expect_true(all(out$C_status==1))
  expect_equal(mean(out$C_time), 0.05253, tolerance=0.0001)
})

test_that("with formula", {

  set.seed(32457)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", formula=~A + B, betas=c(0.2, 1),
         lambda=2, gamma=1, surv_dist="weibull", cens_dist="runif",
         cens_args=list(min=0, max=10000))

  out <- sim_from_dag(dag=dag, n_sim=100)

  expect_equal(colnames(out), c("A", "B", "C_time", "C_status"))
  expect_true(all(out$C_status==1))
  expect_equal(mean(out$C_time), 0.04857, tolerance=0.0001)
})

test_that("with enhanced formula", {

  set.seed(32457)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", formula=~A*0.2 + BTRUE*1,
         lambda=2, gamma=1, surv_dist="weibull", cens_dist="runif",
         cens_args=list(min=0, max=10000))

  out <- sim_from_dag(dag=dag, n_sim=100)

  expect_equal(colnames(out), c("A", "B", "C_time", "C_status"))
  expect_true(all(out$C_status==1))
  expect_equal(mean(out$C_time), 0.04857, tolerance=0.0001)
})

test_that("without censoring: exponential", {

  set.seed(324545)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5)

  # still putting it out in two columns
  dag1 <- dag +
    node("C", type="cox", parents=c("A", "B"), betas=c(0.2, 1),
         lambda=2, gamma=1, surv_dist="exponential", cens_dist=NULL)

  out <- sim_from_dag(dag=dag1, n_sim=100)
  expect_equal(colnames(out), c("A", "B", "C_time", "C_status"))

  # returning only the time as one columns
  dag2 <- dag +
    node("C", type="cox", parents=c("A", "B"), betas=c(0.2, 1),
         lambda=2, gamma=1, surv_dist="exponential", cens_dist=NULL,
         as_two_cols=FALSE)

  out <- sim_from_dag(dag=dag2, n_sim=100)
  expect_equal(colnames(out), c("A", "B", "C"))
})

test_that("using only one variable in formula", {

  set.seed(1234)

  dag <- empty_dag() +
    node("A", type="rnorm") +
    node("Y", type="cox", formula= ~ A*0.8, surv_dist="weibull",
         lambda=1.1, gamma=0.7, cens_dist=NULL)

  data <- sim_from_dag(dag, n_sim=100)

  expect_equal(round(mean(data$Y_time), 3), 2.24)
})

test_that("left-truncation works", {

  set.seed(32457)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", formula=~A*0.2 + BTRUE*1,
         lambda=2, gamma=1, surv_dist="weibull", left=15)

  out <- sim_from_dag(dag=dag, n_sim=100)
  expect_true(all(out$C_time > 15))
})

test_that("generating integers", {

  set.seed(324573)

  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", formula=~A*0.2 + BTRUE*1,
         lambda=0.1, gamma=1, surv_dist="weibull", as_integer=TRUE)

  # as two columns
  out <- sim_from_dag(dag=dag, n_sim=100)
  expect_equal(out$C_time, ceiling(out$C_time))

  # as one column
  dag <- empty_dag() +
    node("A", type="rnorm", mean=10, sd=2) +
    node("B", type="rbernoulli", p=0.5) +
    node("C", type="cox", formula=~A*0.2 + BTRUE*1,
         lambda=0.1, gamma=1, surv_dist="weibull", as_integer=TRUE,
         as_two_cols=FALSE)

  out <- sim_from_dag(dag=dag, n_sim=100)
  expect_equal(out$C, ceiling(out$C))
})

test_that("using time-dependent baseline hazard", {

  ## some arbitrary baseline hazard function
  fbasehaz <- function(t) {
    0.002 +
      0.01 * exp(-((t - 200)^2) / (2 * 50^2)) +   # first hill
      0.008 * exp(-((t - 700)^2) / (2 * 80^2))    # second hill
  }

  ## general test case
  dag <- empty_dag() +
    node("A", type="rnorm") +
    node("B", type="rbernoulli") +
    node("Y", type="cox", surv_dist=fbasehaz, basehaz_grid=1:100000,
         formula= ~ A*-0.5 + B*1.5)

  set.seed(1234)
  data <- sim_from_dag(dag, n_sim=1000)

  expect_equal(round(mean(data$Y_time), 3), 168.352)

  # error with non-positive baseline hazard
  fbasehaz2 <- function(t) {
    return(-0.1)
  }

  dag <- empty_dag() +
    node("A", type="rnorm") +
    node("B", type="rbernoulli") +
    node("Y", type="cox", surv_dist=fbasehaz2, basehaz_grid=1:100000,
         formula= ~ A*-0.5 + B*1.5)

  set.seed(1234)
  expect_error(data <- sim_from_dag(dag, n_sim=1000))

  # error with left truncation times out of bounds
  dag <- empty_dag() +
    node("A", type="rnorm") +
    node("B", type="rbernoulli") +
    node("Y", type="cox", surv_dist=fbasehaz, basehaz_grid=1:100000,
         formula= ~ A*-0.5 + B*1.5, left=1000000000)

  set.seed(123344)
  expect_error(data <- sim_from_dag(dag, n_sim=1000))

  # error with extrapolation = FALSE
  dag <- empty_dag() +
    node("A", type="rnorm") +
    node("B", type="rbernoulli") +
    node("Y", type="cox", surv_dist=fbasehaz, basehaz_grid=1:100,
         formula= ~ A*-0.5 + B*1.5)

  set.seed(123344)
  expect_error(data <- sim_from_dag(dag, n_sim=1000))
})

Try the simDAG package in your browser

Any scripts or data that you put into this service are public.

simDAG documentation built on May 14, 2026, 5:11 p.m.