tests/RUnit/RUnit_tests_04_netcont_sA_tests.R

# ---------------------------------------------------------------------------------
# TEST SET 4
# ---------------------------------------------------------------------------------
# network TMLE fit with continous sA
# Run one TMLE simulation for network data generated igraph::sample_k_regular (all nodes have the same degree k)
# estimate psi0 under trunced g.star (same as in iid)
# sA is the same as in iid example w sAnet=(sA, rowMeans(sA[[1:Kmax]])).
# ---------------------------------------------------------------------------------------------------------
# TMLE for causal effect in network data with continuous exposure under continuous stochastic intervention;
# intervention g.star is defined by shifting the normal density of observed sA until g.star/g.0 >= 10,
# then its truncated to be equal to g.0
# ---------------------------------------------------------------------------------------------------------

`%+%` <- function(a, b) paste0(a, b)
# f.gstar,
run.net.1sim.tmlenet <- function(datO, NetInd_mat, sW, sA, Kmax, Qform, newA.gstar, psi0) {
  datO_input <- datO[,c("W1", "W2", "W3", "sA", "Y")]
  res <- tmlenet(data = datO_input,
                  # Anodes = "sA",
                  Ynode = "Y",
                  Kmax = Kmax,
                  NETIDmat = NetInd_mat,
                  # f_gstar1 = f.gstar,
                  intervene1.sA = newA.gstar,
                  sW = sW, sA = sA,
                  Qform = Qform,
                  hform.g0 = "sA + net.mean.sA ~ W1 + W2 + W3",
                  hform.gstar = "sA + net.mean.sA ~ W1 + W2 + W3",
                  optPars = list(
                    bootstrap.var = FALSE, n.bootstrap = 1
                    ))
                  # correct Q:
                  # Qform = "Y ~ W1 + W2 + W3 + sA + net.mean.sA",
                  # misspecified Q:
                  # Qform = "Y ~ W2 + W3 + net.mean.sA",

  CIs <- res$EY_gstar1$IC.CIs
  print("CIs: "); print(CIs)
  (tmle_B.CI <- CIs[rownames(CIs)%in%"tmle_B",])
  (h_iptw.CI <- CIs[rownames(CIs)%in%"h_iptw",])
  cover.tmle_B <- ((psi0 <= tmle_B.CI[2]) && (psi0 >= tmle_B.CI[1]))
  cover.h_iptw <- ((psi0 <= h_iptw.CI[2]) && (psi0 >= h_iptw.CI[1]))

  est_mat <- res$EY_gstar1$estimates
  est <- as.vector(est_mat)
  names(est) <- rownames(est_mat)
  cover <- apply(CIs, 1, function(row) ((psi0 <= row[2]) && (psi0 >= row[1])))
  cover2 <- c(tmle_B = cover.tmle_B, h_iptw = cover.h_iptw)

  return(list(est = est, cover = cover))
}

#------------------------------------------------------------------------------------------------------------
# The user-defined network sampler(s) from igraph (regular graph model)
# Generate regular random graphs with same degree for each node
# Kmax - degree of each node
make_netDAG <- function(Kmax, trunc, shift) {
  generate.igraph.k.regular <- function(n, Kmax, ...) {
    if (n < 20) Kmax <- 5
    igraph.reg <- igraph::sample_k_regular(no.of.nodes = n, k = Kmax, directed = TRUE, multiple = FALSE)
    # From igraph object to sparse adj. matrix:
    sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(igraph.reg)
    # From igraph object to simcausal/tmlenet input (NetInd_k, nF, Kmax):
    NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat)
    print("old Kmax:" %+% Kmax)
    print("new Kmax:" %+% NetInd_out$Kmax)
    print("NetInd_k"); print(head(NetInd_out$NetInd_k))
    if (Kmax < NetInd_out$Kmax) message("new network has larger Kmax value than requested, new Kmax = " %+% NetInd_out$Kmax)
    return(NetInd_out$NetInd_k)
  }
  # graph <- igraph::sample_k_regular(no.of.nodes = 50, k = 10, directed = TRUE, multiple = FALSE)
  # par(mar=c(.1,.1,.1,.1))
  # igraph::plot.igraph(graph,
  #     layout=igraph::layout.fruchterman.reingold,
  #     vertex.size=7,
  #     vertex.label.cex=.5,
  #     edge.arrow.size=.5)

  D <- DAG.empty()
  # Adding the ER model network generator from igraph:
  D <- D + network("NetInd_k", Kmax = Kmax, netfun = "generate.igraph.k.regular")
  D <- D +
      node("W1", distr = "rbern", prob = 0.5) +
      node("W2", distr = "rbern", prob = 0.3) +
      node("W3", distr = "rbern", prob = 0.3) +
      node("sA.mu", distr = "rconst", const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)) +
      node("sA", distr = "rnorm", mean = sA.mu, sd = 1) +
      node("net.mean.sA", distr = "rconst", const = mean(sA[[1:Kmax]])) +
      node("untrunc.sA.gstar",  distr = "rconst", const = sA + shift) +
      node("r.new.sA",  distr = "rconst", const =
            exp(shift * (untrunc.sA.gstar - sA.mu - shift / 2))) +
      node("tr.sA.gstar",  distr = "rconst", const = ifelse(r.new.sA > trunc, sA, untrunc.sA.gstar)) +
      node("probY", distr = "rconst", const =
            plogis(-0.35 * sA - 0.20 * mean(sA[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y", distr = "rbern", prob = probY) +
      node("probY.gstar", distr = "rconst", const =
            plogis(-0.35 * tr.sA.gstar - 0.20 * mean(tr.sA.gstar[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y.gstar", distr = "rbern", prob = probY.gstar)
  Dset <- set.DAG(D, n.test = 10)
  return(Dset)
}

# Function that returns a stochastic intervention function intervening on sA, for given shift
# create_f.gstar <- function(shift, trunc) {
#   shift <- shift
#   trunc <- trunc
#   f.gstar <- function(data, ...) {
#     sA.mu <- 0.98 * data[,"W1"] + 0.58 * data[,"W2"] + 0.33 * data[,"W3"]
#     sA <- data[,"sA"]
#     untrunc.sA.gstar <- sA + shift
#     # ratio of P_g^*(sA=sa|W)/P_g0(sA=sa|W) for sa=sA generated under g^*:
#     r.new.sA <- exp(shift * (untrunc.sA.gstar - sA.mu - shift / 2))
#     trunc.sA.gstar <- ifelse(r.new.sA > trunc, sA, untrunc.sA.gstar)
#     return(trunc.sA.gstar)
#   }
#   return(f.gstar)
# }

test.onesim.net.tmlefit <- function() {
  require(simcausal)
  #------------------------------------------------------------------------------------------------------------
  Kmax <- 5
  trunc <- 4
  shift <- 1
  Dset <- make_netDAG(Kmax, trunc, shift)

  #------------------------------------------------------------------------------------------------------------
  # # plotDAG(Dset)
  # rndseed2 <- 54321
  # nsamp <- 50000
  # datFull <- sim(Dset, n = nsamp, rndseed = rndseed2)
  # print(head(datFull, 50))
  # # netind_cl <- attributes(datFull)$netind_cl # to get the network object from sim data:
  # # NetInd_mat <- attributes(datFull)$netind_cl$NetInd # to get the network matrix from sim data:
  # print("mean(datFull$Y): " %+% mean(datFull$Y));
  # # [1] "mean(datFull$Y): 0.301425"
  # psi0 <- mean(datFull$Y.gstar)
  # print("psi0: " %+% psi0)
  # # [1] "psi0: 0.22062" for 50K sample (trunc = 4, shift = 1, Kmax = 5)
  # # [1] "psi0: 0.19184" for 50K sample (trunc = 7, shift = 1, Kmax = 10)

  # f.gstar <- create_f.gstar(shift = shift, trunc = trunc)

  shift <- shift
  trunc <- trunc

  newA.gstar <-  def_new_sA(sA = ifelse(exp(shift * (sA + shift - (0.98*W1 + 0.58*W2 + 0.33*W3) - shift/2)) > trunc,
                            sA,
                            sA + shift))

  # DEFINE SUMMARY MEASURES:
  sW <- def_sW(W1 = "W1", W2 = "W2", W3 = "W3")
  sA <- def_sA(sA = "sA", net.mean.sA = rowMeans(sA[[1:Kmax]]), replaceNAw0 = TRUE)

  seed <- 12345
  datO <- sim(Dset, n = 10000, rndseed = seed)
  mean(datO$Y.gstar) # [1] 0.2136
  checkTrue(abs(mean(datO$Y.gstar) - 0.2136) < 10^-6)

  netind_cl <- attributes(datO)$netind_cl
  NetInd_mat <- attributes(datO)$netind_cl$NetInd
  dim(NetInd_mat)

  getOption("tmlenet.verbose")
  options(tmlenet.verbose = FALSE)
  # tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
  # tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
  print_tmlenet_opts()

  #------------------------------------------------------------------------------------------------------------
  # TEST MISSPECIFIED Q:
  #------------------------------------------------------------------------------------------------------------
  tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
  print_tmlenet_opts()
  Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
  timerun <- system.time(
    estres <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                    sW = sW, sA = sA, Kmax = Kmax,
                                    Qform = Qform.mis,
                                    newA.gstar = newA.gstar,
                                    # f.gstar = f.gstar,
                                    psi0 = 0)
  )
  timerun
  #  user  system elapsed
  # 1.659   0.121   1.791
  estres
  #      tmle    h_iptw     gcomp
  # 0.2159311 0.2181338 0.2648796
  # [1] "CIs: "
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.1930713  0.2387909
  # h_iptw  0.1886270  0.2476407
  # gcomp          NA         NA

  # test 1:
  checkTrue(abs(estres$est["tmle"] - 0.2159311) < 10^-6)
  # test 2:
  checkTrue(abs(estres$est["h_iptw"] - 0.2181338) < 10^-6)
  # test 3:
  checkTrue(abs(estres$est["gcomp"] - 0.2648796) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST EQUAL LENGTH INTERVAL FITTING METHOD (bin.method = "equal.len") (MISSPECIFIED Q):
  #------------------------------------------------------------------------------------------------------------
  Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
  tmlenet_options(maxNperBin = 1000, bin.method = "equal.len", nbins = 10)
  # tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
  # tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
  print_tmlenet_opts()
  timerun <- system.time(
    estres_eqlen <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                    sW = sW, sA = sA, Kmax = Kmax,
                                    Qform = Qform.mis,
                                    newA.gstar = newA.gstar,
                                    # f.gstar = f.gstar,
                                    psi0 = 0)
  )
  timerun
  estres_eqlen
  #      tmle    h_iptw     gcomp
  # 0.2257509 0.2299967 0.2648796
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.2001600  0.2513419
  # h_iptw  0.1961125  0.2638809
  # test 1:
  checkTrue(abs(estres_eqlen$est["tmle"] - 0.2257509) < 10^-6)
  # test 2:
  checkTrue(abs(estres_eqlen$est["h_iptw"] - 0.2299967) < 10^-6)
  # test 3:
  checkTrue(abs(estres_eqlen$est["gcomp"] - 0.2648796) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST CORRECT Q:
  #------------------------------------------------------------------------------------------------------------
  tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
  Qform.corr <- "Y ~ W1 + W2 + W3 + sA + net.mean.sA"
  estres2 <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                  sW = sW, sA = sA, Kmax = Kmax,
                                  Qform = Qform.corr,
                                  newA.gstar = newA.gstar,
                                  # f.gstar = f.gstar,
                                  psi0 = 0)
  estres2
  #      tmle    h_iptw     gcomp
  # 0.2150487 0.2181338 0.2140926
  # [1] "CIs: "
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.1925783  0.2375190
  # h_iptw  0.1886270  0.2476407
  # gcomp          NA         NA

  # test 1:
  checkTrue(abs(estres2$est["tmle"] - 0.2150487) < 10^-6)
  # test 2:
  checkTrue(abs(estres2$est["h_iptw"] - 0.2181338) < 10^-6)
  # test 3:
  checkTrue(abs(estres2$est["gcomp"] - 0.2140926) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST PARALLEL FITTING METHOD FOR LOGISTIC REGRESSIONS (MISSPECIFIED Q):
  # FAILS ON win build (COMMENTING OUT):
  #------------------------------------------------------------------------------------------------------------
  # require(foreach)
  # require(doParallel)
  # registerDoParallel(cores = 2)
  # # registerDoParallel(cores = 1)
  # tmlenet_options(maxNperBin = 1000, bin.method="equal.mass", parfit = TRUE)
  # # tmlenet_options(binByMass = FALSE, bin_estimator = glmR6$new())
  # # tmlenet_options(poolContinVar = TRUE, bin_estimator = speedglmR6$new()) # to pool by contin outcome:
  # print_tmlenet_opts()
  # estres_par <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
  #                                   sW = sW, sA = sA, Kmax = Kmax,
  #                                   Qform = Qform.mis, f.gstar = f.gstar, psi0 = 0)
  # # test 1:
  # checkTrue(abs(estres_par$est["tmle"] - 0.2159311) < 10^-6)
  # # test 2:
  # checkTrue(abs(estres_par$est["h_iptw"] - 0.2181338) < 10^-6)
  # # test 3:
  # checkTrue(abs(estres_par$est["gcomp"] - 0.2648796) < 10^-6)

}
osofr/tmlenet documentation built on May 24, 2019, 4:58 p.m.