RUnit_tests_04_netcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

# ---------------------------------------------------------------------------------
# TEST SET 4
# ---------------------------------------------------------------------------------
# network TMLE fit with continous sA 
# Run one TMLE simulation for network data generated igraph::sample_k_regular (all nodes have the same degree k)
# estimate psi0 under trunced g.star (same as in iid)
# sA is the same as in iid example w sAnet=(sA, rowMeans(sA[[1:Kmax]])).
# ---------------------------------------------------------------------------------------------------------
# TMLE for causal effect in network data with continuous exposure under continuous stochastic intervention;
# intervention g.star is defined by shifting the normal density of observed sA until g.star/g.0 >= 10,
# then its truncated to be equal to g.0
# ---------------------------------------------------------------------------------------------------------

`%+%` <- function(a, b) paste0(a, b)
run.net.1sim.tmlenet <- function(datO, NetInd_mat, def_sW, def_sA, Kmax, Qform, f.gstar, psi0) {
  datO_input <- datO[,c("W1", "W2", "W3", "sA", "Y")]
  res <- tmlenet(data = datO_input, Anode = "sA", Ynode = "Y",
                  Kmax = Kmax,
                  NETIDmat = NetInd_mat,
                  f_gstar1 = f.gstar,
                  sW = def_sW, sA = def_sA,
                  Qform = Qform,
                  hform.g0 = "sA + net.mean.sA ~ W1 + W2 + W3",
                  hform.gstar = "sA + net.mean.sA ~ W1 + W2 + W3",
                  optPars = list(n_MCsims = 1))
                  # correct Q:
                  # Qform = "Y ~ W1 + W2 + W3 + sA + net.mean.sA",
                  # misspecified Q:
                  # Qform = "Y ~ W2 + W3 + net.mean.sA",

  CIs <- res$EY_gstar1$CIs
  print("CIs: "); print(CIs)
  (tmle_B.CI <- CIs[rownames(CIs)%in%"tmle_B",])
  (h_iptw.CI <- CIs[rownames(CIs)%in%"h_iptw",])
  cover.tmle_B <- ((psi0 <= tmle_B.CI[2]) && (psi0 >= tmle_B.CI[1]))
  cover.h_iptw <- ((psi0 <= h_iptw.CI[2]) && (psi0 >= h_iptw.CI[1]))

  est_mat <- res$EY_gstar1$estimates
  est <- as.vector(est_mat)
  names(est) <- rownames(est_mat)
  cover <- apply(CIs, 1, function(row) ((psi0 <= row[2]) && (psi0 >= row[1])))
  cover2 <- c(tmle_B = cover.tmle_B, h_iptw = cover.h_iptw)

  return(list(est = est, cover = cover))
}

#------------------------------------------------------------------------------------------------------------
# The user-defined network sampler(s) from igraph (regular graph model)
# Generate regular random graphs with same degree for each node
# Kmax - degree of each node
make_netDAG <- function(Kmax, trunc.const, shift.const) {
  generate.igraph.k.regular <- function(n, Kmax, ...) {
    if (n < 20) Kmax <- 5
    igraph.reg <- igraph::sample_k_regular(no.of.nodes = n, k = Kmax, directed = TRUE, multiple = FALSE)
    # From igraph object to sparse adj. matrix:
    sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(igraph.reg)
    # From igraph object to simcausal/tmlenet input (NetInd_k, nF, Kmax):
    NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat)
    print("old Kmax:" %+% Kmax)
    print("new Kmax:" %+% NetInd_out$Kmax)
    print("NetInd_k"); print(head(NetInd_out$NetInd_k))
    if (Kmax < NetInd_out$Kmax) message("new network has larger Kmax value than requested, new Kmax = " %+% NetInd_out$Kmax)
    return(NetInd_out$NetInd_k)
  }
  # graph <- igraph::sample_k_regular(no.of.nodes = 50, k = 10, directed = TRUE, multiple = FALSE)
  # par(mar=c(.1,.1,.1,.1))
  # igraph::plot.igraph(graph,
  #     layout=igraph::layout.fruchterman.reingold,
  #     vertex.size=7,
  #     vertex.label.cex=.5,
  #     edge.arrow.size=.5)

  D <- DAG.empty()
  # Adding the ER model network generator from igraph:
  D <- D + network("NetInd_k", Kmax = Kmax, netfun = "generate.igraph.k.regular")
  D <- D +
      node("W1", distr = "rbern", prob = 0.5) +
      node("W2", distr = "rbern", prob = 0.3) +
      node("W3", distr = "rbern", prob = 0.3) +
      node("sA.mu", distr = "rconst", const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)) +
      node("sA", distr = "rnorm", mean = sA.mu, sd = 1) +
      node("net.mean.sA", distr = "rconst", const = mean(sA[[1:Kmax]])) +
      node("untrunc.sA.gstar",  distr = "rconst", const = sA + shift.const) +
      node("r.new.sA",  distr = "rconst", const =
            exp(shift.const * (untrunc.sA.gstar - sA.mu - shift.const / 2))) +
      node("tr.sA.gstar",  distr = "rconst", const = ifelse(r.new.sA > trunc.const, sA, untrunc.sA.gstar)) +
      node("probY", distr = "rconst", const =
            plogis(-0.35 * sA - 0.20 * mean(sA[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y", distr = "rbern", prob = probY) +
      node("probY.gstar", distr = "rconst", const =
            plogis(-0.35 * tr.sA.gstar - 0.20 * mean(tr.sA.gstar[[1:Kmax]]) - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y.gstar", distr = "rbern", prob = probY.gstar)
  Dset <- set.DAG(D)
  return(Dset)
}

# Function that returns a stochastic intervention function intervening on sA, for given shift.const
create_f.gstar <- function(shift, trunc.const) {
  shift.const <- shift
  trunc.const <- trunc.const
  f.gstar <- function(data, ...) {
    sA.mu <- 0.98 * data[,"W1"] + 0.58 * data[,"W2"] + 0.33 * data[,"W3"]
    sA <- data[,"sA"]
    untrunc.sA.gstar <- sA + shift.const
    # ratio of P_g^*(sA=sa|W)/P_g0(sA=sa|W) for sa=sA generated under g^*:
    r.new.sA <- exp(shift.const * (untrunc.sA.gstar - sA.mu - shift.const / 2))
    trunc.sA.gstar <- ifelse(r.new.sA > trunc.const, sA, untrunc.sA.gstar)
    return(trunc.sA.gstar)
  }
  return(f.gstar)
}

test.onesim.net.tmlefit <- function() {
  require(simcausal)
  #------------------------------------------------------------------------------------------------------------
  Kmax <- 5
  trunc.const <- 4
  shift.const <- 1
  Dset <- make_netDAG(Kmax, trunc.const, shift.const)
  #------------------------------------------------------------------------------------------------------------
  # # plotDAG(Dset)
  # rndseed2 <- 54321
  # nsamp <- 50000
  # datFull <- sim(Dset, n = nsamp, rndseed = rndseed2)
  # print(head(datFull, 50))
  # # netind_cl <- attributes(datFull)$netind_cl # to get the network object from sim data:
  # # NetInd_mat <- attributes(datFull)$netind_cl$NetInd # to get the network matrix from sim data:
  # print("mean(datFull$Y): " %+% mean(datFull$Y));
  # # [1] "mean(datFull$Y): 0.301425"
  # psi0 <- mean(datFull$Y.gstar)
  # print("psi0: " %+% psi0)
  # # [1] "psi0: 0.22062" for 50K sample (trunc.const = 4, shift = 1, Kmax = 5)
  # # [1] "psi0: 0.19184" for 50K sample (trunc.const = 7, shift = 1, Kmax = 10)

  f.gstar <- create_f.gstar(shift = shift.const, trunc.const = trunc.const)
  # DEFINE SUMMARY MEASURES:
  def_sW <- def.sW(W1 = "W1", W2 = "W2", W3 = "W3")
  def_sA <- def.sA(sA = "sA", net.mean.sA = rowMeans(sA[[1:Kmax]]), replaceNAw0 = TRUE)

  seed <- 12345
  datO <- sim(Dset, n = 10000, rndseed = seed)
  mean(datO$Y.gstar) # [1] 0.2136
  checkTrue(abs(mean(datO$Y.gstar) - 0.2136) < 10^-6)

  netind_cl <- attributes(datO)$netind_cl
  NetInd_mat <- attributes(datO)$netind_cl$NetInd
  dim(NetInd_mat)

  getOption("tmlenet.verbose")
  options(tmlenet.verbose = FALSE)
  # tmlenet_options(binByMass = FALSE, useglm = TRUE)
  # tmlenet_options(poolContinVar = TRUE, useglm = FALSE) # to pool by contin outcome:
  print_tmlenet_opts()

  #------------------------------------------------------------------------------------------------------------
  # TEST MISSPECIFIED Q:
  #------------------------------------------------------------------------------------------------------------
  tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
  print_tmlenet_opts()
  Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
  timerun <- system.time(
    estres <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                    def_sW = def_sW, def_sA = def_sA, Kmax = Kmax,
                                    Qform = Qform.mis, f.gstar = f.gstar, psi0 = 0)
  )
  timerun
  #  user  system elapsed
  # 1.659   0.121   1.791
  estres
  #      tmle    h_iptw     gcomp
  # 0.2159311 0.2181338 0.2648796   # [1] "CIs: "
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.1930713  0.2387909
  # h_iptw  0.1886270  0.2476407
  # gcomp          NA         NA

  # test 1:
  checkTrue(abs(estres$est["tmle"] - 0.2159311) < 10^-6)
  # test 2:
  checkTrue(abs(estres$est["h_iptw"] - 0.2181338) < 10^-6)
  # test 3:
  checkTrue(abs(estres$est["gcomp"] - 0.2648796) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST EQUAL LENGTH INTERVAL FITTING METHOD (bin.method = "equal.len") (MISSPECIFIED Q):
  #------------------------------------------------------------------------------------------------------------
  Qform.mis <- "Y ~ W2 + W3 + net.mean.sA" # # misspecified Q:
  tmlenet_options(maxNperBin = 1000, bin.method = "equal.len", nbins = 10)
  # tmlenet_options(binByMass = FALSE, useglm = TRUE)
  # tmlenet_options(poolContinVar = TRUE, useglm = FALSE) # to pool by contin outcome:
  print_tmlenet_opts()
  timerun <- system.time(
    estres_eqlen <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                    def_sW = def_sW, def_sA = def_sA, Kmax = Kmax,
                                    Qform = Qform.mis, f.gstar = f.gstar, psi0 = 0)
  )
  timerun
  estres_eqlen
  #      tmle    h_iptw     gcomp
  # 0.2257509 0.2299967 0.2648796
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.2001600  0.2513419
  # h_iptw  0.1961125  0.2638809
  # test 1:
  checkTrue(abs(estres_eqlen$est["tmle"] - 0.2257509) < 10^-6)
  # test 2:
  checkTrue(abs(estres_eqlen$est["h_iptw"] - 0.2299967) < 10^-6)
  # test 3:
  checkTrue(abs(estres_eqlen$est["gcomp"] - 0.2648796) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST CORRECT Q:
  #------------------------------------------------------------------------------------------------------------
  tmlenet_options(maxNperBin = 1000, bin.method="equal.mass")
  Qform.corr <- "Y ~ W1 + W2 + W3 + sA + net.mean.sA"
  estres2 <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
                                  def_sW = def_sW, def_sA = def_sA, Kmax = Kmax,
                                  Qform = Qform.corr, f.gstar = f.gstar, psi0 = 0)
  estres2
  #      tmle    h_iptw     gcomp
  # 0.2150487 0.2181338 0.2140926
  # [1] "CIs: "
  #        LBCI_0.025 UBCI_0.975
  # tmle    0.1925783  0.2375190
  # h_iptw  0.1886270  0.2476407
  # gcomp          NA         NA

  # test 1:
  checkTrue(abs(estres2$est["tmle"] - 0.2150487) < 10^-6)
  # test 2:
  checkTrue(abs(estres2$est["h_iptw"] - 0.2181338) < 10^-6)
  # test 3:
  checkTrue(abs(estres2$est["gcomp"] - 0.2140926) < 10^-6)

  #------------------------------------------------------------------------------------------------------------
  # TEST PARALLEL FITTING METHOD FOR LOGISTIC REGRESSIONS (MISSPECIFIED Q):
  # FAILS ON win build (COMMENTING OUT):
  #------------------------------------------------------------------------------------------------------------
  # require(foreach)
  # require(doParallel)
  # registerDoParallel(cores = 2)
  # # registerDoParallel(cores = 1)
  # tmlenet_options(maxNperBin = 1000, bin.method="equal.mass", parfit = TRUE)
  # # tmlenet_options(binByMass = FALSE, useglm = TRUE)
  # # tmlenet_options(poolContinVar = TRUE, useglm = FALSE) # to pool by contin outcome:
  # print_tmlenet_opts()
  # estres_par <- run.net.1sim.tmlenet(datO = datO, NetInd_mat = NetInd_mat,
  #                                   def_sW = def_sW, def_sA = def_sA, Kmax = Kmax,
  #                                   Qform = Qform.mis, f.gstar = f.gstar, psi0 = 0)
  # # test 1:
  # checkTrue(abs(estres_par$est["tmle"] - 0.2159311) < 10^-6)
  # # test 2:
  # checkTrue(abs(estres_par$est["h_iptw"] - 0.2181338) < 10^-6)
  # # test 3:
  # checkTrue(abs(estres_par$est["gcomp"] - 0.2648796) < 10^-6)












}
Any scripts or data that you put into this service are public.
tmlenet documentation built on May 29, 2017, 2:22 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
tmlenet
Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_04_netcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

Try the tmlenet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmlenet Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_04_netcont_sA_tests.R In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

Try the tmlenet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmlenet
Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_04_netcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data