RUnit_tests_03_iidcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

# ---------------------------------------------------------------------------------
# TEST SET 2. TESTS FOR FITTING CONTINUOUS EXPOSURE sA IN IID DATA
# ---------------------------------------------------------------------------------
# Fitting continuous exposure by  binning, conditional on covariates
# Overall exposure g0 (sA) is a mixture of 3 normals, 
# individual exposure is normal with mu for each observation being a function of (W1,W2,W3), sd = 1;
# ---------------------------------------------------------------------------------

`%+%` <- function(a, b) paste0(a, b)
as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
allNA <- function(x) all(is.na(x))

# ---------------------------------------------------------------------------------
# Test 1. Directly fit a joint density for sA, sA2 ~ W, for sA - continuous (with speedglm and glm.fit)
# ---------------------------------------------------------------------------------
test.simple.fit.density.sA <- function() {
  get.density.sAdat <- function(nsamp = 100000) {
    require(simcausal)
    D <- DAG.empty()
    D <-
    D + node("W1", distr = "rbern", prob = 0.5) +
        node("W2", distr = "rbern", prob = 0.3) +
        node("W3", distr = "rbern", prob = 0.3) +
        node("sA.mu", distr = "rconst", const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)) +
        node("sA", distr = "rnorm", mean = sA.mu, sd = 1)
    D <- set.DAG(D)
    datO <- sim(D, n = nsamp, rndseed = 12345)
  }

  get.setW.sAdat <- function(setWvals, nsamp = 100000) {
    require(simcausal)
    W1val <- setWvals[1]; W2val <- setWvals[2]; W3val <- setWvals[3]
    D <- DAG.empty()
    D <-
    D + node("W1", distr = "rconst", const = .(W1val)) +
        node("W2", distr = "rconst", const = .(W2val)) +
        node("W3", distr = "rconst", const = .(W3val)) +
        node("sA", distr = "rnorm", mean = (0.98 * W1 + 0.58 * W2 + 0.33 * W3), sd = 1)
    D <- set.DAG(D)
    datWset <- sim(D, n = nsamp, rndseed = 12345)
    setWmat <- as.matrix(data.frame(W1 = W1val, W2 = W2val, W3 = W3val, sA = seq(-4, 4, by = 0.2)))
    return(list(setWsA = datWset$sA, setWmat = setWmat))
  }

  def.nodeojb <- function(datO) {
    Kmax <- 1
    nodes <- list(Anode = "sA", Wnodes = c("W1", "W2", "W3"), nFnode = "nF")
    def_sW <- def.sW(W1 = "W1", W2 = "W2", W3 = "W3")
    def_sA <- def.sA(sA = "sA")
    netind_cl <- simcausal::NetIndClass$new(nobs = nrow(datO))
    # Define datNetObs:
    datnetW <- DatNet$new(netind_cl = netind_cl, nodes = nodes)$make.sVar(Odata = datO, sVar.object = def_sW)
    checkTrue(tmlenet:::is.DatNet(datnetW))
    datnetA <- DatNet$new(netind_cl = netind_cl, nodes = nodes)$make.sVar(Odata = datO, sVar.object = def_sA)
    datNetObs <- DatNet.sWsA$new(datnetW = datnetW, datnetA = datnetA)$make.dat.sWsA()
    return(list(datNetObs = datNetObs, netind_cl = netind_cl, def_sA = def_sA, def_sW = def_sW, nodes = nodes))
  }

  nsamp <- 10000
  # nsamp <- 100000
  datO <- get.density.sAdat(nsamp)
  nodeobjs <- def.nodeojb(datO)
  testm.sW <- nodeobjs$def_sW$eval.nodeforms(data.df = datO, netind_cl = nodeobjs$netind_cl)
  testm.sA <- nodeobjs$def_sA$eval.nodeforms(data.df = datO, netind_cl = nodeobjs$netind_cl)

  # Define est_params_list:
  reg.sVars <- list(outvars = c("sA"), predvars = c("W1", "W2", "W3"))
  subset_vars <- lapply(reg.sVars$outvars, function(var) {var})
  sA_class <- nodeobjs$datNetObs$datnetA$type.sVar[reg.sVars$outvars]

  # Put all est_params in RegressionClass (regression with speedglm package)
  print("fitting h_gN based equal.len intervals (default) and speedglm (default): ")
  regclass <- RegressionClass$new(outvar.class = sA_class,
                                  outvar = reg.sVars$outvars,
                                  predvars = reg.sVars$predvars,
                                  subset = subset_vars)
  summeas.g0 <- SummariesModel$new(reg = regclass, DatNet.sWsA.g0 = nodeobjs$datNetObs)
  summeas.g0$fit(data = nodeobjs$datNetObs)

  # Test the coef and summary functions for binoutmodel class:
  out_ContinSummaryModel <- summeas.g0$getPsAsW.models()$`P(sA|sW).1`
  out_BinModels <- out_ContinSummaryModel$getPsAsW.models()
  print(tmlenet:::coef.BinOutModel(out_BinModels[[1]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[2]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[3]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[4]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[5]]))

  # (intrvls <- out_ContinSummaryModel$intrvls)
  # (intrvls.width <- diff(intrvls))
  # length(intrvls.width)

  # ord.sVar <- nodeobjs$datNetObs$discretize.sVar(name.sVar = "sA", intervals = out_ContinSummaryModel$intrvls)
  # ord.sVar_bw <- intrvls.width[ord.sVar]
  # print(head(cbind(sA = nodeobjs$datNetObs$dat.sVar[, "sA"], ord.sVar, bw = ord.sVar_bw, nodeobjs$datNetObs$dat.bin.sVar), 5))
  # print("freq count for transformed ord.sVar: "); print(table(ord.sVar))
  # plot(density(ord.sVar))
  # hist(ord.sVar)
  summeas.g0$predict(newdata = nodeobjs$datNetObs)  # summeas.g0$sA_nms
  # Get P(sA|W) for the observed data (W,sA):
  # SHOULD BE SIMILAR TO THE OBSERVED DENSITY OF s.A (but discretized)
  h_gN <- summeas.g0$predictAeqa(newdata = nodeobjs$datNetObs) # *** DatNet.sWsA$O.datnetA IS TO BE RENAMED TO $O.O.datnetA for clarity ***
  
  print("h_gN fit under speedglm: " %+% mean(h_gN)) # [1] 0.2718823
  checkTrue(abs(mean(h_gN)-0.2718823) < 10^-4)
  # ---------------------------------------------------------------------------------------------------------
  # Plot predicted discretized probs conditional on some values of W's
  # ---------------------------------------------------------------------------------------------------------
  # setWvals <- c(W1 = 0, W2 = 0, W3 = 1)
  # subs <- (datO$W1==setWvals["W1"] & datO$W2==setWvals["W2"] & datO$W3==setWvals["W3"])
  # sum(subs)
  # setWdat_res <- get.setW.sAdat(setWvals, nsamp)
  # plot(density(setWdat_res$setWsA))
  # lines(datO[subs,"sA"], h_gN[subs], type = "p", cex = .3, col = "red")
  # plot(datO[subs,"sA"], h_gN[subs], type = "p", cex = .3, col = "red")
  # lines(density(setWdat_res$setWsA))

  # ---------------------------------------------------------------------------------------------------------
  # Plot all predicted discretized probs together (without conditioning on particular subset of W's)
  # ---------------------------------------------------------------------------------------------------------
  # plot(datO[,"sA"], h_gN, type = "p", cex = .3, col = "red")
  # lines(density(datO[,"sA"]))

  # ---------------------------------------------------------------------------------------------------------
  # **** Same fit as before but doing regressions with stats::glm.fit ****
  # ---------------------------------------------------------------------------------------------------------
  print("fitting h_gN based equal.len intervals (default) and glm.fit: ")
  regclass.gml <- RegressionClass$new(useglm = TRUE, outvar.class = sA_class,
                                      outvar = reg.sVars$outvars,
                                      predvars = reg.sVars$predvars,
                                      subset = subset_vars)
  summeas.g0.glm <- SummariesModel$new(reg = regclass.gml, DatNet.sWsA.g0 = nodeobjs$datNetObs)
  summeas.g0.glm$fit(data = nodeobjs$datNetObs)

  # Test the coef and summary functions for binoutmodel class:
  out_ContinSummaryModel <- summeas.g0.glm$getPsAsW.models()$`P(sA|sW).1`
  out_BinModels <- out_ContinSummaryModel$getPsAsW.models()
  print(tmlenet:::coef.BinOutModel(out_BinModels[[1]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[2]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[3]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[4]]))
  # print(tmlenet:::summary.BinOutModel(out_BinModels[[5]]))

  summeas.g0.glm$predict(newdata = nodeobjs$datNetObs)
  h_gN.glm <- summeas.g0.glm$predictAeqa(newdata = nodeobjs$datNetObs) # *** DatNet.sWsA$O.datnetA IS TO BE RENAMED TO $O.O.datnetA for clarity ***

  print("h_gN fit under glm: " %+% mean(h_gN.glm)) # [1] 0.2718823
  checkTrue(abs(mean(h_gN.glm)-0.2718823) < 10^-4)


  # ---------------------------------------------------------------------------------------------------------
  # **** Same fit as before but doing binnning by mass intervals & regressions with speedglm ****
  # ---------------------------------------------------------------------------------------------------------
  print("fitting h_gN based on bin_bymass = TRUE and speedglm (default): ")
  regclass.binmass <- RegressionClass$new(useglm = FALSE,
                                          bin_bymass = TRUE,
                                          max_nperbin = 1000,
                                          outvar.class = sA_class,
                                          outvar = reg.sVars$outvars,
                                          predvars = reg.sVars$predvars,
                                          subset = subset_vars)
  summeas.g0 <- SummariesModel$new(reg = regclass.binmass, DatNet.sWsA.g0 = nodeobjs$datNetObs)
  summeas.g0$fit(data = nodeobjs$datNetObs)
  summeas.g0$predict(newdata = nodeobjs$datNetObs)
  h_gN <- summeas.g0$predictAeqa(newdata = nodeobjs$datNetObs) # *** DatNet.sWsA$O.datnetA IS TO BE RENAMED TO $O.O.datnetA for clarity ***
  mean(h_gN) # [1] 0.2668144
  checkTrue(abs(mean(h_gN)-0.2668144) < 10^-4)

  # ---------------------------------------------------------------------------------------------------------
  # **** Same fit as before but binning using "dhist" function & regressions with speedglm ****
  # ---------------------------------------------------------------------------------------------------------
  print("fitting h_gN based on dhist intervals and speedglm (default): ")
  regclass.bidhist <- RegressionClass$new(useglm = FALSE,
                                          bin_bymass = FALSE,
                                          bin_bydhist = TRUE,
                                          max_nperbin = 1000,
                                          outvar.class = sA_class,
                                          outvar = reg.sVars$outvars,
                                          predvars = reg.sVars$predvars,
                                          subset = subset_vars)
  summeas.g0 <- SummariesModel$new(reg = regclass.bidhist, DatNet.sWsA.g0 = nodeobjs$datNetObs)
  summeas.g0$fit(data = nodeobjs$datNetObs)

  out_ContinSummaryModel <- summeas.g0$getPsAsW.models()$`P(sA|sW).1`
  print("Intervals for dhist: ")
  print(out_ContinSummaryModel$intrvls)
  # [1] -1003.5240566    -3.5240566    -1.9562682    -0.8766233    -0.2052710     0.2963970     0.7404754     1.2078705
  # [9]     1.6959939     2.3455552     3.3931981     4.9362651  1004.9362651

  out_BinModels <- out_ContinSummaryModel$getPsAsW.models()
  print(tmlenet:::coef.BinOutModel(out_BinModels[[1]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[2]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[3]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[4]]))
  print(tmlenet:::summary.BinOutModel(out_BinModels[[5]]))

  summeas.g0$predict(newdata = nodeobjs$datNetObs)
  h_gN <- summeas.g0$predictAeqa(newdata = nodeobjs$datNetObs) # *** DatNet.sWsA$O.datnetA IS TO BE RENAMED TO $O.O.datnetA for clarity ***
  print("regclass.bidhist mean(h_gN) under speedglm: " %+% mean(h_gN))
  # [1] 0.276875
  # [1] 0.27687500785635
  checkTrue(abs(mean(h_gN)-0.276875) < 10^-4)
}


# ---------------------------------------------------------------------------------------------------------
# Test 2. Running iid TMLE fit for continous sA
# TMLE for causal effect in i.i.d. data with continuous exposure under continuous stochastic intervention;
# intervention g.star is defined by shifting the normal density of observed sA until g.star/g.0 >= 10, 
# then its truncated to be equal to g.0
# ---------------------------------------------------------------------------------------------------------
# Run one TMLE simulation for iid data sampled from get.iid.densityOdat, estimating psi0 under trunced g.star
# ---------------------------------------------------------------------------------------------------------
get.iid.densityOdat <- function(nsamp = 100000, rndseed = NULL, trunc.const = 10, shift.const = 2) {
  require(simcausal)
  # nsamp = 100000
  # rndseed = 12345
  # trunc.const <- 10
  # shift.const <- 2
  D <- DAG.empty()
  D <-
  D + node("W1", distr = "rbern", prob = 0.5) +
      node("W2", distr = "rbern", prob = 0.3) +
      node("W3", distr = "rbern", prob = 0.3) +
      node("sA.mu", distr = "rconst", const = (0.98 * W1 + 0.58 * W2 + 0.33 * W3)) +
      node("shift",  distr = "rconst", const = .(shift.const)) +
      node("sA", distr = "rnorm", mean = sA.mu, sd = 1) +
      node("r.obs.sA",  distr = "rconst", const = exp(shift * (sA - sA.mu - shift / 2))) +
      node("trunc.c",  distr = "rconst", const = .(trunc.const)) +
      node("untrunc.sA.gstar",  distr = "rconst", const = sA + shift) +
      # node("p.gstar.sA", distr = "rconst", const = (1/sqrt(2*.(pi))) * exp((-1/2) * (untrunc.sA.gstar - sA.mu)^2)) +
      # node("p.gstar.sA.gstar", distr = "rconst", const = (1/sqrt(2*.(pi))) * exp((-1/2) * (untrunc.sA.gstar - (sA.mu + shift))^2)) +
      node("r.new.sA",  distr = "rconst", const = exp(shift * (untrunc.sA.gstar - sA.mu - shift / 2))) +
      node("trunc.sA.gstar",  distr = "rconst", const = ifelse(r.new.sA > trunc.c, sA, untrunc.sA.gstar)) +
      node("probY", distr = "rconst", const = plogis(-0.45 * sA - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y", distr = "rbern", prob = plogis(-0.45 * sA - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("probY.gstar", distr = "rconst", const = plogis(-0.45 * trunc.sA.gstar - 0.5 * W1 - 0.58 * W2 - 0.33 * W3)) +
      node("Y.gstar", distr = "rbern", prob = plogis(-0.45 * trunc.sA.gstar - 0.5 * W1 - 0.58 * W2 - 0.33 * W3))

  D <- set.DAG(D)
  datO <- sim(D, n = nsamp, rndseed = rndseed)

  head(datO, 100)
  print("mean(datO$Y): " %+% mean(datO$Y));
  # [1] 0.31625
  psi0 <- mean(datO$Y.gstar)
  print("psi0: " %+% psi0)
  # [1] 0.22378

  return(list(psi0 = psi0, datO = datO))
}

run.1sim.tmlenet <- function(nsamp, psi0, Qform, f.gstar, trunc.const = 10, shift.const = 2, n_MCsims = 10) {
  datO <- get.iid.densityOdat(nsamp = nsamp, rndseed = NULL, trunc.const = trunc.const, shift.const = shift.const)$datO
  datO <- datO[,c("W1", "W2", "W3", "sA", "Y")]
  Kmax <- 1
  def_sW <- def.sW(W1 = "W1", W2 = "W2", W3 = "W3")
  def_sA <- def.sA(sA = "sA")
  tmlenet_res <- tmlenet(data = datO, Anode = "sA", Ynode = "Y",
                          Kmax = Kmax,
                          # nFnode = NULL,
                          f_gstar1 = f.gstar,
                          sW = def_sW, sA = def_sA,
                          Qform = Qform,
                          hform.g0 = "sA ~ W1 + W2 + W3",
                          hform.gstar = "sA ~ W1 + W2 + W3",
                          optPars = list(n_MCsims = n_MCsims))
                          # correct Q:
                          # Qform = "Y ~ W1 + W2 + W3 + sA",
                          # misspecified Q:
                          # Qform = "Y ~ W3 + sA",
  CIs <- tmlenet_res$EY_gstar1$CIs
  (tmle_B.CI <- CIs[rownames(CIs)%in%"tmle_B",])
  (h_iptw.CI <- CIs[rownames(CIs)%in%"h_iptw",])
  cover.tmle_B <- ((psi0 <= tmle_B.CI[2]) && (psi0 >= tmle_B.CI[1]))
  cover.h_iptw <- ((psi0 <= h_iptw.CI[2]) && (psi0 >= h_iptw.CI[1]))

  est_mat <- tmlenet_res$EY_gstar1$estimates
  est <- as.vector(est_mat)
  names(est) <- rownames(est_mat)
  cover <- apply(CIs, 1, function(row) ((psi0 <= row[2]) && (psi0 >= row[1])))
  cover2 <- c(tmle_B = cover.tmle_B, h_iptw = cover.h_iptw)
  # print("ests"); print(est)
  # print("CIs"); print(CIs)
  # print("cover"); print(cover)
  # print("cover2"); print(cover2)
  return(list(est = est, cover = cover))
}

# Function that returns a stochastic intervention function intervening on sA, for given shift
create_f.gstar <- function(shift, trunc.const) {
  shift.const <- shift
  trunc.const <- trunc.const
  f.gstar <- function(data, ...) {
    print("shift.const: " %+% shift.const)
    sA.mu <- 0.98 * data[,"W1"] + 0.58 * data[,"W2"] + 0.33 * data[,"W3"]
    untrunc.sA <- rnorm(n = nrow(data), mean = sA.mu + shift.const, sd = 1)
    r.new.sA <- exp(shift.const * (untrunc.sA - sA.mu - shift.const / 2))
    trunc.sA <- ifelse(r.new.sA > trunc.const, untrunc.sA - shift.const, untrunc.sA)
    return(trunc.sA)
  }
  return(f.gstar)
}

# NOTE:ADD THIS TO AN EXAMPLE OF STOCHASTIC INTERVENTION:
test.onesim.iid.tmlefit <- function() {
  # ---------------------------------------------------------------------------------------------------------
  trunc.const <- 10
  shift.const <- 1
  nsamp <- 10000
  # nsamp <- 2000
  # ---------------------------------------------------------------------------------------------------------
  # # get true psi.0:
  # datO <- get.iid.densityOdat(nsamp = 100000, rndseed = 12345, trunc.const = trunc.const, shift.const = shift.const)
  # psi0 <- datO$psi0
  # print("psi0: " %+% psi0)
  # [1] "psi0: 0.239584" (shift=1)
  # [1] "psi0: 0.2241085" (shift=2)

  # ---------------------------------------------------------------------------------------------------------
  # Misspecified Q:
  # ---------------------------------------------------------------------------------------------------------
  set.seed(33556)
  Qform.mis <- "Y ~ W3 + sA" # misspecified Q:
  f.gstar <- create_f.gstar(shift = shift.const, trunc.const = trunc.const)
  res <- run.1sim.tmlenet(nsamp = nsamp, psi0 = 0, Qform = Qform.mis,
                          f.gstar = f.gstar,  trunc.const = trunc.const, shift.const = shift.const,
                          n_MCsims = 10)
  res$est
  #      tmle    h_iptw     gcomp
  # 0.2360305 0.2441800 0.2131320
  # [1] "new MC.ests mat: "
  #         estimate
  # TMLE   0.2360305
  # h_IPTW 0.2441800
  # MLE    0.2131320
  # test 1:
  checkTrue(abs(res$est["tmle"]-0.2360305) < 10^-4)
  # test 2:
  checkTrue(abs(res$est["h_iptw"]-0.2441800) < 10^-4)
  # test 3:
  checkTrue(abs(res$est["gcomp"]-0.2131320) < 10^-4)

  # ---------------------------------------------------------------------------------------------------------
  # Correct Q:
  # ---------------------------------------------------------------------------------------------------------
  set.seed(23466)
  Qform.corr <- "Y ~ W1 + W2 + W3 + sA" # correct Q:
  res2 <- run.1sim.tmlenet(nsamp = nsamp, psi0 = 0, Qform = Qform.corr,
                          f.gstar = f.gstar,  trunc.const = 10, shift.const = shift.const,
                          n_MCsims = 10)
  res2$est
  # [1] "new MC.ests mat: "
  #         estimate
  # TMLE   0.2391396
  # h_IPTW 0.2443753
  # MLE    0.2428439
  #      tmle    h_iptw     gcomp
  # 0.2391396 0.2443753 0.2428439
  # test 1:
  checkTrue(abs(res2$est["tmle"]-0.2391396) < 10^-4)
  # test 2:
  checkTrue(abs(res2$est["h_iptw"]-0.2443753) < 10^-4)
  # test 3:
  checkTrue(abs(res2$est["gcomp"]-0.2428439) < 10^-4)



  # ---------------------------------------------------------------------------------------------------------
  # Correct Q w/ glm.fit:
  # ---------------------------------------------------------------------------------------------------------
  old_opts <- tmlenet_options(useglm = FALSE)
  print_tmlenet_opts()
  set.seed(23466)
  Qform.corr <- "Y ~ W1 + W2 + W3 + sA" # correct Q:
  res3 <- run.1sim.tmlenet(nsamp = nsamp, psi0 = 0, Qform = Qform.corr,
                          f.gstar = f.gstar,  trunc.const = 10, shift.const = shift.const,
                          n_MCsims = 10)
  res3$est
  # [1] "new MC.ests mat: "
  #         estimate
  # TMLE   0.2391396
  # h_IPTW 0.2443753
  # MLE    0.2428439
  #      tmle    h_iptw     gcomp
  # 0.2391396 0.2443753 0.2428439
  # test 1:
  checkTrue(abs(res2$est["tmle"]-0.2391396) < 10^-4)
  # test 2:
  checkTrue(abs(res2$est["h_iptw"]-0.2443753) < 10^-4)
  # test 3:
  checkTrue(abs(res2$est["gcomp"]-0.2428439) < 10^-4)





}
Any scripts or data that you put into this service are public.
tmlenet documentation built on May 29, 2017, 2:22 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
tmlenet
Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_03_iidcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

Try the tmlenet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmlenet Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_03_iidcont_sA_tests.R In tmlenet: Targeted Maximum Likelihood Estimation for Network Data

Try the tmlenet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tmlenet
Targeted Maximum Likelihood Estimation for Network Data

tests/RUnit/RUnit_tests_03_iidcont_sA_tests.R
In tmlenet: Targeted Maximum Likelihood Estimation for Network Data