library(tidyverse)
library(parallel)
library(didgformula)
library(kableExtra)
library(glue)

cores = detectCores()/2


set.seed(5)
nsims=5
Tt=5
Beta = generate_parameters(Tt, mu_Beta_A = c(.2,.2,.2,.2,-.4))
truth = estimate_truth(df_po = generate_data(1e7, Tt, Beta, TRUE), Tt, type = 'levels')
gc()
#>           used  (Mb) gc trigger   (Mb)  max used   (Mb)
#> Ncells 2424328 129.5    4536298  242.3   3412841  182.3
#> Vcells 4989007  38.1  530150331 4044.8 549990717 4196.1


onesim = function(n) {
  df = generate_data(n, Tt, Beta) 


  #num_mods = fit_treatment_models(df, rhs_formula = '~1', Tt=Tt, freq_w=1)
  den_mods_true = fit_treatment_models(df, rhs_formula = '~W1{t}+W2{t}+I(W2{t}^2)', Tt=Tt, freq_w =1)
  den_mods_false = fit_treatment_models(df, rhs_formula = '~W1{t}+W2{t}', Tt=Tt, freq_w =1)
  weights_true = cbind(1, calc_weights(denominator = pred_treatment_models(df, den_mods_true),
                                       numerator   = 1))
  weights_false = cbind(1, calc_weights(denominator = pred_treatment_models(df, den_mods_false),
                                        numerator   = 1))


  results = list(
    iptw_true = iptw_pipeline(df, Tt, '~W1{t}+W2{t}+I(W2{t}^2)', '~1',models = FALSE),
    iptw_gfal = iptw_pipeline(df, Tt, '~W1{t}+W2{t}', '~1',models = FALSE),
    ice_true = ice_pipeline(df, '~W1{t}+W2{t}+I(W2{t}^2)', '~W1{t-1}+W2{t-1}+I(W2{t-1}^2)','~W1{k}+W2{k}+I(W2{k}^2)',Tt,models = FALSE),
    ice_qfal = ice_pipeline(df, '~W1{t}+W2{t}', '~W1{t-1}+W2{t-1}','~W1{k}+W2{k}',Tt,models = FALSE),
    tmle_true = ice_pipeline(df, '~W1{t}+W2{t}+I(W2{t}^2)', '~W1{t-1}+W2{t-1}+I(W2{t-1}^2)','~W1{k}+W2{k}+I(W2{k}^2)',Tt,models = FALSE, tmle=TRUE, weights=weights_true),
    tmle_gfal = ice_pipeline(df, '~W1{t}+W2{t}+I(W2{t}^2)', '~W1{t-1}+W2{t-1}+I(W2{t-1}^2)','~W1{k}+W2{k}+I(W2{k}^2)',Tt,models = FALSE, tmle=TRUE, weights=weights_false),
    tmle_qfal = ice_pipeline(df, '~W1{t}+W2{t}          ',  '~W1{t-1}+W2{t-1}            ', '~W1{k}+W2{k}          ',Tt,models = FALSE, tmle=TRUE, weights=weights_true),
    tmle_bfal = ice_pipeline(df, '~W1{t}+W2{t}          ',  '~W1{t-1}+W2{t-1}            ', '~W1{k}+W2{k}          ',Tt,models = FALSE, tmle=TRUE, weights=weights_false)
  )


  #return estimate and bias for mean outcome at time 5
  y0 = mean(df$Y0)
  results %>%
    map(mutate, estimate = cumsum(estimate) + y0) %>%
    bind_rows(.id='estimator') %>%
    left_join(truth %>% rename(truth=estimate), by='t') %>%
    mutate(bias = estimate - truth) %>%
    filter(t==5) %>%
    select(estimator, estimate, bias)
}

fmt = function(x, dig =3, clip=TRUE, format='g') {
  out = formatC(x, digits=dig, format=format, flag='#')
  clipped_out = paste0(c('<0.', rep(0, dig-1), '1'), collapse = '')
  if(clip) {
    out = ifelse(x < 10^(-dig), clipped_out, out)
  }
  return (out)
}
sims1e3 = mclapply(1:nsims, function(x) onesim(1e2), mc.cores = cores, mc.set.seed = TRUE)
gc()
#>           used  (Mb) gc trigger   (Mb)  max used   (Mb)
#> Ncells 2424458 129.5    4536298  242.3   3412841  182.3
#> Vcells 4990716  38.1  424120265 3235.8 549990717 4196.1
sims1e4 = mclapply(1:nsims, function(x) onesim(1e3), mc.cores = cores, mc.set.seed = TRUE)
gc()
#>           used  (Mb) gc trigger   (Mb)  max used   (Mb)
#> Ncells 2424804 129.5    4536298  242.3   3412841  182.3
#> Vcells 4991628  38.1  339296212 2588.7 549990717 4196.1
sims1e5 = mclapply(1:nsims, function(x) onesim(1e4), mc.cores = cores, mc.set.seed = TRUE)
gc()
#>           used  (Mb) gc trigger   (Mb)  max used   (Mb)
#> Ncells 2424806 129.5    4536298  242.3   3412841  182.3
#> Vcells 4991697  38.1  271436970 2070.9 549990717 4196.1
#reshape
result = list('1e3'=sims1e3,'1e4'=sims1e4, '1e5'=sims1e5) %>%
  map(bind_rows, .id='rep') %>%
  bind_rows(.id='n') 

result_table = result %>%
  group_by(n, estimator) %>%
  summarise(variance = var(estimate), bias =mean(bias),
            p = nortest::lillie.test(estimate)$p.value) %>%
  mutate(p = fmt(p, dig=2),
         variance = fmt(variance, dig=2, clip=FALSE),
         bias = fmt(bias, dig=4, clip=FALSE, format='f')) %>%
  ungroup() %>%
  split(.$n) %>%
  {cbind(estimator = .[[1]]$estimator,  Reduce(cbind, map(., select, -n:-estimator))) }


result_table %>%
  as.matrix() %>%
  kable() %>%
  kable_styling() %>%
  add_header_above(c(" " = 1, "n=1,000" = 3, "n=10,000" =3, "n=100,000" = 3)) %>%
  footnote('p=p-value from Kolmogorov-Smirnoff test for normality.
Abbreviations: ice=iterated conditional expectation, iptw=inverse probability of treatment weighted, tmle=targeted
maximum likelihood, qfal=outcome models misspecified, gfal=treatment models misspecified, bfal=both sets of models
misspecified, true=all models correctly specified.
8
')
n=1,000
n=10,000
n=100,000
estimator variance bias p variance bias p variance bias p
ice_qfal 0.0043 0.0115 0.68 0.00046 0.0110 0.81 4.5e-05 0.0116 0.76
ice_true 0.0042 -0.0004 0.82 0.00045 -0.0002 0.23 4.2e-05 0.0002 0.45
iptw_gfal 0.0045 0.0117 0.86 0.00047 0.0117 0.49 4.6e-05 0.0125 0.69
iptw_true 0.0060 -0.0017 0.37 0.00063 -0.0008 0.87 7.4e-05 -0.0001 0.57
tmle_bfal 0.0044 0.0119 0.51 0.00046 0.0115 0.65 4.5e-05 0.0122 0.62
tmle_gfal 0.0042 -0.0008 0.86 0.00045 -0.0004 0.28 4.3e-05 0.0002 0.21
tmle_qfal 0.0059 -0.0009 0.15 0.00064 -0.0007 0.78 7.6e-05 -0.0002 0.41
tmle_true 0.0054 -0.0003 0.82 0.00056 0.0001 0.11 5.7e-05 0.0001 0.69
Note:
p=p-value from Kolmogorov-Smirnoff test for normality.
Abbreviations: ice=iterated conditional expectation, iptw=inverse probability of treatment weighted, tmle=targeted
maximum likelihood, qfal=outcome models misspecified, gfal=treatment models misspecified, bfal=both sets of models
misspecified, true=all models correctly specified.


audreyrenson/didgformula documentation built on Oct. 9, 2022, 11:45 a.m.