# TB incident contact investigation costing
# N Green
#
# bootstrap raw incidence data then aggregate
# to get 95% CI estimates using quantiles
# and then sample from normal distributions
# with these parameter values to get counts of
# incidents, identified, screened and latent positive
library(reshape2)
library(dplyr)
library(rsample)
library(purrr)
library(tidyr)
library(broom)
devtools::load_all(".")
source("scripts/model_data.R")
dat <- read.csv("data/cleaned_data.csv", check.names = FALSE)
########
# prep #
########
# total number of individuals within each year and setting
# and proportions
# i.e. sum all same settings
total_year_setting <-
dat %>%
sum_by_group(year, setting)
# # mean number of individuals within each setting
# mean_setting <-
# dat %>%
# mean_by_group(setting)
# check against bootstrap estimates
# across all years
mean_setting <-
mean_by_setting(total_year_setting)
#############
# bootstrap #
#############
# see https://cran.r-project.org/web/packages/rsample/vignettes/Basics.html
boots <- rsample::bootstraps(dat, times = 100)
#####################################################
# bootstrap statistic
# over each bootstrap sample
# as.data.frame() transforms from bootstrap object
boot_tabs <-
boots %>%
mutate(
# mean totals across years for each setting
total =
map(splits,
function(x)
mean_by_setting(
sum_by_group(dat = as.data.frame(x),
year, setting))),
# total individuals within each year and setting
per_inc =
map(splits,
function(x)
sum_by_group(dat = as.data.frame(x),
year, setting)))
# - mean of year totals for each setting
# - mean(#identified)/mean(#incidents)
# stack all bootstrap dataframes into single long format
# stacked_boot <-
# boot_tabs %>%
# select(-splits, -per_inc) %>%
# # Turn it into a tibble by stacking
# unnest(cols = c(total)) %>%
# mutate(id_per_inc = round(identified/incidents, 2),
# screen_per_inc = round(screen/incidents, 2),
# latent_per_inc = round(latent/incidents, 2))
###################################################
# bootstrapped costs
# - year totals for each setting
# - mean(#identified/#incidents)
stacked_per_inc <-
boot_tabs %>%
select(-splits, -total) %>%
unnest(cols = c(per_inc)) %>%
group_by(id, setting) %>%
summarise_at(vars(identified:latent_per_inc),
function(x) round(median(x, na.rm = TRUE), 2)) %>%
mutate(screen_per_inc2 = round(id_per_inc*p_screen, 2),
latent_per_inc2 = round(screen_per_inc2*p_ltbi, 2),
total_cost = vtotal_year_cost(incidents,
id_per_inc,
screen_per_inc2,
latent_per_inc2),
cost_per_id = total_cost/identified,
cost_per_screen = total_cost/screen,
cost_per_ltbi = total_cost/latent)
# view bootstrap samples
# par(mfrow = c(3,3))
#
# for (i in 2010:2018){
# hist(stacked_boot[stacked_boot$year == i &
# stacked_boot$setting == "education", "screen"] %>%
# unlist(),
# breaks = 40,
# main = i)
# }
# calculate mean and upper, lower bounds of 95% CI
# tab_total <-
# stacked_boot %>%
# # group_by(year, setting) %>% # for totals
# group_by(setting) %>% # for means
# summarise(
# mu_id = round(mean(identified, na.rm = TRUE), 0),
# lCI_id = round(quantile(identified, probs = 0.25, na.rm = TRUE), 0),
# uCI_id = round(quantile(identified, probs = 0.975, na.rm = TRUE), 0),
# mu_s = round(mean(screen, na.rm = TRUE), 0),
# lCI_s = round(quantile(screen, probs = 0.25, na.rm = TRUE), 0),
# uCI_s = round(quantile(screen, probs = 0.975, na.rm = TRUE), 0),
# mu_ltbi = round(mean(latent, na.rm = TRUE), 0),
# lCI_ltbi = round(quantile(latent, probs = 0.25, na.rm = TRUE), 0),
# uCI_ltbi = round(quantile(latent, probs = 0.975, na.rm = TRUE), 0),
# mu_inc = round(mean(incidents, na.rm = TRUE), 0),
# lCI_inc = round(quantile(incidents, probs = 0.25, na.rm = TRUE), 0),
# uCI_inc = round(quantile(incidents, probs = 0.975, na.rm = TRUE), 0))
#
# tab_total$sigma_inc <- (tab_total$mu_inc - tab_total$lCI_inc)/1.96
# tab_total$sigma_id <- (tab_totalab$mu_id - tab_total$lCI_id)/1.96
# tab_total$sigma_screen <- (tab_total$mu_s - tab_total$lCI_s)/1.96
# tab_total$sigma_ltbi <- (tab_total$mu_ltbi - tab_total$lCI_ltbi)/1.96
#
# # write.csv(tab, file = here::here("data", "bar_dat_boot.csv"), row.names = FALSE)
# write.csv(tab_total, file = here::here("data", "bar_dat_boot_mean.csv"), row.names = FALSE)
#################################
# summary table
tab_per_inc <-
stacked_per_inc %>%
# group_by(year, setting) %>% # for totals
group_by(setting) %>% # for means
summarise(
mu_inc = mean(incidents, na.rm = TRUE),
sd_inc = sd(incidents, na.rm = TRUE),
lCI_inc = round(quantile(incidents, probs = 0.025, na.rm = TRUE), 2),
uCI_inc = round(quantile(incidents, probs = 0.975, na.rm = TRUE), 2),
mu_id_per_inc = mean(id_per_inc, na.rm = TRUE),
sd_id_per_inc = sd(id_per_inc, na.rm = TRUE),
lCI_id_per_inc = round(quantile(id_per_inc, probs = 0.025, na.rm = TRUE), 2),
uCI_id_per_inc = round(quantile(id_per_inc, probs = 0.975, na.rm = TRUE), 2),
mu_s = round(mean(screen_per_inc2, na.rm = TRUE), 2),
sd_s = round(sd(screen_per_inc2, na.rm = TRUE), 2),
lCI_s = round(quantile(screen_per_inc2, probs = 0.025, na.rm = TRUE), 2),
uCI_s = round(quantile(screen_per_inc2, probs = 0.975, na.rm = TRUE), 2),
mu_ltbi = round(mean(latent_per_inc2, na.rm = TRUE), 2),
sd_ltbi = round(sd(latent_per_inc2, na.rm = TRUE), 2),
lCI_ltbi = round(quantile(latent_per_inc2, probs = 0.025, na.rm = TRUE), 2),
uCI_ltbi = round(quantile(latent_per_inc2, probs = 0.975, na.rm = TRUE), 2),
mu_platent = round(mean(p_ltbi, na.rm = TRUE), 2),
sd_platent = sd(p_ltbi, na.rm = TRUE),
lCI_platent = round(quantile(p_ltbi, probs = 0.025, na.rm = TRUE), 2),
uCI_platent = round(quantile(p_ltbi, probs = 0.975, na.rm = TRUE), 2),
mu_pscreen = round(mean(p_screen, na.rm = TRUE), 2),
sd_pscreen = sd(p_screen, na.rm = TRUE),
lCI_pscreen = round(quantile(p_screen, probs = 0.025, na.rm = TRUE), 2),
uCI_pscreen = round(quantile(p_screen, probs = 0.975, na.rm = TRUE), 2),
mu_cost = mean(total_cost, na.rm = TRUE),
sd_cost = sd(total_cost, na.rm = TRUE),
lCI_cost = round(quantile(total_cost, probs = 0.025, na.rm = TRUE), 2),
uCI_cost = round(quantile(total_cost, probs = 0.975, na.rm = TRUE), 2),
mu_cost_per_id = mean(cost_per_id, na.rm = TRUE),
sd_cost_per_id = sd(cost_per_id, na.rm = TRUE),
lCI_cost_per_id = round(quantile(cost_per_id, probs = 0.025, na.rm = TRUE), 2),
uCI_cost_per_id = round(quantile(cost_per_id, probs = 0.975, na.rm = TRUE), 2),
mu_cost_per_screen = mean(cost_per_screen, na.rm = TRUE),
sd_cost_per_screen = sd(cost_per_screen, na.rm = TRUE),
lCI_cost_per_screen = round(quantile(cost_per_screen, probs = 0.025, na.rm = TRUE), 2),
uCI_cost_per_screen = round(quantile(cost_per_screen, probs = 0.975, na.rm = TRUE), 2),
mu_cost_per_ltbi = mean(cost_per_ltbi, na.rm = TRUE),
sd_cost_per_ltbi = sd(cost_per_ltbi, na.rm = TRUE),
lCI_cost_per_ltbi = round(quantile(cost_per_ltbi, probs = 0.025, na.rm = TRUE), 2),
uCI_cost_per_ltbi = round(quantile(cost_per_ltbi, probs = 0.975, na.rm = TRUE), 2)
)
##TODO: why this doesnt equal the sd() values?
# tab_per_inc$sigma_id <- (tab_per_inc$mu_id - tab_per_inc$lCI_id)/1.96
# tab_per_inc$sigma_inc <- (tab_per_inc$mu_inc - tab_per_inc$lCI_inc)/1.96
# tab_per_inc$sigma_screen <- (tab_per_inc$mu_s - tab_per_inc$lCI_s)/1.96
# tab_per_inc$sigma_ltbi <- (tab_per_inc$mu_ltbi - tab_per_inc$lCI_ltbi)/1.96
# tab_per_inc$sigma_platent <- (tab_per_inc$mu_platent - tab_per_inc$lCI_platent)/1.96
# tab_per_inc$sigma_pscreen <- (tab_per_inc$mu_pscreen - tab_per_inc$lCI_pscreen)/1.96
########
# save #
########
write.csv(tab_per_inc,
file = here::here("data", "boot_mean_per_inc.csv"),
row.names = FALSE)
save(tab_per_inc, file = here::here("data", "tab_per_inc.RData"))
write.csv(stacked_per_inc,
file = here::here("data", "boot_stacked_per_inc.csv"),
row.names = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.