
#    This file creates the dataset tb.interruption and uses the
#    full dataset posted by Lackey et al., including the missing
#    values.  The Lackey paper reports 1294 enrolled, but only 1293 are in
#    the dataset.  The case missing from the data is suspected to be the one
#    where treatment was stopped at the option of the treating physician.

# Data are available from the Dryad Digital Repository
# (doi:10.5061/dryad.fp94d).

# Resolve the raw Excel workbook's path relative to the project root and
# read it in.
xls_path <- here::here(
  "data-raw/tb_interruption/Lima TB Treatment Default Data.xls"
)
Lima_TB_Treatment_Default_Data <- readxl::read_excel(xls_path)

# Short working alias used by the cleaning steps that follow.
tb <- Lima_TB_Treatment_Default_Data


# Rename columns: replace the spreadsheet's headers with short dotted names,
# assigned by position (18 names in total).
# NOTE(review): the 2nd and 18th names are empty strings in this copy of the
# script -- they look like names that were lost; confirm the intended names
# against the spreadsheet before running.
colnames(tb) <- c("id","", "sex", "marital.status",
                        "poverty", "prison.history",
                        "education", "tobacco.use", "alcohol.use", "drug.use",
                        "rehab.history", "mdr.tb", "bmi", "chronic.disease", "hiv.test",
                        "diabetes", "trt.outcome", "")

# Drop unwanted columns from tb.
# NOTE(review): this statement is cut off after `-c(` in this copy of the
# script -- the columns to drop and the closing parentheses are missing.
# Restore it from the original source before running.
tb <- subset(tb, select = -c(

# df <- df %>% mutate_at(c('team', 'position'), as.factor)

# Transform a set of columns starting with 'sex' via mutate_at().
# NOTE(review): this statement is also cut off -- the remaining column names
# and the function to apply are missing. The commented-out template above
# suggests as.factor was intended; confirm against the original source.
tb <- tb %>% mutate_at(c('sex',

# Recode some factor levels: collapse poverty to a Yes/No indicator.
# recode_factor() takes old = new pairs and orders the resulting levels in
# the order the replacements are listed, so the levels come out "No", "Yes".
# The call is namespace-qualified, like the readxl/here/usethis calls in this
# script, so it does not depend on dplyr being attached.
tb$poverty <- dplyr::recode_factor(
  tb$poverty,
  "Not in poverty" = "No",
  "Poverty/extreme poverty" = "Yes"
)

# Create a binary (logical) version of trt.outcome: TRUE where the outcome is
# "Default", FALSE for any other outcome, NA where trt.outcome is missing.
# NOTE(review): the target column name after `tb$` is missing in this copy of
# the script, so the assignment does not parse; restore the name before
# running.

tb$ <-
  (tb$trt.outcome == "Default")

# check for 1233 complete cases, and for
# match to initial analysis

# tb.complete <- tb[complete.cases(tb), ]

# prop.table(table(tb.complete$
# addmargins(table(tb.complete$
# addmargins(table(tb.complete$education, tb.complete$

# Publish the cleaned data under its final dataset name.
tb.interruption <- tb

# Save tb.interruption as package data; overwrite = TRUE replaces any
# previously saved copy. use_data() names the stored object after the symbol
# passed in, so the variable name here must stay tb.interruption.
usethis::use_data(tb.interruption, overwrite = TRUE)
# OpenIntroStat/openintro documentation built on Jan. 2, 2025, 6:15 a.m.