tuni: Tables for Epidemiological Analysis

# Setup -----------------------------------------
#
# ## Installing required packages for this template
required_packages <- c("knitr",       # create output docs
                       "here",        # find your files
                       "dplyr",       # clean/shape data
                       "epikit",      # clean/shape data
                       "forcats",     # clean/shape data
                       "stringr",     # clean text
                       "rio",         # read in data
                       "ggplot2",     # create plots and charts
                       "patchwork",   # combine plots in one
                       "sitrep",      # MSF field epi functions
                       "linelist",    # Functions for cleaning/standardising data/dates
                       #"matchmaker",  # dictionary-based standardization of variables
                       #"incidence",   # create epicurves
                       "aweek",       # define epi weeks
                       #"sf",          # encode spatial vector data
                       #"ggspatial",   # plot maps
                       "testthat"     # testing functions
)

for (pkg in required_packages) {
  # install packages if not already present
  if (!pkg %in% rownames(installed.packages())) {
    install.packages(pkg)
  }

  # load packages to this current session
  library(pkg, character.only = TRUE)
}



# linelist_raw <- rio::import(here::here("AJS_AmTiman.xlsx"), which = "linelist")
linelist_raw <- epidict::gen_data("AJS")

# Load dictionary
linelist_dict <- epidict::msf_dict("AJS", compact = FALSE)

linelist_cleaned <- linelist_raw
## Clean column names

## define clean variable names using clean_labels from the epitrix package
# cleaned_colnames <- epitrix::clean_labels(colnames(linelist_raw))
#
# ## overwrite variable names with defined clean names
# colnames(linelist_cleaned) <- cleaned_colnames

linelist_cleaned <- matchmaker::match_df(linelist_cleaned,
                                         dict  = linelist_dict,
                                         from  = "option_code",
                                         to    = "option_name",
                                         by    = "data_element_shortname",
                                         order = "option_order_in_set"
)

linelist_cleaned <- linelist_cleaned %>%
  filter(!is.na(case_number) & !is.na(date_of_consultation_admission))


linelist_cleaned$DIED <- str_detect(linelist_cleaned$exit_status, "Dead")
linelist_cleaned <- linelist_cleaned %>%
  mutate(case_def = case_when(
    is.na(hep_e_rdt) & is.na(other_cases_in_hh)           ~ NA_character_,
    hep_e_rdt == "Positive"                               ~ "Confirmed",
    hep_e_rdt != "Positive" & other_cases_in_hh == "Yes"  ~ "Probable",
    TRUE                                                  ~ "Suspected"
  ))

linelist_cleaned <- linelist_cleaned %>%
  mutate(exit_status2 = case_when(
    exit_status %in% c("Dead on arrival", "Dead in facility") ~ "Died",
    exit_status %in% c("Transferred (to an MSF facility)",
                       "Transferred (to External Facility)")  ~ "Transferred",
    exit_status == "Discharged home"                          ~ "Discharged",
    exit_status == "Left against medical advice"              ~ "Left"
  ))


linelist_cleaned <- linelist_cleaned %>%
  mutate(case_def = case_when(
    is.na(hep_e_rdt) & is.na(other_cases_in_hh)           ~ NA_character_,
    hep_e_rdt == "Positive"                               ~ "Confirmed",
    hep_e_rdt != "Positive" & other_cases_in_hh == "Yes"  ~ "Probable",
    TRUE                                                  ~ "Suspected"
  ))



# change the order of levels in a single categorical variable
# This orders the levels -- since in all figures / tables, 0-4 should come
# before 4-24, etc
linelist_cleaned <- linelist_cleaned %>%
  mutate(time_to_death = factor(time_to_death,
                                levels = c("0-4 hours",
                                           ">4-24 hours",
                                           ">24-48 hours",
                                           ">48 hours")))



# Change the order of levels of multiple categorical variables at the same time
linelist_cleaned <- linelist_cleaned %>%
  mutate_at(vars(
    # Looks for variables beginning with "test"
    starts_with("test")),
    fct_relevel,
    # Sets order of levels
    "Positive", "Negative", "Not done")


# create a grouping of all symptoms
SYMPTOMS <- c("history_of_fever",
              "fever",
              "nausea_anorexia",
              "vomiting",
              "epigastric_pain_heartburn",
              "generalized_itch",
              "headache",
              "joint_pains",
              "diarrhoea",
              "bleeding",
              "convulsions",
              "mental_state",
              "other_symptoms"
)



# create a grouping of all lab tests
LABS <- c("hep_b_rdt",
          "hep_c_rdt",
          "hep_e_rdt",
          "test_hepatitis_a",
          "test_hepatitis_b",
          "test_hepatitis_c",
          "test_hepatitis_e_igg",
          "test_hepatitis_e_igm" ,
          "test_hepatitis_e_genotype",
          "test_hepatitis_e_virus",
          "malaria_rdt_at_admission",
          "malaria_blood_film",
          "dengue",
          "dengue_rdt",
          "yellow_fever",
          "typhoid",
          "chikungunya_onyongnyong",
          "ebola_marburg",
          "lassa_fever",
          "other_arthropod_transmitted_virus",
          "other_pathogen"
)

# Add under 2 years to the age_years variable
## data dictionary defines that under 2s dont have year filled in (but months/days instead)
linelist_cleaned <- linelist_cleaned %>%
  mutate(age_months = case_when(
    is.na(age_years) & is.na(age_months) ~ as.numeric(age_days / 30),
    TRUE                                 ~ as.numeric(age_months)
  ),
  age_years = case_when(
    is.na(age_years) & is.na(age_months) ~ as.numeric(age_days / 365.25),
    is.na(age_years)                     ~ as.numeric(age_months / 12),
    TRUE                                 ~ as.numeric(age_years)
  ))

## create age group variable for under 5 years based on months
linelist_cleaned$age_group_mon <- epikit::age_categories(linelist_cleaned$age_months,
                                                         breakers = c(0, 6, 9, 12, 24),
                                                         ceiling = TRUE)

## create an age group variable by specifying categorical breaks
linelist_cleaned$age_group <- epikit::age_categories(linelist_cleaned$age_years,
                                                     breakers = c(0, 3, 15, 30, 45))

#
linelist_cleaned$gender <- factor(linelist_cleaned$sex, levels = c("Male", "Female"))


# Population data
# estimate population size by age group in years
population_data_age <- gen_population(total_pop = 5000,
                                      groups = c("0-2", "3-14", "15-29", "30-44", "45+"),
                                      proportions = c(0.068, 0.3622, 0.276, 0.1616, 0.1321),
                                      strata = NULL) %>%
  rename(age_group = groups,
         population = n)


## estimate population size by region proportion
population_data_region <- gen_population(total_pop = 5000,         # set the total population
                                         groups = c("Village A", "Village B", "Village C", "Village D"),  # set the groups
                                         proportions = c(0.221, 0.174, 0.355, 0.245),                     # set the proportions for each group
                                         strata = NULL) %>%               # do not stratify by gender
  rename(patient_origin = groups,  # rename columns (syntax is NEW NAME = OLD NAME)
         population = n)


population <- sum(population_data_age$population)


# Tests start ------------------------------------------------------------------


test_that("cfr calculation returns gtsummary object and correct results with dichotomous variables", {
  # Uses gtsummary::add_stat but simiplifies it - trade off some customisability/transparency for convenience
  # should be able to handle dichot, logical, multi-level variables - both label and level gtsummary::add_stat
  # function: add_gt_case_fatality_rate
  expected_cfr <-  linelist_cleaned %>%
    filter(patient_facility_type == "Inpatient") %>%
    case_fatality_rate_df(deaths = DIED, mergeCI = TRUE) %>%
    mutate(cfr = formatC(cfr, digits = 2, format = "f"))

  gt_cfr <- linelist_cleaned %>%
    dplyr::filter(patient_facility_type == "Inpatient") %>%
    dplyr::select(DIED) %>%
    # case_fatality_rate_df(deaths = DIED, mergeCI = TRUE) %>%
    gtsummary::tbl_summary(
      statistic = everything() ~ "{N}",
      label = DIED ~ "All participants") %>%
    # Use wrapper function to calculate cfr
    add_cfr(deaths_var = "DIED")
  cfr_df <- gt_cfr$table_body

  expect_s3_class(gt_cfr, "gtsummary")
  expect_equal(as.numeric(cfr_df$Deaths), expected_cfr$deaths)
  expect_equal(as.numeric(cfr_df$stat_0), expected_cfr$population)
  expect_equal(cfr_df$`CFR (%)`, expected_cfr$cfr)
  expect_equal(cfr_df$`95%CI`, expected_cfr$ci)
})

test_that("cfr calculation returns gtsummary object and correct results with categorical variables", {
  # Uses gtsummary::add_stat but simiplifies it - trade off some customisability/transparency for convenience
  # should be able to handle dichot, logical, multi-level variables - both label and level gtsummary::add_stat
  # function: add_gt_case_fatality_rate
  expected_cfr <-  linelist_cleaned %>%
    filter(patient_facility_type == "Inpatient") %>%
    epitabulate::case_fatality_rate_df(deaths = DIED, group = gender, mergeCI = TRUE) %>%
    mutate(cfr = formatC(cfr, 2, format = "f"))

  gt_cfr <- linelist_cleaned %>%
    dplyr::filter(patient_facility_type == "Inpatient") %>%
    dplyr::select(DIED, gender) %>%
    dplyr::mutate(deaths_var = "DIED") %>%
    gtsummary::tbl_summary(
      include = gender,
      statistic = gender ~ "{n}",
      missing = "no",
      label = gender ~ "Gender"
    ) %>%
    # Use wrapper function to calculate cfr
    add_cfr(deaths_var = "DIED")

  cfr_df <- gt_cfr$table_body

  male_exp_cfr <- expected_cfr %>% dplyr::filter(gender == "Male")
  female_exp_cfr <- expected_cfr %>% dplyr::filter(gender == "Female")

  male_cfr <- cfr_df %>% dplyr::filter(label == "Male")
  female_cfr <- cfr_df %>% dplyr::filter(label == "Female")


  expect_s3_class(gt_cfr, "gtsummary")
  expect_equal(as.numeric(male_cfr$Deaths), male_exp_cfr$deaths)
  expect_equal(as.numeric(female_cfr$stat_0), female_exp_cfr$population)
  expect_equal(male_cfr$`CFR (%)`, male_exp_cfr$cfr)
  expect_equal(female_cfr$`95%CI`, female_exp_cfr$ci)

})

test_that("cfr calculation returns gtsummary object and correct results with categorical and dichotomous variables", {
  # Uses gtsummary::add_stat but simiplifies it - trade off some customisability/transparency for convenience
  # should be able to handle dichot, logical, multi-level variables - both label and level gtsummary::add_stat
  # function: add_gt_case_fatality_rate
  expected_cfr_all <- linelist_cleaned %>%
    filter(patient_facility_type == "Inpatient") %>%
    case_fatality_rate_df(deaths = DIED, mergeCI = TRUE) %>%
    mutate(cfr = formatC(cfr, digits = 2, format = "f"))
  expected_cfr <-  linelist_cleaned %>%
    filter(patient_facility_type == "Inpatient") %>%
    epitabulate::case_fatality_rate_df(deaths = DIED, group = gender, mergeCI = TRUE) %>%
    mutate(cfr = formatC(cfr, 2, format = "f"))

  gt_cfr <- linelist_cleaned %>%
    dplyr::filter(patient_facility_type == "Inpatient") %>%
    dplyr::select(DIED, gender) %>%
    gtsummary::tbl_summary(
      statistic = list(DIED ~ "{N}", gender ~ "{n}"),
      label = list(gender ~ "Gender", DIED ~ "All participants")
    ) %>%
    # Use wrapper function to calculate cfr
    add_cfr(deaths_var = "DIED")

  cfr_df <- gt_cfr$table_body

  all_participants <- cfr_df %>% dplyr::filter(variable == "DIED")
  male_exp_cfr <- expected_cfr %>% dplyr::filter(gender == "Male")
  female_exp_cfr <- expected_cfr %>% dplyr::filter(gender == "Female")

  male_cfr <- cfr_df %>% dplyr::filter(label == "Male")
  female_cfr <- cfr_df %>% dplyr::filter(label == "Female")

  expect_s3_class(gt_cfr, "gtsummary")
  expect_equal(as.numeric(male_cfr$Deaths), male_exp_cfr$deaths)
  expect_equal(as.numeric(female_cfr$stat_0), female_exp_cfr$population)
  expect_equal(male_cfr$`CFR (%)`, male_exp_cfr$cfr)
  expect_equal(female_cfr$`95%CI`, female_exp_cfr$ci)
  expect_equal(all_participants$`CFR (%)`, expected_cfr_all$cfr)

})

test_that("attack rate calculation returns gtsummary object and correct results with dichotomous variables", {
  # Case variable but be a logical (T/F or 1,0) with no missing data (NAs)
  # Since there is no missing data forthese  symptoms in linelist, we can code to
  # logical using ifelse - can also be coded as 1,0
  linelist_cleaned <- linelist_cleaned %>%
    mutate(case = ifelse(diarrhoea == "Yes" & bleeding == "Yes" & fever == "Yes", T, F))

  case_count <- sum(linelist_cleaned$case)
  total_pop <- nrow(linelist_cleaned)
  expected_ar <- attack_rate( case_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3)

  gt_ar <- linelist_cleaned %>%
    gtsummary::tbl_summary(
      include = case,
      statistic = case ~ "{n}",
      label = case ~ "Case")  %>%
    add_ar(case_var = "case", multiplier = 10000)

  gt_ar
  ar_df <- gt_ar$table_body

  expect_s3_class(gt_ar, "gtsummary")
  expect_equal(as.numeric(ar_df$stat_0), expected_ar$cases)
  expect_equal(ar_df$`AR (per 10,000)`, formatC(expected_ar$ar, digits = 2, format = "f"))
  expect_equal(ar_df$`95%CI`, expected_ar$ci)
})

test_that("attack rate calculation returns gtsummary object and correct results with categorical variables", {

  # Case variable but be a logical (T/F or 1,0) with no missing data (NAs)
  # Since there is no missing data forthese  symptoms in linelist, we can code to
  # logical using ifelse - can also be coded as 1,0
  linelist_cleaned <- linelist_cleaned %>%
    mutate(case = ifelse(diarrhoea == "Yes" & bleeding == "Yes" & fever == "Yes", T, F))

  counts <- linelist_cleaned %>%
    dplyr::mutate(case = factor(case)) %>%
    dplyr::group_by(age_group, case, .drop = FALSE) %>%
    dplyr::count(name = "case_n") %>%
    dplyr::group_by(age_group) %>%
    dplyr::mutate(total = sum(case_n), .drop = FALSE) %>%
    dplyr::filter(case == T)

  # calculate population total from population table
  expected_ar_lev <- attack_rate(counts$case_n, counts$total, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3)


  gt_ar_lev <- linelist_cleaned %>%
    # Add population and multiplier to data frame (can't pass args to add_stat)
    dplyr::select(age_group, case) %>%
    gtsummary::tbl_summary(
      include = age_group,
      statistic = age_group ~ "{n}",
      label = age_group ~ "Age group") %>%
    add_ar(case_var = "case", multiplier = 10000)

  ar_df_lev <- gt_ar_lev$table_body
  ar_df_lev <- ar_df_lev %>% filter(label != "Age Group")

  expect_s3_class(gt_ar_lev, "gtsummary")
  expect_equal(as.numeric(ar_df_lev$Cases[-1]), expected_ar_lev$cases)
  expect_equal(ar_df_lev$`AR (per 10,000)`[-1], formatC(expected_ar_lev$ar, digits = 2, format = "f"))
  expect_equal(ar_df_lev$`95%CI`[-1], expected_ar_lev$ci)
})

test_that("attack rate calculation returns gtsummary object and correct results with dichotomous variable forced to categorical", {
  # Case variable but be a logical (T/F or 1,0) with no missing data (NAs)
  # Since there is no missing data forthese  symptoms in linelist, we can code to
  # logical using ifelse - can also be coded as 1,0
  linelist_cleaned <- linelist_cleaned %>%
    mutate(case = ifelse(diarrhoea == "Yes" & bleeding == "Yes" & fever == "Yes", T, F))

  case_count <- sum(linelist_cleaned$case)
  total_pop <- nrow(linelist_cleaned)
  expected_ar <- attack_rate( case_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3)
   # Create dichotomous variable to force to categorical
  linelist_cleaned <- linelist_cleaned %>%
    mutate(setting = factor(
           sample(c("rural", "urban"), nrow(linelist_cleaned), prob = c(0.25, 0.75), replace = TRUE),
           levels = c("rural", "urban")))
  # create fake population numbers for urban/rural
  setting_population <- c(4000, 10000)

  gt_ar_lev <- linelist_cleaned %>%
    # Add population and multiplier to data frame (can't pass args to add_stat)
    dplyr::select(setting, case) %>%
    gtsummary::tbl_summary(
      include = setting,
      statistic = setting ~ "{n}",
      label = setting ~ "Setting",
      type = setting ~ "categorical") %>%
    add_ar(case_var = "case")

  ar_df_lev <- gt_ar_lev$table_body
  ar_df_lev <- ar_df_lev %>% filter(label != "Setting")
  totals <- linelist_cleaned %>% group_by(setting) %>% count()

  expect_s3_class(gt_ar_lev, "gtsummary")
  expect_equal(as.numeric(ar_df_lev$stat_0), totals$n)
})

test_that("attack rate calculation returns gtsummary object and correct results with categorical and dichotomous variables", {
  linelist_cleaned <- linelist_cleaned %>%
    mutate(case = ifelse(diarrhoea == "Yes" & bleeding == "Yes" & fever == "Yes", T, F))


  cases <- linelist_cleaned %>%
    dplyr::mutate(case = factor(case)) %>%
    dplyr::group_by(age_group, case, .drop = FALSE) %>%
    dplyr::count(name = "cases") %>%
    group_by(age_group, .drop = FALSE) %>%
    dplyr::mutate(total = sum(cases)) %>%
    dplyr::filter(case == TRUE)

  # attack rate for all
  case_count <- sum(linelist_cleaned$case)
  total_pop <- nrow(linelist_cleaned)
  expected_ar <- attack_rate( case_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    mutate(ar = formatC(ar, 2, format = "f"))

  # attack rate for each group
  expected_ar_lev <- epitabulate::attack_rate(cases$cases, cases$total, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    dplyr::mutate(cases = as.character(cases)) %>%
    mutate(ar = formatC(ar, 2, format = "f"))

  gt_ar <- linelist_cleaned %>%
    # Add population and multiplier to data frame (can't pass args to add_stat)
    dplyr::select(case, age_group) %>%
    gtsummary::tbl_summary(
      statistic = list(case ~ "{N}",
                       age_group ~ "{n}"),
      label = list(case ~ "All participants", age_group ~ "Age Group")
    ) %>%
    add_ar(case_var = "case")

  ar_df <- gt_ar$table_body

  expect_s3_class(gt_ar, "gtsummary")

  # Tests for dichotomous
  expect_equal(as.numeric(ar_df$Cases[1]), expected_ar$cases)
  expect_equal(ar_df$`AR (per 10,000)`[1], expected_ar$ar)
  expect_equal(ar_df$`95%CI`[1], expected_ar$ci)

  # Tests for categorical
  expect_equal(ar_df$Cases[-c(1,2)], expected_ar_lev$cases)
  expect_equal(ar_df$`AR (per 10,000)`[-c(1,2)], expected_ar_lev$ar)
  expect_equal(ar_df$`95%CI`[-c(1,2)], expected_ar_lev$ci)
})

test_that("gt_remove_stat removes stat by column name", {
  gt <- linelist_cleaned %>%
    select(DIED, gender, age_group) %>%
    gtsummary::tbl_summary()

  expect(!is.null(gt$table_body$stat_0), "column does not exist")

  gt_removed <- gt %>% gt_remove_stat(col_name = "stat_0")
  suppressWarnings(
    expect(is.null(gt_removed$table_body$stat_0), "column exists")
  )
})


test_that("add_crosstabs adds stat columns showing outcome by exposure in two by two table", {
  count_table <- linelist_cleaned %>%
    dplyr::select(recent_travel, diarrhoea) %>%
    group_by(recent_travel, diarrhoea) %>%
    count()

  gt_cs <- add_crosstabs(
    data = linelist_cleaned,
    exposure = "recent_travel",
    outcome = "diarrhoea",
    show_overall = TRUE,
    exposure_label = "Recent travel",
    outcome_label = "Diarrhoea",
    two_by_two = TRUE,
    show_N_header = TRUE)

  gt_df <- gt_cs$table_body

  expect_s3_class(gt_cs, "gtsummary")
  expect_true(grepl(paste0("^", count_table[1,3]), gt_df$stat_1[2]))
  expect_true(grepl(paste0("^", count_table[4,3]), gt_df$stat_2[3]))
  expect_equal(unique(gt_cs$table_styling$header$spanning_header), c(NA, "Diarrhoea"))
})

test_that("add_crosstabs adds stat columns showing outcome by exposure in single row", {
  count_table <- linelist_cleaned %>%
    dplyr::select(recent_travel, diarrhoea) %>%
    group_by(recent_travel, diarrhoea) %>%
    count()

  gt_cs <- add_crosstabs(
    data = linelist_cleaned,
    exposure = "recent_travel",
    outcome = "diarrhoea",
    show_overall = TRUE,
    exposure_label = "Recent travel",
    outcome_label = "Diarrhoea",
    two_by_two = FALSE)

  gt_df <- gt_cs$table_body

  expect_s3_class(gt_cs, "gtsummary")
  expect_true(grepl(paste0("^", count_table[1,3]), gt_df$stat_1_1[1]))
  expect_true(grepl(paste0("^", count_table[4,3]), gt_df$stat_2_2[1]))
  expect_equal("**Overall**", unique(gt_cs$table_styling$header$spanning_header)[7])
})

test_that("add_crosstabs adds stat columns showing outcome by exposure", {

  gt_cs <- add_crosstabs(
    data = linelist_cleaned,
    exposure = "recent_travel",
    outcome = "diarrhoea",
    var_name = "gender",
    show_overall = TRUE,
    exposure_label = "Recent travel",
    outcome_label = "Diarrhoea",
    var_label = "Gender",
    two_by_two = FALSE)

  gt_df <- gt_cs$table_body

  col_names <- c("variable", "var_label", "row_type", "label", "var_type_1",
                 "stat_1_1", "stat_2_1", "var_type_2", "stat_1_2", "stat_2_2",
                 "var_type_3", "stat_0_3")

  span_heads <- c(NA, "**Table 1**", "**Cases**", "**Table 2**", "**Controls**",
                  "**Table 3**", "**Overall**")


  expect_s3_class(gt_cs, "gtsummary")
  expect_equal(names(gt_df), col_names)
  expect_equal(unique(gt_cs$table_styling$header$spanning_header), span_heads)
})

test_that("add_crosstabs adds stat columns showing outcome by exposure", {

  gt_cs <- add_crosstabs(
    data = linelist_cleaned,
    exposure = "recent_travel",
    outcome = "diarrhoea",
    var_name = "gender",
    show_overall = TRUE,
    exposure_label = "Recent travel",
    outcome_label = "Diarrhoea",
    var_label = "Gender",
    two_by_two = FALSE)

  gt_df <- gt_cs$table_body

  col_names <- c("variable", "var_label", "row_type", "label", "var_type_1",
                 "stat_1_1", "stat_2_1", "var_type_2", "stat_1_2", "stat_2_2",
                 "var_type_3", "stat_0_3")

  span_heads <- c(NA, "**Table 1**", "**Cases**", "**Table 2**", "**Controls**",
                  "**Table 3**", "**Overall**")


  expect_s3_class(gt_cs, "gtsummary")
  expect_equal(names(gt_df), col_names)
  expect_equal(unique(gt_cs$table_styling$header$spanning_header), span_heads)
})

test_that("mortality rate calculation returns gtsummary object and correct results with dichotomous variables", {
  # Deaths variable but be a logical (T/F or 1,0) with no missing data (NAs)

  deaths <- linelist_cleaned %>%
    dplyr::group_by(age_group, DIED) %>%
    dplyr::count(name = "deaths") %>%
    group_by(age_group) %>%
    dplyr::mutate(total = sum(deaths)) %>%
    dplyr::filter(DIED == TRUE)

  # attack rate for each group
  expected_mr_lev <- epitabulate::mortality_rate(deaths$deaths, deaths$total, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    dplyr::mutate(deaths = as.character(deaths)) %>%
    dplyr::mutate(mr = `mortality per 10 000`) %>%
    mutate(mr = formatC(mr, 2, format = "f"))

  deaths_count <- sum(linelist_cleaned$DIED)
  total_pop <- nrow(linelist_cleaned)

  expected_mr <- epitabulate::mortality_rate( deaths_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3)


  gt_mr <- linelist_cleaned %>%
    dplyr::select(DIED, age_group) %>%
    gtsummary::tbl_summary(
      statistic = list(DIED ~ "{N}",
                       age_group ~ "{n}"),
      label = list(DIED ~ "All participants", age_group ~ "Age Group"))  %>%
    add_mr(deaths_var = "DIED", multiplier = 10000)

  gt_mr
  # Remove 0 from comparison df, because they won't be in expected df
  mr_df <- gt_mr$table_body %>% filter(Deaths != 0)

  expect_s3_class(gt_mr, "gtsummary")
  # Check all participants deaths
  expect_equal(as.numeric(mr_df$Deaths[1]), expected_mr$deaths)
  expect_equal(
    mr_df$`MR (per 10,000)`[1],
    formatC(expected_mr$`mortality per 10 000`, digits = 2, format = "f"))
  expect_equal(mr_df$`95%CI`[1], expected_mr$ci)

  # Tests for categorical
  expect_equal(mr_df$Deaths[-c(1)], expected_mr_lev$deaths)
  expect_equal(mr_df$`MR (per 10,000)`[-c(1)], expected_mr_lev$mr)
  expect_equal(mr_df$`95%CI`[-c(1)], expected_mr_lev$ci)

})


test_that("mortality rate calculation returns gtsummary object and correct results with dichotomous variables with population provided", {
  # Deaths variable but be a logical (T/F or 1,0) with no missing data (NAs)

  age_groups <- levels(linelist_cleaned$age_group)
  deaths <- linelist_cleaned %>%
    dplyr::group_by(age_group, DIED, .drop = FALSE) %>%
    dplyr::count(name = "deaths") %>%
    dplyr::group_by(age_group, .drop = FALSE) %>%
    dplyr::mutate(total = sum(deaths)) %>%
    # dplyr::group_by(age_group, DIED)  %>%
    ungroup() %>%
    tidyr::complete(age_group, DIED, fill = list(deaths = 0, total = 0)) %>%
    dplyr::filter(DIED == TRUE)

  population_arg <- deaths$total * 15

  # attack rate for each group
  expected_mr_lev <- epitabulate::mortality_rate(deaths$deaths, population_arg, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    dplyr::mutate(deaths = as.character(deaths)) %>%
    dplyr::mutate(mr = `mortality per 10 000`) %>%
    mutate(mr = formatC(mr, 2, format = "f"))

  deaths_count <- sum(linelist_cleaned$DIED)
  total_pop <- sum(population_arg)

  expected_mr <- epitabulate::mortality_rate( deaths_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3)


  gt_mr <- linelist_cleaned %>%
    dplyr::select(DIED, age_group) %>%
    gtsummary::tbl_summary(
      statistic = list(DIED ~ "{N}",
                       age_group ~ "{n}"),
      label = list(DIED ~ "All participants", age_group ~ "Age Group"))  %>%
    add_mr(deaths_var = "DIED",
           population = population_arg,
           multiplier = 10000,
           drop_tblsummary_stat = TRUE)

  gt_mr
  mr_df <- gt_mr$table_body

  expect_s3_class(gt_mr, "gtsummary")
  expect_equal(as.numeric(mr_df$Deaths[1]), expected_mr$deaths)
  # the following should be the expected if population argument not given divided by 15
  # (set by the population_arg variable above)
  expect_equal(
    mr_df$`MR (per 10,000)`[1],
    formatC(expected_mr$`mortality per 10 000`, digits = 2, format = "f"))
  expect_equal(mr_df$`95%CI`[1], expected_mr$ci)

  # Tests for categorical
  expect_equal(mr_df$Deaths[-c(1,2)], expected_mr_lev$deaths)
  expect_equal(mr_df$`MR (per 10,000)`[-c(1,2)], expected_mr_lev$mr)
  expect_equal(mr_df$`95%CI`[-c(1,2)], expected_mr_lev$ci)

})

test_that("attack rate calculation returns gtsummary object and correct results with categorical and dichotomous variables with population provided", {
  linelist_cleaned <- linelist_cleaned %>%
    mutate(case = ifelse(diarrhoea == "Yes" & bleeding == "Yes" & fever == "Yes", T, F))


  cases <- linelist_cleaned %>%
    dplyr::mutate(case = factor(case)) %>%
    dplyr::group_by(age_group, case, .drop = FALSE) %>%
    dplyr::count(name = "cases") %>%
    group_by(age_group, .drop = FALSE) %>%
    dplyr::mutate(total = sum(cases)) %>%
    dplyr::filter(case == TRUE)

  population_arg <- cases$total * 15
  total_pop <- sum(population_arg)

  # attack rate for all
  case_count <- sum(linelist_cleaned$case)
  expected_ar <- attack_rate( case_count,  total_pop, multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    mutate(ar = formatC(ar, 2, format = "f"))

  # attack rate for each group
  expected_ar_lev <- epitabulate::attack_rate(
    cases$cases,
    population_arg,
    multiplier = 10000) %>%
    epikit::merge_ci_df(e = 3) %>%
    dplyr::mutate(cases = as.character(cases)) %>%
    mutate(ar = formatC(ar, 2, format = "f"))

  gt_ar <- linelist_cleaned %>%
    # Add population and multiplier to data frame (can't pass args to add_stat)
    dplyr::select(case, age_group) %>%
    gtsummary::tbl_summary(
      statistic = list(case ~ "{N}",
                       age_group ~ "{n}"),
      label = list(case ~ "All participants", age_group ~ "Age Group")
    ) %>%
    add_ar(case_var = "case",
           population = population_arg,
           drop_tblsummary_stat = TRUE)

  gt_ar
  ar_df <- gt_ar$table_body

  expect_s3_class(gt_ar, "gtsummary")

  # Tests for dichotomous
  expect_equal(as.numeric(ar_df$Cases[1]), expected_ar$cases)
  expect_equal(ar_df$`AR (per 10,000)`[1], expected_ar$ar)
  expect_equal(ar_df$`95%CI`[1], expected_ar$ci)

  # Tests for categorical
  expect_equal(ar_df$Cases[-c(1,2)], expected_ar_lev$cases)
  expect_equal(ar_df$`AR (per 10,000)`[-c(1,2)], expected_ar_lev$ar)
  expect_equal(ar_df$`95%CI`[-c(1,2)], expected_ar_lev$ci)
})

test_that("gt_mh_odds adds tabulated data, odds, and mh odds to gtsummary object", {

  cases <- linelist_cleaned %>%
    mutate(water_source_tank = ifelse(water_source == "Tank", TRUE, FALSE)) %>%
    filter(typhoid %in% c("Positive", "Negative")) %>%
    mutate(typhoid_logical = ifelse(typhoid == "Positive", TRUE, FALSE))

  expected_OR <-
    tab_univariate(
      cases,
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      measure = "OR")

  ci <- paste(formatC(expected_OR$lower, digits = 2, format = "f"), "--",
              formatC(expected_OR$upper, digits = 2, format = "f"))
  gt_obj <- cases %>%
    gt_mh_odds(
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      exposure_label = "Water source - tank",
      outcome_label = "Typhoid fever",
      # strata = "age_group"
      strata = "residential_status_brief",
      strata_label = "Residential status"
    )


  mh_df <- gt_obj$table_body
  expect_equal(gt_obj$table_body$label[1], "Overall")
  # OR matches
  expect_equal(mh_df$risk_estimate_2[2], formatC(expected_OR$ratio, digits = 2, format = "f"))
  # CIs match
  expect_equal(mh_df$risk_CI_2[2],
               paste(round(expected_OR$lower, 2), "--", round(expected_OR$upper,2)))

})

test_that("gt_mh_odds adds tabulated data, odds, and mh odds to gtsummary object", {

  cases <- linelist_cleaned %>%
    mutate(water_source_tank = ifelse(water_source == "Tank", TRUE, FALSE)) %>%
    filter(typhoid %in% c("Positive", "Negative")) %>%
    mutate(typhoid_logical = ifelse(typhoid == "Positive", TRUE, FALSE))

  expected_OR <-
    tab_univariate(
      cases,
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      measure = "OR")

  ci <- paste(formatC(expected_OR$lower, digits = 2, format = "f"), "--",
              formatC(expected_OR$upper, digits = 2, format = "f"))
  gt_obj <- cases %>%
    gt_mh_odds(
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      exposure_label = "Water source - tank",
      outcome_label = "Typhoid fever",
      strata = "residential_status_brief",
      strata_label = "Residential status"
    )


  mh_df <- gt_obj$table_body
  expect_equal(gt_obj$table_body$label[1], "Overall")
  # crude overall OR matches
  expect_equal(mh_df$risk_estimate_2[2], formatC(expected_OR$ratio, digits = 2, format = "f"))
  # crude overall CIs match
  expect_equal(mh_df$risk_CI_2[2],
               paste(round(expected_OR$lower, 2), "--", round(expected_OR$upper,2)))

})


test_that("gt_mh_odds does not calculate mh odds if any cells contain 0", {

  cases <- linelist_cleaned %>%
    mutate(water_source_tank = ifelse(water_source == "Tank", TRUE, FALSE)) %>%
    filter(typhoid %in% c("Positive", "Negative")) %>%
    mutate(typhoid_logical = ifelse(typhoid == "Positive", TRUE, FALSE)) %>%
    filter(age_group != "0-2")

  expected_OR <-
    tab_univariate(
      cases,
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      measure = "OR")

  ci <- paste(formatC(expected_OR$lower, digits = 2, format = "f"), "--",
              formatC(expected_OR$upper, digits = 2, format = "f"))
  gt_obj <- cases %>%
    gt_mh_odds(
      exposure = "water_source_tank",
      outcome = "typhoid_logical",
      exposure_label = "Water source - tank",
      outcome_label = "Typhoid fever",
      strata = "age_group",
      strata_label = "Age group"
    )


  mh_df <- gt_obj$table_body
  expect_equal(gt_obj$table_body$label[1], "Overall")
  # crude overall OR matches
  expect_equal(mh_df$risk_estimate_2[2], formatC(expected_OR$ratio, digits = 2, format = "f"))
  # crude overall CIs match
  expect_equal(mh_df$risk_CI_2[2],
               paste(round(expected_OR$lower, 2), "--", round(expected_OR$upper,2)))
  expect_equal(mh_df$MHOR_2[3], "--")

})
R4EPI/tuni documentation built on March 20, 2023, 4:37 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
R4EPI/tuni
Tables for Epidemiological Analysis

tests/testthat/test-gt_wrappers.R
In R4EPI/tuni: Tables for Epidemiological Analysis

R Package Documentation

Browse R Packages

We want your feedback!

R4EPI/tuni Tables for Epidemiological Analysis

tests/testthat/test-gt_wrappers.R In R4EPI/tuni: Tables for Epidemiological Analysis

R Package Documentation

Browse R Packages

We want your feedback!

R4EPI/tuni
Tables for Epidemiological Analysis

tests/testthat/test-gt_wrappers.R
In R4EPI/tuni: Tables for Epidemiological Analysis