tests/testthat/testLifeExpectancy.R

# dps to test to
n <- 4

df1 <- data.frame(startage = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                               60L, 65L, 70L, 75L, 80L, 85L, 90L),
                  pops = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                           61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                           37039L, 33288L, 23306L, 11936L, 11936L),
                  deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                             263L, 304L, 536L, 872L, 1390L, 1605L, 1936L, 1937L))

df1_cum <- data.frame(pops_used = c(787954,	780894,	745835,	698861,	650372,	607153,	568592,
                                    522583,	465375,	403940,	348339,	298130,	241714,	195303,
                                    155483,	117505,	80466,	47178,	23872,	11936),
                      dths_used = c(9357,	9340,	9331,	9327,	9319,	9299,	9284,
                                    9260,	9227,	9177,	9106,	9006,	8843,	8580,
                                    8276,	7740,	6868,	5478,	3873,	1937))

df2 <- data.frame(startage = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                               60L, 70L, 75L, 80L, 85L, 90L, 65L),
                  pops = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                           61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37039L,
                           33288L, 23306L, 11936L, 11936L, 37978L),
                  deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                             263L, 304L, 872L, 1390L, 1605L, 1936L, 1937L, 536L))

df3 <- data.frame(startage = c("0", "1-4", "5-9", "10 – 14", "15 – 19", "20 – 24", "25 – 29",
                               "30 – 34", "35 – 39", "40 – 44", "45 – 49", "50 – 54",
                               "55 – 59", "60 – 64", "65 – 69", "75 – 79", "80 – 84",
                               "85 – 89", "90 +", "70 – 74"),
                  pops = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                           61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                           33288L, 23306L, 11936L, 11936L, 37039L),
                  deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                             263L, 304L, 536L, 1390L, 1605L, 1936L, 1937L, 872L))
df_neg_deaths <- data.frame(age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                    60L, 65L, 70L, 75L, 80L, 85L, 90L),
                            pop = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                                    61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                                    37039L, 33288L, 23306L, 11936L, 11936L),
                            deaths = c(17L, 9L, 4L, -5L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                                       263L, 304L, 536L, 872L, 1390L, 1605L, 1936L, 1937L))
df_zero_pop <- data.frame(age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                  60L, 65L, 70L, 75L, 80L, 85L, 90L),
                          pop = c(7060L, 35059L, 46974L, 48489L, 43219L, 0L, 46009L, 57208L,
                                  61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                                  37039L, 33288L, 23306L, 11936L, 11936L),
                          deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                                     263L, 304L, 536L, 872L, 1390L, 1605L, 1936L, 1937L))
df_deaths_greater_pops <- data.frame(age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                                  60L, 65L, 70L, 75L, 80L, 85L, 90L),
                                     pop = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                                              61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                                              37039L, 33288L, 23306L, 119L, 11936L),
                                     deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                                                263L, 304L, 536L, 872L, 1390L, 1605L, 1936L, 1937L))
df_missing_age <- data.frame(age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                     60L, 65L, 70L, 75L, 80L, 85L),
                             pop = c(7060L, 35059L, 46974L, 48489L, 43219L, 38561L, 46009L, 57208L,
                                     61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                                     37039L, 33288L, 23306L, 11936L),
                             deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                                        263L, 304L, 536L, 872L, 1390L, 1605L, 1936L))
df_low_pops <- data.frame(stringsAsFactors=FALSE,
                          age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                  60L, 65L, 70L, 75L, 80L, 85L, 90L),
                          pop = c(128L, 152L, 120L, 176L, 194L, 180L, 145L, 149L, 107L, 185L,
                                  165L, 109L, 100L, 122L, 133L, 189L, 123L, 121L, 147L,
                                  138L),
                          deaths = c(58L, 93L, 78L, 94L, 59L, 71L, 80L, 73L, 69L, 72L, 91L, 69L,
                                     78L, 71L, 54L, 91L, 82L, 53L, 50L, 84L))
df_widecis_plus <- data.frame(area = c(rep("Area 1", 20), rep("Area 2", 20)),
                 startage = rep(c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                  60L, 65L, 70L, 75L, 80L, 85L, 90L), 2),
                 pops = rep(c(270L, 235L, 246L, 248L, 243L, 238L, 246L, 257L,
                              261L, 355L, 350L, 356L, 346L, 339L, 337L,
                              337L, 333L, 323L, 311L, 311L), 2),
                 deaths = rep(c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 51L, 10L, 16L,
                                26L, 30L, 36L, 22L, 13L, 5L, 6L, 1L), 2)) %>%
  group_by(area)

df_widecis_plus$deaths[df_widecis_plus$area == "Area 1" & df_widecis_plus$startage == 90] <- 312


df_grouped_with_warnings <- data.frame(stringsAsFactors=FALSE,
                                       area = c(rep("Good data", 20),
                                                rep("Negative deaths", 20),
                                                rep("Negative pops", 20),
                                                rep("Deaths more than pops", 20),
                                                rep("Low pops", 20),
                                                rep("Missing age band", 19)),
                                       age = c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L,
                                               60L, 65L, 70L, 75L, 80L, 85L, 90L, 0L, 1L, 5L, 10L, 15L, 20L,
                                               25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L,
                                               90L, 0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
                                               55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 0L, 1L, 5L, 10L, 15L,
                                               20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
                                               85L, 90L, 0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L,
                                               55L, 60L, 65L, 70L, 75L, 80L, 85L, 90L, 0L, 1L, 5L, 10L, 15L,
                                               20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
                                               85L),
                                       pop = c(7060L, 35059L, 46974L, 48489L, 43219L, 43219L, 46009L, 57208L,
                                               61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L,
                                               37039L, 33288L, 23306L, 11936L, 11936L, 7060L, 35059L, 46974L, 48489L,
                                               43219L, 43219L, 46009L, 57208L, 61435L, 55601L, 50209L, 56416L,
                                               46411L, 39820L, 37978L, 37039L, 33288L, 23306L, 11936L, 11936L,
                                               7060L, 35059L, 46974L, -10L, 43219L, 43219L, 46009L, 57208L,
                                               61435L, 55601L, 50209L, 56416L, 46411L, 39820L, 37978L, 37039L,
                                               33288L, 23306L, 11936L, 11936L, 7060L, 35059L, 46974L, 48489L,
                                               43219L, 43219L, 46009L, 57208L, 61435L, 55601L, 50209L, 56416L,
                                               46411L, 39820L, 37978L, 37039L, 33288L, 23306L, 11936L, 11936L,
                                               100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
                                               100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 7060L,
                                               35059L, 46974L, 48489L, 43219L, 43219L, 46009L, 57208L, 61435L,
                                               55601L, 50209L, 56416L, 46411L, 39820L, 37978L, 37039L, 33288L,
                                               23306L, 11936L),
                                       deaths = c(17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L,
                                                  263L, 304L, 536L, 872L, 1390L, 1605L, 1936L, 1937L, 17L, 9L,
                                                  -2L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 100L, 163L, 263L, 304L,
                                                  536L, 872L, 1390L, 1605L, 1936L, 1937L, 17L, 9L, 4L, 8L, 20L, 15L,
                                                  24L, 33L, 50L, 71L, 100L, 163L, 263L, 304L, 536L, 872L, 1390L,
                                                  1605L, 1936L, 1937L, 17L, 9L, 4L, 8L, 20L, 50000L, 24L, 33L,
                                                  50L, 71L, 100L, 163L, 263L, 304L, 536L, 872L, 1390L, 1605L, 1936L,
                                                  1937L, 17L, 9L, 4L, 8L, 20L, 15L, 24L, 33L, 50L, 71L, 80L, 80L,
                                                  80L, 80L, 80L, 80L, 80L, 80L, 80L, 80L, 17L, 9L, 4L, 8L, 20L,
                                                  15L, 24L, 33L, 50L, 71L, 100L, 163L, 263L, 304L, 536L, 872L,
                                                  1390L, 1605L, 1936L))

answer1 <- round(data.frame(value = c(80.16960813, 79.36245674, 75.44193645, 70.47299936,
                                      65.52909542, 60.67510088, 55.78835987, 50.92752727,
                                      46.06740425, 41.24505636, 36.49322717, 31.83343842, 27.26027992,
                                      22.9719144, 18.76856852, 14.95858336, 11.51684445, 8.618849241,
                                      6.163942934, 6.16210635),
                            lowercl = c(79.88268548, 79.0898816, 75.17407307, 70.20675732,
                                        65.26548673, 60.41876577, 55.53801579, 50.68279939,
                                        45.82666794, 41.00842831, 36.26223216, 31.60996318, 27.04337271,
                                        22.76653134, 18.57570352, 14.77985004, 11.34831778, 8.451134045,
                                        5.978358351, 5.887683386),
                            uppercl = c(80.45653078, 79.63503188, 75.70979983, 70.7392414,
                                        65.79270412, 60.931436, 56.03870395, 51.17225516, 46.30814055,
                                        41.48168441, 36.72422219, 32.05691365, 27.47718713,
                                        23.17729746, 18.96143352, 15.13731667, 11.68537112, 8.786564437,
                                        6.349527517, 6.436529314)),
                 n)




answer2 <- cbind(df1[c(3, 7),],
                 round(answer1[c(3, 7),], n),
                 df1_cum[c(3, 7),],
                 data.frame(stringsAsFactors = FALSE,
                            confidence  = rep("95%", 2),
                            statistic = paste("life expectancy at", c(5, 25)),
                            method = rep("Chiang, using Silcocks et al for confidence limits", 2))) %>%
                select(-pops, -deaths)

answer_widecis <- round(tibble(value = c(19.22914685, 19.44120610, 18.33467808, 14.67659995,
                                         11.81284556, 11.63815414, 10.05641628, 9.93152356,
                                         11.95846561,
                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                         NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                               lower95_0cl = c(16.43900644, 16.53437505, 15.62922550,
                                               12.08108630, 9.21823373, 8.68414661,
                                               6.54020385, 4.71570153, 2.37762283,
                                               -1.60695210, -10.33328815, -17.16409724,
                                               -26.91715103, -45.04820928, -76.27067271,
                                               -137.12203187, -195.39453664, -242.37480291,
                                               -266.74198466, -298.54880400,
                                               NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                               NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
                               upper95_0cl = c(22.01928726, 22.34803716, 21.04013066,
                                               17.27211361, 14.40745738, 14.59216167,
                                               13.57262872, 15.14734559, 21.53930839,
                                               50.29765396, 97.99598549, 107.54413263,
                                               128.91238524, 181.91668353, 278.06441642,
                                               472.32822039, 649.40455756, 783.50845564,
                                               841.04259816, 920.54880400,
                                               NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
                                               NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)),
                        n)

test1 <- phe_life_expectancy(df1, deaths, pops, startage, type="standard")
test1.1 <- phe_life_expectancy(df1, deaths, pops, startage, confidence = 95)
test2 <- phe_life_expectancy(df2, deaths, pops, startage)
test3 <- df1 %>%
  mutate(area = "test") %>%
  group_by(area) %>%
  phe_life_expectancy(deaths, pops, startage)
test4 <- phe_life_expectancy(df3, deaths, pops, startage,
                             age_contents = c("0", "1-4", "5-9",
                                              "10 – 14", "15 – 19",
                                              "20 – 24", "25 – 29",
                                              "30 – 34", "35 – 39",
                                              "40 – 44", "45 – 49",
                                              "50 – 54", "55 – 59",
                                              "60 – 64", "65 – 69",
                                              "70 – 74", "75 – 79",
                                              "80 – 84", "85 – 89",
                                              "90 +"))
test5 <- phe_life_expectancy(df1, deaths, pops, startage, le_age = 5)
test6 <- phe_life_expectancy(df1, deaths, pops, startage, le_age = c(5, 25), type="full") %>%
  mutate_at(c("value", "lowercl", "uppercl"), round, digits = n)

test7 <- phe_life_expectancy(df1, deaths, pops, startage, confidence = 99.8)
test8 <- phe_life_expectancy(df1, deaths, pops, startage, confidence = c(95, 99.8))

negative_warning <- capture_warnings(
  test_neg <- phe_life_expectancy(df_neg_deaths, deaths, pop, age))
zero_warning <- capture_warnings(
  test_zero_pop <- phe_life_expectancy(df_zero_pop, deaths, pop, age))
deaths_pops_warning <- capture_warnings(
  test_greater_than_pops <- phe_life_expectancy(df_deaths_greater_pops, deaths, pop, age))
low_pops_warning <- capture_warnings(
  test_low_pops <- phe_life_expectancy(df_low_pops, deaths, pop, age))

age_contents_short <- c(0L, 1L, 5L, 10L, 15L, 20L, 25L, 30L, 35L,
                        40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 85L)

missing_warning     <- capture_warnings(
  test_missing_ageband <- phe_life_expectancy(df_missing_age, deaths, pop, age,
                                              age_contents = age_contents_short))
wideci_warning <- capture_warnings(
  test_widecis <- phe_life_expectancy(df_widecis_plus, deaths, pops, startage, confidence = c(0.95, 0.998)))
multi_warnings      <- capture_warnings(
  test_grouped_with_warnings <- df_grouped_with_warnings %>%
                                    group_by(area) %>%
                                    phe_life_expectancy(deaths, pop, age)
  )

cols_to_test <- c("value", "lowercl", "uppercl")
expected_num_cols <- 9

#test calculations
test_that("LE and CIs calculate correctly",{
  expect_equal(round(test1[, cols_to_test], n), round(answer1, n),
               info = "test defaults but with type standard")
  expect_equal(round(test1.1[, cols_to_test], n), round(answer1, n),
               info = "test confidence = 95")
  expect_equal(round(test2[, cols_to_test], n), round(answer1, n),
               info = "incorrect ageband order")
  expect_equal(round(test3[, cols_to_test], n), as_tibble(round(answer1, n)),
               info = "single area grouping")
  expect_equal(round(test4[, cols_to_test], n), round(answer1, n),
               info = "custom age bands in wrong order")
  expect_equal(round(test5[, cols_to_test], n), round(answer1[3, ], n),
               ignore_attr = TRUE, #because the row names are different and we are only interested in values
               info = "return single age band")
  expect_equal(test6, answer2,
               ignore_attr = TRUE, #because the row names are different and we are only interested in values
               info = "type = 'full' with two filters")
  expect_equal(sum(!is.na(test_neg[, cols_to_test])), 0,
               info = "negative deaths produces only NAs")
  expect_equal(sum(!is.na(test_zero_pop[, cols_to_test])), 0,
               info = "zero in pop age band produces only NAs")
  expect_equal(sum(!is.na(test_greater_than_pops[, cols_to_test])), 0,
               info = "deaths in age band greater than pops produces only NAs")
  expect_equal(sum(!is.na(test_missing_ageband[, cols_to_test])), 0,
               info = "missing age band produces only NAs")
  expect_equal(nrow(test_grouped_with_warnings), nrow(df_grouped_with_warnings),
               info = "correct number of rows for grouped calcs")
  expect_equal(test7[, c("lowercl", "uppercl")],
               test8[, c("lower99_8cl", "upper99_8cl")],
               ignore_attr = TRUE)
  expect_equal(round(test_widecis[, c("value", "lower95_0cl", "upper95_0cl")], n),
               round(answer_widecis, n),
               info = "suppress wide CI > 20")

})

# test that correct columns are output
test_that("LE - correct column numbers are output",{
  expect_equal(ncol(test1), expected_num_cols - 5)
  expect_equal(ncol(test2), expected_num_cols)
  expect_equal(ncol(test8), expected_num_cols - 2 + (2 * 2))
  expect_equal(ncol(phe_life_expectancy(df1, deaths, pops, startage, confidence = 90:99)),
               expected_num_cols - 2 + (2 * length(90:99)))
})

# test that output is in correct format
test_that("LE - correct output format",{
  expect_true(is.data.frame(test1),   info = "test1 is dataframe format")
  expect_true(is.data.frame(test1.1), info = "test1.1 is dataframe format")
  expect_true(is.data.frame(test2),   info = "test2 is dataframe format")
  expect_true(is.data.frame(test3),   info = "test3 is dataframe format")
  expect_true(is.data.frame(test4),   info = "test4 is dataframe format")
  expect_true(is.data.frame(test5),   info = "test5 is dataframe format")
  expect_true(is.data.frame(test6),   info = "test6 is dataframe format")
  expect_true(is.data.frame(test7),   info = "test7 is dataframe format")
  expect_true(is.data.frame(test8),   info = "test8 is dataframe format")
  expect_true(is.data.frame(test_grouped_with_warnings),   info = "test_grouped_with_warnings is dataframe format")
  expect_equal(group_vars(test3), c("area"), info = "test3 output is grouped by area")
})

# test warnings
test_that("LE - warnings are generated when invalid arguments are used",{
  expect_warning(phe_life_expectancy(df1, deaths, pops, startage, le_age = 4),
                 "le_age not in the vector described by age_contents; all life expectancies will be returned")
  expect_warning(phe_life_expectancy(df1, deaths, pops, startage, le_age = c(4, 6)),
                 "le_age not in the vector described by age_contents; all life expectancies will be returned")
  expect_match(negative_warning,
               "some age bands have negative deaths; outputs have been suppressed to NAs")
  expect_match(zero_warning,
               "some age bands have a zero or less population; outputs have been suppressed to NAs")
  expect_match(deaths_pops_warning,
               "some age bands have more deaths than population; outputs have been suppressed to NAs")
  expect_match(low_pops_warning,
               "some groups have a total population of less than 5,000; outputs have been suppressed to NAs")
  expect_match(missing_warning,
               "some groups contain a different number of age bands than 20; life expectancy cannot be calculated for these\\. These groups will contain NAs\\.")
  expect_match(wideci_warning,
               "some age bands have more deaths than population; outputs have been suppressed to NAs",
               all = FALSE)
  expect_match(wideci_warning,
               "some life expectancy values have a 95% confidence interval > 20 years; these values have been suppressed to NAs",
               all = FALSE)
  expect_match(multi_warnings, "some age bands have negative deaths; outputs have been suppressed to NAs",
               all = FALSE)
  expect_match(multi_warnings, "some age bands have a zero or less population; outputs have been suppressed to NAs",
               all = FALSE)
  expect_match(multi_warnings, "some groups contain a different number of age bands than 20; life expectancy cannot be calculated for these\\. These groups will contain NAs\\.",
               all = FALSE)
  expect_match(multi_warnings, "some age bands have more deaths than population; outputs have been suppressed to NAs",
               all = FALSE)
  expect_match(multi_warnings, "some groups have a total population of less than 5,000; outputs have been suppressed to NAs",
               all = FALSE)
})


# test error handling

test_that("LE - errors are generated when invalid arguments are used",{
  expect_error(phe_life_expectancy(df3, deaths, pops, startage),
               "the contents in the startage field do not match the contents of the age_contents vector")
  expect_error(phe_life_expectancy(df3, deaths, pops, startage,
                                   age_contents = c("0", "1 4", "5 9",
                                                    "20 24", "25 29",
                                                    "10 14", "15 19",
                                                    "30 34", "35 39",
                                                    "40 44", "45 49",
                                                    "50 54", "55 59",
                                                    "60 64", "65 69",
                                                    "70 74", "75 79",
                                                    "80 84", "85 89",
                                                    "90+")),
               "age_contents doesn't appear to be in ascending order; the following age bands appear out of position: 20 24, 25 29, 10 14, 15 19")
  expect_error(phe_life_expectancy(),
               "function life_expectancy requires at least 4 arguments: data, deaths, population, startage")
  expect_error(phe_life_expectancy(df1, deaths, pop, age,
                                   age_contents = c(1L, 0L, seq(5, 90, by = 5))),
               "first age band in age_contents must be 0")
  expect_error(phe_life_expectancy(df1, deaths, pop, age,
                                   confidence = 0.8),
               "all confidence levels must be between 90 and 100 or between 0.9 and 1")

})
publichealthengland/PHEindicatormethods documentation built on Dec. 15, 2024, 3:18 p.m.