tests/testthat/test-helper-functions.R

## 'add_initial_to_classif' ---------------------------------------------------

test_that("'add_initial_to_classif' gives expected result", {
    classif_vars <- fake_classif_vars(n_time = 3)
    ans_obtained <- add_initial_to_classif(classif_vars)
    ans_expected <- fake_classif_vars(n_time = 4)
    ans_expected <- within(ans_expected, {
        time <- time - 1L;
        cohort <- cohort - 1L
    })
    is_alive <- ans_expected$cohort >= min(classif_vars$cohort)
    is_original_or_lower <- with(ans_expected, (time > min(time)) | (cohort == time - age))
    ans_expected <- ans_expected[is_alive & is_original_or_lower, ]
    new_cohorts <- fake_classif_vars(n_time = 3L, n_age = 1L)
    new_cohorts <- new_cohorts[new_cohorts$cohort == new_cohorts$time, ]
    new_cohorts$age <- new_cohorts$age - 1L
    ans_expected <- rbind(ans_expected, new_cohorts)
    expect_identical(sort_df(ans_obtained), sort_df(ans_expected))
})


## fake_classif_vars ----------------------------------------------------------

test_that("'fake_classif_vars' works with valid inputs", {
    ans_obtained <- fake_classif_vars(n_age = 1L,
                                      n_time = 1L)
    ans_expected <- data.frame(cohort = c(1999L, 1999L, 2000L, 2000L),
                               sex = c("Female", "Male", "Female", "Male"),
                               time = c(2000L, 2000L, 2000L, 2000L),
                               age = c(0L, 0L, 0L, 0L))
    expect_identical(ans_obtained, ans_expected)
})


## fake_classif_vars_popn -------------------------------------------------

test_that("'fake_classif_vars_popn' works with valid inputs", {
    ans_obtained <- fake_classif_vars_popn(n_age = 1L,
                                           n_time = 1L)
    ans_expected <- data.frame(sex = c("Female", "Female", "Male", "Male"),
                               time = c(1999L, 2000L, 1999L, 2000L),
                               age = c(0L, 0L, 0L, 0L))
    expect_identical(ans_obtained, ans_expected)
})


## is_lower_triangle ------------------------------------------------------

test_that("'is_lower_triangle' works with valid data", {
    ans_obtained <- is_lower_triangle(age = c(0L, 0L, 1L),
                                      time = c(2000L, 2000L, 2000L),
                                      cohort = c(2000L, 1999L, 1999L))
    ans_expected <- c(TRUE, FALSE, TRUE)
    expect_identical(ans_obtained, ans_expected)
    ans_obtained <- is_lower_triangle(age = integer(),
                                      time = integer(),
                                      cohort = integer())
    ans_expected <- logical()
    expect_identical(ans_obtained, ans_expected)
})


## 'make_list_dm_df' ----------------------------------------------------------

test_that("'make_list_dm_df' returns expected answer - non-popn, no regions", {
    classif_vars <- fake_classif_vars(sex = FALSE)
    df <- classif_vars[-1, ]
    df$count <- seq_len(nrow(df))
    ans_obtained <- make_list_dm_df(df = df,
                                    classif_vars = classif_vars,
                                    is_popn = FALSE)
    ans_expected <- merge(classif_vars,
                          df,
                          by = c("cohort", "gender", "time", "age"),
                          all.x = TRUE,
                          sort = FALSE)
    ans_expected <- sort_classif_cols(ans_expected, ignore = "count")
    ans_expected <- sort_df(ans_expected, ignore = "count")
    ans_expected <- split(ans_expected$count,
                          ans_expected[c("cohort", "gender")],
                          lex.order = TRUE)
    names(ans_expected) <- NULL
    expect_identical(ans_obtained, ans_expected)
})

test_that("'make_list_dm_df' returns expected answer - non-popn, with regions", {
    classif_vars <- fake_classif_vars(n_region = 2)
    df <- classif_vars[-1, ]
    df$count <- seq_len(nrow(df))
    ans_obtained <- make_list_dm_df(df = df,
                                    classif_vars = classif_vars,
                                    is_popn = FALSE)
    ans_expected <- merge(classif_vars,
                          df,
                          by = c("cohort", "sex", "time", "age", "region"),
                          all.x = TRUE,
                          sort = FALSE)
    ans_expected <- sort_classif_cols(ans_expected, ignore = "count")
    ans_expected <- sort_df(ans_expected, ignore = "count")
    ans_expected <- split(ans_expected$count,
                          ans_expected[c("cohort", "sex")],
                          lex.order = TRUE)
    names(ans_expected) <- NULL
    ans_expected <- lapply(ans_expected, function(x) matrix(x, nrow = 2))
    expect_identical(ans_obtained, ans_expected)
})

test_that("'make_list_dm_df' returns expected answer - popn, no regions", {
    classif_vars_popn <- fake_classif_vars_popn(n_time = 3L)
    df <- classif_vars_popn
    df$count <- seq_len(nrow(df))
    classif_vars <- fake_classif_vars(n_time = 3L)
    ans_obtained <- make_list_dm_df(df = df,
                                    classif_vars = classif_vars,
                                    is_popn = TRUE)
    ans_expected <- classif_vars
    initial_existing <- fake_classif_vars(n_time = 1L)
    initial_existing$time <- initial_existing$time - 1L
    initial_existing$cohort <- initial_existing$cohort - 1L
    initial_existing <- initial_existing[(initial_existing$cohort >= min(ans_expected$cohort)) &
                                         with(initial_existing, cohort == time - age), ]
    initial_new <- ans_expected[ans_expected$cohort == ans_expected$time, ]
    initial_new$age <- initial_new$age - 1L
    ans_expected <- rbind(initial_existing, initial_new, ans_expected)    
    df_with_cohort <- df
    df_with_cohort$cohort <- with(df_with_cohort, time - age)
    ans_expected <- merge(ans_expected,
                          df_with_cohort,
                          by = c("cohort", "sex", "time", "age"),
                          all.x = TRUE,
                          sort = FALSE)
    ans_expected <- sort_classif_cols(ans_expected, ignore = "count")
    ans_expected <- sort_df(ans_expected, ignore = "count")
    ans_expected <- to_list_col(ans_expected,
                                nm_f = c("cohort", "sex"),
                                nm_x = "count")$count
    expect_identical(ans_obtained, ans_expected)
})

test_that("'make_list_dm_df' returns expected answer - popn, with regions", {
    classif_vars_popn <- fake_classif_vars_popn(n_time = 3, n_region = 2)
    df <- classif_vars_popn
    df$count <- seq_len(nrow(df))
    classif_vars <- fake_classif_vars(n_time = 3, n_region = 2)
    ans_obtained <- make_list_dm_df(df = df,
                                    classif_vars = classif_vars,
                                    is_popn = TRUE)
    ans_expected <- classif_vars
    initial_existing <- fake_classif_vars(n_time = 1L, n_region = 2L)
    initial_existing$time <- initial_existing$time - 1L
    initial_existing$cohort <- initial_existing$cohort - 1L
    initial_existing <- initial_existing[(initial_existing$cohort >= min(ans_expected$cohort)) &
                                         with(initial_existing, cohort == time - age), ]
    initial_new <- ans_expected[ans_expected$cohort == ans_expected$time, ]
    initial_new$age <- initial_new$age - 1L
    ans_expected <- rbind(initial_existing, initial_new, ans_expected)    
    df_with_cohort <- df
    df_with_cohort$cohort <- with(df_with_cohort, time - age)
    ans_expected <- merge(ans_expected,
                          df_with_cohort,
                          by = c("cohort", "sex", "time", "age", "region"),
                          all.x = TRUE,
                          sort = FALSE)
    ans_expected <- sort_classif_cols(ans_expected, ignore = "count")
    ans_expected <- sort_df(ans_expected, ignore = "count")
    ans_expected <- to_list_col(ans_expected,
                                nm_f = c("cohort", "sex"),
                                nm_x = "count")$count
    ans_expected <- lapply(ans_expected, matrix, nrow = 2)
    expect_identical(ans_obtained, ans_expected)
})


## 'sort_classif_cols' -------------------------------------------------------

test_that("'sort_classif_cols' works without region - ignore is NULL", {
    x <- data.frame(age = 0,
                    cohort = 2000,
                    sex = "Female",
                    time = 2000)
    ans_obtained <- sort_classif_cols(x)
    ans_expected <- x[c(2, 3, 4, 1)]
    expect_identical(ans_obtained, ans_expected)
})


test_that("'sort_classif_cols' works with region - ignore is NULL", {
    x <- data.frame(age = 0,
                    region = "a",
                    cohort = 2000,
                    gender = "Female",
                    time = 2000)
    ans_obtained <- sort_classif_cols(x)
    ans_expected <- x[c(3, 4, 5, 1, 2)] 
    expect_identical(ans_obtained, ans_expected)
})

test_that("'sort_classif_cols' works without region - ignore is non-NULL", {
    x <- data.frame(age = 0,
                    count = 3,
                    cohort = 2000,
                    sex = "Female",
                    time = 2000)
    ans_obtained <- sort_classif_cols(x, ignore = "count")
    ans_expected <- x[c(3, 4, 5, 1, 2)]
    expect_identical(ans_obtained, ans_expected)
})

test_that("'sort_classif_cols' works with region - ignore is non-NULL", {
    x <- data.frame(age = 0,
                    region = "a",
                    cohort = 2000,
                    gender = "Female",
                    time = 2000,
                    count = 2)
    ans_obtained <- sort_classif_cols(x, ignore = "count")
    ans_expected <- x[c(3, 4, 5, 1, 2, 6)] 
    expect_identical(ans_obtained, ans_expected)
})

test_that("'sort_classif_cols' raises expected errors", {
    x <- data.frame(age = 0,
                    region = "a",
                    cohort = 2000,
                    gender = "Female",
                    time = 2000,
                    count = 2)
    expect_error(sort_classif_cols(x, ignore = c("wrong", "count")),
                 "\"wrong\" is not a valid column name")
    x <- data.frame(age = 0,
                    region = "a",
                    cohort = 2000,
                    wrong = "Female",
                    time = 2000,
                    count = 2)
    expect_error(sort_classif_cols(x, ignore = "count"),
                 "invalid name for classification variable : \"wrong\"")
})


## 'sort_df' ------------------------------------------------------------------

test_that("'sort_df' works with valid data - ignore is NULL", {
    x <- expand.grid(age = c(1, 100, 2),
                     sex = c("F", "M"),
                     stringsAsFactors = FALSE,
                     KEEP.OUT.ATTRS = FALSE)
    ans_obtained <- sort_df(x)
    ans_expected <- data.frame(age = c(1, 1, 2, 2, 100, 100),
                               sex = c("F", "M", "F", "M", "F", "M"))
    expect_identical(ans_obtained, ans_expected)
})

test_that("'sort_df' works with valid data - ignore is non-NULL", {
    x <- expand.grid(age = c(1, 100, 2),
                     sex = c("F", "M"),
                     stringsAsFactors = FALSE,
                     KEEP.OUT.ATTRS = FALSE)
    ans_obtained <- sort_df(x, ignore = "age")
    ans_expected <- data.frame(age = c(1, 100, 2, 1, 100, 2),
                               sex = c("F", "F", "F", "M", "M", "M"))
    expect_identical(ans_obtained, ans_expected)
})


## 'to_list_col' -------------------------------------------------------------

test_that("'to_list_col' works with valid data - single x column", {
    df <- data.frame(a = c(2, 1, 2, 1, 1),
                     b = c(3, 3, 3, 2, 2),
                     z = 1:5)
    ans_obtained <- to_list_col(df = df, nm_f = c("a", "b"), nm_x = "z")
    ans_expected <- data.frame(a = c(2, 1, 1),
                               b = c(3, 3, 2))
    ans_expected$z <- list(c(1L, 3L), 2L, c(4L, 5L))
    expect_identical(ans_obtained, ans_expected)
})

test_that("'to_list_col' works with valid data - multiple x columns", {
    df <- data.frame(z2 = 5:1,
                     a = c(2, 1, 2, 1, 1),
                     b = c(3, 3, 3, 2, 2),
                     z1 = 1:5)
    ans_obtained <- to_list_col(df = df, nm_f = c("a", "b"), nm_x = c("z1", "z2"))
    ans_expected <- data.frame(a = c(2, 1, 1),
                               b = c(3, 3, 2))
    ans_expected$z1 <- list(c(1L, 3L), 2L, c(4L, 5L))
    ans_expected$z2 <- list(c(5L, 3L), 4L, c(2L, 1L))
    expect_identical(ans_obtained, ans_expected)
})


   
## valid_age_time_cohort ------------------------------------------------------

test_that("'valid_age_time_cohort' returns TRUE with valid data", {
    expect_true(valid_age_time_cohort(age = c(0L, 0L, 1L),
                                      time = c(2000L, 2000L, 2000L),
                                      cohort = c(2000L, 1999L, 1999L)))
    expect_true(valid_age_time_cohort(age = integer(),
                                      time = integer(),
                                      cohort = integer()))
})

test_that("'valid_age_time_cohort' raises error with invalid data", {
    expect_error(valid_age_time_cohort(age = c(0L, 0L, 3L),
                                       time = c(2000L, 2000L, 2000L),
                                       cohort = c(2000L, 1999L, 1999L)),
                 "invalid combination of age \\[3\\], time \\[2000\\], and cohort \\[1999\\]")    
})
ONSdigital/Bayesian-demographic-accounts documentation built on Jan. 10, 2022, 12:34 a.m.