# tests/testthat/test-generate_crosstab.R

# Shared fixtures used by the tests below ----

# Nine observations; `type`, `sex` and `age_group` are plain factors.
mock_data <- dplyr::tibble(
  type = factor(c("A", "B", "A", "B", "C", "A", "C", "B", "A")),
  sex = factor(
    c("Male", "Female", "Female", "Male", "Male", "Female", "Female", "Male", "Female")
  ),
  status = c(
    "Healthy", "Sick", "Sick", "Healthy", "Healthy", "Sick", "Sick", "Healthy", "Sick"
  ),
  age_group = factor(
    c("Old", "Young", "Young", "Young", "Old", "Old", "Young", "Young", "Young")
  )
)

# Sixteen observations; `type` and `sex` are numeric codes wrapped as
# haven-labelled vectors carrying both a variable label and value labels.
mock_data_labelled <- dplyr::tibble(
  type = c(1, 2, 1, 2, 3, 1, 3, 2, 1, 1, 3, 1, 1, 2, 2, 3),
  sex = c(1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1),
  status = c(
    "Healthy", "Sick", "Sick", "Healthy", "Healthy", "Sick", "Sick", "Healthy",
    "Sick", "Healthy", "Healthy", "Sick", "Sick", "Sick", "Healthy", "Healthy"
  ),
  age_group = c(
    "Old", "Young", "Young", "Young", "Old", "Old", "Young", "Young",
    "Young", "Young", "Old", "Young", "Young", "Young", "Young", "Old"
  )
) |>
  dplyr::mutate(
    type = haven::labelled(
      type,
      label = "Type",
      labels = c(A = 1, B = 2, C = 3)
    ),
    sex = haven::labelled(
      sex,
      label = "Sex",
      labels = c(Male = 1, Female = 2)
    )
  )

# Same rows as the labelled fixture, with the coded columns turned into factors.
mock_data_factored <- mock_data_labelled |>
  dplyr::mutate(
    type = factor(type, labels = c("A", "B", "C")),
    sex = factor(sex, labels = c("Male", "Female"))
  )

# Attached only after `mock_data_factored` has been derived, so the variable
# label on `age_group` exists on the labelled fixture alone.
attr(mock_data_labelled$age_group, "label") <- "Age group"

# Unit tests for generate_crosstab function
test_that("generate_crosstab generates frequency table for one variable", {
  freq_tbl <- mock_data |>
    generate_crosstab(x = type)

  # The result must carry all three classes.
  for (cls in c("data.frame", "tsg", "tsgf")) {
    expect_s3_class(freq_tbl, cls)
  }

  # Four rows by three columns.
  expect_equal(dim(freq_tbl), c(4, 3))
})


test_that("generate_crosstab generates cross-tabulation correctly using different inputs", {

  # The same cross-tabulation built from each fixture flavour:
  # plain factors (df), haven-labelled (df_l) and factored-from-labelled (df_f).
  df <- generate_crosstab(mock_data, type, sex)
  df_l <- generate_crosstab(mock_data_labelled, type, sex)
  df_f <- generate_crosstab(mock_data_factored, type, sex)

  # Row and column variables swapped.
  df2 <- generate_crosstab(mock_data, sex, type)
  df2_l <- generate_crosstab(mock_data_labelled, sex, type)
  df2_f <- generate_crosstab(mock_data_factored, sex, type)

  # Classes: every flavour is checked once (previously df_l / df_f were
  # checked twice for one class and never for the other).
  expect_s3_class(df, "data.frame")
  expect_s3_class(df, "tsg")
  expect_s3_class(df_l, "tsg")
  expect_s3_class(df_f, "tsg")

  expect_s3_class(df, "tsgc")
  expect_s3_class(df_l, "tsgc")
  expect_s3_class(df_f, "tsgc")

  # Dimensions
  expect_equal(dim(df), c(4, 6))
  expect_equal(dim(df_l), c(4, 6))
  expect_equal(dim(df_f), c(4, 6))

  expect_equal(dim(df2), c(3, 8))
  expect_equal(dim(df2_l), c(3, 8))
  expect_equal(dim(df2_f), c(3, 8))

  # The category label is the variable label when one exists (labelled data),
  # otherwise the column name.
  expect_equal(attributes(df$category)$label, "type")
  expect_equal(attributes(df_l$category)$label, "Type")
  expect_equal(attributes(df_f$category)$label, "type")
  expect_equal(attributes(df2$category)$label, "sex")
  expect_equal(attributes(df2_l$category)$label, "Sex")
  expect_equal(attributes(df2_f$category)$label, "sex")

  # Column labels use the value label even when the column name uses the code.
  expect_equal(attributes(df$percent_Male)$label, "Percent__Male")
  expect_equal(attributes(df_l$percent_1)$label, "Percent__Male")
  expect_equal(attributes(df_f$percent_Male)$label, "Percent__Male")
  expect_equal(attributes(df2$frequency_A)$label, "Frequency__A")
  expect_equal(attributes(df2_l$frequency_1)$label, "Frequency__A")
  expect_equal(attributes(df2_f$frequency_A)$label, "Frequency__A")

  # Percentages in the first row add up to 100.
  expect_equal(df$percent_Male[1] + df$percent_Female[1], 100)
  expect_equal(df_l$percent_1[1] + df_l$percent_2[1], 100)
  expect_equal(df_f$percent_Male[1] + df_f$percent_Female[1], 100)
  expect_equal(df2$percent_A[1] + df2$percent_B[1] + df2$percent_C[1], 100)
  expect_equal(df2_l$percent_1[1] + df2_l$percent_2[1] + df2_l$percent_3[1], 100)
  expect_equal(df2_f$percent_A[1] + df2_f$percent_B[1] + df2_f$percent_C[1], 100)

  # Last row is the total row; for labelled input its category reads "0".
  expect_equal(as.character(df$category[nrow(df)]), "Total")
  expect_equal(as.character(df_l$category[nrow(df_l)]), "0")
  expect_equal(as.character(df_f$category[nrow(df_f)]), "Total")
  expect_equal(as.character(df2$category[nrow(df2)]), "Total")
  expect_equal(as.character(df2_l$category[nrow(df2_l)]), "0")
  expect_equal(as.character(df2_f$category[nrow(df2_f)]), "Total")

  # Grand totals equal the fixture sizes: 9 rows in mock_data, 16 rows in the
  # labelled and factored fixtures. Each table is checked against its own
  # total (previously every assertion read `df$total`).
  expect_equal(df$total[nrow(df)], 9)
  expect_equal(df_l$total[nrow(df_l)], 16)
  expect_equal(df_f$total[nrow(df_f)], 16)

  expect_equal(df2$total[nrow(df2)], 9)
  expect_equal(df2_l$total[nrow(df2_l)], 16)
  expect_equal(df2_f$total[nrow(df2_f)], 16)

  # Column names and their order.
  expect_equal(
    names(df),
    c("category", "total", "frequency_Female", "frequency_Male", "percent_Female", "percent_Male")
  )

  expect_equal(
    names(df_l),
    c("category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_equal(
    names(df_f),
    c("category", "total", "frequency_Male", "frequency_Female", "percent_Male", "percent_Female")
  )

})


test_that("generate_crosstab calculates proportions if specified", {

  prop_tbl <- mock_data |>
    generate_crosstab(x = type, sex, add_percent = TRUE, as_proportion = TRUE)

  male <- prop_tbl$proportion_Male
  female <- prop_tbl$proportion_Female
  last_row <- nrow(prop_tbl)

  # Every male proportion lies in [0, 1].
  expect_true(all(male >= 0 & male <= 1))

  # Proportions across sexes add up to 1 in the first and in the last row.
  expect_equal(male[1] + female[1], 1)
  expect_equal(male[last_row] + female[last_row], 1)

})


test_that("generate_crosstab handles grouping correctly", {

  # All variants cross-tabulate age_group by sex within groups of `type`.
  df_1 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, expand_categories = TRUE)

  df_2 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, expand_categories = FALSE)

  df_3 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, expand_categories = TRUE, group_as_list = TRUE)

  df_4 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, expand_categories = FALSE, group_as_list = TRUE)

  df_5 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, calculate_per_group = FALSE)

  df_6 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, calculate_per_group = TRUE, group_grand_total = TRUE)

  df_7 <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, group_as_list = TRUE, group_grand_total = TRUE)

  # Stacked output: expanding categories yields one more row than not expanding.
  expect_equal(dim(df_1), c(9, 7))
  expect_equal(dim(df_2), c(8, 7))

  # List output: one element per group, named after the group's value label.
  expect_length(df_3, 3)
  expect_length(df_4, 3)

  expect_identical(names(df_3), c("A", "B", "C"))
  expect_identical(names(df_4), c("A", "B", "C"))

  # Only group B loses a row when categories are not expanded.
  expect_equal(dim(df_3$A), c(3, 7))
  expect_equal(dim(df_4$A), c(3, 7))
  expect_equal(dim(df_3$B), c(3, 7))
  expect_equal(dim(df_4$B), c(2, 7))
  expect_equal(dim(df_3$C), c(3, 7))
  expect_equal(dim(df_4$C), c(3, 7))

  # With a grand total: extra rows when stacked, a leading "All" element in
  # the list form.
  expect_equal(nrow(df_6), 12)
  expect_identical(names(df_7), c("All", "A", "B", "C"))

  # Every variant exposes the same columns in the same order.
  expect_identical(
    names(df_1),
    c("type", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_2),
    c("type", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_3[[1]]),
    c("type", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_4[[1]]),
    c("type", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_5),
    c("type", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

})


test_that("generate_crosstab handles multiple grouping correctly", {

  # All variants cross-tabulate age_group by sex within type x status groups.
  df_1 <- mock_data_labelled |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = FALSE,
      group_as_list = TRUE
    )

  df_2 <- mock_data_labelled |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = TRUE,
      group_as_list = TRUE
    )

  df_3 <- mock_data_labelled |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = FALSE,
      calculate_per_group = TRUE
    )

  df_4 <- mock_data_labelled |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = TRUE,
      calculate_per_group = TRUE
    )

  df_5 <- mock_data_labelled |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = FALSE,
      calculate_per_group = FALSE
    )

  # The list-returning variants were previously built but never asserted on.
  # Their exact layout for two grouping variables is not pinned down here;
  # this only checks that a list-like object comes back.
  # NOTE(review): tighten once the expected names/shape are confirmed.
  expect_type(df_1, "list")
  expect_type(df_2, "list")

  # Stacked variants: both grouping columns lead, followed by the table columns.
  expect_identical(
    names(df_3),
    c("type", "status", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_4),
    c("type", "status", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

  expect_identical(
    names(df_5),
    c("type", "status", "category", "total", "frequency_1", "frequency_2", "percent_1", "percent_2")
  )

})


test_that("generate_crosstab handles multiple grouping with edge cases correctly", {

  # Same calls as the multiple-grouping test, but on the small factor-based
  # fixture (nine rows spread over type x status groups).
  df_3 <- mock_data |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = FALSE,
      calculate_per_group = TRUE
    )

  df_4 <- mock_data |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = TRUE,
      calculate_per_group = TRUE
    )

  df_5 <- mock_data |>
    dplyr::group_by(type, status) |>
    generate_crosstab(
      age_group,
      sex,
      expand_categories = FALSE,
      calculate_per_group = FALSE
    )

  # Per-group calculation: sex columns are expected Male-first.
  expect_identical(
    names(df_3),
    c("type", "status", "category", "total", "frequency_Male", "frequency_Female", "percent_Male", "percent_Female")
  )

  expect_identical(
    names(df_4),
    c("type", "status", "category", "total", "frequency_Male", "frequency_Female", "percent_Male", "percent_Female")
  )

  # Without per-group calculation the expected order is Female-first.
  # NOTE(review): the column order differs from the per-group results above;
  # confirm that this asymmetry is intended behaviour.
  expect_identical(
    names(df_5),
    c("type", "status", "category", "total", "frequency_Female", "frequency_Male", "percent_Female", "percent_Male")
  )

})


test_that("generate_crosstab works correctly using percent_by_column", {

  # The same cross-tabulation built from each fixture flavour:
  # plain factors (df), haven-labelled (df_l) and factored-from-labelled (df_f).
  df <- generate_crosstab(mock_data, type, sex, percent_by_column = TRUE)
  df_l <- generate_crosstab(mock_data_labelled, type, sex, percent_by_column = TRUE)
  df_f <- generate_crosstab(mock_data_factored, type, sex, percent_by_column = TRUE)

  # Row and column variables swapped.
  df2 <- generate_crosstab(mock_data, sex, type, percent_by_column = TRUE)
  df2_l <- generate_crosstab(mock_data_labelled, sex, type, percent_by_column = TRUE)
  df2_f <- generate_crosstab(mock_data_factored, sex, type, percent_by_column = TRUE)

  # Classes: every flavour is checked once (previously df_l / df_f were
  # checked twice for one class and never for the other).
  expect_s3_class(df, "data.frame")
  expect_s3_class(df, "tsg")
  expect_s3_class(df_l, "tsg")
  expect_s3_class(df_f, "tsg")

  expect_s3_class(df, "tsgc")
  expect_s3_class(df_l, "tsgc")
  expect_s3_class(df_f, "tsgc")

  # One column more than in the default (row-percent) layout.
  expect_equal(dim(df), c(4, 7))
  expect_equal(dim(df_l), c(4, 7))
  expect_equal(dim(df_f), c(4, 7))

  expect_equal(dim(df2), c(3, 9))
  expect_equal(dim(df2_l), c(3, 9))
  expect_equal(dim(df2_f), c(3, 9))

  # The category label is the variable label when one exists (labelled data),
  # otherwise the column name.
  expect_equal(attributes(df$category)$label, "type")
  expect_equal(attributes(df_l$category)$label, "Type")
  expect_equal(attributes(df_f$category)$label, "type")
  expect_equal(attributes(df2$category)$label, "sex")
  expect_equal(attributes(df2_l$category)$label, "Sex")
  expect_equal(attributes(df2_f$category)$label, "sex")

  # Column labels use the value label even when the column name uses the code.
  expect_equal(attributes(df$percent_Male)$label, "Percent__Male")
  expect_equal(attributes(df_l$percent_1)$label, "Percent__Male")
  expect_equal(attributes(df_f$percent_Male)$label, "Percent__Male")
  expect_equal(attributes(df2$frequency_A)$label, "Frequency__A")
  expect_equal(attributes(df2_l$frequency_1)$label, "Frequency__A")
  expect_equal(attributes(df2_f$frequency_A)$label, "Frequency__A")

  # Column percentages: the total row of each column reads 100.
  expect_equal(df$percent_Male[nrow(df)], 100)
  expect_equal(df_l$percent_1[nrow(df_l)], 100)
  expect_equal(df_f$percent_Male[nrow(df_f)], 100)
  expect_equal(df2$percent_A[nrow(df2)], 100)
  expect_equal(df2_l$percent_1[nrow(df2_l)], 100)
  expect_equal(df2_f$percent_A[nrow(df2_f)], 100)

  # Last row is the total row; for labelled input its category reads "0".
  expect_equal(as.character(df$category[nrow(df)]), "Total")
  expect_equal(as.character(df_l$category[nrow(df_l)]), "0")
  expect_equal(as.character(df_f$category[nrow(df_f)]), "Total")
  expect_equal(as.character(df2$category[nrow(df2)]), "Total")
  expect_equal(as.character(df2_l$category[nrow(df2_l)]), "0")
  expect_equal(as.character(df2_f$category[nrow(df2_f)]), "Total")

  # Grand totals equal the fixture sizes: 9 rows in mock_data, 16 rows in the
  # labelled and factored fixtures. Each table is checked against its own
  # total (previously every assertion read `df$frequency_total`).
  expect_equal(df$frequency_total[nrow(df)], 9)
  expect_equal(df_l$frequency_total[nrow(df_l)], 16)
  expect_equal(df_f$frequency_total[nrow(df_f)], 16)

  expect_equal(df2$frequency_total[nrow(df2)], 9)
  expect_equal(df2_l$frequency_total[nrow(df2_l)], 16)
  expect_equal(df2_f$frequency_total[nrow(df2_f)], 16)

})


test_that("generate_crosstab returns frequency values only if specified", {

  # Ungrouped table of type by sex, frequencies only.
  flat <- mock_data_labelled |>
    generate_crosstab(type, sex, add_percent = FALSE)

  # Grouped by type: stacked, as a list, with column percentages requested
  # (but percentages switched off), and computed per group.
  stacked <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, add_percent = FALSE)

  as_list <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, add_percent = FALSE, group_as_list = TRUE)

  by_column <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, add_percent = FALSE, percent_by_column = TRUE)

  per_group <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_crosstab(age_group, sex, add_percent = FALSE, calculate_per_group = TRUE)

  # Counts for the two sex codes add up to the row size (first row) and to
  # the full sample (last row).
  code_1 <- flat[["1"]]
  code_2 <- flat[["2"]]
  last_row <- nrow(flat)

  expect_equal(code_1[1] + code_2[1], 7)
  expect_equal(code_1[last_row] + code_2[last_row], 16)

  # Frequency-only layout: bare code columns, total last.
  expect_identical(
    names(flat),
    c("category", "1", "2", "total")
  )

  expect_equal(dim(flat), c(4, 4))
  expect_equal(dim(stacked), c(9, 5))
  expect_equal(dim(as_list$A), c(3, 5))
  expect_equal(dim(as_list$B), c(3, 5))
  expect_equal(dim(as_list$C), c(3, 5))
  expect_equal(dim(by_column), c(9, 5))
  expect_equal(dim(per_group), c(9, 5))

})


test_that("generate_crosstab handles missing values correctly", {

  # Copy of the factor fixture with one missing `type` value.
  mock_data_with_na <- mock_data
  mock_data_with_na$type[1] <- NA

  # Ungrouped, with a custom label for the missing category.
  result_1 <- generate_crosstab(mock_data_with_na, type, sex, label_na = "Missing")

  # Grouped, with missing values excluded.
  result_2 <- mock_data_with_na |>
    dplyr::group_by(age_group) |>
    generate_crosstab(type, sex, include_na = FALSE)

  # Grouped, with a custom label for the missing category.
  result_3 <- mock_data_with_na |>
    dplyr::group_by(age_group) |>
    generate_crosstab(type, sex, label_na = "Missing")

  expect_true(any(result_1$category == "Missing"))
  expect_false(anyNA(result_2$category))

  # Previously computed but never checked: the custom NA label should also
  # appear when the data is grouped, mirroring the ungrouped case above.
  expect_true(any(result_3$category == "Missing"))

})



test_that("generate_crosstab retains label when grouping is applied", {

  # One table per age group, returned as a list.
  tables_by_age <- mock_data_labelled |>
    dplyr::group_by(age_group) |>
    generate_crosstab(type, sex, group_as_list = TRUE)

  # The grouping column in the first table must still carry the variable
  # label attached to the fixture.
  first_table <- tables_by_age[[1]]
  group_label <- attributes(first_table$age_group)$label

  expect_equal(group_label, "Age group")

})

# Try the tsg package in your browser
#
# Any scripts or data that you put into this service are public.
#
# tsg documentation built on Feb. 22, 2026, 5:08 p.m.