# Unit tests for generate_frequency()
# Shared fixtures used by the tests in this file ------------------------------

# Character categories with no missing values.
df <- dplyr::tibble(
  category = c("C", "C", "A", "C", "A", "C", "A", "B"),
  type = c("X", "Y", "X", "X", "Y", "Y", "X", "X"),
  value = c(1, 2, 2, 2, 3, 3, 2, 2)
)

# Same column layout, with two missing entries in `category`.
df_na <- dplyr::tibble(
  category = c("A", "B", "A", NA, "C", "C", "A", NA),
  type = c("X", "Y", "X", "X", "Y", "Y", "X", "X"),
  value = c(1, 2, 1, 2, 3, 3, 1, 2)
)

# Zero-row input.
df_empty <- dplyr::tibble(category = character(0), value = numeric(0))

# `category` stored as a haven-labelled numeric vector with a variable label.
df_labelled <- dplyr::tibble(
  category = haven::labelled(
    c(1, 2, 1, 3, 2, 1, 3, 2),
    label = "Category haven",
    labels = c(A = 1, B = 2, C = 3)
  ),
  value = c(10, 20, 10, 30, 20, 10, 30, 20)
)

# `category` stored as a factor; its variable label is attached afterwards.
df_factored <- dplyr::tibble(
  category = factor(c(1, 2, 1, 3, 2, 1, 3, 2), labels = c("A", "B", "C")),
  value = c(5, 10, 5, 15, 10, 5, 15, 10)
)
attr(df_factored$category, "label") <- "Category factor"

# Mixed fixture: two haven-labelled columns (`type`, `sex`) and two plain
# character columns, one of which (`age_group`) carries a "label" attribute.
mock_data_labelled <- dplyr::tibble(
  type = haven::labelled(
    c(1, 2, 1, 2, 3, 1, 3, 2, 1, 1, 3, 1, 1, 2, 2, 3),
    label = "Type",
    labels = c(A = 1, B = 2, C = 3)
  ),
  sex = haven::labelled(
    c(1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1),
    label = "Sex",
    labels = c(Male = 1, Female = 2)
  ),
  status = c(
    "Healthy", "Sick", "Sick", "Healthy", "Healthy", "Sick", "Sick", "Healthy",
    "Sick", "Healthy", "Healthy", "Sick", "Sick", "Sick", "Healthy", "Healthy"
  ),
  age_group = c(
    "Old", "Young", "Young", "Young", "Old", "Old", "Young", "Young",
    "Young", "Young", "Old", "Young", "Young", "Young", "Young", "Old"
  )
)
attr(mock_data_labelled$age_group, "label") <- "Age group"
# Basic single-variable table: class, columns, types, values and labels
test_that("generate_frequency returns correct frequency table", {
  result <- generate_frequency(df, category)

  expect_s3_class(result, "tsg")

  # Shape: the stub column plus frequency and percent; three categories
  # followed by a total row.
  expect_true("category" %in% colnames(result))
  expect_true("frequency" %in% colnames(result))
  expect_true("percent" %in% colnames(result))
  expect_equal(ncol(result), 3)
  expect_equal(nrow(result), 4)

  # Column types: counts are integers, percentages are non-integer numerics.
  # Both operands are length-one logicals, so the scalar `&&` is the right
  # operator here (the elementwise `&` is meant for vector masks).
  expect_true(is.integer(result$frequency))
  expect_true(is.numeric(result$percent) && !is.integer(result$percent))

  # Values: categories in the order C, A, B, then the total.
  expect_equal(as.vector(result$category), c("C", "A", "B", "Total"))
  expect_equal(as.vector(result$frequency), c(4, 3, 1, 8))

  # Column labels attached to the output.
  expect_equal(attributes(result$frequency)$label, "Frequency")
  expect_equal(attributes(result$percent)$label, "Percent")
  expect_equal(attributes(result$category)$label, "category")
})
# Calling without a column selection yields one table per column of `df`
test_that("generate_frequency returns correct frequency table for multiple variables", {
  tables <- generate_frequency(df)

  # The return value is a classed list.
  expect_s3_class(tables, "tsg")
  expect_s3_class(tables, "tsgf")
  expect_true(inherits(tables, "list"))

  # One element per input column, named after the column.
  expect_equal(names(tables), c("category", "type", "value"))
  expect_equal(length(tables), 3)
})
# Labelled (haven) and factor inputs keep their values and variable labels
test_that("generate_frequency handles factors and labelled variables correctly", {
  # haven-labelled input: the stub keeps the numeric codes; the last row
  # (the total) is expected to carry the value 0.
  labelled_tbl <- generate_frequency(df_labelled, category)
  labelled_stub <- labelled_tbl$category
  expect_equal(as.vector(labelled_stub), c(1, 2, 3, 0))
  expect_equal(attributes(labelled_stub)$label, "Category haven")

  # Factor input: the stub shows the level names followed by "Total".
  factor_tbl <- generate_frequency(df_factored, category)
  factor_stub <- factor_tbl$category
  expect_equal(as.vector(factor_stub), c("A", "B", "C", "Total"))
  expect_equal(attributes(factor_stub)$label, "Category factor")
})
# Row order with and without sorting by frequency
test_that("generate_frequency sorts correctly by frequency", {
  # sort_value = TRUE: categories ordered by descending frequency, total last.
  sorted <- generate_frequency(df, category, sort_value = TRUE)
  expect_equal(as.vector(sorted$category), c("C", "A", "B", "Total"))
  expect_equal(as.vector(sorted$frequency), c(4, 3, 1, 8))

  # sort_value = FALSE: categories stay in A, B, C order regardless of count.
  unsorted <- generate_frequency(df, category, sort_value = FALSE)
  expect_equal(as.vector(unsorted$category), c("A", "B", "C", "Total"))
  expect_equal(as.vector(unsorted$frequency), c(3, 1, 4, 8))
})
# Variables named in sort_except are left unsorted
test_that("generate_frequency respects sort_except argument", {
  tables <- generate_frequency(
    df,
    category,
    value,
    sort_value = TRUE,
    sort_except = "value"
  )

  # `category` is sorted by frequency.
  by_category <- tables$category
  expect_equal(as.vector(by_category$category), c("C", "A", "B", "Total"))
  expect_equal(as.vector(by_category$frequency), c(4, 3, 1, 8))

  # `value` is excluded from sorting, so its counts are not in descending order.
  by_value <- tables$value
  expect_equal(as.vector(by_value$frequency), c(1, 5, 2, 8))
})
# Total row and percent column are present when requested
test_that("generate_frequency adds total and percentages", {
  result <- generate_frequency(
    df,
    category,
    add_total = TRUE,
    add_percent = TRUE
  )
  last_row <- nrow(result)

  expect_true("percent" %in% colnames(result))

  # The final row is the total and accounts for 100 percent.
  expect_equal(result$category[last_row], "Total")
  expect_equal(result$percent[last_row], 100)
})
# Total row and percent column can both be switched off
test_that("generate_frequency can exclude total and percentage", {
  result <- generate_frequency(
    df,
    category,
    add_total = FALSE,
    add_percent = FALSE
  )

  # Only the stub and frequency columns remain, one row per category.
  expect_false("percent" %in% colnames(result))
  expect_equal(ncol(result), 2)
  expect_equal(nrow(result), 3)
})
# Cumulative frequency and cumulative percent columns
test_that("generate_frequency adds cumulative frequencies and percentages", {
  result <- generate_frequency(
    df,
    category,
    add_cumulative = TRUE,
    add_cumulative_percent = TRUE
  )
  total_row <- nrow(result)
  last_category_row <- total_row - 1

  expect_true("cumulative" %in% colnames(result))
  expect_true("cumulative_percent" %in% colnames(result))
  expect_equal(ncol(result), 5)

  # The last category row accumulates everything: 100 percent, and a
  # cumulative count equal to the frequency shown in the total row.
  expect_equal(result$cumulative_percent[last_category_row], 100)
  expect_equal(result$cumulative[last_category_row], result$frequency[total_row])

  # The total row itself has no cumulative values.
  expect_true(is.na(result$cumulative_percent[total_row]))
  expect_true(is.na(result$cumulative[total_row]))
})
# Entries of `metadata` are exposed as attributes on the result
test_that("generate_frequency attaches metadata correctly", {
  result <- generate_frequency(
    df,
    category,
    metadata = list(title = "Frequency Table", subtitle = "Category Counts")
  )

  # Each metadata entry is readable back under its own name.
  expect_equal(attr(result, "title"), "Frequency Table")
  expect_equal(attr(result, "subtitle"), "Category Counts")
})
# A custom stub label is stored on the result
test_that("generate_frequency uses custom labels", {
  result <- generate_frequency(df, category, label_stub = "Custom Label")

  # The value passed as label_stub is read back from the "label_xlsx" attribute.
  stored_label <- attr(result, "label_xlsx")
  expect_equal(stored_label, "Custom Label")
})
# The total row can be placed first or last
test_that("generate_frequency places the total row in the correct position", {
  # position_total = "top": total is the first row.
  result_top <- generate_frequency(
    df,
    category,
    add_total = TRUE,
    position_total = "top"
  )
  expect_equal(result_top$category[1], "Total")

  # position_total = "bottom": total is the last row.
  result_bottom <- generate_frequency(
    df,
    category,
    add_total = TRUE,
    position_total = "bottom"
  )
  expect_equal(result_bottom$category[nrow(result_bottom)], "Total")
})
# as_proportion = TRUE swaps the percent column for a proportion column
test_that("generate_frequency returns as proportion instead of percent", {
  result <- generate_frequency(df, category, as_proportion = TRUE)

  # Same shape as the percent variant: three columns, three categories + total.
  expect_equal(ncol(result), 3)
  expect_equal(nrow(result), 4)

  # The proportion column exists, is labelled, and sums to 1 in the total row.
  expect_true("proportion" %in% colnames(result))
  expect_equal(attributes(result$proportion)$label, "Proportion")
  expect_equal(result$proportion[nrow(result)], 1)
})
# Missing values: dropped, kept with the default label, or kept with a custom one
test_that("generate_frequency handles NA values correctly", {
  # NA excluded: only the six non-missing rows are counted, in A, B, C.
  result_without_na <- generate_frequency(
    df_na,
    category,
    include_na = FALSE,
    add_total = FALSE
  )
  expect_equal(sum(result_without_na$frequency, na.rm = TRUE), 6)
  expect_equal(nrow(result_without_na), 3)

  # NA included without label_na: an extra row labelled "Not reported".
  result_with_na <- generate_frequency(
    df_na,
    category,
    include_na = TRUE,
    add_total = FALSE
  )
  expect_equal(nrow(result_with_na), 4)
  expect_true("Not reported" %in% result_with_na$category)

  # NA included with label_na = "Missing": the extra row uses that label.
  result_with_na_labelled <- generate_frequency(
    df_na,
    category,
    include_na = TRUE,
    label_na = "Missing",
    add_total = FALSE
  )
  expect_equal(nrow(result_with_na_labelled), 4)
  expect_true("Missing" %in% result_with_na_labelled$category)

  # Grouped input returned as a list: the custom NA label shows up in the
  # first group's table.
  result_with_na_group <- df_na |>
    dplyr::group_by(type) |>
    generate_frequency(
      category,
      include_na = TRUE,
      add_total = FALSE,
      group_as_list = TRUE,
      label_na = "Missing"
    )
  expect_true("Missing" %in% result_with_na_group[[1]]$category)
})
# Group as list
test_that("generate_frequency calculates per group and returns a list", {
  result <- df |>
    dplyr::group_by(value) |>
    generate_frequency(category, group_as_list = TRUE)

  expect_true(inherits(result, "list"))
  expect_equal(length(result), 3) # Three unique values in 'value' column

  # vapply() pins the result to one logical per element, whatever the input.
  is_data_frame <- vapply(
    result,
    function(x) inherits(x, "data.frame"),
    logical(1)
  )
  expect_true(all(is_data_frame))

  # The call has to run *inside* expect_warning() for a warning to be seen;
  # checking an already-computed value can never fail. regexp = NA asserts
  # that no warning is raised. The assignment is evaluated in the test's
  # environment, so `result_warn` is available afterwards.
  expect_warning(
    result_warn <- df |>
      dplyr::group_by(category) |>
      generate_frequency(value, group_as_list = TRUE),
    regexp = NA
  )
  expect_equal(names(result_warn), c("A", "B", "C"))
})
# Grouped input returned as one stacked data frame
test_that("generate_frequency calculates per group and returns a data frame", {
  # Without a grand total: 12 rows.
  per_group <- df |>
    dplyr::group_by(value) |>
    generate_frequency(category, group_as_list = FALSE)
  expect_true(inherits(per_group, "data.frame"))
  expect_equal(nrow(per_group), 12)

  # With group_grand_total = TRUE: 16 rows.
  per_group_with_grand_total <- df |>
    dplyr::group_by(value) |>
    generate_frequency(
      category,
      group_as_list = FALSE,
      group_grand_total = TRUE
    )
  expect_true(inherits(per_group_with_grand_total, "data.frame"))
  expect_equal(nrow(per_group_with_grand_total), 16)
})
# Group as list with grand total
test_that("generate_frequency calculates per group with grand total and returns a list", {
  result <- df |>
    dplyr::group_by(value) |>
    generate_frequency(category, group_as_list = TRUE, group_grand_total = TRUE)

  expect_true(inherits(result, "list"))

  # Three unique values in 'value' plus the "All" grand-total element.
  expect_equal(length(result), 4)
  expect_true(identical(names(result), c("All", "1", "2", "3")))

  # vapply() pins the result to one logical per element, whatever the input.
  is_data_frame <- vapply(
    result,
    function(x) inherits(x, "data.frame"),
    logical(1)
  )
  expect_true(all(is_data_frame))

  # The call has to run *inside* expect_warning() for a warning to be seen;
  # checking an already-computed value can never fail. regexp = NA asserts
  # that no warning is raised. The assignment is evaluated in the test's
  # environment, so `result_warn` is available afterwards.
  expect_warning(
    result_warn <- df |>
      dplyr::group_by(category) |>
      generate_frequency(value, group_as_list = TRUE),
    regexp = NA
  )
  expect_equal(names(result_warn), c("A", "B", "C"))
})
# With multiple grouping variables
test_that("generate_frequency works with multiple grouping variables", {
  # Case 1: one table, calculate_per_group = FALSE.
  result_1 <- df |>
    dplyr::group_by(type, category) |>
    generate_frequency(
      value,
      group_as_list = FALSE,
      calculate_per_group = FALSE
    )

  expect_true(inherits(result_1, "tsg"))
  expect_equal(nrow(result_1), 8)
  expect_equal(ncol(result_1), 5)

  # Case 2: list output; element names join the group values with "|" and
  # only the type/category combinations present in `df` appear.
  result_2 <- df |>
    dplyr::group_by(type, category) |>
    generate_frequency(
      value,
      group_as_list = TRUE,
      group_separator = "|",
      expand_categories = FALSE
    ) |>
    suppressMessages()

  expect_true(inherits(result_2, "list"))
  expect_equal(length(result_2), 5)
  expect_equal(names(result_2), c("X|A", "X|B", "X|C", "Y|A", "Y|C"))

  # vapply() pins the result to one logical per element, whatever the input.
  is_data_frame <- vapply(
    result_2,
    function(x) inherits(x, "data.frame"),
    logical(1)
  )
  expect_true(all(is_data_frame))

  expect_equal(nrow(result_2[["X|A"]]), 2)

  # Spot-check the Y|C table: its last row holds a count of 2 and 100 percent.
  y_c <- result_2[["Y|C"]]
  expect_equal(nrow(y_c), 3)
  expect_equal(ncol(y_c), 5)
  expect_equal(y_c$frequency[nrow(y_c)], 2)
  expect_equal(y_c$percent[nrow(y_c)], 100)
  expect_contains(attributes(result_2)$groups, c("type", "category"))

  # Case 3: one table, calculate_per_group = TRUE; a "Total" row appears
  # five times and the final row is the total for type "Y".
  result_3 <- df |>
    dplyr::group_by(type, category) |>
    generate_frequency(
      value,
      group_as_list = FALSE,
      calculate_per_group = TRUE
    ) |>
    suppressMessages()

  last_row <- nrow(result_3)
  expect_true(inherits(result_3, "tsg"))
  expect_equal(nrow(result_3), 20)
  expect_equal(ncol(result_3), 5)
  expect_equal(result_3$frequency[last_row], 2)
  expect_equal(result_3$percent[last_row], 100)
  expect_equal(result_3$category[last_row], "Total")
  expect_equal(result_3$type[last_row], "Y")
  expect_length(result_3$category[result_3$category == "Total"], 5)
  expect_equal(attributes(result_3)$label_total, "Total")
  expect_contains(attributes(result_3)$groups, c("type", "category"))
})
# Zero-row input still yields a well-formed table
test_that("generate_frequency handles empty data frame correctly", {
  result <- generate_frequency(df_empty, category)

  # The usual three columns, with the total as the only row.
  expect_equal(dim(result), c(1, 3))
  expect_equal(result$category[1], "Total")

  # Nothing was counted, so both the count and the percent are zero.
  expect_equal(result$frequency[1], 0)
  expect_equal(result$percent[1], 0)
})
# Input in which every value is missing
test_that("generate_frequency handles dataset with only NA values", {
  df_na_only <- dplyr::tibble(category = rep(NA, 3), value = rep(NA, 3))
  result <- generate_frequency(df_na_only, category, include_na = TRUE)

  # Two rows: the "Not reported" row followed by "Total".
  expect_equal(nrow(result), 2)
  expect_true("Not reported" %in% result$category)
  expect_equal(result$category[2], "Total")

  # All three NA values are counted together in the first row.
  expect_equal(result$frequency[1], 3)
  expect_equal(result$percent[1], 100)
})
# Top n categories
test_that("generate_frequency handles top_n parameter correctly", {
  # Fixed, strictly decreasing counts (140 rows in total) instead of an
  # unseeded sample(): the three most frequent categories are unambiguous,
  # so the expected row counts cannot vary between runs or hinge on ties.
  df_large <- dplyr::tibble(
    category = rep(
      LETTERS[1:10],
      times = c(30, 25, 20, 15, 12, 10, 9, 8, 6, 5)
    )
  )

  # Top three categories plus the "Others" row.
  result_top_3 <- generate_frequency(
    df_large,
    category,
    top_n = 3,
    add_total = FALSE
  )
  expect_equal(nrow(result_top_3), 4)
  expect_true("Others" %in% result_top_3$category)

  # NOTE(review): `person_record` is not defined in this file; presumably it
  # is a dataset provided by the package under test. Confirm.
  df_top_n <- generate_frequency(person_record, marital_status, top_n = 3)
  df_top_n_only <- generate_frequency(
    person_record,
    marital_status,
    top_n = 3,
    top_n_only = TRUE
  )
  expect_equal(nrow(df_top_n), 5)
  expect_equal(nrow(df_top_n_only), 4)
})
# Top n categories when the data also contains missing values
test_that("generate_frequency handles top_n with NA values correctly", {
  # Fixed, strictly decreasing counts (100 rows in total) instead of an
  # unseeded sample(): every category, including NA, is guaranteed to be
  # present and the top three are unambiguous, so results are reproducible.
  df_large_na <- dplyr::tibble(
    category = rep(
      c(LETTERS[1:10], NA),
      times = c(19, 16, 14, 12, 10, 8, 7, 5, 4, 3, 2)
    )
  )

  # Four rows are expected when top_n_only is not set.
  result_top_3_na <- generate_frequency(
    df_large_na,
    category,
    top_n = 3,
    include_na = TRUE,
    add_total = FALSE
  )
  expect_equal(nrow(result_top_3_na), 4)

  # With sort_value = FALSE every distinct value (NA included) gets a row.
  result_top_not_sorted <- generate_frequency(
    df_large_na,
    category,
    top_n = 3,
    include_na = TRUE,
    add_total = FALSE,
    sort_value = FALSE
  )
  expect_equal(
    nrow(result_top_not_sorted),
    length(unique(df_large_na$category))
  )

  # top_n_only = TRUE: three rows are expected.
  result_top_3 <- generate_frequency(
    df_large_na,
    category,
    top_n = 3,
    top_n_only = TRUE,
    include_na = TRUE,
    add_total = FALSE
  )
  expect_equal(nrow(result_top_3), 3)
})
# Collapsing the per-variable tables into a single wide table
test_that("generate_frequency handles collapse list correctly", {
  df_collapse <- dplyr::tibble(
    category_1 = c("A", "B", "C", "D", "E", "F", "G", "H"),
    category_2 = c("A", "C", "D", "D", "D", "F", "A", "H")
  )

  # Collapsed output without the percent columns.
  collapsed_counts <- df_collapse |>
    tsg::generate_frequency(add_percent = FALSE, collapse_list = TRUE)

  # Collapsing via the argument and via collapse_list() afterwards must agree.
  collapsed_by_arg <- df_collapse |>
    tsg::generate_frequency(collapse_list = TRUE)
  collapsed_by_call <- df_collapse |>
    generate_frequency() |>
    collapse_list()
  expect_equal(collapsed_by_arg, collapsed_by_call)

  # One row per input column.
  expect_equal(nrow(collapsed_by_arg), 2)
  expect_equal(ncol(collapsed_by_arg), 19)
  expect_equal(dim(collapsed_counts), c(2, 10))
})
# Row counts under the expand_categories / calculate_per_group combinations
test_that("generate_frequency expand categories correctly", {
  by_type <- dplyr::group_by(df, type)

  # expand_categories = TRUE: 8 rows.
  expanded <- generate_frequency(by_type, category, expand_categories = TRUE)
  expect_equal(nrow(expanded), 8)

  # expand_categories = FALSE: 7 rows.
  observed_only <- generate_frequency(
    by_type,
    category,
    expand_categories = FALSE
  )
  expect_equal(nrow(observed_only), 7)

  # expand_categories = FALSE and calculate_per_group = FALSE: 6 rows.
  observed_not_per_group <- generate_frequency(
    by_type,
    category,
    expand_categories = FALSE,
    calculate_per_group = FALSE
  )
  expect_equal(nrow(observed_not_per_group), 6)
})
# Variable labels on grouping columns survive the tabulation
test_that("generate_frequency retains label when grouping is applied", {
  # Grouping by a haven-labelled column, result as a single table.
  by_type <- mock_data_labelled |>
    dplyr::group_by(type) |>
    generate_frequency(sex)
  expect_equal(attributes(by_type$type)$label, "Type")

  # Grouping by a character column carrying a "label" attribute, result as
  # a list with one element per group.
  by_age_group <- mock_data_labelled |>
    dplyr::group_by(age_group) |>
    generate_frequency(sex, group_as_list = TRUE)
  expect_equal(attributes(by_age_group$Young$age_group)$label, "Age group")
})
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.