# test data ---------------------------------------------------------------
set.seed(100)
responses <- {
data.frame(
# continuous numeric, no variable label, no NA
q0 = sample(
x = datasets::swiss$Agriculture,
size = 25,
replace = TRUE),
# continuous numeric, variable label, incl. NA
q1 = sample(
x = c(datasets::swiss$Agriculture, NA),
size = 25,
prob = c(rep(.8/47,47), 0.2),
replace = TRUE),
# factor (numbers), no value labels
q2 = sample(
x = datasets::Orange$Tree,
size = 25,
replace = TRUE),
# character, no value labels
q3 = sample(
stringr::fruit,
25,
prob = 1/(1:80 * sum(1/(1:80))),
replace = TRUE),
# numeric values, discrete numeric value labels
q4 = sample(
1:8,
25,
replace = TRUE),
# character values, discrete character value labels
q5 = sample(
letters[1:4],
25,
prob = c(0.4,0.3,0.2,0.1),
replace = TRUE),
# character, no value labels
gender_labelled = c(
rep(1, 12),
rep(2, 12),
rep(3, 0),
rep(NA_real_, 1)
),
# groups
group_var1 = sample(
c('group 1', 'group 2', NA_character_),
25,
prob = c(.8, .15, .05),
replace = TRUE
),
# numeric weights
w = rnorm(25, mean = 1, sd = 0.1)
) %>%
labelled::set_value_labels(
q4 = c(`Less than a year` = 1,
`1-2 years` = 2,
`3-4 years` = 3,
`5-10 years` = 4,
`10-20 years` = 5,
`20-50 years` = 6,
`50-100 years` = 7,
`More than 100 years` = 8),
q5 = c(
`Very happy` = "a",
`Somewhat happy` = "b",
`Somewhat unhappy` = "c",
`Very unhappy` = "d"
),
gender_labelled = c(
'male' = 1,
'female' = 2,
'other' = 3
)
) %>%
labelled::set_variable_labels(
q1 = "% of males involved in agriculture",
q2 = "Orange tree ID",
q3 = "Preferred fruit",
q4 = "Duration",
q5 = "Satisfaction",
w = "Weights",
gender_labelled = 'gender'
) %>%
dplyr::as_tibble()
}
# Basic tests -------------------------------------------------------------
### Incorrect parameter testing
#dataframes
test_that("Not a dataframe error - vectors", {
df <- c('This', 'is', 'not', 'a', 'dataframe')
a = c(1, 1, 2, 3, 1)
expect_error(freqs(df, a))
})
test_that("Not a dataframe error - matrix", {
column_a <- c(1,1,1,1,2,2,3)
column_b <- c(0.5, 1.2, 0.8, 0.5, 0.2, 0.1, 1)
table <- rbind(column_a, column_b)
expect_error(freqs(table, column_a))
})
#variables
test_that("Runs on variables, not integers", {
expect_error(freqs(mtcars, 10))
})
#nas
test_that("Incorrect nas argument", {
expect_error(freqs(mtcars, cyl, nas = 'True'))
})
#weights
test_that("Incorrect wt argument", {
expect_error(freqs(mtcars, cyl, wt = 'True'))
})
### weights
test_that("Weights", {
df <- data.frame(
a = c(1, 2, 2, 3, 4, 2, NA),
weights = c(0.9, 0.9, 1.1, 1.1, 1, 1, 1)
)
freqs_weighted <- freqs(df, a, wt = weights)
expect_equal(freqs_weighted$n[1], .9)
})
### nas
#label
test_that("nas - label", {
df <- data.frame(
a = c(1, 2, 2, 3, 4, 2, NA),
weights = c(0.9, 0.9, 1.1, 1.1, 1, 1, 1)
)
yes_nas <- freqs(df, a)
no_nas <- freqs(df, a, nas = FALSE)
expect_equal(nrow(yes_nas), 5)
expect_equal(nrow(no_nas), 4)
})
#group
test_that("nas - group", {
df <- data.frame(
a = c(1, 2, 2, 3, 4, 2, NA),
a2 = c(1, 2, 2, 3, 4, 2, 5),
g = c(1, 1, 2, 2, 3, NA, 2),
g2 = c(1, 1, 2, 2, 3, 3, NA)
) %>% dplyr::group_by(g)
yes_nas <- df %>%
dplyr::group_by(g) %>%
freqs(a)
no_nas <- df %>%
dplyr::group_by(g) %>%
freqs(a, nas_group = FALSE)
no_nas2 <- df %>%
dplyr::group_by(g2) %>%
freqs(a2, nas_group = FALSE)
group_factors <- df %>%
dplyr::group_by(g) %>%
freqs(a, factor_group = TRUE)
expect_equal(nrow(yes_nas), 7)
expect_equal(nrow(no_nas), 6)
expect_equal(is.factor(group_factors$group_var), TRUE)
expect_equal(is.factor(no_nas$group_var), FALSE)
expect_equal(names(yes_nas)[1], 'group_var')
})
###Digits
test_that("Digits", {
df <- data.frame(
a = c(.1, .2, .3)
)
dig1 <- freqs(df, a, digits = 1)
dig2 <- freqs(df, a)
dig3 <- freqs(df, a, digits = 3)
expect_equal(dig1$result[1], .3)
expect_equal(dig2$result[1], .33)
expect_equal(dig3$result[1], .333)
})
###Differing classes of variables
#character column freq
test_that("character vars", {
df <- data.frame(
a = c('Character', '1', 'test')
)
frequencies <- freqs(df, a)
expect_equal(is.data.frame(frequencies), TRUE)
})
#numeric column freq
test_that("numeric vars", {
df <- data.frame(
a = c(1, 2, 3)
)
frequencies <- freqs(df, a)
expect_equal(is.data.frame(frequencies), TRUE)
})
#factored/labelled column freq
test_that("factor vars with missing values", {
df <- data.frame(
a = c(1, 2, 3)
)
labelled::val_label(df$a, 1) <- 'Yes'
labelled::val_label(df$a, 2) <- 'No'
labelled::val_label(df$a, 3) <- 'Idk'
labelled::val_label(df$a, 4) <- 'Do I show up?'
df$a <- forcats::as_factor(df$a)
frequencies <- freqs(df, a)
expect_equal(nrow(frequencies), 4)
})
###Select function
#filter groups
test_that("filter out groups from vars", {
df <- data.frame(
a = c(1, 1, 3, 4, 5),
b = c(1, 1, 1, 2, 2),
c = c(2, 3, 4, 5, 6)
)
frequencies <- df %>%
dplyr::select(a, b) %>%
dplyr::group_by(b) %>%
freqs()
expect_equal(nrow(frequencies), 4)
})
#filter weights
test_that("filter out weights from vars", {
df <- data.frame(
a = c(1, 1, 3, 4, 5),
b = c(1, 1, 1, 2, 2),
c = c(2, 3, 4, 5, 6)
)
frequencies <- df %>%
dplyr::select(a, b) %>%
freqs(wt = b)
expect_equal(nrow(frequencies), 4)
})
# More advanced tests -------------------------------------------------------------------
test_that("test data is correct", {
expect_type(responses, "list")
expect_equal(ncol(responses), 9)
expect_equal(nrow(responses), 25)
})
test_that("NAs not present, nas = T: n & result are correct", {
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "mean") %>%
dplyr::select(result) %>%
dplyr::pull(),
round(mean(responses$q0),2)
)
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "mean", nas = TRUE) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q0),])
)
})
test_that("NAs not present, nas = F: n & result are correct", {
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "mean", nas = FALSE) %>%
dplyr::select(result) %>%
dplyr::pull(),
round(mean(responses$q0),2)
)
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "mean", nas = FALSE) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q0),])
)
})
test_that("NAs present, nas = T: throws error", {
expect_error(responses %>%
dplyr::select(q1) %>%
freqs(stat = "mean")
)
})
test_that("NAs present, nas = F: n & result are correct", {
expect_equal(responses %>%
dplyr::select(q1) %>%
freqs(stat = "mean", nas = FALSE) %>%
dplyr::select(result) %>%
dplyr::pull(),
round(mean(responses$q1, na.rm = TRUE), 2)
)
expect_equivalent(responses %>%
dplyr::select(q1) %>%
freqs(stat = "mean", nas = FALSE) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q1),])
)
})
test_that("factor variable input: throws error", {
expect_error(
responses %>%
select(q2) %>%
freqs(stat = 'mean')
)
})
test_that("character variable input: throws error", {
expect_error(
responses %>%
dplyr::select(q3) %>%
freqs(stat = 'mean')
)
})
test_that("column with value labels input: throws error", {
expect_error(
responses %>%
dplyr::select(q4) %>%
freqs(stat = 'mean')
)
})
test_that("column with value labels input: (potentially misleading) result is correct
after labels are removed", {
expect_equivalent(
responses %>%
dplyr::mutate(q4 = as.numeric(q4)) %>%
dplyr::select(q4) %>%
freqs(stat = 'mean') %>%
dplyr::select(result) %>%
dplyr::pull(),
mean(responses$q4)
)
})
test_that("column with value labels input: answer is correct after labels removed (even if potentially misleading)", {
expect_equivalent(
responses %>%
dplyr::select(q4) %>%
labelled::remove_labels() %>%
freqs(stat = 'mean') %>%
dplyr::select(result) %>%
dplyr::pull(),
mean(responses$q4)
)
})
test_that("using weights: equivalent to weighted.mean() output", {
expect_equal(
responses %>%
dplyr::select(q1, w) %>%
freqs(stat = 'mean', nas = FALSE, wt = w) %>%
dplyr::select(result) %>%
dplyr::pull(),
stats::weighted.mean(x = responses$q1,
w = responses$w,
na.rm = TRUE) %>%
round(2)
)
})
test_that("using prompt: variable label is correctly output", {
expect_equal(
responses %>%
dplyr::select(q1) %>%
freqs(stat = 'mean', nas = FALSE, prompt = TRUE) %>%
dplyr::select(prompt) %>%
dplyr::pull(),
responses %>%
dplyr::select(q1) %>%
labelled::var_label() %>%
tibble::deframe()
)
})
test_that("using digits: output is precise to multiple decimal places", {
expect_equal(
responses %>%
dplyr::select(w) %>%
freqs(stat = 'mean', digits = 6, nas = FALSE) %>%
dplyr::select(result) %>%
dplyr::pull(),
responses %>%
dplyr::select(w) %>%
dplyr::pull() %>%
mean(na.rm = TRUE) %>%
round(digits = 6)
)
})
test_that("stat other than 'quantile' gives message when percentile value is provided", {
expect_message(
responses %>%
freqs(q1, percentile = 75, stat = 'mean', nas = FALSE)
)
})
test_that("stat argument only accepts percent, mean, quantile, or summary", {
expect_error(
responses %>%
freqs(q1, stat = 'means', percentile = 75, nas = FALSE)
)
})
test_that("function stops when value labels exist", {
expect_error(
responses %>%
freqs(q4, stat = 'mean', nas = FALSE)
)
})
test_that("unweighted_ns = TRUE, but no wt variable", {
expect_error(
responses %>%
freqs(
q4,
unweighted_ns = TRUE
),
"If you use unweighted_ns = TRUE, you must specify a wt variable"
)
})
test_that("freqs_wuw, ns and results are equal", {
freqs_normal <- mtcars %>% freqs(gear)
freqs_normal_weighted <- mtcars %>% freqs(gear, wt = carb)
freqs_wuw_table <- mtcars %>% y2clerk:::freqs_wuw(
gear,
wt = carb,
# Defaults auto input by function
stat = 'percent',
percentile = NULL,
nas = TRUE,
prompt = FALSE,
digits = 2,
nas_group = TRUE,
factor_group = FALSE,
show_missing_levels = FALSE
)
freqs_wuw_infreqs <- mtcars %>% freqs(
gear,
wt = carb,
unweighted_ns = TRUE,
show_missing_levels = FALSE
)
expect_equal(freqs_normal_weighted$result, freqs_wuw_infreqs$result)
expect_equal(freqs_normal$n, freqs_wuw_infreqs$n)
})
test_that("freqs_wuw, test on responses", {
freqs_normal <- responses %>% freqs(q4)
freqs_normal_weighted <- responses %>% freqs(q4, wt = w)
freqs_wuw_table <- responses %>% y2clerk:::freqs_wuw(
q4,
wt = w,
# Defaults auto input by function
stat = 'percent',
percentile = NULL,
nas = TRUE,
prompt = FALSE,
digits = 2,
nas_group = TRUE,
factor_group = FALSE,
show_missing_levels = FALSE
)
freqs_wuw_infreqs <- responses %>% freqs(
q4,
wt = w,
unweighted_ns = TRUE,
show_missing_levels = FALSE
)
expect_equal(freqs_normal_weighted$result, freqs_wuw_infreqs$result)
expect_equal(freqs_normal$n, freqs_wuw_infreqs$n)
})
# Test on show missing levels ---------------------------------------------
test_that("multi_freqs - show_missing_levels argument", {
test_no_missing_levels <- responses %>%
freqs(
gender_labelled,
show_missing_levels = FALSE
)
test_yes_missing_levels <- responses %>%
freqs(
gender_labelled,
show_missing_levels = TRUE
)
test_yes_missing_levels_no_nas <- responses %>%
freqs(
gender_labelled,
nas = FALSE,
show_missing_levels = TRUE
)
sum_no_missing <-
stringr::str_detect(test_no_missing_levels$label, 'other') %>%
sum(na.rm = TRUE)
sum_yes_missing <-
stringr::str_detect(test_yes_missing_levels$label, 'other') %>%
sum(na.rm = TRUE)
sum_yes_missing_nas <-
stringr::str_detect(test_yes_missing_levels_no_nas$label, 'other') %>%
sum()
expect_equal(sum_no_missing, 0)
expect_equal(sum_yes_missing, 1)
expect_equal(sum_yes_missing_nas, 1)
})
test_that("freqs - show_missing_levels argument", {
# Missing level shows up in NA group, but not other groups
no_missing <- responses %>%
dplyr::group_by(group_var1) %>%
freqs(
gender_labelled,
nas = FALSE,
show_missing_levels = FALSE
)
yes_missing <- responses %>%
dplyr::group_by(group_var1) %>%
freqs(
gender_labelled,
nas = FALSE,
show_missing_levels = TRUE
)
yes_missing_no_nas_group <- responses %>%
dplyr::group_by(group_var1) %>%
freqs(
gender_labelled,
nas = FALSE,
show_missing_levels = TRUE,
nas_group = FALSE
)
sum_no_missing <-
stringr::str_detect(no_missing$label, 'other') %>%
sum()
sum_yes_missing <-
stringr::str_detect(yes_missing$label, 'other') %>%
sum()
sum_yes_missing_no_nas_group <-
stringr::str_detect(yes_missing_no_nas_group$label, 'other') %>%
sum()
expect_equal(sum_no_missing, 0)
expect_equal(sum_yes_missing, 3)
expect_equal(sum_yes_missing_no_nas_group, 2)
})
test_that("freqs - show_missing_levels ordered", {
missing_tibble <- tibble::tibble(
weekdays = c(
rep(1, 10),
rep(2, 0),
rep(3, 10),
rep(4, 0),
rep(5, 10)
),
pokemon = c(
rep(1, 12),
rep(2, 5),
rep(3, 13)
)
) %>%
labelled::set_value_labels(
weekdays = c(
'Monday' = 1,
'Tuesday' = 2,
'Wednesday' = 3,
'Thursday' = 4,
'Friday' = 5
),
pokemon = c(
'Bulbasaur' = 1,
'Charmander' = 2,
'Squirtle' = 3
)
)
missing_freqs <- missing_tibble %>% freqs(weekdays)
missing_freqs_grouped <- missing_tibble %>%
dplyr::group_by(pokemon) %>%
freqs(weekdays, factor_group = TRUE)
expect_equal(missing_freqs %>% dplyr::pull(n), c(10, 0, 10, 0, 10))
expect_equal(
missing_freqs %>% dplyr::pull(label),
c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday')
)
expect_equal(
missing_freqs_grouped %>% dplyr::pull(label) %>% unique(),
c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday')
)
})
# Percentile tests --------------------------------------------------------
test_that("bad input throws error", {
expect_error(
responses %>%
dplyr::select(q0) %>%
freqs(stat = "perc")
)
})
test_that("NAs not present, nas = T: n & result are correct", {
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "quantile", percentile = 95) %>%
dplyr::select(result) %>%
dplyr::pull(),
round(quantile(responses$q0, 0.95),2)
)
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "mean", nas = TRUE) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q0),])
)
})
test_that("NAs not present, nas = F: n & result are correct", {
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "quantile", nas = FALSE, percentile = 50) %>%
dplyr::select(result) %>%
dplyr::pull(),
round(median(responses$q0),2)
)
expect_equivalent(responses %>%
dplyr::select(q0) %>%
freqs(stat = "quantile", nas = FALSE, percentile = 50) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q0),])
)
})
test_that("NAs present, nas = T: throws error", {
expect_error(
responses %>%
dplyr::select(q1) %>%
freqs(stat = "quantile", percentile = 95)
)
})
test_that("NAs present, nas = F: n & result are correct", {
expect_equal(responses %>%
dplyr::select(q1) %>%
freqs(stat = "quantile", nas = FALSE, percentile = 95) %>%
dplyr::select(result) %>%
dplyr::pull(),
round(quantile(responses$q1, 0.95, na.rm = TRUE), 2)
)
expect_equivalent(responses %>%
dplyr::select(q1) %>%
freqs(stat = "quantile", nas = FALSE, percentile = 95) %>%
dplyr::select(n) %>%
dplyr::pull(),
nrow(responses[!is.na(responses$q1),])
)
})
test_that("factor variable input: throws error", {
expect_error(
responses %>%
dplyr::select(q2) %>%
freqs(stat = 'quantile')
)
})
test_that("character variable input: throws error", {
expect_error(
responses %>%
dplyr::select(q3) %>%
freqs(stat = 'quantile')
)
})
test_that("column with value labels input: throws error", {
expect_error(
responses %>%
dplyr::select(q4) %>%
freqs(stat = 'quantile')
)
})
test_that("column with value labels input: (potentially misleading) result is correct
after labels are removed", {
expect_equivalent(
responses %>%
dplyr::mutate(q4 = as.numeric(q4)) %>%
dplyr::select(q4) %>%
freqs(stat = 'quantile', percentile = 95) %>%
dplyr::select(result) %>%
dplyr::pull(),
quantile(as.numeric(responses$q4), 0.95)
)
})
test_that("column with value labels input: answer is correct after labels removed (even if potentially misleading)", {
expect_equivalent(
responses %>%
dplyr::select(q4) %>%
labelled::remove_labels() %>%
freqs(stat = 'quantile', percentile = 95) %>%
dplyr::select(result) %>%
dplyr::pull(),
quantile(as.numeric(responses$q4), 0.95)
)
})
test_that("using weights: equivalent to wtd.quantile() output", {
expect_equal(
responses %>%
dplyr::select(q1, w) %>%
freqs(stat = 'quantile', nas = FALSE, wt = w, percentile = 95) %>%
dplyr::select(result) %>%
dplyr::pull(),
reldist::wtd.quantile(x = responses$q1,
q = 0.95,
weight = responses$w,
na.rm = TRUE) %>%
round(2)
)
})
test_that("using prompt: variable label is correctly output", {
expect_equal(
responses %>%
dplyr::select(q1) %>%
freqs(stat = 'quantile', nas = FALSE, prompt = TRUE, percentile = 0.95) %>%
dplyr::select(prompt) %>%
dplyr::pull(),
responses %>%
dplyr::select(q1) %>%
labelled::var_label() %>%
tibble::deframe()
)
})
test_that("using digits: output is precise to multiple decimal places", {
expect_equal(
responses %>%
dplyr::select(w) %>%
freqs(stat = 'quantile', percentile = 95, digits = 6, nas = FALSE) %>%
dplyr::select(result) %>%
dplyr::pull(),
responses %>%
dplyr::select(w) %>%
dplyr::pull() %>%
quantile(0.95, na.rm = TRUE) %>%
round(digits = 6)
)
})
test_that("output from 'percentile = 0' is equivalent to base::min()", {
expect_equal(
responses %>%
freqs(q0, stat = 'quantile', percentile = 0) %>%
dplyr::select(result) %>%
dplyr::pull() %>% as.numeric(),
min(responses$q0)
)
})
test_that("output from 'percentile = 100' is equivalent to base::max()", {
expect_equal(
responses %>%
freqs(q0, stat = 'quantile', percentile = 100) %>%
dplyr::select(result) %>%
dplyr::pull() %>% as.numeric(),
max(responses$q0)
)
})
test_that("there are 6 lines of output (min, 1st quartile, median, mean, 3rd quartile, max)", {
expect_equal(
responses %>%
freqs(q0, stat = 'summary') %>%
nrow(),
6
)
expect_equal(
responses %>%
freqs(q1, stat = 'summary', nas = FALSE) %>%
nrow(),
6
)
expect_equal(
responses %>%
freqs( q1, stat = 'summary', nas = FALSE, wt = w) %>%
nrow(),
6
)
})
test_that("setting a percentile value when stat = 'summary' does not affect output", {
expect_equal(
responses %>%
freqs(q0, stat = 'summary', percentile = 0),
responses %>%
freqs(q0, stat = 'summary')
)
})
test_that("stat = 'summary' gives message when percentile value is provided", {
expect_message(
responses %>%
dplyr::select(q0,q1,w) %>%
freqs(percentile = 75, stat = 'summary', wt = w, nas = FALSE)
)
})
test_that("stat = 'mean' works when GROUPED", {
test <- responses %>%
dplyr::group_by(q2) %>%
freqs(q1, stat = "mean", nas = FALSE, wt = q0)
expect_equal(length(test$group_var), 4)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.