Nothing
expect_equal_saved_prep <- function(
metadata,
is_aggregated = TRUE,
time_freq = NULL,
file = NULL
) {
workflow <- setup_test_workflow(
metadata = metadata,
is_aggregated = is_aggregated,
time_freq = time_freq,
link_geo = NULL,
link = FALSE
)
saved <- paste0(
"snapshots/data_processing/",
file
) %>%
testthat::test_path() %>%
read_saved_csv()
expect_equal(
workflow$preprocessed_data(),
saved,
tolerance = 0.01,
ignore_attr = TRUE
)
}
test_that("prepprocess is consistent", {
skip_on_cran()
set.seed(123)
# individual-level COVID data
expect_equal_saved_prep(
metadata = list(
is_timevar = TRUE,
special_case = "covid",
family = "binomial"
),
is_aggregated = FALSE,
time_freq = "week",
file = "covid_binomial_indiv.csv"
)
# aggregated COVID data
expect_equal_saved_prep(
metadata = list(
is_timevar = TRUE,
special_case = "covid",
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
file = "covid_binomial_agg.csv"
)
# individual-level general time-varying data
# with binary outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = TRUE,
special_case = NULL,
family = "binomial"
),
is_aggregated = FALSE,
time_freq = "week",
file = "timevar_binomial_indiv.csv"
)
# aggregated general time-varying data
# with binary outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = TRUE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
file = "timevar_binomial_agg.csv"
)
# individual-level general time-varying data
# with continuous outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = TRUE,
special_case = NULL,
family = "normal"
),
is_aggregated = FALSE,
time_freq = "week",
file = "timevar_normal_indiv.csv"
)
# individual-level polling data
expect_equal_saved_prep(
metadata = list(
is_timevar = FALSE,
special_case = "poll",
family = "binomial"
),
is_aggregated = FALSE,
time_freq = NULL,
file = "poll_binomial_indiv.csv"
)
# aggregated polling data
expect_equal_saved_prep(
metadata = list(
is_timevar = FALSE,
special_case = "poll",
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
file = "poll_binomial_agg.csv"
)
# individual-level general cross-sectional data
# with binary outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = FALSE,
time_freq = NULL,
file = "crosssec_binomial_indiv.csv"
)
# aggregated general cross-sectional data
# with binary outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
file = "crosssec_binomial_agg.csv"
)
# individual-level general cross-sectional data
# with continuous outcome
expect_equal_saved_prep(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "normal"
),
is_aggregated = FALSE,
time_freq = NULL,
file = "crosssec_normal_indiv.csv"
)
})
test_that("link_acs works with all linking geographies", {
skip_on_cran()
# No linking geography
expect_no_error(
setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link_geo = NULL,
link = TRUE
)
)
# Linking through zip
expect_no_error(
setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link_geo = "zip",
link = TRUE
)
)
# Linking through county
expect_no_error(
setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link_geo = "county",
link = TRUE
)
)
# Linking through state
expect_no_error(
setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link_geo = "state",
link = TRUE
)
)
})
test_that("load_pstrat works", {
skip_on_cran()
pstrat_data <- example_pstrat_data()
# For general time-varying data
# with binary outcome
workflow <- setup_test_workflow(
metadata = list(
is_timevar = TRUE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link = FALSE
)
capture.output({
workflow$load_pstrat(pstrat_data)
}, type = "message")
expect_no_error(workflow$demo_bars("sex"))
# For general time-varying data
# with continuous outcome
workflow <- setup_test_workflow(
metadata = list(
is_timevar = TRUE,
special_case = NULL,
family = "normal"
),
is_aggregated = FALSE,
time_freq = "week",
link = FALSE
)
capture.output({
workflow$load_pstrat(pstrat_data)
}, type = "message")
expect_no_error(workflow$demo_bars("sex"))
# For general cross-sectional data
# with binary outcome
workflow <- setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link = FALSE
)
capture.output({
workflow$load_pstrat(pstrat_data)
}, type = "message")
expect_no_error(workflow$demo_bars("sex"))
# For general cross-sectional data
# with continuous outcome
workflow <- setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = NULL,
family = "normal"
),
is_aggregated = FALSE,
time_freq = NULL,
link = FALSE
)
capture.output({
workflow$load_pstrat(pstrat_data)
}, type = "message")
expect_no_error(workflow$demo_bars("sex"))
# For COVID data
workflow <- setup_test_workflow(
metadata = list(
is_timevar = TRUE,
special_case = "covid",
family = "binomial"
),
is_aggregated = TRUE,
time_freq = NULL,
link = FALSE
)
expect_error(
workflow$load_pstrat(pstrat_data),
"Custom poststratification data is not supported for special cases"
)
# For polling data
workflow <- setup_test_workflow(
metadata = list(
is_timevar = FALSE,
special_case = "poll",
family = "binomial"
),
link = FALSE
)
expect_error(
workflow$load_pstrat(pstrat_data),
"Custom poststratification data is not supported for special cases"
)
})
test_that(".impute is consistent", {
skip_on_cran()
set.seed(123)
n <- 20
cols <- c("sex", "race", "age")
data <- example_sample_data(
is_timevar = FALSE,
is_aggregated = FALSE,
special_case = NULL,
family = "binomial"
) %>%
mutate(
across(all_of(cols),
~ replace(., row_number() <= n, NA))
)
workflow <- mrp_workflow()
capture.output(
workflow$preprocess(
data,
is_timevar = FALSE,
is_aggregated = FALSE,
special_case = NULL,
family = "binomial"
)
, type = "message")
saved <- testthat::test_path("snapshots/data_processing/impute.csv") %>%
read_saved_csv()
expect_equal(
workflow$preprocessed_data(),
saved
)
})
test_that(".data_type defaults classify common cases correctly", {
# binary
expect_equal(.data_type(c(TRUE, FALSE, NA)), "bin")
expect_equal(.data_type(c(0, 1), num = TRUE), 1)
# categorical (character / factor)
expect_equal(.data_type(c("a", "b", "c", NA)), "cat")
expect_equal(.data_type(factor(c("x", "y", "z")), num = TRUE), 2)
# integer-like with few distinct values → categorical
expect_equal(.data_type(c(1,1,2,2,3,3,NA)), "cat")
# numeric with any decimal → continuous
expect_equal(.data_type(c(1.0, 2.5, 3.0)), "cont")
expect_equal(.data_type(c(1, 2.2), num = TRUE), 3) # even with only 2 values
# integer-like with many distinct values → continuous
expect_equal(.data_type(1:100), "cont")
# dates/times → continuous
expect_equal(.data_type(as.Date("2024-01-01") + 0:5), "cont")
expect_equal(.data_type(as.POSIXct("2024-01-01 00:00:00", tz = "UTC") + 0:5), "cont")
# empty or all-NA → categorical
expect_error(.data_type(c(NA, NA)), "Column does not contain any non-NA values.")
expect_error(.data_type(logical()), "Column does not contain any non-NA values.")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.