Nothing
#' Test a given diseasy store
#' @description
#' This function runs a battery of tests of the given diseasystore.
#'
#' The supplied diseasystore must be a generator for the diseasystore, not an instance of the diseasystore.
#'
#' The tests assume that data has been made available locally to run the majority of the tests.
#' The location of the local data should be configured in the options for "source_conn" of the given
#' diseasystore before calling test_diseasystore.
#'
#' @param diseasystore_generator (`Diseasystore*`)\cr
#' The diseasystore R6 class generator to test.
#' @param conn_generator (`function`)\cr
#' Function that generates a `list`() of connections use as target_conn.
#' Should take a `skip_backend` that does not open connections for the given backends.
#' @param data_files (`character()`)\cr
#' List of files that should be available when testing.
#' @param target_schema `r rd_target_schema()`
#' @param test_start_date (`Date`)\cr
#' The earliest date to retrieve data from during tests.
#' @param skip_backends (`character()`)\cr
#' List of connection types to skip tests for due to missing functionality.
#' @param ...
#' Other parameters passed to the diseasystore generator.
#' @return `r rd_side_effects`
#' @examplesIf requireNamespace("RSQLite", quietly = TRUE)
#' \donttest{
#' withr::local_options("diseasystore.DiseasystoreEcdcRespiratoryViruses.pull" = FALSE)
#'
#' conn_generator <- function(skip_backends = NULL) {
#' switch(
#' ("SQLiteConnection" %in% skip_backends) + 1,
#' list(DBI::dbConnect(RSQLite::SQLite())), # SQLiteConnection not in skip_backends
#' list() # SQLiteConnection in skip_backends
#' )
#' }
#'
#' test_diseasystore(
#' DiseasystoreEcdcRespiratoryViruses,
#' conn_generator,
#' data_files = "data/snapshots/2023-11-24_ILIARIRates.csv",
#' target_schema = "test_ds",
#' test_start_date = as.Date("2022-06-20"),
#' slice_ts = "2023-11-24"
#' )
#' }
#' @importFrom curl has_internet
#' @export
test_diseasystore <- function(
diseasystore_generator = NULL,
conn_generator = NULL,
data_files = NULL,
target_schema = "test_ds",
test_start_date = NULL,
skip_backends = NULL,
...
) {
# Determine the class of the diseasystore being tested
diseasystore_class <- diseasystore_generator$classname
coll <- checkmate::makeAssertCollection()
checkmate::assert_class(diseasystore_generator, "R6ClassGenerator", add = coll)
checkmate::assert_function(conn_generator, args = "skip_backends", add = coll)
conns <- conn_generator()
purrr::walk(conns, ~ checkmate::assert_multi_class(., c("DBIConnection", "OdbcConnection"), add = coll))
purrr::walk(conns, DBI::dbDisconnect)
checkmate::assert_character(data_files, null.ok = TRUE, add = coll)
checkmate::assert_character(target_schema, add = coll)
checkmate::assert_date(test_start_date, add = coll)
checkmate::assert_character(skip_backends, null.ok = TRUE, add = coll)
checkmate::assert_true(is.null(diseasyoption("remote_conn", diseasystore_class)) || curl::has_internet(), add = coll)
checkmate::reportAssertions(coll)
# Store the current options for the connections
remote_conn <- diseasyoption("remote_conn", diseasystore_class)
# Check file availability
# In practice, it is best to make a local copy of the data which is stored in the "test_data" folder
# This folder can either be in the package folder (preferred, please create the folder) or in the tempdir()
local_conn <- purrr::detect("test_data", checkmate::test_directory_exists, .default = tempdir())
# Then we download the first n rows of each data set of interest
remote_data_available <- !is.null(remote_conn) && curl::has_internet() # Assume available
if (remote_data_available) {
# Assume we can run on CRAN
skip_on_cran <- FALSE
# Determine the type of source_conn_helper needed for the generator
if (stringr::str_detect(remote_conn, r"{https?:\/\/api.github.com\/repos\/[a-zA-Z-]*\/[a-zA-Z-]*}")) {
source_conn_helper <- source_conn_github # nolint: object_usage_linter
skip_on_cran <- TRUE # CRAN tests cannot use authenticated API requests to Github and will fail
} else if (stringr::str_detect(remote_conn,
r"{\b(?:https?|ftp):\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;]*[-A-Za-z0-9+&@#\/%=~_|]}")) {
source_conn_helper <- source_conn_path # nolint: object_usage_linter
} else {
stop("`source_conn_helper` could not be determined!", call. = FALSE)
}
# Then look for availability of files and download if needed
on_cran <- !interactive() && !identical(Sys.getenv("NOT_CRAN"), "true") && !identical(Sys.getenv("CI"), "true")
if (on_cran && skip_on_cran) {
remote_data_available <- FALSE
} else {
for (file in data_files) {
remote_url <- source_conn_helper(remote_conn, file)
# If we have the file locally, do not re-download but check it exists
if (checkmate::test_file_exists(file.path(local_conn, file))) {
remote_data_available <- !identical(
class(try(readr::read_csv(remote_url, n_max = 1, show_col_types = FALSE, progress = FALSE))),
"try-error"
)
} else { # If we don't have the file locally, copy it down
remote_data_available <- !identical(
class(
try({
dir.create(file.path(local_conn, dirname(file)), recursive = TRUE, showWarnings = FALSE)
readr::read_csv(remote_url, n_max = diseasyoption("n_max", diseasystore_class, .default = 1000),
show_col_types = FALSE, progress = FALSE) |>
readr::write_csv(file.path(local_conn, file))
})
),
"try-error"
)
}
}
}
}
# Throw warning if remote data unavailable (only throw if we are working locally, don't run this check on CRAN)
if (!is.null(remote_conn) && curl::has_internet() && !on_cran && !remote_data_available) {
warning(
glue::glue("remote_conn for {diseasystore_class} unavailable despite internet being available!"),
call. = FALSE
)
}
# Check that the files are available after attempting to download
local <- purrr::every(data_files, ~ checkmate::test_file_exists(file.path(local_conn, .)))
# Set testing options
# - set the target_schema for the diseasystore during tests
# - limit the number of records to use in tests
# - Use the local test data directory for testing
# - Reduce the lock wait during test in case of deadlocks by failed test (1 minute during tests)
withr::local_options(stats::setNames(target_schema, glue::glue("diseasystore.{diseasystore_class}.target_schema")))
withr::local_options(stats::setNames(local_conn, glue::glue("diseasystore.{diseasystore_class}.source_conn")))
withr::local_options(stats::setNames(1000, glue::glue("diseasystore.{diseasystore_class}.n_max")))
withr::local_options(stats::setNames(60, glue::glue("diseasystore.{diseasystore_class}.lock_wait_max")))
# Make a little helper function to clean up the connection after each test
# and check that intermediate dbplyr and SCDB tables (e.g. from dplyr::compute() calls) are cleaned up
connection_clean_up <- function(conn) {
testthat::expect_identical(nrow(SCDB::get_tables(conn, show_temporary = TRUE, pattern = "dbplyr_")), 0L)
testthat::expect_identical(nrow(SCDB::get_tables(conn, show_temporary = TRUE, pattern = "SCDB_")), 0L)
DBI::dbDisconnect(conn)
}
# ######## ######## ###### ######## ###### ######## ######## ###### #### ## ## ######
# ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ## ##
# ## ## ## ## ## ## ## ## ## ## #### ## ##
# ## ###### ###### ## ###### ######## ###### ## #### ## ## ## ## ######
# ## ## ## ## ## ## ## ## ## ## ## ## #### ##
# ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ## ##
# ## ######## ###### ## ###### ######## ######## ###### #### ## ## ######
testthat::test_that(glue::glue("{diseasystore_class} initialises correctly"), {
testthat::skip_if_not_installed("RSQLite")
testthat::skip_if("SQLiteConnection" %in% skip_backends)
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(
verbose = FALSE,
target_conn = DBI::dbConnect(RSQLite::SQLite()),
...
))
# Check feature store has been created
checkmate::expect_class(ds, diseasystore_class)
# Check all FeatureHandlers have been initialised
feature_handlers <- diseasystore_generator$private_fields$.ds_map |>
purrr::map(~ purrr::pluck(diseasystore_generator$private_fields, .))
purrr::walk(feature_handlers, ~ {
checkmate::expect_class(.x, "FeatureHandler")
checkmate::expect_function(.x %.% compute)
checkmate::expect_function(.x %.% get)
checkmate::expect_function(.x %.% key_join)
})
# Check that the min and max dates have been set
checkmate::expect_date(ds$min_start_date, upper = lubridate::today())
checkmate::expect_date(ds$max_end_date, upper = lubridate::today())
# Clean up
rm(ds)
invisible(gc())
})
testthat::test_that(glue::glue("{diseasystore_class} can initialise with remote source_conn"), {
testthat::skip_if_not_installed("RSQLite")
testthat::skip_if("SQLiteConnection" %in% skip_backends)
testthat::skip_if_not(curl::has_internet())
testthat::skip_if_not(remote_data_available)
# Ensure source is set as the remote
withr::local_options(
stats::setNames(
diseasyoption("remote_conn", diseasystore_class),
glue::glue("diseasystore.{diseasystore_class}.source_conn")
)
)
ds <- testthat::expect_no_error(diseasystore_generator$new(
target_conn = DBI::dbConnect(RSQLite::SQLite()),
start_date = test_start_date,
end_date = test_start_date,
verbose = FALSE,
...
))
feature <- testthat::expect_no_error(ds$available_features[[1]])
testthat::expect_no_error(ds$get_feature(feature))
# Clean up
rm(ds)
invisible(gc())
})
testthat::test_that(glue::glue("{diseasystore_class} can initialise with default source_conn"), {
testthat::skip_if_not_installed("RSQLite")
testthat::skip_if("SQLiteConnection" %in% skip_backends)
testthat::skip_if_not(local)
ds <- testthat::expect_no_error(diseasystore_generator$new(
target_conn = DBI::dbConnect(RSQLite::SQLite()),
start_date = test_start_date,
end_date = test_start_date,
verbose = FALSE,
...
))
testthat::expect_no_error(ds$get_feature(ds$available_features[[1]]))
# Clean up
rm(ds)
invisible(gc())
})
testthat::test_that(glue::glue("Skipped test connection are also disallowed in {diseasystore_class} constructor "), {
testthat::skip_if(is.null(skip_backends))
# Check that the constructor throws an error if the connection is skipped in the tests
for (conn in conn_generator()) {
if (!checkmate::test_multi_class(conn, skip_backends)) {
DBI::dbDisconnect(conn)
next
}
# Check that an error is thrown when attempting to use the connection
# This error is a checkmate assertion, so we remove newlines and `*` from the formatted error message
testthat::expect_error(
tryCatch(
diseasystore_generator$new(verbose = FALSE, target_conn = conn),
error = \(e) {
e$message |>
stringr::str_remove_all(stringr::fixed("\n *")) |>
stringr::str_remove_all(stringr::fixed("* ")) |>
simpleError(message = _) |>
stop() # nolint: condition_call_linter
}
),
paste0("Must be disjunct from \\{'", paste(skip_backends, collapse = "|"), "\\'}")
)
}
})
# Set a test_end_date for the test
test_end_date <- test_start_date + lubridate::days(4)
testthat::test_that(glue::glue("{diseasystore_class} can retrieve features from a fresh state"), {
testthat::skip_if_not(local)
for (conn in conn_generator(skip_backends)) {
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(verbose = FALSE, target_conn = conn, ...))
# Attempt to get features from the feature store
# then check that they match the expected value from the generators
purrr::walk2(ds$available_features, ds$ds_map, ~ {
# Suppress our user facing, informative warnings
pkgcond::suppress_warnings(
class = c(
"diseasystore::age_on_date.SQLiteConnection-warning",
"diseasystore::add_years.SQLiteConnection-warning"
),
expr = {
feature_checksums <- ds$get_feature(.x, start_date = test_start_date, end_date = test_end_date) |>
SCDB::digest_to_checksum()
# digest_to_checksum() creates an intermediary table in SQLite
if (inherits(conn, "SQLiteConnection")) SCDB::defer_db_cleanup(feature_checksums)
feature_checksums <- feature_checksums |>
dplyr::pull("checksum") |>
sort()
reference_generator <- purrr::pluck(ds, ".__enclos_env__", "private", .y, "compute")
reference <- reference_generator(
start_date = test_start_date,
end_date = test_end_date,
slice_ts = ds %.% slice_ts,
source_conn = ds %.% source_conn,
ds = ds
)
}
)
# Check that reference data is limited to the study period (start_date and end_date)
reference_out_of_bounds <- reference |>
dplyr::collect() |>
dplyr::filter(.data$valid_until <= !!test_start_date | !!test_end_date < .data$valid_from)
testthat::expect_identical(
SCDB::nrow(reference_out_of_bounds),
0L,
info = glue::glue("Feature `{.x}` returns data outside the study period.")
)
# Check that valid_from / valid_until are date (or stored in the date-like class on the remote)
validity_period_data_types <- reference |>
utils::head(0) |>
dplyr::select("valid_from", "valid_until") |>
dplyr::collect() |>
purrr::map(~ DBI::dbDataType(dbObj = conn, obj = .))
testthat::expect_identical(
purrr::pluck(validity_period_data_types, "valid_from"),
DBI::dbDataType(dbObj = conn, obj = as.Date(0)),
info = glue::glue("Feature `{.x}` has a non-Date `valid_from` column.")
)
testthat::expect_identical(
purrr::pluck(validity_period_data_types, "valid_until"),
DBI::dbDataType(dbObj = conn, obj = as.Date(0)),
info = glue::glue("Feature `{.x}` has a non-Date `valid_until` column.")
)
# Check that valid_until (date or NA) is (strictly) greater than valid_from (date)
# Remember that data is valid in the interval [valid_from, valid_until) and NA is treated as infinite
testthat::expect_identical(sum(is.na(dplyr::pull(reference, "valid_from"))), 0L)
testthat::expect_identical(
sum(dplyr::pull(reference, "valid_from") >= dplyr::pull(reference, "valid_until"), na.rm = TRUE),
0L,
info = glue::glue("Feature `{.x}` has some elements where `valid_from` >= `valid_until`.")
)
# Copy to remote and continue checks
if (!inherits(reference, "tbl_sql") ||
(inherits(reference, "tbl_sql") && !identical(dbplyr::remote_con(reference), conn))) {
reference <- dplyr::copy_to(conn, df = reference, name = SCDB::unique_table_name("ds_reference"))
SCDB::defer_db_cleanup(reference)
}
reference_checksums <- reference |>
SCDB::digest_to_checksum()
# digest_to_checksum() creates an intermediary table in SQLite
if (inherits(conn, "SQLiteConnection")) SCDB::defer_db_cleanup(reference_checksums)
reference_checksums <- reference_checksums |>
dplyr::pull("checksum") |>
sort()
testthat::expect_identical(feature_checksums, reference_checksums)
})
# Clean up
connection_clean_up(conn)
rm(ds)
invisible(gc())
}
})
# Set a test_end_date for the rest of the tests
test_end_date <- test_start_date + lubridate::days(9)
testthat::test_that(glue::glue("{diseasystore_class} can extend existing features"), {
testthat::skip_if_not(local)
for (conn in conn_generator(skip_backends)) {
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(verbose = FALSE, target_conn = conn, ...))
# Attempt to get features from the feature store (using different dates)
# then check that they match the expected value from the generators
purrr::walk2(ds$available_features, ds$ds_map, ~ {
# Suppress our user facing, informative warnings
pkgcond::suppress_warnings(
class = c(
"diseasystore::age_on_date.SQLiteConnection-warning",
"diseasystore::add_years.SQLiteConnection-warning"
),
expr = {
feature_checksums <- ds$get_feature(.x, start_date = test_start_date, end_date = test_end_date) |>
SCDB::digest_to_checksum()
# digest_to_checksum() creates an intermediary table in SQLite
if (inherits(conn, "SQLiteConnection")) SCDB::defer_db_cleanup(feature_checksums)
feature_checksums <- feature_checksums |>
dplyr::pull("checksum") |>
sort()
reference_generator <- purrr::pluck(ds, ".__enclos_env__", "private", .y, "compute")
reference <- reference_generator(
start_date = test_start_date,
end_date = test_end_date,
slice_ts = ds %.% slice_ts,
source_conn = ds %.% source_conn,
ds = ds
)
}
)
# Copy to remote and continue checks
if (!inherits(reference, "tbl_sql") ||
(inherits(reference, "tbl_sql") && !identical(dbplyr::remote_con(reference), conn))) {
reference <- dplyr::copy_to(conn, df = reference, name = SCDB::unique_table_name("ds_reference"))
SCDB::defer_db_cleanup(reference)
}
reference_checksums <- reference |>
SCDB::digest_to_checksum()
# digest_to_checksum() creates an intermediary table in SQLite
if (inherits(conn, "SQLiteConnection")) SCDB::defer_db_cleanup(reference_checksums)
reference_checksums <- reference_checksums |>
dplyr::pull("checksum") |>
sort()
testthat::expect_identical(
feature_checksums,
reference_checksums,
info = glue::glue("Feature `{.x}` has a mismatch between `$get_feature()` and `$compute()`.")
)
})
# Clean up
connection_clean_up(conn)
rm(ds)
invisible(gc())
}
})
# Helper function that checks the output of key_joins
key_join_features_tester <- function(output, start_date, end_date) {
# The output dates should match start and end date
testthat::expect_equal(min(output$date), start_date) # nolint: expect_identical_linter. R 4.5 makes dates more confusing than it already is
testthat::expect_equal(max(output$date), end_date) # nolint: expect_identical_linter.
}
testthat::test_that(glue::glue("{diseasystore_class} can key_join features"), {
testthat::skip_if_not(local)
for (conn in conn_generator(skip_backends)) {
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(verbose = FALSE, target_conn = conn, ...))
# First check we can aggregate without a stratification
for (observable in ds$available_observables) {
testthat::expect_no_error(
ds$key_join_features(observable = observable, stratification = NULL, test_start_date, test_end_date)
)
}
# Then test combinations with non-NULL stratifications
expand.grid(observable = ds$available_observables,
stratification = ds$available_stratifications) |>
purrr::pwalk(\(observable, stratification) {
# This code may fail (gracefully) in some cases. These we catch here
output <- tryCatch({
ds$key_join_features(
observable = as.character(observable),
stratification = eval(parse(text = glue::glue("rlang::quos({stratification})"))),
test_start_date, test_end_date
)
},
error = function(e) {
testthat::expect_identical(
e$message,
paste("(At least one) stratification feature does not match observable aggregator. Not implemented yet.")
)
return(NULL)
},
warning = function(w) {
checkmate::expect_character(w$message, pattern = "Observable already stratified by")
return(NULL)
})
# If the code does not fail, we test the output
if (!is.null(output)) {
key_join_features_tester(dplyr::collect(output), test_start_date, test_end_date)
}
})
# Clean up
connection_clean_up(conn)
rm(ds)
invisible(gc())
}
})
testthat::test_that(glue::glue("{diseasystore_class} can key_join with feature-independent stratification"), {
testthat::skip_if_not(local)
for (conn in conn_generator(skip_backends)) {
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(verbose = FALSE, target_conn = conn, ...))
if (length(ds$available_observables) > 0) {
# Check we can aggregate with feature-independent stratifications
output <- ds$key_join_features(
observable = ds$available_observables[[1]],
stratification = rlang::quos(string = "test", number = 2),
test_start_date,
test_end_date
)
testthat::expect_identical(unique(dplyr::pull(output, "string")), "test")
testthat::expect_identical(unique(dplyr::pull(output, "number")), 2)
}
# Clean up
connection_clean_up(conn)
rm(ds)
invisible(gc())
}
})
testthat::test_that(glue::glue("{diseasystore_class} key_join fails gracefully"), {
testthat::skip_if_not(local)
for (conn in conn_generator(skip_backends)) {
# Initialise without start_date and end_date
ds <- testthat::expect_no_error(diseasystore_generator$new(verbose = FALSE, target_conn = conn, ...))
# Attempt to perform the possible key_joins
# Test key_join with malformed inputs
expand.grid(observable = ds$available_observables,
stratification = "non_existent_stratification") |>
purrr::pwalk(\(observable, stratification) {
# This code may fail (gracefully) in some cases. These we catch here
output <- tryCatch({
ds$key_join_features(
observable = as.character(observable),
stratification = eval(parse(text = glue::glue("rlang::quos({stratification})"))),
start_date = test_start_date,
end_date = test_end_date
)
}, error = function(e) {
checkmate::expect_character(e$message, pattern = "Stratification could not be computed")
return(NULL)
})
# If the code does not fail, we test the output
if (!is.null(output)) {
key_join_features_tester(dplyr::collect(output), test_start_date, test_end_date)
}
})
expand.grid(observable = ds$available_observables,
stratification = "test = non_existent_stratification") |>
purrr::pwalk(\(observable, stratification) {
# This code may fail (gracefully) in some cases. These we catch here
output <- tryCatch({
ds$key_join_features(
observable = as.character(observable),
stratification = eval(parse(text = glue::glue("rlang::quos({stratification})"))),
start_date = test_start_date,
end_date = test_end_date
)
}, error = function(e) {
checkmate::expect_character(e$message, pattern = "Stratification could not be computed")
return(NULL)
})
# If the code does not fail, we test the output
if (!is.null(output)) {
key_join_features_tester(dplyr::collect(output), test_start_date, test_end_date)
}
})
# Clean up
connection_clean_up(conn)
rm(ds)
invisible(gc())
}
})
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.