#------------------------------------------------------------------------------#
#
# _ _ _ _ _
# (_) | | | | | | | |
# _ __ ___ _ _ __ | |_ | |__ | | __ _ _ __ | | __
# | '_ \ / _ \ | || '_ \ | __|| '_ \ | | / _` || '_ \ | |/ /
# | |_) || (_) || || | | || |_ | |_) || || (_| || | | || <
# | .__/ \___/ |_||_| |_| \__||_.__/ |_| \__,_||_| |_||_|\_\
# | |
# |_|
#
# This file is part of the 'rstudio/pointblank' project.
#
# Copyright (c) 2017-2024 pointblank authors
#
# For full copyright and license information, please look at
# https://rstudio.github.io/pointblank/LICENSE.html
#
#------------------------------------------------------------------------------#
#' Transform a **pointblank** agent to a **testthat** test file
#'
#' @description
#'
#' With a **pointblank** *agent*, we can write a **testthat** test file and opt
#' to place it in the `testthat/tests` if it is available in the project path
#' (we can specify an alternate path as well). This works by transforming the
#' validation steps to a series of `expect_*()` calls inside individual
#' [testthat::test_that()] statements.
#'
#' A major requirement for using `write_testthat_file()` on an agent is the
#' presence of an expression that can retrieve the target table. Typically, we
#' might supply a table-prep formula, which is a formula that can be invoked to
#' obtain the target table (e.g., `tbl = ~ pointblank::small_table`). This
#' user-supplied statement will be used by `write_testthat_file()` to generate a
#' table-loading statement at the top of the new **testthat** test file so that
#' the target table is available for each of the [testthat::test_that()]
#' statements that follow. If an *agent* was not created using a table-prep
#' formula set for the `tbl`, it can be modified via the [set_tbl()] function.
#'
#' Thresholds will be obtained from those applied for the `stop` state. This can
#' be set up for a **pointblank** *agent* by passing an `action_levels` object
#' to the `actions` argument of [create_agent()] or the same argument of any
#' included validation function. If `stop` thresholds are not available, then a
#' threshold value of `1` will be used for each generated `expect_*()` statement
#' in the resulting **testthat** test file.
#'
#' There is no requirement that the **agent** first undergo interrogation with
#' [interrogate()]. However, it may be useful as a dry run to interactively
#' perform an interrogation on the target data before generating the
#' **testthat** test file.
#'
#' @details
#'
#' Tests for inactive validation steps will be skipped with a clear message
#' indicating that the reason for skipping was due to the test not being active.
#' Any inactive validation steps can be forced into an active state by using the
#' [activate_steps()] on an *agent* (the opposite is possible with the
#' [deactivate_steps()] function).
#'
#' The **testthat** package comes with a series of `skip_on_*()` functions which
#' conveniently cause the test file to be skipped entirely if certain conditions
#' are met. We can quickly set any number of these at the top of the
#' **testthat** test file by supplying keywords as a vector to the `skips`
#' option of `write_testthat_file()`. For instance, setting
#' `skips = c("cran", "windows)` will add the **testthat** `skip_on_cran()` and
#' `skip_on_os("windows")` statements, meaning that the generated test file
#' won't run on a CRAN system or if the system OS is Windows.
#'
#' Here is an example of **testthat** test file output (`"test-small_table.R"`):
#'
#' ```r
#' # Generated by pointblank
#'
#' tbl <- small_table
#'
#' test_that("column `date_time` exists", {
#'
#' expect_col_exists(
#' tbl,
#' columns = date_time,
#' threshold = 1
#' )
#' })
#'
#' test_that("values in `c` should be <= `5`", {
#'
#' expect_col_vals_lte(
#' tbl,
#' columns = c,
#' value = 5,
#' threshold = 0.25
#' )
#' })
#'
#' ```
#'
#' This was generated by the following set of R statements:
#'
#' ```r
#' library(pointblank)
#'
#' agent <-
#' create_agent(
#' tbl = ~ small_table,
#' actions = action_levels(stop_at = 0.25)
#' ) %>%
#' col_exists(date_time) %>%
#' col_vals_lte(c, value = 5)
#'
#' write_testthat_file(
#' agent = agent,
#' name = "small_table",
#' path = "."
#' )
#' ```
#'
#' @param agent *The pointblank agent object*
#'
#' `obj:<ptblank_agent>` // **required**
#'
#' A **pointblank** *agent* object that is commonly created through the use of
#' the [create_agent()] function.
#'
#' @param name *Name for generated testthat file*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' An optional name for for the **testhat** test file. This should be a name
#' without extension and without the leading `"test-"` text. If nothing is
#' supplied, the name will be derived from the `tbl_name` in the agent. If
#' that's not present, a generic name will be used.
#'
#' @param path *File path*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' A path can be specified here if there shouldn't be an attempt to place the
#' file in `testthat/tests`.
#'
#' @param overwrite *Overwrite a previous file of the same name*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should a **testthat** file of the same name be overwritten?
#'
#' @param skips *Test skipping*
#'
#' `vector<character>` // *default:* `NULL` (`optional`)
#'
#' This is an optional vector of test-skipping keywords modeled after the
#' **testthat** `skip_on_*()` functions. The following keywords can be used to
#' include `skip_on_*()` statements: `"cran"` ([testthat::skip_on_cran()]),
#' `"travis"` ([testthat::skip_on_travis()]), `"appveyor"`
#' ([testthat::skip_on_appveyor()]), `"ci"` ([testthat::skip_on_ci()]),
#' `"covr"` ([testthat::skip_on_covr()]), `"bioc"`
#' ([testthat::skip_on_bioc()]). There are keywords for skipping tests on
#' certain operating systems and all of them will insert a specific
#' [testthat::skip_on_os()] call. These are `"windows"`
#' (`skip_on_os("windows")`), `"mac"` (`skip_on_os("mac")`), `"linux"`
#' (`skip_on_os("linux")`), and `"solaris"` (`skip_on_os("solaris")`). These
#' calls will be placed at the top of the generated **testthat** test file.
#'
#' @param quiet *Inform (or not) upon file writing*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the function *not* inform when the file is written?
#'
#' @return Invisibly returns `TRUE` if the **testthat** file has been written.
#'
#' @section Examples:
#'
#' ## Creating a **testthat** file from an *agent*
#'
#' Let's walk through a data quality analysis of an extremely small table. It's
#' actually called `small_table` and we can find it as a dataset in this
#' package.
#'
#' ```{r}
#' small_table
#' ```
#'
#' Creating an `action_levels` object is a common workflow step when creating a
#' pointblank agent. We designate failure thresholds to the `warn`, `stop`, and
#' `notify` states using `action_levels()`.
#'
#' ```r
#' al <-
#' action_levels(
#' warn_at = 0.10,
#' stop_at = 0.25,
#' notify_at = 0.35
#' )
#' ```
#'
#' A pointblank `agent` object is now created and the `al` object is provided to
#' the agent. The static thresholds provided by the `al` object make reports a
#' bit more useful after interrogation.
#'
#' ```r
#' agent <-
#' create_agent(
#' tbl = ~ small_table,
#' label = "An example.",
#' actions = al
#' ) %>%
#' col_exists(c(date, date_time)) %>%
#' col_vals_regex(
#' b,
#' regex = "[0-9]-[a-z]{3}-[0-9]{3}"
#' ) %>%
#' col_vals_gt(d, value = 100) %>%
#' col_vals_lte(c, value = 5) %>%
#' interrogate()
#' ```
#'
#' This agent and all of the checks can be transformed into a testthat file with
#' `write_testthat_file()`. The `stop` thresholds will be ported over to the
#' `expect_*()` functions in the new file.
#'
#' ```r
#' write_testthat_file(
#' agent = agent,
#' name = "small_table",
#' path = "."
#' )
#' ```
#'
#' The above code will generate a file with the name `"test-small_table.R"`. The
#' path was specified with `"."` so the file will be placed in the working
#' directory. If you'd like to easily add this new file to the `tests/testthat`
#' directory then `path = NULL` (the default) will makes this possible (this is
#' useful during package development).
#'
#' What's in the new file? This:
#'
#' ```r
#' # Generated by pointblank
#'
#' tbl <- small_table
#'
#' test_that("column `date` exists", {
#'
#' expect_col_exists(
#' tbl,
#' columns = date,
#' threshold = 1
#' )
#' })
#'
#' test_that("column `date_time` exists", {
#'
#' expect_col_exists(
#' tbl,
#' columns = date_time,
#' threshold = 1
#' )
#' })
#'
#' test_that("values in `b` should match the regular expression:
#' `[0-9]-[a-z]{3}-[0-9]{3}`", {
#'
#' expect_col_vals_regex(
#' tbl,
#' columns = b,
#' regex = "[0-9]-[a-z]{3}-[0-9]{3}",
#' threshold = 0.25
#' )
#' })
#'
#' test_that("values in `d` should be > `100`", {
#'
#' expect_col_vals_gt(
#' tbl,
#' columns = d,
#' value = 100,
#' threshold = 0.25
#' )
#' })
#'
#' test_that("values in `c` should be <= `5`", {
#'
#' expect_col_vals_lte(
#' tbl,
#' columns = c,
#' value = 5,
#' threshold = 0.25
#' )
#' })
#' ```
#'
#' ## Using an *agent* stored on disk as a YAML file
#'
#' An agent on disk as a YAML file can be made into a **testthat** file. The
#' `"agent-small_table.yml"` file is available in the **pointblank** package
#' and the path can be obtained with `system.file()`.
#'
#' ```r
#' yml_file <-
#' system.file(
#' "yaml", "agent-small_table.yml",
#' package = "pointblank"
#' )
#' ```
#'
#'
#' Writing the **testthat** file into the working directory is much the same as
#' before but we're reading the agent from disk this time. It's a read and write
#' combo, really. Again, we are choosing to write the file to the working
#' directory by using `path = "."`.
#'
#' ```r
#' write_testthat_file(
#' agent = yaml_read_agent(yml_file),
#' name = "from_agent_yaml",
#' path = "."
#' )
#' ```
#'
#' @family Post-interrogation
#' @section Function ID:
#' 8-5
#'
#' @export
write_testthat_file <- function(
agent,
name = NULL,
path = NULL,
overwrite = FALSE,
skips = NULL,
quiet = FALSE
) {
# Enforce that the agent must have a `read_fn`
# TODO: Improve the content of the `stop()` message
if (is.null(agent$read_fn)) {
stop(
"The agent must have a `read_fn` value when transforming into tests.",
call. = FALSE
)
}
# Select only the necessary columns from the agent's `validation_set`
agent_validation_set <-
agent$validation_set %>%
dplyr::select(
i, assertion_type, brief, eval_active
)
# Get the stop threshold values
stop_thresholds <- get_thresholds(agent = agent, type = "stop")
# Create a string that will be used to read the table (at the top
# of the testthat test file)
read_tbl_str <-
paste0(
"tbl <- ",
utils::capture.output(rlang::f_rhs(agent$read_fn))
)
# Obtain expressions from the agent that correspond to the
# validation function calls
# Using the `expanded = TRUE` option in `agent_get_exprs()` so that
# that the expanded form of the validation steps is available
# (same number of steps as in the validation report)
agent_exprs_raw <-
agent_get_exprs(agent, expanded = TRUE) %>%
strsplit("%>%") %>%
unlist() %>%
gsub("^\n", "", .)
if (grepl("^create_agent", agent_exprs_raw[1])) {
agent_exprs_raw <- agent_exprs_raw[-1]
}
# Remove the `create_agent()` statement and perform
# some initial mutations to the testing statements
agent_exprs_raw <-
vapply(
agent_exprs_raw,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl("^create_agent", x)) {
return(x)
}
x <- gsub("\\(\n\\s+", "(\n tbl,\n ", x)
x <- gsub("^", "expect_", x)
if (grepl("expect_rows_distinct\\(\\)", x)) {
x <- "expect_rows_distinct(tbl)"
}
if (grepl("expect_rows_complete\\(\\)", x)) {
x <- "expect_rows_complete(tbl)"
}
x
}
)
# Insert any `stop` threshold values into
# the `agent_exprs_raw` vector
agent_exprs_raw <-
insert_threshold_values(
agent_exprs_raw,
threshold_vals = stop_thresholds
)
# Generate descriptions for each test
test_that_desc <-
agent_validation_set$brief %>%
gsub("(Expect that |Expect )", "", .) %>%
gsub(" $", "", .) %>%
gsub("\\.$", "", .)
# Initialize vector of `test_that()` tests
test_that_tests <- c()
# Assemble the sequence of `test_that()` tests
for (i in seq_along(agent_exprs_raw)) {
test_that_tests <-
c(test_that_tests,
paste0(
"test_that(\"",
test_that_desc[i],
"\", {\n\n",
agent_exprs_raw[i] %>%
gsub("^", " ", .) %>%
gsub("\n ", "\n ", .) %>%
gsub("\n\\)", "\n )", .) %>%
gsub(" #", " #", .),
"\n})\n\n"
)
)
}
# Paste the `testthat` test strings together
test_that_tests <-
paste0(
paste0(read_tbl_str, "\n", "\n"),
paste0(test_that_tests, collapse = ""),
collapse = ""
)
# Process the `skips` vector
skips_text <- process_skips_text(skips)
if (!is.null(skips_text)) {
test_that_tests <-
paste0(
skips_text,
test_that_tests,
collapse = ""
)
}
test_that_tests <-
paste0(
"# Generated by pointblank\n\n",
test_that_tests,
collapse = ""
)
# Remove trailing newlines
test_that_tests <- gsub("\n\n$", "", test_that_tests)
# Create the filename for the testthat test file
file_name <- resolve_test_filename(agent = agent, name = name)
# Determine if there is a path `tests/testthat`
if (is.null(path) && fs::dir_exists("tests/testthat")) {
file_path <- "tests/testthat"
} else if (is.null(path) && !fs::dir_exists("tests/testthat")) {
file_path <- "."
} else if (!is.null(path) && !fs::dir_exists(path)) {
# Stop function and inform user that this function
# won't create a path
stop(
"The provided `path` does not exist:\n",
"* Please create the path",
call. = FALSE
)
} else if (!is.null(path) && fs::dir_exists(path)) {
file_path <- path
}
# Create path that contains the testthat test file name
path <- as.character(fs::path_norm(fs::path_wd(file_path, file_name)))
# Check if the file to write already exists; if it does, don't
# write the new file if `overwrite` is FALSE
if (fs::file_exists(path) && !overwrite) {
stop(
"The testthat file of the same name already exists:\n",
"* set `overwrite` to `TRUE`, or\n",
"* choose a different `name`, or\n",
"* define another `path` for the file",
call. = FALSE
)
}
# Write the testthat file to the resulting `path`
pb_write_file(
path = path,
lines = test_that_tests,
append = FALSE
)
# Generate cli message w.r.t. written YAML file
if (!quiet) {
cli_bullet_msg(
msg = "The testthat file has been written to `{path}`",
bullet = cli::symbol$tick,
color = "green"
)
}
invisible(TRUE)
}
get_thresholds <- function(agent, type) {
vapply(
agent$validation_set$actions,
FUN.VALUE = numeric(1),
USE.NAMES = FALSE,
FUN = function(x) {
type_fraction <- x[[paste0(type, "_fraction")]]
type_count <- x[[paste0(type, "_count")]]
if (is.null(type_fraction) && is.null(type_count)) {
x <- NA_real_
} else if (!is.null(type_fraction)) {
x <- type_fraction
} else {
x <- type_count
}
x
}
)
}
insert_threshold_values <- function(
agent_exprs_raw,
threshold_vals
) {
vapply(
seq_along(agent_exprs_raw),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (
grepl(
"(^expect_col_is_|^expect_col_exists|^expect_col_schema_match)",
agent_exprs_raw[x]
)
) {
threshold_val <- 1
} else {
threshold_val <- threshold_vals[x]
}
if (is.na(threshold_vals[x])) {
threshold_val <- 1
}
agent_exprs_raw[x] %>%
gsub("\n\\)", paste0(",\n threshold = ", threshold_val, "\n\\)"), .)
}
)
}
resolve_test_filename <- function(agent, name) {
if (is.null(name)) {
if (is.null(agent$tbl_name) ||
is.na(agent$tbl_name) ||
agent$tbl_name == "") {
file_name <- "test-pointblank_validation.R"
} else {
file_name <-
agent$tbl_name %>%
fs::path_sanitize() %>%
gsub("(\\.| |'|:)", "_", .) %>%
paste0("test-", .) %>%
paste0(., ".R")
}
} else {
if (!is.character(name)) {
stop(
"The value supplied to `name` must be of class 'character'.",
call. = FALSE
)
}
file_name <-
name[1] %>%
fs::path_sanitize() %>%
gsub("(\\.| |'|:)", "_", .) %>%
paste0("test-", .) %>%
paste0(., ".R")
}
file_name
}
process_skips_text <- function(skips) {
if (is.null(skips) || any(!is.character(skips))) {
return(NULL)
}
skips_keywords <-
c(
"cran", "travis", "appveyor", "ci", "covr", "bioc",
"windows", "mac", "linux", "solaris"
)
skips_keywords_os <- c("windows", "mac", "linux", "solaris")
skips_keywords_non_os <- base::setdiff(skips_keywords, skips_keywords_os)
if (!all(skips %in% skips_keywords)) {
stop(
"All values provided in `skips` must be valid skipping keywords.",
call. = FALSE
)
}
skips_text <-
vapply(
unique(skips),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (x %in% skips_keywords_non_os) {
x <- paste0("skip_on_", x, "()\n")
}
if (x %in% skips_keywords_os) {
x <- paste0("skip_on_os(\"", x, "\")\n")
}
x
}
)
paste0(paste0(skips_text, collapse = ""), "\n")
}
pb_write_file <- function(
path,
lines,
append = FALSE,
line_ending = NULL
) {
stopifnot(is.character(path))
stopifnot(is.character(lines))
if (append) {
file_mode <- "ab"
} else {
file_mode <- "wb"
}
# Create a file connection
file_connection <- file(path, open = file_mode, encoding = "utf-8")
on.exit(close(file_connection))
# Obtain the appropriate line ending based on the platform
if (.Platform$OS.type == "windows") {
line_ending <- "\r\n"
} else {
line_ending <- "\n"
}
lines <- gsub("\r?\n", line_ending, lines)
writeLines(
text = enc2utf8(lines),
con = file_connection,
sep = line_ending,
useBytes = TRUE
)
invisible(TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.