R/write_testthat_file.R

Defines functions write_testthat_file

Documented in write_testthat_file

#
#                _         _    _      _                _    
#               (_)       | |  | |    | |              | |   
#  _ __    ___   _  _ __  | |_ | |__  | |  __ _  _ __  | | __
# | '_ \  / _ \ | || '_ \ | __|| '_ \ | | / _` || '_ \ | |/ /
# | |_) || (_) || || | | || |_ | |_) || || (_| || | | ||   < 
# | .__/  \___/ |_||_| |_| \__||_.__/ |_| \__,_||_| |_||_|\_\
# | |                                                        
# |_|                                                        
# 
# This file is part of the 'rich-iannone/pointblank' package.
# 
# (c) Richard Iannone <riannone@me.com>
# 
# For full copyright and license information, please look at
# https://rich-iannone.github.io/pointblank/LICENSE.html
#


#' Transform a **pointblank** agent to a **testthat** test file
#' 
#' @description
#' With a **pointblank** *agent*, we can write a **testthat** test file and opt
#' to place it in the `testthat/tests` if it is available in the project path
#' (we can specify an alternate path as well). This works by transforming the
#' validation steps to a series of `expect_*()` calls inside individual
#' [testthat::test_that()] statements.
#' 
#' A major requirement for using `write_testthat_file()` on an agent is the
#' presence of an expression that can retrieve the target table. Typically, we
#' might supply a table-prep formula, which is a formula that can be invoked to
#' obtain the target table (e.g., `tbl = ~ pointblank::small_table`). This
#' user-supplied statement will be used by `write_testthat_file()` to generate a
#' table-loading statement at the top of the new **testthat** test file so that
#' the target table is available for each of the [testthat::test_that()]
#' statements that follow. If an *agent* was not created using a table-prep
#' formula set for the `tbl`, it can be modified via the [set_tbl()] function.
#' 
#' Thresholds will be obtained from those applied for the `stop` state. This can
#' be set up for a **pointblank** *agent* by passing an `action_levels` object
#' to the `actions` argument of [create_agent()] or the same argument of any
#' included validation function. If `stop` thresholds are not available, then a
#' threshold value of `1` will be used for each generated `expect_*()` statement
#' in the resulting **testthat** test file.
#' 
#' There is no requirement that the **agent** first undergo interrogation with
#' [interrogate()]. However, it may be useful as a dry run to interactively
#' perform an interrogation on the target data before generating the
#' **testthat** test file.
#' 
#' @details 
#' Tests for inactive validation steps will be skipped with a clear message
#' indicating that the reason for skipping was due to the test not being active.
#' Any inactive validation steps can be forced into an active state by using the
#' [activate_steps()] on an *agent* (the opposite is possible with the
#' [deactivate_steps()] function).
#' 
#' The **testthat** package comes with a series of `skip_on_*()` functions which
#' conveniently cause the test file to be skipped entirely if certain conditions
#' are met. We can quickly set any number of these at the top of the
#' **testthat** test file by supplying keywords as a vector to the `skips`
#' option of `write_testthat_file()`. For instance, setting 
#' `skips = c("cran", "windows)` will add the **testthat** `skip_on_cran()` and
#' `skip_on_os("windows")` statements, meaning that the generated test file
#' won't run on a CRAN system or if the system OS is Windows.
#' 
#' Here is an example of **testthat** test file output (`"test-small_table.R"`):
#' 
#' ```r
#' # Generated by pointblank
#' 
#' tbl <- small_table
#' 
#' test_that("column `date_time` exists", {
#'   
#'   expect_col_exists(
#'     tbl,
#'     columns = vars(date_time),
#'     threshold = 1
#'   ) 
#' })
#' 
#' test_that("values in `c` should be <= `5`", {
#'   
#'   expect_col_vals_lte(
#'     tbl,
#'     columns = vars(c),
#'     value = 5,
#'     threshold = 0.25
#'   ) 
#' })
#' 
#' ```
#' 
#' This was generated by the following set of R statements:
#' 
#' ```r
#' library(pointblank)
#' 
#' agent <- 
#'   create_agent(
#'     tbl = ~ small_table,
#'     actions = action_levels(stop_at = 0.25)
#'   ) %>%
#'   col_exists(vars(date_time)) %>%
#'   col_vals_lte(vars(c), value = 5)
#'   
#' write_testthat_file(
#'   agent = agent,
#'   name = "small_table",
#'   path = "."
#' )
#' ```
#' 
#' @param agent An agent object of class `ptblank_agent`.
#' @param name An optional name for for the **testhat** test file. This should
#'   be a name without extension and without the leading `"test-"` text. If
#'   nothing is supplied, the name will be derived from the `tbl_name` in the
#'   agent. If that's not present, a generic name will be used.
#' @param path A path can be specified here if there shouldn't be an attempt to
#'   place the file in `testthat/tests`.
#' @param overwrite Should a **testthat** file of the same name be overwritten?
#'   By default, this is `FALSE`.
#' @param skips This is an optional vector of test-skipping keywords modeled
#'   after the **testthat** `skip_on_*()` functions. The following keywords can
#'   be used to include `skip_on_*()` statements: `"cran"`
#'   ([testthat::skip_on_cran()]), `"travis"` ([testthat::skip_on_travis()]),
#'   `"appveyor"` ([testthat::skip_on_appveyor()]), `"ci"`
#'   ([testthat::skip_on_ci()]), `"covr"` ([testthat::skip_on_covr()]), `"bioc"`
#'   ([testthat::skip_on_bioc()]). There are keywords for skipping tests on
#'   certain operating systems and all of them will insert a specific
#'   [testthat::skip_on_os()] call. These are `"windows"`
#'   (`skip_on_os("windows")`), `"mac"` (`skip_on_os("mac")`), `"linux"`
#'   (`skip_on_os("linux")`), and `"solaris"` (`skip_on_os("solaris")`). These
#'   calls will be placed at the top of the generated **testthat** test file.
#' @param quiet Should the function *not* inform when the file is written? By
#'   default this is `FALSE`.
#'   
#' @return Invisibly returns `TRUE` if the **testthat** file has been written. 
#' 
#' @section Examples:
#' 
#' ## Creating a **testthat** file from an *agent*
#' 
#' Let's walk through a data quality analysis of an extremely small table. It's
#' actually called `small_table` and we can find it as a dataset in this
#' package.
#' 
#' ```{r}
#' small_table
#' ```
#' 
#' Creating an `action_levels` object is a common workflow step when creating a
#' pointblank agent. We designate failure thresholds to the `warn`, `stop`, and
#' `notify` states using `action_levels()`.
#' 
#' ```r
#' al <- 
#'   action_levels(
#'     warn_at = 0.10,
#'     stop_at = 0.25,
#'     notify_at = 0.35
#'   )
#' ```
#' 
#' A pointblank `agent` object is now created and the `al` object is provided to
#' the agent. The static thresholds provided by the `al` object make reports a
#' bit more useful after interrogation.
#' 
#' ```r
#' agent <- 
#'   create_agent(
#'     tbl = ~ small_table,
#'     label = "An example.",
#'     actions = al
#'   ) %>%
#'   col_exists(vars(date, date_time)) %>%
#'   col_vals_regex(
#'     vars(b),
#'     regex = "[0-9]-[a-z]{3}-[0-9]{3}"
#'   ) %>%
#'   col_vals_gt(vars(d), value = 100) %>%
#'   col_vals_lte(vars(c), value = 5) %>%
#'   interrogate()
#' ```
#' 
#' This agent and all of the checks can be transformed into a testthat file with
#' `write_testthat_file()`. The `stop` thresholds will be ported over to the
#' `expect_*()` functions in the new file.
#' 
#' ```r
#' write_testthat_file(
#'   agent = agent,
#'   name = "small_table",
#'   path = "."
#' )
#' ```
#' 
#' The above code will generate a file with the name `"test-small_table.R"`. The
#' path was specified with `"."` so the file will be placed in the working
#' directory. If you'd like to easily add this new file to the `tests/testthat`
#' directory then `path = NULL` (the default) will makes this possible (this is
#' useful during package development).
#' 
#' What's in the new file? This:
#' 
#' ```r
#' # Generated by pointblank
#' 
#' tbl <- small_table
#' 
#' test_that("column `date` exists", {
#'   
#'   expect_col_exists(
#'     tbl,
#'     columns = vars(date),
#'     threshold = 1
#'   ) 
#' })
#' 
#' test_that("column `date_time` exists", {
#'   
#'   expect_col_exists(
#'     tbl,
#'     columns = vars(date_time),
#'     threshold = 1
#'   ) 
#' })
#' 
#' test_that("values in `b` should match the regular expression: 
#' `[0-9]-[a-z]{3}-[0-9]{3}`", {
#'   
#'   expect_col_vals_regex(
#'     tbl,
#'     columns = vars(b),
#'     regex = "[0-9]-[a-z]{3}-[0-9]{3}",
#'     threshold = 0.25
#'   ) 
#' })
#' 
#' test_that("values in `d` should be > `100`", {
#'   
#'   expect_col_vals_gt(
#'     tbl,
#'     columns = vars(d),
#'     value = 100,
#'     threshold = 0.25
#'   ) 
#' })
#' 
#' test_that("values in `c` should be <= `5`", {
#'   
#'   expect_col_vals_lte(
#'     tbl,
#'     columns = vars(c),
#'     value = 5,
#'     threshold = 0.25
#'   ) 
#' })
#' ```
#' 
#' ## Using an *agent* stored on disk as a YAML file
#' 
#' An agent on disk as a YAML file can be made into a **testthat** file. The
#' `"agent-small_table.yml"` file is available in the **pointblank** package
#' and the path can be obtained with `system.file()`.
#' 
#' ```r
#' yml_file <- 
#'   system.file(
#'     "yaml", "agent-small_table.yml",
#'     package = "pointblank"
#'   )
#' ```
#'   
#'
#' Writing the **testthat** file into the working directory is much the same as
#' before but we're reading the agent from disk this time. It's a read and write
#' combo, really. Again, we are choosing to write the file to the working
#' directory by using `path = "."`.
#' 
#' ```r
#' write_testthat_file(
#'   agent = yaml_read_agent(yml_file),
#'   name = "from_agent_yaml",
#'   path = "."
#' )
#' ```
#' 
#' @family Post-interrogation
#' @section Function ID:
#' 8-5
#' 
#' @export
write_testthat_file <- function(
    agent,
    name = NULL,
    path = NULL,
    overwrite = FALSE,
    skips = NULL,
    quiet = FALSE
) {

  # Enforce that the agent must have a `read_fn`
  # TODO: Improve the content of the `stop()` message
  if (is.null(agent$read_fn)) {
    stop(
      "The agent must have a `read_fn` value when transforming into tests.",
      call. = FALSE
    )
  }
  
  # Select only the necessary columns from the agent's `validation_set` 
  agent_validation_set <- 
    agent$validation_set %>% 
    dplyr::select(
      i, assertion_type, brief, eval_active
    )
  
  # Get the stop threshold values
  stop_thresholds <- get_thresholds(agent = agent, type = "stop")
  
  # Create a string that will be used to read the table (at the top
  # of the testthat test file)
  read_tbl_str <-
    paste0(
      "tbl <- ",
      utils::capture.output(rlang::f_rhs(agent$read_fn))
    )
  
  # Obtain expressions from the agent that correspond to the
  # validation function calls
  
  # Using the `expanded = TRUE` option in `agent_get_exprs()` so that
  # that the expanded form of the validation steps is available
  # (same number of steps as in the validation report)
  agent_exprs_raw <- 
    agent_get_exprs(agent, expanded = TRUE) %>%
    strsplit("%>%") %>%
    unlist() %>%
    gsub("^\n", "", .)
  
  if (grepl("^create_agent", agent_exprs_raw[1])) {
    agent_exprs_raw <- agent_exprs_raw[-1]
  }
  
  # Remove the `create_agent()` statement and perform
  # some initial mutations to the testing statements
  agent_exprs_raw <-
    vapply(
      agent_exprs_raw,
      FUN.VALUE = character(1),
      USE.NAMES = FALSE,
      FUN = function(x) {
        if (grepl("^create_agent", x)) {
          return(x)
        }
        x <- gsub("\\(\n\\s+", "(\n  tbl,\n  ", x)
        x <- gsub("^", "expect_", x)
        if (grepl("expect_rows_distinct\\(\\)", x)) {
          x <- "expect_rows_distinct(tbl)"
        }
        if (grepl("expect_rows_complete\\(\\)", x)) {
          x <- "expect_rows_complete(tbl)"
        }
        x
      }
    )
  
  # Insert any `stop` threshold values into
  # the `agent_exprs_raw` vector
  agent_exprs_raw <- 
    insert_threshold_values(
      agent_exprs_raw,
      threshold_vals = stop_thresholds
    )
  
  # Generate descriptions for each test
  test_that_desc <- 
    agent_validation_set$brief %>%
    gsub("(Expect that |Expect )", "", .) %>%
    gsub(" $", "", .) %>%
    gsub("\\.$", "", .)
  
  # Initialize vector of `test_that()` tests 
  test_that_tests <- c()
    
  # Assemble the sequence of `test_that()` tests
  for (i in seq_along(agent_exprs_raw)) {

    test_that_tests <-
      c(test_that_tests,
        paste0(
          "test_that(\"",
          test_that_desc[i],
          "\", {\n\n",
          agent_exprs_raw[i] %>%
            gsub("^", "  ", .) %>%
            gsub("\n  ", "\n    ", .) %>%
            gsub("\n\\)", "\n  )", .) %>%
            gsub("    #", "  #", .),
          "\n})\n\n"
        )
      )
  }

  # Paste the `testthat` test strings together
  test_that_tests <- 
    paste0(
      paste0(read_tbl_str, "\n", "\n"),
      paste0(test_that_tests, collapse = ""),
      collapse = ""
    )
  
  # Process the `skips` vector
  skips_text <- process_skips_text(skips)
  
  if (!is.null(skips_text)) {
    test_that_tests <-
      paste0(
        skips_text,
        test_that_tests,
        collapse = ""
      )
  }
  
  test_that_tests <-
    paste0(
      "# Generated by pointblank\n\n",
      test_that_tests,
      collapse = ""
    )

  # Remove trailing newlines  
  test_that_tests <- gsub("\n\n$", "", test_that_tests)
  
  # Create the filename for the testthat test file
  file_name <- resolve_test_filename(agent = agent, name = name)
  
  # Determine if there is a path `tests/testthat`
  if (is.null(path) && fs::dir_exists("tests/testthat")) {
    file_path <- "tests/testthat"
  } else if (is.null(path) && !fs::dir_exists("tests/testthat")) {
    file_path <- "."
  } else if (!is.null(path) && !fs::dir_exists(path)) {
    # Stop function and inform user that this function
    # won't create a path
    stop(
      "The provided `path` does not exist:\n",
      "* Please create the path",
      call. = FALSE
    )
  } else if (!is.null(path) && fs::dir_exists(path)) {
    file_path <- path
  }

  # Create path that contains the testthat test file name
  path <- as.character(fs::path_norm(fs::path_wd(file_path, file_name)))
  
  # Check if the file to write already exists; if it does, don't
  # write the new file if `overwrite` is FALSE
  if (fs::file_exists(path) && !overwrite) {
    stop(
      "The testthat file of the same name already exists:\n",
      "* set `overwrite` to `TRUE`, or\n",
      "* choose a different `name`, or\n",
      "* define another `path` for the file",
      call. = FALSE
    )
  }
  
  # Write the testthat file to the resulting `path`
  pb_write_file(
    path = path,
    lines = test_that_tests,
    append = FALSE
  )
  
  # Generate cli message w.r.t. written YAML file
  if (!quiet) {
    cli_bullet_msg(
      msg = "The testthat file has been written to `{path}`",
      bullet = cli::symbol$tick,
      color = "green"
    )
  }
  
  invisible(TRUE)
}

get_thresholds <- function(agent, type) {
  
  vapply(
    agent$validation_set$actions,
    FUN.VALUE = numeric(1), 
    USE.NAMES = FALSE,
    FUN = function(x) {
      
      type_fraction <- x[[paste0(type, "_fraction")]]
      type_count <- x[[paste0(type, "_count")]]
      
      if (is.null(type_fraction) && is.null(type_count)) {
        x <- NA_real_
      } else if (!is.null(type_fraction)) {
        x <- type_fraction
      } else {
        x <- type_count
      }
      
      x
    }
  )
}

insert_threshold_values <- function(
    agent_exprs_raw,
    threshold_vals
) {
  
  vapply(
    seq_along(agent_exprs_raw),
    FUN.VALUE = character(1),
    USE.NAMES = FALSE,
    FUN = function(x) {
      
      if (
        grepl(
          "(^expect_col_is_|^expect_col_exists|^expect_col_schema_match)",
          agent_exprs_raw[x]
        )
      ) {
        threshold_val <- 1
      } else {
        threshold_val <- threshold_vals[x]
      }
      
      if (is.na(threshold_vals[x])) {
        threshold_val <- 1
      }
      
      agent_exprs_raw[x] %>% 
        gsub("\n\\)", paste0(",\n  threshold = ", threshold_val, "\n\\)"), .)
    }
  )
}

resolve_test_filename <- function(agent, name) {
  
  if (is.null(name)) {
    if (is.null(agent$tbl_name) ||
        is.na(agent$tbl_name) ||
        agent$tbl_name == "") {
      
      file_name <- "test-pointblank_validation.R"
      
    } else {
      
      file_name <- 
        agent$tbl_name %>%
        fs::path_sanitize() %>%
        gsub("(\\.| |'|:)", "_", .) %>%
        paste0("test-", .) %>%
        paste0(., ".R")
    }
    
  } else {
    
    if (!is.character(name)) {
      stop(
        "The value supplied to `name` must be of class 'character'.",
        call. = FALSE
      )
    }
    
    file_name <- 
      name[1] %>%
      fs::path_sanitize() %>%
      gsub("(\\.| |'|:)", "_", .) %>%
      paste0("test-", .) %>%
      paste0(., ".R")
  }
  
  file_name
}

process_skips_text <- function(skips) {
  
  if (is.null(skips) || any(!is.character(skips))) {
    return(NULL)
  }
  
  skips_keywords <-
    c(
      "cran", "travis", "appveyor", "ci", "covr", "bioc",
      "windows", "mac", "linux", "solaris"
    )
  
  skips_keywords_os <- c("windows", "mac", "linux", "solaris")
  skips_keywords_non_os <- base::setdiff(skips_keywords, skips_keywords_os)
  
  if (!all(skips %in% skips_keywords)) {
    
    stop(
      "All values provided in `skips` must be valid skipping keywords.",
      call. = FALSE
    )
  }
  
  skips_text <- 
    vapply(
      unique(skips),
      FUN.VALUE = character(1),
      USE.NAMES = FALSE,
      FUN = function(x) {
        
        if (x %in% skips_keywords_non_os) {
          x <- paste0("skip_on_", x, "()\n")
        }
        
        if (x %in% skips_keywords_os) {
          x <- paste0("skip_on_os(\"", x, "\")\n")
        }
        
        x
      }
    )
  
  paste0(paste0(skips_text, collapse = ""), "\n")
}

pb_write_file <- function(
    path,
    lines,
    append = FALSE,
    line_ending = NULL
) {
  
  stopifnot(is.character(path))
  stopifnot(is.character(lines))
  
  if (append) {
    file_mode <- "ab"
  } else {
    file_mode <- "wb"
  }

  # Create a file connection  
  file_connection <- file(path, open = file_mode, encoding = "utf-8")
  
  on.exit(close(file_connection))
  
  # Obtain the appropriate line ending based on the platform
  if (.Platform$OS.type == "windows") {
    line_ending <- "\r\n"
  } else {
    line_ending <- "\n"
  }
  
  lines <- gsub("\r?\n", line_ending, lines)
  
  writeLines(
    text = enc2utf8(lines),
    con = file_connection,
    sep = line_ending, 
    useBytes = TRUE
  )
  
  invisible(TRUE)
}

Try the pointblank package in your browser

Any scripts or data that you put into this service are public.

pointblank documentation built on April 25, 2023, 5:06 p.m.