The package logo is based on the vector graphic "help" created by Luis Prado, which he licensed under CC-BY 3.0. The original logo is stored under `data-raw/logo/help.svg` and a colored raster image version under `data-raw/logo/help_yellow.png`.

The latter was fed to hexSticker to create `data-raw/logo/hexsticker.png` and `data-raw/logo/hexsticker_alt.png` as follows (the spotlight has some "random jitter", so hexSticker creates a slightly different image on each run):
``` r
hexSticker::sticker(subplot = "data-raw/logo/help_yellow.png",
                    s_x = 1.0,
                    s_y = 1.31,
                    s_width = 0.47,
                    # the default asp 1.0 distorts the subplot, so we have to fiddle -.-
                    asp = 0.85,
                    package = "pal",
                    p_x = 1.0,
                    p_y = 0.65,
                    p_color = "#ffcc00",
                    p_size = 40.0,
                    h_fill = "#000000",
                    h_color = "#ffcc00",
                    spotlight = TRUE,
                    l_x = 1.1,
                    l_y = 2.0,
                    l_width = 8.0,
                    l_height = 8.0,
                    l_alpha = 0.5,
                    url = "rpkg.dev/pal",
                    u_color = "#ffcc00",
                    u_size = 4.0,
                    filename = "data-raw/logo/hexsticker.png")
```
Finally, `data-raw/logo/hexsticker.png` was post-processed with the G'MIC filter Felt Pen in GIMP to create `data-raw/logo/hexsticker_gmic_felt_pen.png`, which, however, was discarded. Instead, the final package logo `data-raw/logo/hexsticker_smooth.png` is just `data-raw/logo/hexsticker.png` smoothed with the G'MIC filter Smooth Diffusion in GIMP.
`R CMD check` notes about undefined global objects used in magrittr pipes

Cf. https://github.com/tidyverse/magrittr/issues/29#issuecomment-74313262
``` r
utils::globalVariables(names = c(".",
                                 # tidyselect fns
                                 "any_of",
                                 "everything",
                                 "where",
                                 # other
                                 "data",
                                 "default_value",
                                 "default_value_dynamic",
                                 "env_var",
                                 "key",
                                 "package",
                                 "Package",
                                 "r_opt",
                                 "repository",
                                 "rowid",
                                 "type",
                                 "Version",
                                 "X1",
                                 "X2"))
```
forbidden_dots
``` r
# forbidden dots arguments
forbidden_dots <- list(roxy_tag_value = c("pkgs", "destdir", "available", "type", "quiet"))
```
as_env_var_name
TODO:
NOTES:
``` r
as_env_var_name <- function(...) {

  as_str(...,
         sep = "_") |>
    stringr::str_replace_all(pattern = "\\W",
                             replacement = "_") |>
    stringr::str_replace(pattern = "^(\\d)",
                         replacement = "_\\1") |>
    toupper()
}
```
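For illustration, the intended normalization (the exact result also depends on `as_str()`, so treat these outputs as expectations rather than guarantees):

``` r
# non-word characters become underscores, a leading digit gets prefixed with "_", everything is uppercased
as_env_var_name("pal", "some key!")
#> expected: "PAL_SOME_KEY_"

as_env_var_name("1st_try")
#> expected: "_1ST_TRY"
```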
get_pkg_config
``` r
get_pkg_config <- function(pkg) {

  checkmate::assert_string(pkg)

  if (!exists_in_namespace(x = "pkg_config",
                           ns = pkg)) {
    cli::cli_abort(paste0("Package {.pkg {pkg}} has not defined the required package configuration metadata in its namespace (as object {.var pkg_config}). ",
                          "See the {.emph Details} section of {.help pal::pkg_config_val} for more information."))
  }

  pkg_config <- utils::getFromNamespace(x = "pkg_config",
                                        ns = pkg)

  checkmate::assert_data_frame(pkg_config,
                               col.names = "unique")

  if (!("key" %in% colnames(pkg_config)
        && any(c("default_value", "default_value_dynamic") %in% colnames(pkg_config)))) {
    cli::cli_abort("{.code {pkg}:::pkg_config} must at minimum contain the columns {.var key} and {.var default_value} or {.var default_value_dynamic}.")
  }

  # ensure/complement df structure
  pkg_config |>
    vctrs::tib_cast(tibble::tibble(key = character(),
                                   default_value = list(),
                                   default_value_dynamic = character(),
                                   require = logical(),
                                   description = character())) |>
    tidyr::replace_na(replace = list(require = TRUE))
}
```
get_pkg_config_val_default
``` r
get_pkg_config_val_default <- function(key,
                                       pkg_config,
                                       env = parent.frame()) {

  key <- rlang::arg_match0(key,
                           values = pkg_config$key)

  data <- pkg_config |> dplyr::filter(key == !!key)

  if (is.na(data$default_value_dynamic)) {

    result <- Reduce(x = data$default_value,
                     f = c)
  } else {

    # ensure only one of `default_value_dynamic` and `default_value` is set
    if (!is.null(data$default_value[[1L]])) {
      cli::cli_abort(paste0("Only one of {.var default_value} and {.var default_value_dynamic} can be set in {.var pkg_config} for a specific {.var key}, but ",
                            "for {.field {key}} both are."))
    }

    result <- eval(expr = parse(text = data$default_value_dynamic),
                   envir = env)
  }

  result
}
```
has_root
TODO: Submit this function as `check_criterion()` upstream as outlined from here on.
``` r
has_root <- function(criterion,
                     path,
                     check_parent_dirs) {

  checkmate::assert_class(criterion,
                          classes = "root_criterion")
  checkmate::assert_flag(check_parent_dirs)
  rlang::check_installed("rprojroot",
                         reason = reason_pkg_required())
  checkmate::assert_directory_exists(path,
                                     access = "r")

  if (check_parent_dirs) {

    result <- tryCatch(
      expr = {
        rprojroot::find_root(criterion = criterion,
                             path = path)
        TRUE
      },
      error = \(e) FALSE
    )
  } else {

    result <-
      criterion$testfun |>
      purrr::map_lgl(\(x) x(path = path)) |>
      any()
  }

  result
}
```
pkg_config_env_var_name
``` r
pkg_config_env_var_name <- function(pkg, key) {
  as_env_var_name("R", pkg, key)
}
```
pkg_config_opt_name
``` r
pkg_config_opt_name <- function(pkg, key) {
  paste(pkg, key, sep = ".")
}
```
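Together, these two helpers define the canonical names under which a package configuration key is looked up. Assuming `as_env_var_name()` behaves as sketched above, a hypothetical package `mypkg` with key `verbose` would resolve to:

``` r
pkg_config_opt_name(pkg = "mypkg", key = "verbose")      # R option:             "mypkg.verbose"
pkg_config_env_var_name(pkg = "mypkg", key = "verbose")  # environment variable: "R_MYPKG_VERBOSE"
```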
is_heading_node
``` r
is_heading_node <- function(xml_node) {
  xml2::xml_name(xml_node) == "heading"
}
```
node_heading_lvl
``` r
node_heading_lvl <- function(xml_node) {
  xml_node |>
    xml2::xml_attr(attr = "level") |>
    as.integer()
}
```
subnode_ix
``` r
subnode_ix <- function(xml_nodes, i) {

  i_node <- i
  i <- i + 1L
  ix_subnodes <- integer()
  heading_lvl_node <- node_heading_lvl(xml_nodes[i_node])
  is_subnode <- is_heading_node(xml_nodes[i_node]) && i <= length(xml_nodes)

  while (is_subnode) {

    is_subnode <-
      !is_heading_node(xml_nodes[i]) ||
      isTRUE(node_heading_lvl(xml_nodes[i]) > heading_lvl_node)

    ix_subnodes %<>% c(i[is_subnode])
    i <- i + 1L

    if (i > length(xml_nodes)) is_subnode <- FALSE
  }

  ix_subnodes
}
```
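A minimal sketch of how these helpers fit together — assuming (this is an assumption for illustration, not necessarily how pal feeds them internally) that the nodes stem from a CommonMark XML rendering of a Markdown document:

``` r
# illustration only — requires the commonmark and xml2 packages
md <- "# A\n\ntext under A\n\n## A.1\n\ntext under A.1\n\n# B\n"

nodes <-
  commonmark::markdown_xml(md) |>
  xml2::read_xml() |>
  xml2::xml_children()

# indices of all nodes nested under the 1st node (heading "A"),
# i.e. everything up to but excluding heading "B"
subnode_ix(xml_nodes = nodes,
           i = 1L)
#> expected: 2 3 4
```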
safe_seq_len
#' Generate an integer sequence of specific length (safe) #' #' Modified version of [base::seq_len()] that returns a zero-length integer in case of a zero-length input instead of throwing an error. #' #' @param n Desired length of the integer sequence. #' #' @return An integer sequence starting from `1L`. #' @family stat #' @export #' #' @examples #' pal::safe_seq_len(5) #' #' # this function simply returns a zero-length integer for zero-length inputs... #' pal::safe_seq_len(NULL) #' pal::safe_seq_len(integer()) #' #' # ...while `seq_len()` throws an error #' try(seq_len(NULL)) #' try(seq_len(integer())) safe_seq_len <- function(n) { if (length(n) > 0L) { return(seq_len(n)) } else { return(integer()) } }
safe_max
#' Maximum (safe) #' #' @description #' Modified version of [base::max()] that differs in the following ways: #' #' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`). #' - If the input is of length zero, the output will also be of length zero (of the same type as the input). #' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input #' contains only `-Inf` or `NA`. #' - `r pkgsnip::roxy_lbl("dyn_dots_support")` #' #' @param ... Numeric objects of which to determine the maximum. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param rm_na Ignore missing values in `...`. If missing values are present and `rm_na = FALSE`, the result will always be `NA`. #' #' @return A numeric scalar or empty value, of the same type as `...`. #' @family stat #' @export #' #' @examples #' # other than `base::max()`, this function removes `NA`s by default #' max(1, NA_real_, 2) #' pal::safe_max(1, NA_real_, 2) #' #' # other than `base::max()`, this function does not return `-Inf` or `NA_character_` for #' # zero-length inputs #' max(NULL) #' max(integer()) #' pal::safe_max(NULL) #' pal::safe_max(integer()) #' #' # other than `base::max()`, this function fails for non-numeric inputs #' max("zero", 1L) #' max("zero", "one") #' max(character()) #' try(pal::safe_max("zero", 1L)) #' try(pal::safe_max("zero", "one")) #' try(pal::safe_max(character())) safe_max <- function(..., rm_na = TRUE) { input <- rlang::list2(...) purrr::map(input, \(x) checkmate::assert_numeric(x, typed.missing = TRUE, null.ok = TRUE, .var.name = "...")) input |> purrr::reduce(c) |> when(length(.) == 0L ~ .[0L], all(is.na(.)) && rm_na ~ .[NA], ~ max(., na.rm = rm_na)) }
safe_min
#' Minimum (safe) #' #' @description #' Modified version of [base::min()] that differs in the following ways: #' #' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`). #' - If the input is of length zero, the output will also be of length zero (of the same type as the input). #' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input #' contains only `-Inf` or `NA`. #' - `r pkgsnip::roxy_lbl("dyn_dots_support")` #' #' @inheritParams safe_max #' @param ... Numeric objects of which to determine the minimum. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' #' @inherit safe_max return #' @family stat #' @export #' #' @examples #' # other than `base::min()`, this function removes `NA`s by default #' min(1, NA_real_, 2) #' pal::safe_min(1, NA_real_, 2) #' #' # other than `base::min()`, this function does not return `-Inf` or `NA_character_` for #' # zero-length inputs #' min(NULL) #' min(integer()) #' pal::safe_min(NULL) #' pal::safe_min(integer()) #' #' # other than `base::min()`, this function fails for non-numeric inputs #' min("zero", 1L) #' min("zero", "one") #' min(character()) #' try(pal::safe_min("zero", 1L)) #' try(pal::safe_min("zero", "one")) #' try(pal::safe_min(character())) safe_min <- function(..., rm_na = TRUE) { input <- rlang::list2(...) purrr::map(input, \(x) checkmate::assert_numeric(x, typed.missing = TRUE, null.ok = TRUE, .var.name = "...")) input |> purrr::reduce(c) |> when(length(.) == 0L ~ .[0L], all(is.na(.)) && rm_na ~ .[NA], ~ min(., na.rm = rm_na)) }
round_to
#' Round to any number #' #' Round a numeric vector to any number, rounded up by default (`round_up = TRUE`). #' #' This function's precision is limited to 15 significant digits in order to account for the [limits of R's floating point #' representation](https://stackoverflow.com/a/35349949/7196903). #' #' A computationally more efficient alternative would be the unexported `scales:::round_any()` which drives [scales::number()] – with the drawback that it lacks #' control of rounding up exact remainders of `accuracy / 2`, i.e. it _always_ rounds _off_. #' #' @param x Vector of numbers to round. #' @param to Number to round `x` to. A numeric scalar. #' @param round_up Whether to round a remainder of exactly `to / 2` _up_ or not. Set to `FALSE` in order to round _off_. #' #' @return A numeric vector of the same length as `x`. #' @seealso [prettyunits::pretty_round()] #' @family stat #' @export #' #' @examples #' vals = c(0.025, 0.1, 0.1999, 0.099999, 0.49, 0.55, 0.5, 0.9, 1) #' vals |> pal::round_to(to = 0.05) #' vals |> pal::round_to(to = 0.05, #' round_up = FALSE) round_to <- function(x, to = 0.2, round_up = TRUE) { checkmate::assert_number(to, lower = 0L, finite = TRUE) checkmate::assert_flag(round_up) result <- x %/% to remainder <- signif(x %% to, # round to a max of 15 significant digits to avoid exceeding floating-point representation limits digits = 15L) if (round_up) { which_round <- remainder >= (to / 2L) } else { which_round <- remainder > (to / 2L) } result[which_round] <- result[which_round] + 1L result * to }
stat_mode
#' Statistical mode #' #' Computes the [statistical mode](https://en.wikipedia.org/wiki/Mode_(statistics)) of a set of values. The mode is defined as the most frequent value, i.e. the #' value that is most likely to be sampled. #' #' @param x An \R object. #' @param type What the function should calculate. #' - `"one"`: Return _the_ mode of `x`. If multiple modes or no mode at all exists, `NA` is returned. #' - `"all"`: Return _all_ modes of `x`. If none exists (e.g. because all values of `x` are distinct), `NA` is returned. #' - `"n"`: Return the number of modes of `x`. #' @param rm_na Ignore missing values in `x`. A logical scalar. #' #' @return If `type = "n"`, the number of modes in `x` (an integer). Otherwise, the mode(s) of `x` or `NA` if none exist(s) (same type as `x`). #' @family stat #' @seealso The package [modeest](https://cran.r-project.org/package=modeest) for more powerful mode estimation functions. #' @export #' #' @examples #' pal::stat_mode(c(rep(3L, times = 3), 1:9)) #' pal::stat_mode(c(1.5, 4, 9.9)) #' #' # if no mode exists, `NA` (of the same type as x) is returned #' pal::stat_mode(letters) #' pal::stat_mode(c(letters, "a")) #' #' # if multiple modes exist, `NA` is returned by default #' pal::stat_mode(c(letters, "a", "b")) #' # set `type = "all"` to return all modes instead #' pal::stat_mode(c(letters, "a", "b"), #' type = "all") #' #' # `NA` is treated as any other value by default #' pal::stat_mode(c(letters, "a", NA_character_, NA_character_), #' type = "all") #' # set `rm_na = TRUE` to ignore `NA` values #' pal::stat_mode(c(letters, "a", NA_character_, NA_character_), #' type = "all", #' rm_na = TRUE) stat_mode <- function(x, type = c("one", "all", "n"), rm_na = FALSE) { x <- unlist(x) type <- rlang::arg_match(type) checkmate::assert_flag(rm_na) if (rm_na) x <- x[!is.na(x)] # get unique values u_x <- unique(x) n_u_x <- length(u_x) # get frequencies of all unique values frequencies <- tabulate(match(x, u_x)) modes <- frequencies == max(frequencies) # determine number of modes n_modes <- sum(modes) %>% ifelse(. == n_u_x, 0L, .) type |> when( # return the number of modes if requested . == "n" ~ n_modes, # or return mode(s) if requested and existing (. == "one" && n_modes == 1L) || (. == "all" && n_modes > 0L) ~ u_x[which(modes)], # else return `NA` (of the same type as `x`) ~ x[NA][1L] ) }
assert_cols
#' Assert data frame columns #' #' Asserts that a data frame contains the specified columns. #' #' @param data Data frame to check. #' @param cols Column names which must be present in `data`. A character vector or `NULL`. #' @param strict Whether additional columns not specified in `cols` are allowed in `data`. #' @param obj_name Name of the checked object to print in error messages. #' #' @return `data`, invisibly. #' @family df #' @export #' #' @examples #' pal::assert_cols(data = mtcars, #' cols = c("mpg", "disp")) #' #' try( #' pal::assert_cols(data = mtcars, #' cols = c("mpg", "display")) #' ) #' #' try( #' pal::assert_cols(data = mtcars, #' cols = c("mpg", "disp"), #' strict = TRUE) #' ) #' #' try( #' pal::assert_cols(data = mtcars, #' strict = TRUE) #' ) assert_cols <- function(data, cols = NULL, strict = FALSE, obj_name = checkmate::vname(data)) { checkmate::assert_character(cols, null.ok = TRUE) checkmate::assert_flag(strict) checkmate::assert_string(obj_name) checkmate::assert_data_frame(data, min.cols = length(cols)) # disallow any cols when `strict` without `cols` if (strict && is.null(cols)) { cols <- character() } checkmate::assert_names(colnames(data), must.include = if (strict) NULL else cols, identical.to = if (strict) cols else NULL, what = "column names", .var.name = obj_name) invisible(data) }
is_equal_df
#' Test if two data frames/tibbles are equal #' #' Compares two [data frames][base::data.frame()]/[tibbles][tibble::tbl_df] (or two objects coercible to tibbles like [matrices][base::matrix()]), optionally #' ignoring row and column ordering, and returns `TRUE` if both are equal, or `FALSE` otherwise. If the latter is the case and `quiet = FALSE`, information #' about detected differences is printed to the console. #' #' Under the hood, this function relies on [waldo::compare()]. #' #' @inheritParams waldo::compare #' @param x The data frame / tibble to check for changes. #' @param y The data frame / tibble that `x` should be checked against, i.e. the reference, so messages describe how `x` is different to `y`. #' @param ignore_col_order Whether or not to ignore the order of columns. #' @param ignore_row_order Whether or not to ignore the order of rows. #' @param ignore_col_types Whether or not to distinguish similar column types. Currently, if set to `TRUE`, this will convert factors to characters and integers #' to doubles before the comparison. #' @param quiet Whether or not to output detected differences between `x` and `y` to the console. #' @param max_diffs Maximum number of differences shown. Only relevant if `quiet = FALSE` or `return_waldo_compare = TRUE`. Set `max_diffs = Inf` to see all #' differences. #' @param return_waldo_compare Whether to return a character vector of class [`waldo_compare`][waldo::compare] describing the differences between `x` and `y` #' instead of `TRUE` or `FALSE`. #' #' @return If `return_waldo_compare = FALSE`, a logical scalar indicating the result of the comparison. Otherwise a character vector of class #' [`waldo_compare`][waldo::compare] describing the differences between `x` and `y`. #' @family df #' @export #' #' @examples #' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))] #' #' # by default, ordering of rows and columns matters... #' pal::is_equal_df(x = mtcars, #' y = scramble(mtcars)) #' #' # ...but those can be ignored if desired #' pal::is_equal_df(x = mtcars, #' y = scramble(mtcars), #' ignore_col_order = TRUE) #' pal::is_equal_df(x = mtcars, #' y = scramble(mtcars), #' ignore_row_order = TRUE) #' #' # by default, `is_equal_df()` is sensitive to column type differences... 
#' df1 <- data.frame(x = "a", #' stringsAsFactors = FALSE) #' df2 <- data.frame(x = factor("a")) #' pal::is_equal_df(df1, df2) #' #' # ...but you can request it to not make a difference between similar types #' pal::is_equal_df(df1, df2, #' ignore_col_types = TRUE) is_equal_df <- function(x, y, ignore_col_order = FALSE, ignore_row_order = FALSE, ignore_col_types = FALSE, tolerance = NULL, quiet = TRUE, max_diffs = 10L, return_waldo_compare = FALSE) { rlang::check_installed("waldo", reason = reason_pkg_required()) checkmate::assert_flag(ignore_col_order) checkmate::assert_flag(ignore_row_order) checkmate::assert_flag(ignore_col_types) checkmate::assert_number(tolerance, lower = 0.0, null.ok = TRUE) checkmate::assert_flag(quiet) checkmate::assert_number(max_diffs, lower = 1.0) checkmate::assert_flag(return_waldo_compare) # convert `x` and `y` to tibble if any modification is required if (ignore_col_order || ignore_row_order || ignore_col_types) { x %<>% tibble::as_tibble() y %<>% tibble::as_tibble() } # sort columns if necessary if (ignore_col_order) { x %<>% dplyr::select(sort(colnames(.))) y %<>% dplyr::select(sort(colnames(.))) } # sort rows if necessary if (ignore_row_order) { x %<>% dplyr::arrange(dplyr::across(.cols = everything())) y %<>% dplyr::arrange(dplyr::across(.cols = everything())) } # harmonize column types if necessary if (ignore_col_types) { x %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor), .fns = as.character), dplyr::across(.cols = where(is.integer), .fns = as.double)) y %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor), .fns = as.character), dplyr::across(.cols = where(is.integer), .fns = as.double)) } result <- waldo::compare(x = x, y = y, tolerance = tolerance, x_arg = "x", y_arg = "y", max_diffs = max_diffs) if (length(result) > 0L) { if (!quiet) { cli::cli_alert_info(text = "`x` differs from `y`:") cat("\n") print(result) } if (!return_waldo_compare) result <- FALSE } else if (!return_waldo_compare) { result <- TRUE } result }
reduce_df_list
TODO: Add an example where this fn is useful.
#' Reduce a nested list of data frames / tibbles to a single tibble #' #' Recursively reduces a nested list containing data frames / tibbles at its leafs to a single tibble. #' #' @param x A list containing data frames / tibbles at its leafs. #' @param strict Ensure `x` contains data frames / tibbles only and throw an error otherwise. If `FALSE`, leafs containing other objects are ignored (skipped). #' #' @return `r pkgsnip::return_lbl("tibble")` #' @family df #' @export reduce_df_list <- function(x, strict = TRUE) { checkmate::assert_flag(strict) if (is.data.frame(x) || tibble::is_tibble(x)) { result <- x } else if (purrr::pluck_depth(x) < 2L) { if (strict) { cli::cli_abort("At least one element of the list to be reduced is not a data frame / tibble!") } else { result <- NULL } } else { result <- x |> purrr::map(reduce_df_list, strict = strict) |> purrr::list_rbind() } result }
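In line with the TODO above, a first made-up illustration of where this can be handy (the data and expected output are a sketch, not a test case):

``` r
# per-group result tibbles nested in a list, collapsed into a single tibble
list(group_a = list(tibble::tibble(x = 1:2),
                    tibble::tibble(x = 3:4)),
     group_b = tibble::tibble(x = 5L)) |>
  pal::reduce_df_list()
#> expected: one tibble with column `x` containing 1, 2, 3, 4, 5
```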
as_flat_list
DEPRECATED!
TODO:

- This fn is ill-fated -> get rid of it (but first of its usages)!

- `keep_attrs` does not work as one would expect, e.g. `pal::as_flat_list(mtcars, keep_attrs = FALSE)`.

- Behaviour with named (sub)lists like the following one is counter-intuitive:

  ``` r
  fokus::raw_qstnr_suppl_election(ballot_date = ballot_date,
                                  lvl = 'cantonal',
                                  canton = canton,
                                  prcd = "proportional",
                                  election_nr = election_nr) |>
    _$party |>
    dplyr::first() |>
    pal::as_flat_list() |>
    str()
  ```

  When setting `keep_attrs = FALSE` (`names` is an attribute), the list is actually flattened (but the parent names are lost). Ideal would be a param like `concatenate_names = FALSE` which could be used to vary this behaviour.
#' Convert to a flat list #' #' @description #' _Recursively_ flattens a list. Unlike the similar `unlist()`, it #' #' - always returns a list, i.e. wraps `x` in a list if necessary, and will never remove the last list level. Thus it is #' [type-safe](https://en.wikipedia.org/wiki/Type_safety). #' #' - won't treat any of the list leafs specially (like `unlist()` does with factors). Thus leaf values will never be modified. #' #' - removes list names. `unlist()` concatenates nested names (separated by a dot). #' #' @param x `r pkgsnip::param_lbl("r_obj")` #' @param keep_attrs Keep [attributes][base::attr()] (and thereby retain list structure of custom objects). A logical scalar. #' @param attrs_to_drop Attribute names which should never be kept. Only relevant if `keep_attrs = TRUE`. A character vector. #' #' @return A [list][base::list()]. #' @family list #' @export #' #' @examples #' library(magrittr) #' #' nested_list <- list(1:3, list("foo", list("bar"))) %T>% str() #' #' # unlike `unlist()` which also removes the last list tier in many cases... #' unlist("foobar") #' unlist(nested_list) |> str() #' # ...this function always returns an (unnested) list #' pal::as_flat_list("foobar") |> str() #' pal::as_flat_list(nested_list) |> str() #' #' nested_list <- list(list(factor("a"), factor("b")), factor("c")) %T>% str() #' #' # unlike `unlist()` which combines factors... #' unlist(nested_list) |> str() #' # ...this function does not modify the list elements #' pal::as_flat_list(nested_list) |> str() #' #' nested_list <- #' list(c(list(1L), list(tibble::tibble(a = list(1.1, "2")))), #' list(tibble::as_tibble(mtcars[1:2, ]))) %T>% #' str() #' nested_list_2 <- list(1:3, xfun::strict_list(list(list("buried deep")))) %T>% str() #' #' # by default, attributes and thus custom objects (except `xfun_strict_list`) are retained, i.e. #' # not flattened... #' pal::as_flat_list(nested_list) |> str() #' pal::as_flat_list(nested_list_2) |> str() #' # ...but you can drop them and thereby flatten custom objects if needed... 
#' pal::as_flat_list(nested_list, keep_attrs = FALSE) |> str() #' # ...or retain `xfun_strict_list`s, too #' pal::as_flat_list(nested_list_2, attrs_to_drop = NULL) |> str() as_flat_list <- function(x, keep_attrs = TRUE, attrs_to_drop = "xfun_strict_list") { checkmate::assert_flag(keep_attrs) checkmate::assert_character(attrs_to_drop, any.missing = FALSE, null.ok = TRUE) regard_attrs <- keep_attrs && length(setdiff(attributes(x), attrs_to_drop)) > 0L depth <- purrr::pluck_depth(x) # wrap `x` in a list if it's not if (regard_attrs || (!rlang::is_bare_list(x) && depth < 2L)) { result <- list(x) # return `x` as-is if it is an unnested list } else if (depth < 3L) { result <- x # flatten the two last list levels (keeping attributes if requested) } else if (depth < 4L) { result <- if (keep_attrs) rm_list_lvl(x, attrs_to_drop = attrs_to_drop) else unname(purrr::list_flatten(as.list(x))) } else { # recursively feed the elements of `x` to this function and flatten the two last list levels (keeping attributes if requested) result <- x |> purrr::map(\(x) as_flat_list(x = x, keep_attrs = keep_attrs, attrs_to_drop = attrs_to_drop)) |> when(keep_attrs ~ rm_list_lvl(., attrs_to_drop = attrs_to_drop), ~ unname(purrr::list_flatten(.))) } result } rm_list_lvl <- function(x, attrs_to_drop = "xfun_strict_list") { checkmate::assert_list(x) result <- list() for (i in seq_along(x)) { regard_attrs <- length(setdiff(attributes(x[[i]]), attrs_to_drop)) > 0L if (!regard_attrs && purrr::pluck_depth(x[[i]]) > 1L) { result %<>% c(x[[i]]) } else { result %<>% c(list(x[[i]])) } } result }
as_chr
#' Convert to a character vector #' #' _Recursively_ applies [base::as.character()] to its inputs. #' #' @param ... \R objects to be converted to a character vector. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param use_names Whether or not to preserve names by [base::unlist()]. #' #' @return A character vector. #' @family string #' @export #' #' @examples #' library(magrittr) #' #' to_convert <- #' list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day")) %T>% #' print() #' #' as.character(to_convert) #' pal::as_chr(!!!to_convert) as_chr <- function(..., use_names = FALSE) { rlang::list2(...) |> purrr::modify_tree(leaf = as.character, is_node = is.list) |> unlist(use.names = use_names) }
escape_lf
#' Escape line feeds / newlines #' #' Escapes the [POSIX-standard newline control character `LF`](https://en.wikipedia.org/wiki/Newline) (aka `\n`) which is the standard on Unix/Linux and recent #' versions of macOS. Set `escape_cr = TRUE` in order to also escape the carriage return character `CR` (aka `\r`) commonly used on Microsoft Windows. #' #' @param x A character vector. #' @param escape_cr Whether or not to also escape the carriage return character `CR` (aka `\r`). A logical scalar. #' #' @return A character vector of the same length as `x`. #' @family string #' @export #' #' @examples #' library(magrittr) #' #' # read in and print RStudio add-in registration file as-is #' text <- #' fs::path_package(package = "pal", #' "rstudio", "addins.dcf") |> #' readr::read_file() %T>% #' pal::cat_lines() #' #' # escape newlines and print again #' pal::escape_lf(text) |> pal::cat_lines() escape_lf <- function(x, escape_cr = FALSE) { checkmate::assert_character(x, null.ok = TRUE) checkmate::assert_flag(escape_cr) x %<>% stringr::str_replace_all(pattern = "\\n", replacement = "\\\\n") if (escape_cr) { x %<>% stringr::str_replace_all(pattern = "\\r", replacement = "\\\\r") } x }
prettify_nr
TODO:

- `phrase_nr()`?

``` r
#' Prettify a numeric vector
#'
#' Prettifies a numeric vector by rounding, separating thousands and optionally other procedures. Basically a convenience wrapper around [round_to()] and
#' [base::format()].
#'
#' @inheritParams round_to
#' @param x A numeric vector to prettify.
#' @param round_to Number to round `x` to. A numeric scalar.
#' @param big_mark Character used between every 3 digits to separate thousands.
#' @param decimal_mark Character used to indicate the numeric decimal point. Only relevant if `x` does not solely consist of integers.
#' @param justify_right Whether to right-justify the results to a common width. See the `trim` parameter of [base::format()] for details.
#' @param ... Further arguments passed on to [base::format()].
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' c(0.11, 11111.11) |> pal::prettify_nr()
#'
#' c(0.11, 11111.11) |>
#'   pal::prettify_nr(justify_right = TRUE) |>
#'   pal::cat_lines()
prettify_nr <- function(x,
                        round_to = 0.1,
                        round_up = TRUE,
                        big_mark = "'",
                        decimal_mark = ".",
                        justify_right = FALSE,
                        ...) {

  round_to(x = x,
           to = round_to,
           round_up = round_up) |>
    format(big.mark = big_mark,
           decimal.mark = decimal_mark,
           trim = !justify_right,
           ... = ...)
}
```
sentenceify
#' Convert to sentence case with trailing punctuation mark #' #' Converts the input to a character vector and ensures it starts with an upper case letter and ends with the specified punctuation mark. #' #' Note that this function doesn't alter any characters in `x` other than the first and the last one. #' #' @param x Input to be converted to [sentence case](https://en.wikipedia.org/wiki/Letter_case#Case_styles), typically a character vector. #' @param punctuation_mark Punctuation mark to be appended to `x`. A character scalar. #' #' @inherit capitalize_first return #' @family string #' @seealso #' [stringr::str_to_sentence()] to convert a character vector to all lowercase except for the first character. Note that this also includes lowercasing [proper #' nouns](https://en.wikipedia.org/wiki/Proper_and_common_nouns), [abbreviations](https://en.wikipedia.org/wiki/Abbreviation) etc. #' #' [snakecase::to_sentence_case()] that builds upon [stringr::str_to_sentence()] but offers additional options to finetune the conversion. Note that #' `abbreviations` have to be manually specified in order to be preserved in upper case. #' @export #' #' @examples #' pal::sentenceify("no verb, no sentence") #' #' # punctuation mark won't be duplicated if already existing #' pal::sentenceify(c("I've made my point", #' "good point.")) sentenceify <- function(x, punctuation_mark = ".") { checkmate::assert_string(punctuation_mark) capitalize_first(x) |> purrr::map_chr(\(x) { if (is.na(x) || stringr::str_sub(string = x, start = -1L) == punctuation_mark) { x } else { paste0(x, punctuation_mark) } }) }
capitalize_first
#' Capitalize first letter #' #' Converts the input to a character vector, with the first letter of each element uppercased. #' #' @param x Input of which to capitalize the first letter, typically a character vector. #' #' @return A character vector of the same length as `x`. #' @family string #' @export #' #' @examples #' pal::capitalize_first(c("one", "Two", "tHREE")) capitalize_first <- function(x) { stringr::str_replace(string = x, pattern = "^.", replacement = toupper) }
wrap_chr
#' Wrap character in character #' #' Wraps a character vector `x` in another character vector `wrap` (by default the string `"`). #' #' @param x A character vector or something coercible to. Will be fed to [as_chr()] before wrapping. #' @param wrap Character sequence `x` is to be wrapped in. A character vector or something coercible to. #' #' @return A character vector of the same length as `wrap`. #' @family string #' @export #' #' @examples #' mtcars |> #' colnames() |> #' pal::wrap_chr(wrap = "`") |> #' magrittr::set_colnames(x = mtcars) |> #' pal::pipe_table() wrap_chr <- function(x, wrap = "\"") { paste0(wrap, as_chr(x), wrap) }
as_line_feed_chr
#' Convert control character sequence name to actual character sequence #' #' @param eol `r pkgsnip::param_lbl("eol")` #' #' @return A character scalar. #' @family string #' @export as_line_feed_chr <- function(eol = c("LF", "CRLF", "CR", "LFCR")) { switch(EXPR = rlang::arg_match(eol), LF = "\n", CRLF = "\r\n", CR = "\r", LFCR = "\n\r") }
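For quick reference, the mapping is just a `switch()` over the supported EOL identifiers (as can be read directly off the definition above):

``` r
pal::as_line_feed_chr()        # defaults to "LF" -> "\n"
pal::as_line_feed_chr("CRLF")  # "\r\n"
```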
dsv_colnames
#' Get column names of a delimiter-separated string #' #' Returns the column names of a string in a [delimiter-separated-value](https://en.wikipedia.org/wiki/Delimiter-separated_values) format like #' [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) or [TSV](https://en.wikipedia.org/wiki/Tab-separated_values). #' #' @param x Delimiter-separated string. A character scalar. #' @param delim Single character used to separate fields within `x`. #' @param quote Single character used to quote strings within `x`. Set to `NULL` for none. #' #' @return A character vector of column names. #' @family string #' @export #' #' @examples #' "https://raw.githubusercontent.com/tidyverse/readr/master/inst/extdata/mtcars.csv" |> #' httr2::request() |> #' httr2::req_perform() |> #' httr2::resp_body_string() |> #' pal::dsv_colnames() dsv_colnames <- function(x, delim = ",", quote = "\"") { checkmate::assert_string(delim, min.chars = 1L, pattern = "^.$") checkmate::assert_string(quote, null.ok = TRUE, pattern = "^.$") x |> # extract first line regexpr(pattern = "[\r\n]") |> magrittr::subtract(1L) |> substr(x = x, start = 1L, stop = _) |> stringr::str_split_1(pattern = delim) |> stringr::str_remove_all(pattern = glue::glue("^{quote}|{quote}$")) }
as_str
#' Convert to a character scalar (aka string) #' #' Similar to [`paste0(..., collapse = "")`][base::paste0()], but _recursively_ converts its inputs to type character. #' #' @param ... \R objects to be assembled to a single string. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param sep Separator to delimit `...`. Defaults to none (`""`). #' @param rm_na Exclude missing values. If `FALSE`, missing values will be represented as `"NA"` in the resulting string. #' #' @return A character scalar. #' @family string #' @export #' #' @examples #' library(magrittr) #' #' input <- #' paste0(2:4, #' collapse = ", ") |> #' purrr::map(rep, #' times = 20) %>% #' list(c("This is a glut of ", "meaningless numbers: "), .) #' #' # while this just converts `input` in a lazy way... #' paste0(input, #' collapse = "") #' #' # ...this one works harder #' pal::as_str(input) as_str <- function(..., sep = "", rm_na = FALSE) { checkmate::assert_flag(rm_na) result <- as_chr(...) if (rm_na) { result %<>% magrittr::extract(!is.na(.)) } if (length(result) > 0L) { result %<>% paste0(collapse = sep) } result }
as_comment_str
#' Assemble an (R) comment string of the desired line width #' #' Takes a vector of paragraphs, wraps them at the specified line width, prefixes them with comment markers and returns the result as a single string. #' #' @param ... Comment lines. A character vector or something coercible to. #' @param line_width Maximum character width at which to wrap lines. A positive integerish scalar. #' @param comment_prefix Character sequence that indicates the start of a comment. A character scalar. #' @param sep_paragraphs Whether or not to separate paragraphs with an empty comment line. #' #' @return A character scalar. #' @family string #' @export #' #' @examples #' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"), #' "No presents without code.") |> #' cat() #' #' # wrap lines at 20 chars #' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"), #' "No presents without code.", #' line_width = 20L) |> #' cat() #' #' # disable empty comment lines between paragraphs: #' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"), #' "Hohoho.", #' sep_paragraphs = FALSE) |> #' cat() as_comment_str <- function(..., line_width = 160L, comment_prefix = "# ", sep_paragraphs = TRUE) { checkmate::assert_count(line_width) checkmate::assert_string(comment_prefix) checkmate::assert_flag(sep_paragraphs) as_chr(...) |> stringr::str_wrap(width = line_width - nchar(comment_prefix)) |> stringr::str_split(pattern = stringr::fixed("\n")) |> purrr::map(\(x) paste0(paste0(comment_prefix, x), collapse = "\n")) %>% purrr::map2_chr(.y = seq_along(.), .f = \(x, y) { if (sep_paragraphs && y < length(.)) paste0(x, "\n", comment_prefix, "\n") else paste0(x, "\n") }) |> paste0(collapse = "") }
enum_str
#' Enumerate vector's elements as string #' #' Takes a vector or list and enumerates its elements in a single string. Convenience function combining [wrap_chr()] and [cli::ansi_collapse()] with slightly #' differing defaults (`last` defaults to the value of `sep2`). #' #' @inheritParams cli::ansi_collapse #' @inheritParams wrap_chr #' #' @return A character scalar. #' @family string #' @export #' #' @examples #' # by default, `last` defaults to `sep2` #' pal::enum_str(1:2) #' pal::enum_str(1:3) #' #' # input is optionally wrapped in a character sequence #' pal::enum_str(letters[1:3], #' wrap = "`") enum_str <- function(x, sep = ", ", sep2 = " and ", last = sep2, trunc = Inf, width = Inf, ellipsis = cli::symbol$ellipsis, style = c("both-ends", "head"), wrap = "") { wrap_chr(x = x, wrap = wrap) |> cli::ansi_collapse(sep = sep, sep2 = sep2, last = last, trunc = trunc, width = width, ellipsis = ellipsis, style = style) }
fuse_regex
TODO:

- [ ] Deprecate `fuse_regex(...)` in favor of `rex::rex(or(...))`?
- [ ] Incorporate `fuzzify_regex()` and `diacritify_regex` from FA or even make a separate pkg, say rgx
#' Fuse regular expressions #' #' Combines a vector or list of regular expressions to a single one (by logical OR). #' #' @param ... Regular expressions. All elements will be converted to type character before fusing. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param .literal Whether or not the input should be interpreted *literally* instead of as regular expressions. If `TRUE`, all elements of `...` are wrapped in #' the literal escape sequences `\Q` and `\E`. #' #' @return A character scalar. #' @seealso The [rex][rex::rex] package which provides an intuitive framework to build complex regular expressions. #' @family string #' @export #' #' @examples #' # perform some (nonsense) Jane Austen text extraction #' regex <- c("My dear Jane", #' "make haste, ", #' "(?i)\\bevil") #' #' readr::read_lines(file = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt") |> #' stringr::str_subset(pattern = pal::fuse_regex(regex)) fuse_regex <- function(..., .literal = FALSE) { checkmate::assert_flag(.literal) result <- as_chr(...) if (.literal) { result %<>% paste0("\\Q", ., "\\E") } result %<>% paste0(collapse = "|") dots <- rlang::list2(...) if (length(dots) > 1L || length(dots[[1L]]) > 1L) { result %<>% paste0("(", ., ")") } result }
Functions to work with filesystem paths.
path_mod_time
#' Get path modification time #' #' Determine a file or directory's modification time. Other than [base::file.mtime()], this function is based on [fs::file_info()] and the datetime of #' modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) by default. #' #' @inheritParams fs::file_info #' @param tz Timezone to return the result in. A character scalar where `""` means the current time zone. #' #' @return `r pkgsnip::return_lbl("datetime")` #' @family path #' @export #' #' @examples #' fs::path_package(package = "dplyr", #' "DESCRIPTION") |> #' pal::path_mod_time() path_mod_time <- function(path, follow = FALSE, tz = "UTC") { checkmate::assert_string(tz) as.POSIXct(fs::file_info(path = path, follow = follow)$modification_time, tz = tz) }
flatten_path_tree
#' Flatten path tree #' #' Flattens a hierarchical list of path elements into a character vector of full paths. #' #' @param path_tree Directory tree. A nested list of named lists and character vectors. #' @param parent_path Optional parent path of `path_tree`. A character scalar. #' #' @return A character vector. #' @family path #' @export #' #' @examples #' list("root_dir" = list("subdir1", #' "subdir2" = list("file1.ext"), #' "file2.ext")) |> #' pal::flatten_path_tree() |> #' pal::cat_lines() flatten_path_tree <- function(path_tree, parent_path = NULL) { if (purrr::pluck_depth(path_tree) > 1L) { result <- rlang::names2(path_tree) |> purrr::map_chr(\(x) fs::path_join(c(parent_path, x))) |> purrr::map2(.x = path_tree, .f = flatten_path_tree) |> purrr::list_c(ptype = character()) %>% c(parent_path, .) } else { result <- fs::path_join(c(parent_path, path_tree)) } fs::path(result) }
draw_path_tree
NOTES:

- The tree-drawing algorithm is essentially the same as in `fs::dir_tree()`, which uses Unicode box drawing characters to draw a directory tree.

``` r
#' Draw path tree
#'
#' Pretty-prints a character vector of hierarchical paths as a tree. Uses [Unicode box drawing
#' characters](https://en.wikipedia.org/wiki/Box-drawing_character#Box_Drawing) to draw the nesting structure.
#'
#' This function is the equivalent of [fs::dir_tree()] for an artificial/fictional path hierarchy. To print a tree of an actual filesystem path hierarchy, it's
#' recommended to rely on `fs::dir_tree()` which additionally colorizes the output based on real filesystem information. Apart from that, the algorithm to draw
#' the tree is the same.
#'
#' @param paths A character vector of paths as returned by [flatten_path_tree()] or [fs::dir_ls()].
#' @param quiet Whether or not to suppress drawing the directory tree.
#'
#' @return `paths`, invisibly.
#' @family path
#' @export
#'
#' @examples
#' # using an artificial/fictional path hierarchy
#' list("root_dir" = list("subdir1",
#'                        "subdir2" = list("file1.ext"),
#'                        "file2.ext")) |>
#'   pal::flatten_path_tree() |>
#'   pal::draw_path_tree()
#'
#' # using an actual path hierarchy
#' fs::path_package("dplyr") |>
#'   fs::dir_ls(recurse = TRUE) |>
#'   pal::draw_path_tree()
#'
#' # to get colorized output, use `fs::dir_tree()` instead
#' fs::path_package("dplyr") |> fs::dir_tree()
draw_path_tree <- function(paths,
                           quiet = FALSE) {

  checkmate::assert_character(paths,
                              any.missing = FALSE,
                              min.len = 1L)
  checkmate::assert_flag(quiet)

  has_leading_root <-
    paths |>
    fs::path_split() |>
    purrr::map_chr(purrr::chuck, 1L) |>
    magrittr::equals(paths[1L]) |>
    all()

  leaf_paths <- paths

  # remove dir root to get same result structure as `fs::dir_ls()`
  if (has_leading_root) leaf_paths %<>% .[-1L]

  by_dir <- split(leaf_paths, fs::path_dir(leaf_paths))

  box_chars <- list(h = "\u2500",
                    v = "\u2502",
                    l = "\u2514",
                    j = "\u251c")

  print_leaf <- function(x, indent) {

    leafs <- by_dir[[x]]

    for (i in seq_along(leafs)) {

      if (i == length(leafs)) {

        cat(indent, paste0(box_chars$l, box_chars$h, box_chars$h, " ", collapse = ""), fs::path_file(leafs[[i]]), "\n", sep = "")
        print_leaf(x = leafs[[i]],
                   indent = paste0(indent, " "))
      } else {

        cat(indent, paste0(box_chars$j, box_chars$h, box_chars$h, " ", collapse = ""), fs::path_file(leafs[[i]]), "\n", sep = "")
        print_leaf(x = leafs[[i]],
                   indent = paste0(indent, paste0(box_chars$v, " ", collapse = "")))
      }
    }
  }

  if (!quiet) {

    dir_root <- ifelse(has_leading_root,
                       paths[1L],
                       by_dir |> names() %>% magrittr::extract(which.min(nchar(.))))

    # print dir root
    cat(dir_root, "\n", sep = "")

    # print leafs
    print_leaf(fs::path_expand(dir_root), "")
  }

  invisible(paths)
}
```
Extending rlang's check dots functions, making the use of R's `...` argument placeholder yet another bit safer.
check_dots_named
NOTES:

- Code to suggest proper dots params is largely borrowed from `rlang:::stop_arg_match()` (MIT-licensed).

- `methods::formalArgs(args(FN))` is a workaround to get formal args from both normal fns as well as primitives (see the illustration below).

- Function has been proposed upstream in PR #35, but there has been no reaction. Meanwhile the `check_dots_*` functions have been integrated into rlang, maybe propose it there?
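A quick illustration of why the `args()` workaround is needed, using the primitive `sum()`:

``` r
# primitives have no formals of their own...
formals(sum)
#> NULL

# ...but `args()` returns an equivalent closure whose formals can be inspected
methods::formalArgs(args(sum))
#> [1] "..."   "na.rm"
```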
#' Check that all named dots arguments are valid #' #' @description #' `r lifecycle::badge("experimental")` #' #' Ensures that [dots][base::dots()] `...` are either empty (if `.empty_ok = TRUE`), or all named elements in dots are a valid subset of `.fn`'s parameter #' names. In case of an invalid or `.forbidden` argument, an informative message is shown and the defined `.action` is taken. #' #' @details #' `check_dots_named()` is intended to combat the second one of the two major downsides that using `...` usually brings. In chapter 6.6 of the book #' _Advanced R_ it is [phrased](https://adv-r.hadley.nz/functions.html#fun-dot-dot-dot) as follows: #' #' _Using `...` comes with two downsides:_ #' #' - _When you use it to pass arguments to another function, you have to carefully explain to the user where those arguments go. This makes it hard to #' understand what you can do with functions like `lapply()` and `plot()`._ #' #' - **_A misspelled argument will not raise an error. This makes it easy for typos to go unnoticed._** #' #' @param ... Dots argument to check. #' @param .fn Function the `...` will be passed on to. #' @param .additional Parameter names within `...` that should be treated as valid in addition to `.fn`'s actual parameter names. A character vector. #' @param .forbidden Parameter names within `...` that should be treated as invalid. This has precedence over `.additional`. A character vector. #' @param .empty_ok Set to `TRUE` if empty `...` should be allowed, or to `FALSE` otherwise. #' @param .action Action to take when the check fails. One of `r as_md_vals(c("abort", "warn", "inform")) |> enum_str(sep2 = " or ")` #' #' @seealso rlang's check dots functions: [rlang::check_dots_empty()], [rlang::check_dots_used()] and [rlang::check_dots_unnamed()] #' @family dots #' @export #' #' @examples #' # We can use `check_dots_named()` to address this second downside: #' sum_safe <- function(..., #' na.rm = FALSE) { #' pal::check_dots_named(..., #' .fn = sum) #' sum(..., #' na.rm = na.rm) #' } #' #' # Note how the misspelled `na_rm` (instead of `na.rm`) silently gets ignored in the original #' # function, #' sum(1, 2, NA, na_rm = TRUE) #' #' # whereas our safe version properly errors: #' try( #' sum_safe(1, 2, NA, na_rm = TRUE) #' ) #' #' # We can even build an `sapply()` function that fails "intelligently": #' sapply_safe <- function(X, #' FUN, #' ..., #' simplify = TRUE, #' USE.NAMES = TRUE) { #' pal::check_dots_named(..., #' .fn = FUN) #' sapply(X = X, #' FUN = FUN, #' ..., #' simplify = TRUE, #' USE.NAMES = TRUE) #' } #' #' # While the original `sapply()` silently consumes misspelled named arguments via `...`, #' sapply(1:5, paste, "hour workdays", sep = "-", colaspe = " ") #' #' # `sapply_safe()` will throw an informative error message: #' try( #' sapply_safe(1:5, paste, "hour workdays", sep = "-", colaspe = " ") #' ) #' #' # But be aware that `check_dots_named()` might be a bit rash, #' try( #' sum_safe(a = 1, b = 2) #' ) #' #' # while the original function actually has nothing to complain: #' sum(a = 1, b = 2) #' #' # Furthermore, it doesn't play nicely with generics that don't expose all of the argument names #' # of the method that is eventually invoked (`to` and `by` in the case of `seq()` -> `seq.int()`): #' try( #' sapply_safe(X = c(0,50), #' FUN = seq, #' to = 100, #' by = 5) #' ) #' #' # To work around this, directly supply the proper method (`seq.int`), #' sapply_safe(X = c(0,50), #' FUN = seq.int, #' to = 100, #' by = 5) #' #' # or just provide `to` and `by` *unnamed*: 
#' sapply_safe(X = c(0,50), #' FUN = seq, #' 100, #' 5) check_dots_named <- function(..., .fn, .additional = NULL, .forbidden = NULL, .empty_ok = TRUE, .action = c("abort", "warn", "inform")) { checkmate::assert_function(.fn) checkmate::assert_character(.additional, any.missing = FALSE, null.ok = TRUE) checkmate::assert_character(.forbidden, any.missing = FALSE, null.ok = TRUE) checkmate::assert_flag(.empty_ok) .action <- rlang::arg_match(.action) if (...length() > 0L) { # determine original function name the `...` will be passed on to fn_arg_name <- deparse1(substitute(.fn)) parent_call <- as.list(sys.call(-1L)) parent_arg_names <- methods::formalArgs(args(sys.function(-1L))) if (fn_arg_name %in% parent_arg_names) { fn_name <- deparse1(parent_call[which(parent_arg_names == fn_arg_name) + 1L][[1L]]) } else { fn_name <- fn_arg_name } # determine arg names of the function the `...` will be passed on to dots_arg_names <- methods::formalArgs(args(.fn)) # check named `...` args purrr::walk(.x = setdiff(...names(), ""), .f = check_dot_named, dots_arg_names = dots_arg_names, allowed_arg_names = setdiff(union(dots_arg_names, .additional), .forbidden), fn_name = fn_name, action = .action) } else if (!.empty_ok) { msg <- "{.arg ...} must be provided (!= NULL)." switch(EXPR = .action, abort = cli::cli_abort(msg), warn = cli::cli_warn(msg), inform = cli::cli_inform(msg)) } } check_dot_named <- function(dot, dots_arg_names, allowed_arg_names, fn_name, action) { # The following code is largely borrowed from `rlang::arg_match()` allowed_arg_names_excl_dots <- setdiff(allowed_arg_names, '...') i <- match(dot, allowed_arg_names) if (rlang::is_na(i)) { is_forbidden <- dot %in% dots_arg_names is_restricted <- !setequal(dots_arg_names, allowed_arg_names) msg <- paste0(ifelse(is_forbidden, "Forbidden", "Invalid"), " named argument provided in {.arg ...}: {.arg {dot}}") if (length(allowed_arg_names_excl_dots) > 0L) { msg %<>% c(i = paste0(ifelse(is_restricted, "Named arguments allowed to pass on to ", "Valid named arguments for "), "{.fun {fn_name}} include: {.arg {allowed_arg_names_excl_dots}}")) } else { msg %<>% c(i = paste0("Only unnamed arguments are ", ifelse(is_restricted, "allowed", "valid"), " for {.fun {fn_name}}.")) } i_partial <- pmatch(dot, allowed_arg_names_excl_dots) candidate <- NULL if (!rlang::is_na(i_partial)) { candidate <- allowed_arg_names_excl_dots[[i_partial]] } i_close <- utils::adist(dot, allowed_arg_names_excl_dots) / nchar(allowed_arg_names_excl_dots) if (any(i_close <= 0.5)) { candidate <- allowed_arg_names_excl_dots[[which.min(i_close)]] } if (!is.null(candidate)) { msg %<>% c(">" = "Did you mean {.arg {candidate}}?") } switch(EXPR = action, abort = cli::cli_abort(msg), warn = cli::cli_warn(msg), inform = cli::cli_inform(msg)) } }
ls_pkg
#' List a subset of all installed packages #' #' @param pkg A character vector of package names. #' @param ignore_case Do not distinguish between upper and lower case letters in `pkg`. If `FALSE`, `pkg` is treated case-sensitive. #' @param as_regex Interpret `pkg` as regular expression(s). If `FALSE`, `pkg` is interpreted literally. #' #' @return A [tibble][tibble::tbl_df]. #' @family rpkgs #' @export #' #' @examples #' pal::ls_pkg(pkg = c("methods", "utils")) #' #' pal::ls_pkg(pkg = "^ut", #' as_regex = TRUE) ls_pkg <- function(pkg, ignore_case = TRUE, as_regex = FALSE) { checkmate::assert_character(pkg, any.missing = FALSE, min.chars = 1L) checkmate::assert_flag(ignore_case) checkmate::assert_flag(as_regex) regex <- pkg if (!as_regex) { regex <- paste0("\\Q", regex, "\\E") } regex %<>% fuse_regex() if (ignore_case) { regex <- paste0("(?i)", regex) } if (!as_regex) { regex <- paste0("^", regex, "$") } utils::installed.packages() |> tibble::as_tibble() |> dplyr::filter(stringr::str_detect(string = Package, pattern = regex)) }
use_pkg
#' Depend on another package #' #' Wrapper around [usethis::use_package()] with different defaults and optional `DESCRIPTION` file [tidying][usethis::use_tidy_description]. #' #' @inheritParams usethis::use_package #' @param tidy Whether or not to run [usethis::use_tidy_description()] after adding `package` to dependencies. #' #' @return `NULL`, invisibly. #' @family rpkgs #' @export use_pkg <- function(package, type = "Imports", min_version = TRUE, tidy = TRUE) { checkmate::assert_flag(tidy) rlang::check_installed("usethis", reason = reason_pkg_required()) # TODO: submit PR upstream for this if (isFALSE(min_version)) min_version <- NULL usethis::use_package(package = package, type = type, min_version = min_version) if (tidy) { usethis::use_tidy_description() } }
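A usage sketch (wrapped in `if (FALSE)` since running it would modify the `DESCRIPTION` file of the package in the current working directory):

``` r
if (FALSE) {
  # add dplyr to `Imports` incl. a minimum version requirement and tidy up `DESCRIPTION` afterwards
  pal::use_pkg("dplyr")

  # add a suggested dependency without a minimum version and without tidying
  pal::use_pkg("rmarkdown",
               type = "Suggests",
               min_version = FALSE,
               tidy = FALSE)
}
```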
is_pkg_installed
Part of this function was originally proposed by Artem Klevtsov on Stack Overflow.
#' Test if packages are installed #' #' Returns `TRUE` or `FALSE` for each `pkg`, depending on whether the `pkg` is installed on the current system or not, optionally ensuring a `min_version`. #' #' In contrast to [base::require()], this function checks if the packages are installed without attaching their namespaces if so. #' #' In contrast to [rlang::is_installed()] or [xfun::pkg_available()], this function doesn't load the packages if they're installed. #' #' In contrast to [xfun::pkg_available()], it is fully vectorized, i.e. returns a (named) logical vector of the same length as `pkg`. #' #' It is [considerably #' faster](https://stackoverflow.com/questions/9341635/check-for-installed-packages-before-running-install-packages/38082613#38082613) than the commonly used #' `pkg %in% rownames(installed.packages())` check. #' #' @param pkg Package names. A character vector. #' @param min_version Minimum required version number of each `pkg`. Must be either `NULL` to ignore version numbers, or a vector of #' [`package_version`][base::package_version()]s or something coercible to. #' #' @return A named logical vector of the same length as `pkg`. #' @family rpkgs #' @export #' #' @examples #' pal::is_pkg_installed("tidyverse") #' #' # it is vectorized... #' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"), #' min_version = c("1.0", "2.0", "99.9.9000")) #' #' # ...and scalar arguments will be recycled #' pal::is_pkg_installed(pkg = "dplyr", #' min_version = c("0.5", "1.0", "99.9.9000")) #' #' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"), #' min_version = "1.0") is_pkg_installed <- function(pkg, min_version = NULL) { checkmate::assert_character(pkg, any.missing = FALSE) min_version %<>% as.package_version() if (length(min_version) == 0L) { result <- purrr::map_lgl(magrittr::set_names(pkg, pkg), ~ nzchar(system.file(package = .x))) } else { result <- is_pkg_installed(pkg = pkg) %>% list(names(.), min_version) |> purrr::pmap_lgl(\(...) { if (..1) utils::packageVersion(pkg = ..2) >= ..3 else ..1 }) } result }
is_pkg_cran
#' Test if a package is available on CRAN #' #' @inheritParams is_http_success #' @param pkg Package name (case-sensitive). A character scalar. #' @param min_version Minimum required version number of `pkg`. Must be either `NULL` to ignore version numbers, or a single #' [`package_version`][base::package_version()] or something coercible to. #' #' @return A logical scalar. #' @family rpkgs #' @export #' #' @examples #' pal::is_pkg_cran("foobar") #' pal::is_pkg_cran("dplyr") #' pal::is_pkg_cran("dplyr", min_version = "9999.9") is_pkg_cran <- function(pkg, min_version = NULL, max_tries = 1L) { checkmate::assert_string(pkg) min_version %<>% as.package_version() rlang::check_installed(pkg = c("httr2", "rvest"), reason = reason_pkg_required()) url <- glue::glue("https://cran.r-project.org/package={pkg}") is_cran <- is_http_success(url = url, max_tries = max_tries) if (is_cran && length(min_version)) { is_cran <- httr2::request(base_url = url) |> httr2::req_method(method = "GET") |> httr2::req_retry(max_tries = max_tries) |> httr2::req_perform() |> httr2::resp_body_html() |> rvest::html_element(css = "body") |> rvest::html_element(css = "table") |> rvest::html_table() |> dplyr::filter(stringr::str_detect(X1, stringr::fixed("Version"))) |> dplyr::filter(as.package_version(X2) >= !!min_version) |> nrow() |> magrittr::is_greater_than(0L) } is_cran }
is_pkg_dir
#' Test if a directory is an \R package #' #' Convenience wrapper around the [`rprojroot::is_r_package`][rprojroot::is_r_package] root criterion. Note that it will by default only return `TRUE` for the #' root of a package directory, not its subdirectories. #' #' @param path Path of the directory to check. A character scalar. Defaults to the current working directory. #' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for an \R package setup. Setting #' `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package. #' #' @return `TRUE` if `path` is the root (if `check_parent_dirs = FALSE`) or a (sub)directory directory (if `check_parent_dirs = TRUE`) of an \R package, #' `FALSE` otherwise. #' @family rpkgs #' @export #' #' @examples #' pal::is_pkg_dir() #' pal::is_pkg_dir(fs::path_package("utils")) is_pkg_dir <- function(path = ".", check_parent_dirs = FALSE) { has_root(criterion = rprojroot::is_r_package, path = path, check_parent_dirs = check_parent_dirs) }
is_pkgdown_dir
#' Test if pkgdown is set up for an R package directory #' #' Convenience wrapper around the [`rprojroot::is_pkgdown_project`][rprojroot::is_pkgdown_project] root criterion. Note that it will by default only return #' `TRUE` for the root of a package directory and the `pkgdown` subdirectory, not other subdirectories. #' #' @param path Path of the R package directory to check. A character scalar. Defaults to the current working directory. #' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for a pkgdown setup. Setting #' `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package that has pkgdown set up. #' #' @return `TRUE` if pkgdown is set up for `path`, `FALSE` otherwise. #' @family rpkgs #' @export #' #' @examples #' pal::is_pkgdown_dir() #' pal::is_pkgdown_dir(fs::path_package("pal")) is_pkgdown_dir <- function(path = ".", check_parent_dirs = FALSE) { has_root(criterion = rprojroot::is_pkgdown_project, path = path, check_parent_dirs = check_parent_dirs) }
exists_in_namespace
#' Test if object exists in namespace #' #' Determines whether or not an object exists in a specific [namespace](https://adv-r.hadley.nz/environments.html#namespaces). #' #' This complements [utils::getFromNamespace()] and uses almost the same code internally. #' #' @inheritParams utils::getFromNamespace #' #' @return A logical scalar. #' @export #' #' @examples #' if (pal::is_pkg_installed("pkgpurl")) { #' pal::exists_in_namespace("pkg_config", "pkgpurl") #' } #' #' pal::exists_in_namespace("pkg_config", "pal") exists_in_namespace <- function(x, ns, pos = -1L, envir = as.environment(pos)) { if (missing(ns)) { nm <- attr(x = envir, which = "name", exact = TRUE) if (is.null(nm) || !startsWith(nm, "package:")) { cli::cli_abort("Specified {.arg envir} is not a package environment.") } ns <- asNamespace(substring(nm, 9L)) } else { ns <- asNamespace(ns) } exists(x = x, envir = ns, inherits = FALSE) }
reason_pkg_required
#' Assemble reason why package is required #' #' Assembles a prose text containing the reason why a certain \R package is required by the `fn` of another `pkg`. Intended to be used as argument `reason` of #' [rlang::check_installed()]. #' #' @param fn Name of the function that requires the \R package. A character scalar. #' @param pkg Name of the package `fn` is part of. A character scalar. #' #' @return A character scalar. #' @export #' #' @examples #' \dontrun{ #' rlang::check_installed(pkg = "foo", #' reason = pal::reason_pkg_required(pkg = "bar", #' fn = "serve")) #' } reason_pkg_required <- function(fn = rlang::call_name(rlang::caller_call()), pkg = utils::packageName(parent.frame())) { checkmate::assert_string(fn) checkmate::assert_string(pkg) cli::format_inline("by {.fun {pkg}::{fn}}, but is not installed.") }
TODO:
- Add support for config files (text files with structured data in YAML, TOML or JSON) as a (4th) pkg config val src.
- Add support for system keyrings as a (5th) pkg config val src via keyring::key_get(), which is a more secure way to store secrets than the existing srcs (a rough sketch follows after this list).
- Document how to add pkg config info to R/<pkg>-package.R:

  ``` r
  r pkgsnip::md_snip("pkg_config", pkg = "PKG_NAME")
  ```

  and figure out the best way to show the same info on the pkgdown site (pkgdown builds R/<pkg>-package.R to /reference/<pkg>-package.html but doesn't seem to link it anywhere).
- Outsource these fns into a separate pkg?
  - Pro:
  - Contra:

  For these fns and their deps, currently the following pkgs are required (almost all of pal's deps):

  Thus, outsourcing would probably only make sense if we also restricted ourselves to using base R fns only.

  Possible pkg names:

  Maybe first compile a systematic review of existing pkgs with similar functionality and ask Posit/tidyverse folks for advice?
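A rough, purely illustrative sketch of what such a keyring-based lookup could look like (keyring support is *not* implemented in pal; the helper name and naming scheme below are invented):

``` r
# Hypothetical 5th config value source: look up a secret named `R_<PKG>_<KEY>`
# in the system keyring. Purely a sketch -- pal does not provide this (yet).
pkg_config_val_keyring <- function(key, pkg) {
  secret_name <- toupper(paste("R", pkg, key, sep = "_"))
  tryCatch(keyring::key_get(service = secret_name),
           error = function(e) NULL)
}
```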
Functions around pal's canonical way of handling package configuration.
pkg_config_val
#' Get package configuration value #' #' @description #' Retrieves a package configuration value in a canonical way. The following configuration sources are consulted in descending order and the first hit is #' returned: #' #' 1. The \R [option][options] `<pkg>.<key>`. #' 2. The [environment variable](https://en.wikipedia.org/wiki/Environment_variable) `R_<PKG>_<KEY>`. #' 3. The ad-hoc default value specified via this function's `default` argument (`NULL` means unspecified). #' 4. The configuration's global default value as specified in the package's configuration metadata (column `default_value` or `default_value_dynamic` of #' `<pkg>::pkg_config`; `NULL` means unspecified). #' #' Depending on `require`, an error is thrown if none of the above sources contain a value. #' #' @details #' This function is intended to be used by package authors who want to expose their package configuration options in a canonical way (as outlined above). For #' `pkg_config_val()` to properly work, the configuration metadata must be available in the package's namespace as object `pkg_config`, which must be a #' [dataframe][data.frame] or [tibble][tibble::tbl_df] with at minimum the columns `key` (of type character holding the configuration key names) and #' `default_value` (of type list holding static default configuration values) or `default_value_dynamic` (of type character holding R code expressions that #' evaluate to default configuration values dynamically at access time). #' #' @param key Configuration key name. A character scalar. #' @param pkg Package name. A character scalar. Defaults to the name of the calling package. #' @param default Default value to fall back to if neither the \R option `<pkg>.<key>` nor the environment variable `R_<PKG>_<KEY>` is set. If `NULL`, the #' default value for `key` in `<pkg>::pkg_config` will be used (if defined). #' @param require Whether or not to require that the configuration value is set. If `TRUE` and no configuration value is set, an error is thrown with #' instructions on how to provide a value. If `NULL`, the `require` value for `key` in `<pkg>::pkg_config` will be used (defaults to `TRUE`). #' @param env Environment to evaluate `default_value_dynamic` in, if necessary. #' #' @return `r pkgsnip::return_lbl("r_obj")` #' @seealso [xfun::env_option()] for a compatible (albeit less powerful) approach to R option and environment variable coherence. 
#' @family pkg_config #' @export #' #' @examples #' try( #' pal::pkg_config_val(key = "gen_pkgdown_ref", #' pkg = "pkgpurl") #' ) pkg_config_val <- function(key, pkg = utils::packageName(env = parent.frame()), default = NULL, require = NULL, env = parent.frame()) { checkmate::assert_flag(require, null.ok = TRUE) pkg_config <- get_pkg_config(pkg) key <- rlang::arg_match0(key, values = pkg_config$key) # 1st priority: R option result <- getOption(pkg_config_opt_name(pkg = pkg, key = key)) # 2nd priority: environment variable if (is.null(result)) { result <- Sys.getenv(pkg_config_env_var_name(pkg = pkg, key = key), unset = NA, names = FALSE) } # 3rd priority: default value if (is.na(result)) { result <- default %||% get_pkg_config_val_default(key = key, pkg_config = pkg_config, env = env) } # abort if value is required but none was provided if (is.null(result)) { if (is.null(require)) { require <- pkg_config |> dplyr::filter(key == !!key) |> dplyr::pull("require") } if (require) { cli::cli_abort(paste0("Please set the {pkg} package configuration option {.field {key}} by either setting the R option ", "{.field {pkg_config_opt_name(pkg = pkg, key = key)}} or the environment variable ", "{.envvar {pkg_config_env_var_name(pkg = pkg, key = key)}}.")) } } result }
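To make the lookup order above tangible, here's a minimal sketch for a hypothetical package "mypkg" with a single, made-up key "verbose":

``` r
# Configuration metadata defined in the namespace of the (hypothetical) package "mypkg":
pkg_config <- tibble::tibble(key           = "verbose",
                             default_value = list(FALSE),
                             description   = "Whether to print progress information.")

# `pal::pkg_config_val(key = "verbose", pkg = "mypkg")` would then consult, in order:
# 1. the R option             `mypkg.verbose`
# 2. the environment variable `R_MYPKG_VERBOSE`
# 3. the `default` argument of the call itself
# 4. `default_value` from the metadata above, i.e. `FALSE`
```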
pkg_config_val_default
#' Get default package configuration value #' #' Retrieves a package configuration's default value from the package's configuration metadata (column `default_value` or `default_value_dynamic` of #' `<pkg>::pkg_config`). If no default value is specified (`NULL`), nothing is returned (`NULL`). #' #' @inheritParams pkg_config_val #' #' @return `r pkgsnip::return_lbl("r_obj")` #' @family pkg_config #' @export #' #' @examples #' try( #' pal::pkg_config_val_default(key = "gen_pkgdown_ref", #' pkg = "pkgpurl") #' ) pkg_config_val_default <- function(key, pkg = utils::packageName(env = parent.frame()), env = parent.frame()) { get_pkg_config_val_default(key = key, pkg_config = get_pkg_config(pkg), env = env) }
has_pkg_config_val
#' Test if package configuration value is set #' #' Tests whether or not a certain package configuration value is set. See [pkg_config_val()] for the underlying concept. #' #' Note that `has_pkg_config_val()` throws an error if the package configuration *key* doesn't exist. #' #' @inheritParams pkg_config_val #' #' @return A logical scalar. #' @family pkg_config #' @export #' #' @examples #' try( #' pal::has_pkg_config_val(key = "gen_pkgdown_ref", #' pkg = "pkgpurl") #' ) has_pkg_config_val <- function(key, pkg = utils::packageName(env = parent.frame()), env = parent.frame()) { !is.null(pkg_config_val(key = key, pkg = pkg, default = NULL, require = FALSE, env = env)) }
augment_pkg_config
#' Augment package configuration metadata #' #' Augments a package's configuration metadata (`<pkg>::pkg_config`) with the columns `r_opt` and `env_var` holding the respective \R option and environment #' variable names. #' #' @inheritParams pkg_config_val #' #' @return A [tibble][tibble::tbl_df] with at minimum the columns `key`, `default_value`, `r_opt` and `env_var`. #' @family pkg_config #' @export #' #' @examples #' try( #' pal::augment_pkg_config(pkg = "pkgpurl") #' ) augment_pkg_config <- function(pkg = utils::packageName(env = parent.frame())) { get_pkg_config(pkg) |> dplyr::mutate(r_opt = pkg_config_opt_name(pkg = pkg, key = key), env_var = purrr::map_chr(key, \(k) pkg_config_env_var_name(pkg = pkg, key = k))) }
print_pkg_config
NOTES:
The `description` column
is not mandatory.#' Print package configuration metadata #' #' Prints a package's configuration metadata (`<pkg>::pkg_config`) as a prettily formatted Markdown table. #' #' @inheritParams pkg_config_val #' @param roxy_to_md Whether or not to convert roxygen2 documentation [links in pseudo-Markdown #' style](https://roxygen2.r-lib.org/articles/rd-formatting.html#function-links) to actual Markdown ones using [roxy_to_md_links()]. #' #' @inherit pipe_table return #' @family pkg_config #' @export #' #' @examples #' try( #' pal::print_pkg_config(pkg = "pkgpurl") #' ) print_pkg_config <- function(pkg = utils::packageName(env = parent.frame()), roxy_to_md = FALSE) { checkmate::assert_flag(roxy_to_md) augment_pkg_config(pkg) |> dplyr::mutate(dplyr::across(c(r_opt, env_var), \(x) wrap_chr(x, wrap = "`")), dplyr::across(any_of("description"), \(x) if (roxy_to_md) purrr::map_chr(x, roxy_to_md_links) else x), default_value = purrr::map2(default_value, default_value_dynamic, \(x, y) { if (!is.na(y)) { return(wrap_chr(y, wrap = "`")) } else if (is.null(x)) { return("") } else { return(x |> constructive::construct(unicode_representation = "unicode", check = TRUE, one_liner = TRUE) |> purrr::chuck("code") |> wrap_chr(wrap = "`")) } })) |> dplyr::select(any_of("description"), r_opt, env_var, default_value) |> dplyr::rename_with(.cols = any_of("description"), .fn = stringr::str_to_title) |> dplyr::rename(`R option` = r_opt, `Environment variable` = env_var, `Default value` = default_value) |> pipe_table() }
Extending the desc package.
desc_list
#' Get all `DESCRIPTION` file fields as cleaned up list #' #' @description #' Returns all fields from a `DESCRIPTION` file as a named list with values cleaned up: #' - Whitespaces at the start and end of field values as well as repeated whitespaces within them are removed. #' - Multi-value fields are returned as vectors. #' - The fields `Depends`, `Imports` and `Suggests` are returned as a single data frame named `dependencies`. #' #' @inheritParams desc_value #' #' @return A list. #' @family desc #' @export #' #' @examples #' fs::path_package(package = "dplyr") |> pal::desc_list() desc_list <- function(file = ".") { rlang::check_installed("desc", reason = reason_pkg_required()) fields <- desc::desc_fields(file = file) result <- fields |> setdiff(c("Authors@R", "Depends", "Imports", "Suggests", "URL")) |> rlang::set_names() |> purrr::map(desc_value, file = file) if ("Authors@R" %in% fields) result[["Authors@R"]] <- desc::desc_get_authors(file = file) if (any(c("Depends", "Imports", "Suggests") %in% fields)) result[["dependencies"]] <- desc::desc_get_deps(file = file) if ("URL" %in% fields) result[["URL"]] <- desc::desc_get_urls(file = file) result }
desc_value
TODO:
- Rename fn to something like desc_get_field_safe()?
- Figure out why setting default to an error call referring to key doesn't work, i.e.:

  ``` r
  pal::desc_value("Suggestss", default = cli::cli_abort("No {.field {key}} field set in {.file DESCRIPTION}!"))
  ```
#' Get value from `DESCRIPTION` file field, cleaned up and with dynamic fallback #' #' Returns the value from a `DESCRIPTION` file field (aka _key_). Whitespaces at the start and end of the value as well as repeated whitespaces within #' it are removed. #' #' This function is a slightly modified version of [desc::desc_get_field()] that allows the `default` parameter to be dependent on the `key` parameter. #' #' By default, the following string is returned if `key = "NoRealKey"` is not found: #' #' ``` #' "<No `NoRealKey` field set in DESCRIPTION!>" #' ``` #' #' If you rather want to take an action like throwing an error, it's recommended to call [desc::desc_get_field()] directly. #' #' @inheritParams desc::desc_get_field #' @param default Default value to return if `key` is not found. #' #' @return A character scalar. #' @family desc #' @export #' #' @examples #' pal::desc_value(key = "Description", #' file = fs::path_package("pal")) desc_value <- function(key, file = ".", default = glue::glue("<No \x60{key}\x60 field set in DESCRIPTION!>")) { rlang::check_installed("desc", reason = reason_pkg_required()) desc::desc_get_field(key = key, default = default, file = file) }
desc_dep_vrsn
#' Get dependency version from `DESCRIPTION` file #' #' Returns the version of the specified `pkg` dependency from a `DESCRIPTION` file. #' #' @inheritParams pkg_config_val #' @inheritParams desc::desc_get_deps #' @param types Dependency types to be considered. If `pkg` is listed in multiple dependency types, the maximally required version is returned. #' #' @return `r pkgsnip::return_lbl("num_vrsn")` #' @family desc #' @export #' #' @examples #' fs::path_package(package = "dplyr") |> pal::desc_dep_vrsn(pkg = "tibble") desc_dep_vrsn <- function(pkg, file = ".", types = c("Imports", "Depends", "Suggests", "Enhances", "LinkingTo")) { checkmate::assert_string(pkg) types <- rlang::arg_match(arg = types, multiple = TRUE) result <- desc::desc_get_deps(file = file) |> dplyr::filter(package == !!pkg & type %in% !!types) if (nrow(result) == 0L) { cli::cli_abort("Package {.val {pkg}} is not listed as a dependency of type {.or {.val {types}}} in {.file {file}}.") } result$version |> stringr::str_extract("\\d+[\\d.]+") |> purrr::map(\(x) { if (is.na(x)) { as.numeric_version(NULL)[NA] } else { as.numeric_version(x) } }) |> purrr::list_c(ptype = numeric_version(NULL)) |> max(na.rm = TRUE) |> when(length(.) == 0L ~ as.numeric_version(NULL)[NA], ~ .) }
desc_url_git
TODO:
#' Get the Git repository URL from `DESCRIPTION` file #' #' Returns the first Git repository URL found in the `URL` (preferred) or `BugReports` fields of a `DESCRIPTION` file. #' #' Currently, this function recognizes [GitLab](https://about.gitlab.com/), [GitHub](https://github.com/), [Gitea](https://gitea.com/), #' [Codeberg](https://codeberg.org/), [Pagure](https://pagure.io/), [Bitbucket](https://bitbucket.org/) and [SourceHut](https://sr.ht/) repository URLs. #' #' @inheritParams desc::desc_get_field #' #' @return A character scalar. #' @family desc #' @export desc_url_git <- function(file = ".") { rlang::check_installed("desc", reason = reason_pkg_required()) desc::desc_get_field(key = "BugReports", default = character(), file = file) |> stringr::str_replace(pattern = "/issues/?$", replacement = "/") %>% c(desc::desc_get_urls(file = file), .) |> stringr::str_subset(pattern = "^https?://(git(hub|lab|ea)\\..+|(codeberg|bitbucket)\\.org|(git\\.)?src\\.ht|pagure\\.io)/") |> dplyr::first() }
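Since the block above doesn't ship an example, here's a hedged usage sketch mirroring the other `desc_*()` examples (the returned URL naturally depends on the `DESCRIPTION` file at hand):

``` r
# Determine the Git repository URL of an installed package from its DESCRIPTION.
fs::path_package(package = "pal") |> pal::desc_url_git()
```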
Complementing and extending the roxygen2 package.
fn_param_defaults
#' Get function's default parameter values #' #' Extracts a function parameter's default value(s) from its language definition and returns the result as a character vector. #' #' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code #' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows: #' #' ```r #' #' @param some_param Some parameter. One of #' #' `r pal::fn_param_defaults(param = "some_param", fn = "some_fn") |> pal::wrap_chr("\x60") |> cli::ansi_collapse()`. #' #' #' some_fn <- function(some_param = c("a", "b", "c")) { #' some_param <- rlang::arg_match(some_param) #' ... #' } #' ``` #' #' Or to list the possible parameter values formatted as an unnumbered list instead, replace `cli::ansi_collapse()` with [pal::as_md_list()] in the example #' above. #' #' # Caveats #' #' [base::deparse1()] is used internally to get a character representation of non-character default values. Therefore all of `deparse()`'s fuzziness also #' applies to this function. #' #' @param param Parameter name. A character scalar. #' @param fn A [function][base::function] or a function name (searched for in `env`). See [base::formals()] for details. #' @param env [Environment][base::environment] `fn` is defined in. See [base::formals()] for details. #' #' @return A character vector. #' @family roxy #' @export #' #' @examples #' pal::fn_param_defaults(param = ".name_repair", #' fn = tibble::as_tibble) #' #' # as Markdown-formatted enumeration in prose #' pal::fn_param_defaults(param = ".name_repair", #' fn = tibble::as_tibble) |> #' pal::wrap_chr("`") |> #' cli::ansi_collapse() |> #' cat() fn_param_defaults <- function(param, fn = sys.function(sys.parent()), env = parent.frame()) { checkmate::assert_string(param) # turn `fn` into type function if necessary (the same as `formals(fun)` does internally) if (is.character(fn)) { fn %<>% get(mode = "function", envir = env) } default_vals <- formals(fun = args(name = fn), envir = env) if (param %in% names(default_vals)) { default_vals <- default_vals[[param]] } else { fn_name <- deparse1(expr = substitute(fn), backtick = FALSE) cli::cli_abort("The function {.fn {fn_name}} does not have a parameter named {.arg {param}}.") } if (missing(default_vals)) { fn_name <- deparse1(expr = substitute(fn), backtick = FALSE) cli::cli_abort("{.fn {fn_name}}'s parameter {.arg {param}} does not have a default value.") } # evaluate default param if it results in a character vector if (is.language(default_vals)) { evaluated_default_vals <- tryCatch(expr = eval(expr = default_vals, envir = env), error = \(x) NULL) if (is.character(evaluated_default_vals)) default_vals <- evaluated_default_vals } if (is.character(default_vals)) { default_vals %<>% wrap_chr() } else { default_vals %<>% deparse1(backtick = FALSE, control = c("keepNA", "keepInteger", "niceNames", "showAttributes", "warnIncomplete")) } default_vals }
enum_fn_param_defaults
#' Enumerate function's default parameter values #' #' Convenience function combining [fn_param_defaults()], [wrap_chr()] and [cli::ansi_collapse()]. #' #' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code #' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows: #' #' ```r #' #' @param some_param Some parameter. One of #' #' `r pal::enum_fn_param_defaults(param = "some_param", fn = "some_fn")`. #' #' #' some_fn <- function(some_param = c("a", "b", "c")) { #' some_param <- rlang::arg_match(some_param) #' ... #' } #' ``` #' #' @inheritParams fn_param_defaults #' @param wrap Character sequence the default parameter values are to be wrapped in. A character vector or something coercible to. #' @param sep2,last Passed on to [cli::ansi_collapse()]. #' @param ... Further arguments passed on to [cli::ansi_collapse()]. #' #' @return A character scalar. #' @family roxy #' @export #' #' @examples #' pal::enum_fn_param_defaults(param = ".name_repair", #' fn = tibble::as_tibble) |> #' cat() enum_fn_param_defaults <- function(param, fn = sys.function(sys.parent()), env = parent.frame(), wrap = "`", sep2 = " or ", last = sep2, ...) { check_dots_named(..., .fn = cli::ansi_collapse, .forbidden = c("sep2", "last")) fn_param_defaults(param = param, fn = fn, env = env) |> wrap_chr(wrap = wrap) |> cli::ansi_collapse(sep2 = sep2, last = last, ...) }
roxy_to_md_links
NOTES:
Stripping all valid MD links using xml2::xml_text(md_to_xml(x)) so that only roxygen2 documentation links remain won't work since the Markdown normalization also removes enclosing backticks from the latter (i.e. "[`base::c()`]" becomes "[base::c()]").
roxygen2 only allows plain text, optionally formatted as inline code, as documentation link text. It does not allow mixing inline-code-formatted and plain text within a single documentation link text (it throws a warning if tried). This simplifies link text replacement quite a bit for us.
While downlit::autolink_url() requires objects with non-standard names like pkgdown-package to be wrapped in backticks to be recognized (i.e. "pkgdown::`pkgdown-package`"), roxygen2 only handles them properly when the backticks are omitted and otherwise throws a warning:

refers to unavailable topic pkgdown::`pkgdown-package`

Thus, when linking to objects with non-standard names, it's recommended to link directly to their pkgdown documentation instead.
#' Convert roxygen2 documentation links to Markdown #' #' Converts roxygen2 documentation [links in pseudo-Markdown style](https://roxygen2.r-lib.org/articles/rd-formatting.html#function-links) to actual Markdown #' ones using [downlit::autolink_url()]. #' #' @param x Markdown text with roxygen2 documentation links. A character scalar. #' #' @return A character scalar. #' @family roxy #' @export #' #' @examples #' pal::roxy_to_md_links("[base::c()] is so short I almost forget it's there.") #' pal::roxy_to_md_links("[`base::c()`], probably the most used base R function ever.") #' pal::roxy_to_md_links("Some functions [are magic][downlit::autolink_url]!") #' pal::roxy_to_md_links("downlit's [`autolink_url()`][downlit::autolink_url()] seems magic!") roxy_to_md_links <- function(x) { checkmate::assert_string(x) rlang::check_installed(pkg = "downlit", reason = reason_pkg_required()) # determine the roxy-specific doc links by first parsing input as CommonMark which escapes all brackets from roxy doc links (but not from valid MD links) links_roxy <- md_to_xml(x) |> xml2::xml_contents() |> xml_to_md() |> stringr::str_extract_all(pattern = "(\\\\\\[`[^`]+`\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?|\\\\\\[[^\\]]+?\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?)") |> purrr::list_c(ptype = character()) |> stringr::str_remove_all(pattern = stringr::fixed("\\")) is_short <- stringr::str_detect(string = links_roxy, pattern = "^\\[[^\\]]+\\]$") targets_roxy <- links_roxy # remove possible enclosing backticks from roxy shortlinks to make them parseable by downlit targets_roxy[is_short] %<>% stringr::str_remove_all(pattern = stringr::fixed("`")) targets_roxy %<>% stringr::str_extract(pattern = "\\[([^\\]]+)\\]$", group = 1L) %>% purrr::map(downlit::autolink_url) %>% purrr::list_c(ptype = character()) process <- !is.na(targets_roxy) # short-circuit if nothing to do if (!any(process)) { return(x) } links_md <- links_roxy links_md[process & !is_short] %<>% stringr::str_replace(pattern = "\\[[^\\]]+\\]$", replacement = paste0("(", targets_roxy[process & !is_short], ")")) links_md[process & is_short] %<>% stringr::str_replace(pattern = "(?<=^\\[)([^`\\]]+)", replacement = "`\\1`") %>% paste0("(", targets_roxy[process & is_short], ")") links_md[process] |> magrittr::set_names(value = paste0("\\Q", links_roxy[process], "\\E")) |> stringr::str_replace_all(string = x) }
roxy_blocks
#' Get roxygen2 blocks #' #' @description #' #' `r lifecycle::badge("experimental")` #' #' Parses the roxygen2 package documentation of a specific R package or from a single `.R` source code file. #' #' @param pkg,text Either a package name (`pkg`) or a character vector of \R source code lines (`text`) to extract the object's roxygen2 tag value from. #' @param ... Further arguments passed on to [utils::download.packages()], excluding #' `r cli::ansi_collapse(as_md_vals(forbidden_dots$roxy_tag_value), last = " and ")`. Only relevant if `pkg` is provided. #' @param quiet `r pkgsnip::param_lbl("quiet")` #' #' @return A list of [`roxy_block`][roxygen2::roxy_block] objects. #' @family roxy #' @export #' #' @examples #' # Either provide an R source file as a character vector `text`... #' text <- readr::read_lines(paste0("https://raw.githubusercontent.com/r-lib/rlang/", #' "db52a58d505b65f58ba922d4752b5b0061f2a98c/R/fn.R")) #' #' pal::roxy_blocks(text = text) |> head(n = 3L) #' #' # ...or provide a package name as `pkg` #' try( #' pal::roxy_blocks(pkg = "tinkr", #' repos = "https://cloud.r-project.org") |> #' head(n = 3L) #' ) roxy_blocks <- function(pkg = NULL, ..., text = NULL, quiet = TRUE) { rlang::check_installed("roxygen2", reason = reason_pkg_required()) checkmate::assert_flag(quiet) checkmate::assert_string(pkg, null.ok = TRUE) checkmate::assert_character(text, null.ok = TRUE) is_pkg_null <- is.null(pkg) is_text_null <- is.null(text) if (is_pkg_null && is_text_null) cli::cli_abort("One of {.arg pkg} and {.arg text} mustn't be {.val NULL}.") if (!is_pkg_null && !is_text_null) cli::cli_abort("Only one of {.arg pkg} and {.arg text} can be provided.") if (is_pkg_null) { rlang::check_dots_empty0(...) blocks <- tryCatch(expr = roxygen2::parse_text(text = text), error = \(x) list(FALSE, x)) if (length(blocks) > 0L && isFALSE(blocks[[1L]])) { error_msg <- as.character(blocks[[2L]]) is_missing_obj <- stringr::str_detect(string = error_msg, pattern = " (not found|could not find .+)\\n") cli::cli_abort(ifelse(is_missing_obj, error_msg |> stringr::str_remove(pattern = '^.*: ') |> stringr::str_replace(pattern = "could not find function \"(.+?)\"", replacement = "Function {.fun \\1} not found") |> stringr::str_replace(pattern = "object '(.+?)'", replacement = "Object {.var \\1}") |> stringr::str_replace(pattern = "not found\\n?", replacement = paste0("was not found when parsing {.arg text}. You might need to attach the package you're ", "trying to extract the roxygen2 tag value from. Or try ", "{.fn pkg_roxy_tag_value}.")), "Error parsing {.arg text}: {error_msg}")) } } else { rlang::check_installed("rappdirs", reason = reason_pkg_required()) check_dots_named(..., .fn = utils::download.packages, .forbidden = forbidden_dots$roxy_tag_value) # alert if installed version doesn't match downloaded one: pkgs_available <- utils::available.packages(type = "source", filters = c("R_version", "OS_type", "subarch", "CRAN")) |> tibble::as_tibble() |> dplyr::filter(Package == pkg) pkg_version_max_installed <- ls_pkg(pkg = pkg, as_regex = FALSE) |> dplyr::pull("Version") |> max() if (pkg_version_max_installed %in% pkgs_available$Version) { pkgs_available %<>% dplyr::filter(Version == pkg_version_max_installed) } else { cli::cli_alert_warning(paste0("No sources available for download of locally installed version {.val pkg_version_max_installed} of package {.pkg {pkg}}. ", "Downloading sources of latest available version {.val {max(pkgs_available[, 'Version'])}} instead. 
If the following ", "roxygen tag parsing fails or produces unexpected results, consider updating package {.pkg {pkg}}.")) } pkgs_available %<>% as.matrix() %>% magrittr::set_rownames(value = .[, 1L]) tmp_dir <- rappdirs::user_cache_dir() |> fs::path(glue::glue("pal-roxy_tag_value-{pkg}")) |> fs::dir_create() |> fs::path_real() tmp_archive <- utils::download.packages(pkgs = pkg, destdir = tmp_dir, available = pkgs_available, ... = ..., type = "source", quiet = quiet) utils::untar(tarfile = tmp_archive[1L, 2L], exdir = tmp_dir) tmp_pkg <- fs::path(tmp_dir, pkg) blocks <- if (quiet) suppressMessages(roxygen2::parse_package(path = tmp_pkg)) else roxygen2::parse_package(path = tmp_pkg) unlink(x = tmp_dir, recursive = TRUE) } blocks }
roxy_obj
#' Get roxygen2 block object #' #' Extracts a single object from a list of [`roxy_block`][roxygen2::roxy_block] objects. #' #' @param blocks A list of [`roxy_block`][roxygen2::roxy_block] objects as returned by [roxy_blocks()]. #' @param obj_name Object name to extract, usually a function name. A character scalar. #' #' @return A [`roxy_block`][roxygen2::roxy_block] object. #' @family roxy #' @export roxy_obj <- function(blocks, obj_name) { checkmate::assert_list(blocks, types = "roxy_block", all.missing = FALSE) obj_names <- blocks |> purrr::map_depth(.depth = 1L, .f = purrr::pluck, "object", "topic") |> purrr::compact() |> purrr::list_c(ptype = character()) obj_name <- rlang::arg_match0(arg = obj_name, values = obj_names) blocks[[which(obj_names == obj_name)]] }
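A small usage sketch (the `add()` function and its roxygen2 comment below are made up for illustration):

``` r
# Parse roxygen2 blocks from some (hypothetical) source lines, then pick out a single block.
text <- c("#' Add two numbers",
          "#'",
          "#' @param x,y Numbers to add.",
          "#' @export",
          "add <- function(x, y) x + y")

blocks <- pal::roxy_blocks(text = text)
pal::roxy_obj(blocks, obj_name = "add")
```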
roxy_tag_value
TODO: Fix failing examples during pkgdown::build_site() or -- if not possible to fix -- figure out how to disable running these examples during pkgdown build.
#' Get an object's roxygen2 tag value(s) #' #' Extracts the value(s) belonging to the `tag_name`s documenting `obj_name` from a list of [`roxy_block`][roxygen2::roxy_block] objects. #' #' @inheritParams roxy_obj #' @param tag_names Name(s) of the [roxygen2 tag(s)](https://roxygen2.r-lib.org/articles/rd.html) (without the `@` prefix) to extract the value(s) from. A #' character vector. #' @param param_name Parameter name to extract the value from. Only relevant if `"param" %in% tag_names`. A character scalar. #' #' @return A character vector of the same length as `tag_names`. #' @family roxy #' @export #' #' @examples #' if (interactive()) { #' try( #' pal::roxy_blocks(pkg = "viridisLite") |> #' pal::roxy_tag_value(obj_name = "viridis", #' tag_names = "param", #' param_name = "alpha") |> #' cat() #' ) #' } roxy_tag_value <- function(blocks, obj_name, tag_names, param_name) { block <- roxy_obj(blocks = blocks, obj_name = obj_name) tags <- block$tags checkmate::assert_subset(tag_names, choices = purrr::map_chr(tags, purrr::pluck, "tag"), empty.ok = FALSE) tag_names %<>% unique() if ("param" %in% tag_names) { ix_param <- tags |> purrr::map_lgl(\(x) x$tag == "param") |> which() param_name <- rlang::arg_match0(arg = param_name, values = purrr::map_chr(tags[ix_param], purrr::pluck, "val", "name")) i_to_keep <- tags[ix_param] |> purrr::map_lgl(\(x) x$val$name == param_name) |> which() block$tags %<>% magrittr::extract(-ix_param[-i_to_keep]) } purrr::map_chr(tag_names, \(x) roxygen2::block_get_tag_value(block = block, tag = x) |> when(is.list(.) ~ .$description, ~ .)) }
md_verb
NOTES:
rlang::check_dots_unnamed() is not used since it throws an error with more exotic expressions like the ones in the examples. Thus, the param names were prefixed with a dot to avoid unintentional matches from ...
.#' Convert to verbatim Markdown #' #' Converts the provided \R expressions to their character representation using [base::deparse1()] and formats them as [verbatim #' Markdown](https://pandoc.org/MANUAL.html#verbatim). #' #' @param ... \R expression(s) to convert to verbatim Markdown. Must be unnamed. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param .eval Whether or not to evaluate the expression(s) in `...`. #' @param .collapse String to separate the results of a single expression in `...`. #' @param .backtick Whether or not to enclose symbolic names in backticks if they do not follow the standard syntax. #' @param .control Deparsing options. A character vector or `NULL`. See [base::.deparseOpts] for all possible options. #' @param .width.cutoff Cutoff (in bytes) at which line-breaking is tried. An integer scalar between `20` and `500`. #' @param .nlines Maximum number of lines to produce. A negative value indicates no limit. An integer scalar. #' #' @return A character vector of the same length as `...`. #' @family md #' @export #' #' @examples #' pal::md_verb(1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |> #' pal::cat_lines() #' #' # you can splice vector or list expressions if you like #' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |> #' pal::cat_lines() #' #' # to evaluate, or not to evaluate, that is the question #' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`), #' .eval = FALSE) |> #' pal::cat_lines() #' #' # unevaluated expressions do not need to exist #' pal::md_verb(Not, actual(), `R-expressions`, #' .eval = FALSE) |> #' pal::cat_lines() #' #' # you can opt out of wrapping non-standard syntax in additional backticks #' pal::md_verb(Not, actual(), `R-expressions`, #' .eval = FALSE, #' .backtick = FALSE) |> #' pal::cat_lines() # nolint start: object_name_linter. md_verb <- function(..., .eval = TRUE, .collapse = " ", .backtick = TRUE, .control = c("keepNA", "keepInteger", "niceNames", "showAttributes", "warnIncomplete"), .width.cutoff = 500L, .nlines = -1L) { checkmate::assert_flag(.eval) checkmate::assert_flag(.backtick) if (.eval) { result <- rlang::list2(...) } else { result <- eval(substitute(alist(...))) } result |> purrr::map(\(x) { deparsed <- deparse1(expr = x, collapse = .collapse, width.cutoff = .width.cutoff, backtick = .backtick, control = .control, nlines = .nlines) escape_backtick <- stringr::str_detect(string = deparsed, pattern = stringr::fixed("`")) paste0("`", "` "[escape_backtick], deparsed, " `"[escape_backtick], "`") }) |> purrr::list_c(ptype = character()) } # nolint end
as_md_list
TODO:
#' Convert a character vector to a Markdown list #' #' Convenience wrapper around [pander::pandoc.list.return()] to convert a character vector (or something coercible to) to a [Markdown #' list](https://pandoc.org/MANUAL.html#lists). #' #' @param x \R object, e.g. a character vector. Each element of `x` will become an item in the resulting Markdown list. #' @param type Markdown list type. One of #' - `"unordered"` for an unordered aka [bullet list](https://pandoc.org/MANUAL.html#bullet-lists). Corresponds to #' [`<ul>`](https://developer.mozilla.org/docs/Web/HTML/Element/ul) in HTML. #' - `"ordered"` for an ordered aka [numbered list](https://pandoc.org/MANUAL.html#ordered-lists). Corresponds to #' [`<ol>`](https://developer.mozilla.org/docs/Web/HTML/Element/ol) in HTML. #' - `"ordered_roman"` for a variation of an ordered/numbered list with uppercase roman numerals instead of Arabic numerals as list markers. #' @param tight Whether or not to add additional spacing between list items. #' @param indent_lvl Level of indentation of the resulting Markdown list. For each level, four additional spaces are added in front of every list item. An #' integer scalar. #' @param wrap An optional string to wrap the list items in. #' #' @return If `x` is empty, a character vector of length zero. Otherwise, a character scalar. #' @family md #' @export #' #' @examples #' rownames(mtcars) |> #' pal::as_md_list() |> #' cat() as_md_list <- function(x, type = c("unordered", "ordered", "ordered_roman"), tight = TRUE, indent_lvl = 0L, wrap = NULL) { type <- rlang::arg_match(type) checkmate::assert_flag(tight) checkmate::assert_count(indent_lvl) checkmate::assert_string(wrap, null.ok = TRUE) rlang::check_installed("pander", reason = reason_pkg_required()) x %<>% as_chr() if (length(x) == 0L) { return(character()) } pander::pandoc.list.return(elements = paste0(wrap, x, wrap), style = switch(EXPR = type, unordered = "bullet", ordered = "ordered", ordered_roman = "roman"), loose = !tight, indent.level = indent_lvl, add.line.breaks = FALSE, add.end.of.list = FALSE) }
as_md_vals
#' Format values as verbatim Markdown #' #' Converts the given values to a character vector, formatted as [verbatim Markdown](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally #' wrapped in double quotes. Metadata like [types][base::typeof] or [attributes][base::attributes] of the input is *not* represented in the output, only its #' *values*. #' #' @param ... Values to be formatted. One or more \R objects. #' #' @return A character vector. #' @family md #' @export #' #' @examples #' list(1L, 2.2, "other") |> #' pal::as_md_vals() #' #' # note that values are flattened before further processing them, so this yields the same result #' list(list(list(1L, list(2.2), list(list("other"))))) |> #' pal::as_md_vals() as_md_vals <- function(...) { rlang::list2(...) |> as_flat_list() |> purrr::map(\(x) { # wrap chr vals in double quotes if (is.character(x)) x %<>% wrap_chr() # determine nr of backticks to wrap result in, safely escaping any number of existing backticks add_space <- TRUE backticks <- x |> stringr::str_extract_all(pattern = "`+") |> purrr::list_c(ptype = character()) |> unique() %>% magrittr::extract(nchar(.) == safe_max(nchar(.))) if (length(backticks) == 0L) { add_space <- FALSE backticks <- "`" } else { backticks %<>% paste0("`") } paste0(backticks, " "[add_space], x, " "[add_space], backticks) }) |> purrr::list_c(ptype = character()) }
as_md_val_list
#' List values as a Markdown list #' #' Generates a Markdown list of the given values formatted as [verbatim](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally wrapped in #' double quotes. #' #' @inheritParams as_md_vals #' #' @return A character scalar. #' @family md #' @export #' #' @examples #' list(1L, 2.2, "other") |> #' pal::as_md_val_list() |> #' cat() #' #' # note that values are flattened before listing them, so this yields the same list #' list(list(list(1L, list(2.2), list(list("other"))))) |> #' pal::as_md_val_list() |> #' cat() as_md_val_list <- function(...) { as_md_list(as_md_vals(...)) }
pipe_table
TODO:
Check out xfun::md_table() and consider upstreaming pipe_table()
's current upsides (col/rownames formatting etc.) there.#' Convert dataframe/tibble to Markdown pipe table #' #' Convenience wrapper around [`knitr::kable(format = "pipe")`][knitr::kable()] to create a [Markdown pipe #' table](https://pandoc.org/MANUAL.html#extension-pipe_tables). #' #' # Create tables dynamically in roxygen2 documentation #' #' This function can be useful to create tables inside [roxygen2][roxygen2::roxygen2] documentation programmatically from data using #' [dynamic \R code](https://roxygen2.r-lib.org/articles/rd-formatting.html#dynamic-r-code-1). #' #' For example, the inline code #' #' `` `r mtcars |> head() |> pipe_table()` `` #' #' should produce the following table in [roxygen2 7.1.0](https://www.tidyverse.org/blog/2020/03/roxygen2-7-1-0/) and above: #' #' `r mtcars |> head() |> pipe_table()` #' #' @inherit knitr::kable details #' #' @inheritParams knitr::kable #' @param x Dataframe/tibble/matrix to be converted to a pipe table. #' @param incl_rownames Whether to include row names or not. A logical scalar or `NULL`. If `NULL`, row names are included if `rownames(x)` is neither `NULL` #' nor identical to `seq_len(nrow(x))`. #' @param strong_colnames Whether or not to highlight column names by formatting them `<strong>` (wrapping them in two asterisks). #' @param strong_rownames Whether or not to highlight row names by formatting them `<strong>` (wrapping them in two asterisks). #' @param align Column alignment. Either `NULL` for auto-alignment or a character vector consisting of `'l'` (left), `'c'` (center) and/or `'r'` (right). If #' `align = NULL`, numeric columns are right-aligned, and other columns are left-aligned. If `length(align) == 1L`, the string will be expanded to a vector #' of individual letters, e.g. `'clc'` becomes `c('c', 'l', 'c')`. #' @param format_args A list of arguments to be passed to [base::format()] to format table values, e.g. `list(big.mark = ',')`. #' #' @return A character vector. #' @seealso [xfun::md_table()] #' @family md #' @export #' #' @examples #' mtcars |> head() |> pal::pipe_table() |> pal::cat_lines() pipe_table <- function(x, incl_rownames = NULL, strong_colnames = TRUE, strong_rownames = TRUE, align = NULL, label = NULL, digits = getOption("digits"), format_args = list()) { checkmate::assert_flag(incl_rownames, null.ok = TRUE) checkmate::assert_flag(strong_colnames) checkmate::assert_flag(strong_rownames) rlang::check_installed("knitr", reason = reason_pkg_required()) if (is.null(incl_rownames)) { incl_rownames <- !identical(rownames(x), as.character(seq_len(nrow(x)))) } # format rownames <strong> if requested and sensible if ((is.null(incl_rownames) || incl_rownames) && strong_rownames) { # setting rownames on a tibble is deprecated, thus we convert to dataframe x %<>% as.data.frame() rownames(x) %<>% paste0("**", ., "**") } kable_args <- alist(x = x, format = "pipe", digits = digits, row.names = incl_rownames, col.names = colnames(x) |> when(strong_colnames ~ paste0("**", ., "**"), ~ .), label = label, format.args = format_args) |> when(!is.null(align) ~ c(., alist(align = align)), ~ .) do.call(what = knitr::kable, args = kable_args) }
strip_md
NOTES:
commonmark::markdown_text()
is found here.#' Strip Markdown formatting from character vector #' #' Removes all Markdown formatting from a character vector. #' #' This function relies on [commonmark::markdown_text()] which [supports the CommonMark specification plus the Github #' extensions](https://github.com/r-lib/commonmark#readme). Unfortunately, [Markdown footnotes](https://pandoc.org/MANUAL.html#footnotes) can't be stripped #' using `commonmark::markdown_text()`. Therefore a separate option `strip_footnotes` is offered which relies on regular expressions to remove inline footnotes #' and footnote definitions and references. #' #' @param x A character vector to strip Markdown formatting from. #' @param strip_footnotes Whether to remove Markdown footnotes, too. If `FALSE`, footnotes are canonicalized (to have sequential integer identifiers). #' #' @return A character vector of the same length as `x`. #' @family md #' @export #' #' @examples #' pal::strip_md( #' "A **MD** formatted [string](https://en.wikipedia.org/wiki/String_(computer_science))" #' ) #' #' # link references are only removed *iff* the reference is included in `x`: #' pal::strip_md("[A reference link][refid]\n\n[refid]: https://example.com") #' pal::strip_md("[A reference link][refid]\n\n_No ref here..._") strip_md <- function(x, strip_footnotes = TRUE) { rlang::check_installed("commonmark", version = "1.9.0", reason = reason_pkg_required()) checkmate::assert_character(x) checkmate::assert_flag(strip_footnotes) purrr::map_chr(x, \(x2) { if (is.na(x2)) { x2 } else { result <- commonmark::markdown_text(text = x2, footnotes = TRUE, extensions = TRUE) if (!isTRUE(endsWith(x2, "\n"))) { result %<>% stringr::str_remove(pattern = "\n$") } if (strip_footnotes) { result %<>% strip_md_footnotes() } result } }) }
strip_md_footnotes
#' Strip Markdown footnotes from character vector #' #' Removes all Markdown footnotes from a character vector. #' #' Note that it is not checked whether footnote references and definitions actually match (by identifier), thus they are removed even if they were invalid. #' #' @param x A character vector to strip Markdown footnotes from. Note that elements in `x` are processed as separate Markdown domains, i.e. _not_ as individual #' lines belonging to the same Markdown document. #' #' @return A character vector of the same length as `x`. #' @family md #' @export #' #' @examples #' pal::strip_md_footnotes( #' "An **MD** formatted string with footnote[^fn].\n\n[^fn]: A note.\n" #' ) strip_md_footnotes <- function(x) { checkmate::assert_character(x) |> stringr::str_split(pattern = "(\\n{3,}|\\n{2}(?! {4}))") |> purrr::map_chr(\(x2) { # remove footnote defs ix_to_rm <- x2 |> stringr::str_detect("^\\[\\^[^\\s\\]]+\\]: .*") |> which() if (length(ix_to_rm)) { x2 <- x2[-ix_to_rm] } # remove inline footnotes and refs x2 %<>% stringr::str_remove_all(pattern = "\\[\\^[^\\s\\]]+\\]|\\^\\[.+?\\]") if (length(x2) > 0L && !all(is.na(x2))) { x2 %<>% paste0(collapse = "\n\n") } x2 }) }
Extending the commonmark package.
md_to_xml
TODO:
There are two noteworthy R packages for R Markdown file parsing:

- parsermd, which uses custom C++ code under the hood and therefore should be very fast. But it appears to implement its own custom abstract syntax tree (AST), which seems like a silly idea. We should thoroughly check out the package and compare it to the XML-based tinkr approach!
- tinkr, whose built-in XSL stylesheet we already use (via tinkr::stylesheet() and tinkr::md_ns()). tinkr also provides its own to_xml(). The main advantage of pal::md_to_xml() is that pal::strip_yaml_header() is more robust in YAML header detection than blogdown's split_yaml_body() used in tinkr::to_xml() (see the comparison sketch after these notes).

Thus it would be ideal to:

1. Submit a PR introducing xfun::split_yaml_body() based on the logic in strip_yaml_header(); xfun is already imported in blogdown and xaringan.
2. Submit a PR replacing blogdown:::split_yaml_body() with xfun::split_yaml_body().
3. Submit a PR replacing xaringan:::split_yaml_body() with xfun::split_yaml_body().
4. Submit a PR importing xfun and replacing tinkr:::split_yaml_body() with xfun::split_yaml_body().
5. Deprecate pal::md_to_xml() and instead rely on tinkr::to_xml()$body for the same purpose.

Further note that tinkr::to_xml() (as well as the unexported and separate tinkr:::md_to_xml()) additionally

- parses knitr in-header chunk options via a call to tinkr:::parse_rmd(), something pal::md_to_xml() simply omits (but which could be very useful for certain use cases; note though that it doesn't yet support in-body chunk options).
- replaces certain typographic single and double quotes with the basic commonmark-compliant ones via a call to FIX ME. I think it is a pretty bad idea to enable this by default and there should at least be an option to opt out of this behaviour (i.e. introduce an additional boolean param normalize_quotation_marks or the like).
- does not allow changing the hardbreaks, smart, normalize and extensions params of the internal call to commonmark::markdown_xml(). At least being able to set normalize = TRUE (the default of pal::md_to_xml()) seems useful, but currently commonmark::markdown_xml()'s default of normalize = FALSE is used.

Lastly, it would be nice to upstream md_xml_subnode_ix() as well.
#' Parse (R) Markdown as CommonMark XML tree #' #' Parses (R) Markdown file content according to the [CommonMark](https://commonmark.org/) specification and returns it as an XML parse tree. #' #' @inheritParams as_line_feed_chr #' @inheritParams gitlab_document #' @inheritParams commonmark::markdown_xml #' @param md (R) Markdown file content as a character scalar. #' @param hardbreaks Whether or not to treat newlines as hard line breaks. #' @param strip_xml_ns Whether or not to [remove the default XML namespace][xml2::xml_ns_strip] (`d1`) assigned by [commonmark::markdown_xml()]. #' #' @return An [`xml_document`][xml2::xml_document-class]. #' @family commonmark #' @export #' #' @examples #' "# A title #' #' Some prose. #' #' ## A subtitle #' #' More prose. #' #' ## Another subtitle #' #' Out of prose here. #' #' ### A sub-subtitle #' #' I'm dug in. #' #' # Another title #' #' A last word." |> pal::md_to_xml() md_to_xml <- function(md, smart_punctuation = FALSE, hardbreaks = FALSE, normalize = TRUE, sourcepos = FALSE, footnotes = TRUE, extensions = c("strikethrough", "table", "tasklist"), eol = c("LF", "CRLF", "CR", "LFCR"), strip_xml_ns = TRUE) { rlang::check_installed("commonmark", reason = reason_pkg_required()) rlang::check_installed("xml2", reason = reason_pkg_required()) result <- strip_yaml_header(rmd = md, eol = eol) |> commonmark::markdown_xml(hardbreaks = hardbreaks, smart = smart_punctuation, normalize = normalize, sourcepos = sourcepos, footnotes = footnotes, extensions = extensions) |> xml2::read_xml() |> when(strip_xml_ns ~ xml2::xml_ns_strip(.), ~ .) # `xml2::xml_ns_strip()` returns its result invisibly, so we make it visible again (result) }
md_xml_subnode_ix
#' Determine CommonMark XML subnode indices #' #' Determines the XML children node indices for every XML node at the highest level of `xml` by interpreting [Markdown heading #' levels](https://pandoc.org/MANUAL.html#headings) (1–6). #' #' [commonmark::markdown_xml()] (and so [md_to_xml()] which builds upon it) **do** parse (R) Markdown file content according to the #' **[CommonMark](https://commonmark.org/) specification**, but **do not** return any information about the document's **heading hierarchy**. #' `md_xml_subnode_ix()` fills this gap by giving the hierarchy structure in the form of the XML subnode indices for every node at the highest level of `xml`. #' #' @param xml CommonMark parse tree. An [`xml_document`][xml2::xml_document-class], [`xml_nodeset`][xml2::xml_nodeset-class] or #' [`xml_node`][xml2::xml_node-class]. #' #' @return A list of integer vectors of the same length as the number of XML nodes at the highest level of `xml`. #' @family commonmark #' @export #' #' @examples #' "# A title #' #' Some prose. #' #' ## A subtitle #' #' More prose. #' #' ## Another subtitle #' #' Out of prose here. #' #' ### A sub-subtitle #' #' I'm dug in. #' #' # Another title #' #' A last word." |> #' pal::md_to_xml() |> #' pal::md_xml_subnode_ix() md_xml_subnode_ix <- function(xml) { assert_class_any(xml, classes = c("xml_document", "xml_nodeset", "xml_node"), name = "xml") rlang::check_installed("xml2", reason = reason_pkg_required()) xml_names <- xml2::xml_name(xml) if (length(xml_names) == 1L && xml_names == "document") { xml %<>% xml2::xml_contents() } purrr::map(seq_along(xml), \(i) subnode_ix(xml_nodes = xml, i = i)) }
xml_to_md
#' Convert from CommonMark XML to (R) Markdown #' #' @inheritParams md_xml_subnode_ix #' #' @return A character scalar. #' @family commonmark #' @export #' #' @examples #' "# A title #' #' Some prose. #' #' ## A subtitle #' #' More prose. #' #' ## Another subtitle #' #' Out of prose here. #' #' ### A sub-subtitle #' #' I'm dug in. #' #' # Another title #' #' A last word." |> #' pal::md_to_xml() |> #' xml2::xml_contents() |> #' pal::xml_to_md() |> #' cat() xml_to_md <- function(xml) { assert_class_any(xml, classes = c("xml_document", "xml_nodeset", "xml_node"), name = "xml") rlang::check_installed("tinkr", reason = reason_pkg_required()) rlang::check_installed("xml2", reason = reason_pkg_required()) rlang::check_installed("xslt", reason = reason_pkg_required()) xml |> as.character() |> paste0(collapse = "\n") |> # trim whitespace in case `xml` was already of type character stringr::str_trim() |> # add CommonMark XML namespace when(stringr::str_detect(string = ., pattern = "^<document[>\\s]") ~ ., ~ paste0('<document xmlns="', as.character(tinkr::md_ns()), '">', ., '</document>')) |> xml2::read_xml() |> # convert XML to CommonMark xslt::xml_xslt(stylesheet = xml2::read_xml(tinkr::stylesheet())) }
build_readme
#' Build `README.Rmd` #' #' A simpler, but considerably faster alternative to [devtools::build_readme()] since it doesn't install your package in a temporary library before building the #' `README.Rmd`. This has the pleasant side effect that, other than the latter function, it also works for `.Rmd` files which aren't part of an \R package. #' #' Note that for public package repositories, it's recommended to use [devtools::build_readme()] since it ensures the `README.Rmd` can be built _reproducibly_, #' which means all the objects and files it references must be accessible from the repository. #' #' `r pkgsnip::md_snip("rstudio_addin")` #' #' @param input Path to the R Markdown README file to be built. A character scalar. #' @param output Path to the built Markdown README. A character scalar. #' @param build_index_md Whether to build a separate [pkgdown][pkgdown::pkgdown]-optimized `pkgdown/index.md` alongside `output` (i.e. in the same parent #' directory). If `NULL`, it will only be built if the parent directory of `output` [contains a pkgdown configuration file][is_pkgdown_dir]. Note that it will #' be built with the \R option `pal.build_readme.is_pkgdown` set to `TRUE`, allowing for conditional content inclusion in `input` – e.g. via the [code chunk #' option](https://yihui.org/knitr/options/#code-evaluation) `eval = isTRUE(getOption("pal.build_readme.is_pkgdown"))`. #' @param env Environment in which code chunks are to be evaluated, e.g. [base::parent.frame()], [base::new.env()], or [base::globalenv()]. #' @param clean Whether or not to delete intermediate files created by [rmarkdown::render()]. #' @param quiet `r pkgsnip::param_lbl("quiet")` #' #' @return The path to `input` as a character scalar, invisibly. #' @family rmd_knitr #' @export build_readme <- function(input = "README.Rmd", output = "README.md", build_index_md = NULL, env = parent.frame(), clean = TRUE, quiet = FALSE) { checkmate::assert_environment(env) checkmate::assert_string(input) checkmate::assert_path_for_output(output, overwrite = TRUE) checkmate::assert_flag(build_index_md, null.ok = TRUE) checkmate::assert_flag(clean) checkmate::assert_flag(quiet) rlang::check_installed("knitr", reason = reason_pkg_required()) rlang::check_installed("rmarkdown", reason = reason_pkg_required()) # add args to `env` rlang::env_bind(.env = env, input = input, output = output, build_index_md = build_index_md) # add `pkg_metadata` to env parent_dir <- fs::path_dir(input) |> fs::path_abs() if (is_pkg_dir(parent_dir)) { rlang::env_bind(.env = env, pkg_metadata = desc_list(parent_dir)) } if (!quiet) { cli_progress_step_quick("Building {.file {input}}") } # generate `output` ## render to the output format specified in the YAML header (defaults to `rmarkdown::md_document`) rmarkdown::render(input = input, output_file = output, clean = clean, quiet = TRUE, envir = env) # generate `index.md` if indicated if (!isFALSE(build_index_md)) { output_dir <- fs::path_dir(output) if (is_pkgdown_dir(output_dir)) { index_md_path <- fs::path(output_dir, "pkgdown") |> fs::dir_create() |> fs::path("index.md") |> checkmate::assert_path_for_output(overwrite = TRUE) |> fs::path_abs() # clean Rmd file rlang::check_installed("brio", reason = reason_pkg_required()) rlang::check_installed("withr", reason = reason_pkg_required()) tmp_file <- fs::file_temp(pattern = "tmp-pal-build_readme", ext = fs::path_ext(input)) brio::read_file(input) |> # remove possible trailing horizontal line in Rmd file since pkgdown always adds one below content # TODO: submit PR to 
pkgdown fixing this? stringr::str_replace(pattern = " {0,3}([-\\*_]{3,}|<hr */?>)(\\s*(\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)", replacement = "\\2") |> # remove `align` and `height` <img> tags (rely on custom CSS file `pkgdown/extra.css` instead) stringr::str_replace_all(pattern = "(<img [^>]+)(align=['\"].*?['\"]\\s*)", replacement = "\\1") |> stringr::str_replace_all(pattern = "(<img [^>]+)(height=['\"].*?['\"]\\s*)", replacement = "\\1") |> brio::write_file(path = tmp_file) # render `pkgdown/index.md` withr::with_options( new = list(pal.build_readme.is_pkgdown = TRUE), code = rmarkdown::render(input = tmp_file, output_file = index_md_path, output_format = rmarkdown::md_document(variant = "markdown", # disable Pandoc extensions in input which rmarkdown only adds for backwards compatibility md_extensions = c("-autolink_bare_uris", "-tex_math_single_backslash"), pandoc_args = "--columns=9999") |> # overwrite Pandoc output format (i.a. disable Pandoc's raw attributes to have more control over inline HTML) purrr::list_modify(pandoc = list(to = "markdown-raw_attribute")), knit_root_dir = parent_dir, clean = clean, quiet = TRUE, envir = env) ) fs::file_delete(tmp_file) } } }
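The block above has no example, so here's a minimal, hedged one (it assumes the current working directory actually contains a `README.Rmd`):

``` r
# Build README.md from README.Rmd in the current working directory
# (and, if a pkgdown setup is detected, also pkgdown/index.md).
pal::build_readme(input = "README.Rmd",
                  output = "README.md",
                  quiet = TRUE)
```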
knitr_table_format
#' Determine current knitr table format #' #' Determines the current knitr table format based on the \R option #' [`knitr.table.format`](https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#kable-formats) which can either be set directly to a valid format string or #' to a function returning one of these strings conditionally. #' #' This is basically a convenience wrapper to be able to access the current `knitr.table.format` in a hassle-free way, i.e. it provides the conditional logic to #' account for the possibility that `knitr.table.format` is set to a function rather than a format string. #' #' @param default knitr table format to fall back to when the \R option `knitr.table.format` is not set. One of #' `r fn_param_defaults(param = "default", fn = knitr_table_format) |> wrap_chr("\x60") |> as_md_list()` #' #' See [knitr::kable()]'s `format` argument for details. #' #' @return A character scalar. #' @family rmd_knitr #' @export knitr_table_format <- function(default = c("pipe", "simple", "html", "latex", "rst")) { allowed_formats <- eval(formals()$default) opt <- getOption("knitr.table.format") result <- opt %||% rlang::arg_match(default) if (is.function(result)) result <- result() if (!(result %in% allowed_formats)) { cli::cli_abort(paste0("R option {.field knitr.table.format} must evaluate to one of ", enum_str(x = paste0("{.val ", allowed_formats, "}"), sep2 = " or "), ", but is {.code {deparse1(opt)}}", ifelse(is.function(opt), " which evaluates to {.val {result}}", ""), ".")) } result }
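A short usage sketch showing both supported ways of setting the `knitr.table.format` option (a plain format string or a function returning one conditionally):

``` r
# Option set to a plain format string ...
options(knitr.table.format = "html")
pal::knitr_table_format()  # returns "html"

# ... or to a function returning a format string conditionally
options(knitr.table.format = function() if (knitr::is_latex_output()) "latex" else "pipe")
pal::knitr_table_format()  # returns "pipe" outside of LaTeX output contexts
```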
strip_yaml_header
#' Strip YAML header from R Markdown #' #' Extracts the body from R Markdown file content, stripping a possible [YAML metadata #' block](https://bookdown.org/yihui/rmarkdown-cookbook/rmarkdown-anatomy.html#yaml-metadata) at the beginning. #' #' Note that for the [R Markdown file format](https://rmarkdown.rstudio.com/), the [YAML metadata #' block](https://pandoc.org/MANUAL.html#extension-yaml_metadata_block) must occur at the beginning of the document (and there can be only one). Additional #' whitespace characters (incl. newlines) before the YAML metadata block are allowed. #' #' @inheritParams as_line_feed_chr #' @param rmd R Markdown file content as a character scalar. #' #' @return Body of the R Markdown file as a character vector of lines. #' @family rmd_knitr #' @export #' #' @examples #' rmd <- " #' --- #' output: pal::gitlab_document #' --- #' #' # A heading #' #' Some prose. #' " #' #' pal::cat_lines(rmd) #' #' rmd |> #' pal::strip_yaml_header() |> #' pal::cat_lines() strip_yaml_header <- function(rmd, eol = c("LF", "CRLF", "CR", "LFCR")) { checkmate::assert_string(rmd) has_yaml <- stringr::str_detect(string = rmd, pattern = "^(\\n\\s*)?---\\s*\\n.*(---|...)\\s*\\n") eol %<>% as_line_feed_chr() rmd %<>% stringr::str_split_1(pattern = eol) last_yaml_line_nr <- 0L if (has_yaml) { last_yaml_line_nr <- rmd |> stringr::str_locate("^---\\s*$") |> tibble::as_tibble() |> tibble::rowid_to_column() |> dplyr::filter(!dplyr::if_any(.cols = everything(), .fns = is.na)) |> dplyr::pull("rowid") |> purrr::chuck(2L) |> min(rmd |> stringr::str_locate("^\\.{3}\\s*$") |> tibble::as_tibble() |> tibble::rowid_to_column() |> dplyr::filter(!dplyr::if_any(.cols = everything(), .fns = is.na)) |> dplyr::pull("rowid") |> purrr::pluck(1L), na.rm = TRUE) } rmd[(last_yaml_line_nr + 1L):length(rmd)] }
Custom R Markdown output formats which can be used in addition to the default output formats.
gitlab_document
TODO:
- Find out why param df_print has no effect (seems to always fall back to "default"; the same happens for rmarkdown::github_document()). Is this a bug? Setting df_print = "kable" should actually produce a pipe table...
- Finish the damn tocr package and switch to it for TOC generation instead of Pandoc's built-in but very limited --table-of-contents option!
- As soon as Pandoc offers native support for GitLab Flavored Markdown, switching to that will be the next step.
#' Convert to GitLab Flavored Markdown #' #' Format for converting from R Markdown to [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md). #' #' This is the GitLab equivalent to the [`github_document`][rmarkdown::github_document()] R Markdown #' [output format](https://bookdown.org/yihui/rmarkdown/output-formats.html). It basically ensures Pandoc is called with a custom set of options optimized for #' maximum compatibility with [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md). #' #' ## Caveats regarding GitLab-Flavored-Markdown-specific features #' #' GitLab Flavored Markdown extends the [CommonMark](https://spec.commonmark.org/current/) Markdown specification with a bunch of #' [special features](https://gitlab.com/help/user/markdown.md#gfm-extends-standard-markdown). To be able to properly make use of them, observe the following #' points: #' #' - For [inline diffs](https://gitlab.com/help/user/markdown.md#inline-diff), only use curly braces (`{}`), not square brackets (`[]`). The latter will be #' escaped by Pandoc during conversion and thus not recognized by GitLab as starting/ending an inline diff. #' #' - You have to set `smart_punctuation = FALSE` in order to leave certain #' [special GitLab references](https://gitlab.com/help/user/markdown.md#special-gitlab-references) (like commit range comparisons) untouched for GitLab to #' interpret them correctly. #' #' All the special GitLab references for snippets and labels that start with a tilde (`~`) or a dollar sign (`$`) won't work because these characters will be #' escaped by Pandoc during conversion. #' #' - The `[[_TOC_]]` tag to let GitLab [generate a table of contents](https://gitlab.com/help/user/markdown.md#table-of-contents) won't work because it will be #' escaped by Pandoc during conversion. You can let Pandoc generate the TOC instead by setting `toc = TRUE`. #' #' - [Multiline blockquotes](https://gitlab.com/help/user/markdown.md#multiline-blockquote) won't work because the fence delimiters `>>>` will be escaped by #' Pandoc during conversion. #' #' @inheritParams rmarkdown::output_format #' @inheritParams rmarkdown::md_document #' @param smart_punctuation Whether or not to enable [Pandoc's `smart` extension](https://pandoc.org/MANUAL.html#extension-smart) which converts straight quotes #' to curly quotes, `---` to an em-dash (—), `--` to an en-dash (–), and `...` to ellipses (…). It also replaces regular spaces after certain abbreviations #' such as `Mr.` with [non-breaking spaces](https://en.wikipedia.org/wiki/Non-breaking_space). #' @param parse_emoji_markup Whether to enable [Pandoc's `emoji` extension](https://pandoc.org/MANUAL.html#extension-emoji) which parses emoji markup (e.g. #' `:smile:`) as Unicode emoticons. #' @param toc Include a table of contents (TOC) [automatically generated by Pandoc](https://pandoc.org/MANUAL.html#option--toc). Note that the TOC will be #' placed _before_ the README's body, meaning also _before_ the first Markdown header. #' @param add_footnotes_hr Whether to add a trailing horizontal rule (`---`) to the final Markdown file if it doesn't already end in one and contains footnotes #' (currently only checks for Pandoc's [reference-style footnotes](https://pandoc.org/MANUAL.html#footnotes) and not inline footnotes). This improves #' readability when the file is rendered on `GitLab.com`. 
#' @param autolink_bare_uris Enable the [`autolink_bare_uris` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-autolink_bare_uris) which #' makes all absolute URIs into links, even when not surrounded by pointy braces `<...>`. #' @param tex_math_single_backslash Enable the #' [`tex_math_single_backslash` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-tex_math_single_backslash) which causes anything between #' `\(` and `\)` to be interpreted as inline TeX math, and anything between `\[` and `\]` to be interpreted as display TeX math. Note: a drawback of this #' extension is that it precludes escaping `(` and `[`. #' #' @return R Markdown output format intended to be fed to [rmarkdown::render()]. #' @family rmd_format #' @export #' #' @examples #' \donttest{ #' tmp_file <- fs::file_temp() #' download.file(url = "https://gitlab.com/rpkg.dev/pal/-/raw/master/Rmd/pal.Rmd", #' destfile = tmp_file, #' quiet = TRUE, #' mode = "wb") #' #' rmarkdown::render(input = tmp_file, #' output_format = pal::gitlab_document(), #' quiet = TRUE) |> #' brio::read_lines() |> #' length()} gitlab_document <- function(smart_punctuation = TRUE, parse_emoji_markup = FALSE, df_print = "kable", toc = FALSE, toc_depth = 6L, fig_width = 7L, fig_height = 5L, dev = "png", preserve_yaml = FALSE, add_footnotes_hr = TRUE, autolink_bare_uris = FALSE, tex_math_single_backslash = FALSE) { checkmate::assert_flag(smart_punctuation) checkmate::assert_flag(parse_emoji_markup) checkmate::assert_flag(toc) checkmate::assert_int(toc_depth, lower = 1L, upper = 6L) checkmate::assert_flag(preserve_yaml) checkmate::assert_flag(add_footnotes_hr) checkmate::assert_flag(autolink_bare_uris) checkmate::assert_flag(tex_math_single_backslash) rlang::check_installed("rmarkdown", reason = reason_pkg_required()) # `post_process` fn to ensure MD ends in trailing horizontal rule (to visually separate footnotes from a possible trailing numbered list) if (add_footnotes_hr) { ensure_trailing_md_hr <- function(metadata, input_file, output_file, clean, verbose) { rlang::check_installed("brio", reason = reason_pkg_required()) checkmate::assert_file_exists(output_file, access = "w") md <- brio::read_file(output_file) # ensure trailing horizontal rule if file contains footnotes if (stringr::str_detect(string = md, pattern = "(\\n\\[\\^[\\w-]+\\]:.*)") && stringr::str_detect(string = md, pattern = " {0,3}([-\\*_]{3,}|<hr */?>)(\\s*(\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)", negate = TRUE)) { md |> stringr::str_replace(pattern = "((\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)", replacement = "\n---\n\\1") |> brio::write_file(path = output_file) } output_file } } else { ensure_trailing_md_hr <- NULL } # create rmd output format rmarkdown::output_format( knitr = rmarkdown::knitr_options_html(fig_width = fig_width, fig_height = fig_height, fig_retina = NULL, keep_md = FALSE, dev = dev), pandoc = rmarkdown::pandoc_options(to = c("markdown", "+emoji"[parse_emoji_markup], "-smart", "-simple_tables", "-multiline_tables", "-grid_tables", "-header_attributes", "-fenced_code_attributes", "-inline_code_attributes", "-link_attributes", "-raw_attribute", "-pandoc_title_block", "-yaml_metadata_block"[!preserve_yaml]) |> paste0(collapse = ""), from = c("markdown", "+autolink_bare_uris"[autolink_bare_uris], "+tex_math_single_backslash"[tex_math_single_backslash], "-smart"[!smart_punctuation]) |> paste0(collapse = ""), args = c("--columns=9999", "--standalone", "--table-of-contents"[toc], paste0("--toc-depth=", toc_depth)[toc])), df_print = df_print, pre_knit = 
NULL, post_knit = NULL, pre_processor = NULL, intermediates_generator = NULL, post_processor = ensure_trailing_md_hr, on_exit = NULL, base_format = NULL ) }
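As a hedged usage sketch (the file names and option values below are illustrative assumptions, not package defaults), the format can be customized directly in the rendering call, e.g. for a repository README destined for GitLab:
# render a hypothetical README.Rmd to GitLab Flavored Markdown with a
# Pandoc-generated TOC (3 levels deep) and emoji markup parsing enabled
rmarkdown::render(input = "README.Rmd",
                  output_file = "README.md",
                  output_format = pal::gitlab_document(toc = TRUE,
                                                       toc_depth = 3L,
                                                       parse_emoji_markup = TRUE),
                  quiet = TRUE)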
Extending the htmlwidgets package.
write_widget
#' Write HTML widget to file #' #' Writes an HTML widget's main container `<div>` and data `<script>` tags to file. #' #' Other than [htmlwidgets::saveWidget()], this function does *not* write a complete HTML document but only the widget's core tags. Hence it is intended to #' create files which are in turn included in another HTML file, e.g. via a static site generator. The HTML widget's dependencies must be handled separately, #' e.g. using [write_widget_deps()]. #' #' To directly get the HTML widget's core tags as a character scalar, use `as.character(htmltools::tagList(x))`. #' #' @param x HTML widget object. #' @param path File path to write the HTML widget's core tags to. #' #' @return The content written to `path` as a character scalar, invisibly. #' @family htmlwidget #' @export write_widget <- function(x, path) { checkmate::assert_class(x, classes = "htmlwidget") checkmate::assert_path_for_output(path, overwrite = TRUE) rlang::check_installed("brio", reason = reason_pkg_required()) rlang::check_installed("htmltools", reason = reason_pkg_required()) x |> htmltools::tagList() |> as.character() |> brio::write_lines(path = path) }
write_widget_deps
#' Write HTML widget dependencies #' #' Writes an HTML widget's JS and CSS dependencies to files and returns a snippet for their inclusion in the `<head>` of an HTML document. #' #' To write the HTML widget's core tags to file, use [write_widget()]. #' #' @inheritParams write_widget #' @param path Directory path to write the HTML widget's dependencies to. A subfolder will be created for each dependency. #' @param path_base Base path to determine the relative root of `path` for inclusion of the widget's dependencies. Must be a parent of or equal to `path`. #' @param path_head_snippet Optional file path to write the snippet for inclusion of the widget's dependencies in the `<head>` of an HTML document to. #' @param quiet Whether or not to print the snippet for inclusion of the widget's dependencies to console. #' #' @return An [HTML][htmltools::HTML] object suitable for inclusion in the `<head>` of an HTML document, invisibly. #' @family htmlwidget #' @export write_widget_deps <- function(x, path, path_base, path_head_snippet = NULL, quiet = !is.null(path_head_snippet)) { checkmate::assert_class(x, classes = "htmlwidget") checkmate::assert_directory_exists(path, access = "w") if (!fs::path_has_parent(path = path, parent = path_base)) { cli::cli_abort("{.arg path_base} is not a parent of (or equal to) {.arg path}.") } if (!is.null(path_head_snippet)) { checkmate::assert_path_for_output(path_head_snippet, overwrite = TRUE) } checkmate::assert_flag(quiet) rlang::check_installed("brio", reason = reason_pkg_required()) rlang::check_installed("htmltools", reason = reason_pkg_required()) deps <- htmltools::findDependencies(x) |> purrr::map(\(x) { htmltools::copyDependencyToDir(dependency = x, outputDir = path) |> htmltools::makeDependencyRelative(basepath = path_base) }) head_snippet <- htmltools::renderDependencies(deps) |> # manually add the root slash stringr::str_replace_all(pattern = "(\\s(src|href)\\s*=\\s*[\"'])([^/])", replacement = "\\1/\\3") |> htmltools::HTML() if (!is.null(path_head_snippet)) { brio::write_lines(text = head_snippet, path = path_head_snippet) } if (!quiet) { cli::cli_alert_info(text = "Include the following HTML snippet in the {.field <head>} of your document:") cli::cli_par() cli::cli_end() cli::cli_code(lines = head_snippet) } invisible(head_snippet) }
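A hedged end-to-end sketch combining `write_widget()` and `write_widget_deps()`. It assumes the leaflet package is installed and a purely hypothetical static-site directory layout under `site/`:
# create an example HTML widget
widget <- leaflet::leaflet() |> leaflet::addTiles()

# prepare the (hypothetical) output directories
fs::dir_create(c("site/partials", "site/static/deps"))

# write the widget's core <div>/<script> tags to a partial which the static
# site generator is assumed to include in a page body
pal::write_widget(x = widget,
                  path = "site/partials/map.html")

# write the widget's JS/CSS dependencies below the site's static root and also
# save the <head> snippet referencing them to another partial
pal::write_widget_deps(x = widget,
                       path = "site/static/deps",
                       path_base = "site/static",
                       path_head_snippet = "site/partials/map_head.html")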
Extending the checkmate package.
assert_class_any
#' Assert object is member of any class #' #' Asserts that an object is member of any of the specified classes. #' #' In contrast to [checkmate::assert_class()], this function returns `TRUE` as long as `x` is at least member of *one* of `classes`. #' #' @param x \R object to test. #' @param classes Class names to check for inheritance. A character vector. #' @param name Name of the checked object to print in error message in case the assertion fails. A character scalar. #' #' @return `x`, invisibly. #' @family checkmate #' @export #' #' @examples #' xml2::read_html("https://pal.rpkg.dev/dev/license") |> #' assert_class_any(classes = c("xml_document", "xml_nodeset", "xml_node")) assert_class_any <- function(x, classes, name = "x") { checkmate::assert_character(classes, any.missing = FALSE) if (!inherits(x = x, what = classes)) { checkmate::assert_string(name) classes_actual <- class(x) cli::cli_abort(paste0("{.arg {name}} must {cli::qty(classes)} be {?of class/member of any of the classes} ", classes %>% paste0("{.val ", ., "}") |> enum_str(sep2 = " or "), ", but is {cli::qty(classes_actual)} of class{?es} {.val {classes_actual}}.")) } invisible(x) }
assert_df_or_tibble
#' Assert object is data frame or tibble (extension) #' #' Asserts that an object is `r pkgsnip::param_lbl(id = "df_or_tibble", as_sentence = FALSE)`. #' #' @inheritParams assert_class_any #' #' @inherit assert_class_any return #' @family checkmate #' @export #' #' @examples #' tibble::tibble() |> pal::assert_df_or_tibble() #' #' try( #' matrix() |> pal::assert_df_or_tibble() #' ) assert_df_or_tibble <- function(x, name = "x") { assert_class_any(x = x, classes = c("data.frame", "tbl", "tbl_dbi", "tbl_df", "tbl_lazy", "tbl_sql"), name = name) }
assert_inf_count
#' Assert count or `Inf` #' #' Asserts that `x` is either a [count][checkmate::assert_count] or [positive infinity][is.infinite] (`Inf`). #' #' @param x Object to check. #' @param ... Further arguments passed on to [checkmate::assert_count()]. #' #' @return `x`, invisibly. #' @family checkmate #' @export #' #' @examples #' pal::assert_inf_count(1001L) #' pal::assert_inf_count(1) #' pal::assert_inf_count(0.0) #' pal::assert_inf_count(Inf) #' #' try( #' pal::assert_inf_count(-1) #' ) #' try( #' pal::assert_inf_count(1.5) #' ) #' try( #' pal::assert_inf_count(-Inf) #' ) assert_inf_count <- function(x, ...) { if (isTRUE(is.infinite(x))) { if (x < 0L) { cli::cli_abort("{.arg x} must either be a single integerish value or positive infinity ({.code Inf}).") } return(invisible(x)) } else { return(checkmate::assert_count(x = x, ...)) } }
Extending the cli package.
cli_qty_lgl
#' [cli](https://cli.r-lib.org/) pluralization helpers for booleans #' #' Equivalents to [cli::qty()] and [cli::no()] for a logical input. #' #' If `cnd` evaluates to `TRUE`, the resulting cli quantity is `1`, otherwise `0`. See cli's [pluralization #' rules](https://cli.r-lib.org/articles/pluralization.html#pluralization-markup-1) for details about how these quantities are interpreted. #' #' @param cnd Condition. A logical scalar. #' #' @return `0L` or `1L` with the additional class `cli_noprint`. #' @family cli #' @export #' #' @examples #' cnd <- runif(1L) < 0.5 #' #' cli::pluralize(paste0( #' "{pal::cli_qty_lgl(cnd)}I think this function ", #' "{?comes in handy/is not worth a second of my attention}. Having looked at the rest of the ", #' "package, this {?is quite surprising/does not come as a surprise}." #' )) #' #' cli::pluralize("This function is worth exactly {pal::cli_no_lgl(cnd)} second of my time.") cli_qty_lgl <- function(cnd) { checkmate::assert_flag(cnd) cnd %<>% as.integer() class(cnd) <- "cli_noprint" cnd }
cli_no_lgl
#' @rdname cli_qty_lgl #' @export cli_no_lgl <- function(cnd) { checkmate::assert_flag(cnd) cnd %<>% as.integer() class(cnd) <- "cli_no" cnd }
cli_progress_step_quick
#' Quick [cli](https://cli.r-lib.org/) simplified progress message #' #' Version of [cli::cli_progress_step()] that for the messages on (un)successful termination simply appends **done**/**failed** to `msg`. #' #' @inheritParams cli::cli_progress_bar #' @inheritParams cli::cli_progress_step #' #' @return The cli progress bar `id` as a character scalar, invisibly. #' @family cli #' @export #' #' @examples #' { #' pal::cli_progress_step_quick(msg = "Doing my thing") #' Sys.sleep(1) #' cli::cli_progress_done() #' } cli_progress_step_quick <- function(msg, spinner = FALSE, class = if (!spinner) ".alert-info", current = TRUE, .auto_close = TRUE, .envir = parent.frame()) { checkmate::assert_string(msg) msg %<>% paste0("\u2026") cli::cli_progress_step(msg = msg, msg_done = paste(msg, "done"), msg_failed = paste(msg, "failed"), spinner = spinner, class = class, current = current, .auto_close = .auto_close, .envir = .envir) }
cli_process_expr
#' Evaluate an expression with [cli](https://cli.r-lib.org/) process indication #' #' @description #' #' `r lifecycle::badge("superseded")` \cr #' This function is superseded in favor of [cli_progress_step_quick()] and the underlying `cli::cli_progress_*` family of functions which are more powerful and #' versatile than the `cli::cli_process_*` family on which `cli_process_expr()` is built. #' #' @inheritParams cli::cli_process_start #' @param expr An expression to be evaluated. #' @param env Default environment to evaluate `expr`, as well as possible [glue][glue::glue()] expressions within `msg`, in. #' #' @return The result of the evaluated `expr`, invisibly. #' @family cli #' @export #' #' @examples #' if (interactive()) { #' pal::cli_process_expr(Sys.sleep(3L), "Zzzz") #' } #' #' russian_roulette <- function() { #' msg <- "Spinning the cylinder \U0001F91E … " #' pal::cli_process_expr(msg = msg, #' msg_done = paste0(msg, "and pulling the trigger – lucky again. \U0001F60C"), #' msg_failed = paste0(msg, "and pulling the trigger – head blast!"), #' expr = { #' if (interactive()) Sys.sleep(1) #' if (runif(1L) < 0.4) stop("\U0001F92F\u2620") #' }) #' } #' #' set.seed(321) #' russian_roulette() #' set.seed(123) #' try(russian_roulette()) cli_process_expr <- function(expr, msg, msg_done = paste(msg, "... done"), msg_failed = paste(msg, "... failed"), msg_class = "alert-info", done_class = "alert-success", failed_class = "alert-danger", env = parent.frame()) { checkmate::assert_string(msg, # NOTE: This is necessary since `cli::cli_process_start(msg = "")` throws an error min.chars = 1L) checkmate::assert_string(msg_done) checkmate::assert_string(msg_failed) checkmate::assert_string(msg_class) checkmate::assert_string(done_class) checkmate::assert_string(failed_class) checkmate::assert_environment(env) # NOTE: We cannot rely on `on_exit = "done"` since in case of an error the on-exit code of this function won't reach execution because we throw the error # using `rlang::cnd_signal(.x)` first. status_bar_container_id <- cli::cli_process_start(msg = msg, msg_done = msg_done, msg_failed = msg_failed, msg_class = msg_class, done_class = done_class, failed_class = failed_class, .envir = env) result <- tryCatch(expr = rlang::eval_tidy(expr = {{ expr }}, env = env), error = \(x) { cli::cli_process_failed(status_bar_container_id) rlang::cnd_signal(x) }) cli::cli_process_done(status_bar_container_id) invisible(result) }
Extending the gert and git2r packages.
git_file_mod_time
NOTES:
The `log` subcommand might be more efficient than the `blame` we use here. But since `gert::git_log()` doesn't allow specifying a file path and git2r doesn't offer a `log` wrapper
function, we don't really have a choice.#' Get Git file modification time #' #' Determine the time a file in a Git repository was last modified. #' #' Note that only *committed* changes to the file are regarded. The modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). #' #' @param path Path to a file, relative to the Git repository root. #' @param repo Path to a Git repository. #' #' @return `r pkgsnip::return_lbl("datetime")` #' @family git #' @export #' #' @examples #' \dontrun{ #' pal::git_file_mod_time(path = "README.md")} git_file_mod_time <- function(path, repo = ".") { rlang::check_installed("git2r", reason = reason_pkg_required()) # make path relative to `repo` if necessary to please `git2r::blame()` if (fs::is_absolute_path(path)) { path <- fs::path_abs(repo) |> c(path) |> fs::path_common() |> fs::path_rel(path = path) } git2r::blame(repo = repo, path = path)$hunks |> purrr::map(\(x) as.POSIXct(x$final_signature$when)) |> purrr::list_c(ptype = vctrs::new_datetime()) |> safe_max() }
git_remote_tree_url
NOTES:
#' Get Git remote tree URL #' #' Determines the base Git tree URL to the current branch's upstream remote of `repo`. #' #' This function is useful to assemble URLs to files and folders in your repo's Git forge (GitHub, GitLab, etc.). #' #' @inheritParams gert::git_remote_list #' @param remote Name of the Git remote to determine the URL for. Defaults to `"origin"` if present, otherwise the first remote listed by #' [gert::git_remote_list()]. #' #' @return If `repo` is a Git repository, a character scalar. Otherwise, a character vector of length zero. #' @family git #' @export #' #' @examples #' pal::git_remote_tree_url() |> #' paste0("Rmd/pal.Rmd") |> #' browseURL() git_remote_tree_url <- function(repo = ".", remote = NULL) { checkmate::assert_string(remote, null.ok = TRUE) rlang::check_installed("gert", reason = reason_pkg_required()) remotes <- try(gert::git_remote_list(repo = repo)) # return empty chr if no remotes found if (!is.data.frame(remotes)) { return(character()) } # if multiple remotes present and no explicit `remote` provided, take "origin" if present, otherwise the first one remote <- remote %||% ifelse(nrow(remotes) > 1L && "origin" %in% remotes$name, "origin", remotes$name[1L]) url <- gert::git_remote_info(repo = repo, remote = remote) |> _$url # convert SSH address to URL if (startsWith(url, "git@")) { url %<>% stringr::str_replace_all(pattern = c("(git@[\\w\\.]{1,}\\.[a-z]{2,}):" = "\\1/", "^git@" = "https://", "\\.git$" = "")) } add_slash_minus <- stringr::str_detect(url, stringr::fixed("gitlab")) url %<>% paste0("/-"[add_slash_minus], "/tree/", gert::git_branch(), "/") url }
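A hedged sketch combining the two Git helpers above. It assumes the working directory is inside a Git repository that has a remote configured and contains a committed README.md, and links to that file on the repository's forge together with its last committed modification time:
path <- "README.md"

file_url <- pal::git_remote_tree_url() |> paste0(path)
mod_time <- pal::git_file_mod_time(path = path)

cli::cli_inform("{.url {file_url}} was last modified on {format(mod_time, usetz = TRUE)}.")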
Extending the httr2 package which is built upon the curl package.
is_http_success
NOTES:
Handling/catching interrupts allows aborting the check in RStudio via its "stop" button. This is mainly useful if `max_tries > 1`.
There are web servers that disallow HEAD requests, even though this contradicts the spec. If this should prove to be a problem, we could implement a fallback to a GET request.
#' Test if an HTTP request is successful #' #' @description #' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions that returns #' #' - `TRUE` if the specified `url` could be resolved _and_ a [`HEAD`](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods) request could #' be [successfully completed](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes), or #' #' - `FALSE` in any other case. #' #' @details #' This function is similar to [RCurl::url.exists()], i.e. it only retrieves the header, no body, but is based on [httr2][httr2::httr2-package] which in turn is #' based on [curl](https://jeroen.cran.dev/curl/). #' #' For checks on lower levels of the network stack like performing DNS queries or TCP port pings, see the [pingr](https://r-lib.github.io/pingr/) package. #' #' @inheritParams httr2::req_retry #' @inheritParams httr2::req_perform #' @param url HTTP protocol address. The scheme is optional, so both `"google.com"` and `"https://google.com"` will work. A character scalar. #' @param max_tries `r pkgsnip::param_lbl("max_tries")` #' #' @return A logical scalar. #' @family http #' @export #' #' @examples #' pal::is_http_success("goo.gl") #' pal::is_http_success("https://google.com/") #' pal::is_http_success("https://google.not/") #' #' # by default, requests are only retried on HTTP 429 and 503 status codes #' pal::is_http_success(url = "https://httpstat.us/503", #' max_tries = 2, #' verbosity = 1) #' pal::is_http_success(url = "https://httpstat.us/500", #' max_tries = 2, #' verbosity = 1) #' #' # to retry on *all* failing status codes, set `is_transient` accordingly: #' pal::is_http_success(url = "https://httpstat.us/500", #' max_tries = 2, #' is_transient = \(x) TRUE, #' verbosity = 1) is_http_success <- function(url, max_tries = 1L, retry_on_failure = FALSE, is_transient = NULL, verbosity = NULL) { rlang::check_installed("httr2", reason = reason_pkg_required()) tryCatch(expr = httr2::request(base_url = url) |> httr2::req_method(method = "HEAD") |> httr2::req_retry(max_tries = max_tries, retry_on_failure = retry_on_failure, is_transient = is_transient) |> httr2::req_perform(verbosity = verbosity) |> httr2::resp_is_error() |> magrittr::not(), error = \(x) FALSE, interrupt = \(x) cli::cli_abort("Terminated by the user.")) }
is_url
#' Test if URL #' #' Tests whether the elements of a character vector are [Uniform Resource Locators](https://de.wikipedia.org/wiki/Uniform_Resource_Locator) (URLs). #' #' This function is based on [xml2::url_parse()] and simply checks whether the elements in `x` contain both a **scheme** as well as *some* **scheme-specific #' part** (excl. ports). No further checks are performed, so it's explicitly not verified that a URL actually conforms to its respective scheme. #' #' @param x Character vector to test. #' #' @return A logical vector of the same length as `x`. #' @family http #' @export #' #' @examples #' pal::is_url(c("/some/path", #' "file:///some/path")) is_url <- function(x) { checkmate::assert_character(x, any.missing = FALSE) rlang::check_installed("xml2", reason = reason_pkg_required()) parsed <- xml2::url_parse(x) has_scheme <- nchar(parsed$scheme) > 0L has_specifics <- purrr::pmap_lgl(parsed, # NOTE: we don't consider col `port` ~ any(nchar(c(..2, ..4, ..5, ..6, ..7)) > 0L)) has_scheme & has_specifics }
req_cached
TODO:
Rename this fn to be less misleading (it returns a response object, not a request object; but `resp_cached()` wouldn't be much better since all the `httr2::resp_*()` fns extract something from a response rather than returning one). Maybe `http_get_cached()` would be appropriate (since it only performs GET requests anyway)?
#' Perform a cached HTTP GET request #' #' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions. #' #' @inheritParams is_http_success #' #' @inherit httr2::req_perform return #' @family http #' @export req_cached <- function(url, max_tries = 3L) { rlang::check_installed("httr2", reason = reason_pkg_required()) httr2::request(base_url = url) |> httr2::req_method(method = "GET") |> httr2::req_cache(path = fs::path(tools::R_user_dir(package = "httr2", which = "cache"), "req_cache")) |> httr2::req_retry(max_tries = max_tries) |> httr2::req_perform() }
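A minimal usage sketch (requires network access; the URL is purely illustrative):
# the first call performs the GET request and populates httr2's on-disk cache
resp <- pal::req_cached(url = "https://www.r-project.org/")
httr2::resp_status(resp)

# an identical subsequent call can be served from the cache (subject to the
# response's caching headers)
resp <- pal::req_cached(url = "https://www.r-project.org/")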
toml_read
#' Read in and parse TOML file as strict list #' #' Reads in a file in [Tom's Obvious Minimal Language (TOML)](https://toml.io/) format and returns its content as a (nested) [strict list][xfun::strict_list()]. #' #' The file is parsed using [`RcppTOML::parseTOML(escape = FALSE)`][RcppTOML::parseTOML]. #' #' @inheritParams RcppTOML::parseTOML #' @param input If `from_file = TRUE`, the path to a TOML file as a character scalar. Otherwise, TOML content as a character vector. #' @param from_file Whether `input` is the path to a TOML file or already a character vector of TOML content. #' #' @return `r pkgsnip::return_lbl("strict_list")` #' @family toml #' @export toml_read <- function(input, from_file = TRUE, verbose = FALSE) { checkmate::assert_flag(from_file) checkmate::assert_flag(verbose) rlang::check_installed("RcppTOML", reason = reason_pkg_required()) rlang::check_installed("xfun", reason = reason_pkg_required()) if (from_file) { checkmate::assert_file_exists(input, access = "r") } else { checkmate::assert_character(input) # reduce to character scalar since `RcppTOML::parseTOML()` doesn't accept vectors input %<>% paste0(collapse = "\n") } xfun::as_strict_list(RcppTOML::parseTOML(input = input, verbose = verbose, fromFile = from_file, escape = FALSE)) }
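A minimal sketch parsing TOML content provided directly as a character vector instead of a file path:
pal::toml_read(input = c('title = "pal"',
                         "",
                         "[options]",
                         "verbose = false"),
               from_file = FALSE)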
toml_validate
NOTES:
An alternative to the Rust-based external `taplo` CLI would be to rely on the R package jsonvalidate that wraps the JavaScript tool Ajv. The main drawback is that it doesn't feature native TOML support, meaning only JSON files can be validated directly. So we'd need to convert the TOML to JSON before validation -- which a) isn't possible in a strictly lossless manner due to the additional native date(time) types and formats TOML supports, as well as serialization ambiguity when the conversion is done via an intermediary R representation (`RcppTOML::parseTOML()` can only convert TOML to R lists, not directly to JSON, and since R doesn't have a native atomic type, only vectors, the "atomicness" information is lost when converting R lists to JSON), and b) could fail for an invalid TOML file. After successful conversion, validation errors would then refer to the JSON representation, which obviously is less useful. Also, Ajv is notably slower than Taplo (which is built in Rust).
The main advantage of using jsonvalidate is support for JSON Schema drafts greater than Draft 4 (the highest version taplo currently supports).
Example code to validate the `simple.toml` qstnr survey configuration using jsonvalidate:
jsonvalidate::json_validate(
  json = '{
    "qstnr": {
      "items": {
        "source": [
          { "list": "qstnr:::defaults$items" },
          { "list": "list(ballot_date = list(name = list(en = \'ballooon day\')))" }
        ],
        "ids": {
          "official_register_data": [ "ballot_date", "ballot_type", "first_name", "middle_name", "last_name",
                                      "street", "street_number", "zip_code", "place" ],
          "personal_data": [ "year_of_birth", "sex", "cantonal_attachment" ]
        }
      }
    }
  }',
  schema = "/home/salim/Code/own/rpkg.dev/qstnr/inst/json-schema/qstnr-survey.schema.json",
  engine = "ajv",
  error = TRUE, # set to FALSE in order to get the `errors` tibble below
  verbose = TRUE
) %T>% {
  attr(., "errors") |> tibble::as_tibble()
}
#' Validate TOML #' #' Validates a TOML file or character vector using the external [Taplo CLI](https://taplo.tamasfe.dev/cli/introduction.html), optionally against a [JSON #' Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)). #' #' The highest supported JSON Schema specification is [Draft 4](https://json-schema.org/specification-links.html#draft-4). This is a [limitation of #' the underlying tool Taplo](https://taplo.tamasfe.dev/configuration/developing-schemas.html). #' #' Taplo allows to define the schema to be used directly in the TOML file using the [`schema` header #' directive](https://taplo.tamasfe.dev/configuration/directives.html#the-schema-directive). Note that the `schema` argument has precendence unless set to #' `NULL` (the default). #' #' # Why JSON Schema-based validation #' #' Although there are two noteworthy attempts at introducing a native validation format for TOML, [TOLS](https://github.com/toml-lang/toml/pull/116/) and [TOML #' Schema](https://github.com/toml-lang/toml/issues/792), neither of them has been officially adopted yet. As it appears, it could take several more years to #' decades until such thing happens, if ever. #' #' In the meantime, we can use the JSON Schema format as an alternative thanks to an awesome implementation by the [Taplo #' CLI](https://taplo.tamasfe.dev/cli/introduction.html), which itself is written in the Rust programming language and [available as a single-binary program for #' all common platforms](https://taplo.tamasfe.dev/cli/installation/binary.html). #' #' @inheritParams toml_read #' @param schema URL to a [JSON Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)) file to validate `input` #' against. Can also be a local filesystem path specified in the [file URI scheme](https://en.wikipedia.org/wiki/File_URI_scheme) (absolute path prefixed with #' `file://`). If `NULL`, no schema-based validation is performed and `input` is only checked to be TOML-compliant. #' @param top_errors_only Whether to reduce the output to the top error message of each of TOML CLI's error classes. If `FALSE`, TOML CLI's complete error #' output is shown. #' #' @return If the validation is successful, `input` invisibly. Otherwise an error is thrown. #' @family toml #' @export #' #' @examples #' try( #' pal::toml_validate(input = "key = tru", #' from_file = FALSE) #' ) toml_validate <- function(input, from_file = TRUE, schema = NULL, top_errors_only = TRUE) { checkmate::assert_flag(from_file) checkmate::assert_flag(top_errors_only) checkmate::assert_string(schema, null.ok = TRUE) if (!is.null(schema) && !is_url(schema)) { cli::cli_abort(paste0("{.arg schema} must be a valid URL. To refer to a (local) filesystem path, prepend it with the {.href [`file://` URI ", "scheme](https://en.wikipedia.org/wiki/File_URI_scheme)}.")) } if (from_file) { checkmate::assert_file_exists(input, access = "r") } else { checkmate::assert_character(input) } assert_cli(cmd = "taplo", error_msg = paste0("The {.strong taplo} executable is required but couldn't be found on system's {.href ", "[PATH](https://en.wikipedia.org/wiki/PATH_(variable))}. 
Binaries of the {.href [Taplo ", "CLI](https://taplo.tamasfe.dev/cli/introduction.html)} are available for all common platforms from here: ", "{.url https://taplo.tamasfe.dev/cli/installation/binary.html}")) result <- suppressWarnings(system2(command = "taplo", args = c("lint", "--no-auto-config", "--colors=always", paste0("--schema=", schema)[!is.null(schema)], ifelse(from_file, input, "-")), stdout = TRUE, stderr = TRUE, input = if (from_file) NULL else input, # cf. https://taplo.tamasfe.dev/cli/usage/configuration.html env = "RUST_LOG=error", timeout = 5L)) result_excl_ansi <- cli::ansi_strip(result) if (length(result) > 1L && stringr::str_detect(string = result_excl_ansi[1L], pattern = "(?i)error")) { # only retain first Taplo error of each class (later ones are usually unhelpful) ix_error <- result_excl_ansi |> stringr::str_detect(pattern = "^(?i)error:") |> which() ix_error_begin <- result_excl_ansi |> stringr::str_detect(pattern = "^(?i)error:") |> magrittr::and(!duplicated(result_excl_ansi)) |> which() if (top_errors_only && length(ix_error) > 0L) { i_error_end <- result_excl_ansi |> stringr::str_detect(pattern = "^ERROR ") |> which() |> dplyr::first() ix_final <- ix_error_begin |> purrr::map(\(x) { i_error_current <- which(ix_error == x) has_next <- i_error_current < length(ix_error) seq(from = x, to = ifelse(has_next, ix_error[i_error_current + 1L] - 1L, i_error_end - 1L)) }) |> purrr::list_c(ptype = integer()) } else { ix_final <- seq_along(result) } # we avoid `cli::cli_abort()` for now since it always strips consecutive whitespaces, even non-breaking ones # cf. https://github.com/r-lib/cli/issues/531#issuecomment-1292639286 rlang::abort(message = c(ifelse(from_file, cli::format_inline("Validation of {.file {input}} failed with"), "TOML validation failed with:"), # turning it into a named vctr avoids the default bullets " " = "", result[ix_final], ""), use_cli_format = FALSE) } invisible(input) }
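A hedged sketch of schema-based validation (the schema URL is purely hypothetical and the call is wrapped in `try()` since it would fail to resolve; the taplo CLI must be available on the `PATH`):
try(
  pal::toml_validate(input = c("[package]",
                               'name = "example"'),
                     from_file = FALSE,
                     schema = "https://example.com/package.schema.json")
)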
test_cli
#' Check if CLI tool is available #' #' Tests whether a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's #' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and optionally returns the executable's filesystem path. #' #' @param cmd System command to invoke the CLI tool. A character scalar. #' @param get_cmd_path Whether or not to return the filesystem path to the CLI tool. If `FALSE`, a boolean is returned indicating if the CLI tool is found on #' the system or not. #' @param force_which If set to `TRUE`, [base::Sys.which()], which relies on the system command `which`, will be used instead of `command -v` to determine the #' availability of `cmd` on Unix-like systems. On Windows, `base::Sys.which()` is used in any case. `command -v` is #' [generally recommended for bourne-like shells](https://unix.stackexchange.com/q/85249/201803) and therefore is the default on Linux, macOS and other #' [Unixes](https://en.wikipedia.org/wiki/Unix-like). #' #' @return A logical scalar if `get_cmd_path = FALSE`, otherwise the filesystem [path][fs::path] to the `cmd` executable. #' @family sys #' @export #' #' @examples #' pal::test_cli("Rscript") #' #' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc") #' pal::test_cli(cmd, get_cmd_path = TRUE) test_cli <- function(cmd, get_cmd_path = FALSE, force_which = FALSE) { checkmate::assert_string(cmd) checkmate::assert_flag(get_cmd_path) checkmate::assert_flag(force_which) if (force_which || checkmate::test_os("windows")) { result <- Sys.which(names = cmd) |> as.character() |> when(. == "" ~ character(), ~ .) |> when(get_cmd_path ~ fs::path(.), length(.) == 0L ~ FALSE, ~ TRUE) } else { # define "defused" warning/error handler defuse <- function(e) if (get_cmd_path) character() else FALSE result <- tryCatch(expr = system2(command = "command", args = c("-v", cmd), stdout = get_cmd_path, stderr = get_cmd_path), warning = defuse, error = defuse) |> when(get_cmd_path ~ fs::path(.), isFALSE(.) ~ ., ~ TRUE) } result }
assert_cli
#' Assert CLI tool is available #' #' Asserts that a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's #' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and returns the executable's filesystem path. #' #' @inheritParams test_cli #' @param error_msg Error message to print in case `cmd` is not found on system's `PATH`. `r pkgsnip::param_lbl("cli_markup_support")` A character scalar. #' #' @return If the CLI tool is available on the system's `PATH`, its filesystem path, invisibly. Otherwise, an error is thrown. #' @family sys #' @family checkmate #' @export #' #' @examples #' pal::assert_cli("Rscript") #' #' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc") #' pal::assert_cli(cmd) assert_cli <- function(cmd, error_msg = paste0("The {.strong {cmd}} executable is required but couldn't be found on ", "system's {.href [PATH](https://en.wikipedia.org/wiki/PATH_(variable))}."), force_which = FALSE) { checkmate::assert_string(error_msg) cli_path <- test_cli(cmd = cmd, get_cmd_path = TRUE, force_which = force_which) if (length(cli_path) == 0L) { cli::cli_abort(error_msg) } invisible(cli_path) }
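A hedged sketch of the typical pattern (it assumes Pandoc is installed): assert the CLI tool's presence and feed the returned executable path straight into a system call:
cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc")
pandoc_path <- pal::assert_cli(cmd)

# print the first line of Pandoc's version info
system2(command = pandoc_path,
        args = "--version",
        stdout = TRUE) |>
  head(1L)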
path_script
This function is inspired by an answer from Stack Overflow user Jerry T.
TODO:
Check out `whereami::whereami()` and `whereami::thisfile()`,
which seem to offer exactly this functionality.#' Determine file path of executing script #' #' Tries to determine the path to the R/Rmd script that this function is called from. #' #' @return The file path to the executing script. #' @family sys #' @export path_script <- function() { rlang::check_installed("rprojroot", reason = reason_pkg_required()) rlang::check_installed("rstudioapi", reason = reason_pkg_required()) cmd_args <- commandArgs(trailingOnly = FALSE) needle <- "--file=" match <- grep(x = cmd_args, pattern = needle) # Rscript if (length(match) > 0L) { return(normalizePath(sub(needle, "", cmd_args[match]))) } # `source()`d via R console if (!is.null(sys.frames()[[1L]][["ofile"]])) { return(normalizePath(sys.frames()[[1L]][["ofile"]])) } # RStudio Run Selection, cf. http://stackoverflow.com/a/35842176/2292993 if (!is.null(rprojroot::thisfile())) { return(rprojroot::thisfile()) } # RStudio document path <- rstudioapi::getActiveDocumentContext()[["path"]] if (path != "") { return(normalizePath(path)) } cli::cli_abort("Couldn't determine script path.") }
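A hedged sketch of the intended use case (only meaningful when run from a script file, e.g. via `Rscript`; calling it from a fresh interactive console throws an error): resolving a path relative to the running script:
try({
  script_dir <- fs::path_dir(pal::path_script())
  config_path <- fs::path(script_dir, "config.toml")
})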
capture_print
#' Capture printed console output as string #' #' Returns what [`print(x)`][base::print()] would output on the console – if `collapse` is set to anything other than `NULL`, as a character scalar #' (i.e. a string), otherwise as a character vector of output lines. #' #' This is a simple convenience wrapper around [utils::capture.output()]. Note that [ANSI escape sequences](https://en.wikipedia.org/wiki/ANSI_escape_code) #' (e.g. as output by the `print()` methods of tidyverse packages) are included in the result. To remove them, use [cli::ansi_strip()]. #' #' @param x \R object of which the output of `print()` is to be captured. #' @param collapse Optional string for concatenating the results. If `NULL`, a character vector of print lines is returned. #' #' @return A character vector if `collapse = NULL`, otherwise a character scalar. #' @export #' #' @examples #' mtcars |> pal::capture_print() #' mtcars |> pal::capture_print(collapse = "\n") |> cat() #' #' # to strip ANSI escape sequences, use `cli::ansi_strip()` #' mtcars |> tibble::as_tibble() #' #' mtcars |> #' tibble::as_tibble() |> #' pal::capture_print(collapse = "\n") |> #' cli::ansi_strip() |> #' cat() capture_print <- function(x, collapse = NULL) { utils::capture.output(print(x), file = NULL, type = "output", split = FALSE) |> paste0(collapse = collapse) }
cat_lines
#' Convert to character vector and print newline-separated #' #' Convenience wrapper around [as_chr()] and [base::cat()], mainly intended for interactive use. #' #' @param ... \R object(s) to convert to character and print. `r pkgsnip::roxy_lbl("dyn_dots_support")` #' #' @inherit base::cat return #' @seealso #' [cli::cat_line()] for a faster alternative that doesn't _recursively_ convert its input to type character. #' #' [xfun::raw_string()] (and [xfun::file_string()]) for an alternative approach to the same use case (but without any conversion to type character at all). #' #' [`writeLines(con = stdout())`][base::writeLines] for a base R alternative that *only* accepts character inputs. #' @export #' #' @examples #' fs::path_package(package = "pal", #' "rstudio", "addins.dcf") |> #' readr::read_lines() |> #' pal::cat_lines() #' #' # conversion to type character, recursive vs. non-recursive #' to_convert <- list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day")) #' #' to_convert |> pal::cat_lines() #' to_convert |> cli::cat_line() #' #' # this OTOH only accepts chr inputs #' try( #' to_convert |> writeLines() #' ) cat_lines <- function(...) { cat(as_chr(...), sep = "\n") }
cols_regex
NOTES:
The "real" data in the examples stems from here.
The function has been proposed upstream in PR #1112 but was rejected.
#' Create [readr][readr::readr-package] column specification using regular expression matching #' #' Allows to define a regular expression per desired [column specification object][readr::cols] matching the respective column names. #' #' @param ... Named arguments where the names are (Perl-compatible) regular expressions and the values are column objects created by `col_*()`, or their #' abbreviated character names (as described in the `col_types` parameter of [readr::read_delim()]). `r pkgsnip::roxy_lbl("dyn_dots_support")` #' @param .col_names Column names which should be matched by `...`. #' @param .default Any named columns not matched by any of the regular expressions in `...` will be read with this column type. #' #' @return A [column specification][readr::cols]. #' @export #' #' @examples #' library(magrittr) #' #' # for some hypothetical CSV data column names like these... #' col_names <- c("VAR1_Text", #' "VAR2_Text", #' "VAR3_Text_Other", #' "VAR1_Code_R1", #' "VAR2_Code_R2", #' "HAS_R1_Lag", #' "HAS_R2_Lag", #' "GARBAGEX67", #' "GARBAGEY09") #' #' # ...a column spec could be created concisely as follows: #' col_regex <- list("_Text(_|$)" = "c", #' "_Code(_|$)" = "i", #' "^GARBAGE" = readr::col_skip()) #' #' pal::cols_regex(.col_names = col_names, #' !!!col_regex, #' .default = "l") #' #' # we can parse some real data: #' url <- "https://salim_b.gitlab.io/misc/Kantonsratswahl_Zuerich_2019_Ergebnisse_Gemeinden.csv" #' #' raw_data <- #' httr2::request(url) |> #' httr2::req_perform() |> #' httr2::resp_body_string() #' #' col_spec <- pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c", #' "(?i)anteil" = "d", #' .default = "i", #' .col_names = pal::dsv_colnames(raw_data)) #' #' print(col_spec) #' #' readr::read_csv(file = raw_data, #' col_types = col_spec) #' #' # to process the same data without first downloading it to disk, use `readr::type_convert()`: #' readr::read_csv(file = url, #' col_types = list(.default = "c")) %>% #' readr::type_convert(col_types = col_spec) cols_regex <- function(..., .col_names, .default = readr::col_character()) { rlang::check_installed("readr", reason = reason_pkg_required()) spec <- list() patterns <- rlang::list2(...) if (length(setdiff(names(patterns), "")) < length(patterns)) { cli::cli_abort("All column specifications in {.arg ...} must be named by a regular expression.") } for (i in seq_along(patterns)) { matched_vars <- grep(x = .col_names, pattern = names(patterns[i]), value = TRUE, perl = TRUE) spec <- rep(list(patterns[[i]]), length(matched_vars)) |> magrittr::set_names(matched_vars) %>% c(spec, .) } spec <- c(spec, alist(.default = .default)) do.call(readr::cols, spec) }
mime_to_ext
#' MIME type to file extension #' #' Determines a suitable file extension from a [MIME type](https://en.wikipedia.org/wiki/Media_type), based i.a. on [`mime::mimemap`][mime::mimemap] and #' `mime:::mimeextra`. In case of multiple matches, the first one is returned and a warning is printed (unless `quiet = TRUE`). #' #' @param mime_type MIME type to determine the file extension for. A character scalar. #' @param quiet `r pkgsnip::param_lbl("quiet")` #' #' @return A character scalar. #' @export #' #' @examples #' pal::mime_to_ext("application/json") #' pal::mime_to_ext("audio/mpeg") mime_to_ext = function(mime_type, quiet = FALSE) { checkmate::assert_string(mime_type) i <- which(mime_types_exts %in% mime_type) result <- names(mime_types_exts[i]) if (length(i) == 0L) { return(NA_character_) } if (!quiet && length(i) > 1L) { cli::cli_warn("The MIME type {.val {mime_type}} maps to multiple file extensions ({.val {result}}). Only the first one is returned.") } result[1L] }
rename_from
NOTES:
This function is ~3x faster than a similar `dplyr::rename(!!!list(...))` call:
bench::mark(pal = mtcars |> pal::rename_from(dict = list(mpg = "Miles/(US) gallon",
cyl = "Number of cylinders",
disp = "Displacement (cu.in.)",
hp = "Gross horsepower",
drat = "Rear axle ratio",
wt = "Weight (1000 lbs)",
qsec = "1/4 mile time",
vs = "Engine (0 = V-shaped, 1 = straight)",
am = "Transmission (0 = automatic, 1 = manual)",
gear = "Number of forward gears",
carb = "Number of carburetors")),
dplyr = mtcars |> dplyr::rename(!!!list("Miles/(US) gallon" = "mpg",
"Number of cylinders" = "cyl",
"Displacement (cu.in.)" = "disp",
"Gross horsepower" = "hp",
"Rear axle ratio" = "drat",
"Weight (1000 lbs)" = "wt",
"1/4 mile time" = "qsec",
"Engine (0 = V-shaped, 1 = straight)" = "vs",
"Transmission (0 = automatic, 1 = manual)" = "am",
"Number of forward gears" = "gear",
"Number of carburetors" = "carb")))
#' Rename elements from dictionary #' #' Renames the elements of a vector or list from a dictionary, leaving unmatched names untouched by default. #' #' @param x Object whose elements are to be renamed. #' @param dict A named character vector or list of strings where names are the old and the values are the new names. #' @param default Value(s) used when names aren't matched by any name in `dict`. Recycled to the length of `x`. #' #' @return `x` with elements renamed according to `dict`. #' @export #' #' @examples #' mtcars |> pal::rename_from(dict = c(mpg = "Miles/(US) gallon", #' cyl = "Number of cylinders", #' disp = "Displacement (cu.in.)", #' hp = "Gross horsepower", #' drat = "Rear axle ratio", #' wt = "Weight (1000 lbs)", #' qsec = "1/4 mile time", #' vs = "Engine (0 = V-shaped, 1 = straight)", #' am = "Transmission (0 = automatic, 1 = manual)", #' gear = "Number of forward gears", #' carb = "Number of carburetors", #' not_there = "Yikes!")) rename_from <- function(x, dict, default = names(x)) { checkmate::assert_named(x) if (is.list(dict)) { checkmate::assert_list(dict, types = "character", any.missing = FALSE, names = "unique") } else { checkmate::assert_vector(dict, strict = TRUE, any.missing = FALSE, names = "unique") } if (length(x) > 0L) { checkmate::assert_character(default) dict %<>% purrr::imap(\(x, name) rlang::new_formula(lhs = name, rhs = x, env = emptyenv())) names_new <- dplyr::case_match(names(x), !!!dict, .default = default) return(magrittr::set_names(x, names_new)) } else { return(x) } }
arrange_by
#' Sort vector by another vector #' #' Arranges a vector `x` by the order of another vector `by`. #' #' Note that this function significantly differs from [base::sort_by()]. #' #' @param x Vector to be ordered. #' @param by Reference vector which `x` will be ordered by. #' #' @return A permutation of `x`. #' @export #' #' @examples #' library(magrittr) #' #' # generate 100 random letters #' random_letters <- #' letters |> #' magrittr::extract(sample.int(n = 26L, #' size = 100L, #' replace = TRUE)) %T>% #' print() #' #' # sort the random letters alphabetically #' random_letters |> pal::arrange_by(by = letters) arrange_by <- function(x, by) { x[order(match(x = x, table = by))] }
when
#' Generalized `if`-`else` for pipes #' #' A flavour of pattern matching (or an if-else abstraction) in which a value `.` is matched against a sequence `...` of condition-action sets. When a valid #' match/condition is found, the action is executed and the result of the action is returned. #' #' Condition-action sets are written as formulas with conditions as left-hand sides (LHS) and actions as right-hand sides (RHS). A formula with only a #' right-hand side will be treated as a condition which is always satisfied. For such a default case one can also omit the `~` symbol, but note that its value #' will then be evaluated early. Any named argument will be made available in all conditions and actions, which is useful in avoiding repeated temporary #' computations or temporary assignments. # #' Validity of the conditions are tested with [base::isTRUE()]. In other words conditions resulting in more than one logical will never be valid. Note that the #' input value is always treated as a single object, as opposed to the [base::ifelse()] function. #' #' This function is copied over from package purrr since it [has been deprecated with the release of purrr #' 1.0](https://www.tidyverse.org/blog/2022/12/purrr-1-0-0/#core-purpose-refinements). `pal::when()` can be used as a drop-in replacement for `purrr::when()`. #' All credits go to the original authors of `purrr::when()` [as of #' 2022-12-21](https://github.com/tidyverse/purrr/blame/54546b6bf938c54da4d232061db2fc91b1d5923c/R/deprec-when.R). #' #' @param . Value to match against. #' @param ... Formulas, each containing a condition as LHS and an action as RHS. Named arguments will define additional values. #' `r pkgsnip::roxy_lbl("dyn_dots_support")` #' #' @return The value resulting from the action of the first matched condition, or `NULL` if no matches are found and no default is given. #' @export #' #' @examples #' 1:10 |> pal::when(sum(.) <= 50 ~ sum(.), #' sum(.) <= 100 ~ sum(.)/2L, #' ~ 0L) when <- function(., ...) { dots <- rlang::list2(...) names <- names(dots) named <- if (is.null(names)) rep(FALSE, length(dots)) else names != "" if (sum(!named) == 0L) { cli::cli_abort("At least one matching condition is needed.") } is_formula <- vapply(dots, function(dot) identical(class(dot), "formula"), logical(1L)) env <- new.env(parent = parent.frame()) env[["."]] <- . if (sum(named) > 0L) { for (i in which(named)) { env[[names[i]]] <- dots[[i]] } } result <- NULL for (i in which(!named)) { if (is_formula[i]) { action <- length(dots[[i]]) if (action == 2L || isTRUE(eval(dots[[i]][[2L]], env, env))) { result <- eval(dots[[i]][[action]], env, env) break } } else { result <- dots[[i]] } } result }