# DO NOT EDIT THIS FILE BY HAND! Instead edit the R Markdown source file `Rmd/pal.Rmd` and run `pkgpurl::purl_rmd()`.
# See `README.md#r-markdown-format` for more information on the literate programming approach used applying the R Markdown format.
# pal: Friendly Convenience/Utility Functions
# Copyright (C) 2025 Salim Brüggemann
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
utils::globalVariables(names = c(".",
# tidyselect fns
"any_of",
"everything",
"where",
# other
"data",
"default_value",
"default_value_dynamic",
"env_var",
"key",
"package",
"Package",
"r_opt",
"repository",
"rowid",
"type",
"Version",
"X1",
"X2"))
# forbidden dots arguments
forbidden_dots <- list(roxy_tag_value = c("pkgs",
"destdir",
"available",
"type",
"quiet"))
as_env_var_name <- function(...) {
as_str(...,
sep = "_") |>
stringr::str_replace_all(pattern = "\\W",
replacement = "_") |>
stringr::str_replace(pattern = "^(\\d)",
replacement = "_\\1") |>
toupper()
}
get_pkg_config <- function(pkg) {
checkmate::assert_string(pkg)
if (!exists_in_namespace(x = "pkg_config",
ns = pkg)) {
cli::cli_abort(paste0("Package {.pkg {pkg}} has not defined the required package configuration metadata in its namespace (as object {.var pkg_config}). ",
"See the {.emph Details} section of {.help pal::pkg_config_val} for more information."))
}
pkg_config <- utils::getFromNamespace(x = "pkg_config",
ns = pkg)
checkmate::assert_data_frame(pkg_config,
col.names = "unique")
if (!("key" %in% colnames(pkg_config) && any(c("default_value", "default_value_dynamic") %in% colnames(pkg_config)))) {
cli::cli_abort("{.code {pkg}:::pkg_config} must at minimum contain the columns {.var key} and {.var default_value} or {.var default_value_dynamic}.")
}
# ensure/complement df structure
pkg_config |>
vctrs::tib_cast(tibble::tibble(key = character(),
default_value = list(),
default_value_dynamic = character(),
require = logical(),
description = character())) |>
tidyr::replace_na(replace = list(require = TRUE))
}
get_pkg_config_val_default <- function(key,
pkg_config,
env = parent.frame()) {
key <- rlang::arg_match0(key,
values = pkg_config$key)
data <- pkg_config |> dplyr::filter(key == !!key)
if (is.na(data$default_value_dynamic)) {
result <- Reduce(x = data$default_value,
f = c)
} else {
# ensure only one of `default_value_dynamic` and `default_value` is set
if (!is.null(data$default_value[[1L]])) {
cli::cli_abort(paste0("Only one of {.var default_value} and {.var default_value_dynamic} can be set in {.var pkg_config} for a specific {.var key}, but ",
"for {.field {key}} both are."))
}
result <- eval(expr = parse(text = data$default_value_dynamic),
envir = env)
}
result
}
has_root <- function(criterion,
path,
check_parent_dirs) {
checkmate::assert_class(criterion,
classes = "root_criterion")
checkmate::assert_flag(check_parent_dirs)
rlang::check_installed("rprojroot",
reason = reason_pkg_required())
checkmate::assert_directory_exists(path,
access = "r")
if (check_parent_dirs) {
result <- tryCatch(
expr = {
rprojroot::find_root(criterion = criterion,
path = path)
TRUE
},
error = \(e) FALSE
)
} else {
result <-
criterion$testfun |>
purrr::map_lgl(\(x) x(path = path)) |>
any()
}
result
}
pkg_config_env_var_name <- function(pkg,
key) {
as_env_var_name("R", pkg, key)
}
pkg_config_opt_name <- function(pkg,
key) {
paste(pkg, key,
sep = ".")
}
is_heading_node <- function(xml_node) {
xml2::xml_name(xml_node) == "heading"
}
node_heading_lvl <- function(xml_node) {
xml_node |>
xml2::xml_attr(attr = "level") |>
as.integer()
}
subnode_ix <- function(xml_nodes,
i) {
i_node <- i
i <- i + 1L
ix_subnodes <- integer()
heading_lvl_node <- node_heading_lvl(xml_nodes[i_node])
is_subnode <- is_heading_node(xml_nodes[i_node]) && i <= length(xml_nodes)
while (is_subnode) {
is_subnode <- !is_heading_node(xml_nodes[i]) || isTRUE(node_heading_lvl(xml_nodes[i]) > heading_lvl_node)
ix_subnodes %<>% c(i[is_subnode])
i <- i + 1L
if (i > length(xml_nodes)) is_subnode <- FALSE
}
ix_subnodes
}
#' Generate an integer sequence of specific length (safe)
#'
#' Modified version of [base::seq_len()] that returns a zero-length integer in case of a zero-length input instead of throwing an error.
#'
#' @param n Desired length of the integer sequence.
#'
#' @return An integer sequence starting from `1L`.
#' @family stat
#' @export
#'
#' @examples
#' pal::safe_seq_len(5)
#'
#' # this function simply returns a zero-length integer for zero-length inputs...
#' pal::safe_seq_len(NULL)
#' pal::safe_seq_len(integer())
#'
#' # ...while `seq_len()` throws an error
#' try(seq_len(NULL))
#' try(seq_len(integer()))
safe_seq_len <- function(n) {
if (length(n) > 0L) {
return(seq_len(n))
} else {
return(integer())
}
}
#' Maximum (safe)
#'
#' @description
#' Modified version of [base::max()] that differs in the following ways:
#'
#' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`).
#' - If the input is of length zero, the output will also be of length zero (of the same type as the input).
#' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input
#' contains only `-Inf` or `NA`.
#' - `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @param ... Numeric objects of which to determine the maximum. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param rm_na Ignore missing values in `...`. If missing values are present and `rm_na = FALSE`, the result will always be `NA`.
#'
#' @return A numeric scalar or empty value, of the same type as `...`.
#' @family stat
#' @export
#'
#' @examples
#' # other than `base::max()`, this function removes `NA`s by default
#' max(1, NA_real_, 2)
#' pal::safe_max(1, NA_real_, 2)
#'
#' # other than `base::max()`, this function does not return `-Inf` or `NA_character_` for
#' # zero-length inputs
#' max(NULL)
#' max(integer())
#' pal::safe_max(NULL)
#' pal::safe_max(integer())
#'
#' # other than `base::max()`, this function fails for non-numeric inputs
#' max("zero", 1L)
#' max("zero", "one")
#' max(character())
#' try(pal::safe_max("zero", 1L))
#' try(pal::safe_max("zero", "one"))
#' try(pal::safe_max(character()))
safe_max <- function(...,
rm_na = TRUE) {
input <- rlang::list2(...)
purrr::map(input,
\(x) checkmate::assert_numeric(x,
typed.missing = TRUE,
null.ok = TRUE,
.var.name = "..."))
input |>
purrr::reduce(c) |>
when(length(.) == 0L ~ .[0L],
all(is.na(.)) && rm_na ~ .[NA],
~ max(., na.rm = rm_na))
}
#' Minimum (safe)
#'
#' @description
#' Modified version of [base::min()] that differs in the following ways:
#'
#' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`).
#' - If the input is of length zero, the output will also be of length zero (of the same type as the input).
#' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input
#' contains only `-Inf` or `NA`.
#' - `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inheritParams safe_max
#' @param ... Numeric objects of which to determine the minimum. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inherit safe_max return
#' @family stat
#' @export
#'
#' @examples
#' # other than `base::min()`, this function removes `NA`s by default
#' min(1, NA_real_, 2)
#' pal::safe_min(1, NA_real_, 2)
#'
#' # other than `base::min()`, this function does not return `-Inf` or `NA_character_` for
#' # zero-length inputs
#' min(NULL)
#' min(integer())
#' pal::safe_min(NULL)
#' pal::safe_min(integer())
#'
#' # other than `base::min()`, this function fails for non-numeric inputs
#' min("zero", 1L)
#' min("zero", "one")
#' min(character())
#' try(pal::safe_min("zero", 1L))
#' try(pal::safe_min("zero", "one"))
#' try(pal::safe_min(character()))
safe_min <- function(...,
rm_na = TRUE) {
input <- rlang::list2(...)
purrr::map(input,
\(x) checkmate::assert_numeric(x,
typed.missing = TRUE,
null.ok = TRUE,
.var.name = "..."))
input |>
purrr::reduce(c) |>
when(length(.) == 0L ~ .[0L],
all(is.na(.)) && rm_na ~ .[NA],
~ min(., na.rm = rm_na))
}
#' Round to any number
#'
#' Round a numeric vector to any number, rounded up by default (`round_up = TRUE`).
#'
#' This function's precision is limited to 15 significant digits in order to account for the [limits of R's floating point
#' representation](https://stackoverflow.com/a/35349949/7196903).
#'
#' A computationally more efficient alternative would be the unexported `scales:::round_any()` which drives [scales::number()] – with the drawback that it lacks
#' control of rounding up exact remainders of `accuracy / 2`, i.e. it _always_ rounds _off_.
#'
#' @param x Vector of numbers to round.
#' @param to Number to round `x` to. A numeric scalar.
#' @param round_up Whether to round a remainder of exactly `to / 2` _up_ or not. Set to `FALSE` in order to round _off_.
#'
#' @return A numeric vector of the same length as `x`.
#' @seealso [prettyunits::pretty_round()]
#' @family stat
#' @export
#'
#' @examples
#' vals = c(0.025, 0.1, 0.1999, 0.099999, 0.49, 0.55, 0.5, 0.9, 1)
#' vals |> pal::round_to(to = 0.05)
#' vals |> pal::round_to(to = 0.05,
#' round_up = FALSE)
round_to <- function(x,
to = 0.2,
round_up = TRUE) {
checkmate::assert_number(to,
lower = 0L,
finite = TRUE)
checkmate::assert_flag(round_up)
result <- x %/% to
remainder <- signif(x %% to,
# round to a max of 15 significant digits to avoid exceeding floating-point representation limits
digits = 15L)
if (round_up) {
which_round <- remainder >= (to / 2L)
} else {
which_round <- remainder > (to / 2L)
}
result[which_round] <- result[which_round] + 1L
result * to
}
#' Statistical mode
#'
#' Computes the [statistical mode](https://en.wikipedia.org/wiki/Mode_(statistics)) of a set of values. The mode is defined as the most frequent value, i.e. the
#' value that is most likely to be sampled.
#'
#' @param x An \R object.
#' @param type What the function should calculate.
#' - `"one"`: Return _the_ mode of `x`. If multiple modes or no mode at all exists, `NA` is returned.
#' - `"all"`: Return _all_ modes of `x`. If none exists (e.g. because all values of `x` are distinct), `NA` is returned.
#' - `"n"`: Return the number of modes of `x`.
#' @param rm_na Ignore missing values in `x`. A logical scalar.
#'
#' @return If `type = "n"`, the number of modes in `x` (an integer). Otherwise, the mode(s) of `x` or `NA` if none exist(s) (same type as `x`).
#' @family stat
#' @seealso The package [modeest](https://cran.r-project.org/package=modeest) for more powerful mode estimation functions.
#' @export
#'
#' @examples
#' pal::stat_mode(c(rep(3L, times = 3), 1:9))
#' pal::stat_mode(c(1.5, 4, 9.9))
#'
#' # if no mode exists, `NA` (of the same type as x) is returned
#' pal::stat_mode(letters)
#' pal::stat_mode(c(letters, "a"))
#'
#' # if multiple modes exist, `NA` is returned by default
#' pal::stat_mode(c(letters, "a", "b"))
#' # set `type = "all"` to return all modes instead
#' pal::stat_mode(c(letters, "a", "b"),
#' type = "all")
#'
#' # `NA` is treated as any other value by default
#' pal::stat_mode(c(letters, "a", NA_character_, NA_character_),
#' type = "all")
#' # set `rm_na = TRUE` to ignore `NA` values
#' pal::stat_mode(c(letters, "a", NA_character_, NA_character_),
#' type = "all",
#' rm_na = TRUE)
stat_mode <- function(x,
type = c("one", "all", "n"),
rm_na = FALSE) {
x <- unlist(x)
type <- rlang::arg_match(type)
checkmate::assert_flag(rm_na)
if (rm_na) x <- x[!is.na(x)]
# get unique values
u_x <- unique(x)
n_u_x <- length(u_x)
# get frequencies of all unique values
frequencies <- tabulate(match(x, u_x))
modes <- frequencies == max(frequencies)
# determine number of modes
n_modes <- sum(modes) %>% ifelse(. == n_u_x, 0L, .)
type |> when(
# return the number of modes if requested
. == "n" ~ n_modes,
# or return mode(s) if requested and existing
(. == "one" && n_modes == 1L) || (. == "all" && n_modes > 0L) ~ u_x[which(modes)],
# else return `NA` (of the same type as `x`)
~ x[NA][1L]
)
}
#' Assert data frame columns
#'
#' Asserts that a data frame contains the specified columns.
#'
#' @param data Data frame to check.
#' @param cols Column names which must be present in `data`. A character vector or `NULL`.
#' @param strict Whether additional columns not specified in `cols` are allowed in `data`.
#' @param obj_name Name of the checked object to print in error messages.
#'
#' @return `data`, invisibly.
#' @family df
#' @export
#'
#' @examples
#' pal::assert_cols(data = mtcars,
#' cols = c("mpg", "disp"))
#'
#' try(
#' pal::assert_cols(data = mtcars,
#' cols = c("mpg", "display"))
#' )
#'
#' try(
#' pal::assert_cols(data = mtcars,
#' cols = c("mpg", "disp"),
#' strict = TRUE)
#' )
#'
#' try(
#' pal::assert_cols(data = mtcars,
#' strict = TRUE)
#' )
assert_cols <- function(data,
cols = NULL,
strict = FALSE,
obj_name = checkmate::vname(data)) {
checkmate::assert_character(cols,
null.ok = TRUE)
checkmate::assert_flag(strict)
checkmate::assert_string(obj_name)
checkmate::assert_data_frame(data,
min.cols = length(cols))
# disallow any cols when `strict` without `cols`
if (strict && is.null(cols)) {
cols <- character()
}
checkmate::assert_names(colnames(data),
must.include = if (strict) NULL else cols,
identical.to = if (strict) cols else NULL,
what = "column names",
.var.name = obj_name)
invisible(data)
}
#' Test if two data frames/tibbles are equal
#'
#' Compares two [data frames][base::data.frame()]/[tibbles][tibble::tbl_df] (or two objects coercible to tibbles like [matrices][base::matrix()]), optionally
#' ignoring row and column ordering, and returns `TRUE` if both are equal, or `FALSE` otherwise. If the latter is the case and `quiet = FALSE`, information
#' about detected differences is printed to the console.
#'
#' Under the hood, this function relies on [waldo::compare()].
#'
#' @inheritParams waldo::compare
#' @param x The data frame / tibble to check for changes.
#' @param y The data frame / tibble that `x` should be checked against, i.e. the reference, so messages describe how `x` is different to `y`.
#' @param ignore_col_order Whether or not to ignore the order of columns.
#' @param ignore_row_order Whether or not to ignore the order of rows.
#' @param ignore_col_types Whether or not to distinguish similar column types. Currently, if set to `TRUE`, this will convert factors to characters and integers
#' to doubles before the comparison.
#' @param quiet Whether or not to output detected differences between `x` and `y` to the console.
#' @param max_diffs Maximum number of differences shown. Only relevant if `quiet = FALSE` or `return_waldo_compare = TRUE`. Set `max_diffs = Inf` to see all
#' differences.
#' @param return_waldo_compare Whether to return a character vector of class [`waldo_compare`][waldo::compare] describing the differences between `x` and `y`
#' instead of `TRUE` or `FALSE`.
#'
#' @return If `return_waldo_compare = FALSE`, a logical scalar indicating the result of the comparison. Otherwise a character vector of class
#' [`waldo_compare`][waldo::compare] describing the differences between `x` and `y`.
#' @family df
#' @export
#'
#' @examples
#' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))]
#'
#' # by default, ordering of rows and columns matters...
#' pal::is_equal_df(x = mtcars,
#' y = scramble(mtcars))
#'
#' # ...but those can be ignored if desired
#' pal::is_equal_df(x = mtcars,
#' y = scramble(mtcars),
#' ignore_col_order = TRUE)
#' pal::is_equal_df(x = mtcars,
#' y = scramble(mtcars),
#' ignore_row_order = TRUE)
#'
#' # by default, `is_equal_df()` is sensitive to column type differences...
#' df1 <- data.frame(x = "a",
#' stringsAsFactors = FALSE)
#' df2 <- data.frame(x = factor("a"))
#' pal::is_equal_df(df1, df2)
#'
#' # ...but you can request it to not make a difference between similar types
#' pal::is_equal_df(df1, df2,
#' ignore_col_types = TRUE)
is_equal_df <- function(x,
y,
ignore_col_order = FALSE,
ignore_row_order = FALSE,
ignore_col_types = FALSE,
tolerance = NULL,
quiet = TRUE,
max_diffs = 10L,
return_waldo_compare = FALSE) {
rlang::check_installed("waldo",
reason = reason_pkg_required())
checkmate::assert_flag(ignore_col_order)
checkmate::assert_flag(ignore_row_order)
checkmate::assert_flag(ignore_col_types)
checkmate::assert_number(tolerance,
lower = 0.0,
null.ok = TRUE)
checkmate::assert_flag(quiet)
checkmate::assert_number(max_diffs,
lower = 1.0)
checkmate::assert_flag(return_waldo_compare)
# convert `x` and `y` to tibble if any modification is required
if (ignore_col_order || ignore_row_order || ignore_col_types) {
x %<>% tibble::as_tibble()
y %<>% tibble::as_tibble()
}
# sort columns if necessary
if (ignore_col_order) {
x %<>% dplyr::select(sort(colnames(.)))
y %<>% dplyr::select(sort(colnames(.)))
}
# sort rows if necessary
if (ignore_row_order) {
x %<>% dplyr::arrange(dplyr::across(.cols = everything()))
y %<>% dplyr::arrange(dplyr::across(.cols = everything()))
}
# harmonize column types if necessary
if (ignore_col_types) {
x %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor),
.fns = as.character),
dplyr::across(.cols = where(is.integer),
.fns = as.double))
y %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor),
.fns = as.character),
dplyr::across(.cols = where(is.integer),
.fns = as.double))
}
result <- waldo::compare(x = x,
y = y,
tolerance = tolerance,
x_arg = "x",
y_arg = "y",
max_diffs = max_diffs)
if (length(result) > 0L) {
if (!quiet) {
cli::cli_alert_info(text = "`x` differs from `y`:")
cat("\n")
print(result)
}
if (!return_waldo_compare) result <- FALSE
} else if (!return_waldo_compare) {
result <- TRUE
}
result
}
#' Reduce a nested list of data frames / tibbles to a single tibble
#'
#' Recursively reduces a nested list containing data frames / tibbles at its leafs to a single tibble.
#'
#' @param x A list containing data frames / tibbles at its leafs.
#' @param strict Ensure `x` contains data frames / tibbles only and throw an error otherwise. If `FALSE`, leafs containing other objects are ignored (skipped).
#'
#' @return `r pkgsnip::return_lbl("tibble")`
#' @family df
#' @export
reduce_df_list <- function(x,
strict = TRUE) {
checkmate::assert_flag(strict)
if (is.data.frame(x) || tibble::is_tibble(x)) {
result <- x
} else if (purrr::pluck_depth(x) < 2L) {
if (strict) {
cli::cli_abort("At least one element of the list to be reduced is not a data frame / tibble!")
} else {
result <- NULL
}
} else {
result <-
x |>
purrr::map(reduce_df_list,
strict = strict) |>
purrr::list_rbind()
}
result
}
#' Convert to a flat list
#'
#' @description
#' _Recursively_ flattens a list. Unlike the similar `unlist()`, it
#'
#' - always returns a list, i.e. wraps `x` in a list if necessary, and will never remove the last list level. Thus it is
#' [type-safe](https://en.wikipedia.org/wiki/Type_safety).
#'
#' - won't treat any of the list leafs specially (like `unlist()` does with factors). Thus leaf values will never be modified.
#'
#' - removes list names. `unlist()` concatenates nested names (separated by a dot).
#'
#' @param x `r pkgsnip::param_lbl("r_obj")`
#' @param keep_attrs Keep [attributes][base::attr()] (and thereby retain list structure of custom objects). A logical scalar.
#' @param attrs_to_drop Attribute names which should never be kept. Only relevant if `keep_attrs = TRUE`. A character vector.
#'
#' @return A [list][base::list()].
#' @family list
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' nested_list <- list(1:3, list("foo", list("bar"))) %T>% str()
#'
#' # unlike `unlist()` which also removes the last list tier in many cases...
#' unlist("foobar")
#' unlist(nested_list) |> str()
#' # ...this function always returns an (unnested) list
#' pal::as_flat_list("foobar") |> str()
#' pal::as_flat_list(nested_list) |> str()
#'
#' nested_list <- list(list(factor("a"), factor("b")), factor("c")) %T>% str()
#'
#' # unlike `unlist()` which combines factors...
#' unlist(nested_list) |> str()
#' # ...this function does not modify the list elements
#' pal::as_flat_list(nested_list) |> str()
#'
#' nested_list <-
#' list(c(list(1L), list(tibble::tibble(a = list(1.1, "2")))),
#' list(tibble::as_tibble(mtcars[1:2, ]))) %T>%
#' str()
#' nested_list_2 <- list(1:3, xfun::strict_list(list(list("buried deep")))) %T>% str()
#'
#' # by default, attributes and thus custom objects (except `xfun_strict_list`) are retained, i.e.
#' # not flattened...
#' pal::as_flat_list(nested_list) |> str()
#' pal::as_flat_list(nested_list_2) |> str()
#' # ...but you can drop them and thereby flatten custom objects if needed...
#' pal::as_flat_list(nested_list, keep_attrs = FALSE) |> str()
#' # ...or retain `xfun_strict_list`s, too
#' pal::as_flat_list(nested_list_2, attrs_to_drop = NULL) |> str()
as_flat_list <- function(x,
keep_attrs = TRUE,
attrs_to_drop = "xfun_strict_list") {
checkmate::assert_flag(keep_attrs)
checkmate::assert_character(attrs_to_drop,
any.missing = FALSE,
null.ok = TRUE)
regard_attrs <- keep_attrs && length(setdiff(attributes(x), attrs_to_drop)) > 0L
depth <- purrr::pluck_depth(x)
# wrap `x` in a list if it's not
if (regard_attrs || (!rlang::is_bare_list(x) && depth < 2L)) {
result <- list(x)
# return `x` as-is if it is an unnested list
} else if (depth < 3L) {
result <- x
# flatten the two last list levels (keeping attributes if requested)
} else if (depth < 4L) {
result <- if (keep_attrs) rm_list_lvl(x, attrs_to_drop = attrs_to_drop) else unname(purrr::list_flatten(as.list(x)))
} else {
# recursively feed the elements of `x` to this function and flatten the two last list levels (keeping attributes if requested)
result <-
x |>
purrr::map(\(x) as_flat_list(x = x,
keep_attrs = keep_attrs,
attrs_to_drop = attrs_to_drop)) |>
when(keep_attrs ~ rm_list_lvl(.,
attrs_to_drop = attrs_to_drop),
~ unname(purrr::list_flatten(.)))
}
result
}
rm_list_lvl <- function(x,
attrs_to_drop = "xfun_strict_list") {
checkmate::assert_list(x)
result <- list()
for (i in seq_along(x)) {
regard_attrs <- length(setdiff(attributes(x[[i]]), attrs_to_drop)) > 0L
if (!regard_attrs && purrr::pluck_depth(x[[i]]) > 1L) {
result %<>% c(x[[i]])
} else {
result %<>% c(list(x[[i]]))
}
}
result
}
#' Convert to a character vector
#'
#' _Recursively_ applies [base::as.character()] to its inputs.
#'
#' @param ... \R objects to be converted to a character vector. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param use_names Whether or not to preserve names by [base::unlist()].
#'
#' @return A character vector.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' to_convert <-
#' list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day")) %T>%
#' print()
#'
#' as.character(to_convert)
#' pal::as_chr(!!!to_convert)
as_chr <- function(...,
use_names = FALSE) {
rlang::list2(...) |>
purrr::modify_tree(leaf = as.character,
is_node = is.list) |>
unlist(use.names = use_names)
}
#' Escape line feeds / newlines
#'
#' Escapes the [POSIX-standard newline control character `LF`](https://en.wikipedia.org/wiki/Newline) (aka `\n`) which is the standard on Unix/Linux and recent
#' versions of macOS. Set `escape_cr = TRUE` in order to also escape the carriage return character `CR` (aka `\r`) commonly used on Microsoft Windows.
#'
#' @param x A character vector.
#' @param escape_cr Whether or not to also escape the carriage return character `CR` (aka `\r`). A logical scalar.
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # read in and print RStudio add-in registration file as-is
#' text <-
#' fs::path_package(package = "pal",
#' "rstudio", "addins.dcf") |>
#' readr::read_file() %T>%
#' pal::cat_lines()
#'
#' # escape newlines and print again
#' pal::escape_lf(text) |> pal::cat_lines()
escape_lf <- function(x,
escape_cr = FALSE) {
checkmate::assert_character(x,
null.ok = TRUE)
checkmate::assert_flag(escape_cr)
x %<>% stringr::str_replace_all(pattern = "\\n",
replacement = "\\\\n")
if (escape_cr) {
x %<>% stringr::str_replace_all(pattern = "\\r",
replacement = "\\\\r")
}
x
}
#' Prettify a numeric vector
#'
#' Prettifies a numeric vector by rounding, separating thousands and optionally other procedures. Basically a convenience wrapper around [round_to()] and
#' [base::format()].
#'
#' @inheritParams round_to
#' @param x A numeric vector to prettify.
#' @param round_to Number to round `x` to. A numeric scalar.
#' @param big_mark Character used between every 3 digits to separate thousands.
#' @param decimal_mark Character used to indicate the numeric decimal point. Only relevant if `x` does not solely consist of integers.
#' @param justify_right Whether to right-justify the results to a common width. See the `trim` parameter of [base::format()] for details.
#' @param ... Further arguments passed on to [base::format()].
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' c(0.11, 11111.11) |> pal::prettify_nr()
#'
#' c(0.11, 11111.11) |>
#' pal::prettify_nr(justify_right = TRUE) |>
#' pal::cat_lines()
prettify_nr <- function(x,
round_to = 0.1,
round_up = TRUE,
big_mark = "'",
decimal_mark = ".",
justify_right = FALSE,
...) {
round_to(x = x,
to = round_to,
round_up = round_up) |>
format(big.mark = big_mark,
decimal.mark = decimal_mark,
trim = !justify_right,
... = ...)
}
#' Convert to sentence case with trailing punctuation mark
#'
#' Converts the input to a character vector and ensures it starts with an upper case letter and ends with the specified punctuation mark.
#'
#' Note that this function doesn't alter any characters in `x` other than the first and the last one.
#'
#' @param x Input to be converted to [sentence case](https://en.wikipedia.org/wiki/Letter_case#Case_styles), typically a character vector.
#' @param punctuation_mark Punctuation mark to be appended to `x`. A character scalar.
#'
#' @inherit capitalize_first return
#' @family string
#' @seealso
#' [stringr::str_to_sentence()] to convert a character vector to all lowercase except for the first character. Note that this also includes lowercasing [proper
#' nouns](https://en.wikipedia.org/wiki/Proper_and_common_nouns), [abbreviations](https://en.wikipedia.org/wiki/Abbreviation) etc.
#'
#' [snakecase::to_sentence_case()] that builds upon [stringr::str_to_sentence()] but offers additional options to finetune the conversion. Note that
#' `abbreviations` have to be manually specified in order to be preserved in upper case.
#' @export
#'
#' @examples
#' pal::sentenceify("no verb, no sentence")
#'
#' # punctuation mark won't be duplicated if already existing
#' pal::sentenceify(c("I've made my point",
#' "good point."))
sentenceify <- function(x,
punctuation_mark = ".") {
checkmate::assert_string(punctuation_mark)
capitalize_first(x) |>
purrr::map_chr(\(x) {
if (is.na(x) || stringr::str_sub(string = x, start = -1L) == punctuation_mark) {
x
} else {
paste0(x, punctuation_mark)
}
})
}
#' Capitalize first letter
#'
#' Converts the input to a character vector, with the first letter of each element uppercased.
#'
#' @param x Input of which to capitalize the first letter, typically a character vector.
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' pal::capitalize_first(c("one", "Two", "tHREE"))
capitalize_first <- function(x) {
stringr::str_replace(string = x,
pattern = "^.",
replacement = toupper)
}
#' Wrap character in character
#'
#' Wraps a character vector `x` in another character vector `wrap` (by default the string `"`).
#'
#' @param x A character vector or something coercible to. Will be fed to [as_chr()] before wrapping.
#' @param wrap Character sequence `x` is to be wrapped in. A character vector or something coercible to.
#'
#' @return A character vector of the same length as `wrap`.
#' @family string
#' @export
#'
#' @examples
#' mtcars |>
#' colnames() |>
#' pal::wrap_chr(wrap = "`") |>
#' magrittr::set_colnames(x = mtcars) |>
#' pal::pipe_table()
wrap_chr <- function(x,
wrap = "\"") {
paste0(wrap, as_chr(x), wrap)
}
#' Convert control character sequence name to actual character sequence
#'
#' @param eol `r pkgsnip::param_lbl("eol")`
#'
#' @return A character scalar.
#' @family string
#' @export
as_line_feed_chr <- function(eol = c("LF", "CRLF", "CR", "LFCR")) {
switch(EXPR = rlang::arg_match(eol),
LF = "\n",
CRLF = "\r\n",
CR = "\r",
LFCR = "\n\r")
}
#' Get column names of a delimiter-separated string
#'
#' Returns the column names of a string in a [delimiter-separated-value](https://en.wikipedia.org/wiki/Delimiter-separated_values) format like
#' [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) or [TSV](https://en.wikipedia.org/wiki/Tab-separated_values).
#'
#' @param x Delimiter-separated string. A character scalar.
#' @param delim Single character used to separate fields within `x`.
#' @param quote Single character used to quote strings within `x`. Set to `NULL` for none.
#'
#' @return A character vector of column names.
#' @family string
#' @export
#'
#' @examples
#' "https://raw.githubusercontent.com/tidyverse/readr/master/inst/extdata/mtcars.csv" |>
#' httr2::request() |>
#' httr2::req_perform() |>
#' httr2::resp_body_string() |>
#' pal::dsv_colnames()
dsv_colnames <- function(x,
delim = ",",
quote = "\"") {
checkmate::assert_string(delim,
min.chars = 1L,
pattern = "^.$")
checkmate::assert_string(quote,
null.ok = TRUE,
pattern = "^.$")
x |>
# extract first line
regexpr(pattern = "[\r\n]") |>
magrittr::subtract(1L) |>
substr(x = x,
start = 1L,
stop = _) |>
stringr::str_split_1(pattern = delim) |>
stringr::str_remove_all(pattern = glue::glue("^{quote}|{quote}$"))
}
#' Convert to a character scalar (aka string)
#'
#' Similar to [`paste0(..., collapse = "")`][base::paste0()], but _recursively_ converts its inputs to type character.
#'
#' @param ... \R objects to be assembled to a single string. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param sep Separator to delimit `...`. Defaults to none (`""`).
#' @param rm_na Exclude missing values. If `FALSE`, missing values will be represented as `"NA"` in the resulting string.
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' input <-
#' paste0(2:4,
#' collapse = ", ") |>
#' purrr::map(rep,
#' times = 20) %>%
#' list(c("This is a glut of ", "meaningless numbers: "), .)
#'
#' # while this just converts `input` in a lazy way...
#' paste0(input,
#' collapse = "")
#'
#' # ...this one works harder
#' pal::as_str(input)
as_str <- function(...,
sep = "",
rm_na = FALSE) {
checkmate::assert_flag(rm_na)
result <- as_chr(...)
if (rm_na) {
result %<>% magrittr::extract(!is.na(.))
}
if (length(result) > 0L) {
result %<>% paste0(collapse = sep)
}
result
}
#' Assemble an (R) comment string of the desired line width
#'
#' Takes a vector of paragraphs, wraps them at the specified line width, prefixes them with comment markers and returns the result as a single string.
#'
#' @param ... Comment lines. A character vector or something coercible to.
#' @param line_width Maximum character width at which to wrap lines. A positive integerish scalar.
#' @param comment_prefix Character sequence that indicates the start of a comment. A character scalar.
#' @param sep_paragraphs Whether or not to separate paragraphs with an empty comment line.
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#' "No presents without code.") |>
#' cat()
#'
#' # wrap lines at 20 chars
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#' "No presents without code.",
#' line_width = 20L) |>
#' cat()
#'
#' # disable empty comment lines between paragraphs:
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#' "Hohoho.",
#' sep_paragraphs = FALSE) |>
#' cat()
as_comment_str <- function(...,
line_width = 160L,
comment_prefix = "# ",
sep_paragraphs = TRUE) {
checkmate::assert_count(line_width)
checkmate::assert_string(comment_prefix)
checkmate::assert_flag(sep_paragraphs)
as_chr(...) |>
stringr::str_wrap(width = line_width - nchar(comment_prefix)) |>
stringr::str_split(pattern = stringr::fixed("\n")) |>
purrr::map(\(x) paste0(paste0(comment_prefix, x),
collapse = "\n")) %>%
purrr::map2_chr(.y = seq_along(.),
.f = \(x, y) {
if (sep_paragraphs && y < length(.)) paste0(x, "\n", comment_prefix, "\n") else paste0(x, "\n")
}) |>
paste0(collapse = "")
}
#' Enumerate vector's elements as string
#'
#' Takes a vector or list and enumerates its elements in a single string. Convenience function combining [wrap_chr()] and [cli::ansi_collapse()] with slightly
#' differing defaults (`last` defaults to the value of `sep2`).
#'
#' @inheritParams cli::ansi_collapse
#' @inheritParams wrap_chr
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' # by default, `last` defaults to `sep2`
#' pal::enum_str(1:2)
#' pal::enum_str(1:3)
#'
#' # input is optionally wrapped in a character sequence
#' pal::enum_str(letters[1:3],
#' wrap = "`")
enum_str <- function(x,
sep = ", ",
sep2 = " and ",
last = sep2,
trunc = Inf,
width = Inf,
ellipsis = cli::symbol$ellipsis,
style = c("both-ends", "head"),
wrap = "") {
wrap_chr(x = x,
wrap = wrap) |>
cli::ansi_collapse(sep = sep,
sep2 = sep2,
last = last,
trunc = trunc,
width = width,
ellipsis = ellipsis,
style = style)
}
#' Fuse regular expressions
#'
#' Combines a vector or list of regular expressions to a single one (by logical OR).
#'
#' @param ... Regular expressions. All elements will be converted to type character before fusing. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .literal Whether or not the input should be interpreted *literally* instead of as regular expressions. If `TRUE`, all elements of `...` are wrapped in
#' the literal escape sequences `\Q` and `\E`.
#'
#' @return A character scalar.
#' @seealso The [rex][rex::rex] package which provides an intuitive framework to build complex regular expressions.
#' @family string
#' @export
#'
#' @examples
#' # perform some (nonsense) Jane Austen text extraction
#' regex <- c("My dear Jane",
#' "make haste, ",
#' "(?i)\\bevil")
#'
#' readr::read_lines(file = "https://www.gutenberg.org/cache/epub/1342/pg1342.txt") |>
#' stringr::str_subset(pattern = pal::fuse_regex(regex))
fuse_regex <- function(...,
.literal = FALSE) {
checkmate::assert_flag(.literal)
result <- as_chr(...)
if (.literal) {
result %<>% paste0("\\Q", ., "\\E")
}
result %<>% paste0(collapse = "|")
dots <- rlang::list2(...)
if (length(dots) > 1L || length(dots[[1L]]) > 1L) {
result %<>% paste0("(", ., ")")
}
result
}
#' Get path modification time
#'
#' Determine a file or directory's modification time. Other than [base::file.mtime()], this function is based on [fs::file_info()] and the datetime of
#' modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) by default.
#'
#' @inheritParams fs::file_info
#' @param tz Timezone to return the result in. A character scalar where `""` means the current time zone.
#'
#' @return `r pkgsnip::return_lbl("datetime")`
#' @family path
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr",
#' "DESCRIPTION") |>
#' pal::path_mod_time()
path_mod_time <- function(path,
follow = FALSE,
tz = "UTC") {
checkmate::assert_string(tz)
as.POSIXct(fs::file_info(path = path, follow = follow)$modification_time,
tz = tz)
}
#' Flatten path tree
#'
#' Flattens a hierarchical list of path elements into a character vector of full paths.
#'
#' @param path_tree Directory tree. A nested list of named lists and character vectors.
#' @param parent_path Optional parent path of `path_tree`. A character scalar.
#'
#' @return A character vector.
#' @family path
#' @export
#'
#' @examples
#' list("root_dir" = list("subdir1",
#' "subdir2" = list("file1.ext"),
#' "file2.ext")) |>
#' pal::flatten_path_tree() |>
#' pal::cat_lines()
flatten_path_tree <- function(path_tree,
parent_path = NULL) {
if (purrr::pluck_depth(path_tree) > 1L) {
result <-
rlang::names2(path_tree) |>
purrr::map_chr(\(x) fs::path_join(c(parent_path, x))) |>
purrr::map2(.x = path_tree,
.f = flatten_path_tree) |>
purrr::list_c(ptype = character()) %>%
c(parent_path, .)
} else {
result <- fs::path_join(c(parent_path, path_tree))
}
fs::path(result)
}
#' Draw path tree
#'
#' Pretty-prints a character vector of hierarchical paths as a tree. Uses [Unicode box drawing
#' characters](https://en.wikipedia.org/wiki/Box-drawing_character#Box_Drawing) to draw the nesting structure.
#'
#' This function is the equivalent of [fs::dir_tree()] for an artificial/fictional path hierarchy. To print a tree of an actual filesystem path hierarchy, it's
#' recommended to rely on `fs::dir_tree()` which additionally colorizes the output based on real filesystem information. Apart from that, the algorithm to draw
#' the tree is the same.
#'
#' @param paths A character vector of paths as returned by [flatten_path_tree()] or [fs::dir_ls()].
#' @param quiet Whether or not to suppress drawing the directory tree.
#'
#' @return `paths`, invisibly.
#' @family path
#' @export
#'
#' @examples
#' # using an artificial/fictional path hierarchy
#' list("root_dir" = list("subdir1",
#' "subdir2" = list("file1.ext"),
#' "file2.ext")) |>
#' pal::flatten_path_tree() |>
#' pal::draw_path_tree()
#'
#' # using an actual path hierarchy
#' fs::path_package("dplyr") |>
#' fs::dir_ls(recurse = TRUE) |>
#' pal::draw_path_tree()
#'
#' # to get colorized output, use `fs::dir_tree()` instead
#' fs::path_package("dplyr") |> fs::dir_tree()
draw_path_tree <- function(paths,
quiet = FALSE) {
checkmate::assert_character(paths,
any.missing = FALSE,
min.len = 1L)
checkmate::assert_flag(quiet)
has_leading_root <-
paths |>
fs::path_split() |>
purrr::map_chr(purrr::chuck,
1L) |>
magrittr::equals(paths[1L]) |>
all()
leaf_paths <- paths
# remove dir root to get same result structure as `fs::dir_ls()`
if (has_leading_root) leaf_paths %<>% .[-1L]
by_dir <- split(leaf_paths, fs::path_dir(leaf_paths))
box_chars <- list(h = "\u2500",
v = "\u2502",
l = "\u2514",
j = "\u251c")
print_leaf <- function(x,
indent) {
leafs <- by_dir[[x]]
for (i in seq_along(leafs)) {
if (i == length(leafs)) {
cat(indent, paste0(box_chars$l, box_chars$h, box_chars$h, " ",
collapse = ""),
fs::path_file(leafs[[i]]),
"\n",
sep = "")
print_leaf(x = leafs[[i]],
indent = paste0(indent, " "))
} else {
cat(indent,
paste0(box_chars$j, box_chars$h, box_chars$h, " ",
collapse = ""),
fs::path_file(leafs[[i]]),
"\n",
sep = "")
print_leaf(x = leafs[[i]],
indent = paste0(indent, paste0(box_chars$v, " ",
collapse = "")))
}
}
}
if (!quiet) {
dir_root <- ifelse(has_leading_root,
paths[1L],
by_dir |>
names() %>%
magrittr::extract(which.min(nchar(.))))
# print dir root
cat(dir_root, "\n",
sep = "")
# print leafs
print_leaf(fs::path_expand(dir_root), "")
}
invisible(paths)
}
#' Check that all named dots arguments are valid
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Ensures that [dots][base::dots()] `...` are either empty (if `.empty_ok = TRUE`), or all named elements in dots are a valid subset of `.fn`'s parameter
#' names. In case of an invalid or `.forbidden` argument, an informative message is shown and the defined `.action` is taken.
#'
#' @details
#' `check_dots_named()` is intended to combat the second one of the two major downsides that using `...` usually brings. In chapter 6.6 of the book
#' _Advanced R_ it is [phrased](https://adv-r.hadley.nz/functions.html#fun-dot-dot-dot) as follows:
#'
#' _Using `...` comes with two downsides:_
#'
#' - _When you use it to pass arguments to another function, you have to carefully explain to the user where those arguments go. This makes it hard to
#' understand what you can do with functions like `lapply()` and `plot()`._
#'
#' - **_A misspelled argument will not raise an error. This makes it easy for typos to go unnoticed._**
#'
#' @param ... Dots argument to check.
#' @param .fn Function the `...` will be passed on to.
#' @param .additional Parameter names within `...` that should be treated as valid in addition to `.fn`'s actual parameter names. A character vector.
#' @param .forbidden Parameter names within `...` that should be treated as invalid. This has precedence over `.additional`. A character vector.
#' @param .empty_ok Set to `TRUE` if empty `...` should be allowed, or to `FALSE` otherwise.
#' @param .action Action to take when the check fails. One of `r as_md_vals(c("abort", "warn", "inform")) |> enum_str(sep2 = " or ")`
#'
#' @seealso rlang's check dots functions: [rlang::check_dots_empty()], [rlang::check_dots_used()] and [rlang::check_dots_unnamed()]
#' @family dots
#' @export
#'
#' @examples
#' # We can use `check_dots_named()` to address this second downside:
#' sum_safe <- function(...,
#' na.rm = FALSE) {
#' pal::check_dots_named(...,
#' .fn = sum)
#' sum(...,
#' na.rm = na.rm)
#' }
#'
#' # Note how the misspelled `na_rm` (instead of `na.rm`) silently gets ignored in the original
#' # function,
#' sum(1, 2, NA, na_rm = TRUE)
#'
#' # whereas our safe version properly errors:
#' try(
#' sum_safe(1, 2, NA, na_rm = TRUE)
#' )
#'
#' # We can even build an `sapply()` function that fails "intelligently":
#' sapply_safe <- function(X,
#' FUN,
#' ...,
#' simplify = TRUE,
#' USE.NAMES = TRUE) {
#' pal::check_dots_named(...,
#' .fn = FUN)
#' sapply(X = X,
#' FUN = FUN,
#' ...,
#' simplify = TRUE,
#' USE.NAMES = TRUE)
#' }
#'
#' # While the original `sapply()` silently consumes misspelled named arguments via `...`,
#' sapply(1:5, paste, "hour workdays", sep = "-", colaspe = " ")
#'
#' # `sapply_safe()` will throw an informative error message:
#' try(
#' sapply_safe(1:5, paste, "hour workdays", sep = "-", colaspe = " ")
#' )
#'
#' # But be aware that `check_dots_named()` might be a bit rash,
#' try(
#' sum_safe(a = 1, b = 2)
#' )
#'
#' # while the original function actually has nothing to complain:
#' sum(a = 1, b = 2)
#'
#' # Furthermore, it doesn't play nicely with generics that don't expose all of the argument names
#' # of the method that is eventually invoked (`to` and `by` in the case of `seq()` -> `seq.int()`):
#' try(
#' sapply_safe(X = c(0,50),
#' FUN = seq,
#' to = 100,
#' by = 5)
#' )
#'
#' # To work around this, directly supply the proper method (`seq.int`),
#' sapply_safe(X = c(0,50),
#' FUN = seq.int,
#' to = 100,
#' by = 5)
#'
#' # or just provide `to` and `by` *unnamed*:
#' sapply_safe(X = c(0,50),
#' FUN = seq,
#' 100,
#' 5)
check_dots_named <- function(...,
.fn,
.additional = NULL,
.forbidden = NULL,
.empty_ok = TRUE,
.action = c("abort", "warn", "inform")) {
checkmate::assert_function(.fn)
checkmate::assert_character(.additional,
any.missing = FALSE,
null.ok = TRUE)
checkmate::assert_character(.forbidden,
any.missing = FALSE,
null.ok = TRUE)
checkmate::assert_flag(.empty_ok)
.action <- rlang::arg_match(.action)
if (...length() > 0L) {
# determine original function name the `...` will be passed on to
fn_arg_name <- deparse1(substitute(.fn))
parent_call <- as.list(sys.call(-1L))
parent_arg_names <- methods::formalArgs(args(sys.function(-1L)))
if (fn_arg_name %in% parent_arg_names) {
fn_name <- deparse1(parent_call[which(parent_arg_names == fn_arg_name) + 1L][[1L]])
} else {
fn_name <- fn_arg_name
}
# determine arg names of the function the `...` will be passed on to
dots_arg_names <- methods::formalArgs(args(.fn))
# check named `...` args
purrr::walk(.x = setdiff(...names(),
""),
.f = check_dot_named,
dots_arg_names = dots_arg_names,
allowed_arg_names = setdiff(union(dots_arg_names,
.additional),
.forbidden),
fn_name = fn_name,
action = .action)
} else if (!.empty_ok) {
msg <- "{.arg ...} must be provided (!= NULL)."
switch(EXPR = .action,
abort = cli::cli_abort(msg),
warn = cli::cli_warn(msg),
inform = cli::cli_inform(msg))
}
}
check_dot_named <- function(dot,
dots_arg_names,
allowed_arg_names,
fn_name,
action) {
# The following code is largely borrowed from `rlang::arg_match()`
allowed_arg_names_excl_dots <- setdiff(allowed_arg_names, '...')
i <- match(dot, allowed_arg_names)
if (rlang::is_na(i)) {
is_forbidden <- dot %in% dots_arg_names
is_restricted <- !setequal(dots_arg_names,
allowed_arg_names)
msg <- paste0(ifelse(is_forbidden,
"Forbidden",
"Invalid"),
" named argument provided in {.arg ...}: {.arg {dot}}")
if (length(allowed_arg_names_excl_dots) > 0L) {
msg %<>% c(i = paste0(ifelse(is_restricted,
"Named arguments allowed to pass on to ",
"Valid named arguments for "),
"{.fun {fn_name}} include: {.arg {allowed_arg_names_excl_dots}}"))
} else {
msg %<>% c(i = paste0("Only unnamed arguments are ", ifelse(is_restricted, "allowed", "valid"), " for {.fun {fn_name}}."))
}
i_partial <- pmatch(dot, allowed_arg_names_excl_dots)
candidate <- NULL
if (!rlang::is_na(i_partial)) {
candidate <- allowed_arg_names_excl_dots[[i_partial]]
}
i_close <- utils::adist(dot, allowed_arg_names_excl_dots) / nchar(allowed_arg_names_excl_dots)
if (any(i_close <= 0.5)) {
candidate <- allowed_arg_names_excl_dots[[which.min(i_close)]]
}
if (!is.null(candidate)) {
msg %<>% c(">" = "Did you mean {.arg {candidate}}?")
}
switch(EXPR = action,
abort = cli::cli_abort(msg),
warn = cli::cli_warn(msg),
inform = cli::cli_inform(msg))
}
}
#' List a subset of all installed packages
#'
#' @param pkg A character vector of package names.
#' @param ignore_case Do not distinguish between upper and lower case letters in `pkg`. If `FALSE`, `pkg` is treated case-sensitive.
#' @param as_regex Interpret `pkg` as regular expression(s). If `FALSE`, `pkg` is interpreted literally.
#'
#' @return A [tibble][tibble::tbl_df].
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::ls_pkg(pkg = c("methods", "utils"))
#'
#' pal::ls_pkg(pkg = "^ut",
#' as_regex = TRUE)
ls_pkg <- function(pkg,
ignore_case = TRUE,
as_regex = FALSE) {
checkmate::assert_character(pkg,
any.missing = FALSE,
min.chars = 1L)
checkmate::assert_flag(ignore_case)
checkmate::assert_flag(as_regex)
regex <- pkg
if (!as_regex) {
regex <- paste0("\\Q", regex, "\\E")
}
regex %<>% fuse_regex()
if (ignore_case) {
regex <- paste0("(?i)", regex)
}
if (!as_regex) {
regex <- paste0("^", regex, "$")
}
utils::installed.packages() |>
tibble::as_tibble() |>
dplyr::filter(stringr::str_detect(string = Package,
pattern = regex))
}
#' Depend on another package
#'
#' Wrapper around [usethis::use_package()] with different defaults and optional `DESCRIPTION` file [tidying][usethis::use_tidy_description].
#'
#' @inheritParams usethis::use_package
#' @param tidy Whether or not to run [usethis::use_tidy_description()] after adding `package` to dependencies.
#'
#' @return `NULL`, invisibly.
#' @family rpkgs
#' @export
use_pkg <- function(package,
type = "Imports",
min_version = TRUE,
tidy = TRUE) {
checkmate::assert_flag(tidy)
rlang::check_installed("usethis",
reason = reason_pkg_required())
# TODO: submit PR upstream for this
if (isFALSE(min_version)) min_version <- NULL
usethis::use_package(package = package,
type = type,
min_version = min_version)
if (tidy) {
usethis::use_tidy_description()
}
}
#' Test if packages are installed
#'
#' Returns `TRUE` or `FALSE` for each `pkg`, depending on whether the `pkg` is installed on the current system or not, optionally ensuring a `min_version`.
#'
#' In contrast to [base::require()], this function checks if the packages are installed without attaching their namespaces if so.
#'
#' In contrast to [rlang::is_installed()] or [xfun::pkg_available()], this function doesn't load the packages if they're installed.
#'
#' In contrast to [xfun::pkg_available()], it is fully vectorized, i.e. returns a (named) logical vector of the same length as `pkg`.
#'
#' It is [considerably
#' faster](https://stackoverflow.com/questions/9341635/check-for-installed-packages-before-running-install-packages/38082613#38082613) than the commonly used
#' `pkg %in% rownames(installed.packages())` check.
#'
#' @param pkg Package names. A character vector.
#' @param min_version Minimum required version number of each `pkg`. Must be either `NULL` to ignore version numbers, or a vector of
#' [`package_version`][base::package_version()]s or something coercible to.
#'
#' @return A named logical vector of the same length as `pkg`.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_installed("tidyverse")
#'
#' # it is vectorized...
#' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"),
#' min_version = c("1.0", "2.0", "99.9.9000"))
#'
#' # ...and scalar arguments will be recycled
#' pal::is_pkg_installed(pkg = "dplyr",
#' min_version = c("0.5", "1.0", "99.9.9000"))
#'
#' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"),
#' min_version = "1.0")
is_pkg_installed <- function(pkg,
min_version = NULL) {
checkmate::assert_character(pkg,
any.missing = FALSE)
min_version %<>% as.package_version()
if (length(min_version) == 0L) {
result <- purrr::map_lgl(magrittr::set_names(pkg, pkg),
~ nzchar(system.file(package = .x)))
} else {
result <-
is_pkg_installed(pkg = pkg) %>%
list(names(.), min_version) |>
purrr::pmap_lgl(\(...) {
if (..1) utils::packageVersion(pkg = ..2) >= ..3 else ..1
})
}
result
}
#' Test if a package is available on CRAN
#'
#' @inheritParams is_http_success
#' @param pkg Package name (case-sensitive). A character scalar.
#' @param min_version Minimum required version number of `pkg`. Must be either `NULL` to ignore version numbers, or a single
#' [`package_version`][base::package_version()] or something coercible to.
#'
#' @return A logical scalar.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_cran("foobar")
#' pal::is_pkg_cran("dplyr")
#' pal::is_pkg_cran("dplyr", min_version = "9999.9")
is_pkg_cran <- function(pkg,
min_version = NULL,
max_tries = 1L) {
checkmate::assert_string(pkg)
min_version %<>% as.package_version()
rlang::check_installed(pkg = c("httr2", "rvest"),
reason = reason_pkg_required())
url <- glue::glue("https://cran.r-project.org/package={pkg}")
is_cran <- is_http_success(url = url,
max_tries = max_tries)
if (is_cran && length(min_version)) {
is_cran <-
httr2::request(base_url = url) |>
httr2::req_method(method = "GET") |>
httr2::req_retry(max_tries = max_tries) |>
httr2::req_perform() |>
httr2::resp_body_html() |>
rvest::html_element(css = "body") |>
rvest::html_element(css = "table") |>
rvest::html_table() |>
dplyr::filter(stringr::str_detect(X1, stringr::fixed("Version"))) |>
dplyr::filter(as.package_version(X2) >= !!min_version) |>
nrow() |>
magrittr::is_greater_than(0L)
}
is_cran
}
#' Test if a directory is an \R package
#'
#' Convenience wrapper around the [`rprojroot::is_r_package`][rprojroot::is_r_package] root criterion. Note that it will by default only return `TRUE` for the
#' root of a package directory, not its subdirectories.
#'
#' @param path Path of the directory to check. A character scalar. Defaults to the current working directory.
#' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for an \R package setup. Setting
#' `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package.
#'
#' @return `TRUE` if `path` is the root (if `check_parent_dirs = FALSE`) or a (sub)directory directory (if `check_parent_dirs = TRUE`) of an \R package,
#' `FALSE` otherwise.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_dir()
#' pal::is_pkg_dir(fs::path_package("utils"))
is_pkg_dir <- function(path = ".",
check_parent_dirs = FALSE) {
has_root(criterion = rprojroot::is_r_package,
path = path,
check_parent_dirs = check_parent_dirs)
}
#' Test if pkgdown is set up for an R package directory
#'
#' Convenience wrapper around the [`rprojroot::is_pkgdown_project`][rprojroot::is_pkgdown_project] root criterion. Note that it will by default only return
#' `TRUE` for the root of a package directory and the `pkgdown` subdirectory, not other subdirectories.
#'
#' @param path Path of the R package directory to check. A character scalar. Defaults to the current working directory.
#' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for a pkgdown setup. Setting
#' `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package that has pkgdown set up.
#'
#' @return `TRUE` if pkgdown is set up for `path`, `FALSE` otherwise.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkgdown_dir()
#' pal::is_pkgdown_dir(fs::path_package("pal"))
is_pkgdown_dir <- function(path = ".",
check_parent_dirs = FALSE) {
has_root(criterion = rprojroot::is_pkgdown_project,
path = path,
check_parent_dirs = check_parent_dirs)
}
#' Test if object exists in namespace
#'
#' Determines whether or not an object exists in a specific [namespace](https://adv-r.hadley.nz/environments.html#namespaces).
#'
#' This complements [utils::getFromNamespace()] and uses almost the same code internally.
#'
#' @inheritParams utils::getFromNamespace
#'
#' @return A logical scalar.
#' @export
#'
#' @examples
#' if (pal::is_pkg_installed("pkgpurl")) {
#' pal::exists_in_namespace("pkg_config", "pkgpurl")
#' }
#'
#' pal::exists_in_namespace("pkg_config", "pal")
exists_in_namespace <- function(x,
ns,
pos = -1L,
envir = as.environment(pos)) {
if (missing(ns)) {
nm <- attr(x = envir,
which = "name",
exact = TRUE)
if (is.null(nm) || !startsWith(nm, "package:")) {
cli::cli_abort("Specified {.arg envir} is not a package environment.")
}
ns <- asNamespace(substring(nm, 9L))
} else {
ns <- asNamespace(ns)
}
exists(x = x,
envir = ns,
inherits = FALSE)
}
#' Assemble reason why package is required
#'
#' Assembles a prose text containing the reason why a certain \R package is required by the `fn` of another `pkg`. Intended to be used as argument `reason` of
#' [rlang::check_installed()].
#'
#' @param fn Name of the function that requires the \R package. A character scalar.
#' @param pkg Name of the package `fn` is part of. A character scalar.
#'
#' @return A character scalar.
#' @export
#'
#' @examples
#' \dontrun{
#' rlang::check_installed(pkg = "foo",
#' reason = pal::reason_pkg_required(pkg = "bar",
#' fn = "serve"))
#' }
reason_pkg_required <- function(fn = rlang::call_name(rlang::caller_call()),
pkg = utils::packageName(parent.frame())) {
checkmate::assert_string(fn)
checkmate::assert_string(pkg)
cli::format_inline("by {.fun {pkg}::{fn}}, but is not installed.")
}
#' Get package configuration value
#'
#' @description
#' Retrieves a package configuration value in a canonical way. The following configuration sources are consulted in descending order and the first hit is
#' returned:
#'
#' 1. The \R [option][options] `<pkg>.<key>`.
#' 2. The [environment variable](https://en.wikipedia.org/wiki/Environment_variable) `R_<PKG>_<KEY>`.
#' 3. The ad-hoc default value specified via this function's `default` argument (`NULL` means unspecified).
#' 4. The configuration's global default value as specified in the package's configuration metadata (column `default_value` or `default_value_dynamic` of
#' `<pkg>::pkg_config`; `NULL` means unspecified).
#'
#' Depending on `require`, an error is thrown if none of the above sources contain a value.
#'
#' @details
#' This function is intended to be used by package authors who want to expose their package configuration options in a canonical way (as outlined above). For
#' `pkg_config_val()` to properly work, the configuration metadata must be available in the package's namespace as object `pkg_config`, which must be a
#' [dataframe][data.frame] or [tibble][tibble::tbl_df] with at minimum the columns `key` (of type character holding the configuration key names) and
#' `default_value` (of type list holding static default configuration values) or `default_value_dynamic` (of type character holding R code expressions that
#' evaluate to default configuration values dynamically at access time).
#'
#' @param key Configuration key name. A character scalar.
#' @param pkg Package name. A character scalar. Defaults to the name of the calling package.
#' @param default Default value to fall back to if neither the \R option `<pkg>.<key>` nor the environment variable `R_<PKG>_<KEY>` is set. If `NULL`, the
#' default value for `key` in `<pkg>::pkg_config` will be used (if defined).
#' @param require Whether or not to require that the configuration value is set. If `TRUE` and no configuration value is set, an error is thrown with
#' instructions on how to provide a value. If `NULL`, the `require` value for `key` in `<pkg>::pkg_config` will be used (defaults to `TRUE`).
#' @param env Environment to evaluate `default_value_dynamic` in, if necessary.
#'
#' @return `r pkgsnip::return_lbl("r_obj")`
#' @seealso [xfun::env_option()] for a compatible (albeit less powerful) approach to R option and environment variable coherence.
#' @family pkg_config
#' @export
#'
#' @examples
#' try(
#' pal::pkg_config_val(key = "gen_pkgdown_ref",
#' pkg = "pkgpurl")
#' )
pkg_config_val <- function(key,
pkg = utils::packageName(env = parent.frame()),
default = NULL,
require = NULL,
env = parent.frame()) {
checkmate::assert_flag(require,
null.ok = TRUE)
pkg_config <- get_pkg_config(pkg)
key <- rlang::arg_match0(key,
values = pkg_config$key)
# 1st priority: R option
result <- getOption(pkg_config_opt_name(pkg = pkg,
key = key))
# 2nd priority: environment variable
if (is.null(result)) {
result <- Sys.getenv(pkg_config_env_var_name(pkg = pkg,
key = key),
unset = NA,
names = FALSE)
}
# 3rd priority: default value
if (is.na(result)) {
result <- default %||% get_pkg_config_val_default(key = key,
pkg_config = pkg_config,
env = env)
}
# abort if value is required but none was provided
if (is.null(result)) {
if (is.null(require)) {
require <-
pkg_config |>
dplyr::filter(key == !!key) |>
dplyr::pull("require")
}
if (require) {
cli::cli_abort(paste0("Please set the {pkg} package configuration option {.field {key}} by either setting the R option ",
"{.field {pkg_config_opt_name(pkg = pkg, key = key)}} or the environment variable ",
"{.envvar {pkg_config_env_var_name(pkg = pkg, key = key)}}."))
}
}
result
}
#' Get default package configuration value
#'
#' Retrieves a package configuration's default value from the package's configuration metadata (column `default_value` or `default_value_dynamic` of
#' `<pkg>::pkg_config`). If no default value is specified (`NULL`), nothing is returned (`NULL`).
#'
#' @inheritParams pkg_config_val
#'
#' @return `pkgsnip::return_lbl("r_obj")`
#' @family pkg_config
#' @export
#'
#' @examples
#' try(
#' pal::pkg_config_val_default(key = "gen_pkgdown_ref",
#' pkg = "pkgpurl")
#' )
pkg_config_val_default <- function(key,
pkg = utils::packageName(env = parent.frame()),
env = parent.frame()) {
get_pkg_config_val_default(key = key,
pkg_config = get_pkg_config(pkg),
env = env)
}
#' Test if package configuration value is set
#'
#' Tests whether or not a certain package configuration value is set. See [pkg_config_val()] for the underlying concept.
#'
#' Note that `has_pkg_config_val()` throws an error if the package configuration *key* doesn't exist.
#'
#' @inheritParams pkg_config_val
#'
#' @return A logical scalar.
#' @family pkg_config
#' @export
#'
#' @examples
#' try(
#' pal::has_pkg_config_val(key = "gen_pkgdown_ref",
#' pkg = "pkgpurl")
#' )
has_pkg_config_val <- function(key,
pkg = utils::packageName(env = parent.frame()),
env = parent.frame()) {
!is.null(pkg_config_val(key = key,
pkg = pkg,
default = NULL,
require = FALSE,
env = env))
}
#' Augment package configuration metadata
#'
#' Augments a package's configuration metadata (`<pkg>::pkg_config`) with the columns `r_opts` and `env_var` holding the respective \R option and environment
#' variable names.
#'
#' @inheritParams pkg_config_val
#'
#' @return A [tibble][tibble::tbl_df] with at minimum the columns `key`, `default_value`, `r_opt` and `env_var`.
#' @family pkg_config
#' @export
#'
#' @examples
#' try(
#' pal::augment_pkg_config(pkg = "pkgpurl")
#' )
augment_pkg_config <- function(pkg = utils::packageName(env = parent.frame())) {
get_pkg_config(pkg) |>
dplyr::mutate(r_opt = pkg_config_opt_name(pkg = pkg,
key = key),
env_var = purrr::map_chr(key, \(k) pkg_config_env_var_name(pkg = pkg,
key = k)))
}
#' Print package configuration metadata
#'
#' Prints a package's configuration metadata (`<pkg>::pkg_config`) as a prettily formatted Markdown table.
#'
#' @inheritParams pkg_config_val
#' @param roxy_to_md Whether or not to convert roxygen2 documentation [links in pseudo-Markdown
#' style](https://roxygen2.r-lib.org/articles/rd-formatting.html#function-links) to actual Markdown ones using [roxy_to_md_links()].
#'
#' @inherit pipe_table return
#' @family pkg_config
#' @export
#'
#' @examples
#' try(
#' pal::print_pkg_config(pkg = "pkgpurl")
#' )
print_pkg_config <- function(pkg = utils::packageName(env = parent.frame()),
roxy_to_md = FALSE) {
checkmate::assert_flag(roxy_to_md)
augment_pkg_config(pkg) |>
dplyr::mutate(dplyr::across(c(r_opt, env_var),
\(x) wrap_chr(x, wrap = "`")),
dplyr::across(any_of("description"),
\(x) if (roxy_to_md) purrr::map_chr(x, roxy_to_md_links) else x),
default_value = purrr::map2(default_value,
default_value_dynamic,
\(x, y) {
if (!is.na(y)) {
return(wrap_chr(y,
wrap = "`"))
} else if (is.null(x)) {
return("")
} else {
return(x |>
constructive::construct(unicode_representation = "unicode",
check = TRUE,
one_liner = TRUE) |>
purrr::chuck("code") |>
wrap_chr(wrap = "`"))
}
})) |>
dplyr::select(any_of("description"),
r_opt,
env_var,
default_value) |>
dplyr::rename_with(.cols = any_of("description"),
.fn = stringr::str_to_title) |>
dplyr::rename(`R option` = r_opt,
`Environment variable` = env_var,
`Default value` = default_value) |>
pipe_table()
}
#' Get all `DESCRIPTION` file fields as cleaned up list
#'
#' @description
#' Returns all fields from a `DESCRIPTION` file as a named list with values cleaned up:
#' - Whitespaces at the start and end of field values as well as repeated whitespaces within them are removed.
#' - Multi-value fields are returned as vectors.
#' - The fields `Depends`, `Imports` and `Suggests` are returned as a single data frame named `dependencies`.
#'
#' @inheritParams desc_value
#'
#' @return A list.
#' @family desc
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr") |> pal::desc_list()
desc_list <- function(file = ".") {
rlang::check_installed("desc",
reason = reason_pkg_required())
fields <- desc::desc_fields(file = file)
result <-
fields |>
setdiff(c("Authors@R",
"Depends",
"Imports",
"Suggests",
"URL")) |>
rlang::set_names() |>
purrr::map(desc_value,
file = file)
if ("Authors@R" %in% fields) result[["Authors@R"]] <- desc::desc_get_authors(file = file)
if (any(c("Depends", "Imports", "Suggests") %in% fields)) result[["dependencies"]] <- desc::desc_get_deps(file = file)
if ("URL" %in% fields) result[["URL"]] <- desc::desc_get_urls(file = file)
result
}
#' Get value from `DESCRIPTION` file field, cleaned up and with dynamic fallback
#'
#' Returns the value from a `DESCRIPTION` file field (aka _key_). Whitespaces at the start and end of the value as well as repeated whitespaces within
#' it are removed.
#'
#' This function is a slightly modified version of [desc::desc_get_field()] that allows the `default` parameter to be dependent on the `key` parameter.
#'
#' By default, the following string is returned if `key = "NoRealKey"` is not found:
#'
#' ```
#' "<No `NoRealKey` field set in DESCRIPTION!>"
#' ```
#'
#' If you rather want to take an action like throwing an error, it's recommended to call [desc::desc_get_field()] directly.
#'
#' @inheritParams desc::desc_get_field
#' @param default Default value to return if `key` is not found.
#'
#' @return A character scalar.
#' @family desc
#' @export
#'
#' @examples
#' pal::desc_value(key = "Description",
#' file = fs::path_package("pal"))
desc_value <- function(key,
file = ".",
default = glue::glue("<No \x60{key}\x60 field set in DESCRIPTION!>")) {
rlang::check_installed("desc",
reason = reason_pkg_required())
desc::desc_get_field(key = key,
default = default,
file = file)
}
#' Get dependency version from `DESCRIPTION` file
#'
#' Returns the version of the specified `pkg` dependency from a `DESCRIPTION` file.
#'
#' @inheritParams pkg_config_val
#' @inheritParams desc::desc_get_deps
#' @param types Dependency types to be considered. If `pkg` is listed in multiple dependency types, the maximally required version is returned.
#'
#' @return `r pkgsnip::return_lbl("num_vrsn")`
#' @family desc
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr") |> pal::desc_dep_vrsn(pkg = "tibble")
desc_dep_vrsn <- function(pkg,
file = ".",
types = c("Imports", "Depends", "Suggests", "Enhances", "LinkingTo")) {
checkmate::assert_string(pkg)
types <- rlang::arg_match(arg = types,
multiple = TRUE)
result <-
desc::desc_get_deps(file = file) |>
dplyr::filter(package == !!pkg & type %in% !!types)
if (nrow(result) == 0L) {
cli::cli_abort("Package {.val {pkg}} is not listed as a dependency of type {.or {.val {types}}} in {.file {file}}.")
}
result$version |>
stringr::str_extract("\\d+[\\d.]+") |>
purrr::map(\(x) {
if (is.na(x)) {
as.numeric_version(NULL)[NA]
} else {
as.numeric_version(x)
}
}) |>
purrr::list_c(ptype = numeric_version(NULL)) |>
max(na.rm = TRUE) |>
when(length(.) == 0L ~ as.numeric_version(NULL)[NA],
~ .)
}
#' Get the Git repository URL from `DESCRIPTION` file
#'
#' Returns the first Git repository URL found in the `URL` (preferred) or `BugReports` fields of a `DESCRIPTION` file.
#'
#' Currently, this function recognizes [GitLab](https://about.gitlab.com/), [GitHub](https://github.com/), [Gitea](https://gitea.com/),
#' [Codeberg](https://codeberg.org/), [Pagure](https://pagure.io/), [Bitbucket](https://bitbucket.org/) and [SourceHut](https://sr.ht/) repository URLs.
#'
#' @inheritParams desc::desc_get_field
#'
#' @return A character scalar.
#' @family desc
#' @export
desc_url_git <- function(file = ".") {
rlang::check_installed("desc",
reason = reason_pkg_required())
desc::desc_get_field(key = "BugReports",
default = character(),
file = file) |>
stringr::str_replace(pattern = "/issues/?$",
replacement = "/") %>%
c(desc::desc_get_urls(), .) |>
stringr::str_subset(pattern = "^https?://(git(hub|lab|ea)\\..+|(codeberg|bitbucket)\\.org|(git\\.)?src\\.ht|pagure\\.io)/") |>
dplyr::first()
}
#' Get function's default parameter values
#'
#' Extracts a function parameter's default value(s) from its language definition and returns the result as a character vector.
#'
#' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code
#' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows:
#'
#' ```r
#' #' @param some_param Some parameter. One of
#' #' `r pal::fn_param_defaults(param = "some_param", fn = "some_fn") |> pal::wrap_chr("\x60") |> cli::ansi_collapse()`.
#' #'
#' some_fn <- function(some_param = c("a", "b", "c")) {
#' some_param <- rlang::arg_match(some_param)
#' ...
#' }
#' ```
#'
#' Or to list the possible parameter values formatted as an unnumbered list instead, replace `cli::ansi_collapse()` with [pal::as_md_list()] in the example
#' above.
#'
#' # Caveats
#'
#' [base::deparse1()] is used internally to get a character representation of non-character default values. Therefore all of `deparse()`'s fuzziness also
#' applies to this function.
#'
#' @param param Parameter name. A character scalar.
#' @param fn A [function][base::function] or a function name (searched for in `env`). See [base::formals()] for details.
#' @param env [Environment][base::environment] `fn` is defined in. See [base::formals()] for details.
#'
#' @return A character vector.
#' @family roxy
#' @export
#'
#' @examples
#' pal::fn_param_defaults(param = ".name_repair",
#' fn = tibble::as_tibble)
#'
#' # as Markdown-formatted enumeration in prose
#' pal::fn_param_defaults(param = ".name_repair",
#' fn = tibble::as_tibble) |>
#' pal::wrap_chr("`") |>
#' cli::ansi_collapse() |>
#' cat()
fn_param_defaults <- function(param,
fn = sys.function(sys.parent()),
env = parent.frame()) {
checkmate::assert_string(param)
# turn `fn` into type function if necessary (the same as `formals(fun)` does internally)
if (is.character(fn)) {
fn %<>% get(mode = "function",
envir = env)
}
default_vals <- formals(fun = args(name = fn),
envir = env)
if (param %in% names(default_vals)) {
default_vals <- default_vals[[param]]
} else {
fn_name <- deparse1(expr = substitute(fn),
backtick = FALSE)
cli::cli_abort("The function {.fn {fn_name}} does not have a parameter named {.arg {param}}.")
}
if (missing(default_vals)) {
fn_name <- deparse1(expr = substitute(fn),
backtick = FALSE)
cli::cli_abort("{.fn {fn_name}}'s parameter {.arg {param}} does not have a default value.")
}
# evaluate default param if it results in a character vector
if (is.language(default_vals)) {
evaluated_default_vals <- tryCatch(expr = eval(expr = default_vals,
envir = env),
error = \(x) NULL)
if (is.character(evaluated_default_vals)) default_vals <- evaluated_default_vals
}
if (is.character(default_vals)) {
default_vals %<>% wrap_chr()
} else {
default_vals %<>% deparse1(backtick = FALSE,
control = c("keepNA",
"keepInteger",
"niceNames",
"showAttributes",
"warnIncomplete"))
}
default_vals
}
#' Enumerate function's default parameter values
#'
#' Convenience function combining [fn_param_defaults()], [wrap_chr()] and [cli::ansi_collapse()].
#'
#' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code
#' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows:
#'
#' ```r
#' #' @param some_param Some parameter. One of
#' #' `r pal::enum_fn_param_defaults(param = "some_param", fn = "some_fn")`.
#' #'
#' some_fn <- function(some_param = c("a", "b", "c")) {
#' some_param <- rlang::arg_match(some_param)
#' ...
#' }
#' ```
#'
#' @inheritParams fn_param_defaults
#' @param wrap Character sequence the default parameter values are to be wrapped in. A character vector or something coercible to.
#' @param sep2,last Passed on to [cli::ansi_collapse()].
#' @param ... Further arguments passed on to [cli::ansi_collapse()].
#'
#' @return A character scalar.
#' @family roxy
#' @export
#'
#' @examples
#' pal::enum_fn_param_defaults(param = ".name_repair",
#' fn = tibble::as_tibble) |>
#' cat()
enum_fn_param_defaults <- function(param,
fn = sys.function(sys.parent()),
env = parent.frame(),
wrap = "`",
sep2 = " or ",
last = sep2,
...) {
check_dots_named(...,
.fn = cli::ansi_collapse,
.forbidden = c("sep2", "last"))
fn_param_defaults(param = param,
fn = fn,
env = env) |>
wrap_chr(wrap = wrap) |>
cli::ansi_collapse(sep2 = sep2,
last = last,
...)
}
#' Convert roxygen2 documentation links to Markdown
#'
#' Converts roxygen2 documentation [links in pseudo-Markdown style](https://roxygen2.r-lib.org/articles/rd-formatting.html#function-links) to actual Markdown
#' ones using [downlit::autolink_url()].
#'
#' @param x Markdown text with roxygen2 documentation links. A character scalar.
#'
#' @return A character scalar.
#' @family roxy
#' @export
#'
#' @examples
#' pal::roxy_to_md_links("[base::c()] is so short I almost forget it's there.")
#' pal::roxy_to_md_links("[`base::c()`], probably the most used base R function ever.")
#' pal::roxy_to_md_links("Some functions [are magic][downlit::autolink_url]!")
#' pal::roxy_to_md_links("downlit's [`autolink_url()`][downlit::autolink_url()] seems magic!")
roxy_to_md_links <- function(x) {
checkmate::assert_string(x)
rlang::check_installed(pkg = "downlit",
reason = reason_pkg_required())
# determine the roxy-specific doc links by first parsing input as CommonMark which escapes all brackets from roxy doc links (but not from valid MD links)
links_roxy <-
md_to_xml(x) |>
xml2::xml_contents() |>
xml_to_md() |>
stringr::str_extract_all(pattern = "(\\\\\\[`[^`]+`\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?|\\\\\\[[^\\]]+?\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?)") |>
purrr::list_c(ptype = character()) |>
stringr::str_remove_all(pattern = stringr::fixed("\\"))
is_short <- stringr::str_detect(string = links_roxy,
pattern = "^\\[[^\\]]+\\]$")
targets_roxy <- links_roxy
# remove possible enclosing backticks from roxy shortlinks to make them parseable by downlit
targets_roxy[is_short] %<>% stringr::str_remove_all(pattern = stringr::fixed("`"))
targets_roxy %<>%
stringr::str_extract(pattern = "\\[([^\\]]+)\\]$",
group = 1L) %>%
purrr::map(downlit::autolink_url) %>%
purrr::list_c(ptype = character())
process <- !is.na(targets_roxy)
# short-circuit if nothing to do
if (!any(process)) {
return(x)
}
links_md <- links_roxy
links_md[process & !is_short] %<>% stringr::str_replace(pattern = "\\[[^\\]]+\\]$",
replacement = paste0("(", targets_roxy[process & !is_short], ")"))
links_md[process & is_short] %<>%
stringr::str_replace(pattern = "(?<=^\\[)([^`\\]]+)",
replacement = "`\\1`") %>%
paste0("(", targets_roxy[process & is_short], ")")
links_md[process] |>
magrittr::set_names(value = paste0("\\Q", links_roxy[process], "\\E")) |>
stringr::str_replace_all(string = x)
}
#' Get roxygen2 blocks
#'
#' @description
#'
#' `r lifecycle::badge("experimental")`
#'
#' Parses the roxygen2 package documentation of a specific R package or from a single `.R` source code file.
#'
#' @param pkg,text Either a package name (`pkg`) or a character vector of \R source code lines (`text`) to extract the object's roxygen2 tag value from.
#' @param ... Further arguments passed on to [utils::download.packages()], excluding
#' `r cli::ansi_collapse(as_md_vals(forbidden_dots$roxy_tag_value), last = " and ")`. Only relevant if `pkg` is provided.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A list of [`roxy_block`][roxygen2::roxy_block] objects.
#' @family roxy
#' @export
#'
#' @examples
#' # Either provide an R source file as a character vector `text`...
#' text <- readr::read_lines(paste0("https://raw.githubusercontent.com/r-lib/rlang/",
#' "db52a58d505b65f58ba922d4752b5b0061f2a98c/R/fn.R"))
#'
#' pal::roxy_blocks(text = text) |> head(n = 3L)
#'
#' # ...or provide a package name as `pkg`
#' try(
#' pal::roxy_blocks(pkg = "tinkr",
#' repos = "https://cloud.r-project.org") |>
#' head(n = 3L)
#' )
roxy_blocks <- function(pkg = NULL,
...,
text = NULL,
quiet = TRUE) {
rlang::check_installed("roxygen2",
reason = reason_pkg_required())
checkmate::assert_flag(quiet)
checkmate::assert_string(pkg, null.ok = TRUE)
checkmate::assert_character(text, null.ok = TRUE)
is_pkg_null <- is.null(pkg)
is_text_null <- is.null(text)
if (is_pkg_null && is_text_null) cli::cli_abort("One of {.arg pkg} and {.arg text} mustn't be {.val NULL}.")
if (!is_pkg_null && !is_text_null) cli::cli_abort("Only one of {.arg pkg} and {.arg text} can be provided.")
if (is_pkg_null) {
rlang::check_dots_empty0(...)
blocks <- tryCatch(expr = roxygen2::parse_text(text = text),
error = \(x) list(FALSE, x))
if (length(blocks) > 0L && isFALSE(blocks[[1L]])) {
error_msg <- as.character(blocks[[2L]])
is_missing_obj <- stringr::str_detect(string = error_msg,
pattern = " (not found|could not find .+)\\n")
cli::cli_abort(ifelse(is_missing_obj,
error_msg |>
stringr::str_remove(pattern = '^.*: ') |>
stringr::str_replace(pattern = "could not find function \"(.+?)\"",
replacement = "Function {.fun \\1} not found") |>
stringr::str_replace(pattern = "object '(.+?)'",
replacement = "Object {.var \\1}") |>
stringr::str_replace(pattern = "not found\\n?",
replacement = paste0("was not found when parsing {.arg text}. You might need to attach the package you're ",
"trying to extract the roxygen2 tag value from. Or try ",
"{.fn pkg_roxy_tag_value}.")),
"Error parsing {.arg text}: {error_msg}"))
}
} else {
rlang::check_installed("rappdirs",
reason = reason_pkg_required())
check_dots_named(...,
.fn = utils::download.packages,
.forbidden = forbidden_dots$roxy_tag_value)
# alert if installed version doesn't match downloaded one:
pkgs_available <-
utils::available.packages(type = "source",
filters = c("R_version",
"OS_type",
"subarch",
"CRAN")) |>
tibble::as_tibble() |>
dplyr::filter(Package == pkg)
pkg_version_max_installed <-
ls_pkg(pkg = pkg,
as_regex = FALSE) |>
dplyr::pull("Version") |>
max()
if (pkg_version_max_installed %in% pkgs_available$Version) {
pkgs_available %<>% dplyr::filter(Version == pkg_version_max_installed)
} else {
cli::cli_alert_warning(paste0("No sources available for download of locally installed version {.val pkg_version_max_installed} of package {.pkg {pkg}}. ",
"Downloading sources of latest available version {.val {max(pkgs_available[, 'Version'])}} instead. If the following ",
"roxygen tag parsing fails or produces unexpected results, consider updating package {.pkg {pkg}}."))
}
pkgs_available %<>%
as.matrix() %>%
magrittr::set_rownames(value = .[, 1L])
tmp_dir <-
rappdirs::user_cache_dir() |>
fs::path(glue::glue("pal-roxy_tag_value-{pkg}")) |>
fs::dir_create() |>
fs::path_real()
tmp_archive <- utils::download.packages(pkgs = pkg,
destdir = tmp_dir,
available = pkgs_available,
... = ...,
type = "source",
quiet = quiet)
utils::untar(tarfile = tmp_archive[1L, 2L],
exdir = tmp_dir)
tmp_pkg <- fs::path(tmp_dir, pkg)
blocks <- if (quiet) suppressMessages(roxygen2::parse_package(path = tmp_pkg)) else roxygen2::parse_package(path = tmp_pkg)
unlink(x = tmp_dir,
recursive = TRUE)
}
blocks
}
#' Get roxygen2 block object
#'
#' Extracts a single object from a list of [`roxy_block`][roxygen2::roxy_block] objects.
#'
#' @param blocks A list of [`roxy_block`][roxygen2::roxy_block] objects as returned by [roxy_blocks()].
#' @param obj_name Object name to extract, usually a function name. A character scalar.
#'
#' @return A [`roxy_block`][roxygen2::roxy_block] object.
#' @family roxy
#' @export
roxy_obj <- function(blocks,
obj_name) {
checkmate::assert_list(blocks,
types = "roxy_block",
all.missing = FALSE)
obj_names <-
blocks |>
purrr::map_depth(.depth = 1L,
.f = purrr::pluck,
"object", "topic") |>
purrr::compact() |>
purrr::list_c(ptype = character())
obj_name <- rlang::arg_match0(arg = obj_name,
values = obj_names)
blocks[[which(obj_names == obj_name)]]
}
#' Get an object's roxygen2 tag value(s)
#'
#' Extracts the value(s) belonging to the `tag_name`s documenting `obj_name` from a list of [`roxy_block`][roxygen2::roxy_block] objects.
#'
#' @inheritParams roxy_obj
#' @param tag_names Name(s) of the [roxygen2 tag(s)](https://roxygen2.r-lib.org/articles/rd.html) (without the `@` prefix) to extract the value(s) from. A
#' character vector.
#' @param param_name Parameter name to extract the value from. Only relevant if `"param" %in% tag_names`. A character scalar.
#'
#' @return A character vector of the same length as `tag_names`.
#' @family roxy
#' @export
#'
#' @examples
#' if (interactive()) {
#' try(
#' pal::roxy_blocks(pkg = "viridisLite") |>
#' pal::roxy_tag_value(obj_name = "viridis",
#' tag_names = "param",
#' param_name = "alpha") |>
#' cat()
#' )
#' }
roxy_tag_value <- function(blocks,
obj_name,
tag_names,
param_name) {
block <- roxy_obj(blocks = blocks,
obj_name = obj_name)
tags <- block$tags
checkmate::assert_subset(tag_names,
choices = purrr::map_chr(tags,
purrr::pluck,
"tag"),
empty.ok = FALSE)
tag_names %<>% unique()
if ("param" %in% tag_names) {
ix_param <-
tags |>
purrr::map_lgl(\(x) x$tag == "param") |>
which()
param_name <- rlang::arg_match0(arg = param_name,
values = purrr::map_chr(tags[ix_param],
purrr::pluck,
"val", "name"))
i_to_keep <-
tags[ix_param] |>
purrr::map_lgl(\(x) x$val$name == param_name) |>
which()
block$tags %<>% magrittr::extract(-ix_param[-i_to_keep])
}
purrr::map_chr(tag_names,
\(x) roxygen2::block_get_tag_value(block = block,
tag = x) |>
when(is.list(.) ~ .$description,
~ .))
}
#' Convert to verbatim Markdown
#'
#' Converts the provided \R expressions to their character representation using [base::deparse1()] and formats them as [verbatim
#' Markdown](https://pandoc.org/MANUAL.html#verbatim).
#'
#' @param ... \R expression(s) to convert to verbatim Markdown. Must be unnamed. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .eval Whether or not to evaluate the expression(s) in `...`.
#' @param .collapse String to separate the results of a single expression in `...`.
#' @param .backtick Whether or not to enclose symbolic names in backticks if they do not follow the standard syntax.
#' @param .control Deparsing options. A character vector or `NULL`. See [base::.deparseOpts] for all possible options.
#' @param .width.cutoff Cutoff (in bytes) at which line-breaking is tried. An integer scalar between `20` and `500`.
#' @param .nlines Maximum number of lines to produce. A negative value indicates no limit. An integer scalar.
#'
#' @return A character vector of the same length as `...`.
#' @family md
#' @export
#'
#' @examples
#' pal::md_verb(1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |>
#' pal::cat_lines()
#'
#' # you can splice vector or list expressions if you like
#' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |>
#' pal::cat_lines()
#'
#' # to evaluate, or not to evaluate, that is the question
#' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`),
#' .eval = FALSE) |>
#' pal::cat_lines()
#'
#' # unevaluated expressions do not need to exist
#' pal::md_verb(Not, actual(), `R-expressions`,
#' .eval = FALSE) |>
#' pal::cat_lines()
#'
#' # you can opt out of wrapping non-standard syntax in additional backticks
#' pal::md_verb(Not, actual(), `R-expressions`,
#' .eval = FALSE,
#' .backtick = FALSE) |>
#' pal::cat_lines()
# nolint start: object_name_linter.
md_verb <- function(...,
.eval = TRUE,
.collapse = " ",
.backtick = TRUE,
.control = c("keepNA",
"keepInteger",
"niceNames",
"showAttributes",
"warnIncomplete"),
.width.cutoff = 500L,
.nlines = -1L) {
checkmate::assert_flag(.eval)
checkmate::assert_flag(.backtick)
if (.eval) {
result <- rlang::list2(...)
} else {
result <- eval(substitute(alist(...)))
}
result |>
purrr::map(\(x) {
deparsed <- deparse1(expr = x,
collapse = .collapse,
width.cutoff = .width.cutoff,
backtick = .backtick,
control = .control,
nlines = .nlines)
escape_backtick <- stringr::str_detect(string = deparsed,
pattern = stringr::fixed("`"))
paste0("`", "` "[escape_backtick], deparsed, " `"[escape_backtick], "`")
}) |>
purrr::list_c(ptype = character())
}
# nolint end
#' Convert a character vector to a Markdown list
#'
#' Convenience wrapper around [pander::pandoc.list.return()] to convert a character vector (or something coercible to) to a [Markdown
#' list](https://pandoc.org/MANUAL.html#lists).
#'
#' @param x \R object, e.g. a character vector. Each element of `x` will become an item in the resulting Markdown list.
#' @param type Markdown list type. One of
#' - `"unordered"` for an unordered aka [bullet list](https://pandoc.org/MANUAL.html#bullet-lists). Corresponds to
#' [`<ul>`](https://developer.mozilla.org/docs/Web/HTML/Element/ul) in HTML.
#' - `"ordered"` for an ordered aka [numbered list](https://pandoc.org/MANUAL.html#ordered-lists). Corresponds to
#' [`<ol>`](https://developer.mozilla.org/docs/Web/HTML/Element/ol) in HTML.
#' - `"ordered_roman"` for a variation of an ordered/numbered list with uppercase roman numerals instead of Arabic numerals as list markers.
#' @param tight Whether or not to add additional spacing between list items.
#' @param indent_lvl Level of indentation of the resulting Markdown list. For each level, four additional spaces are added in front of every list item. An
#' integer scalar.
#' @param wrap An optional string to wrap the list items in.
#'
#' @return If `x` is empty, a character vector of length zero. Otherwise, a character scalar.
#' @family md
#' @export
#'
#' @examples
#' rownames(mtcars) |>
#' pal::as_md_list() |>
#' cat()
as_md_list <- function(x,
type = c("unordered", "ordered", "ordered_roman"),
tight = TRUE,
indent_lvl = 0L,
wrap = NULL) {
type <- rlang::arg_match(type)
checkmate::assert_flag(tight)
checkmate::assert_count(indent_lvl)
checkmate::assert_string(wrap,
null.ok = TRUE)
rlang::check_installed("pander",
reason = reason_pkg_required())
x %<>% as_chr()
if (length(x) == 0L) {
return(character())
}
pander::pandoc.list.return(elements = paste0(wrap, x, wrap),
style = switch(EXPR = type,
unordered = "bullet",
ordered = "ordered",
ordered_roman = "roman"),
loose = !tight,
indent.level = indent_lvl,
add.line.breaks = FALSE,
add.end.of.list = FALSE)
}
#' Format values as verbatim Markdown
#'
#' Converts the given values to a character vector, formatted as [verbatim Markdown](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally
#' wrapped in double quotes. Metadata like [types][base::typeof] or [attributes][base::attributes] of the input is *not* represented in the output, only its
#' *values*.
#'
#' @param ... Values to be formatted. One or more \R objects.
#'
#' @return A character vector.
#' @family md
#' @export
#'
#' @examples
#' list(1L, 2.2, "other") |>
#' pal::as_md_vals()
#'
#' # note that values are flattened before further processing them, so this yields the same result
#' list(list(list(1L, list(2.2), list(list("other"))))) |>
#' pal::as_md_vals()
as_md_vals <- function(...) {
rlang::list2(...) |>
as_flat_list() |>
purrr::map(\(x) {
# wrap chr vals in double quotes
if (is.character(x)) x %<>% wrap_chr()
# determine nr of backticks to wrap result in, safely escaping any number of existing backticks
add_space <- TRUE
backticks <-
x |>
stringr::str_extract_all(pattern = "`+") |>
purrr::list_c(ptype = character()) |>
unique() %>%
magrittr::extract(nchar(.) == safe_max(nchar(.)))
if (length(backticks) == 0L) {
add_space <- FALSE
backticks <- "`"
} else {
backticks %<>% paste0("`")
}
paste0(backticks, " "[add_space], x, " "[add_space], backticks)
}) |>
purrr::list_c(ptype = character())
}
#' List values as a Markdown list
#'
#' Generates a Markdown list of the given values formatted as [verbatim](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally wrapped in
#' double quotes.
#'
#' @inheritParams as_md_vals
#'
#' @return A character scalar.
#' @family md
#' @export
#'
#' @examples
#' list(1L, 2.2, "other") |>
#' pal::as_md_val_list() |>
#' cat()
#'
#' # note that values are flattened before listing them, so this yields the same list
#' list(list(list(1L, list(2.2), list(list("other"))))) |>
#' pal::as_md_val_list() |>
#' cat()
as_md_val_list <- function(...) {
as_md_list(as_md_vals(...))
}
#' Convert dataframe/tibble to Markdown pipe table
#'
#' Convenience wrapper around [`knitr::kable(format = "pipe")`][knitr::kable()] to create a [Markdown pipe
#' table](https://pandoc.org/MANUAL.html#extension-pipe_tables).
#'
#' # Create tables dynamically in roxygen2 documentation
#'
#' This function can be useful to create tables inside [roxygen2][roxygen2::roxygen2] documentation programmatically from data using
#' [dynamic \R code](https://roxygen2.r-lib.org/articles/rd-formatting.html#dynamic-r-code-1).
#'
#' For example, the inline code
#'
#' `` `r mtcars |> head() |> pipe_table()` ``
#'
#' should produce the following table in [roxygen2 7.1.0](https://www.tidyverse.org/blog/2020/03/roxygen2-7-1-0/) and above:
#'
#' `r mtcars |> head() |> pipe_table()`
#'
#' @inherit knitr::kable details
#'
#' @inheritParams knitr::kable
#' @param x Dataframe/tibble/matrix to be converted to a pipe table.
#' @param incl_rownames Whether to include row names or not. A logical scalar or `NULL`. If `NULL`, row names are included if `rownames(x)` is neither `NULL`
#' nor identical to `seq_len(nrow(x))`.
#' @param strong_colnames Whether or not to highlight column names by formatting them `<strong>` (wrapping them in two asterisks).
#' @param strong_rownames Whether or not to highlight row names by formatting them `<strong>` (wrapping them in two asterisks).
#' @param align Column alignment. Either `NULL` for auto-alignment or a character vector consisting of `'l'` (left), `'c'` (center) and/or `'r'` (right). If
#' `align = NULL`, numeric columns are right-aligned, and other columns are left-aligned. If `length(align) == 1L`, the string will be expanded to a vector
#' of individual letters, e.g. `'clc'` becomes `c('c', 'l', 'c')`.
#' @param format_args A list of arguments to be passed to [base::format()] to format table values, e.g. `list(big.mark = ',')`.
#'
#' @return A character vector.
#' @seealso [xfun::md_table()]
#' @family md
#' @export
#'
#' @examples
#' mtcars |> head() |> pal::pipe_table() |> pal::cat_lines()
pipe_table <- function(x,
incl_rownames = NULL,
strong_colnames = TRUE,
strong_rownames = TRUE,
align = NULL,
label = NULL,
digits = getOption("digits"),
format_args = list()) {
checkmate::assert_flag(incl_rownames,
null.ok = TRUE)
checkmate::assert_flag(strong_colnames)
checkmate::assert_flag(strong_rownames)
rlang::check_installed("knitr",
reason = reason_pkg_required())
if (is.null(incl_rownames)) {
incl_rownames <- !identical(rownames(x), as.character(seq_len(nrow(x))))
}
# format rownames <strong> if requested and sensible
if ((is.null(incl_rownames) || incl_rownames) && strong_rownames) {
# setting rownames on a tibble is deprecated, thus we convert to dataframe
x %<>% as.data.frame()
rownames(x) %<>% paste0("**", ., "**")
}
kable_args <-
alist(x = x,
format = "pipe",
digits = digits,
row.names = incl_rownames,
col.names = colnames(x) |> when(strong_colnames ~ paste0("**", ., "**"),
~ .),
label = label,
format.args = format_args) |>
when(!is.null(align) ~ c(., alist(align = align)),
~ .)
do.call(what = knitr::kable,
args = kable_args)
}
#' Strip Markdown formatting from character vector
#'
#' Removes all Markdown formatting from a character vector.
#'
#' This function relies on [commonmark::markdown_text()] which [supports the CommonMark specification plus the Github
#' extensions](https://github.com/r-lib/commonmark#readme). Unfortunately, [Markdown footnotes](https://pandoc.org/MANUAL.html#footnotes) can't be stripped
#' using `commonmark::markdown_text()`. Therefore a separate option `strip_footnotes` is offered which relies on regular expressions to remove inline footnotes
#' and footnote definitions and references.
#'
#' @param x A character vector to strip Markdown formatting from.
#' @param strip_footnotes Whether to remove Markdown footnotes, too. If `FALSE`, footnotes are canonicalized (to have sequential integer identifiers).
#'
#' @return A character vector of the same length as `x`.
#' @family md
#' @export
#'
#' @examples
#' pal::strip_md(
#' "A **MD** formatted [string](https://en.wikipedia.org/wiki/String_(computer_science))"
#' )
#'
#' # link references are only removed *iff* the reference is included in `x`:
#' pal::strip_md("[A reference link][refid]\n\n[refid]: https://example.com")
#' pal::strip_md("[A reference link][refid]\n\n_No ref here..._")
strip_md <- function(x,
strip_footnotes = TRUE) {
rlang::check_installed("commonmark",
version = "1.9.0",
reason = reason_pkg_required())
checkmate::assert_character(x)
checkmate::assert_flag(strip_footnotes)
purrr::map_chr(x,
\(x2) {
if (is.na(x2)) {
x2
} else {
result <- commonmark::markdown_text(text = x2,
footnotes = TRUE,
extensions = TRUE)
if (!isTRUE(endsWith(x2, "\n"))) {
result %<>% stringr::str_remove(pattern = "\n$")
}
if (strip_footnotes) {
result %<>% strip_md_footnotes()
}
result
}
})
}
#' Strip Markdown footnotes from character vector
#'
#' Removes all Markdown footnotes from a character vector.
#'
#' Note that it is not checked whether footnote references and definitions actually match (by identifier), thus they are removed even if they were invalid.
#'
#' @param x A character vector to strip Markdown footnotes from. Note that elements in `x` are processed as separate Markdown domains, i.e. _not_ as individual
#' lines belonging to the same Markdown document.
#'
#' @return A character vector of the same length as `x`.
#' @family md
#' @export
#'
#' @examples
#' pal::strip_md_footnotes(
#' "An **MD** formatted string with footnote[^fn].\n\n[^fn]: A note.\n"
#' )
strip_md_footnotes <- function(x) {
checkmate::assert_character(x) |>
stringr::str_split(pattern = "(\\n{3,}|\\n{2}(?! {4}))") |>
purrr::map_chr(\(x2) {
# remove footnote defs
ix_to_rm <-
x2 |>
stringr::str_detect("^\\[\\^[^\\s\\]]+\\]: .*") |>
which()
if (length(ix_to_rm)) {
x2 <- x2[-ix_to_rm]
}
# remove inline footnotes and refs
x2 %<>% stringr::str_remove_all(pattern = "\\[\\^[^\\s\\]]+\\]|\\^\\[.+?\\]")
if (length(x2) > 0L && !all(is.na(x2))) {
x2 %<>% paste0(collapse = "\n\n")
}
x2
})
}
#' Parse (R) Markdown as CommonMark XML tree
#'
#' Parses (R) Markdown file content according to the [CommonMark](https://commonmark.org/) specification and returns it as an XML parse tree.
#'
#' @inheritParams as_line_feed_chr
#' @inheritParams gitlab_document
#' @inheritParams commonmark::markdown_xml
#' @param md (R) Markdown file content as a character scalar.
#' @param hardbreaks Whether or not to treat newlines as hard line breaks.
#' @param strip_xml_ns Whether or not to [remove the default XML namespace][xml2::xml_ns_strip] (`d1`) assigned by [commonmark::markdown_xml()].
#'
#' @return An [`xml_document`][xml2::xml_document-class].
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#'
#' Some prose.
#'
#' ## A subtitle
#'
#' More prose.
#'
#' ## Another subtitle
#'
#' Out of prose here.
#'
#' ### A sub-subtitle
#'
#' I'm dug in.
#'
#' # Another title
#'
#' A last word." |> pal::md_to_xml()
md_to_xml <- function(md,
smart_punctuation = FALSE,
hardbreaks = FALSE,
normalize = TRUE,
sourcepos = FALSE,
footnotes = TRUE,
extensions = c("strikethrough", "table", "tasklist"),
eol = c("LF", "CRLF", "CR", "LFCR"),
strip_xml_ns = TRUE) {
rlang::check_installed("commonmark",
reason = reason_pkg_required())
rlang::check_installed("xml2",
reason = reason_pkg_required())
result <-
strip_yaml_header(rmd = md,
eol = eol) |>
commonmark::markdown_xml(hardbreaks = hardbreaks,
smart = smart_punctuation,
normalize = normalize,
sourcepos = sourcepos,
footnotes = footnotes,
extensions = extensions) |>
xml2::read_xml() |>
when(strip_xml_ns ~ xml2::xml_ns_strip(.),
~ .)
# `xml2::xml_ns_strip()` returns its result invisibly, so we make it visible again
(result)
}
#' Determine CommonMark XML subnode indices
#'
#' Determines the XML children node indices for every XML node at the highest level of `xml` by interpreting [Markdown heading
#' levels](https://pandoc.org/MANUAL.html#headings) (1–6).
#'
#' [commonmark::markdown_xml()] (and so [md_to_xml()] which builds upon it) **do** parse (R) Markdown file content according to the
#' **[CommonMark](https://commonmark.org/) specification**, but **do not** return any information about the document's **heading hierarchy**.
#' `md_xml_subnode_ix()` fills this gap by giving the hierarchy structure in the form of the XML subnode indices for every node at the highest level of `xml`.
#'
#' @param xml CommonMark parse tree. An [`xml_document`][xml2::xml_document-class], [`xml_nodeset`][xml2::xml_nodeset-class] or
#' [`xml_node`][xml2::xml_node-class].
#'
#' @return A list of integer vectors of the same length as the number of XML nodes at the highest level of `xml`.
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#'
#' Some prose.
#'
#' ## A subtitle
#'
#' More prose.
#'
#' ## Another subtitle
#'
#' Out of prose here.
#'
#' ### A sub-subtitle
#'
#' I'm dug in.
#'
#' # Another title
#'
#' A last word." |>
#' pal::md_to_xml() |>
#' pal::md_xml_subnode_ix()
md_xml_subnode_ix <- function(xml) {
assert_class_any(xml,
classes = c("xml_document", "xml_nodeset", "xml_node"),
name = "xml")
rlang::check_installed("xml2",
reason = reason_pkg_required())
xml_names <- xml2::xml_name(xml)
if (length(xml_names) == 1L && xml_names == "document") {
xml %<>% xml2::xml_contents()
}
purrr::map(seq_along(xml),
\(i) subnode_ix(xml_nodes = xml,
i = i))
}
#' Convert from CommonMark XML to (R) Markdown
#'
#' @inheritParams md_xml_subnode_ix
#'
#' @return A character scalar.
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#'
#' Some prose.
#'
#' ## A subtitle
#'
#' More prose.
#'
#' ## Another subtitle
#'
#' Out of prose here.
#'
#' ### A sub-subtitle
#'
#' I'm dug in.
#'
#' # Another title
#'
#' A last word." |>
#' pal::md_to_xml() |>
#' xml2::xml_contents() |>
#' pal::xml_to_md() |>
#' cat()
xml_to_md <- function(xml) {
assert_class_any(xml,
classes = c("xml_document", "xml_nodeset", "xml_node"),
name = "xml")
rlang::check_installed("tinkr",
reason = reason_pkg_required())
rlang::check_installed("xml2",
reason = reason_pkg_required())
rlang::check_installed("xslt",
reason = reason_pkg_required())
xml |>
as.character() |>
paste0(collapse = "\n") |>
# trim whitespace in case `xml` was already of type character
stringr::str_trim() |>
# add CommonMark XML namespace
when(stringr::str_detect(string = .,
pattern = "^<document[>\\s]") ~ .,
~ paste0('<document xmlns="', as.character(tinkr::md_ns()), '">', ., '</document>')) |>
xml2::read_xml() |>
# convert XML to CommonMark
xslt::xml_xslt(stylesheet = xml2::read_xml(tinkr::stylesheet()))
}
#' Build `README.Rmd`
#'
#' A simpler, but considerably faster alternative to [devtools::build_readme()] since it doesn't install your package in a temporary library before building the
#' `README.Rmd`. This has the pleasant side effect that, other than the latter function, it also works for `.Rmd` files which aren't part of an \R package.
#'
#' Note that for public package repositories, it's recommended to use [devtools::build_readme()] since it ensures the `README.Rmd` can be built _reproducibly_,
#' which means all the objects and files it references must be accessible from the repository.
#'
#' `r pkgsnip::md_snip("rstudio_addin")`
#'
#' @param input Path to the R Markdown README file to be built. A character scalar.
#' @param output Path to the built Markdown README. A character scalar.
#' @param build_index_md Whether to build a separate [pkgdown][pkgdown::pkgdown]-optimized `pkgdown/index.md` alongside `output` (i.e. in the same parent
#' directory). If `NULL`, it will only be built if the parent directory of `output` [contains a pkgdown configuration file][is_pkgdown_dir]. Note that it will
#' be built with the \R option `pal.build_readme.is_pkgdown` set to `TRUE`, allowing for conditional content inclusion in `input` – e.g. via the [code chunk
#' option](https://yihui.org/knitr/options/#code-evaluation) `eval = isTRUE(getOption("pal.build_readme.is_pkgdown"))`.
#' @param env Environment in which code chunks are to be evaluated, e.g. [base::parent.frame()], [base::new.env()], or [base::globalenv()].
#' @param clean Whether or not to delete intermediate files created by [rmarkdown::render()].
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return The path to `input` as a character scalar, invisibly.
#' @family rmd_knitr
#' @export
build_readme <- function(input = "README.Rmd",
output = "README.md",
build_index_md = NULL,
env = parent.frame(),
clean = TRUE,
quiet = FALSE) {
checkmate::assert_environment(env)
checkmate::assert_string(input)
checkmate::assert_path_for_output(output,
overwrite = TRUE)
checkmate::assert_flag(build_index_md,
null.ok = TRUE)
checkmate::assert_flag(clean)
checkmate::assert_flag(quiet)
rlang::check_installed("knitr",
reason = reason_pkg_required())
rlang::check_installed("rmarkdown",
reason = reason_pkg_required())
# add args to `env`
rlang::env_bind(.env = env,
input = input,
output = output,
build_index_md = build_index_md)
# add `pkg_metadata` to env
parent_dir <- fs::path_dir(input) |> fs::path_abs()
if (is_pkg_dir(parent_dir)) {
rlang::env_bind(.env = env,
pkg_metadata = desc_list(parent_dir))
}
if (!quiet) {
cli_progress_step_quick("Building {.file {input}}")
}
# generate `output`
## render to the output format specified in the YAML header (defaults to `rmarkdown::md_document`)
rmarkdown::render(input = input,
output_file = output,
clean = clean,
quiet = TRUE,
envir = env)
# generate `index.md` if indicated
if (!isFALSE(build_index_md)) {
output_dir <- fs::path_dir(output)
if (is_pkgdown_dir(output_dir)) {
index_md_path <-
fs::path(output_dir, "pkgdown") |>
fs::dir_create() |>
fs::path("index.md") |>
checkmate::assert_path_for_output(overwrite = TRUE) |>
fs::path_abs()
# clean Rmd file
rlang::check_installed("brio",
reason = reason_pkg_required())
rlang::check_installed("withr",
reason = reason_pkg_required())
tmp_file <- fs::file_temp(pattern = "tmp-pal-build_readme",
ext = fs::path_ext(input))
brio::read_file(input) |>
# remove possible trailing horizontal line in Rmd file since pkgdown always adds one below content
# TODO: submit PR to pkgdown fixing this?
stringr::str_replace(pattern = " {0,3}([-\\*_]{3,}|<hr */?>)(\\s*(\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)",
replacement = "\\2") |>
# remove `align` and `height` <img> tags (rely on custom CSS file `pkgdown/extra.css` instead)
stringr::str_replace_all(pattern = "(<img [^>]+)(align=['\"].*?['\"]\\s*)",
replacement = "\\1") |>
stringr::str_replace_all(pattern = "(<img [^>]+)(height=['\"].*?['\"]\\s*)",
replacement = "\\1") |>
brio::write_file(path = tmp_file)
# render `pkgdown/index.md`
withr::with_options(
new = list(pal.build_readme.is_pkgdown = TRUE),
code = rmarkdown::render(input = tmp_file,
output_file = index_md_path,
output_format =
rmarkdown::md_document(variant = "markdown",
# disable Pandoc extensions in input which rmarkdown only adds for backwards compatibility
md_extensions = c("-autolink_bare_uris",
"-tex_math_single_backslash"),
pandoc_args = "--columns=9999") |>
# overwrite Pandoc output format (i.a. disable Pandoc's raw attributes to have more control over inline HTML)
purrr::list_modify(pandoc = list(to = "markdown-raw_attribute")),
knit_root_dir = parent_dir,
clean = clean,
quiet = TRUE,
envir = env)
)
fs::file_delete(tmp_file)
}
}
}
#' Determine current knitr table format
#'
#' Determines the current knitr table format based on the \R option
#' [`knitr.table.format`](https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#kable-formats) which can either be set directly to a valid format string or
#' to a function returning one of these strings conditionally.
#'
#' This is basically a convenience wrapper to be able to access the current `knitr.table.format` in a hassle-free way, i.e. it provides the conditional logic to
#' account for the possibility that `knitr.table.format` is set to a function rather than a format string.
#'
#' @param default knitr table format to fall back to when the \R option `knitr.table.format` is not set. One of
#' `r fn_param_defaults(param = "default", fn = knitr_table_format) |> wrap_chr("\x60") |> as_md_list()`
#'
#' See [knitr::kable()]'s `format` argument for details.
#'
#' @return A character scalar.
#' @family rmd_knitr
#' @export
knitr_table_format <- function(default = c("pipe",
"simple",
"html",
"latex",
"rst")) {
allowed_formats <- eval(formals()$default)
opt <- getOption("knitr.table.format")
result <- opt %||% rlang::arg_match(default)
if (is.function(result)) result <- result()
if (!(result %in% allowed_formats)) {
cli::cli_abort(paste0("R option {.field knitr.table.format} must evaluate to one of ",
enum_str(x = paste0("{.val ", allowed_formats, "}"),
sep2 = " or "),
", but is {.code {deparse1(opt)}}",
ifelse(is.function(opt),
" which evaluates to {.val {result}}",
""),
"."))
}
result
}
#' Strip YAML header from R Markdown
#'
#' Extracts the body from R Markdown file content, stripping a possible [YAML metadata
#' block](https://bookdown.org/yihui/rmarkdown-cookbook/rmarkdown-anatomy.html#yaml-metadata) at the beginning.
#'
#' Note that for the [R Markdown file format](https://rmarkdown.rstudio.com/), the [YAML metadata
#' block](https://pandoc.org/MANUAL.html#extension-yaml_metadata_block) must occur at the beginning of the document (and there can be only one). Additional
#' whitespace characters (incl. newlines) before the YAML metadata block are allowed.
#'
#' @inheritParams as_line_feed_chr
#' @param rmd R Markdown file content as a character scalar.
#'
#' @return Body of the R Markdown file as a character vector of lines.
#' @family rmd_knitr
#' @export
#'
#' @examples
#' rmd <- "
#' ---
#' output: pal::gitlab_document
#' ---
#'
#' # A heading
#'
#' Some prose.
#' "
#'
#' pal::cat_lines(rmd)
#'
#' rmd |>
#' pal::strip_yaml_header() |>
#' pal::cat_lines()
strip_yaml_header <- function(rmd,
eol = c("LF", "CRLF", "CR", "LFCR")) {
checkmate::assert_string(rmd)
has_yaml <- stringr::str_detect(string = rmd,
pattern = "^(\\n\\s*)?---\\s*\\n.*(---|...)\\s*\\n")
eol %<>% as_line_feed_chr()
rmd %<>% stringr::str_split_1(pattern = eol)
last_yaml_line_nr <- 0L
if (has_yaml) {
last_yaml_line_nr <-
rmd |>
stringr::str_locate("^---\\s*$") |>
tibble::as_tibble() |>
tibble::rowid_to_column() |>
dplyr::filter(!dplyr::if_any(.cols = everything(),
.fns = is.na)) |>
dplyr::pull("rowid") |>
purrr::chuck(2L) |>
min(rmd |>
stringr::str_locate("^\\.{3}\\s*$") |>
tibble::as_tibble() |>
tibble::rowid_to_column() |>
dplyr::filter(!dplyr::if_any(.cols = everything(),
.fns = is.na)) |>
dplyr::pull("rowid") |>
purrr::pluck(1L),
na.rm = TRUE)
}
rmd[(last_yaml_line_nr + 1L):length(rmd)]
}
#' Convert to GitLab Flavored Markdown
#'
#' Format for converting from R Markdown to [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md).
#'
#' This is the GitLab equivalent to the [`github_document`][rmarkdown::github_document()] R Markdown
#' [output format](https://bookdown.org/yihui/rmarkdown/output-formats.html). It basically ensures Pandoc is called with a custom set of options optimized for
#' maximum compatibility with [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md).
#'
#' ## Caveats regarding GitLab-Flavored-Markdown-specific features
#'
#' GitLab Flavored Markdown extends the [CommonMark](https://spec.commonmark.org/current/) Markdown specification with a bunch of
#' [special features](https://gitlab.com/help/user/markdown.md#gfm-extends-standard-markdown). To be able to properly make use of them, observe the following
#' points:
#'
#' - For [inline diffs](https://gitlab.com/help/user/markdown.md#inline-diff), only use curly braces (`{}`), not square brackets (`[]`). The latter will be
#' escaped by Pandoc during conversion and thus not recognized by GitLab as starting/ending an inline diff.
#'
#' - You have to set `smart_punctuation = FALSE` in order to leave certain
#' [special GitLab references](https://gitlab.com/help/user/markdown.md#special-gitlab-references) (like commit range comparisons) untouched for GitLab to
#' interpret them correctly.
#'
#' All the special GitLab references for snippets and labels that start with a tilde (`~`) or a dollar sign (`$`) won't work because these characters will be
#' escaped by Pandoc during conversion.
#'
#' - The `[[_TOC_]]` tag to let GitLab [generate a table of contents](https://gitlab.com/help/user/markdown.md#table-of-contents) won't work because it will be
#' escaped by Pandoc during conversion. You can let Pandoc generate the TOC instead by setting `toc = TRUE`.
#'
#' - [Multiline blockquotes](https://gitlab.com/help/user/markdown.md#multiline-blockquote) won't work because the fence delimiters `>>>` will be escaped by
#' Pandoc during conversion.
#'
#' @inheritParams rmarkdown::output_format
#' @inheritParams rmarkdown::md_document
#' @param smart_punctuation Whether or not to enable [Pandoc's `smart` extension](https://pandoc.org/MANUAL.html#extension-smart) which converts straight quotes
#' to curly quotes, `---` to an em-dash (—), `--` to an en-dash (–), and `...` to ellipses (…). It also replaces regular spaces after certain abbreviations
#' such as `Mr.` with [non-breaking spaces](https://en.wikipedia.org/wiki/Non-breaking_space).
#' @param parse_emoji_markup Whether to enable [Pandoc's `emoji` extension](https://pandoc.org/MANUAL.html#extension-emoji) which parses emoji markup (e.g.
#' `:smile:`) as Unicode emoticons.
#' @param toc Include a table of contents (TOC) [automatically generated by Pandoc](https://pandoc.org/MANUAL.html#option--toc). Note that the TOC will be
#' placed _before_ the README's body, meaning also _before_ the first Markdown header.
#' @param add_footnotes_hr Whether to add a trailing horizontal rule (`---`) to the final Markdown file if it doesn't already end in one and contains footnotes
#' (currently only checks for Pandoc's [reference-style footnotes](https://pandoc.org/MANUAL.html#footnotes) and not inline footnotes). This improves
#' readability when the file is rendered on `GitLab.com`.
#' @param autolink_bare_uris Enable the [`autolink_bare_uris` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-autolink_bare_uris) which
#' makes all absolute URIs into links, even when not surrounded by pointy braces `<...>`.
#' @param tex_math_single_backslash Enable the
#' [`tex_math_single_backslash` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-tex_math_single_backslash) which causes anything between
#' `\(` and `\)` to be interpreted as inline TeX math, and anything between `\[` and `\]` to be interpreted as display TeX math. Note: a drawback of this
#' extension is that it precludes escaping `(` and `[`.
#'
#' @return R Markdown output format intended to be fed to [rmarkdown::render()].
#' @family rmd_format
#' @export
#'
#' @examples
#' \donttest{
#' tmp_file <- fs::file_temp()
#' download.file(url = "https://gitlab.com/rpkg.dev/pal/-/raw/master/Rmd/pal.Rmd",
#' destfile = tmp_file,
#' quiet = TRUE,
#' mode = "wb")
#'
#' rmarkdown::render(input = tmp_file,
#' output_format = pal::gitlab_document(),
#' quiet = TRUE) |>
#' brio::read_lines() |>
#' length()}
gitlab_document <- function(smart_punctuation = TRUE,
parse_emoji_markup = FALSE,
df_print = "kable",
toc = FALSE,
toc_depth = 6L,
fig_width = 7L,
fig_height = 5L,
dev = "png",
preserve_yaml = FALSE,
add_footnotes_hr = TRUE,
autolink_bare_uris = FALSE,
tex_math_single_backslash = FALSE) {
checkmate::assert_flag(smart_punctuation)
checkmate::assert_flag(parse_emoji_markup)
checkmate::assert_flag(toc)
checkmate::assert_int(toc_depth,
lower = 1L,
upper = 6L)
checkmate::assert_flag(preserve_yaml)
checkmate::assert_flag(add_footnotes_hr)
checkmate::assert_flag(autolink_bare_uris)
checkmate::assert_flag(tex_math_single_backslash)
rlang::check_installed("rmarkdown",
reason = reason_pkg_required())
# `post_process` fn to ensure MD ends in trailing horizontal rule (to visually separate footnotes from a possible trailing numbered list)
if (add_footnotes_hr) {
ensure_trailing_md_hr <- function(metadata,
input_file,
output_file,
clean,
verbose) {
rlang::check_installed("brio",
reason = reason_pkg_required())
checkmate::assert_file_exists(output_file,
access = "w")
md <- brio::read_file(output_file)
# ensure trailing horizontal rule if file contains footnotes
if (stringr::str_detect(string = md,
pattern = "(\\n\\[\\^[\\w-]+\\]:.*)")
&& stringr::str_detect(string = md,
pattern = " {0,3}([-\\*_]{3,}|<hr */?>)(\\s*(\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)",
negate = TRUE)) {
md |>
stringr::str_replace(pattern = "((\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)",
replacement = "\n---\n\\1") |>
brio::write_file(path = output_file)
}
output_file
}
} else {
ensure_trailing_md_hr <- NULL
}
# create rmd output format
rmarkdown::output_format(
knitr = rmarkdown::knitr_options_html(fig_width = fig_width,
fig_height = fig_height,
fig_retina = NULL,
keep_md = FALSE,
dev = dev),
pandoc = rmarkdown::pandoc_options(to =
c("markdown",
"+emoji"[parse_emoji_markup],
"-smart",
"-simple_tables",
"-multiline_tables",
"-grid_tables",
"-header_attributes",
"-fenced_code_attributes",
"-inline_code_attributes",
"-link_attributes",
"-raw_attribute",
"-pandoc_title_block",
"-yaml_metadata_block"[!preserve_yaml]) |>
paste0(collapse = ""),
from =
c("markdown",
"+autolink_bare_uris"[autolink_bare_uris],
"+tex_math_single_backslash"[tex_math_single_backslash],
"-smart"[!smart_punctuation]) |>
paste0(collapse = ""),
args = c("--columns=9999",
"--standalone",
"--table-of-contents"[toc],
paste0("--toc-depth=", toc_depth)[toc])),
df_print = df_print,
pre_knit = NULL,
post_knit = NULL,
pre_processor = NULL,
intermediates_generator = NULL,
post_processor = ensure_trailing_md_hr,
on_exit = NULL,
base_format = NULL
)
}
#' Write HTML widget to file
#'
#' Writes an HTML widget's main container `<div>` and data `<script>` tags to file.
#'
#' Other than [htmlwidgets::saveWidget()], this function does *not* write a complete HTML document but only the widget's core tags. Hence it is intended to
#' create files which are in turn included in another HTML file, e.g. via a static site generator. The HTML widget's dependencies must be handled separately,
#' e.g. using [write_widget_deps()].
#'
#' To directly get the HTML widget's core tags as a character scalar, use `as.character(htmltools::tagList(x))`.
#'
#' @param x HTML widget object.
#' @param path File path to write the HTML widget's core tags to.
#'
#' @return The content written to `path` as a character scalar, invisibly.
#' @family htmlwidget
#' @export
write_widget <- function(x,
path) {
checkmate::assert_class(x,
classes = "htmlwidget")
checkmate::assert_path_for_output(path,
overwrite = TRUE)
rlang::check_installed("brio",
reason = reason_pkg_required())
rlang::check_installed("htmltools",
reason = reason_pkg_required())
x |>
htmltools::tagList() |>
as.character() |>
brio::write_lines(path = path)
}
#' Write HTML widget dependencies
#'
#' Writes an HTML widget's JS and CSS dependencies to files and returns a snippet for their inclusion in the `<head>` of an HTML document.
#'
#' To write the HTML widget's core tags to file, use [write_widget()].
#'
#' @inheritParams write_widget
#' @param path Directory path to write the HTML widget's dependencies to. A subfolder will be created for each dependency.
#' @param path_base Base path to determine the relative root of `path` for inclusion of the widget's dependencies. Must be a parent of or equal to `path`.
#' @param path_head_snippet Optional file path to write the snippet for inclusion of the widget's dependencies in the `<head>` of an HTML document to.
#' @param quiet Whether or not to print the snippet for inclusion of the widget's dependencies to console.
#'
#' @return An [HTML][htmltools::HTML] object suitable for inclusion in the `<head>` of an HTML document, invisibly.
#' @family htmlwidget
#' @export
write_widget_deps <- function(x,
path,
path_base,
path_head_snippet = NULL,
quiet = !is.null(path_head_snippet)) {
checkmate::assert_class(x,
classes = "htmlwidget")
checkmate::assert_directory_exists(path,
access = "w")
if (!fs::path_has_parent(path = path,
parent = path_base)) {
cli::cli_abort("{.arg path_base} is not a parent of (or equal to) {.arg path}.")
}
if (!is.null(path_head_snippet)) {
checkmate::assert_path_for_output(path_head_snippet,
overwrite = TRUE)
}
checkmate::assert_flag(quiet)
rlang::check_installed("brio",
reason = reason_pkg_required())
rlang::check_installed("htmltools",
reason = reason_pkg_required())
deps <-
htmltools::findDependencies(x) |>
purrr::map(\(x) {
htmltools::copyDependencyToDir(dependency = x,
outputDir = path) |>
htmltools::makeDependencyRelative(basepath = path_base)
})
head_snippet <-
htmltools::renderDependencies(deps) |>
# manually add the root slash
stringr::str_replace_all(pattern = "(\\s(src|href)\\s*=\\s*[\"'])([^/])",
replacement = "\\1/\\3") |>
htmltools::HTML()
if (!is.null(path_head_snippet)) {
brio::write_lines(text = head_snippet,
path = path_head_snippet)
}
if (!quiet) {
cli::cli_alert_info(text = "Include the following HTML snippet in the {.field <head>} of your document:")
cli::cli_par()
cli::cli_end()
cli::cli_code(lines = head_snippet)
}
invisible(head_snippet)
}
#' Assert object is member of any class
#'
#' Asserts that an object is member of any of the specified classes.
#'
#' In contrast to [checkmate::assert_class()], this function returns `TRUE` as long as `x` is at least member of *one* of `classes`.
#'
#' @param x \R object to test.
#' @param classes Class names to check for inheritance. A character vector.
#' @param name Name of the checked object to print in error message in case the assertion fails. A character scalar.
#'
#' @return `x`, invisibly.
#' @family checkmate
#' @export
#'
#' @examples
#' xml2::read_html("https://pal.rpkg.dev/dev/license") |>
#' assert_class_any(classes = c("xml_document", "xml_nodeset", "xml_node"))
assert_class_any <- function(x,
classes,
name = "x") {
checkmate::assert_character(classes,
any.missing = FALSE)
if (!inherits(x = x,
what = classes)) {
checkmate::assert_string(name)
classes_actual <- class(x)
cli::cli_abort(paste0("{.arg {name}} must {cli::qty(classes)} be {?of class/member of any of the classes} ",
classes %>% paste0("{.val ", ., "}") |> enum_str(sep2 = " or "),
", but is {cli::qty(classes_actual)} of class{?es} {.val {classes_actual}}."))
}
invisible(x)
}
#' Assert object is data frame or tibble (extension)
#'
#' Asserts that an object is `pkgsnip::param_lbl(id = "df_or_tibble", as_sentence = FALSE)`.
#'
#' @inheritParams assert_class_any
#'
#' @inherit assert_class_any return
#' @family checkmate
#' @export
#'
#' @examples
#' tibble::tibble() |> pal::assert_df_or_tibble()
#'
#' try(
#' matrix() |> pal::assert_df_or_tibble()
#' )
assert_df_or_tibble <- function(x,
name = "x") {
assert_class_any(x = x,
classes = c("data.frame",
"tbl",
"tbl_dbi",
"tbl_df",
"tbl_lazy",
"tbl_sql"),
name = name)
}
#' Assert count or `Inf`
#'
#' Asserts that `x` is either a [count][checkmate::assert_count] or [positive infinity][is.infinite] (`Inf`).
#'
#' @param x Object to check.
#' @param ... Further arguments passed on to [checkmate::assert_count()].
#'
#' @return `x`, invisibly.
#' @family checkmate
#' @export
#'
#' @examples
#' pal::assert_inf_count(1001L)
#' pal::assert_inf_count(1)
#' pal::assert_inf_count(0.0)
#' pal::assert_inf_count(Inf)
#'
#' try(
#' pal::assert_inf_count(-1)
#' )
#' try(
#' pal::assert_inf_count(1.5)
#' )
#' try(
#' pal::assert_inf_count(-Inf)
#' )
assert_inf_count <- function(x,
...) {
if (isTRUE(is.infinite(x))) {
if (x < 0L) {
cli::cli_abort("{.arg x} must either be a single integerish value or positive infinity ({.code Inf}).")
}
return(invisible(x))
} else {
return(checkmate::assert_count(x = x,
...))
}
}
#' [cli](https://cli.r-lib.org/) pluralization helpers for booleans
#'
#' Equivalents to [cli::qty()] and [cli::no()] for a logical input.
#'
#' If `cnd` evaluates to `TRUE`, the resulting cli quantity is `1`, otherwise `0`. See cli's [pluralization
#' rules](https://cli.r-lib.org/articles/pluralization.html#pluralization-markup-1) for details about how these quantities are interpreted.
#'
#' @param cnd Condition. A logical scalar.
#'
#' @return `0L` or `1L` with the additional class `cli_noprint`.
#' @family cli
#' @export
#'
#' @examples
#' cnd <- runif(1L) < 0.5
#'
#' cli::pluralize(paste0(
#' "{pal::cli_qty_lgl(cnd)}I think this function ",
#' "{?comes in handy/is not worth a second of my attention}. Having looked at the rest of the ",
#' "package, this {?is quite surprising/does not come as a surprise}."
#' ))
#'
#' cli::pluralize("This function is worth exactly {pal::cli_no_lgl(cnd)} second of my time.")
cli_qty_lgl <- function(cnd) {
checkmate::assert_flag(cnd)
cnd %<>% as.integer()
class(cnd) <- "cli_noprint"
cnd
}
#' @rdname cli_qty_lgl
#' @export
cli_no_lgl <- function(cnd) {
checkmate::assert_flag(cnd)
cnd %<>% as.integer()
class(cnd) <- "cli_no"
cnd
}
#' Quick [cli](https://cli.r-lib.org/) simplified progress message
#'
#' Version of [cli::cli_progress_step()] that for the messages on (un)successful termination simply appends **done**/**failed** to `msg`.
#'
#' @inheritParams cli::cli_progress_bar
#' @inheritParams cli::cli_progress_step
#'
#' @return The cli progress bar `id` as a character scalar, invisibly.
#' @family cli
#' @export
#'
#' @examples
#' {
#' pal::cli_progress_step_quick(msg = "Doing my thing")
#' Sys.sleep(1)
#' cli::cli_progress_done()
#' }
cli_progress_step_quick <- function(msg,
spinner = FALSE,
class = if (!spinner) ".alert-info",
current = TRUE,
.auto_close = TRUE,
.envir = parent.frame()) {
checkmate::assert_string(msg)
msg %<>% paste0("\u2026")
cli::cli_progress_step(msg = msg,
msg_done = paste(msg, "done"),
msg_failed = paste(msg, "failed"),
spinner = spinner,
class = class,
current = current,
.auto_close = .auto_close,
.envir = .envir)
}
#' Evaluate an expression with [cli](https://cli.r-lib.org/) process indication
#'
#' @description
#'
#' `r lifecycle::badge("superseded")` \cr
#' This function is superseded in favor of [cli_progress_step_quick()] and the underlying `cli::cli_progress_*` family of functions which are more powerful and
#' versatile than the `cli::cli_process_*` family on which `cli_process_expr()` is built.
#'
#' @inheritParams cli::cli_process_start
#' @param expr An expression to be evaluated.
#' @param env Default environment to evaluate `expr`, as well as possible [glue][glue::glue()] expressions within `msg`, in.
#'
#' @return The result of the evaluated `expr`, invisibly.
#' @family cli
#' @export
#'
#' @examples
#' if (interactive()) {
#' pal::cli_process_expr(Sys.sleep(3L), "Zzzz")
#' }
#'
#' russian_roulette <- function() {
#' msg <- "Spinning the cylinder \U0001F91E … "
#' pal::cli_process_expr(msg = msg,
#' msg_done = paste0(msg, "and pulling the trigger – lucky again. \U0001F60C"),
#' msg_failed = paste0(msg, "and pulling the trigger – head blast!"),
#' expr = {
#' if (interactive()) Sys.sleep(1)
#' if (runif(1L) < 0.4) stop("\U0001F92F\u2620")
#' })
#' }
#'
#' set.seed(321)
#' russian_roulette()
#' set.seed(123)
#' try(russian_roulette())
cli_process_expr <- function(expr,
msg,
msg_done = paste(msg, "... done"),
msg_failed = paste(msg, "... failed"),
msg_class = "alert-info",
done_class = "alert-success",
failed_class = "alert-danger",
env = parent.frame()) {
checkmate::assert_string(msg,
# NOTE: This is necessary since `cli::cli_process_start(msg = "")` throws an error
min.chars = 1L)
checkmate::assert_string(msg_done)
checkmate::assert_string(msg_failed)
checkmate::assert_string(msg_class)
checkmate::assert_string(done_class)
checkmate::assert_string(failed_class)
checkmate::assert_environment(env)
# NOTE: We cannot rely on `on_exit = "done"` since in case of an error the on-exit code of this function won't reach execution because we throw the error
# using `rlang::cnd_signal(.x)` first.
status_bar_container_id <- cli::cli_process_start(msg = msg,
msg_done = msg_done,
msg_failed = msg_failed,
msg_class = msg_class,
done_class = done_class,
failed_class = failed_class,
.envir = env)
result <- tryCatch(expr = rlang::eval_tidy(expr = {{ expr }},
env = env),
error = \(x) {
cli::cli_process_failed(status_bar_container_id)
rlang::cnd_signal(x)
})
cli::cli_process_done(status_bar_container_id)
invisible(result)
}
#' Get Git file modification time
#'
#' Determine the time a file in a Git repository was last modified.
#'
#' Note that only *committed* changes to the file are regarded. The modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time).
#'
#' @param path Path to a file, relative to the Git repository root.
#' @param repo Path to a Git repository.
#'
#' @return `r pkgsnip::return_lbl("datetime")`
#' @family git
#' @export
#'
#' @examples
#' \dontrun{
#' pal::git_file_mod_time(path = "README.md")}
git_file_mod_time <- function(path,
repo = ".") {
rlang::check_installed("git2r",
reason = reason_pkg_required())
# make path relative to `repo` if necessary to please `git2r::blame()`
if (fs::is_absolute_path(path)) {
path <-
fs::path_abs(repo) |>
c(path) |>
fs::path_common() |>
fs::path_rel(path = path)
}
git2r::blame(repo = repo,
path = path)$hunks |>
purrr::map(\(x) as.POSIXct(x$final_signature$when)) |>
purrr::list_c(ptype = vctrs::new_datetime()) |>
safe_max()
}
#' Get Git remote tree URL
#'
#' Determines the base Git tree URL to the current branch's upstream remote of `repo`.
#'
#' This function is useful to assemble URLs to files and folders in your repo's Git forge (GitHub, GitLab, etc.).
#'
#' @inheritParams gert::git_remote_list
#' @param remote Name of the Git remote to determine the URL for. Defaults to `"origin"` if present, otherwise the first remote listed by
#' [gert::git_remote_list()].
#'
#' @return If `repo` is a Git repository, a character scalar. Otherwise, a character vector of length zero.
#' @family git
#' @export
#'
#' @examples
#' pal::git_remote_tree_url() |>
#' paste0("Rmd/pal.Rmd") |>
#' browseURL()
git_remote_tree_url <- function(repo = ".",
remote = NULL) {
checkmate::assert_string(remote,
null.ok = TRUE)
rlang::check_installed("gert",
reason = reason_pkg_required())
remotes <- try(gert::git_remote_list(repo = repo))
# return empty chr if no remotes found
if (!is.data.frame(remotes)) {
return(character())
}
# if multiple remotes present and no explicit `remote` provided, take "origin" if present, otherwise the first one
remote <- remote %||% ifelse(nrow(remotes) > 1L && "origin" %in% remotes$name,
"origin",
remotes$name[1L])
url <-
gert::git_remote_info(repo = repo,
remote = remote) |>
_$url
# convert SSH address to URL
if (startsWith(url, "git@")) {
url %<>% stringr::str_replace_all(pattern = c("(git@[\\w\\.]{1,}\\.[a-z]{2,}):" = "\\1/",
"^git@" = "https://",
"\\.git$" = ""))
}
add_slash_minus <- stringr::str_detect(url, stringr::fixed("gitlab"))
url %<>% paste0("/-"[add_slash_minus], "/tree/", gert::git_branch(), "/")
url
}
#' Test if an HTTP request is successful
#'
#' @description
#' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions that returns
#'
#' - `TRUE` if the specified `url` could be resolved _and_ a [`HEAD`](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods) request could
#' be [successfully completed](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes), or
#'
#' - `FALSE` in any other case.
#'
#' @details
#' This function is similar to [RCurl::url.exists()], i.e. it only retrieves the header, no body, but is based on [httr2][httr2::httr2-package] which in turn is
#' based on [curl](https://jeroen.cran.dev/curl/).
#'
#' For checks on lower levels of the network stack like performing DNS queries or TCP port pings, see the [pingr](https://r-lib.github.io/pingr/) package.
#'
#' @inheritParams httr2::req_retry
#' @inheritParams httr2::req_perform
#' @param url HTTP protocol address. The scheme is optional, so both `"google.com"` and `"https://google.com"` will work. A character scalar.
#' @param max_tries `r pkgsnip::param_lbl("max_tries")`
#'
#' @return A logical scalar.
#' @family http
#' @export
#'
#' @examples
#' pal::is_http_success("goo.gl")
#' pal::is_http_success("https://google.com/")
#' pal::is_http_success("https://google.not/")
#'
#' # by default, requests are only retried on HTTP 429 and 503 status codes
#' pal::is_http_success(url = "https://httpstat.us/503",
#' max_tries = 2,
#' verbosity = 1)
#' pal::is_http_success(url = "https://httpstat.us/500",
#' max_tries = 2,
#' verbosity = 1)
#'
#' # to retry on *all* failing status codes, set `is_transient` accordingly:
#' pal::is_http_success(url = "https://httpstat.us/500",
#' max_tries = 2,
#' is_transient = \(x) TRUE,
#' verbosity = 1)
is_http_success <- function(url,
max_tries = 1L,
retry_on_failure = FALSE,
is_transient = NULL,
verbosity = NULL) {
rlang::check_installed("httr2",
reason = reason_pkg_required())
tryCatch(expr =
httr2::request(base_url = url) |>
httr2::req_method(method = "HEAD") |>
httr2::req_retry(max_tries = max_tries,
retry_on_failure = retry_on_failure,
is_transient = is_transient) |>
httr2::req_perform(verbosity = verbosity) |>
httr2::resp_is_error() |>
magrittr::not(),
error = \(x) FALSE,
interrupt = \(x) cli::cli_abort("Terminated by the user."))
}
#' Test if URL
#'
#' Tests whether the elements of a character vector are [Uniform Resource Locators](https://de.wikipedia.org/wiki/Uniform_Resource_Locator) (URLs).
#'
#' This function is based on [xml2::url_parse()] and simply checks whether the elements in `x` contain both a **scheme** as well as *some* **scheme-specific
#' part** (excl. ports). No further checks are performed, so it's explicitly not verified that a URL actually conforms to its respective scheme.
#'
#' @param x Character vector to test.
#'
#' @return A logical vector of the same length as `x`.
#' @family http
#' @export
#'
#' @examples
#' pal::is_url(c("/some/path",
#' "file:///some/path"))
is_url <- function(x) {
checkmate::assert_character(x,
any.missing = FALSE)
rlang::check_installed("xml2",
reason = reason_pkg_required())
parsed <- xml2::url_parse(x)
has_scheme <- nchar(parsed$scheme) > 0L
has_specifics <- purrr::pmap_lgl(parsed,
# NOTE: we don't consider col `port`
~ any(nchar(c(..2, ..4, ..5, ..6, ..7)) > 0L))
has_scheme & has_specifics
}
#' Perform a cached HTTP GET request
#'
#' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions.
#'
#' @inheritParams is_http_success
#'
#' @inherit httr2::req_perform return
#' @family http
#' @export
req_cached <- function(url,
max_tries = 3L) {
rlang::check_installed("httr2",
reason = reason_pkg_required())
httr2::request(base_url = url) |>
httr2::req_method(method = "GET") |>
httr2::req_cache(path = fs::path(tools::R_user_dir(package = "httr2",
which = "cache"),
"req_cache")) |>
httr2::req_retry(max_tries = max_tries) |>
httr2::req_perform()
}
#' Read in and parse TOML file as strict list
#'
#' Reads in a file in [Tom's Obvious Minimal Language (TOML)](https://toml.io/) format and returns its content as a (nested) [strict list][xfun::strict_list()].
#'
#' The file is parsed using [`RcppTOML::parseTOML(escape = FALSE)`][RcppTOML::parseTOML].
#'
#' @inheritParams RcppTOML::parseTOML
#' @param input If `from_file = TRUE`, the path to a TOML file as a character scalar. Otherwise, TOML content as a character vector.
#' @param from_file Whether `input` is the path to a TOML file or already a character vector of TOML content.
#'
#' @return `r pkgsnip::return_lbl("strict_list")`
#' @family toml
#' @export
toml_read <- function(input,
from_file = TRUE,
verbose = FALSE) {
checkmate::assert_flag(from_file)
checkmate::assert_flag(verbose)
rlang::check_installed("RcppTOML",
reason = reason_pkg_required())
rlang::check_installed("xfun",
reason = reason_pkg_required())
if (from_file) {
checkmate::assert_file_exists(input,
access = "r")
} else {
checkmate::assert_character(input)
# reduce to character scalar since `RcppTOML::parseTOML()` doesn't accept vectors
input %<>% paste0(collapse = "\n")
}
xfun::as_strict_list(RcppTOML::parseTOML(input = input,
verbose = verbose,
fromFile = from_file,
escape = FALSE))
}
#' Validate TOML
#'
#' Validates a TOML file or character vector using the external [Taplo CLI](https://taplo.tamasfe.dev/cli/introduction.html), optionally against a [JSON
#' Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)).
#'
#' The highest supported JSON Schema specification is [Draft 4](https://json-schema.org/specification-links.html#draft-4). This is a [limitation of
#' the underlying tool Taplo](https://taplo.tamasfe.dev/configuration/developing-schemas.html).
#'
#' Taplo allows to define the schema to be used directly in the TOML file using the [`schema` header
#' directive](https://taplo.tamasfe.dev/configuration/directives.html#the-schema-directive). Note that the `schema` argument has precendence unless set to
#' `NULL` (the default).
#'
#' # Why JSON Schema-based validation
#'
#' Although there are two noteworthy attempts at introducing a native validation format for TOML, [TOLS](https://github.com/toml-lang/toml/pull/116/) and [TOML
#' Schema](https://github.com/toml-lang/toml/issues/792), neither of them has been officially adopted yet. As it appears, it could take several more years to
#' decades until such thing happens, if ever.
#'
#' In the meantime, we can use the JSON Schema format as an alternative thanks to an awesome implementation by the [Taplo
#' CLI](https://taplo.tamasfe.dev/cli/introduction.html), which itself is written in the Rust programming language and [available as a single-binary program for
#' all common platforms](https://taplo.tamasfe.dev/cli/installation/binary.html).
#'
#' @inheritParams toml_read
#' @param schema URL to a [JSON Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)) file to validate `input`
#' against. Can also be a local filesystem path specified in the [file URI scheme](https://en.wikipedia.org/wiki/File_URI_scheme) (absolute path prefixed with
#' `file://`). If `NULL`, no schema-based validation is performed and `input` is only checked to be TOML-compliant.
#' @param top_errors_only Whether to reduce the output to the top error message of each of TOML CLI's error classes. If `FALSE`, TOML CLI's complete error
#' output is shown.
#'
#' @return If the validation is successful, `input` invisibly. Otherwise an error is thrown.
#' @family toml
#' @export
#'
#' @examples
#' try(
#' pal::toml_validate(input = "key = tru",
#' from_file = FALSE)
#' )
toml_validate <- function(input,
from_file = TRUE,
schema = NULL,
top_errors_only = TRUE) {
checkmate::assert_flag(from_file)
checkmate::assert_flag(top_errors_only)
checkmate::assert_string(schema,
null.ok = TRUE)
if (!is.null(schema) && !is_url(schema)) {
cli::cli_abort(paste0("{.arg schema} must be a valid URL. To refer to a (local) filesystem path, prepend it with the {.href [`file://` URI ",
"scheme](https://en.wikipedia.org/wiki/File_URI_scheme)}."))
}
if (from_file) {
checkmate::assert_file_exists(input,
access = "r")
} else {
checkmate::assert_character(input)
}
assert_cli(cmd = "taplo",
error_msg = paste0("The {.strong taplo} executable is required but couldn't be found on system's {.href ",
"[PATH](https://en.wikipedia.org/wiki/PATH_(variable))}. Binaries of the {.href [Taplo ",
"CLI](https://taplo.tamasfe.dev/cli/introduction.html)} are available for all common platforms from here: ",
"{.url https://taplo.tamasfe.dev/cli/installation/binary.html}"))
result <- suppressWarnings(system2(command = "taplo",
args = c("lint",
"--no-auto-config",
"--colors=always",
paste0("--schema=", schema)[!is.null(schema)],
ifelse(from_file,
input,
"-")),
stdout = TRUE,
stderr = TRUE,
input = if (from_file) NULL else input,
# cf. https://taplo.tamasfe.dev/cli/usage/configuration.html
env = "RUST_LOG=error",
timeout = 5L))
result_excl_ansi <- cli::ansi_strip(result)
if (length(result) > 1L && stringr::str_detect(string = result_excl_ansi[1L],
pattern = "(?i)error")) {
# only retain first Taplo error of each class (later ones are usually unhelpful)
ix_error <-
result_excl_ansi |>
stringr::str_detect(pattern = "^(?i)error:") |>
which()
ix_error_begin <-
result_excl_ansi |>
stringr::str_detect(pattern = "^(?i)error:") |>
magrittr::and(!duplicated(result_excl_ansi)) |>
which()
if (top_errors_only && length(ix_error) > 0L) {
i_error_end <-
result_excl_ansi |>
stringr::str_detect(pattern = "^ERROR ") |>
which() |>
dplyr::first()
ix_final <-
ix_error_begin |>
purrr::map(\(x) {
i_error_current <- which(ix_error == x)
has_next <- i_error_current < length(ix_error)
seq(from = x,
to = ifelse(has_next,
ix_error[i_error_current + 1L] - 1L,
i_error_end - 1L))
}) |>
purrr::list_c(ptype = integer())
} else {
ix_final <- seq_along(result)
}
# we avoid `cli::cli_abort()` for now since it always strips consecutive whitespaces, even non-breaking ones
# cf. https://github.com/r-lib/cli/issues/531#issuecomment-1292639286
rlang::abort(message = c(ifelse(from_file,
cli::format_inline("Validation of {.file {input}} failed with"),
"TOML validation failed with:"),
# turning it into a named vctr avoids the default bullets
" " = "",
result[ix_final],
""),
use_cli_format = FALSE)
}
invisible(input)
}
#' Check if CLI tool is available
#'
#' Tests whether a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's
#' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and optionally returns the executable's filesystem path.
#'
#' @param cmd System command to invoke the CLI tool. A character scalar.
#' @param get_cmd_path Whether or not to return the filesystem path to the CLI tool. If `FALSE`, a boolean is returned indicating if the CLI tool is found on
#' the system or not.
#' @param force_which If set to `TRUE`, [base::Sys.which()], which relies on the system command `which`, will be used instead of `command -v` to determine the
#' availability of `cmd` on Unix-like systems. On Windows, `base::Sys.which()` is used in any case. `command -v` is
#' [generally recommended for bourne-like shells](https://unix.stackexchange.com/q/85249/201803) and therefore is the default on Linux, macOS and other
#' [Unixes](https://en.wikipedia.org/wiki/Unix-like).
#'
#' @return A logical scalar if `get_cmd_path = FALSE`, otherwise the filesystem [path][fs::path] to the `cmd` executable.
#' @family sys
#' @export
#'
#' @examples
#' pal::test_cli("Rscript")
#'
#' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc")
#' pal::test_cli(cmd, get_cmd_path = TRUE)
test_cli <- function(cmd,
get_cmd_path = FALSE,
force_which = FALSE) {
checkmate::assert_string(cmd)
checkmate::assert_flag(get_cmd_path)
checkmate::assert_flag(force_which)
if (force_which || checkmate::test_os("windows")) {
result <-
Sys.which(names = cmd) |>
as.character() |>
when(. == "" ~ character(),
~ .) |>
when(get_cmd_path ~ fs::path(.),
length(.) == 0L ~ FALSE,
~ TRUE)
} else {
# define "defused" warning/error handler
defuse <- function(e) if (get_cmd_path) character() else FALSE
result <-
tryCatch(expr = system2(command = "command",
args = c("-v",
cmd),
stdout = get_cmd_path,
stderr = get_cmd_path),
warning = defuse,
error = defuse) |>
when(get_cmd_path ~ fs::path(.),
isFALSE(.) ~ .,
~ TRUE)
}
result
}
#' Assert CLI tool is available
#'
#' Asserts that a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's
#' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and returns the executable's filesystem path.
#'
#' @inheritParams test_cli
#' @param error_msg Error message to print in case `cmd` is not found on system's `PATH`. `r pkgsnip::param_lbl("cli_markup_support")` A character scalar.
#'
#' @return If the CLI tool is available on the system's `PATH`, its filesystem path, invisibly. Otherwise, an error is thrown.
#' @family sys
#' @family checkmate
#' @export
#'
#' @examples
#' pal::assert_cli("Rscript")
#'
#' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc")
#' pal::assert_cli(cmd)
assert_cli <- function(cmd,
error_msg = paste0("The {.strong {cmd}} executable is required but couldn't be found on ",
"system's {.href [PATH](https://en.wikipedia.org/wiki/PATH_(variable))}."),
force_which = FALSE) {
checkmate::assert_string(error_msg)
cli_path <- test_cli(cmd = cmd,
get_cmd_path = TRUE,
force_which = force_which)
if (length(cli_path) == 0L) {
cli::cli_abort(error_msg)
}
invisible(cli_path)
}
#' Determine file path of executing script
#'
#' Tries to determine the path to the R/Rmd script that this function is called from.
#'
#' @return The file path to the executing script.
#' @family sys
#' @export
path_script <- function() {
rlang::check_installed("rprojroot",
reason = reason_pkg_required())
rlang::check_installed("rstudioapi",
reason = reason_pkg_required())
cmd_args <- commandArgs(trailingOnly = FALSE)
needle <- "--file="
match <- grep(x = cmd_args,
pattern = needle)
# Rscript
if (length(match) > 0L) {
return(normalizePath(sub(needle, "", cmd_args[match])))
}
# `source()`d via R console
if (!is.null(sys.frames()[[1L]][["ofile"]])) {
return(normalizePath(sys.frames()[[1L]][["ofile"]]))
}
# RStudio Run Selection, cf. http://stackoverflow.com/a/35842176/2292993
if (!is.null(rprojroot::thisfile())) {
return(rprojroot::thisfile())
}
# RStudio document
path <- rstudioapi::getActiveDocumentContext()[["path"]]
if (path != "") {
return(normalizePath(path))
}
cli::cli_abort("Couldn't determine script path.")
}
#' Capture printed console output as string
#'
#' Returns what [`print(x)`][base::print()] would output on the console – if `collapse` is set to anything other than `NULL`, as a character scalar
#' (i.e. a string), otherwise as a character vector of output lines.
#'
#' This is a simple convenience wrapper around [utils::capture.output()]. Note that [ANSI escape sequences](https://en.wikipedia.org/wiki/ANSI_escape_code)
#' (e.g. as output by the `print()` methods of tidyverse packages) are included in the result. To remove them, use [cli::ansi_strip()].
#'
#' @param x \R object of which the output of `print()` is to be captured.
#' @param collapse Optional string for concatenating the results. If `NULL`, a character vector of print lines is returned.
#'
#' @return A character vector if `collapse = NULL`, otherwise a character scalar.
#' @export
#'
#' @examples
#' mtcars |> pal::capture_print()
#' mtcars |> pal::capture_print(collapse = "\n") |> cat()
#'
#' # to strip ANSI escape sequences, use `cli::ansi_strip()`
#' mtcars |> tibble::as_tibble()
#'
#' mtcars |>
#' tibble::as_tibble() |>
#' pal::capture_print(collapse = "\n") |>
#' cli::ansi_strip() |>
#' cat()
capture_print <- function(x,
collapse = NULL) {
utils::capture.output(print(x),
file = NULL,
type = "output",
split = FALSE) |>
paste0(collapse = collapse)
}
#' Convert to character vector and print newline-separated
#'
#' Convenience wrapper around [as_chr()] and [base::cat()], mainly intended for interactive use.
#'
#' @param ... \R object(s) to convert to character and print. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inherit base::cat return
#' @seealso
#' [cli::cat_line()] for a faster alternative that doesn't _recursively_ convert its input to type character.
#'
#' [xfun::raw_string()] (and [xfun::file_string()]) for an alternative approach to the same use case (but without any conversion to type character at all).
#'
#' [`writeLines(con = stdout())`][base::writeLines] for a base R alternative that *only* accepts character inputs.
#' @export
#'
#' @examples
#' fs::path_package(package = "pal",
#' "rstudio", "addins.dcf") |>
#' readr::read_lines() |>
#' pal::cat_lines()
#'
#' # conversion to type character, recursive vs. non-recursive
#' to_convert <- list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day"))
#'
#' to_convert |> pal::cat_lines()
#' to_convert |> cli::cat_line()
#'
#' # this OTOH only accepts chr inputs
#' try(
#' to_convert |> writeLines()
#' )
cat_lines <- function(...) {
cat(as_chr(...),
sep = "\n")
}
#' Create [readr][readr::readr-package] column specification using regular expression matching
#'
#' Allows to define a regular expression per desired [column specification object][readr::cols] matching the respective column names.
#'
#' @param ... Named arguments where the names are (Perl-compatible) regular expressions and the values are column objects created by `col_*()`, or their
#' abbreviated character names (as described in the `col_types` parameter of [readr::read_delim()]). `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .col_names Column names which should be matched by `...`.
#' @param .default Any named columns not matched by any of the regular expressions in `...` will be read with this column type.
#'
#' @return A [column specification][readr::cols].
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # for some hypothetical CSV data column names like these...
#' col_names <- c("VAR1_Text",
#' "VAR2_Text",
#' "VAR3_Text_Other",
#' "VAR1_Code_R1",
#' "VAR2_Code_R2",
#' "HAS_R1_Lag",
#' "HAS_R2_Lag",
#' "GARBAGEX67",
#' "GARBAGEY09")
#'
#' # ...a column spec could be created concisely as follows:
#' col_regex <- list("_Text(_|$)" = "c",
#' "_Code(_|$)" = "i",
#' "^GARBAGE" = readr::col_skip())
#'
#' pal::cols_regex(.col_names = col_names,
#' !!!col_regex,
#' .default = "l")
#'
#' # we can parse some real data:
#' url <- "https://salim_b.gitlab.io/misc/Kantonsratswahl_Zuerich_2019_Ergebnisse_Gemeinden.csv"
#'
#' raw_data <-
#' httr2::request(url) |>
#' httr2::req_perform() |>
#' httr2::resp_body_string()
#'
#' col_spec <- pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
#' "(?i)anteil" = "d",
#' .default = "i",
#' .col_names = pal::dsv_colnames(raw_data))
#'
#' print(col_spec)
#'
#' readr::read_csv(file = raw_data,
#' col_types = col_spec)
#'
#' # to process the same data without first downloading it to disk, use `readr::type_convert()`:
#' readr::read_csv(file = url,
#' col_types = list(.default = "c")) %>%
#' readr::type_convert(col_types = col_spec)
cols_regex <- function(...,
.col_names,
.default = readr::col_character()) {
rlang::check_installed("readr",
reason = reason_pkg_required())
spec <- list()
patterns <- rlang::list2(...)
if (length(setdiff(names(patterns), "")) < length(patterns)) {
cli::cli_abort("All column specifications in {.arg ...} must be named by a regular expression.")
}
for (i in seq_along(patterns)) {
matched_vars <- grep(x = .col_names,
pattern = names(patterns[i]),
value = TRUE,
perl = TRUE)
spec <-
rep(list(patterns[[i]]),
length(matched_vars)) |>
magrittr::set_names(matched_vars) %>%
c(spec, .)
}
spec <- c(spec, alist(.default = .default))
do.call(readr::cols, spec)
}
#' MIME type to file extension
#'
#' Determines a suitable file extension from a [MIME type](https://en.wikipedia.org/wiki/Media_type), based i.a. on [`mime::mimemap`][mime::mimemap] and
#' `mime:::mimeextra`. In case of multiple matches, the first one is returned and a warning is printed (unless `quiet = TRUE`).
#'
#' @param mime_type MIME type to determine the file extension for. A character scalar.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A character scalar.
#' @export
#'
#' @examples
#' pal::mime_to_ext("application/json")
#' pal::mime_to_ext("audio/mpeg")
mime_to_ext = function(mime_type,
quiet = FALSE) {
checkmate::assert_string(mime_type)
i <- which(mime_types_exts %in% mime_type)
result <- names(mime_types_exts[i])
if (length(i) == 0L) {
return(NA_character_)
}
if (!quiet && length(i) > 1L) {
cli::cli_warn("The MIME type {.val {mime_type}} maps to multiple file extensions ({.val {result}}). Only the first one is returned.")
}
result[1L]
}
#' Rename elements from dictionary
#'
#' Renames the elements of a vector or list from a dictionary, leaving unmatched names untouched by default.
#'
#' @param x Object whose elements are to be renamed.
#' @param dict A named character vector or list of strings where names are the old and the values are the new names.
#' @param default Value(s) used when names aren't matched by any name in `dict`. Recycled to the length of `x`.
#'
#' @return `x` with elements renamed according to `dict`.
#' @export
#'
#' @examples
#' mtcars |> pal::rename_from(dict = c(mpg = "Miles/(US) gallon",
#' cyl = "Number of cylinders",
#' disp = "Displacement (cu.in.)",
#' hp = "Gross horsepower",
#' drat = "Rear axle ratio",
#' wt = "Weight (1000 lbs)",
#' qsec = "1/4 mile time",
#' vs = "Engine (0 = V-shaped, 1 = straight)",
#' am = "Transmission (0 = automatic, 1 = manual)",
#' gear = "Number of forward gears",
#' carb = "Number of carburetors",
#' not_there = "Yikes!"))
rename_from <- function(x,
dict,
default = names(x)) {
checkmate::assert_named(x)
if (is.list(dict)) {
checkmate::assert_list(dict,
types = "character",
any.missing = FALSE,
names = "unique")
} else {
checkmate::assert_vector(dict,
strict = TRUE,
any.missing = FALSE,
names = "unique")
}
if (length(x) > 0L) {
checkmate::assert_character(default)
dict %<>% purrr::imap(\(x, name) rlang::new_formula(lhs = name,
rhs = x,
env = emptyenv()))
names_new <- dplyr::case_match(names(x),
!!!dict,
.default = default)
return(magrittr::set_names(x, names_new))
} else {
return(x)
}
}
#' Sort vector by another vector
#'
#' Arranges a vector `x` by the order of another vector `by`.
#'
#' Note that this function significantly differs from [base::sort_by()].
#'
#' @param x Vector to be ordered.
#' @param by Reference vector which `x` will be ordered by.
#'
#' @return A permutation of `x`.
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # generate 100 random letters
#' random_letters <-
#' letters |>
#' magrittr::extract(sample.int(n = 26L,
#' size = 100L,
#' replace = TRUE)) %T>%
#' print()
#'
#' # sort the random letters alphabetically
#' random_letters |> pal::arrange_by(by = letters)
arrange_by <- function(x,
by) {
x[order(match(x = x, table = by))]
}
#' Generalized `if`-`else` for pipes
#'
#' A flavour of pattern matching (or an if-else abstraction) in which a value `.` is matched against a sequence `...` of condition-action sets. When a valid
#' match/condition is found, the action is executed and the result of the action is returned.
#'
#' Condition-action sets are written as formulas with conditions as left-hand sides (LHS) and actions as right-hand sides (RHS). A formula with only a
#' right-hand side will be treated as a condition which is always satisfied. For such a default case one can also omit the `~` symbol, but note that its value
#' will then be evaluated early. Any named argument will be made available in all conditions and actions, which is useful in avoiding repeated temporary
#' computations or temporary assignments.
#
#' Validity of the conditions are tested with [base::isTRUE()]. In other words conditions resulting in more than one logical will never be valid. Note that the
#' input value is always treated as a single object, as opposed to the [base::ifelse()] function.
#'
#' This function is copied over from package purrr since it [has been deprecated with the release of purrr
#' 1.0](https://www.tidyverse.org/blog/2022/12/purrr-1-0-0/#core-purpose-refinements). `pal::when()` can be used as a drop-in replacement for `purrr::when()`.
#' All credits go to the original authors of `purrr::when()` [as of
#' 2022-12-21](https://github.com/tidyverse/purrr/blame/54546b6bf938c54da4d232061db2fc91b1d5923c/R/deprec-when.R).
#'
#' @param . Value to match against.
#' @param ... Formulas, each containing a condition as LHS and an action as RHS. Named arguments will define additional values.
#' `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @return The value resulting from the action of the first matched condition, or `NULL` if no matches are found and no default is given.
#' @export
#'
#' @examples
#' 1:10 |> pal::when(sum(.) <= 50 ~ sum(.),
#' sum(.) <= 100 ~ sum(.)/2L,
#' ~ 0L)
when <- function(.,
...) {
dots <- rlang::list2(...)
names <- names(dots)
named <- if (is.null(names)) rep(FALSE, length(dots)) else names != ""
if (sum(!named) == 0L) {
cli::cli_abort("At least one matching condition is needed.")
}
is_formula <- vapply(dots,
function(dot) identical(class(dot), "formula"),
logical(1L))
env <- new.env(parent = parent.frame())
env[["."]] <- .
if (sum(named) > 0L) {
for (i in which(named)) {
env[[names[i]]] <- dots[[i]]
}
}
result <- NULL
for (i in which(!named)) {
if (is_formula[i]) {
action <- length(dots[[i]])
if (action == 2L || isTRUE(eval(dots[[i]][[2L]], env, env))) {
result <- eval(dots[[i]][[action]], env, env)
break
}
} else {
result <- dots[[i]]
}
}
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.