Utility Functions

Documented in safe_max safe_seq_len

# DO NOT EDIT THIS FILE BY HAND! Instead edit the R Markdown source file `Rmd/pal.Rmd` and run `pkgpurl::purl_rmd()`.
# See `README.md#r-markdown-format` for more information on the literate programming approach used applying the R Markdown format.

# pal: Friendly Convenience/Utility Functions
# Copyright (C) 2025 Salim Brüggemann
# 
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
# 
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
# 
# You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.

#' Generate an integer sequence of specific length (safe)
#'
#' Modified version of [base::seq_len()] that returns a zero-length integer in case of a zero-length input instead of throwing an error.
#'
#' @param n Desired length of the integer sequence.
#'
#' @return An integer sequence starting from `1L`.
#' @family stat
#' @export
#'
#' @examples
#' pal::safe_seq_len(5)
#'
#' # this function simply returns a zero-length integer for zero-length inputs...
#' pal::safe_seq_len(NULL)
#' pal::safe_seq_len(integer())
#' 
#' # ...while `seq_len()` throws an error
#' try(seq_len(NULL))
#' try(seq_len(integer()))
safe_seq_len <- function(n) {
  
  if (length(n) > 0L) {
    return(seq_len(n))
    
  } else {
    return(integer())
  }
}

#' Maximum (safe)
#'
#' @description
#' Modified version of [base::max()] that differs in the following ways:
#'
#' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`).
#' - If the input is of length zero, the output will also be of length zero (of the same type as the input).
#' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input
#'   contains only `-Inf` or `NA`.
#' - `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @param ... Numeric objects of which to determine the maximum. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param rm_na Ignore missing values in `...`. If missing values are present and `rm_na = FALSE`, the result will always be `NA`.
#'
#' @return A numeric scalar or empty value, of the same type as `...`.
#' @family stat
#' @export
#'
#' @examples
#' # other than `base::max()`, this function removes `NA`s by default
#' max(1, NA_real_, 2)
#' pal::safe_max(1, NA_real_, 2)
#'
#' # other than `base::max()`, this function does not return `-Inf` or `NA_character_` for
#' # zero-length inputs
#' max(NULL)
#' max(integer())
#' pal::safe_max(NULL)
#' pal::safe_max(integer())
#' 
#' # other than `base::max()`, this function fails for non-numeric inputs
#' max("zero", 1L)
#' max("zero", "one")
#' max(character())
#' try(pal::safe_max("zero", 1L))
#' try(pal::safe_max("zero", "one"))
#' try(pal::safe_max(character()))
safe_max <- function(...,
                     rm_na = TRUE) {
  
  input <- rlang::list2(...)
  
  for (i in seq_along(input)) {
    checkmate::assert_numeric(input[[i]],
                              typed.missing = TRUE,
                              null.ok = TRUE,
                              .var.name = "...")
  }
  
  input |>
    purrr::reduce(c) |>
    when(length(.) == 0L ~ .[0L],
         all(is.na(.)) && rm_na ~ .[NA],
         ~ max(., na.rm = rm_na))
}

#' Minimum (safe)
#'
#' @description
#' Modified version of [base::min()] that differs in the following ways:
#'
#' - `NA`s in the input (`...`) are ignored _by default_ (`rm_na = TRUE`).
#' - If the input is of length zero, the output will also be of length zero (of the same type as the input).
#' - It is ensured that all inputs are either numeric, of length zero or `NA`. The only case where the return value will be `-Inf` or `NA` is when the input
#'   contains only `-Inf` or `NA`.
#' - `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inheritParams safe_max
#' @param ... Numeric objects of which to determine the minimum. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inherit safe_max return
#' @family stat
#' @export
#'
#' @examples
#' # other than `base::min()`, this function removes `NA`s by default
#' min(1, NA_real_, 2)
#' pal::safe_min(1, NA_real_, 2)
#'
#' # other than `base::min()`, this function does not return `-Inf` or `NA_character_` for
#' # zero-length inputs
#' min(NULL)
#' min(integer())
#' pal::safe_min(NULL)
#' pal::safe_min(integer())
#' 
#' # other than `base::min()`, this function fails for non-numeric inputs
#' min("zero", 1L)
#' min("zero", "one")
#' min(character())
#' try(pal::safe_min("zero", 1L))
#' try(pal::safe_min("zero", "one"))
#' try(pal::safe_min(character()))
safe_min <- function(...,
                     rm_na = TRUE) {
  
  input <- rlang::list2(...)
  
  for (i in seq_along(input)) {
    checkmate::assert_numeric(input[[i]],
                              typed.missing = TRUE,
                              null.ok = TRUE,
                              .var.name = "...")
  }
  
  input |>
    purrr::reduce(c) |>
    when(length(.) == 0L ~ .[0L],
         all(is.na(.)) && rm_na ~ .[NA],
         ~ min(., na.rm = rm_na))
}

#' Round to any number
#'
#' Round a numeric vector to any number, rounded up by default (`round_up = TRUE`).
#'
#' This function's precision is limited to 15 significant digits in order to account for the [limits of R's floating point
#' representation](https://stackoverflow.com/a/35349949/7196903).
#'
#' A computationally more efficient alternative would be the unexported `scales:::round_any()` which drives [scales::number()] – with the drawback that it lacks
#' control of rounding up exact remainders of `accuracy / 2`, i.e. it _always_ rounds _off_.
#'
#' @param x Vector of numbers to round.
#' @param to Number to round `x` to. A numeric scalar.
#' @param round_up Whether to round a remainder of exactly `to / 2` _up_ or not. Set to `FALSE` in order to round _off_.
#'
#' @return A numeric vector of the same length as `x`.
#' @seealso [prettyunits::pretty_round()]
#' @family stat
#' @export
#'
#' @examples
#' vals = c(0.025, 0.1, 0.1999, 0.099999, 0.49, 0.55, 0.5, 0.9, 1)
#' vals |> pal::round_to(to = 0.05)
#' vals |> pal::round_to(to = 0.05,
#'                       round_up = FALSE)
round_to <- function(x,
                     to = 0.2,
                     round_up = TRUE) {
  
  checkmate::assert_number(to,
                           lower = 0L,
                           finite = TRUE)
  checkmate::assert_flag(round_up)
  
  result <- x %/% to
  remainder <- signif(x %% to,
                      # round to a max of 15 significant digits to avoid exceeding floating-point representation limits
                      digits = 15L)
  if (round_up) {
    which_round <- remainder >= (to / 2L)
  } else {
    which_round <- remainder > (to / 2L)
  }
  
  result[which_round] <- result[which_round] + 1L
  result * to
}

#' Statistical mode
#'
#' Computes the [statistical mode](https://en.wikipedia.org/wiki/Mode_(statistics)) of a set of values. The mode is defined as the most frequent value, i.e. the
#' value that is most likely to be sampled.
#'
#' @param x An \R object.
#' @param type What the function should calculate.
#'   - `"one"`: Return _the_ mode of `x`. If multiple modes or no mode at all exists, `NA` is returned.
#'   - `"all"`: Return _all_ modes of `x`. If none exists (e.g. because all values of `x` are distinct), `NA` is returned.
#'   - `"n"`: Return the number of modes of `x`.
#' @param rm_na Ignore missing values in `x`. A logical scalar.
#'
#' @return If `type = "n"`, the number of modes in `x` (an integer). Otherwise, the mode(s) of `x` or `NA` if none exist(s) (same type as `x`).
#' @family stat
#' @seealso The package [modeest](https://cran.r-project.org/package=modeest) for more powerful mode estimation functions.
#' @export
#'
#' @examples
#' pal::stat_mode(c(rep(3L, times = 3), 1:9))
#' pal::stat_mode(c(1.5, 4, 9.9))
#' 
#' # if no mode exists, `NA` (of the same type as x) is returned
#' pal::stat_mode(letters)
#' pal::stat_mode(c(letters, "a"))
#' 
#' # if multiple modes exist, `NA` is returned by default
#' pal::stat_mode(c(letters, "a", "b"))
#' # set `type = "all"` to return all modes instead
#' pal::stat_mode(c(letters, "a", "b"),
#'                type = "all")
#' 
#' # `NA` is treated as any other value by default
#' pal::stat_mode(c(letters, "a", NA_character_, NA_character_),
#'                type = "all")
#' # set `rm_na = TRUE` to ignore `NA` values
#' pal::stat_mode(c(letters, "a", NA_character_, NA_character_),
#'                type = "all",
#'                rm_na = TRUE)
stat_mode <- function(x,
                      type = c("one", "all", "n"),
                      rm_na = FALSE) {
  x <- unlist(x)
  type <- rlang::arg_match(type)
  checkmate::assert_flag(rm_na)
  
  if (rm_na) x <- x[!is.na(x)]
  
  # get unique values
  u_x <- unique(x)
  n_u_x <- length(u_x)
  
  # get frequencies of all unique values
  frequencies <- tabulate(match(x, u_x))
  modes <- frequencies == max(frequencies)
  
  # determine number of modes
  n_modes <- sum(modes) %>% ifelse(. == n_u_x, 0L, .)
  
  type |> when(
    # return the number of modes if requested
    . == "n" ~ n_modes,
    # or return mode(s) if requested and existing
    (. == "one" && n_modes == 1L) || (. == "all" && n_modes > 0L) ~ u_x[which(modes)],
    # else return `NA` (of the same type as `x`)
    ~ x[NA][1L]
  )
}

#' Assert data frame columns
#'
#' Asserts that a data frame contains the specified columns.
#'
#' @param data Data frame to check.
#' @param cols Column names which must be present in `data`. A character vector or `NULL`.
#' @param strict Whether additional columns not specified in `cols` are allowed in `data`.
#' @param obj_name Name of the checked object to print in error messages.
#'
#' @return `data`, invisibly.
#' @family df
#' @export
#'
#' @examples
#' pal::assert_cols(data = mtcars,
#'                  cols = c("mpg", "disp"))
#'
#' try(
#'   pal::assert_cols(data = mtcars,
#'                    cols = c("mpg", "display"))
#' )
#'
#' try(
#'   pal::assert_cols(data = mtcars,
#'                    cols = c("mpg", "disp"),
#'                    strict = TRUE)
#' )
#'
#' try(
#'   pal::assert_cols(data = mtcars,
#'                    strict = TRUE)
#' )
assert_cols <- function(data,
                        cols = NULL,
                        strict = FALSE,
                        obj_name = checkmate::vname(data)) {
  
  checkmate::assert_character(cols,
                              null.ok = TRUE)
  checkmate::assert_flag(strict)
  checkmate::assert_string(obj_name)
  checkmate::assert_data_frame(data,
                               min.cols = length(cols))
  
  # disallow any cols when `strict` without `cols`
  if (strict && is.null(cols)) {
    cols <- character()
  }
  
  checkmate::assert_names(colnames(data),
                          must.include = if (strict) NULL else cols,
                          identical.to = if (strict) cols else NULL,
                          what = "column names",
                          .var.name = obj_name)
  invisible(data)
}

#' Test if two data frames/tibbles are equal
#'
#' Compares two [data frames][base::data.frame]/[tibbles][tibble::tbl_df] (or two objects coercible to tibbles like [matrices][base::matrix]), optionally
#' ignoring row and column ordering, and returns `TRUE` if both are equal, or `FALSE` otherwise. If the latter is the case and `quiet = FALSE`, information
#' about detected differences is printed to the console.
#'
#' Under the hood, this function relies on [waldo::compare()].
#'
#' @inheritParams waldo::compare
#' @param x The data frame / tibble to check for changes.
#' @param y The data frame / tibble that `x` should be checked against, i.e. the reference, so messages describe how `x` is different to `y`.
#' @param ignore_col_order Whether or not to ignore the order of columns.
#' @param ignore_row_order Whether or not to ignore the order of rows.
#' @param ignore_col_types Whether or not to distinguish similar column types. Currently, if set to `TRUE`, this will convert factors to characters and integers
#'   to doubles before the comparison.
#' @param quiet Whether or not to output detected differences between `x` and `y` to the console.
#' @param max_diffs Maximum number of differences shown. Only relevant if `quiet = FALSE` or `return_waldo_compare = TRUE`. Set `max_diffs = Inf` to see all
#'   differences.
#' @param return_waldo_compare Whether to return a character vector of class [`waldo_compare`][waldo::compare] describing the differences between `x` and `y`
#'   instead of `TRUE` or `FALSE`.
#'
#' @return If `return_waldo_compare = FALSE`, a logical scalar indicating the result of the comparison. Otherwise a character vector of class
#'   [`waldo_compare`][waldo::compare] describing the differences between `x` and `y`.
#' @family df
#' @export
#'
#' @examples
#' scramble <- function(x) x[sample(nrow(x)), sample(ncol(x))]
#'
#' # by default, ordering of rows and columns matters...
#' pal::is_equal_df(x = mtcars,
#'                  y = scramble(mtcars))
#'
#' # ...but those can be ignored if desired
#' pal::is_equal_df(x = mtcars,
#'                  y = scramble(mtcars),
#'                  ignore_col_order = TRUE)
#' pal::is_equal_df(x = mtcars,
#'                  y = scramble(mtcars),
#'                  ignore_row_order = TRUE)
#'
#' # by default, `is_equal_df()` is sensitive to column type differences...
#' df1 <- data.frame(x = "a",
#'                   stringsAsFactors = FALSE)
#' df2 <- data.frame(x = factor("a"))
#' pal::is_equal_df(df1, df2)
#'
#' # ...but you can request it to not make a difference between similar types
#' pal::is_equal_df(df1, df2,
#'                  ignore_col_types = TRUE)
is_equal_df <- function(x,
                        y,
                        ignore_col_order = FALSE,
                        ignore_row_order = FALSE,
                        ignore_col_types = FALSE,
                        tolerance = NULL,
                        quiet = TRUE,
                        max_diffs = 10L,
                        return_waldo_compare = FALSE) {
  
  rlang::check_installed("waldo",
                         reason = reason_pkg_required())
  checkmate::assert_flag(ignore_col_order)
  checkmate::assert_flag(ignore_row_order)
  checkmate::assert_flag(ignore_col_types)
  checkmate::assert_number(tolerance,
                           lower = 0.0,
                           null.ok = TRUE)
  checkmate::assert_flag(quiet)
  checkmate::assert_number(max_diffs,
                           lower = 1.0)
  checkmate::assert_flag(return_waldo_compare)
  
  # convert `x` and `y` to tibble if any modification is required
  if (ignore_col_order || ignore_row_order || ignore_col_types) {
    
    x %<>% tibble::as_tibble()
    y %<>% tibble::as_tibble()
  }
  
  # sort columns if necessary
  if (ignore_col_order) {
    
    x %<>% dplyr::select(sort(colnames(.)))
    y %<>% dplyr::select(sort(colnames(.)))
  }
  
  # sort rows if necessary
  if (ignore_row_order) {
    
    x %<>% dplyr::arrange(dplyr::across(.cols = everything()))
    y %<>% dplyr::arrange(dplyr::across(.cols = everything()))
  }
  
  # harmonize column types if necessary
  if (ignore_col_types) {
    
    x %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor),
                                       .fns = as.character),
                         dplyr::across(.cols = where(is.integer),
                                       .fns = as.double))
    y %<>% dplyr::mutate(dplyr::across(.cols = where(is.factor),
                                       .fns = as.character),
                         dplyr::across(.cols = where(is.integer),
                                       .fns = as.double))
  }
  
  result <- waldo::compare(x = x,
                           y = y,
                           tolerance = tolerance,
                           x_arg = "x",
                           y_arg = "y",
                           max_diffs = max_diffs)
  
  if (length(result) > 0L) {
    
    if (!quiet) {
      
      cli::cli_alert_info(text = "`x` differs from `y`:")
      cat("\n")
      print(result)
    }
    
    if (!return_waldo_compare) result <- FALSE
    
  } else if (!return_waldo_compare) {
    result <- TRUE
  }
  
  result
}

#' Reduce a nested list of data frames to a single data frame
#'
#' Recursively reduces a nested list containing data frames at its leafs to a single row-bound data frame.
#'
#' @param x A list containing data frames at its leafs.
#' @param strict Ensure `x` contains data frames only and throw an error otherwise. If `FALSE`, leafs containing other objects are ignored (skipped).
#'
#' @return A data frame.
#' @family df
#' @export
reduce_df_list <- function(x,
                           strict = TRUE) {
  
  checkmate::assert_flag(strict)
  
  if (is.data.frame(x)) {
    
    result <- x
    
  } else if (purrr::pluck_depth(x) < 2L) {
    
    if (strict) {
      cli::cli_abort("At least one element of the list to be reduced is not a data frame!")
      
    } else {
      result <- NULL
    }
    
  } else {
    
    result <-
      x |>
      purrr::map(\(x) reduce_df_list(x = x,
                                     strict = strict)) |>
      purrr::list_rbind()
  }
  
  result
}

#' Convert to flat list
#'
#' @description
#' Recursively flattens an \R object. Unlike [base::unlist()], it
#'
#' - always returns a regular list, i.e. wraps `x` in a [list][base::list] if necessary, and will never remove the last list level. Thus it is
#'   [type-safe](https://en.wikipedia.org/wiki/Type_safety).
#'
#' - won't treat any of the list leafs specially (like `unlist()` does with factors). Thus leaf values will never be modified.
#'
#' @inheritParams list_flatten
#' @param x `r pkgsnip::param_lbl("r_obj")`
#'
#' @return A list.
#' @family list
#' @export
#'
#' @examples
#' library(magrittr)
#' 
#' nested_list <- list(1:3, list("foo", list("bar"))) %T>% str()
#' lm_obj <- lm(mpg ~ hp, mtcars)
#'
#' # unlike `unlist()` which also removes the last list tier from regular lists and many list-based
#' # objects...
#' unlist("foobar")
#' unlist(nested_list) |> str()
#' unlist(lm_obj) |> str()
#' 
#' # ...this function is able to return consistent results, i.e. an unnested list
#' pal::as_flat_list("foobar", is_node = is.list) |> str()
#' pal::as_flat_list(nested_list, is_node = is.list) |> str()
#' pal::as_flat_list(lm_obj, is_node = is.list) |> str()
#' 
#' nested_list <- list(list(factor("a"), factor("b")), factor("c")) %T>% str()
#' 
#' # unlike `unlist()` which combines factors...
#' unlist(nested_list) |> str()
#' # ...this function does not modify the list elements
#' pal::as_flat_list(nested_list) |> str()
#' 
#' nested_list <-
#'   list(c(list(1L), list(tibble::tibble(a = list(1.1, "2")))),
#'        list(tibble::as_tibble(mtcars[1:2, ]))) %T>%
#'   str()
#' nested_list_2 <- list(1:3, xfun::strict_list(list(list("buried deep")))) %T>% str()
#'
#' # by default, classed lists like data frames, tibbles or `xfun_strict_list`s are retained, i.e.
#' # not flattened...
#' pal::as_flat_list(nested_list) |> str()
#' pal::as_flat_list(nested_list_2) |> str()
#' # ...but you can drop them and thereby flatten custom objects if needed
#' pal::as_flat_list(nested_list, is_node = is.list) |> str()
#' pal::as_flat_list(nested_list_2, is_node = is.list) |> str()
as_flat_list <- function(x,
                         is_node = NULL,
                         name_spec = "{outer}.{inner}",
                         name_repair = c("minimal", "unique", "check_unique", "universal")) {
  
  checkmate::assert_function(is_node,
                             null.ok = TRUE)
  if (is.null(is_node)) is_node <- vctrs::obj_is_list
  
  result <- x
  depth <- purrr::pluck_depth(result,
                              is_node = is_node)
  
  # unlist until only a single list level remains
  while (depth > 2L) {
    result %<>% list_flatten(is_node = is_node,
                             name_spec = name_spec,
                             name_repair = name_repair)
    depth <- purrr::pluck_depth(result,
                                is_node = is_node)
  }
  
  # wrap in list if necessary
  if (depth < 2L && !is_node(result)) {
    result %<>% list()
  }
  
  result
}

#' Convert to character vector
#'
#' _Recursively_ applies [base::as.character()] to its inputs.
#'
#' @param ... \R objects to be converted to a character vector. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param use_names Whether or not to preserve names by [base::unlist()].
#'
#' @return A character vector.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' to_convert <-
#'   list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day")) %T>%
#'   print()
#'
#' as.character(to_convert)
#' pal::as_chr(!!!to_convert)
as_chr <- function(...,
                   use_names = FALSE) {

  rlang::list2(...) |>
    purrr::modify_tree(leaf = as.character,
                       is_node = is.list) |>
    unlist(use.names = use_names)
}

#' Escape line feeds / newlines
#'
#' Escapes the [POSIX-standard newline control character `LF`](https://en.wikipedia.org/wiki/Newline) (aka `\n`) which is the standard on Unix/Linux and recent
#' versions of macOS. Set `escape_cr = TRUE` in order to also escape the carriage return character `CR` (aka `\r`) commonly used on Microsoft Windows.
#'
#' @param x A character vector.
#' @param escape_cr Whether or not to also escape the carriage return character `CR` (aka `\r`). A logical scalar.
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # read in and print RStudio add-in registration file as-is
#' text <-
#'   fs::path_package(package = "pal",
#'                    "rstudio", "addins.dcf") |>
#'   readr::read_file() %T>%
#'   pal::cat_lines()
#'
#' # escape newlines and print again
#' pal::escape_lf(text) |> pal::cat_lines()
escape_lf <- function(x,
                      escape_cr = FALSE) {
  
  checkmate::assert_character(x,
                              null.ok = TRUE)
  checkmate::assert_flag(escape_cr)
  
  x %<>% stringr::str_replace_all(pattern = "\\n",
                                  replacement = "\\\\n")
  if (escape_cr) {
    x %<>% stringr::str_replace_all(pattern = "\\r",
                                    replacement = "\\\\r")
  }
  
  x
}

#' Phrase numeric vector
#'
#' Phrases a numeric vector by rounding, separating thousands and optionally other procedures. Basically a convenience wrapper around [round_to()] and
#' [base::format()].
#'
#' @inheritParams round_to
#' @param x A numeric vector to phrase.
#' @param round_to Number to round `x` to. A numeric scalar.
#' @param big_mark Character used between every 3 digits to separate thousands.
#' @param decimal_mark Character used to indicate the numeric decimal point. Only relevant if `x` does not solely consist of integers.
#' @param justify_right Whether to right-justify the results to a common width. See the `trim` parameter of [base::format()] for details.
#' @param ... Further arguments passed on to [base::format()].
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' c(0.11, 11111.11) |> pal::phrase_nr()
#'
#' c(0.11, 11111.11) |>
#'   pal::phrase_nr(justify_right = TRUE) |>
#'   pal::cat_lines()
phrase_nr <- function(x,
                      round_to = 0.1,
                      round_up = TRUE,
                      big_mark = "'",
                      decimal_mark = ".",
                      justify_right = FALSE,
                      ...) {
  round_to(x = x,
           to = round_to,
           round_up = round_up) |>
    format(big.mark = big_mark,
           decimal.mark = decimal_mark,
           trim = !justify_right,
           ... = ...)
}

#' Convert to sentence case with trailing punctuation mark
#'
#' Converts the input to a character vector and ensures it starts with an upper case letter and ends with the specified punctuation mark.
#'
#' Note that this function doesn't alter any characters in `x` other than the first and the last one.
#'
#' @param x Input to be converted to [sentence case](https://en.wikipedia.org/wiki/Letter_case#Case_styles), typically a character vector.
#' @param punctuation_mark Punctuation mark to be appended to `x`. A character scalar.
#'
#' @inherit capitalize_first return
#' @family string
#' @seealso
#' [stringr::str_to_sentence()] to convert a character vector to all lowercase except for the first character. Note that this also includes lowercasing [proper
#' nouns](https://en.wikipedia.org/wiki/Proper_and_common_nouns), [abbreviations](https://en.wikipedia.org/wiki/Abbreviation) etc.
#' 
#' [snakecase::to_sentence_case()] that builds upon [stringr::str_to_sentence()] but offers additional options to finetune the conversion. Note that
#' `abbreviations` have to be manually specified in order to be preserved in upper case.
#' @export
#'
#' @examples
#' pal::sentenceify("no verb, no sentence")
#'
#' # punctuation mark won't be duplicated if already existing
#' pal::sentenceify(c("I've made my point",
#'                    "good point."))
sentenceify <- function(x,
                        punctuation_mark = ".") {
  
  checkmate::assert_string(punctuation_mark)
  
  capitalize_first(x) |>
    purrr::map_chr(\(x) {
      if (is.na(x) || stringr::str_sub(string = x, start = -1L) == punctuation_mark) {
        x
      } else {
        paste0(x, punctuation_mark)
      }
    })
}

#' Capitalize first letter
#'
#' Converts the input to a character vector, with the first letter of each element uppercased.
#'
#' @param x Input of which to capitalize the first letter, typically a character vector.
#'
#' @return A character vector of the same length as `x`.
#' @family string
#' @export
#'
#' @examples
#' pal::capitalize_first(c("one", "Two", "tHREE"))
capitalize_first <- function(x) {
  
  stringr::str_replace(string = x,
                       pattern = "^.",
                       replacement = toupper)
}

#' Wrap character in character
#'
#' Wraps a character vector `x` in another character vector `wrap` (by default the string `"`).
#'
#' @param x A character vector or something coercible to. Will be fed to [as_chr()] before wrapping.
#' @param wrap Character sequence `x` is to be wrapped in. A character vector or something coercible to.
#'
#' @return A character vector of the same length as `wrap`.
#' @family string
#' @export
#'
#' @examples
#' mtcars |>
#'   colnames() |>
#'   pal::wrap_chr(wrap = "`") |>
#'   magrittr::set_colnames(x = mtcars) |>
#'   pal::pipe_table()
wrap_chr <- function(x,
                     wrap = "\"") {
  
  paste0(wrap, as_chr(x), wrap)
}

#' Convert control character sequence name to actual character sequence
#'
#' @param eol `r pkgsnip::param_lbl("eol")`
#'
#' @return A character scalar.
#' @family string
#' @export
as_line_feed_chr <- function(eol = c("LF", "CRLF", "CR", "LFCR")) {
  
  switch(EXPR = rlang::arg_match(eol),
         LF = "\n",
         CRLF = "\r\n",
         CR = "\r",
         LFCR = "\n\r")
}

#' Get column names of a delimiter-separated string
#'
#' Returns the column names of a string in a [delimiter-separated-value](https://en.wikipedia.org/wiki/Delimiter-separated_values) format like
#' [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) or [TSV](https://en.wikipedia.org/wiki/Tab-separated_values).
#'
#' @param x Delimiter-separated string. A character scalar.
#' @param delim Single character used to separate fields within `x`.
#' @param quote Single character used to quote strings within `x`. Set to `NULL` for none.
#'
#' @return A character vector of column names.
#' @family string
#' @export
#'
#' @examples
#' "https://raw.githubusercontent.com/tidyverse/readr/master/inst/extdata/mtcars.csv" |>
#'   httr2::request() |>
#'   httr2::req_perform() |>
#'   httr2::resp_body_string() |>
#'   pal::dsv_colnames()
dsv_colnames <- function(x,
                         delim = ",",
                         quote = "\"") {
  
  checkmate::assert_string(delim,
                           min.chars = 1L,
                           pattern = "^.$")
  checkmate::assert_string(quote,
                           null.ok = TRUE,
                           pattern = "^.$")
  x |>
    # extract first line
    regexpr(pattern = "[\r\n]") |>
    magrittr::subtract(1L) |>
    substr(x = x,
           start = 1L,
           stop = _) |>
    stringr::str_split_1(pattern = delim) |>
    stringr::str_remove_all(pattern = glue::glue("^{quote}|{quote}$"))
}

#' Convert to a character scalar (aka string)
#'
#' Similar to [`paste0(..., collapse = "")`][base::paste0], but _recursively_ converts its inputs to type character.
#'
#' @param ... \R objects to be assembled to a single string. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param sep Separator to delimit `...`. Defaults to none (`""`).
#' @param rm_na Exclude missing values. If `FALSE`, missing values will be represented as `"NA"` in the resulting string.
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' input <-
#'   paste0(2:4,
#'          collapse = ", ") |>
#'   purrr::map(rep,
#'              times = 20) %>%
#'   list(c("This is a glut of ", "meaningless numbers: "), .)
#'
#' # while this just converts `input` in a lazy way...
#' paste0(input,
#'        collapse = "")
#'
#' # ...this one works harder
#' pal::as_str(input)
as_str <- function(...,
                   sep = "",
                   rm_na = FALSE) {
  
  checkmate::assert_flag(rm_na)
  
  result <- as_chr(...)
  
  if (rm_na) {
    result %<>% magrittr::extract(!is.na(.))
  }
  
  if (length(result) > 0L) {
    result %<>% paste0(collapse = sep)
  }
  
  result
}

#' Assemble an (R) comment string of the desired line width
#'
#' Takes a vector of paragraphs, wraps them at the specified line width, prefixes them with comment markers and returns the result as a single string.
#'
#' @param ... Comment lines. A character vector or something coercible to.
#' @param line_width Maximum character width at which to wrap lines. A positive integerish scalar.
#' @param comment_prefix Character sequence that indicates the start of a comment. A character scalar.
#' @param sep_paragraphs Whether or not to separate paragraphs with an empty comment line.
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#'                     "No presents without code.") |>
#'   cat()
#'
#' # wrap lines at 20 chars
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#'                     "No presents without code.",
#'                     line_width = 20L) |>
#'   cat()
#'
#' # disable empty comment lines between paragraphs:
#' pal::as_comment_str(glue::glue("Copyright (C) {format(Sys.Date(), '%Y')} Santa Clause"),
#'                     "Hohoho.",
#'                     sep_paragraphs = FALSE) |>
#'   cat()
as_comment_str <- function(...,
                              line_width = 160L,
                              comment_prefix = "# ",
                              sep_paragraphs = TRUE) {
  
  checkmate::assert_count(line_width)
  checkmate::assert_string(comment_prefix)
  checkmate::assert_flag(sep_paragraphs)
  
  as_chr(...) |>
    stringr::str_wrap(width = line_width - nchar(comment_prefix)) |>
    stringr::str_split(pattern = stringr::fixed("\n")) |>
    purrr::map(\(x) paste0(paste0(comment_prefix, x),
                           collapse = "\n")) %>%
    purrr::map2_chr(.y = seq_along(.),
                    .f = \(x, y) {
                      if (sep_paragraphs && y < length(.)) paste0(x, "\n", comment_prefix, "\n") else paste0(x, "\n")
                    }) |>
    paste0(collapse = "")
}

#' Convert to a valid environment variable name
#'
#' @param ... Input to convert to an environment variable name. A character vector or something coercible to.
#'
#' @returns A character scalar.
#' @family string
#' @export
#'
#' @examples
#' pal::as_env_var_name("foo.bar")
#' pal::as_env_var_name("/oh/my/goodness")
#' pal::as_env_var_name("1.2", "some title")
as_env_var_name <- function(...) {
  
  as_str(...,
         sep = "_") |>
    stringr::str_replace_all(pattern = "\\W",
                             replacement = "_") |>
    stringr::str_replace(pattern = "^(\\d)",
                         replacement = "_\\1") |>
    toupper()
}

#' Enumerate vector's elements as string
#'
#' Takes a vector or list and enumerates its elements in a single string. Convenience function combining [wrap_chr()] and [cli::ansi_collapse()] with slightly
#' differing defaults (`last` defaults to the value of `sep2`).
#'
#' @inheritParams cli::ansi_collapse
#' @inheritParams wrap_chr
#'
#' @return A character scalar.
#' @family string
#' @export
#'
#' @examples
#' # by default, `last` defaults to `sep2`
#' pal::enum_str(1:2)
#' pal::enum_str(1:3)
#' 
#' # input is optionally wrapped in a character sequence
#' pal::enum_str(letters[1:3],
#'               wrap = "`")
enum_str <- function(x,
                     sep = ", ",
                     sep2 = " and ",
                     last = sep2,
                     trunc = Inf,
                     width = Inf,
                     ellipsis = cli::symbol$ellipsis,
                     style = c("both-ends", "head"),
                     wrap = "") {
  
  wrap_chr(x = x,
           wrap = wrap) |>
    cli::ansi_collapse(sep = sep,
                       sep2 = sep2,
                       last = last,
                       trunc = trunc,
                       width = width,
                       ellipsis = ellipsis,
                       style = style)
}

#' Fuse regular expressions
#'
#' @description
#' `r lifecycle::badge("superseded")` \cr
#' It's recommended to rely on the [**rex**](https://rex.r-lib.org/) package instead, more precisely [`rex::rex(or(...))`][rex::or] and
#' [`rex::rex(or(rex::escape(c(...))))`][rex::escape].
#'
#' Combines a vector or list of regular expressions to a single one (by logical OR).
#'
#' @param ... Regular expressions. All elements will be converted to type character before fusing. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .literal Whether or not the input should be interpreted *literally* instead of as regular expressions. If `TRUE`, all elements of `...` are wrapped in
#'   the literal escape sequences `\Q` and `\E`.
#'
#' @return A character scalar.
#' @seealso The [rex][rex::rex] package which provides an intuitive framework to build complex regular expressions.
#' @family string
#' @export
#'
#' @examples
#' stringr::str_subset(string = c("yeah",
#'                                "nope",
#'                                "maybe",
#'                                "never"),
#'                     pattern = pal::fuse_regex(letters[1:3]))
fuse_regex <- function(...,
                       .literal = FALSE) {
  
  checkmate::assert_flag(.literal)
  
  result <- as_chr(...)
  if (.literal) {
    result %<>% paste0("\\Q", ., "\\E")
  }
  result %<>% paste0(collapse = "|")
  dots <- rlang::list2(...)
  
  if (length(dots) > 1L || length(dots[[1L]]) > 1L) {
    result %<>% paste0("(", ., ")")
  }
  
  result
}

#' Get path modification time
#'
#' Determine a file or directory's modification time. Other than [base::file.mtime()], this function is based on [fs::file_info()] and the datetime of
#' modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time) by default.
#'
#' @inheritParams fs::file_info
#' @param tz Timezone to return the result in. A character scalar where `""` means the current time zone.
#'
#' @return `r pkgsnip::return_lbl("datetime")`
#' @family path
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr",
#'                  "DESCRIPTION") |>
#'   pal::path_mod_time()
path_mod_time <- function(path,
                          follow = FALSE,
                          tz = "UTC") {
  
  checkmate::assert_string(tz)
  
  as.POSIXct(fs::file_info(path = path, follow = follow)$modification_time,
             tz = tz)
}

#' Flatten path tree
#'
#' Flattens a hierarchical list of path elements into a character vector of full paths.
#'
#' @param path_tree Directory tree. A nested list of named lists and character vectors.
#' @param parent_path Optional parent path of `path_tree`. A character scalar.
#'
#' @return A character vector.
#' @family path
#' @export
#'
#' @examples
#' list("root_dir" = list("subdir1",
#'                        "subdir2" = list("file1.ext"),
#'                        "file2.ext")) |>
#'   pal::flatten_path_tree() |>
#'   pal::cat_lines()
flatten_path_tree <- function(path_tree,
                              parent_path = NULL) {
  
  if (purrr::pluck_depth(path_tree) > 1L) {
    
    result <-
      rlang::names2(path_tree) |>
      purrr::map_chr(\(x) fs::path_join(c(parent_path, x))) |>
      purrr::map2(.x = path_tree,
                  .f = flatten_path_tree) |>
      purrr::list_c(ptype = character()) %>%
      c(parent_path, .)
    
  } else {
    
    result <- fs::path_join(c(parent_path, path_tree))
  }
  
  fs::path(result)
}

#' Draw path tree
#'
#' Pretty-prints a character vector of hierarchical paths as a tree. Uses [Unicode box drawing
#' characters](https://en.wikipedia.org/wiki/Box-drawing_character#Box_Drawing) to draw the nesting structure.
#' 
#' This function is the equivalent of [fs::dir_tree()] for an artificial/fictional path hierarchy. To print a tree of an actual filesystem path hierarchy, it's
#' recommended to rely on `fs::dir_tree()` which additionally colorizes the output based on real filesystem information. Apart from that, the algorithm to draw
#' the tree is the same.
#'
#' @param paths A character vector of paths as returned by [flatten_path_tree()] or [fs::dir_ls()].
#' @param quiet Whether or not to suppress drawing the directory tree.
#'
#' @return `paths`, invisibly.
#' @family path
#' @export
#'
#' @examples
#' # using an artificial/fictional path hierarchy
#' list("root_dir" = list("subdir1",
#'                        "subdir2" = list("file1.ext"),
#'                        "file2.ext")) |>
#'   pal::flatten_path_tree() |>
#'   pal::draw_path_tree()
#' 
#' # using an actual path hierarchy
#' fs::path_package("dplyr") |>
#'   fs::dir_ls(recurse = TRUE) |>
#'   pal::draw_path_tree()
#' 
#' # to get colorized output, use `fs::dir_tree()` instead
#' fs::path_package("dplyr") |> fs::dir_tree()
draw_path_tree <- function(paths,
                           quiet = FALSE) {
  
  checkmate::assert_character(paths,
                              any.missing = FALSE,
                              min.len = 1L)
  checkmate::assert_flag(quiet)
  
  has_leading_root <-
    paths |>
    fs::path_split() |>
    purrr::map_chr(purrr::chuck,
                   1L) |>
    magrittr::equals(paths[1L]) |>
    all()
  
  leaf_paths <- paths
  # remove dir root to get same result structure as `fs::dir_ls()`
  if (has_leading_root) leaf_paths %<>% .[-1L]
  
  by_dir <- split(leaf_paths, fs::path_dir(leaf_paths))
  
  box_chars <- list(h = "\u2500",
                    v = "\u2502",
                    l = "\u2514",
                    j = "\u251c")
  
  print_leaf <- function(x,
                         indent) {
    leafs <- by_dir[[x]]
    
    for (i in seq_along(leafs)) {
      
      if (i == length(leafs)) {
        
        cat(indent, paste0(box_chars$l, box_chars$h, box_chars$h, " ",
                           collapse = ""),
            fs::path_file(leafs[[i]]),
            "\n",
            sep = "")
        
        print_leaf(x = leafs[[i]],
                   indent = paste0(indent, "    "))
        
      } else {
        
        cat(indent,
            paste0(box_chars$j, box_chars$h, box_chars$h, " ",
                   collapse = ""),
            fs::path_file(leafs[[i]]),
            "\n",
            sep = "")
        
        print_leaf(x = leafs[[i]],
                   indent = paste0(indent, paste0(box_chars$v, "   ",
                                                  collapse = "")))
      }
    }
  }
  
  if (!quiet) {
    
    dir_root <- ifelse(has_leading_root,
                       paths[1L],
                       by_dir |>
                         names() %>%
                         magrittr::extract(which.min(nchar(.))))
    # print dir root
    cat(dir_root, "\n",
        sep = "")
    # print leafs
    print_leaf(fs::path_expand(dir_root), "")
  }
  
  invisible(paths)
}

#' Check that all named dots arguments are valid
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Ensures that [dots][base::dots] `...` are either empty (if `.empty_ok = TRUE`), or all named elements in dots are a valid subset of `.fn`'s parameter names.
#' In case of an invalid or `.forbidden` argument, an informative message is shown and the defined `.action` is taken.
#'
#' @details
#' `check_dots_named()` is intended to combat the second one of the two major downsides that using `...` usually brings. In chapter 6.6 of the book
#' _Advanced R_ it is [phrased](https://adv-r.hadley.nz/functions.html#fun-dot-dot-dot) as follows:
#'
#' _Using `...` comes with two downsides:_
#' 
#' - _When you use it to pass arguments to another function, you have to carefully explain to the user where those arguments go. This makes it hard to
#'   understand what you can do with functions like `lapply()` and `plot()`._
#' 
#' - **_A misspelled argument will not raise an error. This makes it easy for typos to go unnoticed._**
#'
#' @param ... Dots argument to check.
#' @param .fn Function the `...` will be passed on to.
#' @param .additional Parameter names within `...` that should be treated as valid in addition to `.fn`'s actual parameter names. A character vector.
#' @param .forbidden Parameter names within `...` that should be treated as invalid. This has precedence over `.additional`. A character vector.
#' @param .empty_ok Set to `TRUE` if empty `...` should be allowed, or to `FALSE` otherwise.
#' @param .action Action to take when the check fails. One of `r as_md_vals(c("abort", "warn", "inform")) |> enum_str(sep2 = " or ")`
#'
#' @seealso rlang's check dots functions: [rlang::check_dots_empty()], [rlang::check_dots_used()] and [rlang::check_dots_unnamed()]
#' @family dots
#' @export
#'
#' @examples
#' # We can use `check_dots_named()` to address this second downside:
#' sum_safe <- function(...,
#'                      na.rm = FALSE) {
#'   pal::check_dots_named(...,
#'                         .fn = sum)
#'   sum(...,
#'       na.rm = na.rm)
#' }
#' 
#' # Note how the misspelled `na_rm` (instead of `na.rm`) silently gets ignored in the original
#' # function,
#' sum(1, 2, NA, na_rm = TRUE)
#'
#' # whereas our safe version properly errors:
#' try(
#'   sum_safe(1, 2, NA, na_rm = TRUE)
#' )
#'
#' # We can even build an `sapply()` function that fails "intelligently":
#' sapply_safe <- function(X,
#'                         FUN,
#'                         ...,
#'                         simplify = TRUE,
#'                         USE.NAMES = TRUE) {
#'   pal::check_dots_named(...,
#'                         .fn = FUN)
#'   sapply(X = X,
#'          FUN = FUN,
#'          ...,
#'          simplify = TRUE,
#'          USE.NAMES = TRUE)
#' }
#'
#' # While the original `sapply()` silently consumes misspelled named arguments via `...`,
#' sapply(1:5, paste, "hour workdays", sep = "-", colaspe = " ")
#'
#' # `sapply_safe()` will throw an informative error message:
#' try(
#'   sapply_safe(1:5, paste, "hour workdays", sep = "-", colaspe = " ")
#' )
#'
#' # But be aware that `check_dots_named()` might be a bit rash,
#' try(
#'   sum_safe(a = 1, b = 2)
#' )
#'
#' # while the original function actually has nothing to complain:
#' sum(a = 1, b = 2)
#'
#' # Furthermore, it doesn't play nicely with generics that don't expose all of the argument names
#' # of the method that is eventually invoked (`to` and `by` in the case of `seq()` -> `seq.int()`):
#' try(
#'   sapply_safe(X = c(0,50),
#'               FUN = seq,
#'               to = 100,
#'               by = 5)
#' )
#'
#' # To work around this, directly supply the proper method (`seq.int`),
#' sapply_safe(X = c(0,50),
#'             FUN = seq.int,
#'             to = 100,
#'             by = 5)
#'
#' # or just provide `to` and `by` *unnamed*:
#' sapply_safe(X = c(0,50),
#'             FUN = seq,
#'             100,
#'             5)
check_dots_named <- function(...,
                             .fn,
                             .additional = NULL,
                             .forbidden = NULL,
                             .empty_ok = TRUE,
                             .action = c("abort", "warn", "inform")) {
  
  checkmate::assert_function(.fn)
  checkmate::assert_character(.additional,
                              any.missing = FALSE,
                              null.ok = TRUE)
  checkmate::assert_character(.forbidden,
                              any.missing = FALSE,
                              null.ok = TRUE)
  checkmate::assert_flag(.empty_ok)
  .action <- rlang::arg_match(.action)
  
  if (...length() > 0L) {
    
    # determine original function name the `...` will be passed on to
    fn_arg_name <- deparse1(substitute(.fn))
    parent_call <- as.list(sys.call(-1L))
    parent_arg_names <- methods::formalArgs(args(sys.function(-1L)))
    
    if (fn_arg_name %in% parent_arg_names) {
      fn_name <- deparse1(parent_call[which(parent_arg_names == fn_arg_name) + 1L][[1L]])
    } else {
      fn_name <- fn_arg_name
    }
    
    # determine arg names of the function the `...` will be passed on to
    dots_arg_names <- methods::formalArgs(args(.fn))
    
    # check named `...` args
    purrr::walk(.x = setdiff(...names(),
                             ""),
                .f = check_dot_named,
                dots_arg_names = dots_arg_names,
                allowed_arg_names = setdiff(union(dots_arg_names,
                                                  .additional),
                                            .forbidden),
                fn_name = fn_name,
                action = .action)
    
  } else if (!.empty_ok) {
    
    msg <- "{.arg ...} must be provided (!= NULL)."
    
    switch(EXPR = .action,
           abort = cli::cli_abort(msg),
           warn = cli::cli_warn(msg),
           inform = cli::cli_inform(msg))
  }
}

check_dot_named <- function(dot,
                            dots_arg_names,
                            allowed_arg_names,
                            fn_name,
                            action) {
  
  # The following code is largely borrowed from `rlang::arg_match()`
  allowed_arg_names_excl_dots <- setdiff(allowed_arg_names, '...')
  i <- match(dot, allowed_arg_names)
  
  if (rlang::is_na(i)) {
    
    is_forbidden <- dot %in% dots_arg_names
    is_restricted <- !setequal(dots_arg_names,
                               allowed_arg_names)
    
    msg <- paste0(ifelse(is_forbidden,
                         "Forbidden",
                         "Invalid"),
                  " named argument provided in {.arg ...}: {.arg {dot}}")
    
    if (length(allowed_arg_names_excl_dots) > 0L) {
      
      msg %<>% c(i = paste0(ifelse(is_restricted,
                                   "Named arguments allowed to pass on to ",
                                   "Valid named arguments for "),
                            "{.fun {fn_name}} include: {.arg {allowed_arg_names_excl_dots}}"))
    } else {
      
      msg %<>% c(i = paste0("Only unnamed arguments are ", ifelse(is_restricted, "allowed", "valid"), " for {.fun {fn_name}}."))
    }
    
    i_partial <- pmatch(dot, allowed_arg_names_excl_dots)
    candidate <- NULL
    
    if (!rlang::is_na(i_partial)) {
      candidate <- allowed_arg_names_excl_dots[[i_partial]]
    }
    
    i_close <- utils::adist(dot, allowed_arg_names_excl_dots) / nchar(allowed_arg_names_excl_dots)
    
    if (any(i_close <= 0.5)) {
      candidate <- allowed_arg_names_excl_dots[[which.min(i_close)]]
    }
    
    if (!is.null(candidate)) {
      msg %<>% c(">" = "Did you mean {.arg {candidate}}?")
    }
    
    switch(EXPR = action,
           abort = cli::cli_abort(msg),
           warn = cli::cli_warn(msg),
           inform = cli::cli_inform(msg))
  }
}

#' List a subset of all installed packages
#'
#' @param pkg A character vector of package names.
#' @param ignore_case Do not distinguish between upper and lower case letters in `pkg`. If `FALSE`, `pkg` is treated case-sensitive.
#' @param as_regex Interpret `pkg` as regular expression(s). If `FALSE`, `pkg` is interpreted literally.
#'
#' @return A [tibble][tibble::tbl_df].
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::ls_pkg(pkg = c("methods", "utils"))
#' 
#' pal::ls_pkg(pkg = "^ut",
#'             as_regex = TRUE)
ls_pkg <- function(pkg,
                   ignore_case = TRUE,
                   as_regex = FALSE) {
  
  checkmate::assert_character(pkg,
                              any.missing = FALSE,
                              min.chars = 1L)
  checkmate::assert_flag(ignore_case)
  checkmate::assert_flag(as_regex)
  
  regex <- pkg
  
  if (!as_regex) {
    regex <- paste0("\\Q", regex, "\\E")
  }
  
  regex %<>% fuse_regex()
  
  if (ignore_case) {
    regex <- paste0("(?i)", regex)
  }
  
  if (!as_regex) {
    regex <- paste0("^", regex, "$")
  }
  
  utils::installed.packages() |>
    tibble::as_tibble() |>
    dplyr::filter(stringr::str_detect(string = Package,
                                      pattern = regex))
}

#' Depend on another package
#'
#' Wrapper around [usethis::use_package()] with different defaults and optional `DESCRIPTION` file [tidying][usethis::use_tidy_description].
#'
#' @inheritParams usethis::use_package
#' @param tidy Whether or not to run [usethis::use_tidy_description()] after adding `package` to dependencies.
#'
#' @return `NULL`, invisibly.
#' @family rpkgs
#' @export
use_pkg <- function(package,
                    type = "Imports",
                    min_version = TRUE,
                    tidy = TRUE) {
  
  checkmate::assert_flag(tidy)
  rlang::check_installed("usethis",
                         reason = reason_pkg_required())
  
  # TODO: remove this once https://github.com/r-lib/usethis/pull/2117 has been merged and released
  if (isFALSE(min_version)) min_version <- NULL
  
  usethis::use_package(package = package,
                       type = type,
                       min_version = min_version)
  if (tidy) {
    usethis::use_tidy_description()
  }
}

#' Test if packages are installed
#'
#' Returns `TRUE` or `FALSE` for each `pkg`, depending on whether the `pkg` is installed on the current system or not, optionally ensuring a `min_version`.
#'
#' In contrast to [base::require()], this function checks if the packages are installed without attaching their namespaces if so.
#' 
#' In contrast to [rlang::is_installed()] or [xfun::pkg_available()], this function doesn't load the packages if they're installed.
#' 
#' In contrast to [xfun::pkg_available()], it is fully vectorized, i.e. returns a (named) logical vector of the same length as `pkg`.
#' 
#' It is [considerably
#' faster](https://stackoverflow.com/questions/9341635/check-for-installed-packages-before-running-install-packages/38082613#38082613) than the commonly used
#' `pkg %in% rownames(installed.packages())` check.
#'
#' @param pkg Package names. A character vector.
#' @param min_version Minimum required version number of each `pkg`. Must be either `NULL` to ignore version numbers, or a vector of
#'   [`package_version`][base::package_version]s or something coercible to.
#'
#' @return A named logical vector of the same length as `pkg`.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_installed("tidyverse")
#'
#' # it is vectorized...
#' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"),
#'                       min_version = c("1.0", "2.0", "99.9.9000"))
#'
#' # ...and scalar arguments will be recycled
#' pal::is_pkg_installed(pkg = "dplyr",
#'                       min_version = c("0.5", "1.0", "99.9.9000"))
#'
#' pal::is_pkg_installed(pkg = c("dplyr", "tibble", "magrittr"),
#'                       min_version = "1.0")
is_pkg_installed <- function(pkg,
                             min_version = NULL) {
  
  checkmate::assert_character(pkg,
                              any.missing = FALSE)
  min_version %<>% as.package_version()
  
  if (length(min_version) == 0L) {
    
    result <- purrr::map_lgl(magrittr::set_names(pkg, pkg),
                             ~ nzchar(system.file(package = .x)))
  } else {
    
    result <-
      is_pkg_installed(pkg = pkg) %>%
      list(names(.), min_version) |>
      purrr::pmap_lgl(\(...) {
        if (..1) utils::packageVersion(pkg = ..2) >= ..3 else ..1
      })
  }
  
  result
}

#' Test if a package is available on CRAN
#'
#' @inheritParams is_http_success
#' @param pkg Package name (case-sensitive). A character scalar.
#' @param min_version Minimum required version number of `pkg`. Must be either `NULL` to ignore version numbers, or a single
#'   [`package_version`][base::package_version] or something coercible to.
#'
#' @return A logical scalar.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_cran("foobar")
#' pal::is_pkg_cran("dplyr")
#' pal::is_pkg_cran("dplyr", min_version = "9999.9")
is_pkg_cran <- function(pkg,
                        min_version = NULL,
                        max_tries = 1L) {
  
  checkmate::assert_string(pkg)
  min_version %<>% as.package_version()
  rlang::check_installed(pkg = c("httr2", "rvest"),
                         reason = reason_pkg_required())
  
  url <- glue::glue("https://cran.r-project.org/package={pkg}")
  is_cran <- is_http_success(url = url,
                             max_tries = max_tries)
  
  if (is_cran && length(min_version)) {
    
    is_cran <-
      httr2::request(base_url = url) |>
      httr2::req_method(method = "GET") |>
      httr2::req_retry(max_tries = max_tries) |>
      httr2::req_perform() |>
      httr2::resp_body_html() |>
      rvest::html_element(css = "body") |>
      rvest::html_element(css = "table") |>
      rvest::html_table() |>
      dplyr::filter(stringr::str_detect(X1, stringr::fixed("Version"))) |>
      dplyr::filter(as.package_version(X2) >= !!min_version) |>
      nrow() |>
      magrittr::is_greater_than(0L)
  }
  
  is_cran
}

#' Test if a directory is an \R package
#'
#' Convenience wrapper around the [`rprojroot::is_r_package`] root criterion. Note that it will by default only return `TRUE` for the root of a package
#' directory, not its subdirectories.
#'
#' @param path Path of the directory to check. A character scalar. Defaults to the current working directory.
#' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for an \R package setup. Setting
#'   `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package.
#'
#' @return `TRUE` if `path` is the root (if `check_parent_dirs = FALSE`) or a (sub)directory directory (if `check_parent_dirs = TRUE`) of an \R package,
#'   `FALSE` otherwise.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkg_dir()
#' pal::is_pkg_dir(fs::path_package("utils"))
is_pkg_dir <- function(path = ".",
                       check_parent_dirs = FALSE) {
  
  has_root(criterion = rprojroot::is_r_package,
           path = path,
           check_parent_dirs = check_parent_dirs)
}

#' Test if pkgdown is set up for an R package directory
#'
#' Convenience wrapper around the [`rprojroot::is_pkgdown_project`] root criterion. Note that it will by default only return `TRUE` for the root of a package
#' directory and the `pkgdown` subdirectory, not other subdirectories.
#'
#' @param path Path of the R package directory to check. A character scalar. Defaults to the current working directory.
#' @param check_parent_dirs Whether or not to also check `path`'s parent directories (up until the filesystem root) for a pkgdown setup. Setting
#'   `check_parent_dirs = TRUE` guarantees that `TRUE` is returned for all subdirectories of an R package that has pkgdown set up.
#'
#' @return `TRUE` if pkgdown is set up for `path`, `FALSE` otherwise.
#' @family rpkgs
#' @export
#'
#' @examples
#' pal::is_pkgdown_dir()
#' pal::is_pkgdown_dir(fs::path_package("pal"))
is_pkgdown_dir <- function(path = ".",
                           check_parent_dirs = FALSE) {
  
  has_root(criterion = rprojroot::is_pkgdown_project,
           path = path,
           check_parent_dirs = check_parent_dirs)
}

#' Test if object exists in namespace
#'
#' Determines whether or not an object exists in a specific [namespace](https://adv-r.hadley.nz/environments.html#namespaces).
#'
#' This complements [utils::getFromNamespace()] and uses almost the same code internally.
#'
#' @inheritParams utils::getFromNamespace
#'
#' @return A logical scalar.
#' @export
#'
#' @examples
#' if (pal::is_pkg_installed("pkgpurl")) {
#'   pal::exists_in_namespace("funky_config", "pkgpurl")
#' }
#'
#' pal::exists_in_namespace("funky_config", "pal")
exists_in_namespace <- function(x,
                                ns,
                                pos = -1L,
                                envir = as.environment(pos)) {
  if (missing(ns)) {
    
    nm <- attr(x = envir,
               which = "name",
               exact = TRUE)
    
    if (is.null(nm) || !startsWith(nm, "package:")) {
      cli::cli_abort("Specified {.arg envir} is not a package environment.")
    }
    
    ns <- asNamespace(substring(nm, 9L))
    
  } else {
    ns <- asNamespace(ns)
  }
  
  exists(x = x,
         envir = ns,
         inherits = FALSE)
}

#' Assemble reason why package is required
#'
#' Assembles a prose text containing the reason why a certain \R package is required by the `fn` of another `pkg`. Intended to be used as argument `reason` of
#' [rlang::check_installed()].
#'
#' @param fn Name of the function that requires the \R package. A character scalar.
#' @param pkg Name of the package `fn` is part of. A character scalar.
#'
#' @return A character scalar.
#' @export
#'
#' @examples
#' \dontrun{
#'   rlang::check_installed(pkg = "foo",
#'                          reason = pal::reason_pkg_required(pkg = "bar",
#'                                                            fn = "serve"))
#' }
reason_pkg_required <- function(fn = rlang::call_name(rlang::caller_call()),
                                pkg = utils::packageName(parent.frame())) {
  checkmate::assert_string(fn)
  checkmate::assert_string(pkg)
  
  cli::format_inline("by {.fun {pkg}::{fn}}, but is not installed.")
}

#' Get all `DESCRIPTION` file fields as cleaned up list
#'
#' @description
#' Returns all fields from a `DESCRIPTION` file as a named list with values cleaned up:
#' - Whitespaces at the start and end of field values as well as repeated whitespaces within them are removed.
#' - Multi-value fields are returned as vectors.
#' - The fields `Depends`, `Imports` and `Suggests` are returned as a single data frame named `dependencies`.
#'
#' @inheritParams desc_get_field_safe
#'
#' @return A list.
#' @family desc
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr") |> pal::desc_list()
desc_list <- function(file = ".") {
  
  rlang::check_installed("desc",
                         reason = reason_pkg_required())
  
  fields <- desc::desc_fields(file = file)
  
  result <-
    fields |>
    setdiff(c("Authors@R",
              "Depends",
              "Imports",
              "Suggests",
              "URL")) |>
    rlang::set_names() |>
    purrr::map(desc_get_field_safe,
               file = file)
  
  if ("Authors@R" %in% fields) result[["Authors@R"]] <- desc::desc_get_authors(file = file)
  if (any(c("Depends", "Imports", "Suggests") %in% fields)) result[["dependencies"]] <- desc::desc_get_deps(file = file)
  if ("URL" %in% fields) result[["URL"]] <- desc::desc_get_urls(file = file)
  
  result
}

#' Get value from `DESCRIPTION` file field (safe)
#'
#' Returns the value from a `DESCRIPTION` file field (aka _key_). Thin wrapper around [desc::desc_get_field()] that returns an informative string referring to
#' the given `key` as `default` if the field is unset.
#'
#' By default, the following string is returned if `key = "NoRealKey"` is not found:
#'
#' ```
#' "<No `NoRealKey` field set in DESCRIPTION!>"
#' ```
#'
#' If you rather want to take an action like throwing an error, it's recommended to call [desc::desc_get_field()] directly.
#'
#' @inheritParams desc::desc_get_field
#' @param default Default value to return if `key` is not found.
#'
#' @return A character scalar.
#' @family desc
#' @export
#'
#' @examples
#' pal::desc_get_field_safe(key = "Description",
#'                          file = fs::path_package("pal"))
desc_get_field_safe <- function(key,
                                default = glue::glue("<No \x60{key}\x60 field set in DESCRIPTION!>"),
                                trim_ws = TRUE,
                                squish_ws = trim_ws,
                                file = ".") {
  
  rlang::check_installed("desc",
                         reason = reason_pkg_required())
  
  desc::desc_get_field(key = key,
                       default = default,
                       trim_ws = trim_ws,
                       squish_ws = squish_ws,
                       file = file)
}

#' Get dependency version from `DESCRIPTION` file
#'
#' Returns the version of the specified `pkg` dependency from a `DESCRIPTION` file.
#'
#' @inheritParams desc::desc_get_deps
#' @param pkg `r pkgsnip::param_lbl("pkg")`
#' @param types Dependency types to be considered. If `pkg` is listed in multiple dependency types, the maximally required version is returned.
#'
#' @return `r pkgsnip::return_lbl("num_vrsn")`
#' @family desc
#' @export
#'
#' @examples
#' fs::path_package(package = "dplyr") |> pal::desc_dep_vrsn(pkg = "tibble")
desc_dep_vrsn <- function(pkg,
                          file = ".",
                          types = c("Imports", "Depends", "Suggests", "Enhances", "LinkingTo")) {
  
  checkmate::assert_string(pkg)
  types <- rlang::arg_match(arg = types,
                            multiple = TRUE)
  
  result <-
    desc::desc_get_deps(file = file) |>
    dplyr::filter(package == !!pkg & type %in% !!types)
  
  if (nrow(result) == 0L) {
    cli::cli_abort("Package {.val {pkg}} is not listed as a dependency of type {.or {.val {types}}} in {.file {file}}.")
  }
  
  result$version |>
    stringr::str_extract("\\d+[\\d.]+") |>
    purrr::map(\(x) {
      if (is.na(x)) {
        as.numeric_version(NULL)[NA]
      } else {
        as.numeric_version(x)
      }
    }) |>
    purrr::list_c(ptype = numeric_version(NULL)) |>
    max(na.rm = TRUE) |>
    when(length(.) == 0L ~ as.numeric_version(NULL)[NA],
         ~ .)
}

#' Get Git repository URL from `DESCRIPTION` file
#'
#' Returns the first Git repository URL found in the `URL` (preferred) or `BugReports` fields of a `DESCRIPTION` file.
#'
#' Currently, this function recognizes [GitLab](https://about.gitlab.com/), [GitHub](https://github.com/), [Gitea](https://gitea.com/),
#' [Codeberg](https://codeberg.org/), [Pagure](https://pagure.io/), [Bitbucket](https://bitbucket.org/) and [SourceHut](https://sr.ht/) repository URLs.
#'
#' @inheritParams desc::desc_get_field
#'
#' @return A character scalar.
#' @family desc
#' @export
desc_url_git <- function(file = ".") {
  
  rlang::check_installed("desc",
                         reason = reason_pkg_required())
  
  desc::desc_get_field(key = "BugReports",
                       default = character(),
                       file = file) |>
    stringr::str_replace(pattern = "/issues/?$",
                         replacement = "/") %>%
    c(desc::desc_get_urls(), .) |>
    stringr::str_subset(pattern = "^https?://(git(hub|lab|ea)\\..+|(codeberg|bitbucket)\\.org|(git\\.)?src\\.ht|pagure\\.io)/") |>
    dplyr::first()
}

#' Get function's default parameter values
#'
#' Extracts a function parameter's default value(s) from its language definition and returns the result as a character vector.
#'
#' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code
#' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows:
#'
#' ```r
#' #' @param some_param Some parameter. One of
#' #'   `r pal::fn_param_defaults(param = "some_param", fn = "some_fn") |> pal::wrap_chr("\x60") |> cli::ansi_collapse()`.
#' #'
#' some_fn <- function(some_param = c("a", "b", "c")) {
#'   some_param <- rlang::arg_match(some_param)
#'   ...
#' }
#' ```
#'
#' Or to list the possible parameter values formatted as an unnumbered list instead, replace `cli::ansi_collapse()` with [pal::as_md_list()] in the example
#' above.
#'
#' # Caveats
#' 
#' [base::deparse1()] is used internally to get a character representation of non-character default values. Therefore all of `deparse()`'s fuzziness also 
#' applies to this function.
#'
#' @param param Parameter name. A character scalar.
#' @param fn A [function][base::function] or a function name (searched for in `env`). See [base::formals()] for details.
#' @param env [Environment][base::environment] `fn` is defined in. See [base::formals()] for details.
#'
#' @return A character vector.
#' @family roxy
#' @export
#'
#' @examples
#' pal::fn_param_defaults(param = ".name_repair",
#'                        fn = tibble::as_tibble)
#'
#' # as Markdown-formatted enumeration in prose
#' pal::fn_param_defaults(param = ".name_repair",
#'                        fn = tibble::as_tibble) |>
#'   pal::wrap_chr("`") |>
#'   cli::ansi_collapse() |>
#'   cat()
fn_param_defaults <- function(param,
                              fn = sys.function(sys.parent()),
                              env = parent.frame()) {
  
  checkmate::assert_string(param)
  
  # turn `fn` into type function if necessary (the same as `formals(fun)` does internally)
  if (is.character(fn)) {
    fn %<>% get(mode = "function",
                envir = env)
  }
  
  default_vals <- formals(fun = args(name = fn),
                          envir = env)
  
  if (param %in% names(default_vals)) {
    default_vals <- default_vals[[param]]
  } else {
    fn_name <- deparse1(expr = substitute(fn),
                        backtick = FALSE)
    cli::cli_abort("The function {.fn {fn_name}} does not have a parameter named {.arg {param}}.")
  }
  
  if (missing(default_vals)) {
    fn_name <- deparse1(expr = substitute(fn),
                        backtick = FALSE)
    cli::cli_abort("{.fn {fn_name}}'s parameter {.arg {param}} does not have a default value.")
  }
  
  # evaluate default param if it results in a character vector
  if (is.language(default_vals)) {
    
    evaluated_default_vals <- tryCatch(expr = eval(expr = default_vals,
                                                   envir = env),
                                       error = \(x) NULL)
    
    if (is.character(evaluated_default_vals)) default_vals <- evaluated_default_vals
  }
  
  if (is.character(default_vals)) {
    default_vals %<>% wrap_chr()
  } else {
    default_vals %<>% deparse1(backtick = FALSE,
                               control = c("keepNA",
                                           "keepInteger",
                                           "niceNames",
                                           "showAttributes",
                                           "warnIncomplete"))
  }
  
  default_vals
}

#' Enumerate function's default parameter values
#'
#' Convenience function combining [fn_param_defaults()], [wrap_chr()] and [cli::ansi_collapse()].
#'
#' This function can be very convenient to avoid duplication in roxygen2 documentation by leveraging [inline \R code
#' evaluation](https://roxygen2.r-lib.org/articles/rd-formatting.html#inline-code) as follows:
#'
#' ```r
#' #' @param some_param Some parameter. One of
#' #'   `r pal::enum_fn_param_defaults(param = "some_param", fn = "some_fn")`.
#' #'
#' some_fn <- function(some_param = c("a", "b", "c")) {
#'   some_param <- rlang::arg_match(some_param)
#'   ...
#' }
#' ```
#'
#' @inheritParams fn_param_defaults
#' @param wrap Character sequence the default parameter values are to be wrapped in. A character vector or something coercible to.
#' @param sep2,last Passed on to [cli::ansi_collapse()].
#' @param ... Further arguments passed on to [cli::ansi_collapse()].
#'
#' @return A character scalar.
#' @family roxy
#' @export
#'
#' @examples
#' pal::enum_fn_param_defaults(param = ".name_repair",
#'                             fn = tibble::as_tibble) |>
#'   cat()
enum_fn_param_defaults <- function(param,
                                   fn = sys.function(sys.parent()),
                                   env = parent.frame(),
                                   wrap = "`",
                                   sep2 = " or ",
                                   last = sep2,
                                   ...) {
  check_dots_named(...,
                   .fn = cli::ansi_collapse,
                   .forbidden = c("sep2", "last"))
  
  fn_param_defaults(param = param,
                    fn = fn,
                    env = env) |>
    wrap_chr(wrap = wrap) |>
    cli::ansi_collapse(sep2 = sep2,
                       last = last,
                       ...)
}

#' Convert roxygen2 documentation links to Markdown
#'
#' Converts roxygen2 documentation [links in pseudo-Markdown style](https://roxygen2.r-lib.org/articles/rd-formatting.html#function-links) to actual Markdown
#' ones using [downlit::autolink_url()].
#'
#' @param x Markdown text with roxygen2 documentation links. A character scalar.
#'
#' @return A character scalar.
#' @family roxy
#' @export
#'
#' @examples
#' pal::roxy_to_md_links("[base::c()] is so short I almost forget it's there.")
#' pal::roxy_to_md_links("[`base::c()`], probably the most used base R function ever.")
#' pal::roxy_to_md_links("Some functions [are magic][downlit::autolink_url]!")
#' pal::roxy_to_md_links("downlit's [`autolink_url()`][downlit::autolink_url] seems magic!")
roxy_to_md_links <- function(x) {
  
  checkmate::assert_string(x)
  rlang::check_installed(pkg = "downlit",
                         reason = reason_pkg_required())
  
  # determine the roxy-specific doc links by first parsing input as CommonMark which escapes all brackets from roxy doc links (but not from valid MD links)
  links_roxy <-
    md_to_xml(x) |>
    xml2::xml_contents() |>
    xml_to_md() |>
    stringr::str_extract_all(pattern = "(\\\\\\[`[^`]+`\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?|\\\\\\[[^\\]]+?\\\\\\](\\\\\\[[^\\]]+?\\\\\\])?)") |>
    purrr::list_c(ptype = character()) |>
    stringr::str_remove_all(pattern = stringr::fixed("\\"))
  
  is_short <- stringr::str_detect(string = links_roxy,
                                  pattern = "^\\[[^\\]]+\\]$")
  targets_roxy <- links_roxy
  # remove possible enclosing backticks from roxy shortlinks to make them parseable by downlit
  targets_roxy[is_short] %<>% stringr::str_remove_all(pattern = stringr::fixed("`"))
  targets_roxy %<>%
    stringr::str_extract(pattern = "\\[([^\\]]+)\\]$",
                         group = 1L) %>%
    purrr::map(downlit::autolink_url) %>%
    purrr::list_c(ptype = character())
  
  process <- !is.na(targets_roxy)
  
  # short-circuit if nothing to do 
  if (!any(process)) {
    return(x)
  }
  
  links_md <- links_roxy
  links_md[process & !is_short] %<>% stringr::str_replace(pattern = "\\[[^\\]]+\\]$",
                                                          replacement = paste0("(", targets_roxy[process & !is_short], ")"))
  links_md[process & is_short] %<>%
    stringr::str_replace(pattern = "(?<=^\\[)([^`\\]]+)",
                         replacement = "`\\1`") %>%
    paste0("(", targets_roxy[process & is_short], ")")
  
  links_md[process] |>
    magrittr::set_names(value = paste0("\\Q", links_roxy[process], "\\E")) |>
    stringr::str_replace_all(string = x)
}

#' Get roxygen2 blocks
#'
#' @description
#'
#' `r lifecycle::badge("experimental")`
#'
#' Parses the roxygen2 package documentation of a specific R package or from a single `.R` source code file.
#'
#' @param pkg,text Either a package name (`pkg`) or a character vector of \R source code lines (`text`) to extract the object's roxygen2 tag value from.
#' @param ... Further arguments passed on to [utils::download.packages()], excluding
#'   `r cli::ansi_collapse(as_md_vals(forbidden_dots$roxy_tag_value), last = " and ")`. Only relevant if `pkg` is provided.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A list of [`roxy_block`][roxygen2::roxy_block] objects.
#' @family roxy
#' @export
#'
#' @examples
#' # Either provide an R source file as a character vector `text`...
#' text <- readr::read_lines(paste0("https://raw.githubusercontent.com/r-lib/rlang/",
#'                                  "db52a58d505b65f58ba922d4752b5b0061f2a98c/R/fn.R"))
#'
#' pal::roxy_blocks(text = text) |> head(n = 3L)
#'
#' # ...or provide a package name as `pkg`
#' try(
#'   pal::roxy_blocks(pkg = "tinkr",
#'                    repos = "https://cloud.r-project.org") |>
#'     head(n = 3L)
#' )
roxy_blocks <- function(pkg = NULL,
                        ...,
                        text = NULL,
                        quiet = TRUE) {
  
  rlang::check_installed("roxygen2",
                         reason = reason_pkg_required())
  checkmate::assert_flag(quiet)
  checkmate::assert_string(pkg, null.ok = TRUE)
  checkmate::assert_character(text, null.ok = TRUE)
  is_pkg_null <- is.null(pkg)
  is_text_null <- is.null(text)
  if (is_pkg_null && is_text_null) cli::cli_abort("One of {.arg pkg} and {.arg text} mustn't be {.val NULL}.")
  if (!is_pkg_null && !is_text_null) cli::cli_abort("Only one of {.arg pkg} and {.arg text} can be provided.")
  
  if (is_pkg_null) {
    
    rlang::check_dots_empty0(...)
    
    blocks <- tryCatch(expr = roxygen2::parse_text(text = text),
                       error = \(x) list(FALSE, x))
    
    if (length(blocks) > 0L && isFALSE(blocks[[1L]])) {
      
      error_msg <- as.character(blocks[[2L]])
      is_missing_obj <- stringr::str_detect(string = error_msg,
                                            pattern = " (not found|could not find .+)\\n")
      
      cli::cli_abort(ifelse(is_missing_obj,
                            error_msg |>
                              stringr::str_remove(pattern = '^.*: ') |>
                              stringr::str_replace(pattern = "could not find function \"(.+?)\"",
                                                   replacement = "Function {.fun \\1} not found") |>
                              stringr::str_replace(pattern = "object '(.+?)'",
                                                   replacement = "Object {.var \\1}") |>
                              stringr::str_replace(pattern = "not found\\n?",
                                                   replacement = paste0("was not found when parsing {.arg text}. You might need to attach the package you're ",
                                                                        "trying to extract the roxygen2 tag value from. Or try ",
                                                                        "{.fn pkg_roxy_tag_value}.")),
                            "Error parsing {.arg text}: {error_msg}"))
    }
  } else {
    
    rlang::check_installed("rappdirs",
                           reason = reason_pkg_required())
    check_dots_named(...,
                     .fn = utils::download.packages,
                     .forbidden = forbidden_dots$roxy_tag_value)
    
    # alert if installed version doesn't match downloaded one:
    pkgs_available <-
      utils::available.packages(type = "source",
                                filters = c("R_version",
                                            "OS_type",
                                            "subarch",
                                            "CRAN")) |>
      tibble::as_tibble() |>
      dplyr::filter(Package == pkg)
    
    pkg_version_max_installed <-
      ls_pkg(pkg = pkg,
             as_regex = FALSE) |>
      dplyr::pull("Version") |>
      max()
    
    if (pkg_version_max_installed %in% pkgs_available$Version) { 
      
      pkgs_available %<>% dplyr::filter(Version == pkg_version_max_installed)
      
    } else {
      
      cli::cli_alert_warning(paste0("No sources available for download of locally installed version {.val pkg_version_max_installed} of package {.pkg {pkg}}. ",
                                    "Downloading sources of latest available version {.val {max(pkgs_available[, 'Version'])}} instead. If the following ",
                                    "roxygen tag parsing fails or produces unexpected results, consider updating package {.pkg {pkg}}."))
    }
    
    pkgs_available %<>%
      as.matrix() %>%
      magrittr::set_rownames(value = .[, 1L])
    
    tmp_dir <-
      rappdirs::user_cache_dir() |>
      fs::path(glue::glue("pal-roxy_tag_value-{pkg}")) |>
      fs::dir_create() |>
      fs::path_real()
    
    tmp_archive <- utils::download.packages(pkgs = pkg,
                                            destdir = tmp_dir,
                                            available = pkgs_available,
                                            ... = ...,
                                            type = "source",
                                            quiet = quiet)
    
    utils::untar(tarfile = tmp_archive[1L, 2L],
                 exdir = tmp_dir)
    
    tmp_pkg <- fs::path(tmp_dir, pkg)
    blocks <- if (quiet) suppressMessages(roxygen2::parse_package(path = tmp_pkg)) else roxygen2::parse_package(path = tmp_pkg)
    
    unlink(x = tmp_dir,
           recursive = TRUE)
  }
  
  blocks
}

#' Get roxygen2 block object
#'
#' Extracts a single object from a list of [`roxy_block`][roxygen2::roxy_block] objects.
#'
#' @param blocks A list of [`roxy_block`][roxygen2::roxy_block] objects as returned by [roxy_blocks()].
#' @param obj_name Object name to extract, usually a function name. A character scalar.
#'
#' @return A [`roxy_block`][roxygen2::roxy_block] object.
#' @family roxy
#' @export
roxy_obj <- function(blocks,
                     obj_name) {
  
  checkmate::assert_list(blocks,
                         types = "roxy_block",
                         all.missing = FALSE)
  obj_names <-
    blocks |>
    purrr::map_depth(.depth = 1L,
                     .f = purrr::pluck,
                     "object", "topic") |>
    purrr::compact() |>
    purrr::list_c(ptype = character())
  
  obj_name <- rlang::arg_match0(arg = obj_name,
                                values = obj_names)
  
  blocks[[which(obj_names == obj_name)]]
}

#' Get an object's roxygen2 tag value(s)
#'
#' Extracts the value(s) belonging to the `tag_name`s documenting `obj_name` from a list of [`roxy_block`][roxygen2::roxy_block] objects.
#'
#' @inheritParams roxy_obj
#' @param tag_names Name(s) of the [roxygen2 tag(s)](https://roxygen2.r-lib.org/articles/rd.html) (without the `@` prefix) to extract the value(s) from. A
#'   character vector.
#' @param param_name Parameter name to extract the value from. Only relevant if `"param" %in% tag_names`. A character scalar.
#'
#' @return A character vector of the same length as `tag_names`.
#' @family roxy
#' @export
#'
#' @examples
#' if (interactive()) {
#'   try(
#'     pal::roxy_blocks(pkg = "viridisLite") |>
#'       pal::roxy_tag_value(obj_name = "viridis",
#'                           tag_names = "param",
#'                           param_name = "alpha") |>
#'       cat()
#'   )
#' }
roxy_tag_value <- function(blocks,
                           obj_name,
                           tag_names,
                           param_name) {
  
  block <- roxy_obj(blocks = blocks,
                    obj_name = obj_name)
  tags <- block$tags
  checkmate::assert_subset(tag_names,
                           choices = purrr::map_chr(tags,
                                                    purrr::pluck,
                                                    "tag"),
                           empty.ok = FALSE)
  tag_names %<>% unique()
  
  if ("param" %in% tag_names) {
    
    ix_param <-
      tags |>
      purrr::map_lgl(\(x) x$tag == "param") |>
      which()
    
    param_name <- rlang::arg_match0(arg = param_name,
                                    values = purrr::map_chr(tags[ix_param],
                                                            purrr::pluck,
                                                            "val", "name"))
    i_to_keep <-
      tags[ix_param] |>
      purrr::map_lgl(\(x) x$val$name == param_name) |>
      which()
    
    block$tags %<>% magrittr::extract(-ix_param[-i_to_keep])
  }
  
  purrr::map_chr(tag_names,
                 \(x) roxygen2::block_get_tag_value(block = block,
                                                    tag = x) |>
                   when(is.list(.) ~ .$description,
                        ~ .))
}

#' Convert to verbatim Markdown
#'
#' Converts the provided \R expressions to their character representation using [base::deparse1()] and formats them as [verbatim
#' Markdown](https://pandoc.org/MANUAL.html#verbatim).
#'
#' @param ... \R expression(s) to convert to verbatim Markdown. Must be unnamed. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .eval Whether or not to evaluate the expression(s) in `...`.
#' @param .collapse String to separate the results of a single expression in `...`.
#' @param .backtick Whether or not to enclose symbolic names in backticks if they do not follow the standard syntax.
#' @param .control Deparsing options. A character vector or `NULL`. See [base::.deparseOpts] for all possible options.
#' @param .width.cutoff Cutoff (in bytes) at which line-breaking is tried. An integer scalar between `20` and `500`.
#' @param .nlines Maximum number of lines to produce. A negative value indicates no limit. An integer scalar.
#'
#' @return A character vector of the same length as `...`.
#' @seealso [pander::pandoc.verbatim()] for a similar function with different features.
#' @family md
#' @export
#'
#' @examples
#' pal::md_verb(1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |>
#'   pal::cat_lines()
#' 
#' # you can splice vector or list expressions if you like
#' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`)) |>
#'   pal::cat_lines()
#'
#' # to evaluate, or not to evaluate, that is the question
#' pal::md_verb(!!!1:3, "It", is.logical, `||`, FALSE, quote(`?!`),
#'              .eval = FALSE) |>
#'   pal::cat_lines()
#' 
#' # unevaluated expressions do not need to exist
#' pal::md_verb(Not, actual(), `R-expressions`,
#'              .eval = FALSE) |>
#'   pal::cat_lines()
#' 
#' # you can opt out of wrapping non-standard syntax in additional backticks
#' pal::md_verb(Not, actual(), `R-expressions`,
#'              .eval = FALSE,
#'              .backtick = FALSE) |>
#'   pal::cat_lines()

# nolint start: object_name_linter.
md_verb <- function(...,
                    .eval = TRUE,
                    .collapse = " ",
                    .backtick = TRUE,
                    .control = c("keepNA",
                                 "keepInteger",
                                 "niceNames",
                                 "showAttributes",
                                 "warnIncomplete"),
                    .width.cutoff = 500L,
                    .nlines = -1L) {
  
  checkmate::assert_flag(.eval)
  checkmate::assert_flag(.backtick)
  
  if (.eval) {
    result <- rlang::list2(...)
  } else {
    result <- eval(substitute(alist(...)))
  }
  
  result |>
    purrr::map(\(x) {
      
      deparsed <- deparse1(expr = x,
                           collapse = .collapse,
                           width.cutoff = .width.cutoff,
                           backtick = .backtick,
                           control = .control,
                           nlines = .nlines)
      
      escape_backtick <- stringr::str_detect(string = deparsed,
                                             pattern = stringr::fixed("`"))
      
      paste0("`", "` "[escape_backtick], deparsed, " `"[escape_backtick], "`")
    }) |>
    purrr::list_c(ptype = character())
}
# nolint end

#' Convert a character vector to a Markdown list
#'
#' Convenience wrapper around [pander::pandoc.list.return()] to convert a character vector (or something coercible to) to a [Markdown
#' list](https://pandoc.org/MANUAL.html#lists).
#'
#' @param x \R object, e.g. a character vector. Each element of `x` will become an item in the resulting Markdown list.
#' @param type Markdown list type. One of
#'   - `"unordered"` for an unordered aka [bullet list](https://pandoc.org/MANUAL.html#bullet-lists). Corresponds to
#'     [`<ul>`](https://developer.mozilla.org/docs/Web/HTML/Element/ul) in HTML.
#'   - `"ordered"` for an ordered aka [numbered list](https://pandoc.org/MANUAL.html#ordered-lists). Corresponds to
#'     [`<ol>`](https://developer.mozilla.org/docs/Web/HTML/Element/ol) in HTML.
#'   - `"ordered_roman"` for a variation of an ordered/numbered list with uppercase roman numerals instead of Arabic numerals as list markers.
#' @param tight Whether or not to add additional spacing between list items.
#' @param indent_lvl Level of indentation of the resulting Markdown list. For each level, four additional spaces are added in front of every list item. An
#'   integer scalar.
#' @param wrap An optional string to wrap the list items in.
#'
#' @return If `x` is empty, a character vector of length zero. Otherwise, a character scalar.
#' @family md
#' @export
#'
#' @examples
#' rownames(mtcars) |>
#'   pal::as_md_list() |>
#'   cat()
as_md_list <- function(x,
                       type = c("unordered", "ordered", "ordered_roman"),
                       tight = TRUE,
                       indent_lvl = 0L,
                       wrap = NULL) {
  
  type <- rlang::arg_match(type)
  checkmate::assert_flag(tight)
  checkmate::assert_count(indent_lvl)
  checkmate::assert_string(wrap,
                           null.ok = TRUE)
  rlang::check_installed("pander",
                         reason = reason_pkg_required())
  x %<>% as_chr()
  
  if (length(x) == 0L) {
    return(character())
  }
  
  pander::pandoc.list.return(elements = paste0(wrap, x, wrap),
                             style = switch(EXPR = type,
                                            unordered = "bullet",
                                            ordered = "ordered",
                                            ordered_roman = "roman"),
                             loose = !tight,
                             indent.level = indent_lvl,
                             add.line.breaks = FALSE,
                             add.end.of.list = FALSE)
}

#' Format values as verbatim Markdown
#'
#' Converts the given values to a character vector, formatted as [verbatim Markdown](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally
#' wrapped in double quotes. Metadata like [types][base::typeof] or [attributes][base::attributes] of the input is *not* represented in the output, only its
#' *values*.
#'
#' @param ... Values to be formatted. One or more \R objects.
#'
#' @return A character vector.
#' @family md
#' @export
#'
#' @examples
#' list(1L, 2.2, "other") |>
#'   pal::as_md_vals()
#'   
#' # note that values are flattened before further processing them, so this yields the same result
#' list(list(list(1L, list(2.2), list(list("other"))))) |>
#'   pal::as_md_vals()
as_md_vals <- function(...) {
  
  rlang::list2(...) |>
    as_flat_list(is_node = is.list) |>
    purrr::map(\(x) {
      
      # wrap chr vals in double quotes
      if (is.character(x)) x %<>% wrap_chr()
      
      # determine nr of backticks to wrap result in, safely escaping any number of existing backticks
      add_space <- TRUE
      backticks <-
        x |>
        stringr::str_extract_all(pattern = "`+") |>
        purrr::list_c(ptype = character()) |>
        unique() %>%
        magrittr::extract(nchar(.) == safe_max(nchar(.)))
      
      if (length(backticks) == 0L) {
        add_space <- FALSE
        backticks <- "`"
      } else {
        backticks %<>% paste0("`")
      }
      
      paste0(backticks, " "[add_space], x, " "[add_space], backticks)
    }) |>
    purrr::list_c(ptype = character())
}

#' List values as a Markdown list
#'
#' Generates a Markdown list of the given values formatted as [verbatim](https://pandoc.org/MANUAL.html#verbatim). Character values are additionally wrapped in
#' double quotes.
#'
#' @inheritParams as_md_vals
#'
#' @return A character scalar.
#' @family md
#' @export
#'
#' @examples
#' list(1L, 2.2, "other") |>
#'   pal::as_md_val_list() |>
#'   cat()
#'   
#' # note that values are flattened before listing them, so this yields the same list
#' list(list(list(1L, list(2.2), list(list("other"))))) |>
#'   pal::as_md_val_list() |>
#'   cat()
as_md_val_list <- function(...) {
  
  as_md_list(as_md_vals(...))
}

#' Convert dataframe/tibble to Markdown pipe table
#'
#' Convenience wrapper around [`knitr::kable(format = "pipe")`][knitr::kable] to create a [Markdown pipe
#' table](https://pandoc.org/MANUAL.html#extension-pipe_tables).
#' 
#' # Create tables dynamically in roxygen2 documentation
#' 
#' This function can be useful to create tables inside [roxygen2][roxygen2::roxygen2] documentation programmatically from data using
#' [dynamic \R code](https://roxygen2.r-lib.org/articles/rd-formatting.html#dynamic-r-code-1).
#' 
#' For example, the inline code
#' 
#' `` `r mtcars |> head() |> pipe_table()` ``
#'
#' should produce the following table in [roxygen2 7.1.0](https://www.tidyverse.org/blog/2020/03/roxygen2-7-1-0/) and above:
#'
#' `r mtcars |> head() |> pipe_table()`
#'
#' @inherit knitr::kable details
#'
#' @inheritParams knitr::kable
#' @param x Dataframe/tibble/matrix to be converted to a pipe table.
#' @param incl_rownames Whether to include row names or not. A logical scalar or `NULL`. If `NULL`, row names are included if `rownames(x)` is neither `NULL`
#'   nor identical to `seq_len(nrow(x))`.
#' @param strong_colnames Whether or not to highlight column names by formatting them `<strong>` (wrapping them in two asterisks).
#' @param strong_rownames Whether or not to highlight row names by formatting them `<strong>` (wrapping them in two asterisks).
#' @param align Column alignment. Either `NULL` for auto-alignment or a character vector consisting of `'l'` (left), `'c'` (center) and/or `'r'` (right). If
#'   `align = NULL`, numeric columns are right-aligned, and other columns are left-aligned. If `length(align) == 1L`, the string will be expanded to a vector
#'   of individual letters, e.g. `'clc'` becomes `c('c', 'l', 'c')`.
#' @param format_args A list of arguments to be passed to [base::format()] to format table values, e.g. `list(big.mark = ',')`.
#'
#' @return A character vector.
#' @seealso [xfun::md_table()]
#' @family md
#' @export
#'
#' @examples
#' mtcars |> head() |> pal::pipe_table() |> pal::cat_lines()
pipe_table <- function(x,
                       incl_rownames = NULL,
                       strong_colnames = TRUE,
                       strong_rownames = TRUE,
                       align = NULL,
                       label = NULL,
                       digits = getOption("digits"),
                       format_args = list()) {
  
  checkmate::assert_flag(incl_rownames,
                         null.ok = TRUE)
  checkmate::assert_flag(strong_colnames)
  checkmate::assert_flag(strong_rownames)
  rlang::check_installed("knitr",
                         reason = reason_pkg_required())
  
  if (is.null(incl_rownames)) {
    incl_rownames <- !identical(rownames(x), as.character(seq_len(nrow(x))))
  }
  
  # format rownames <strong> if requested and sensible
  if ((is.null(incl_rownames) || incl_rownames) && strong_rownames) {
    # setting rownames on a tibble is deprecated, thus we convert to dataframe
    x %<>% as.data.frame()
    rownames(x) %<>% paste0("**", ., "**")
  }
  
  kable_args <-
    alist(x = x,
          format = "pipe",
          digits = digits,
          row.names = incl_rownames,
          col.names = colnames(x) |> when(strong_colnames ~ paste0("**", ., "**"),
                                          ~ .),
          label = label,
          format.args = format_args) |>
    when(!is.null(align) ~ c(., alist(align = align)),
         ~ .)
  
  do.call(what = knitr::kable,
          args = kable_args)
}

#' Strip Markdown formatting from character vector
#'
#' Removes all Markdown formatting from a character vector.
#'
#' This function relies on [commonmark::markdown_text()] which [supports the CommonMark specification plus the Github
#' extensions](https://github.com/r-lib/commonmark#readme). Unfortunately, [Markdown footnotes](https://pandoc.org/MANUAL.html#footnotes) can't be stripped
#' using `commonmark::markdown_text()`. Therefore a separate option `strip_footnotes` is offered which relies on regular expressions to remove inline footnotes
#' and footnote definitions and references.
#'
#' @param x A character vector to strip Markdown formatting from.
#' @param strip_footnotes Whether to remove Markdown footnotes, too. If `FALSE`, footnotes are canonicalized (to have sequential integer identifiers).
#'
#' @return A character vector of the same length as `x`.
#' @family md
#' @export
#'
#' @examples
#' pal::strip_md(
#'   "A **MD** formatted [string](https://en.wikipedia.org/wiki/String_(computer_science))"
#' )
#'
#' # link references are only removed *iff* the reference is included in `x`:
#' pal::strip_md("[A reference link][refid]\n\n[refid]: https://example.com")
#' pal::strip_md("[A reference link][refid]\n\n_No ref here..._")
strip_md <- function(x,
                     strip_footnotes = TRUE) {
  
  rlang::check_installed("commonmark",
                         version = "1.9.0",
                         reason = reason_pkg_required())
  checkmate::assert_character(x)
  checkmate::assert_flag(strip_footnotes)
  
  purrr::map_chr(x,
                 \(x2) {
                   
                   if (is.na(x2)) {
                     x2
                   } else {
                     
                     result <- commonmark::markdown_text(text = x2,
                                                         footnotes = TRUE,
                                                         extensions = TRUE)
                     if (!isTRUE(endsWith(x2, "\n"))) {
                       result %<>% stringr::str_remove(pattern = "\n$")
                     }
                     
                     if (strip_footnotes) {
                       result %<>% strip_md_footnotes()
                     }
                     
                     result
                   }
                 })
}

#' Strip Markdown footnotes from character vector
#'
#' Removes all Markdown footnotes from a character vector.
#'
#' Note that it is not checked whether footnote references and definitions actually match (by identifier), thus they are removed even if they were invalid.
#'
#' @param x A character vector to strip Markdown footnotes from. Note that elements in `x` are processed as separate Markdown domains, i.e. _not_ as individual
#' lines belonging to the same Markdown document.
#'
#' @return A character vector of the same length as `x`.
#' @family md
#' @export
#'
#' @examples
#' pal::strip_md_footnotes(
#'   "An **MD** formatted string with footnote[^fn].\n\n[^fn]: A note.\n"
#' )
strip_md_footnotes <- function(x) {
  
  checkmate::assert_character(x) |>
    stringr::str_split(pattern = "(\\n{3,}|\\n{2}(?! {4}))") |>
    purrr::map_chr(\(x2) {
      
      # remove footnote defs
      ix_to_rm <-
        x2 |>
        stringr::str_detect("^\\[\\^[^\\s\\]]+\\]: .*") |>
        which()
      
      if (length(ix_to_rm)) {
        x2 <- x2[-ix_to_rm]
      }
      
      # remove inline footnotes and refs
      x2 %<>% stringr::str_remove_all(pattern = "\\[\\^[^\\s\\]]+\\]|\\^\\[.+?\\]")
      
      if (length(x2) > 0L && !all(is.na(x2))) {
        x2 %<>% paste0(collapse = "\n\n")
      }
      
      x2
    })
}

#' Parse (R) Markdown as CommonMark XML tree
#'
#' Parses (R) Markdown file content according to the [CommonMark](https://commonmark.org/) specification and returns it as an XML parse tree.
#'
#' @inheritParams as_line_feed_chr
#' @inheritParams gitlab_document
#' @inheritParams commonmark::markdown_xml
#' @param md (R) Markdown file content as a character scalar.
#' @param hardbreaks Whether or not to treat newlines as hard line breaks.
#' @param strip_xml_ns Whether or not to [remove the default XML namespace][xml2::xml_ns_strip] (`d1`) assigned by [commonmark::markdown_xml()].
#'
#' @return An [`xml_document`][xml2::xml_document-class].
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#' 
#' Some prose.
#' 
#' ## A subtitle
#' 
#' More prose.
#' 
#' ## Another subtitle
#' 
#' Out of prose here.
#' 
#' ### A sub-subtitle
#' 
#' I'm dug in.
#' 
#' # Another title
#' 
#' A last word." |> pal::md_to_xml()
md_to_xml <- function(md,
                      smart_punctuation = FALSE,
                      hardbreaks = FALSE,
                      normalize = TRUE,
                      sourcepos = FALSE,
                      footnotes = TRUE,
                      extensions = c("strikethrough", "table", "tasklist"),
                      eol = c("LF", "CRLF", "CR", "LFCR"),
                      strip_xml_ns = TRUE) {
  
  rlang::check_installed("commonmark",
                         reason = reason_pkg_required())
  rlang::check_installed("xml2",
                         reason = reason_pkg_required())
  result <-
    strip_yaml_header(rmd = md,
                      eol = eol) |>
    commonmark::markdown_xml(hardbreaks = hardbreaks,
                             smart = smart_punctuation,
                             normalize = normalize,
                             sourcepos = sourcepos,
                             footnotes = footnotes,
                             extensions = extensions) |>
    xml2::read_xml() |>
    when(strip_xml_ns ~ xml2::xml_ns_strip(.),
         ~ .)
  
  # `xml2::xml_ns_strip()` returns its result invisibly, so we make it visible again
  (result)
}

#' Determine CommonMark XML subnode indices
#'
#' Determines the XML children node indices for every XML node at the highest level of `xml` by interpreting [Markdown heading
#' levels](https://pandoc.org/MANUAL.html#headings) (1–6).
#'
#' [commonmark::markdown_xml()] (and so [md_to_xml()] which builds upon it) **do** parse (R) Markdown file content according to the
#' **[CommonMark](https://commonmark.org/) specification**, but **do not** return any information about the document's **heading hierarchy**.
#' `md_xml_subnode_ix()` fills this gap by giving the hierarchy structure in the form of the XML subnode indices for every node at the highest level of `xml`.
#'
#' @param xml CommonMark parse tree. An [`xml_document`][xml2::xml_document-class], [`xml_nodeset`][xml2::xml_nodeset-class] or
#'   [`xml_node`][xml2::xml_node-class].
#'
#' @return A list of integer vectors of the same length as the number of XML nodes at the highest level of `xml`.
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#' 
#' Some prose.
#' 
#' ## A subtitle
#' 
#' More prose.
#' 
#' ## Another subtitle
#' 
#' Out of prose here.
#' 
#' ### A sub-subtitle
#' 
#' I'm dug in.
#' 
#' # Another title
#' 
#' A last word." |>
#'   pal::md_to_xml() |>
#'   pal::md_xml_subnode_ix()
md_xml_subnode_ix <- function(xml) {
  
  assert_class_any(xml,
                   classes = c("xml_document", "xml_nodeset", "xml_node"),
                   name = "xml")
  rlang::check_installed("xml2",
                         reason = reason_pkg_required())
  
  xml_names <- xml2::xml_name(xml)
  
  if (length(xml_names) == 1L && xml_names == "document") {
    xml %<>% xml2::xml_contents()
  }
  
  purrr::map(seq_along(xml),
             \(i) subnode_ix(xml_nodes = xml,
                             i = i))
}

#' Convert from CommonMark XML to (R) Markdown
#'
#' @inheritParams md_xml_subnode_ix
#'
#' @return A character scalar.
#' @family commonmark
#' @export
#'
#' @examples
#' "# A title
#' 
#' Some prose.
#' 
#' ## A subtitle
#' 
#' More prose.
#' 
#' ## Another subtitle
#' 
#' Out of prose here.
#' 
#' ### A sub-subtitle
#' 
#' I'm dug in.
#' 
#' # Another title
#' 
#' A last word." |>
#'   pal::md_to_xml() |>
#'   xml2::xml_contents() |>
#'   pal::xml_to_md() |>
#'   cat()
xml_to_md <- function(xml) {
  
  assert_class_any(xml,
                   classes = c("xml_document", "xml_nodeset", "xml_node"),
                   name = "xml")
  rlang::check_installed("tinkr",
                         reason = reason_pkg_required())
  rlang::check_installed("xml2",
                         reason = reason_pkg_required())
  rlang::check_installed("xslt",
                         reason = reason_pkg_required())
  xml |>
    as.character() |>
    paste0(collapse = "\n") |>
    # trim whitespace in case `xml` was already of type character
    stringr::str_trim() |>
    # add CommonMark XML namespace
    when(stringr::str_detect(string = .,
                             pattern = "^<document[>\\s]") ~ .,
         ~ paste0('<document xmlns="', as.character(tinkr::md_ns()), '">', ., '</document>')) |>
    xml2::read_xml() |>
    # convert XML to CommonMark
    xslt::xml_xslt(stylesheet = xml2::read_xml(tinkr::stylesheet()))
}

#' Build `README.Rmd`
#'
#' A simpler, but considerably faster alternative to [devtools::build_readme()] since it doesn't install your package in a temporary library before building the
#' `README.Rmd`. This has the pleasant side effect that, other than the latter function, it also works for `.Rmd` files which aren't part of an \R package.
#' 
#' Note that for public package repositories, it's recommended to use [devtools::build_readme()] since it ensures the `README.Rmd` can be built _reproducibly_,
#' which means all the objects and files it references must be accessible from the repository.
#' 
#' `r pkgsnip::md_snip("rstudio_addin")`
#'
#' @param input Path to the R Markdown README file to be built. A character scalar.
#' @param output Path to the built Markdown README. A character scalar.
#' @param build_index_md Whether to build a separate [pkgdown][pkgdown::pkgdown]-optimized `pkgdown/index.md` alongside `output` (i.e. in the same parent
#'   directory). If `NULL`, it will only be built if the parent directory of `output` [contains a pkgdown configuration file][is_pkgdown_dir]. Note that it will
#'   be built with the \R option `pal.build_readme.is_pkgdown` set to `TRUE`, allowing for conditional content inclusion in `input` – e.g. via the [code chunk
#'   option](https://yihui.org/knitr/options/#code-evaluation) `eval = isTRUE(getOption("pal.build_readme.is_pkgdown"))`.
#' @param env Environment in which code chunks are to be evaluated, e.g. [base::parent.frame()], [base::new.env()], or [base::globalenv()].
#' @param clean Whether or not to delete intermediate files created by [rmarkdown::render()].
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return The path to `input` as a character scalar, invisibly.
#' @family rmd_knitr
#' @export
build_readme <- function(input = "README.Rmd",
                         output = "README.md",
                         build_index_md = NULL,
                         env = parent.frame(),
                         clean = TRUE,
                         quiet = FALSE) {
  
  checkmate::assert_environment(env)
  checkmate::assert_string(input)
  checkmate::assert_path_for_output(output,
                                    overwrite = TRUE)
  checkmate::assert_flag(build_index_md,
                         null.ok = TRUE)
  checkmate::assert_flag(clean)
  checkmate::assert_flag(quiet)
  rlang::check_installed("knitr",
                         reason = reason_pkg_required())
  rlang::check_installed("rmarkdown",
                         reason = reason_pkg_required())
  # add args to `env`
  rlang::env_bind(.env = env,
                  input = input,
                  output = output,
                  build_index_md = build_index_md)
  
  # add `pkg_metadata` to env
  parent_dir <- fs::path_dir(input) |> fs::path_abs()
  
  if (is_pkg_dir(parent_dir)) {
    rlang::env_bind(.env = env,
                    pkg_metadata = desc_list(parent_dir))
  }
  
  if (!quiet) {
    cli_progress_step_quick("Building {.file {input}}")
  }
  
  # generate `output`
  ## render to the output format specified in the YAML header (defaults to `rmarkdown::md_document`)
  rmarkdown::render(input = input,
                    output_file = output,
                    clean = clean,
                    quiet = TRUE,
                    envir = env)
  
  # generate `index.md` if indicated
  if (!isFALSE(build_index_md)) {
    
    output_dir <- fs::path_dir(output)
    
    if (is_pkgdown_dir(output_dir)) {
      
      index_md_path <-
        fs::path(output_dir, "pkgdown") |>
        fs::dir_create() |>
        fs::path("index.md") |>
        checkmate::assert_path_for_output(overwrite = TRUE) |>
        fs::path_abs()
      
      # clean Rmd file
      rlang::check_installed("brio",
                             reason = reason_pkg_required())
      rlang::check_installed("withr",
                             reason = reason_pkg_required())
      
      tmp_file <- fs::file_temp(pattern = "tmp-pal-build_readme",
                                ext = fs::path_ext(input))
      
      brio::read_file(input) |>
        # remove possible trailing horizontal line in Rmd file since pkgdown always adds one below content
        # TODO: submit PR to pkgdown fixing this?
        stringr::str_replace(pattern = " {0,3}([-\\*_]{3,}|<hr */?>)(\\s*(\\n\\[\\^[\\w-]+\\]:.*\\n?)*$)",
                             replacement = "\\2") |>
        # remove `align` and `height` <img> tags (rely on custom CSS file `pkgdown/extra.css` instead)
        stringr::str_replace_all(pattern = "(<img [^>]+)(align=['\"].*?['\"]\\s*)",
                                 replacement = "\\1") |>
        stringr::str_replace_all(pattern = "(<img [^>]+)(height=['\"].*?['\"]\\s*)",
                                 replacement = "\\1") |>
        brio::write_file(path = tmp_file)
      
      # render `pkgdown/index.md`
      withr::with_options(
        new = list(pal.build_readme.is_pkgdown = TRUE),
        code = rmarkdown::render(input = tmp_file,
                                 output_file = index_md_path,
                                 output_format =
                                   rmarkdown::md_document(variant = "markdown",
                                                          # disable Pandoc extensions in input which rmarkdown only adds for backwards compatibility
                                                          md_extensions = c("-autolink_bare_uris",
                                                                            "-tex_math_single_backslash"),
                                                          pandoc_args = "--columns=9999") |>
                                   # overwrite Pandoc output format (i.a. disable Pandoc's raw attributes to have more control over inline HTML)
                                   purrr::list_modify(pandoc = list(to = "markdown-raw_attribute")),
                                 knit_root_dir = parent_dir,
                                 clean = clean,
                                 quiet = TRUE,
                                 envir = env)
      )
      
      fs::file_delete(tmp_file)
    }
  }
}

#' Determine current knitr table format
#'
#' Determines the current knitr table format based on the \R option
#' [`knitr.table.format`](https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#kable-formats) which can either be set directly to a valid format string or
#' to a function returning one of these strings conditionally.
#'
#' This is basically a convenience wrapper to be able to access the current `knitr.table.format` in a hassle-free way, i.e. it provides the conditional logic to
#' account for the possibility that `knitr.table.format` is set to a function rather than a format string.
#'
#' @param default knitr table format to fall back to when the \R option `knitr.table.format` is not set. One of
#' `r fn_param_defaults(param = "default", fn = knitr_table_format) |> wrap_chr("\x60") |> as_md_list()`
#'   
#' See [knitr::kable()]'s `format` argument for details.
#'
#' @return A character scalar.
#' @family rmd_knitr
#' @export
knitr_table_format <- function(default = c("pipe",
                                           "simple",
                                           "html",
                                           "latex",
                                           "rst")) {
  
  allowed_formats <- eval(formals()$default)
  
  opt <- getOption("knitr.table.format")
  result <- opt %||% rlang::arg_match(default)
  
  if (is.function(result)) result <- result()
  
  if (!(result %in% allowed_formats)) {
    
    cli::cli_abort(paste0("R option {.field knitr.table.format} must evaluate to one of ",
                          enum_str(x = paste0("{.val ", allowed_formats, "}"),
                                   sep2 = " or "),
                          ", but is {.code {deparse1(opt)}}",
                          ifelse(is.function(opt),
                                 " which evaluates to {.val {result}}",
                                 ""),
                          "."))
  }
  
  result
}

#' Strip YAML header from R Markdown
#'
#' Extracts the body from R Markdown file content, stripping a possible [YAML metadata 
#' block](https://bookdown.org/yihui/rmarkdown-cookbook/rmarkdown-anatomy.html#yaml-metadata) at the beginning.
#'
#' Note that for the [R Markdown file format](https://rmarkdown.rstudio.com/), the [YAML metadata 
#' block](https://pandoc.org/MANUAL.html#extension-yaml_metadata_block) must occur at the beginning of the document (and there can be only one). Additional
#' whitespace characters (incl. newlines) before the YAML metadata block are allowed.
#'
#' @inheritParams as_line_feed_chr
#' @param rmd R Markdown file content as a character scalar.
#'
#' @return Body of the R Markdown file as a character vector of lines.
#' @family rmd_knitr
#' @export
#'
#' @examples
#' rmd <- "
#' ---
#' output: pal::gitlab_document
#' ---
#' 
#' # A heading
#' 
#' Some prose.
#' "
#'
#' pal::cat_lines(rmd)
#' 
#' rmd |>
#'   pal::strip_yaml_header() |>
#'   pal::cat_lines()
strip_yaml_header <- function(rmd,
                              eol = c("LF", "CRLF", "CR", "LFCR")) {
  
  checkmate::assert_string(rmd)
  
  has_yaml <- stringr::str_detect(string = rmd,
                                  pattern = "^(\\n\\s*)?---\\s*\\n.*(---|...)\\s*\\n")
  eol %<>% as_line_feed_chr()
  rmd %<>% stringr::str_split_1(pattern = eol)
  last_yaml_line_nr <- 0L
  
  if (has_yaml) {
    
    last_yaml_line_nr <-
      rmd |>
      stringr::str_locate("^---\\s*$") |>
      tibble::as_tibble() |>
      tibble::rowid_to_column() |>
      dplyr::filter(!dplyr::if_any(.cols = everything(),
                                   .fns = is.na)) |>
      dplyr::pull("rowid") |>
      purrr::chuck(2L) |>
      min(rmd |>
            stringr::str_locate("^\\.{3}\\s*$") |>
            tibble::as_tibble() |>
            tibble::rowid_to_column() |>
            dplyr::filter(!dplyr::if_any(.cols = everything(),
                                         .fns = is.na)) |>
            dplyr::pull("rowid") |>
            purrr::pluck(1L),
          na.rm = TRUE)
  }
  
  rmd[(last_yaml_line_nr + 1L):length(rmd)]
}

#' Convert to GitLab Flavored Markdown
#'
#' Format for converting from R Markdown to [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md).
#'
#' This is the GitLab equivalent to the [`github_document`][rmarkdown::github_document] R Markdown [output
#' format](https://bookdown.org/yihui/rmarkdown/output-formats.html). It basically ensures Pandoc is called with a custom set of options optimized for maximum
#' compatibility with [GitLab Flavored Markdown](https://gitlab.com/help/user/markdown.md).
#'
#' ## Caveats regarding GitLab-Flavored-Markdown-specific features
#'
#' GitLab Flavored Markdown extends the [CommonMark](https://spec.commonmark.org/current/) Markdown specification with a bunch of
#' [special features](https://gitlab.com/help/user/markdown.md#gfm-extends-standard-markdown). To be able to properly make use of them, observe the following
#' points:
#'
#' - For [inline diffs](https://gitlab.com/help/user/markdown.md#inline-diff), only use curly braces (`{}`), not square brackets (`[]`). The latter will be
#'   escaped by Pandoc during conversion and thus not recognized by GitLab as starting/ending an inline diff.
#'
#' - You have to set `smart_punctuation = FALSE` in order to leave certain
#'   [special GitLab references](https://gitlab.com/help/user/markdown.md#special-gitlab-references) (like commit range comparisons) untouched for GitLab to
#'   interpret them correctly.
#'
#'   All the special GitLab references for snippets and labels that start with a tilde (`~`) or a dollar sign (`$`) won't work because these characters will be
#'   escaped by Pandoc during conversion.
#'
#' - The `[[_TOC_]]` tag to let GitLab [generate a table of contents](https://gitlab.com/help/user/markdown.md#table-of-contents) won't work because it will be
#'   escaped by Pandoc during conversion. You can let Pandoc generate the TOC instead by setting `toc = TRUE`.
#'
#' - [Multiline blockquotes](https://gitlab.com/help/user/markdown.md#multiline-blockquote) won't work because the fence delimiters `>>>` will be escaped by
#'   Pandoc during conversion.
#'
#' @inheritParams rmarkdown::output_format
#' @inheritParams rmarkdown::md_document
#' @param smart_punctuation Whether or not to enable [Pandoc's `smart` extension](https://pandoc.org/MANUAL.html#extension-smart) which converts straight quotes
#'   to curly quotes, `---` to an em-dash (—), `--` to an en-dash (–), and `...` to ellipses (…). It also replaces regular spaces after certain abbreviations
#'   such as `Mr.` with [non-breaking spaces](https://en.wikipedia.org/wiki/Non-breaking_space).
#' @param parse_emoji_markup Whether to enable [Pandoc's `emoji` extension](https://pandoc.org/MANUAL.html#extension-emoji) which parses emoji markup (e.g.
#'   `:smile:`) as Unicode emoticons.
#' @param toc Include a table of contents (TOC) [automatically generated by Pandoc](https://pandoc.org/MANUAL.html#option--toc). Note that the TOC will be
#'   placed _before_ the README's body, meaning also _before_ the first Markdown header.
#' @param autolink_bare_uris Enable the [`autolink_bare_uris` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-autolink_bare_uris) which
#'   makes all absolute URIs into links, even when not surrounded by pointy braces `<...>`.
#' @param tex_math_single_backslash Enable the
#'   [`tex_math_single_backslash` Pandoc Markdown extension](https://pandoc.org/MANUAL.html#extension-tex_math_single_backslash) which causes anything between
#'   `\(` and `\)` to be interpreted as inline TeX math, and anything between `\[` and `\]` to be interpreted as display TeX math. Note: a drawback of this
#'   extension is that it precludes escaping `(` and `[`.
#'
#' @return R Markdown output format intended to be fed to [rmarkdown::render()].
#' @family rmd_format
#' @export
#'
#' @examples
#' \donttest{
#' tmp_file <- fs::file_temp()
#' download.file(url = paste0("https://gitlab.com/rpkg.dev/pal/-/raw/",
#'                            "77b791b9a5e80b2980a67d4ed7dc06553c4450ca/",
#'                            "README.Rmd"),
#'               destfile = tmp_file,
#'               quiet = TRUE,
#'               mode = "wb")
#'
#' rmarkdown::render(input = tmp_file,
#'                   output_format = pal::gitlab_document(),
#'                   quiet = TRUE) |>
#'   brio::read_lines() |>
#'   pal::cat_lines()}
gitlab_document <- function(smart_punctuation = TRUE,
                            parse_emoji_markup = FALSE,
                            df_print = "kable",
                            toc = FALSE,
                            toc_depth = 6L,
                            fig_width = 7L,
                            fig_height = 5L,
                            dev = "png",
                            preserve_yaml = FALSE,
                            autolink_bare_uris = FALSE,
                            tex_math_single_backslash = FALSE) {
  
  checkmate::assert_flag(smart_punctuation)
  checkmate::assert_flag(parse_emoji_markup)
  checkmate::assert_flag(toc)
  checkmate::assert_int(toc_depth,
                        lower = 1L,
                        upper = 6L)
  checkmate::assert_flag(preserve_yaml)
  checkmate::assert_flag(autolink_bare_uris)
  checkmate::assert_flag(tex_math_single_backslash)
  rlang::check_installed("rmarkdown",
                         reason = reason_pkg_required())
  
  # create rmd output format
  rmarkdown::output_format(
    knitr = rmarkdown::knitr_options_html(fig_width = fig_width,
                                          fig_height = fig_height,
                                          fig_retina = NULL,
                                          keep_md = FALSE,
                                          dev = dev),
    pandoc = rmarkdown::pandoc_options(to =
                                         c("markdown",
                                           "+emoji"[parse_emoji_markup],
                                           "-smart",
                                           "-simple_tables",
                                           "-multiline_tables",
                                           "-grid_tables",
                                           "-header_attributes",
                                           "-fenced_code_attributes",
                                           "-inline_code_attributes",
                                           "-link_attributes",
                                           "-raw_attribute",
                                           "-pandoc_title_block",
                                           "-yaml_metadata_block"[!preserve_yaml]) |>
                                         paste0(collapse = ""),
                                       from =
                                         c("markdown",
                                           "+autolink_bare_uris"[autolink_bare_uris],
                                           "+tex_math_single_backslash"[tex_math_single_backslash],
                                           "-smart"[!smart_punctuation]) |>
                                         paste0(collapse = ""),
                                       args = c("--columns=9999",
                                                "--standalone",
                                                "--table-of-contents"[toc],
                                                paste0("--toc-depth=", toc_depth)[toc])),
    df_print = df_print,
    pre_knit = NULL,
    post_knit = NULL,
    pre_processor = NULL,
    intermediates_generator = NULL,
    post_processor = NULL,
    on_exit = NULL,
    base_format = NULL
  )
}

#' Write HTML widget to file
#'
#' Writes an HTML widget's main container `<div>` and data `<script>` tags to file.
#' 
#' Other than [htmlwidgets::saveWidget()], this function does *not* write a complete HTML document but only the widget's core tags. Hence it is intended to
#' create files which are in turn included in another HTML file, e.g. via a static site generator. The HTML widget's dependencies must be handled separately,
#' e.g. using [write_widget_deps()].
#'
#' To directly get the HTML widget's core tags as a character scalar, use `as.character(htmltools::tagList(x))`.
#'
#' @param x HTML widget object.
#' @param path File path to write the HTML widget's core tags to.
#'
#' @return The content written to `path` as a character scalar, invisibly.
#' @family htmlwidget
#' @export
write_widget <- function(x,
                         path) {
  
  checkmate::assert_class(x,
                          classes = "htmlwidget")
  checkmate::assert_path_for_output(path,
                                    overwrite = TRUE)
  rlang::check_installed("brio",
                         reason = reason_pkg_required())
  rlang::check_installed("htmltools",
                         reason = reason_pkg_required())
  x |>
    htmltools::tagList() |>
    as.character() |>
    brio::write_lines(path = path)
}

#' Write HTML widget dependencies
#'
#' Writes an HTML widget's JS and CSS dependencies to files and returns a snippet for their inclusion in the `<head>` of an HTML document.
#'
#' To write the HTML widget's core tags to file, use [write_widget()].
#'
#' @inheritParams write_widget
#' @param path Directory path to write the HTML widget's dependencies to. A subfolder will be created for each dependency.
#' @param path_base Base path to determine the relative root of `path` for inclusion of the widget's dependencies. Must be a parent of or equal to `path`.
#' @param path_head_snippet Optional file path to write the snippet for inclusion of the widget's dependencies in the `<head>` of an HTML document to.
#' @param quiet Whether or not to print the snippet for inclusion of the widget's dependencies to console.
#'
#' @return An [HTML][htmltools::HTML] object suitable for inclusion in the `<head>` of an HTML document, invisibly.
#' @family htmlwidget
#' @export
write_widget_deps <- function(x,
                              path,
                              path_base,
                              path_head_snippet = NULL,
                              quiet = !is.null(path_head_snippet)) {
  
  checkmate::assert_class(x,
                          classes = "htmlwidget")
  checkmate::assert_directory_exists(path,
                                     access = "w")
  if (!fs::path_has_parent(path = path,
                           parent = path_base)) {
    cli::cli_abort("{.arg path_base} is not a parent of (or equal to) {.arg path}.")
  }
  if (!is.null(path_head_snippet)) {
    checkmate::assert_path_for_output(path_head_snippet,
                                      overwrite = TRUE)
  }
  checkmate::assert_flag(quiet)
  rlang::check_installed("brio",
                         reason = reason_pkg_required())
  rlang::check_installed("htmltools",
                         reason = reason_pkg_required())
  deps <-
    htmltools::findDependencies(x) |>
    purrr::map(\(x) {
      htmltools::copyDependencyToDir(dependency = x,
                                     outputDir = path) |>
        htmltools::makeDependencyRelative(basepath = path_base)
    })
  
  head_snippet <-
    htmltools::renderDependencies(deps) |>
    # manually add the root slash
    stringr::str_replace_all(pattern = "(\\s(src|href)\\s*=\\s*[\"'])([^/])",
                             replacement = "\\1/\\3") |>
    htmltools::HTML()
  
  if (!is.null(path_head_snippet)) {
    brio::write_lines(text = head_snippet,
                      path = path_head_snippet)
  }
  
  if (!quiet) {
    cli::cli_alert_info(text = "Include the following HTML snippet in the {.field <head>} of your document:")
    cli::cli_par()
    cli::cli_end()
    cli::cli_code(lines = head_snippet)
  }
  
  invisible(head_snippet)
}

#' Assert object is member of any class
#'
#' Asserts that an object is member of any of the specified classes.
#'
#' In contrast to [checkmate::assert_class()], this function returns `TRUE` as long as `x` is at least member of *one* of `classes`.
#'
#' @param x \R object to test.
#' @param classes Class names to check for inheritance. A character vector.
#' @param name Name of the checked object to print in error message in case the assertion fails. A character scalar.
#'
#' @return `x`, invisibly.
#' @family checkmate
#' @export
#'
#' @examples
#' xml2::read_html("https://pal.rpkg.dev/dev/license") |>
#'   assert_class_any(classes = c("xml_document", "xml_nodeset", "xml_node"))
assert_class_any <- function(x,
                             classes,
                             name = "x") {
  
  checkmate::assert_character(classes,
                              any.missing = FALSE)
  if (!inherits(x = x,
                what = classes)) {
    
    checkmate::assert_string(name)
    classes_actual <- class(x)
    cli::cli_abort(paste0("{.arg {name}} must {cli::qty(classes)} be {?of class/member of any of the classes} ",
                          classes %>% paste0("{.val ", ., "}") |> enum_str(sep2 = " or "),
                          ", but is {cli::qty(classes_actual)} of class{?es} {.val {classes_actual}}."))
  }
  
  invisible(x)
}

#' Assert object is data frame or tibble (extension)
#'
#' Asserts that an object is `pkgsnip::param_lbl(id = "df_or_tibble", as_sentence = FALSE)`.
#'
#' @inheritParams assert_class_any
#'
#' @inherit assert_class_any return
#' @family checkmate
#' @export
#'
#' @examples
#' tibble::tibble() |> pal::assert_df_or_tibble()
#' 
#' try(
#'   matrix() |> pal::assert_df_or_tibble()
#' )
assert_df_or_tibble <- function(x,
                                name = "x") {
  
  assert_class_any(x = x,
                   classes = c("data.frame",
                               "tbl",
                               "tbl_dbi",
                               "tbl_df",
                               "tbl_lazy",
                               "tbl_sql"),
                   name = name)
}

#' Assert count or `Inf`
#'
#' Asserts that `x` is either a [count][checkmate::assert_count] or [positive infinity][is.infinite] (`Inf`).
#'
#' @param x Object to check.
#' @param ... Further arguments passed on to [checkmate::assert_count()].
#'
#' @return `x`, invisibly.
#' @family checkmate
#' @export
#'
#' @examples
#' pal::assert_inf_count(1001L)
#' pal::assert_inf_count(1)
#' pal::assert_inf_count(0.0)
#' pal::assert_inf_count(Inf)
#' 
#' try(
#'   pal::assert_inf_count(-1)
#' )
#' try(
#'   pal::assert_inf_count(1.5)
#' )
#' try(
#'   pal::assert_inf_count(-Inf)
#' )
assert_inf_count <- function(x,
                             ...) {
  
  if (isTRUE(is.infinite(x))) {
    
    if (x < 0L) {
      cli::cli_abort("{.arg x} must either be a single integerish value or positive infinity ({.code Inf}).")
    }
    
    return(invisible(x))
    
  } else {
    
    return(checkmate::assert_count(x = x,
                                   ...))
  }
}

#' [cli](https://cli.r-lib.org/) pluralization helpers for booleans
#'
#' Equivalents to [cli::qty()] and [cli::no()] for a logical input.
#' 
#' If `cnd` evaluates to `TRUE`, the resulting cli quantity is `1`, otherwise `0`. See cli's [pluralization
#' rules](https://cli.r-lib.org/articles/pluralization.html#pluralization-markup-1) for details about how these quantities are interpreted.
#'
#' @param cnd Condition. A logical scalar.
#'
#' @return `0L` or `1L` with the additional class `cli_noprint`.
#' @family cli
#' @export
#'
#' @examples
#' cnd <- runif(1L) < 0.5
#' 
#' cli::pluralize(paste0(
#'   "{pal::cli_qty_lgl(cnd)}I think this function ",
#'   "{?comes in handy/is not worth a second of my attention}. Having looked at the rest of the ",
#'   "package, this {?is quite surprising/does not come as a surprise}."
#' ))
#' 
#' cli::pluralize("This function is worth exactly {pal::cli_no_lgl(cnd)} second of my time.")
cli_qty_lgl <- function(cnd) {
  
  checkmate::assert_flag(cnd)
  
  cnd %<>% as.integer()
  class(cnd) <- "cli_noprint"
  
  cnd
}

#' @rdname cli_qty_lgl
#' @export
cli_no_lgl <- function(cnd) {
  
  checkmate::assert_flag(cnd)
  
  cnd %<>% as.integer()
  class(cnd) <- "cli_no"
  
  cnd
}

#' Quick [cli](https://cli.r-lib.org/) simplified progress message
#'
#' Version of [cli::cli_progress_step()] that for the messages on (un)successful termination simply appends **done**/**failed** to `msg`.
#'
#' @inheritParams cli::cli_progress_bar
#' @inheritParams cli::cli_progress_step
#'
#' @return The cli progress bar `id` as a character scalar, invisibly.
#' @family cli
#' @export
#'
#' @examples
#' {
#'   pal::cli_progress_step_quick(msg = "Doing my thing")
#'   Sys.sleep(1)
#'   cli::cli_progress_done()
#' }
cli_progress_step_quick <- function(msg,
                                    spinner = FALSE,
                                    class = if (!spinner) ".alert-info",
                                    current = TRUE,
                                    .auto_close = TRUE,
                                    .envir = parent.frame()) {
  checkmate::assert_string(msg)
  msg %<>% paste0("\u2026")
  
  cli::cli_progress_step(msg = msg,
                         msg_done = paste(msg, "done"),
                         msg_failed = paste(msg, "failed"),
                         spinner = spinner,
                         class = class,
                         current = current,
                         .auto_close = .auto_close,
                         .envir = .envir)
}

#' Evaluate an expression with [cli](https://cli.r-lib.org/) process indication
#'
#' @description
#'
#' `r lifecycle::badge("superseded")` \cr
#' This function is superseded in favor of [cli_progress_step_quick()] and the underlying `cli::cli_progress_*` family of functions which are more powerful and
#' versatile than the `cli::cli_process_*` family on which `cli_process_expr()` is built.
#'
#' @inheritParams cli::cli_process_start
#' @param expr An expression to be evaluated.
#' @param env Default environment to evaluate `expr`, as well as possible [glue][glue::glue] expressions within `msg`, in.
#'
#' @return The result of the evaluated `expr`, invisibly.
#' @family cli
#' @export
#'
#' @examples
#' if (interactive()) {
#'   pal::cli_process_expr(Sys.sleep(3L), "Zzzz")
#' }
#'
#' russian_roulette <- function() {
#'   msg <- "Spinning the cylinder \U0001F91E … "
#'   pal::cli_process_expr(msg = msg,
#'                         msg_done = paste0(msg, "and pulling the trigger – lucky again. \U0001F60C"),
#'                         msg_failed = paste0(msg, "and pulling the trigger – head blast!"),
#'                         expr = {
#'                           if (interactive()) Sys.sleep(1)
#'                           if (runif(1L) < 0.4) stop("\U0001F92F\u2620")
#'                         })
#' }
#' 
#' set.seed(321)
#' russian_roulette()
#' set.seed(123)
#' try(russian_roulette())
cli_process_expr <- function(expr,
                             msg,
                             msg_done = paste(msg, "... done"),
                             msg_failed = paste(msg, "... failed"),
                             msg_class = "alert-info",
                             done_class = "alert-success",
                             failed_class = "alert-danger",
                             env = parent.frame()) {
  checkmate::assert_string(msg,
                           # NOTE: This is necessary since `cli::cli_process_start(msg = "")` throws an error
                           min.chars = 1L)
  checkmate::assert_string(msg_done)
  checkmate::assert_string(msg_failed)
  checkmate::assert_string(msg_class)
  checkmate::assert_string(done_class)
  checkmate::assert_string(failed_class)
  checkmate::assert_environment(env)
  
  # NOTE: We cannot rely on `on_exit = "done"` since in case of an error the on-exit code of this function won't reach execution because we throw the error
  #       using `rlang::cnd_signal(.x)` first.
  status_bar_container_id <- cli::cli_process_start(msg = msg,
                                                    msg_done = msg_done,
                                                    msg_failed = msg_failed,
                                                    msg_class = msg_class,
                                                    done_class = done_class,
                                                    failed_class = failed_class,
                                                    .envir = env)
  
  result <- tryCatch(expr = rlang::eval_tidy(expr = {{ expr }},
                                             env = env),
                     error = \(x) {
                       cli::cli_process_failed(status_bar_container_id)
                       rlang::cnd_signal(x)
                     })
  
  cli::cli_process_done(status_bar_container_id)
  
  invisible(result)
}

#' Get Git file modification time
#'
#' Determine the time a file in a Git repository was last modified.
#'
#' Note that only *committed* changes to the file are regarded. The modification is returned in [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time).
#'
#' @param path Path to a file, relative to the Git repository root.
#' @param repo Path to a Git repository.
#'
#' @return `r pkgsnip::return_lbl("datetime")`
#' @family git
#' @export
#'
#' @examples
#' \dontrun{
#' pal::git_file_mod_time(path = "README.md")}
git_file_mod_time <- function(path,
                              repo = ".") {
  
  rlang::check_installed("git2r",
                         reason = reason_pkg_required())
  
  # make path relative to `repo` if necessary to please `git2r::blame()`
  if (fs::is_absolute_path(path)) {
    
    path <-
      fs::path_abs(repo) |>
      c(path) |>
      fs::path_common() |>
      fs::path_rel(path = path)
  }
  
  git2r::blame(repo = repo,
               path = path)$hunks |>
    purrr::map(\(x) as.POSIXct(x$final_signature$when)) |>
    purrr::list_c(ptype = vctrs::new_datetime()) |>
    safe_max()
}

#' Get Git remote tree URL
#'
#' Determines the base Git tree URL to the current branch's upstream remote of `repo`.
#' 
#' This function is useful to assemble URLs to files and folders in your repo's Git forge (GitHub, GitLab, etc.).
#'
#' @inheritParams gert::git_remote_list
#' @param remote Name of the Git remote to determine the URL for. Defaults to `"origin"` if present, otherwise the first remote listed by
#'   [gert::git_remote_list()].
#'
#' @return If `repo` is a Git repository, a character scalar. Otherwise, a character vector of length zero.
#' @family git
#' @export
#'
#' @examples
#' pal::git_remote_tree_url() |>
#'   paste0("Rmd/pal.Rmd") |>
#'   browseURL()
git_remote_tree_url <- function(repo = ".",
                                remote = NULL) {
  
  checkmate::assert_string(remote,
                           null.ok = TRUE)
  rlang::check_installed("gert",
                         reason = reason_pkg_required())
  
  remotes <- try(gert::git_remote_list(repo = repo))
  
  # return empty chr if no remotes found
  if (!is.data.frame(remotes)) {
    return(character())
  }
  
  # if multiple remotes present and no explicit `remote` provided, take "origin" if present, otherwise the first one
  remote <- remote %||% ifelse(nrow(remotes) > 1L && "origin" %in% remotes$name,
                               "origin",
                               remotes$name[1L])
  url <-
    gert::git_remote_info(repo = repo,
                          remote = remote) |>
    _$url
  
  # convert SSH address to URL
  if (startsWith(url, "git@")) {
    url %<>% stringr::str_replace_all(pattern = c("(git@[\\w\\.]{1,}\\.[a-z]{2,}):" = "\\1/",
                                                  "^git@" = "https://",
                                                  "\\.git$" = ""))
  }
  
  add_slash_minus <- stringr::str_detect(url, stringr::fixed("gitlab"))
  url %<>% paste0("/-"[add_slash_minus], "/tree/", gert::git_branch(), "/")

  url
}

#' Test if an HTTP request is successful
#'
#' @description
#' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions that returns
#'
#' - `TRUE` if the specified `url` could be resolved _and_ a [`HEAD`](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods) request could
#'   be [successfully completed](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes), or
#'
#' - `FALSE` in any other case.
#'
#' @details
#' This function is similar to [RCurl::url.exists()], i.e. it only retrieves the header, no body, but is based on [httr2][httr2::httr2-package] which in turn is
#' based on [curl](https://jeroen.cran.dev/curl/).
#'
#' For checks on lower levels of the network stack like performing DNS queries or TCP port pings, see the [pingr](https://r-lib.github.io/pingr/) package.
#'
#' @inheritParams httr2::req_retry
#' @inheritParams httr2::req_perform
#' @param url HTTP protocol address. The scheme is optional, so both `"google.com"` and `"https://google.com"` will work. A character scalar.
#' @param max_tries `r pkgsnip::param_lbl("max_tries")`
#'
#' @return A logical scalar.
#' @family http
#' @export
#'
#' @examples
#' pal::is_http_success("goo.gl")
#' pal::is_http_success("https://google.com/")
#' pal::is_http_success("https://google.not/")
#' 
#' # by default, requests are only retried on HTTP 429 and 503 status codes
#' pal::is_http_success(url = "https://httpstat.us/503",
#'                      max_tries = 2,
#'                      verbosity = 1)
#' pal::is_http_success(url = "https://httpstat.us/500",
#'                      max_tries = 2,
#'                      verbosity = 1)
#'
#' # to retry on *all* failing status codes, set `is_transient` accordingly:
#' pal::is_http_success(url = "https://httpstat.us/500",
#'                      max_tries = 2,
#'                      is_transient = \(x) TRUE,
#'                      verbosity = 1)
is_http_success <- function(url,
                            max_tries = 1L,
                            retry_on_failure = FALSE,
                            is_transient = NULL,
                            verbosity = NULL) {
  
  rlang::check_installed("httr2",
                         reason = reason_pkg_required())
  tryCatch(expr =
             httr2::request(base_url = url) |>
             httr2::req_method(method = "HEAD") |>
             httr2::req_retry(max_tries = max_tries,
                              retry_on_failure = retry_on_failure,
                              is_transient = is_transient) |>
             httr2::req_perform(verbosity = verbosity) |>
             httr2::resp_is_error() |>
             magrittr::not(),
           error = \(x) FALSE,
           interrupt = \(x) cli::cli_abort("Terminated by the user."))
}

#' Test if URL
#'
#' Tests whether the elements of a character vector are [Uniform Resource Locators](https://de.wikipedia.org/wiki/Uniform_Resource_Locator) (URLs).
#'
#' This function is based on [xml2::url_parse()] and simply checks whether the elements in `x` contain both a **scheme** as well as *some* **scheme-specific
#' part** (excl. ports). No further checks are performed, so it's explicitly not verified that a URL actually conforms to its respective scheme.
#'
#' @param x Character vector to test.
#'
#' @return A logical vector of the same length as `x`.
#' @family http
#' @export
#'
#' @examples
#' pal::is_url(c("/some/path",
#'               "file:///some/path"))
is_url <- function(x) {
  
  checkmate::assert_character(x,
                              any.missing = FALSE)
  rlang::check_installed("xml2",
                         reason = reason_pkg_required())
  
  parsed <- xml2::url_parse(x)
  has_scheme <- nchar(parsed$scheme) > 0L
  has_specifics <- purrr::pmap_lgl(parsed,
                                   # NOTE: we don't consider col `port`
                                   ~ any(nchar(c(..2, ..4, ..5, ..6, ..7)) > 0L))
  has_scheme & has_specifics
}

#' Perform a cached HTTP GET request
#'
#' Convenience wrapper around a bunch of [httr2][httr2::httr2-package] functions.
#'
#' @inheritParams is_http_success
#'
#' @inherit httr2::req_perform return
#' @family http
#' @export
http_get_cached <- function(url,
                            max_tries = 3L) {
  
  rlang::check_installed("httr2",
                         reason = reason_pkg_required())
  
  httr2::request(base_url = url) |>
    httr2::req_method(method = "GET") |>
    httr2::req_cache(path = fs::path(tools::R_user_dir(package = "httr2",
                                                       which = "cache"),
                                     "req_cache")) |>
    httr2::req_retry(max_tries = max_tries) |>
    httr2::req_perform()
}

#' Read in and parse TOML file as strict list
#'
#' Reads in a file in [Tom's Obvious Minimal Language (TOML)](https://toml.io/) format and returns its content as a (nested) [strict list][xfun::strict_list].
#'
#' The file is parsed using [`RcppTOML::parseTOML(escape = FALSE)`][RcppTOML::parseTOML].
#'
#' @inheritParams RcppTOML::parseTOML
#' @param input If `from_file = TRUE`, the path to a TOML file as a character scalar. Otherwise, TOML content as a character vector.
#' @param from_file Whether `input` is the path to a TOML file or already a character vector of TOML content.
#'
#' @return `r pkgsnip::return_lbl("strict_list")`
#' @family toml
#' @export
toml_read <- function(input,
                      from_file = TRUE,
                      verbose = FALSE) {
  
  checkmate::assert_flag(from_file)
  checkmate::assert_flag(verbose)
  rlang::check_installed("RcppTOML",
                         reason = reason_pkg_required())
  rlang::check_installed("xfun",
                         reason = reason_pkg_required())
  if (from_file) {
    checkmate::assert_file_exists(input,
                                  access = "r")
  } else {
    checkmate::assert_character(input)
    # reduce to character scalar since `RcppTOML::parseTOML()` doesn't accept vectors
    input %<>% paste0(collapse = "\n")
  }
  
  xfun::as_strict_list(RcppTOML::parseTOML(input = input,
                                           verbose = verbose,
                                           fromFile = from_file,
                                           escape = FALSE))
}

#' Validate TOML
#'
#' Validates a TOML file or character vector using the external [Taplo CLI](https://taplo.tamasfe.dev/cli/introduction.html), optionally against a [JSON
#' Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)).
#'
#' The highest supported JSON Schema specification is [Draft 4](https://json-schema.org/specification-links.html#draft-4). This is a [limitation of
#' the underlying tool Taplo](https://taplo.tamasfe.dev/configuration/developing-schemas.html).
#'
#' Taplo allows to define the schema to be used directly in the TOML file using the [`schema` header 
#' directive](https://taplo.tamasfe.dev/configuration/directives.html#the-schema-directive). Note that the `schema` argument has precendence unless set to
#' `NULL` (the default).
#'
#' # Why JSON Schema-based validation
#'
#' Although there are two noteworthy attempts at introducing a native validation format for TOML, [TOLS](https://github.com/toml-lang/toml/pull/116/) and [TOML
#' Schema](https://github.com/toml-lang/toml/issues/792), neither of them has been officially adopted yet. As it appears, it could take several more years to
#' decades until such thing happens, if ever.
#'
#' In the meantime, we can use the JSON Schema format as an alternative thanks to an awesome implementation by the [Taplo
#' CLI](https://taplo.tamasfe.dev/cli/introduction.html), which itself is written in the Rust programming language and [available as a single-binary program for
#' all common platforms](https://taplo.tamasfe.dev/cli/installation/binary.html).
#'
#' @inheritParams toml_read
#' @param schema URL to a [JSON Schema](https://json-schema.org/) ([Draft 4](https://json-schema.org/specification-links.html#draft-4)) file to validate `input`
#'   against. Can also be a local filesystem path specified in the [file URI scheme](https://en.wikipedia.org/wiki/File_URI_scheme) (absolute path prefixed with
#'   `file://`). If `NULL`, no schema-based validation is performed and `input` is only checked to be TOML-compliant.
#' @param top_errors_only Whether to reduce the output to the top error message of each of TOML CLI's error classes. If `FALSE`, TOML CLI's complete error
#'   output is shown.
#'
#' @return If the validation is successful, `input` invisibly. Otherwise an error is thrown.
#' @family toml
#' @export
#'
#' @examples
#' try(
#'   pal::toml_validate(input = "key = tru",
#'                      from_file = FALSE)
#' )
toml_validate <- function(input,
                          from_file = TRUE,
                          schema = NULL,
                          top_errors_only = TRUE) {
  
  checkmate::assert_flag(from_file)
  checkmate::assert_flag(top_errors_only)
  checkmate::assert_string(schema,
                           null.ok = TRUE)
  
  if (!is.null(schema) && !is_url(schema)) {
    cli::cli_abort(paste0("{.arg schema} must be a valid URL. To refer to a (local) filesystem path, prepend it with the {.href [`file://` URI ",
                          "scheme](https://en.wikipedia.org/wiki/File_URI_scheme)}."))
  }
  
  if (from_file) {
    checkmate::assert_file_exists(input,
                                  access = "r")
  } else {
    checkmate::assert_character(input)
  }
  
  assert_cli(cmd = "taplo",
             error_msg = paste0("The {.strong taplo} executable is required but couldn't be found on system's {.href ",
                                "[PATH](https://en.wikipedia.org/wiki/PATH_(variable))}. Binaries of the {.href [Taplo ",
                                "CLI](https://taplo.tamasfe.dev/cli/introduction.html)} are available for all common platforms from here: ",
                                "{.url https://taplo.tamasfe.dev/cli/installation/binary.html}"))
  
  result <- suppressWarnings(system2(command = "taplo",
                                     args = c("lint",
                                              "--no-auto-config",
                                              "--colors=always",
                                              paste0("--schema=", schema)[!is.null(schema)],
                                              ifelse(from_file,
                                                     input,
                                                     "-")),
                                     stdout = TRUE,
                                     stderr = TRUE,
                                     input = if (from_file) NULL else input,
                                     # cf. https://taplo.tamasfe.dev/cli/usage/configuration.html
                                     env = "RUST_LOG=error",
                                     timeout = 5L))
  
  result_excl_ansi <- cli::ansi_strip(result)
  
  if (length(result) > 1L && stringr::str_detect(string = result_excl_ansi[1L],
                                                 pattern = "(?i)error")) {
    
    # only retain first Taplo error of each class (later ones are usually unhelpful)
    ix_error <-
      result_excl_ansi |>
      stringr::str_detect(pattern = "^(?i)error:") |>
      which()
    
    ix_error_begin <-
      result_excl_ansi |>
      stringr::str_detect(pattern = "^(?i)error:") |>
      magrittr::and(!duplicated(result_excl_ansi)) |>
      which()
    
    if (top_errors_only && length(ix_error) > 0L) {
      
      i_error_end <-
        result_excl_ansi |>
        stringr::str_detect(pattern = "^ERROR ") |>
        which() |>
        dplyr::first()
      
      ix_final <-
        ix_error_begin |>
        purrr::map(\(x) {
          
          i_error_current <- which(ix_error == x)
          has_next <- i_error_current < length(ix_error)
          
          seq(from = x,
              to = ifelse(has_next,
                          ix_error[i_error_current + 1L] - 1L,
                          i_error_end - 1L))
        }) |>
        purrr::list_c(ptype = integer())
      
    } else {
      ix_final <- seq_along(result)
    }
    
    # we avoid `cli::cli_abort()` for now since it always strips consecutive whitespaces, even non-breaking ones
    # cf. https://github.com/r-lib/cli/issues/531#issuecomment-1292639286
    rlang::abort(message = c(ifelse(from_file,
                                    cli::format_inline("Validation of {.file {input}} failed with"),
                                    "TOML validation failed with:"),
                             # turning it into a named vctr avoids the default bullets
                             " " = "",
                             result[ix_final],
                             ""),
                 use_cli_format = FALSE)
  }
  
  invisible(input)
}

#' Check if CLI tool is available
#'
#' Tests whether a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's
#' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and optionally returns the executable's filesystem path.
#'
#' @param cmd System command to invoke the CLI tool. A character scalar.
#' @param get_cmd_path Whether or not to return the filesystem path to the CLI tool. If `FALSE`, a boolean is returned indicating if the CLI tool is found on
#'   the system or not.
#' @param force_which If set to `TRUE`, [base::Sys.which()], which relies on the system command `which`, will be used instead of `command -v` to determine the
#'   availability of `cmd` on Unix-like systems. On Windows, `base::Sys.which()` is used in any case. `command -v` is
#'   [generally recommended for bourne-like shells](https://unix.stackexchange.com/q/85249/201803) and therefore is the default on Linux, macOS and other
#'   [Unixes](https://en.wikipedia.org/wiki/Unix-like).
#'
#' @return A logical scalar if `get_cmd_path = FALSE`, otherwise the filesystem [path][fs::path] to the `cmd` executable.
#' @family sys
#' @export
#'
#' @examples
#' pal::test_cli("Rscript")
#'
#' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc")
#' pal::test_cli(cmd, get_cmd_path = TRUE)
test_cli <- function(cmd,
                     get_cmd_path = FALSE,
                     force_which = FALSE) {
  
  checkmate::assert_string(cmd)
  checkmate::assert_flag(get_cmd_path)
  checkmate::assert_flag(force_which)
  
  if (force_which || checkmate::test_os("windows")) {
    
    result <-
      Sys.which(names = cmd) |>
      as.character() |>
      when(. == "" ~ character(),
           ~ .) |>
      when(get_cmd_path ~ fs::path(.),
           length(.) == 0L ~ FALSE,
           ~ TRUE)
  } else {
    
    # define "defused" warning/error handler
    defuse <- function(e) if (get_cmd_path) character() else FALSE
    
    result <-
      tryCatch(expr = system2(command = "command",
                              args = c("-v",
                                       cmd),
                              stdout = get_cmd_path,
                              stderr = get_cmd_path),
               warning = defuse,
               error = defuse) |>
      when(get_cmd_path ~ fs::path(.),
           isFALSE(.) ~ .,
           ~ TRUE)
  }
  
  result
}

#' Assert CLI tool is available
#'
#' Asserts that a [command-line interface (CLI)](https://en.wikipedia.org/wiki/Command-line_interface) tool is found on the system's
#' [`PATH`](https://en.wikipedia.org/wiki/PATH_(variable)) and returns the executable's filesystem path.
#'
#' @inheritParams test_cli
#' @param error_msg Error message to print in case `cmd` is not found on system's `PATH`. `r pkgsnip::param_lbl("cli_markup_support")` A character scalar.
#'
#' @return If the CLI tool is available on the system's `PATH`, its filesystem path, invisibly. Otherwise, an error is thrown.
#' @family sys
#' @family checkmate
#' @export
#'
#' @examples
#' pal::assert_cli("Rscript")
#'
#' cmd <- ifelse(checkmate::test_os("windows"), "pandoc.exe", "pandoc")
#' pal::assert_cli(cmd)
assert_cli <- function(cmd,
                       error_msg = paste0("The {.strong {cmd}} executable is required but couldn't be found on ",
                                          "system's {.href [PATH](https://en.wikipedia.org/wiki/PATH_(variable))}."),
                       force_which = FALSE) {
  
  checkmate::assert_string(error_msg)
  
  cli_path <- test_cli(cmd = cmd,
                       get_cmd_path = TRUE,
                       force_which = force_which)
  
  if (length(cli_path) == 0L) {
    cli::cli_abort(error_msg)
  }
  
  invisible(cli_path)
}

#' Capture printed console output as string
#'
#' Returns what [`print(x)`][base::print] would output on the console – if `collapse` is set to anything other than `NULL`, as a character scalar (i.e. a
#' string), otherwise as a character vector of output lines.
#'
#' This is a simple convenience wrapper around [utils::capture.output()]. Note that [ANSI escape sequences](https://en.wikipedia.org/wiki/ANSI_escape_code)
#' (e.g. as output by the `print()` methods of tidyverse packages) are included in the result. To remove them, use [cli::ansi_strip()].
#'
#' @param x \R object of which the output of `print()` is to be captured.
#' @param collapse Optional string for concatenating the results. If `NULL`, a character vector of print lines is returned.
#'
#' @return A character vector if `collapse = NULL`, otherwise a character scalar.
#' @export
#'
#' @examples
#' mtcars |> pal::capture_print()
#' mtcars |> pal::capture_print(collapse = "\n") |> cat()
#' 
#' # to strip ANSI escape sequences, use `cli::ansi_strip()`
#' airquality |> tibble::as_tibble()
#' 
#' airquality |>
#'   tibble::as_tibble() |>
#'   pal::capture_print(collapse = "\n") |>
#'   cli::ansi_strip() |>
#'   cat()
capture_print <- function(x,
                          collapse = NULL) {
  
  utils::capture.output(print(x),
                        file = NULL,
                        type = "output",
                        split = FALSE) |>
    paste0(collapse = collapse)
}

#' Convert to character vector and print newline-separated
#' 
#' Convenience wrapper around [as_chr()] and [base::cat()], mainly intended for interactive use.
#'
#' @param ... \R object(s) to convert to character and print. `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @inherit base::cat return
#' @seealso
#' [cli::cat_line()] for a faster alternative that doesn't _recursively_ convert its input to type character.
#'
#' [xfun::raw_string()] (and [xfun::file_string()]) for an alternative approach to the same use case (but without any conversion to type character at all).
#' 
#' [`writeLines(con = stdout())`][base::writeLines] for a base R alternative that *only* accepts character inputs.
#' @export
#'
#' @examples
#' fs::path_package(package = "pal",
#'                  "rstudio", "addins.dcf") |>
#'   readr::read_lines() |>
#'   pal::cat_lines()
#' 
#' # conversion to type character, recursive vs. non-recursive
#' to_convert <- list(tibble::tibble(a = 1:3), "A", factor("wonderful"), xfun::strict_list("day"))
#' 
#' to_convert |> pal::cat_lines()
#' to_convert |> cli::cat_line()
#' 
#' # this OTOH only accepts chr inputs
#' try(
#'   to_convert |> writeLines()
#' )
cat_lines <- function(...) {
  
  cat(as_chr(...),
      sep = "\n")
}

#' Create [readr][readr::readr-package] column specification using regular expression matching
#'
#' Allows to define a regular expression per desired [column specification object][readr::cols] matching the respective column names.
#'
#' The main limitation of `cols_regex()` is that it needs to know the input dataset's full set of `.col_names` in advance, for which [dsv_colnames()] can help.
#' See the examples for further details.
#'
#' @param ... Named arguments where the names are (Perl-compatible) regular expressions and the values are column objects created by `col_*()`, or their
#'   abbreviated character names (as described in the `col_types` parameter of [readr::read_delim()]). `r pkgsnip::roxy_lbl("dyn_dots_support")`
#' @param .col_names Column names which should be matched by `...`.
#' @param .default Any named columns not matched by any of the regular expressions in `...` will be read with this column type.
#'
#' @return A [column specification][readr::cols].
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # for some hypothetical CSV data column names like these...
#' col_names <- c("VAR1_Text",
#'                "VAR2_Text",
#'                "VAR3_Text_Other",
#'                "VAR1_Code_R1",
#'                "VAR2_Code_R2",
#'                "HAS_R1_Lag",
#'                "HAS_R2_Lag",
#'                "GARBAGEX67",
#'                "GARBAGEY09")
#' 
#' # ...a column spec could be created concisely as follows:
#' col_regex <- list("_Text(_|$)" = "c",
#'                   "_Code(_|$)" = "i",
#'                   "^GARBAGE"   = readr::col_skip())
#' 
#' pal::cols_regex(.col_names = col_names,
#'                 !!!col_regex,
#'                 .default = "l")
#'
#' # we can parse some real data:
#' url <- "https://salim_b.gitlab.io/misc/Kantonsratswahl_Zuerich_2019_Ergebnisse_Gemeinden.csv"
#' 
#' raw_data <-
#'   httr2::request(url) |>
#'   httr2::req_perform() |>
#'   httr2::resp_body_string()
#'
#' col_spec <- pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
#'                             "(?i)anteil" = "d",
#'                             .default = "i",
#'                             .col_names = pal::dsv_colnames(raw_data))
#' print(col_spec)
#'
#' readr::read_csv(file = raw_data,
#'                 col_types = col_spec)
#'
#' # we can also do basically the same in a more concise way without having to rely on
#' # `pal::dsv_colnames()`:
#' readr::read_csv(file = url,
#'                 col_types = list(.default = "c")) %>%
#'   readr::type_convert(col_types = pal::cols_regex("^(Gemeindenamen|Liste|Wahlkreis)$" = "c",
#'                                                   "(?i)anteil" = "d",
#'                                                   .default = "i",
#'                                                   .col_names = colnames(.)))
cols_regex <- function(...,
                       .col_names,
                       .default = readr::col_character()) {
  
  rlang::check_installed("readr",
                         reason = reason_pkg_required())
  spec <- list()
  patterns <- rlang::list2(...)
  
  if (length(setdiff(names(patterns), "")) < length(patterns)) {
    cli::cli_abort("All column specifications in {.arg ...} must be named by a regular expression.")
  }
  
  for (i in seq_along(patterns)) {
    
    matched_vars <- grep(x = .col_names,
                         pattern = names(patterns[i]),
                         value = TRUE,
                         perl = TRUE)
    spec <-
      rep(list(patterns[[i]]),
          length(matched_vars)) |>
      magrittr::set_names(matched_vars) %>%
      c(spec, .)
  }
  
  spec <- c(spec, alist(.default = .default))
  do.call(readr::cols, spec)
}

#' MIME type to file extension
#'
#' Determines a suitable file extension from a [MIME type](https://en.wikipedia.org/wiki/Media_type), based i.a. on [`mime::mimemap`] and `mime:::mimeextra`. In
#' case of multiple matches, the first one is returned and a warning is printed (unless `quiet = TRUE`).
#'
#' @param mime_type MIME type to determine the file extension for. A character scalar.
#' @param quiet `r pkgsnip::param_lbl("quiet")`
#'
#' @return A character scalar.
#' @export
#'
#' @examples
#' pal::mime_to_ext("application/json")
#' pal::mime_to_ext("audio/mpeg")
mime_to_ext = function(mime_type,
                       quiet = FALSE) {
  
  checkmate::assert_string(mime_type)
  
  i <- which(mime_types_exts %in% mime_type)
  result <- names(mime_types_exts[i])
  
  if (length(i) == 0L) {
    return(NA_character_)
  }
  
  if (!quiet && length(i) > 1L) {
    cli::cli_warn("The MIME type {.val {mime_type}} maps to multiple file extensions ({.val {result}}). Only the first one is returned.")
  }
  
  result[1L]
}

#' Rename elements from dictionary
#'
#' Renames the elements of a vector or list from a dictionary, leaving unmatched names untouched by default.
#'
#' @param x Object whose elements are to be renamed.
#' @param dict A named character vector or list of strings where names are the old and the values are the new names.
#' @param default Value(s) used when names aren't matched by any name in `dict`. Recycled to the length of `x`.
#'
#' @return `x` with elements renamed according to `dict`.
#' @export
#'
#' @examples
#' mtcars |> pal::rename_from(dict = c(mpg  = "Miles/(US) gallon",
#'                                     cyl  = "Number of cylinders",
#'                                     disp = "Displacement (cu.in.)",
#'                                     hp   = "Gross horsepower",
#'                                     drat = "Rear axle ratio",
#'                                     wt   = "Weight (1000 lbs)",
#'                                     qsec = "1/4 mile time",
#'                                     vs   = "Engine (0 = V-shaped, 1 = straight)",
#'                                     am   = "Transmission (0 = automatic, 1 = manual)",
#'                                     gear = "Number of forward gears",
#'                                     carb = "Number of carburetors",
#'                                     not_there = "Yikes!"))
rename_from <- function(x,
                        dict,
                        default = names(x)) {
  
  checkmate::assert_named(x)
  
  if (is.list(dict)) {
    checkmate::assert_list(dict,
                           types = "character",
                           any.missing = FALSE,
                           names = "unique")
  } else {
    checkmate::assert_vector(dict,
                             strict = TRUE,
                             any.missing = FALSE,
                             names = "unique")
  }
  
  if (length(x) > 0L) {
    
    checkmate::assert_character(default)
    dict %<>% purrr::imap(\(x, name) rlang::new_formula(lhs = name,
                                                        rhs = x,
                                                        env = emptyenv()))
    names_new <- dplyr::case_match(names(x),
                                   !!!dict,
                                   .default = default)
    
    return(magrittr::set_names(x, names_new))
    
  } else {
    return(x)
  }
}

#' Sort vector by another vector
#'
#' Arranges a vector `x` by the order of another vector `by`.
#'
#' Note that this function significantly differs from [base::sort_by()].
#'
#' @param x Vector to be ordered.
#' @param by Reference vector which `x` will be ordered by.
#'
#' @return A permutation of `x`.
#' @export
#'
#' @examples
#' library(magrittr)
#'
#' # generate 100 random letters
#' random_letters <-
#'   letters |>
#'   magrittr::extract(sample.int(n = 26L,
#'                                size = 100L,
#'                                replace = TRUE)) %T>%
#'   print()
#'
#' # sort the random letters alphabetically
#' random_letters |> pal::arrange_by(by = letters)
arrange_by <- function(x,
                       by) {
  
  x[order(match(x = x, table = by))]
}

#' Generalized `if`-`else` for pipes
#'
#' A flavour of pattern matching (or an if-else abstraction) in which a value `.` is matched against a sequence `...` of condition-action sets. When a valid
#' match/condition is found, the action is executed and the result of the action is returned.
#'
#' Condition-action sets are written as formulas with conditions as left-hand sides (LHS) and actions as right-hand sides (RHS). A formula with only a
#' right-hand side will be treated as a condition which is always satisfied. For such a default case one can also omit the `~` symbol, but note that its value
#' will then be evaluated early. Any named argument will be made available in all conditions and actions, which is useful in avoiding repeated temporary
#' computations or temporary assignments.
#
#' Validity of the conditions are tested with [base::isTRUE()]. In other words conditions resulting in more than one logical will never be valid. Note that the
#' input value is always treated as a single object, as opposed to the [base::ifelse()] function.
#'
#' This function is copied over from package purrr since it [has been deprecated with the release of purrr
#' 1.0](https://www.tidyverse.org/blog/2022/12/purrr-1-0-0/#core-purpose-refinements). `pal::when()` can be used as a drop-in replacement for `purrr::when()`.
#' All credits go to the original authors of `purrr::when()` [as of
#' 2022-12-21](https://github.com/tidyverse/purrr/blame/54546b6bf938c54da4d232061db2fc91b1d5923c/R/deprec-when.R).
#'
#' @param . Value to match against.
#' @param ... Formulas, each containing a condition as LHS and an action as RHS. Named arguments will define additional values.
#'   `r pkgsnip::roxy_lbl("dyn_dots_support")`
#'
#' @return The value resulting from the action of the first matched condition, or `NULL` if no matches are found and no default is given.
#' @export
#'
#' @examples
#' 1:10 |> pal::when(sum(.) <=  50 ~ sum(.),
#'                   sum(.) <= 100 ~ sum(.)/2L,
#'                   ~ 0L)
when <- function(.,
                 ...) {
  
  dots  <- rlang::list2(...)
  names <- names(dots)
  named <- if (is.null(names)) rep(FALSE, length(dots)) else names != ""
  
  if (sum(!named) == 0L) {
    cli::cli_abort("At least one matching condition is needed.")
  }
  
  is_formula <- vapply(dots,
                       function(dot) identical(class(dot), "formula"),
                       logical(1L))
  env <- new.env(parent = parent.frame())
  env[["."]] <- .
  
  if (sum(named) > 0L) {
    for (i in which(named)) {
      env[[names[i]]] <- dots[[i]]
    }
  }
  
  result <- NULL
  
  for (i in which(!named)) {
    
    if (is_formula[i]) {
      
      action <- length(dots[[i]])
      
      if (action == 2L || isTRUE(eval(dots[[i]][[2L]], env, env))) {
        result <- eval(dots[[i]][[action]], env, env)
        break
      }
    } else {
      result <- dots[[i]]
    }
  }
  
  result
}



utils::globalVariables(names = c(".",
                                 # tidyselect fns
                                 "any_of",
                                 "everything",
                                 "where",
                                 # other
                                 "data",
                                 "default_value",
                                 "default_value_dynamic",
                                 "env_var",
                                 "key",
                                 "package",
                                 "Package",
                                 "r_opt",
                                 "repository",
                                 "rowid",
                                 "type",
                                 "Version",
                                 "X1",
                                 "X2"))

# forbidden dots arguments
forbidden_dots <- list(roxy_tag_value = c("pkgs",
                                          "destdir",
                                          "available",
                                          "type",
                                          "quiet"))

has_root <- function(criterion,
                     path,
                     check_parent_dirs) {
  
  checkmate::assert_class(criterion,
                          classes = "root_criterion")
  checkmate::assert_flag(check_parent_dirs)
  rlang::check_installed("rprojroot",
                         reason = reason_pkg_required())
  checkmate::assert_directory_exists(path,
                                     access = "r")
  if (check_parent_dirs) {
    result <- tryCatch(
      expr = {
        rprojroot::find_root(criterion = criterion,
                             path = path)
        TRUE
      },
      error = \(e) FALSE
    )
    
  } else {
    result <-
      criterion$testfun |>
      purrr::map_lgl(\(x) x(path = path)) |>
      any()
  }
  
  result
}

#' @inherit purrr::list_flatten title description return
#' @inheritParams purrr::list_flatten
#' @inheritParams purrr::modify_tree
#'
#' @keywords internal
list_flatten <- function(x,
                         is_node,
                         name_spec = "{outer}_{inner}",
                         name_repair = c("minimal", "unique", "check_unique", "universal")) {
  
  obj_check_node(x, is_node)
  checkmate::assert_string(name_spec)

  # Take the proxy as we restore on exit
  proxy <- vctrs::vec_proxy(x)

  # Unclass S3 lists to avoid their coercion methods. Wrap atoms in a
  # list of size 1 so the elements can be concatenated in a single list.
  proxy <- purrr::map_if(proxy, is_node, unclass, .else = list)

  out <- vctrs::list_unchop(
    proxy,
    ptype = list(),
    name_spec = name_spec,
    name_repair = name_repair,
    error_arg = x,
    error_call = rlang::current_env()
  )

  # Preserve input type
  vctrs::vec_restore(out, x)
}

obj_check_node <- function(
    x,
    f,
    error_call = rlang::caller_env(),
    error_arg = rlang::caller_arg(x)
) {
  if (!f(x)) {
    if (nzchar(error_arg)) {
      error_arg <- cli::format_inline("{.arg {error_arg}}")
    }
    else {
      error_arg <- "Input"
    }
    cli::cli_abort("{error_arg} must be a list, not {obj_type_friendly(x)}.",
                   call = error_call)
  }
}

is_heading_node <- function(xml_node) {
  
  xml2::xml_name(xml_node) == "heading"
}

node_heading_lvl <- function(xml_node) {
  
  xml_node |>
    xml2::xml_attr(attr = "level") |>
    as.integer()
}

subnode_ix <- function(xml_nodes,
                       i) {
  i_node <- i
  i <- i + 1L
  ix_subnodes <- integer()
  heading_lvl_node <- node_heading_lvl(xml_nodes[i_node])
  is_subnode <- is_heading_node(xml_nodes[i_node]) && i <= length(xml_nodes)
  
  while (is_subnode) {
    is_subnode <- !is_heading_node(xml_nodes[i]) || isTRUE(node_heading_lvl(xml_nodes[i]) > heading_lvl_node)
    ix_subnodes %<>% c(i[is_subnode])
    i <- i + 1L
    if (i > length(xml_nodes)) is_subnode <- FALSE
  }
  
  ix_subnodes
}