Nothing
#------------------------------------------------------------------------------#
#
# /$$
# | $$
# /$$$$$$ /$$$$$$
# /$$__ $$|_ $$_/
# | $$ \ $$ | $$
# | $$ | $$ | $$ /$$
# | $$$$$$$ | $$$$/
# \____ $$ \___/
# /$$ \ $$
# | $$$$$$/
# \______/
#
# This file is part of the 'rstudio/gt' project.
#
# Copyright (c) 2018-2023 gt authors
#
# For full copyright and license information, please look at
# https://gt.rstudio.com/LICENSE.html
#
#------------------------------------------------------------------------------#
#' Format numeric values
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform number-based
#' formatting so that the targeted values are rendered with a higher
#' consideration for tabular presentation. Furthermore, there is finer control
#' over numeric formatting with the following options:
#'
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - scaling: we can choose to scale targeted values by a multiplier value
#' - large-number suffixing: larger figures (thousands, millions, etc.) can
#' be autoscaled and decorated with the appropriate suffixes
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @param data *The gt table data object*
#'
#' `obj:<gt_tbl>` // **required**
#'
#' This is the **gt** table object that is commonly created through use of the
#' [gt()] function.
#'
#' @param columns *Columns to target*
#'
#' `<column-targeting expression>` // *default:* `everything()`
#'
#' Can either be a series of column names provided in [c()], a vector of
#' column indices, or a select helper function. Examples of select helper
#' functions include [starts_with()], [ends_with()], [contains()],
#' [matches()], [one_of()], [num_range()], and [everything()].
#'
#' @param rows *Rows to target*
#'
#' `<row-targeting expression>` // *default:* `everything()`
#'
#' In conjunction with `columns`, we can specify which of their rows should
#' undergo formatting. The default [everything()] results in all rows in
#' `columns` being formatted. Alternatively, we can supply a vector of row
#' captions within [c()], a vector of row indices, or a select helper
#' function. Examples of select helper functions include [starts_with()],
#' [ends_with()], [contains()], [matches()], [one_of()], [num_range()], and
#' [everything()]. We can also use expressions to filter down to the rows we
#' need (e.g., `[colname_1] > 100 & [colname_2] < 50`).
#'
#' @param decimals *Number of decimal places*
#'
#' `scalar<numeric|integer>(val>=0)` // *default:* `2`
#'
#' This corresponds to the exact number of decimal places to use. A value
#' such as `2.34` can, for example, be formatted with `0` decimal places and
#' it would result in `"2"`. With `4` decimal places, the formatted value
#' becomes `"2.3400"`. The trailing zeros can be removed with
#' `drop_trailing_zeros = TRUE`. If you always need `decimals = 0`, the
#' [fmt_integer()] function should be considered.
#'
#' @param n_sigfig *Number of significant figures*
#'
#' `scalar<numeric|integer>(val>=1)` // *default:* `NULL` (`optional`)
#'
#' A option to format numbers to *n* significant figures. By default, this is
#' `NULL` and thus number values will be formatted according to the number of
#' decimal places set via `decimals`. If opting to format according to the
#' rules of significant figures, `n_sigfig` must be a number greater than or
#' equal to `1`. Any values passed to the `decimals` and `drop_trailing_zeros`
#' arguments will be ignored.
#'
#' @param drop_trailing_zeros *Drop any trailing zeros*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' A logical value that allows for removal of trailing zeros (those redundant
#' zeros after the decimal mark).
#'
#' @param drop_trailing_dec_mark *Drop the trailing decimal mark*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' A logical value that determines whether decimal marks should always appear
#' even if there are no decimal digits to display after formatting (e.g., `23`
#' becomes `23.` if `FALSE`). By default trailing decimal marks are not shown.
#'
#' @param use_seps *Use digit group separators*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' An option to use digit group separators. The type of digit group separator
#' is set by `sep_mark` and overridden if a locale ID is provided to `locale`.
#' This setting is `TRUE` by default.
#'
#' @param accounting *Use accounting style*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' An option to use accounting style for values. Normally, negative values
#' will be shown with a minus sign but using accounting style will instead put
#' any negative values in parentheses.
#'
#' @param scale_by *Scale values by a fixed multiplier*
#'
#' `scalar<numeric|integer>` // *default:* `1`
#'
#' All numeric values will be multiplied by the `scale_by` value before
#' undergoing formatting. Since the `default` value is `1`, no values will be
#' changed unless a different multiplier value is supplied. This value will be
#' ignored if using any of the `suffixing` options (i.e., where `suffixing` is
#' not set to `FALSE`).
#'
#' @param suffixing *Specification for large-number suffixing*
#'
#' `scalar<logical>|vector<character>` // *default:* `FALSE`
#'
#' The `suffixing` option allows us to scale and apply suffixes to larger
#' numbers (e.g., `1924000` can be transformed to `1.92M`). This option can
#' accept a logical value, where `FALSE` (the default) will not perform this
#' transformation and `TRUE` will apply thousands (`"K"`), millions (`"M"`),
#' billions (`"B"`), and trillions (`"T"`) suffixes after automatic value
#' scaling.
#'
#' We can alternatively provide a character vector that serves as a
#' specification for which symbols are to used for each of the value ranges.
#' These preferred symbols will replace the defaults (e.g.,
#' `c("k", "Ml", "Bn", "Tr")` replaces `"K"`, `"M"`, `"B"`, and `"T"`).
#'
#' Including `NA` values in the vector will ensure that the particular range
#' will either not be included in the transformation (e.g.,
#' `c(NA, "M", "B", "T")` won't modify numbers at all in the thousands range)
#' or the range will inherit a previous suffix (e.g., with
#' `c("K", "M", NA, "T")`, all numbers in the range of millions and billions
#' will be in terms of millions).
#'
#' Any use of `suffixing` (where it is not set expressly as `FALSE`) means
#' that any value provided to `scale_by` will be ignored.
#'
#' If using `system = "ind"` then the default suffix set provided by
#' `suffixing = TRUE` will be the equivalent of `c(NA, "L", "Cr")`. This
#' doesn't apply suffixes to the thousands range, but does express values in
#' *lakhs* and *crores*.
#'
#' @param pattern *Specification of the formatting pattern*
#'
#' `scalar<character>` // *default:* `"{x}"`
#'
#' A formatting pattern that allows for decoration of the formatted value. The
#' formatted value is represented by the `{x}` (which can be used multiple
#' times, if needed) and all other characters will be interpreted as string
#' literals.
#'
#' @param sep_mark *Separator mark for digit grouping*
#'
#' `scalar<character>` // *default:* `","`
#'
#' The string to use as a separator between groups of digits. For example,
#' using `sep_mark = ","` with a value of `1000` would result in a formatted
#' value of `"1,000"`. This argument is ignored if a `locale` is supplied
#' (i.e., is not `NULL`).
#'
#' @param dec_mark *Decimal mark*
#'
#' `scalar<character>` // *default:* `"."`
#'
#' The string to be used as the decimal mark. For example, using
#' `dec_mark = ","` with the value `0.152` would result in a formatted value
#' of `"0,152"`). This argument is ignored if a `locale` is supplied (i.e., is
#' not `NULL`).
#'
#' @param force_sign *Forcing the display of a positive sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the positive sign be shown for positive values (effectively showing
#' a sign for all values except zero)? If so, use `TRUE` for this option. The
#' default is `FALSE`, where only negative numbers will display a minus sign.
#' This option is disregarded when using accounting notation with
#' `accounting = TRUE`.
#'
#' @param system *Numbering system for grouping separators*
#'
#' `singl-kw:[intl|ind]` // *default:* `"intl"`
#'
#' The international numbering system (keyword: `"intl"`) is widely used and
#' its grouping separators (i.e., `sep_mark`) are always separated by three
#' digits. The alternative system, the Indian numbering system (keyword:
#' `"ind"`), uses grouping separators that correspond to thousand, lakh,
#' crore, and higher quantities.
#'
#' @param locale *Locale identifier*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' An optional locale identifier that can be used for formatting values
#' according the locale's rules. Examples include `"en"` for English (United
#' States) and `"fr"` for French (France). We can use the [info_locales()]
#' function as a useful reference for all of the locales that are supported. A
#' locale ID can be also set in the initial [gt()] function call (where it
#' would be used automatically by any function with a `locale` argument) but a
#' `locale` value provided here will override that global locale.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_number()` formatting function is compatible with body cells that are
#' of the `"numeric"` or `"integer"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_number()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `decimals`
#' - `n_sigfig`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `use_seps`
#' - `accounting`
#' - `scale_by`
#' - `suffixing`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use the [`exibble`] dataset to create a **gt** table. With the `fmt_number()`
#' function, we'll format the `num` column to have three decimal places (with
#' `decimals = 3`) and omit the use of digit separators (with `use_seps =
#' FALSE`).
#'
#' ```r
#' exibble |>
#' gt() |>
#' fmt_number(
#' columns = num,
#' decimals = 3,
#' use_seps = FALSE
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_number_1.png")`
#' }}
#'
#' Use a modified version of the [`countrypops`] dataset to create a **gt**
#' table with row labels. Format all columns to use large-number suffixing
#' (e.g., where `"10,000,000"` becomes `"10M"`) with the `suffixing = TRUE`
#' option.
#'
#' ```r
#' countrypops |>
#' dplyr::select(country_code_3, year, population) |>
#' dplyr::filter(country_code_3 %in% c("CHN", "IND", "USA", "PAK", "IDN")) |>
#' dplyr::filter(year > 1975 & year %% 5 == 0) |>
#' tidyr::spread(year, population) |>
#' dplyr::arrange(desc(`2015`)) |>
#' gt(rowname_col = "country_code_3") |>
#' fmt_number(suffixing = TRUE)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_number_2.png")`
#' }}
#'
#' In a variation of the previous table, we can combine large-number suffixing
#' with a declaration of the number of significant digits to use. With things
#' like population figures, `n_sigfig = 3` is a very good option.
#'
#' ```r
#' countrypops |>
#' dplyr::select(country_code_3, year, population) |>
#' dplyr::filter(country_code_3 %in% c("CHN", "IND", "USA", "PAK", "IDN")) |>
#' dplyr::filter(year > 1975 & year %% 5 == 0) |>
#' tidyr::spread(year, population) |>
#' dplyr::arrange(desc(`2015`)) |>
#' gt(rowname_col = "country_code_3") |>
#' fmt_number(suffixing = TRUE, n_sigfig = 3)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_number_3.png")`
#' }}
#'
#' There can be cases where you want to show numbers to a large number of
#' decimal places but also drop the unnecessary trailing zeros for low-precision
#' values. Let's take a portion of the [`towny`] dataset and format the
#' `latitude` and `longitude` columns with `fmt_number()`. We'll have up to 5
#' digits displayed as decimal values, but we'll also unconditionally drop any
#' runs of trailing zeros in the decimal part with `drop_trailing_zeros = TRUE`.
#'
#' ```r
#' towny |>
#' dplyr::select(name, latitude, longitude) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' fmt_number(decimals = 5, drop_trailing_zeros = TRUE) |>
#' cols_merge(columns = -name, pattern = "{1}, {2}") |>
#' cols_label(
#' name ~ "Municipality",
#' latitude = "Location"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_number_4.png")`
#' }}
#'
#' Another strategy for dealing with precision of decimals is to have a separate
#' column of values that specify how many decimal digits to retain. Such a
#' column can be added via [cols_add()] or it can be part of the input table for
#' [gt()]. With that column available, it can be referenced in the `decimals`
#' argument with the [from_column()] helper function. This approach yields a
#' display of coordinate values that reflects the measurement precision of each
#' value.
#'
#' ```r
#' towny |>
#' dplyr::select(name, latitude, longitude) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' cols_add(dec_digits = c(1, 2, 2, 5, 5, 2, 3, 2, 3, 3)) |>
#' fmt_number(decimals = from_column(column = "dec_digits")) |>
#' cols_merge(columns = -name, pattern = "{1}, {2}") |>
#' cols_label(
#' name ~ "Municipality",
#' latitude = "Location"
#' ) |>
#' cols_hide(columns = dec_digits)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_number_5.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-1
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The [fmt_integer()] function might be more useful if you really need
#' to format numeric values to appear as integers (i.e., no decimals will be
#' shown and input values are rounded as necessary). Need to do numeric
#' formatting on a vector? Take a look at the vector-formatting version of
#' this function: [vec_fmt_number()].
#'
#' @import rlang
#' @export
fmt_number <- function(
data,
columns = everything(),
rows = everything(),
decimals = 2,
n_sigfig = NULL,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
use_seps = TRUE,
accounting = FALSE,
scale_by = 1.0,
suffixing = FALSE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - decimals
# - n_sigfig
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - use_seps
# - accounting
# - scale_by
# - suffixing
# - pattern
# - sep_mark
# - dec_mark
# - force_sign
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_number",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_number(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
decimals = p_i$decimals %||% decimals,
n_sigfig = p_i$n_sigfig %||% n_sigfig,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
use_seps = p_i$use_seps %||% use_seps,
accounting = p_i$accounting %||% accounting,
scale_by = p_i$scale_by %||% scale_by,
suffixing = p_i$suffixing %||% suffixing,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign = p_i$force_sign %||% force_sign,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
system <- rlang::arg_match(system)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
dec_mark <- get_locale_dec_mark(locale, dec_mark)
# Normalize the `suffixing` input to either return a character vector
# of suffix labels, or NULL (the case where `suffixing` is FALSE)
suffix_labels <- normalize_suffixing_inputs(suffixing, scale_by, system)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_number()` and `fmt_integer()` functions can only be
used on `columns` with numeric data."
)
}
}
# Set the `formatC_format` option according to whether number
# formatting with significant figures is to be performed
if (!is.null(n_sigfig) && !is.na(n_sigfig[1])) {
# Stop function if `n_sigfig` does not have a valid value
validate_n_sigfig(n_sigfig = n_sigfig)
formatC_format <- "fg"
} else {
formatC_format <- "f"
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Create the `suffix_df` object
suffix_df <-
create_suffix_df(
x,
decimals = decimals,
suffix_labels = suffix_labels,
scale_by = scale_by,
system = system
)
# Scale the `x` values by the `scale_by` values in `suffix_df`
x <- scale_x_values(x, scale_by = suffix_df$scale_by)
# Format numeric values to character-based numbers
x_str <-
format_num_to_str(
x,
context = context,
decimals = decimals,
n_sigfig = n_sigfig,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
format = formatC_format,
system = system
)
# Paste the vector of suffixes to the right of the values
x_str <- paste_right(x_str, x_right = suffix_df$suffix)
# Format values in accounting notation (if `accounting = TRUE`)
x_str <-
format_as_accounting(
x_str,
x = x,
context = context,
accounting = accounting
)
# Force a positive sign on certain values if the option is taken
if (!accounting && force_sign) {
positive_x <- !is.na(x) & x > 0
x_str[positive_x] <- paste_left(x_str[positive_x], x_left = "+")
}
x_str
}
)
)
}
#' Format values as integers
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform number-based
#' formatting so that the targeted values are always rendered as integer values.
#' We can have fine control over integer formatting with the following options:
#'
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - scaling: we can choose to scale targeted values by a multiplier value
#' - large-number suffixing: larger figures (thousands, millions, etc.) can
#' be autoscaled and decorated with the appropriate suffixes
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param suffixing *Specification for large-number suffixing*
#'
#' `scalar<logical>|vector<character>` // *default:* `FALSE`
#'
#' The `suffixing` option allows us to scale and apply suffixes to larger
#' numbers (e.g., `1924000` can be transformed to `2M`). This option can
#' accept a logical value, where `FALSE` (the default) will not perform this
#' transformation and `TRUE` will apply thousands (`K`), millions (`M`),
#' billions (`B`), and trillions (`T`) suffixes after automatic value scaling.
#'
#' We can alternatively provide a character vector that serves as a
#' specification for which symbols are to used for each of the value ranges.
#' These preferred symbols will replace the defaults (e.g.,
#' `c("k", "Ml", "Bn", "Tr")` replaces `"K"`, `"M"`, `"B"`, and `"T"`).
#'
#' Including `NA` values in the vector will ensure that the particular range
#' will either not be included in the transformation (e.g.,
#' `c(NA, "M", "B", "T")` won't modify numbers at all in the thousands range)
#' or the range will inherit a previous suffix (e.g., with
#' `c("K", "M", NA, "T")`, all numbers in the range of millions and billions
#' will be in terms of millions).
#'
#' Any use of `suffixing` (where it is not set expressly as `FALSE`) means
#' that any value provided to `scale_by` will be ignored.
#'
#' If using `system = "ind"` then the default suffix set provided by
#' `suffixing = TRUE` will be the equivalent of `c(NA, "L", "Cr")`. This
#' doesn't apply suffixes to the thousands range, but does express values in
#' *lakhs* and *crores*.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_integer()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_integer()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `use_seps`
#' - `accounting`
#' - `scale_by`
#' - `suffixing`
#' - `pattern`
#' - `sep_mark`
#' - `force_sign`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any value be
#' provided in `sep_mark`, it will be overridden by the locale's preferred
#' values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' For this example, we'll use two columns from the [`exibble`] dataset and
#' create a simple **gt** table. With the `fmt_integer()` function, we'll format
#' the `num` column as integer values having no digit separators (with the
#' `use_seps = FALSE` option).
#'
#' ```r
#' exibble |>
#' dplyr::select(num, char) |>
#' gt() |>
#' fmt_integer(use_seps = FALSE)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_integer_1.png")`
#' }}
#'
#' Let's use a modified version of the [`countrypops`] dataset to create a
#' **gt** table with row labels. We will format all numeric columns with
#' `fmt_integer()` and scale all values by `1 / 1E6`, giving us integer values
#' representing millions of people. We can make clear what the values represent
#' with an informative spanner label via [tab_spanner()].
#'
#' ```r
#' countrypops |>
#' dplyr::select(country_code_3, year, population) |>
#' dplyr::filter(country_code_3 %in% c("CHN", "IND", "USA", "PAK", "IDN")) |>
#' dplyr::filter(year > 1975 & year %% 5 == 0) |>
#' tidyr::spread(year, population) |>
#' dplyr::arrange(desc(`2015`)) |>
#' gt(rowname_col = "country_code_3") |>
#' fmt_integer(scale_by = 1 / 1E6) |>
#' tab_spanner(label = "Millions of People", columns = everything())
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_integer_2.png")`
#' }}
#'
#' Using a subset of the [`towny`] dataset, we can do interesting things with
#' integer values. Through [cols_add()] we'll add the `difference` column (which
#' calculates the difference between 2021 and 2001 populations). All numeric
#' values will be formatted with a first pass of `fmt_integer()`; a second pass
#' of `fmt_integer()` focuses on the `difference` column and here we use the
#' `force_sign = TRUE` option to draw attention to positive and negative
#' difference values.
#'
#' ```r
#' towny |>
#' dplyr::select(name, population_2001, population_2021) |>
#' dplyr::slice_tail(n = 10) |>
#' gt() |>
#' cols_add(difference = population_2021 - population_2001) |>
#' fmt_integer() |>
#' fmt_integer(columns = difference, force_sign = TRUE) |>
#' cols_label_with(fn = function(x) gsub("population_", "", x)) |>
#' tab_style(
#' style = cell_fill(color = "gray90"),
#' locations = cells_body(columns = difference)
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_integer_3.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-2
#'
#' @section Function Introduced:
#' `v0.3.1` (August 9, 2021)
#'
#' @seealso The [fmt_number()] function might be more of what you need if you'd
#' like decimal values in your outputs. Need to do integer-based formatting on
#' a vector? Take a look at the vector-formatting version of this function:
#' [vec_fmt_integer()].
#'
#' @import rlang
#' @export
fmt_integer <- function(
data,
columns = everything(),
rows = everything(),
use_seps = TRUE,
accounting = FALSE,
scale_by = 1.0,
suffixing = FALSE,
pattern = "{x}",
sep_mark = ",",
force_sign = FALSE,
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - use_seps
# - accounting
# - scale_by
# - suffixing
# - pattern
# - sep_mark
# - force_sign
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_integer",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_integer(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
use_seps = p_i$use_seps %||% use_seps,
accounting = p_i$accounting %||% accounting,
scale_by = p_i$scale_by %||% scale_by,
suffixing = p_i$suffixing %||% suffixing,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
force_sign = p_i$force_sign %||% force_sign,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
fmt_number(
data = data,
columns = {{ columns }},
rows = {{ rows }},
decimals = 0,
n_sigfig = NULL,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
use_seps = use_seps,
accounting = accounting,
scale_by = scale_by,
suffixing = suffixing,
pattern = pattern,
sep_mark = sep_mark,
dec_mark = "not used",
force_sign = force_sign,
system = system,
locale = locale
)
}
#' Format values to scientific notation
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform formatting so that the
#' targeted values are rendered in scientific notation, where extremely large or
#' very small numbers can be expressed in a more practical fashion. Here,
#' numbers are written in the form of a mantissa (`m`) and an exponent (`n`)
#' with the construction *m* x 10^*n* or *m*E*n*. The mantissa component is a
#' number between `1` and `10`. For instance, `2.5 x 10^9` can be used to
#' represent the value 2,500,000,000 in scientific notation. In a similar way,
#' 0.00000012 can be expressed as `1.2 x 10^-7`. Due to its ability to describe
#' numbers more succinctly and its ease of calculation, scientific notation is
#' widely employed in scientific and technical domains.
#'
#' We have fine control over the formatting task, with the following options:
#'
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - scaling: we can choose to scale targeted values by a multiplier value
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param scale_by *Scale values by a fixed multiplier*
#'
#' `scalar<numeric|integer>` // *default:* `1`
#'
#' All numeric values will be multiplied by the `scale_by` value before
#' undergoing formatting. Since the `default` value is `1`, no values will be
#' changed unless a different multiplier value is supplied.
#'
#' @param exp_style *Style declaration for exponent formatting*
#'
#' `scalar<character>` // *default:* `"x10n"`
#'
#' Style of formatting to use for the scientific notation formatting. By
#' default this is `"x10n"` but other options include using a single letter
#' (e.g., `"e"`, `"E"`, etc.), a letter followed by a `"1"` to signal a
#' minimum digit width of one, or `"low-ten"` for using a stylized `"10"`
#' marker.
#'
#' @param force_sign_m,force_sign_n *Forcing the display of a positive sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the plus sign be shown for positive values of the mantissa (first
#' component, `force_sign_m`) or the exponent (`force_sign_n`)? This would
#' effectively show a sign for all values except zero on either of those
#' numeric components of the notation. If so, use `TRUE` for either one of
#' these options. The default for both is `FALSE`, where only negative numbers
#' will display a sign.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_scientific()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_scientific()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `decimals`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `scale_by`
#' - `exp_style`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign_m`
#' - `force_sign_n`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Let's use the [`exibble`] dataset to create a simple **gt** table. We'll
#' elect to the `num` column as partially numeric and partially in scientific
#' notation. This is done with two separate calls of [fmt_number()] and
#' `fmt_scientific()`. We'll use the expressions `num > 500` and `num <= 500` in
#' the functions' respective `rows` arguments to target formatting to specific
#' cells.
#'
#' ```r
#' exibble |>
#' gt() |>
#' fmt_number(
#' columns = num,
#' rows = num > 500,
#' decimals = 1,
#' scale_by = 1/1000,
#' pattern = "{x}K"
#' ) |>
#' fmt_scientific(
#' columns = num,
#' rows = num <= 500,
#' decimals = 1
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_scientific_1.png")`
#' }}
#'
#' The [`constants`] table contains a plethora of data on the fundamental
#' physical constant and most values (in the units used) are either very small
#' or very large, so scientific formatting is suitable. The values differ in the
#' degree of measurement precision and separate columns (`sf_value` and
#' `sf_uncert`) contain the exact number of significant figures for each
#' measurement value and the associated uncertainty value. We can use the
#' `n_sigfig` argument of `fmt_scientific()` in conjunction with the
#' [from_column()] helper to get the correct number of significant digits for
#' each value.
#'
#' ```r
#' constants |>
#' dplyr::filter(grepl("Planck", name)) |>
#' gt() |>
#' fmt_scientific(
#' columns = value,
#' n_sigfig = from_column(column = "sf_value")
#' ) |>
#' fmt_scientific(
#' columns = uncert,
#' n_sigfig = from_column(column = "sf_uncert")
#' ) |>
#' cols_hide(columns = starts_with("sf")) |>
#' fmt_units(columns = units) |>
#' sub_missing(missing_text = "")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_scientific_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-3
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_scientific()].
#'
#' @import rlang
#' @export
fmt_scientific <- function(
data,
columns = everything(),
rows = everything(),
decimals = 2,
n_sigfig = NULL,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
scale_by = 1.0,
exp_style = "x10n",
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign_m = FALSE,
force_sign_n = FALSE,
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - decimals
# - n_sigfig
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - scale_by
# - exp_style
# - pattern
# - sep_mark
# - dec_mark
# - force_sign_m
# - force_sign_n
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_scientific",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_scientific(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
decimals = p_i$decimals %||% decimals,
n_sigfig = p_i$n_sigfig %||% n_sigfig,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
scale_by = p_i$scale_by %||% scale_by,
exp_style = p_i$exp_style %||% exp_style,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign_m = p_i$force_sign_m %||% force_sign_m,
force_sign_n = p_i$force_sign_n %||% force_sign_n,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Set default values
suffixing <- FALSE
use_seps <- TRUE
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
dec_mark <- get_locale_dec_mark(locale, dec_mark)
# Normalize the `suffixing` input to either return a character vector
# of suffix labels, or NULL (the case where `suffixing` is FALSE)
suffix_labels <- normalize_suffixing_inputs(suffixing, scale_by, system = "intl")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_scientific()` function can only be used on `columns`
with numeric data."
)
}
}
# If `n_sigfig` is defined (and not `NA`) modify the number of
# decimal places and keep all trailing zeros
if (!is.null(n_sigfig) && !is.na(n_sigfig[1])) {
# Stop function if `n_sigfig` does not have a valid value
validate_n_sigfig(n_sigfig = n_sigfig)
decimals <- n_sigfig - 1
drop_trailing_zeros <- FALSE
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Define the marks by context
exp_marks <- context_exp_marks(context = context)
minus_mark <- context_minus_mark(context = context)
# Define the `replace_minus()` function
replace_minus <- function(x) {
tidy_gsub(x, "-", minus_mark, fixed = TRUE)
}
# Create the `suffix_df` object
suffix_df <-
create_suffix_df(
x,
decimals = decimals,
suffix_labels = suffix_labels,
scale_by = scale_by,
system = "intl"
)
# Scale the `x` values by the `scale_by` values in `suffix_df`
x <- scale_x_values(x, scale_by = suffix_df$scale_by)
x_str <-
format_num_to_str(
x,
context = context,
decimals = decimals,
n_sigfig = NULL,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = FALSE,
format = "e",
replace_minus_mark = FALSE
)
if (exp_style == "x10n") {
# Determine which values don't require the (x 10^n)
# for scientific formatting since their order would be zero
small_pos <- has_order_zero(x)
# For any numbers that shouldn't have an exponent, remove
# that portion from the character version
x_str[small_pos] <- replace_minus(gsub("(e|E).*", "", x_str[small_pos]))
# For any non-NA numbers that do have an exponent, format
# those according to the output context
sci_parts <- split_scientific_notn(x_str = x_str[!small_pos])
m_part <- sci_parts[["num"]]
n_part <- sci_parts[["exp"]]
if (force_sign_n) {
n_part <-
vapply(
n_part,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (x > 0) gsub("^", "+", x) else as.character(x)
}
)
}
if (drop_trailing_zeros) {
m_part <- sub("0+$", "", m_part)
x_str[small_pos] <- sub("0+$", "", x_str[small_pos])
}
if (drop_trailing_dec_mark) {
m_part <- sub("\\.$", "", m_part)
x_str[small_pos] <- sub("\\.$", "", x_str[small_pos])
}
m_part <- replace_minus(m_part)
n_part <- replace_minus(n_part)
x_str[!small_pos] <-
paste0(m_part, exp_marks[1], n_part, exp_marks[2])
} else {
exp_str <- context_exp_str(exp_style = exp_style, context = context)
if (grepl("^[a-zA-Z]{1}1$", exp_style)) {
n_min_width <- 1
} else {
n_min_width <- 2
}
# The `n_part` will be extracted here and it must be padded to
# the defined minimum number of decimal places
n_part <-
vapply(
x_str,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (!grepl("e(\\+|-)[0-9]{2,}", x)) return("")
x <- unlist(strsplit(x, "e"))[2]
if (grepl("-", x)) {
x <- gsub("-", "", x)
x <- formatC(as.numeric(x), width = n_min_width, flag = "0")
x <- paste0("-", x)
} else {
x <- formatC(as.numeric(x), width = n_min_width, flag = "0")
}
x
}
)
# Generate `x_str_left` using `x_str` here
x_str_left <-
vapply(
x_str,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (!grepl("e(\\+|-)[0-9]{2,}", x)) return("")
unlist(strsplit(x, "e"))[1]
}
)
if (force_sign_n) {
n_part <-
vapply(
seq_along(n_part),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(i) {
if (!grepl("-", n_part[i])) {
out <- gsub("^", "+", n_part[i])
} else {
out <- n_part[i]
}
out
}
)
}
x_str[!is.infinite(x)] <-
paste0(x_str_left[!is.infinite(x)], exp_str, replace_minus(n_part[!is.infinite(x)]))
x_str <- replace_minus(x_str)
}
# Force a positive sign on certain values if the option is taken
if (force_sign_m) {
positive_x <- !is.na(x) & x > 0
x_str[positive_x] <- paste_left(x_str[positive_x], x_left = "+")
}
x_str
}
)
)
}
#' Format values to engineering notation
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform formatting so that the
#' targeted values are rendered in engineering notation, where numbers are
#' written in the form of a mantissa (`m`) and an exponent (`n`). When combined
#' the construction is either of the form *m* x 10^*n* or *m*E*n*. The mantissa
#' is a number between `1` and `1000` and the exponent is a multiple of `3`. For
#' example, the number 0.0000345 can be written in engineering notation as
#' `34.50 x 10^-6`. This notation helps to simplify calculations and make it
#' easier to compare numbers that are on very different scales.
#'
#' We have fine control over the formatting task, with the following options:
#'
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - scaling: we can choose to scale targeted values by a multiplier value
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param scale_by *Scale values by a fixed multiplier*
#'
#' `scalar<numeric|integer>` // *default:* `1`
#'
#' All numeric values will be multiplied by the `scale_by` value before
#' undergoing formatting. Since the `default` value is `1`, no values will be
#' changed unless a different multiplier value is supplied.
#'
#' @param exp_style *Style declaration for exponent formatting*
#'
#' `scalar<character>` // *default:* `"x10n"`
#'
#' Style of formatting to use for the scientific notation formatting. By
#' default this is `"x10n"` but other options include using a single letter
#' (e.g., `"e"`, `"E"`, etc.), a letter followed by a `"1"` to signal a
#' minimum digit width of one, or `"low-ten"` for using a stylized `"10"`
#' marker.
#'
#' @param force_sign_m,force_sign_n *Forcing the display of a positive sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the plus sign be shown for positive values of the mantissa (first
#' component, `force_sign_m`) or the exponent (`force_sign_n`)? This would
#' effectively show a sign for all values except zero on either of those
#' numeric components of the notation. If so, use `TRUE` for either one of
#' these options. The default for both is `FALSE`, where only negative numbers
#' will display a sign.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_engineering()` formatting function is compatible with body cells
#' that are of the `"numeric"` or `"integer"` types. Any other types of body
#' cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_engineering()` to obtain varying parameter values from a specified
#' column within the table. This means that each row could be formatted a little
#' bit differently. These arguments provide support for [from_column()]:
#'
#' - `decimals`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `scale_by`
#' - `exp_style`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign_m`
#' - `force_sign_n`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use the [`exibble`] dataset to create a **gt** table. Format the `num` column
#' to display values in engineering notation using the `fmt_engineering()`
#' function.
#'
#' ```r
#' exibble |>
#' gt() |>
#' fmt_engineering(columns = num)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_engineering_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-4
#'
#' @section Function Introduced:
#' `v0.3.1` (August 9, 2021)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_engineering()].
#'
#' @import rlang
#' @export
fmt_engineering <- function(
data,
columns = everything(),
rows = everything(),
decimals = 2,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
scale_by = 1.0,
exp_style = "x10n",
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign_m = FALSE,
force_sign_n = FALSE,
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - decimals
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - scale_by
# - exp_style
# - pattern
# - sep_mark
# - dec_mark
# - force_sign_m
# - force_sign_n
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_engineering",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_engineering(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
decimals = p_i$decimals %||% decimals,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
scale_by = p_i$scale_by %||% scale_by,
exp_style = p_i$exp_style %||% exp_style,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign_m = p_i$force_sign_m %||% force_sign_m,
force_sign_n = p_i$force_sign_n %||% force_sign_n,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Set default values
suffixing <- FALSE
use_seps <- TRUE
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
dec_mark <- get_locale_dec_mark(locale, dec_mark)
# Normalize the `suffixing` input to either return a character vector
# of suffix labels, or NULL (the case where `suffixing` is FALSE)
suffix_labels <- normalize_suffixing_inputs(suffixing, scale_by, system = "intl")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_engineering()` function can only be used on `columns`
with numeric data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Define the marks by context
exp_marks <- context_exp_marks(context = context)
minus_mark <- context_minus_mark(context = context)
# Define the `replace_minus()` function
replace_minus <- function(x) {
tidy_gsub(x, "-", minus_mark, fixed = TRUE)
}
# Create the `suffix_df` object
suffix_df <-
create_suffix_df(
x,
decimals = decimals,
suffix_labels = suffix_labels,
scale_by = scale_by,
system = "intl"
)
# Scale the `x_vals` by the `scale_by` values
x <- scale_x_values(x, suffix_df$scale_by)
zero_x <- x == 0
negative_x <- x < 0
x_str_left <- x_str_right <- x_str <- character(length = length(x))
# Powers in engineering notation always in steps of 3; this
# calculation gets, for every value, the effective power value
power_3 <- floor(log(abs(x), base = 1000)) * 3
# Any zero values will return Inf from the previous calculation
# so we must replace these with a `0`
power_3[is.infinite(power_3)] <- 0L
# The numbers on the LHS must be scaled to correspond to the
# RHS 10^`power_level` values (i.e., `<LHS> x 10^(n * 3)`)
x <- x / 10^(power_3)
# With the scaled values for the LHS, format these according
# to the options set by the user
x_str_left <-
format_num_to_str(
x,
context = context,
decimals = decimals,
n_sigfig = NULL,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
format = "f",
replace_minus_mark = FALSE
)
x_str_left <- replace_minus(x_str_left)
n_part <-
vapply(
power_3,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (x > 0 && force_sign_n) {
out <- gsub("^", "+", x)
} else {
out <- as.character(x)
}
out
}
)
if (exp_style == "x10n") {
# Generate the RHS of the formatted value (i.e., the `x 10^(n * 3)`)
x_str_right <-
paste0(
exp_marks[1],
replace_minus(n_part),
exp_marks[2]
)
# Replace elements from `x_str_right` where exponent values
# are zero with empty strings
x_str_right[power_3 == 0] <- ""
# Paste the LHS and RHS components to generate the formatted values
x_str <- paste0(x_str_left, x_str_right)
} else {
exp_str <- context_exp_str(exp_style = exp_style, context = context)
if (grepl("^[a-zA-Z]{1}1$", exp_style)) {
n_min_width <- 1
} else {
n_min_width <- 2
}
# `power_3` must be padded to two decimal places
n_part <-
vapply(
power_3,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl("-", x)) {
x <- gsub("-", "", x)
x <- formatC(as.numeric(x), width = n_min_width, flag = "0")
x <- paste0("-", x)
} else {
x <- formatC(as.numeric(x), width = n_min_width, flag = "0")
}
x
}
)
if (force_sign_n) {
n_part <-
vapply(
seq_along(n_part),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(i) {
if (power_3[i] >= 0) {
out <- gsub("^", "+", n_part[i])
} else {
out <- n_part[i]
}
out
}
)
}
x_str[!is.infinite(x)] <-
paste0(x_str_left[!is.infinite(x)], exp_str, replace_minus(n_part[!is.infinite(x)]))
x_str[is.infinite(x)] <- as.character(x[is.infinite(x)])
x_str <- replace_minus(x_str)
}
# Force a positive sign on certain values if the option is taken
if (force_sign_m) {
positive_x <- !is.na(x) & x > 0
x_str[positive_x] <- paste_left(x_str[positive_x], x_left = "+")
}
x_str
}
)
)
}
#' Format values to take a predefined symbol
#'
#' @inheritParams fmt_number
#' @inheritParams fmt_currency
#' @return An object of class `gt_tbl`.
#' @noRd
fmt_symbol <- function(
data,
columns,
rows,
symbol = "*",
accounting = FALSE,
decimals = NULL,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
use_seps = TRUE,
scale_by = 1.0,
suffixing = FALSE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
placement = "left",
incl_space = FALSE,
system = c("intl", "ind"),
locale = NULL
) {
# Ensure that arguments are matched
system <- rlang::arg_match(system)
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
dec_mark <- get_locale_dec_mark(locale, dec_mark)
# Normalize the `suffixing` input to either return a character vector
# of suffix labels, or NULL (the case where `suffixing` is FALSE)
suffix_labels <- normalize_suffixing_inputs(suffixing, scale_by, system = system)
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = c("numeric", "integer"),
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Create the `x_str` vector
x_str <- character(length(x))
# Create the `suffix_df` object
suffix_df <-
create_suffix_df(
x,
decimals = decimals,
suffix_labels = suffix_labels,
scale_by = scale_by,
system = system
)
# Scale the `x_vals` by the `scale_by` value
x <- scale_x_values(x, suffix_df$scale_by)
is_negative_x <- x < 0
is_not_negative_x <- !is_negative_x
if (any(is_not_negative_x)) {
# Format numeric values to character-based numbers
x_str[is_not_negative_x] <-
format_num_to_str_c(
x[is_not_negative_x],
context = context,
decimals = decimals,
n_sigfig = NULL,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
system = system
)
}
x_abs_str <- x_str
if (any(is_negative_x)) {
# Format numeric values to character-based numbers
x_abs_str[is_negative_x] <-
format_num_to_str_c(
abs(x[is_negative_x]),
context = context,
decimals = decimals,
n_sigfig = NULL,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
system = system
)
}
# If we supply a per mille or per myriad keyword as
# `symbol` (possible inputs in `fmt_partsper()`),
# get the contextually correct mark
if (is.character(symbol)) {
if (symbol == "per-mille") {
symbol <- I(context_permille_mark(context = context))
} else if (symbol == "per-myriad") {
symbol <- I(context_permyriad_mark(context = context))
}
}
# Format values with a symbol string
x_str <-
format_symbol_str(
x_abs_str = x_abs_str,
x = x,
context = context,
symbol = symbol,
incl_space = incl_space,
placement = placement
)
# Format values in accounting notation (if `accounting = TRUE`)
x_str <-
format_as_accounting(
x_str,
x = x,
context = context,
accounting = accounting
)
# Paste the vector of suffixes to the right of the values
x_str <- paste_right(x_str, x_right = suffix_df$suffix)
# Force a positive sign on certain values if the option is taken
if (!accounting && force_sign) {
positive_x <- !is.na(x) & x > 0
x_str[positive_x] <- paste_left(x_str[positive_x], x_left = "+")
}
x_str
}
)
)
}
#' Format values as a percentage
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform percentage-based
#' formatting. It is assumed the input numeric values are proportional values
#' and, in this case, the values will be automatically multiplied by `100`
#' before decorating with a percent sign (the other case is accommodated though
#' setting the `scale_values` to `FALSE`). For more control over percentage
#' formatting, we can use the following options:
#'
#' - percent sign placement: the percent sign can be placed after or
#' before the values and a space can be inserted between the symbol and the
#' value.
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - value scaling toggle: choose to disable automatic value scaling in the
#' situation that values are already scaled coming in (and just require the
#' percent symbol)
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param scale_values *Multiply input values by 100*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' Should the values be scaled through multiplication by 100? By default this
#' scaling is performed since the expectation is that incoming values are
#' usually proportional. Setting to `FALSE` signifies that the values are
#' already scaled and require only the percent sign when formatted.
#'
#' @param incl_space *Include a space between the value and the % sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' An option for whether to include a space between the value and the percent
#' sign. The default is to not introduce a space character.
#'
#' @param placement *Percent sign placement*
#'
#' `scalar<character>` // *default:* `"right"`
#'
#' This option governs the placement of the percent sign. This can be either
#' be `right` (the default) or `left`.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_percent()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_percent()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `decimals`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `scale_values`
#' - `use_seps`
#' - `accounting`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign`
#' - `incl_space`
#' - `placement`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use a summarized version of the [`pizzaplace`] dataset to create a **gt**
#' table. With the `fmt_percent()` function, we can format the `frac_of_quota`
#' column to display values as percentages (to one decimal place).
#'
#' ```r
#' pizzaplace |>
#' dplyr::mutate(month = as.numeric(substr(date, 6, 7))) |>
#' dplyr::group_by(month) |>
#' dplyr::summarize(pizzas_sold = dplyr::n()) |>
#' dplyr::ungroup() |>
#' dplyr::mutate(frac_of_quota = pizzas_sold / 4000) |>
#' gt(rowname_col = "month") |>
#' fmt_percent(
#' columns = frac_of_quota,
#' decimals = 1
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_percent_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-5
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_percent()].
#'
#' @import rlang
#' @export
fmt_percent <- function(
data,
columns = everything(),
rows = everything(),
decimals = 2,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
scale_values = TRUE,
use_seps = TRUE,
accounting = FALSE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
incl_space = FALSE,
placement = "right",
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - decimals
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - scale_values
# - use_seps
# - accounting
# - pattern
# - sep_mark
# - dec_mark
# - force_sign
# - incl_space
# - placement
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_percent",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_percent(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
decimals = p_i$decimals %||% decimals,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
scale_values = p_i$scale_values %||% scale_values,
use_seps = p_i$use_seps %||% use_seps,
accounting = p_i$accounting %||% accounting,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign = p_i$force_sign %||% force_sign,
incl_space = p_i$incl_space %||% incl_space,
placement = p_i$placement %||% placement,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
system <- rlang::arg_match(system)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_percent()` function can only be used on `columns`
with numeric data."
)
}
}
if (scale_values) {
scale_by <- 100
} else {
scale_by <- 1.0
}
# Pass `data`, `columns`, `rows`, and other options to `fmt_symbol()`
fmt_symbol(
data = data,
columns = {{ columns }},
rows = {{ rows }},
symbol = "%",
accounting = accounting,
decimals = decimals,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
use_seps = use_seps,
scale_by = scale_by,
suffixing = FALSE,
pattern = pattern,
sep_mark = sep_mark,
dec_mark = dec_mark,
force_sign = force_sign,
placement = placement,
incl_space = incl_space,
system = system,
locale = locale
)
}
#' Format values as parts-per quantities
#'
#' @description
#'
#' With numeric values in a **gt** table we can format the values so that they
#' are rendered as *per mille*, *ppm*, *ppb*, etc., quantities. The following
#' list of keywords (with associated naming and scaling factors) is available to
#' use within `fmt_partsper()`:
#'
#' - `"per-mille"`: Per mille, (1 part in `1,000`)
#' - `"per-myriad"`: Per myriad, (1 part in `10,000`)
#' - `"pcm"`: Per cent mille (1 part in `100,000`)
#' - `"ppm"`: Parts per million, (1 part in `1,000,000`)
#' - `"ppb"`: Parts per billion, (1 part in `1,000,000,000`)
#' - `"ppt"`: Parts per trillion, (1 part in `1,000,000,000,000`)
#' - `"ppq"`: Parts per quadrillion, (1 part in `1,000,000,000,000,000`)
#'
#' The function provides a lot of formatting control and we can use the
#' following options:
#'
#' - custom symbol/units: we can override the automatic symbol or units display
#' with our own choice as the situation warrants
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - value scaling toggle: choose to disable automatic value scaling in the
#' situation that values are already scaled coming in (and just require the
#' appropriate symbol or unit display)
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param to_units *Output Quantity*
#'
#' `singl-kw:[per-mille|per-myriad|pcm|ppm|ppb|ppt|ppq]` // *default:* `"per-mille"`
#'
#' A keyword that signifies the desired output quantity. This can be any from
#' the following set: `"per-mille"`, `"per-myriad"`, `"pcm"`, `"ppm"`,
#' `"ppb"`, `"ppt"`, or `"ppq"`.
#'
#' @param symbol *Symbol or units to use in output display*
#'
#' `scalar<character>` // *default:* `"auto"`
#'
#' The symbol/units to use for the quantity. By default, this is set to
#' `"auto"` and **gt** will choose the appropriate symbol based on the
#' `to_units` keyword and the output context. However, this can be changed by
#' supplying a string (e.g, using `symbol = "ppbV"` when `to_units = "ppb"`).
#'
#' @param scale_values *Scale input values accordingly*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' Should the values be scaled through multiplication according to the keyword
#' set in `to_units`? By default this is `TRUE` since the expectation is that
#' normally values are proportions. Setting to `FALSE` signifies that the
#' values are already scaled and require only the appropriate symbol/units
#' when formatted.
#'
#' @param incl_space *Include a space between the value and the symbol/units*
#'
#' `scalar<character>|scalar<logical>` // *default:* `"auto"`
#'
#' An option for whether to include a space between the value and the
#' symbol/units. The default is `"auto"` which provides spacing dependent on
#' the mark itself. This can be directly controlled by using either `TRUE` or
#' `FALSE`.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_partsper()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_partsper()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `to_units`
#' - `symbol`
#' - `decimals`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `scale_values`
#' - `use_seps`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign`
#' - `incl_space`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Create a tibble of small numeric values and generate a **gt** table. Format
#' the `a` column to appear in scientific notation with [fmt_scientific()] and
#' format the `b` column as *per mille* values with `fmt_partsper()`.
#'
#' ```r
#' dplyr::tibble(x = 0:-5, a = 10^(0:-5), b = a) |>
#' gt(rowname_col = "x") |>
#' fmt_scientific(a, decimals = 0) |>
#' fmt_partsper(
#' columns = b,
#' to_units = "per-mille"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_partsper_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-6
#'
#' @section Function Introduced:
#' `v0.6.0` (May 24, 2022)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_partsper()].
#'
#' @import rlang
#' @export
fmt_partsper <- function(
data,
columns = everything(),
rows = everything(),
to_units = c("per-mille", "per-myriad", "pcm", "ppm", "ppb", "ppt", "ppq"),
symbol = "auto",
decimals = 2,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
scale_values = TRUE,
use_seps = TRUE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
incl_space = "auto",
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - to_units
# - symbol
# - decimals
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - scale_values
# - use_seps
# - pattern
# - sep_mark
# - dec_mark
# - force_sign
# - incl_space
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_partsper",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_partsper(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
to_units = p_i$to_units %||% to_units,
symbol = p_i$symbol %||% symbol,
decimals = p_i$decimals %||% decimals,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
scale_values = p_i$scale_values %||% scale_values,
use_seps = p_i$use_seps %||% use_seps,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign = p_i$force_sign %||% force_sign,
incl_space = p_i$incl_space %||% incl_space,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
to_units <- rlang::arg_match(to_units)
system <- rlang::arg_match(system)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_partsper()` function can only be used on `columns`
with numeric data."
)
}
}
# Scale values according to `to_units` value
if (scale_values) {
scale_by <-
switch(
to_units,
`per-mille` = 1E3,
`per-myriad` = 1E4,
pcm = 1E5,
ppm = 1E6,
ppb = 1E9,
ppt = 1E12,
ppq = 1E15,
)
} else {
scale_by <- 1.0
}
if (symbol == "auto") {
symbol <-
switch(
to_units,
`per-mille` = "per-mille",
`per-myriad` = "per-myriad",
pcm = "pcm",
ppm = "ppm",
ppb = "ppb",
ppt = "ppt",
ppq = "ppq"
)
}
if (incl_space == "auto") {
incl_space <-
switch(
to_units,
`per-mille` = ,
`per-myriad` = FALSE,
pcm = ,
ppm = ,
ppb = ,
ppt = ,
ppq = TRUE
)
}
# Pass `data`, `columns`, `rows`, and other options to `fmt_symbol()`
fmt_symbol(
data = data,
columns = {{ columns }},
rows = {{ rows }},
symbol = symbol,
accounting = FALSE,
decimals = decimals,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
use_seps = use_seps,
scale_by = scale_by,
suffixing = FALSE,
pattern = pattern,
sep_mark = sep_mark,
dec_mark = dec_mark,
force_sign = force_sign,
placement = "right",
incl_space = incl_space,
system = system,
locale = locale
)
}
#' Format values as mixed fractions
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform mixed-fraction-based
#' formatting. There are several options for setting the accuracy of the
#' fractions. Furthermore, there is an option for choosing a layout (i.e.,
#' typesetting style) for the mixed-fraction output.
#'
#' The following options are available for controlling this type of formatting:
#'
#' - accuracy: how to express the fractional part of the mixed fractions; there
#' are three keyword options for this and an allowance for arbitrary denominator
#' settings
#' - simplification: an option to simplify fractions whenever possible
#' - layout: We can choose to output values with diagonal or inline fractions
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol for the whole number portion
#' - pattern: option to use a text pattern for decoration of the formatted
#' mixed fractions
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param accuracy *Accuracy of fractions*
#'
#' `singl-kw:[low|med|high]|scalar<numeric|integer>(val>=1)` // *default:* `"low"`
#'
#' The type of fractions to generate. This can either be one of the keywords
#' `"low"`, `"med"`, or `"high"` (to generate fractions with denominators of
#' up to 1, 2, or 3 digits, respectively) or an integer value greater than
#' zero to obtain fractions with a fixed denominator (`2` yields halves, `3`
#' is for thirds, `4` is quarters, etc.). For the latter option, using
#' `simplify = TRUE` will simplify fractions where possible (e.g., `2/4` will
#' be simplified as `1/2`). By default, the `"low"` option is used.
#'
#' @param simplify *Simplify the fraction*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' If choosing to provide a numeric value for `accuracy`, the option to
#' simplify the fraction (where possible) can be taken with `TRUE` (the
#' default). With `FALSE`, denominators in fractions will be fixed to the
#' value provided in `accuracy`.
#'
#' @param layout *Layout of fractions in HTML output*
#'
#' `singl-kw:[inline|diagonal]` // *default:* `"inline"`
#'
#' For HTML output, the `"inline"` layout is the default. This layout places
#' the numerals of the fraction on the baseline and uses a standard slash
#' character. The `"diagonal"` layout will generate fractions that are typeset
#' with raised/lowered numerals and a virgule.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_fraction()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_fraction()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `accuracy`
#' - `simplify`
#' - `layout`
#' - `use_seps`
#' - `pattern`
#' - `sep_mark`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any value be
#' provided in `sep_mark`, it will be overridden by the locale's preferred
#' values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Using a summarized version of the [`pizzaplace`] dataset, let's create a
#' **gt** table. With the `fmt_fraction()` function we can format the `f_sold`
#' and `f_income` columns to display fractions. As for how the fractions are
#' represented, we are electing to use `accuracy = 10`. This gives all fractions
#' as tenths. We won't simplify the fractions (by using `simplify = FALSE`) and
#' this means that a fraction like `5/10` won't become `1/2`. With `layout =
#' "diagonal"`, we get a diagonal display of all fractions.
#'
#' ```r
#' pizzaplace |>
#' dplyr::group_by(type, size) |>
#' dplyr::summarize(
#' sold = dplyr::n(),
#' income = sum(price),
#' .groups = "drop_last"
#' ) |>
#' dplyr::group_by(type) |>
#' dplyr::mutate(
#' f_sold = sold / sum(sold),
#' f_income = income / sum(income),
#' ) |>
#' dplyr::arrange(type, dplyr::desc(income)) |>
#' gt(rowname_col = "size") |>
#' tab_header(
#' title = "Pizzas Sold in 2015",
#' subtitle = "Fraction of Sell Count and Revenue by Size per Type"
#' ) |>
#' fmt_integer(columns = sold) |>
#' fmt_currency(columns = income) |>
#' fmt_fraction(
#' columns = starts_with("f_"),
#' accuracy = 10,
#' simplify = FALSE,
#' layout = "diagonal"
#' ) |>
#' sub_missing(missing_text = "") |>
#' tab_spanner(
#' label = "Sold",
#' columns = contains("sold")
#' ) |>
#' tab_spanner(
#' label = "Revenue",
#' columns = contains("income")
#' ) |>
#' text_transform(
#' locations = cells_body(),
#' fn = function(x) {
#' dplyr::case_when(
#' x == 0 ~ "<em>nil</em>",
#' x != 0 ~ x
#' )
#' }
#' ) |>
#' cols_label(
#' sold = "Amount",
#' income = "Amount",
#' f_sold = md("_f_"),
#' f_income = md("_f_")
#' ) |>
#' cols_align(align = "center", columns = starts_with("f")) |>
#' tab_options(
#' table.width = px(400),
#' row_group.as_column = TRUE
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_fraction_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-7
#'
#' @section Function Introduced:
#' `v0.4.0` (February 15, 2022)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_fraction()].
#'
#' @import rlang
#' @export
fmt_fraction <- function(
data,
columns = everything(),
rows = everything(),
accuracy = NULL,
simplify = TRUE,
layout = c("inline", "diagonal"),
use_seps = TRUE,
pattern = "{x}",
sep_mark = ",",
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - accuracy
# - simplify
# - layout
# - use_seps
# - pattern
# - sep_mark
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_fraction",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_fraction(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
accuracy = p_i$accuracy %||% accuracy,
simplify = p_i$simplify %||% simplify,
layout = p_i$layout %||% layout,
use_seps = p_i$use_seps %||% use_seps,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
system <- rlang::arg_match(system)
layout <- rlang::arg_match(layout)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
if (is.null(accuracy)) {
accuracy <- "low"
} else {
if (is.character(accuracy)) {
rlang::arg_match0(accuracy, c("low", "med", "high"))
} else if (is.numeric(accuracy)) {
if (accuracy < 1) {
cli::cli_abort(c(
"The numeric value supplied for `accuracy` is invalid.",
"*" = "Must be an integer value greater than zero."
))
}
} else {
cli::cli_abort(c(
"The input for `accuracy` is invalid.",
"*" = "Must be a keyword \"low\", \"med\", or \"high\", or",
"*" = "Must be an integer value greater than zero."
))
}
}
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"{.fn fmt_fraction} must be used on `columns` with numeric data."
)
}
}
# Use locale-based `sep_mark` if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Get the correct minus mark based on the output context
minus_mark <- context_minus_mark(context = context)
# Generate an vector of empty strings that will eventually contain
# all of the fractional parts of the finalized numbers
fraction_x <- rep("", length(x))
# Round all values of x to 3 digits with the R-H-U method of
# rounding (for reproducibility purposes)
x <- round_gt(x, 3)
# Determine which of `x` are finite values
x_is_a_number <- is.finite(x)
# Divide the `x` values in 'big' and 'small' components; delay the
# formatting of `big_x` until it is appropriately rounded on the
# basis of the fractions obtained at the desired accuracy
big_x <- trunc(x)
small_x <- abs(x - big_x)
if (is.numeric(accuracy)) {
fraction_x[x_is_a_number] <-
make_frac(
x[x_is_a_number],
denom = accuracy,
simplify = simplify
)
} else {
# Format the 'small' portion of the numeric values
# to character-based numbers with exactly 3 decimal places
small_x_str <- as.character(small_x)
small_x_str[x_is_a_number] <-
format_num_to_str(
small_x[x_is_a_number],
context = context, decimals = 3, n_sigfig = NULL,
sep_mark = ",", dec_mark = ".",
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = TRUE,
format = "f"
)
# For every `small_x` value that corresponds to a number
# (i.e., not Inf), get the fractional part from the `fractions`
# lookup table
fraction_x[x_is_a_number] <-
fractions[(as.numeric(small_x_str[x_is_a_number]) * 1000) + 1, accuracy, drop = TRUE]
}
# Round up or down the `big_x` values when necessary; values
# of exactly "1" indicate a requirement for rounding and this
# is a two-pass operation to handle positive and then negative
# values of `big_x`
big_x[big_x >= 0 & fraction_x == "1"] <-
big_x[big_x >= 0 & fraction_x == "1"] + 1
big_x[big_x <= 0 & fraction_x == "1"] <-
big_x[big_x <= 0 & fraction_x == "1"] - 1
# Remove whole number values from `fraction_x`; they were only
# needed for rounding guidance and they signal the lack of a
# fractional part
fraction_x[fraction_x %in% c("0", "1")] <- ""
# Format the 'big' portion of the numeric values
# to character-based numbers
big_x <-
format_num_to_str(
big_x,
context = context,
decimals = 0,
n_sigfig = NULL,
sep_mark = sep_mark,
dec_mark = "",
drop_trailing_zeros = TRUE,
drop_trailing_dec_mark = TRUE,
format = "f",
system = system
)
# Initialize a vector that will contain the finalized strings
x_str <- character(length(x))
# Generate the mixed fractions by pasting `big_x` and `small_x`
# while ensuring there is a single space between these components
x_str[x_is_a_number] <-
paste(
big_x[x_is_a_number],
fraction_x[x_is_a_number],
sep = " "
)
# Trim any whitespace
x_str <- gsub("(^ | $)", "", x_str)
# Eliminate the display of leading zeros in mixed fractions
x_str <- gsub("^0\\s+?", "", x_str)
# There are situations where small fractions (not mixed) require
# a minus mark; these conditions are specific so we need to ascertain
# which values in `x_str` require this and then apply the mark to
# the targets
x_is_negative <- x < 0
x_is_zero <- x_str == "0"
x_has_minus_mark <- grepl(minus_mark, big_x)
x_needs_minus_mark <- x_is_negative & !x_is_zero & !x_has_minus_mark
x_str[x_needs_minus_mark] <- paste0(minus_mark, x_str[x_needs_minus_mark])
# Generate diagonal fractions if the `layout = "diagonal"` option was chosen
if (layout == "diagonal") {
has_a_fraction <- grepl("/", x_str)
non_fraction_part <- gsub("^(.*?)[0-9]*/[0-9]*", "\\1", x_str[has_a_fraction])
fraction_part <- gsub("^(.*?)([0-9]*/[0-9]*)", "\\2", x_str[has_a_fraction])
num_vec <- unlist(lapply(strsplit(fraction_part, "/"), `[[`, 1))
denom_vec <- unlist(lapply(strsplit(fraction_part, "/"), `[[`, 2))
if (context == "html") {
narrow_no_break_space_char <- "\U0202F"
slash_mark_char <- "\U02044"
num_vec <-
paste0(
"<span style=\"",
"font-size:0.6em;",
"line-height:0.6em;",
"vertical-align:0.45em;",
"\">",
num_vec,
"</span>"
)
denom_vec <-
paste0(
"<span style=\"",
"font-size:0.6em;",
"line-height:0.6em;",
"vertical-align:-0.05em;",
"\">",
denom_vec,
"</span>"
)
slash_mark <-
paste0(
"<span style=\"",
"font-size:0.7em;",
"line-height:0.7em;",
"vertical-align:0.15em;",
"\">",
slash_mark_char,
"</span>"
)
x_str[has_a_fraction] <-
paste0(
gsub(" ", narrow_no_break_space_char, non_fraction_part),
num_vec, slash_mark, denom_vec
)
} else if (context == "latex") {
x_str[has_a_fraction] <-
paste0(
gsub(" ", "\\\\, ", non_fraction_part),
paste0("{{}^{", num_vec, "}\\!/_{", denom_vec, "}}")
)
} else if (context == "rtf") {
x_str[has_a_fraction] <-
paste0(
gsub(" ", "", non_fraction_part),
paste0("{\\super ", num_vec, "}/{\\sub ", denom_vec, "}")
)
}
}
# For the `layout = "inline"` option, LaTeX outputs in math mode
# disregard space characters so the `\ ` spacing command must used
if (layout == "inline" && context == "latex") {
x_str <- gsub(" ", "\\\\ ", x_str)
}
# In rare cases that Inf or -Inf appear, ensure that these
# special values are printed correctly
x_str[is.infinite(x)] <- x[is.infinite(x)]
x_str
}
)
)
}
gcd <- function(x,y) {
r <- x %% y
return(ifelse(r, gcd(y, r), y))
}
make_frac <- function(x, denom, simplify = TRUE) {
big_x <- trunc(x)
small_x <- abs(x - big_x)
numer <- round_gt(small_x * denom)
if (simplify) {
denom <- rep_len(denom, length(x))
factor <- gcd(numer, denom)
numer <- numer / factor
denom <- denom / factor
}
ifelse(
numer == denom, "1",
ifelse(
numer == 0, "0",
paste0(
format_num_to_str(
numer,
context = "plain",
decimals = 0,
n_sigfig = NULL,
sep_mark = "",
dec_mark = ".",
drop_trailing_zeros = TRUE,
drop_trailing_dec_mark = TRUE,
format = "f"
),
"/",
format_num_to_str(
denom,
context = "plain",
decimals = 0,
n_sigfig = NULL,
sep_mark = "",
dec_mark = ".",
drop_trailing_zeros = TRUE,
drop_trailing_dec_mark = TRUE,
format = "f"
)
)
)
)
}
# The `round_gt()` function is used in gt over `base::round()` for consistency
# in rounding across R versions; it uses the 'Round-Half-Up' (R-H-U) algorithm,
# which is *not* used in R >= 4.0
round_gt <- function(x, digits = 0) {
x_sign <- sign(x)
z <- abs(x) * 10^digits
z <- 0.5 + z + sqrt(.Machine$double.eps)
z <- trunc(z)
z <- z / 10^digits
z * x_sign
}
#' Format values as currencies
#'
#' @description
#'
#' With numeric values in a **gt** table, we can perform currency-based
#' formatting with the `fmt_currency()` function. The function supports both
#' automatic formatting with either a three-letter or a numeric currency code.
#' We can also specify a custom currency that is formatted according to one or
#' more output contexts with the [currency()] helper function. We have fine
#' control over the conversion from numeric values to currency values, where we
#' could take advantage of the following options:
#'
#' - the currency: providing a currency code or common currency name will
#' procure the correct currency symbol and number of currency subunits; we could
#' also use the [currency()] helper function to specify a custom currency
#' - currency symbol placement: the currency symbol can be placed before
#' or after the values
#' - decimals/subunits: choice of the number of decimal places, and a
#' choice of the decimal symbol, and an option on whether to include or exclude
#' the currency subunits (the decimal portion)
#' - negative values: choice of a negative sign or parentheses for values
#' less than zero
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - scaling: we can choose to scale targeted values by a multiplier value
#' - large-number suffixing: larger figures (thousands, millions, etc.) can
#' be autoscaled and decorated with the appropriate suffixes
#' - pattern: option to use a text pattern for decoration of the formatted
#' currency values
#' - locale-based formatting: providing a locale ID will result in currency
#' formatting specific to the chosen locale; it will also retrieve the locale's
#' currency if none is explicitly given
#'
#' We can use the [info_currencies()] function for a useful reference on all of
#' the possible inputs to the `currency` argument.
#'
#' @inheritParams fmt_number
#'
#' @param currency *Currency to use*
#'
#' `scalar<character>|obj:<gt_currency>` // *default:* `NULL` (`optional`)
#'
#' The currency to use for the numeric value. This input can be
#' supplied as a 3-letter currency code (e.g., `"USD"` for U.S. Dollars,
#' `"EUR"` for the Euro currency). Use [info_currencies()] to get an
#' information table with all of the valid currency codes and examples of
#' each. Alternatively, we can provide a common currency name (e.g.,
#' `"dollar"`, `"pound"`, `"yen"`, etc.) to simplify the process. Use
#' [info_currencies()] with the `type == "symbol"` option to view an
#' information table with all of the supported currency symbol names along
#' with examples.
#'
#' We can also use the [currency()] helper function to specify a custom
#' currency, where the string could vary across output contexts. For example,
#' using `currency(html = "ƒ", default = "f")` would give us a suitable
#' glyph for the Dutch guilder in an HTML output table, and it would simply be
#' the letter "f" in all other output contexts). Please note that `decimals`
#' will default to `2` when using the [currency()] helper function.
#'
#' If nothing is provided here but a `locale` value has been set (either in
#' this function call or as part of the initial [gt()] call), the currency
#' will be obtained from that locale. Virtually all locales are linked to a
#' territory that is a country (use [info_locales()] for details on all
#' locales used in this package), so, the in-use (or *de facto*) currency will
#' be obtained. As the default locale is `"en"`, the `"USD"` currency will be
#' used if neither a `locale` nor a `currency` value is given.
#'
#' @param use_subunits *Show or hide currency subunits*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' An option for whether the subunits portion of a currency value should be
#' displayed. For example, with an input value of `273.81`, the default
#' formatting will produce `"$273.81"`. Removing the subunits (with
#' `use_subunits = FALSE`) will give us `"$273"`.
#'
#' @param placement *Currency symbol placement*
#'
#' `scalar<character>` // *default:* `"left"`
#'
#' The placement of the currency symbol. This can be either be `left` (as
#' in `"$450"`) or `right` (which yields `"450$"`).
#'
#' @param incl_space *Include a space between the value and the currency symbol*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' An option for whether to include a space between the value and the currency
#' symbol. The default is to not introduce a space character.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_currency()` formatting function is compatible with body cells that
#' are of the `"numeric"` or `"integer"` types. Any other types of body cells
#' are ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_currency()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `currency`
#' - `use_subunits`
#' - `decimals`
#' - `drop_trailing_dec_mark`
#' - `use_seps`
#' - `accounting`
#' - `scale_by`
#' - `suffixing`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign`
#' - `placement`
#' - `incl_space`
#' - `system`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values. In addition to number formatting, providing a `locale`
#' value and not providing a `currency` allows **gt** to obtain the currency
#' code from the locale's territory.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Let's make a simple **gt** table from the [`exibble`] dataset. We'll keep
#' only the `num` and `currency`, columns, then, format those columns using
#' `fmt_currency()` (with the `"JPY"` and `"GBP"` currencies).
#'
#' ```r
#' exibble |>
#' dplyr::select(num, currency) |>
#' gt() |>
#' fmt_currency(
#' columns = num,
#' currency = "JPY"
#' ) |>
#' fmt_currency(
#' columns = currency,
#' currency = "GBP"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_currency_1.png")`
#' }}
#'
#' Let's take a single column from [`exibble`] (`currency`) and format it with a
#' currency name (this differs from the 3-letter currency code). In this case,
#' we'll use the `"euro"` currency and set the placement of the symbol to the
#' right of any value. Additionally, the currency symbol will separated from the
#' value with a single space character (using `incl_space = TRUE`).
#'
#' ```r
#' exibble |>
#' dplyr::select(currency) |>
#' gt() |>
#' fmt_currency(
#' currency = "euro",
#' placement = "right",
#' incl_space = TRUE
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_currency_2.png")`
#' }}
#'
#' With the [`pizzaplace`] dataset, let's make a summary table that gets the
#' number of `"hawaiian"` pizzas sold (and revenue generated) by month. In the
#' **gt** table, we'll format only the `revenue` column. The `currency` value is
#' automatically U.S. Dollars when don't supply either a currency code or a
#' locale. We'll also create a grand summary with the [grand_summary_rows()]
#' function. Within that summary row, the total revenue needs to be formatted
#' with `fmt_currency()` and we can do that within the `fmt` argument.
#'
#' ```r
#' pizzaplace |>
#' dplyr::filter(name == "hawaiian") |>
#' dplyr::mutate(month = lubridate::month(date, label = TRUE, abbr = TRUE)) |>
#' dplyr::select(month, price) |>
#' dplyr::group_by(month) |>
#' dplyr::summarize(
#' `number sold` = dplyr::n(),
#' revenue = sum(price)
#' ) |>
#' gt(rowname_col = "month") |>
#' tab_header(title = "Summary of Hawaiian Pizzas Sold by Month") |>
#' fmt_currency(columns = revenue) |>
#' grand_summary_rows(
#' fns = list(label = "Totals:", id = "totals", fn = "sum"),
#' fmt = ~ fmt_currency(., columns = revenue),
#' ) |>
#' opt_all_caps()
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_currency_3.png")`
#' }}
#'
#' If supplying a `locale` value to `fmt_currency()`, we can opt use the
#' locale's assumed currency and not have to supply a `currency` value (doing so
#' would override the locale's default currency). With a column of locale
#' values, we can format currency values on a row-by-row basis through the use
#' of the [from_column()] helper function. Here, we'll reference the `locale`
#' column in the argument of the same name.
#'
#' ```r
#' dplyr::tibble(
#' amount = rep(50.84, 5),
#' currency = c("JPY", "USD", "GHS", "KRW", "CNY"),
#' locale = c("ja", "en", "ee", "ko", "zh"),
#' ) |>
#' gt() |>
#' fmt_currency(
#' columns = amount,
#' locale = from_column(column = "locale")
#' ) |>
#' cols_hide(columns = locale)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_currency_4.png")`
#' }}
#'
#' We can similarly use [from_column()] to reference a column that has currency
#' code values. Here's an example of how to create a simple currency conversion
#' table. The `curr` column contains the 3-letter currency codes, and that
#' column is referenced via [from_column()] in the `currency` argument of
#' `fmt_currency()`.
#'
#' ```r
#' dplyr::tibble(
#' flag = c("EU", "GB", "CA", "AU", "JP", "IN"),
#' curr = c("EUR", "GBP", "CAD", "AUD", "JPY", "INR"),
#' conv = c(
#' 0.912952, 0.787687, 1.34411,
#' 1.53927, 144.751, 82.9551
#' )
#' ) |>
#' gt() |>
#' fmt_currency(
#' columns = conv,
#' currency = from_column(column = "curr")
#' ) |>
#' fmt_flag(columns = flag) |>
#' cols_merge(columns = c(flag, curr)) |>
#' cols_label(
#' flag = "Currency",
#' conv = "Amount"
#' ) |>
#' tab_header(
#' title = "Conversion of 1 USD to Six Other Currencies",
#' subtitle = md("Conversion rates obtained on **Aug 13, 2023**")
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_currency_5.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-8
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_currency()].
#'
#' @import rlang
#' @export
fmt_currency <- function(
data,
columns = everything(),
rows = everything(),
currency = NULL,
use_subunits = TRUE,
decimals = NULL,
drop_trailing_dec_mark = TRUE,
use_seps = TRUE,
accounting = FALSE,
scale_by = 1.0,
suffixing = FALSE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
placement = "left",
incl_space = FALSE,
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - currency
# - use_subunits
# - decimals
# - drop_trailing_dec_mark
# - use_seps
# - accounting
# - scale_by
# - suffixing
# - pattern
# - sep_mark
# - dec_mark
# - force_sign
# - placement
# - incl_space
# - system
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_currency",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_currency(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
currency = p_i$currency %||% currency,
use_subunits = p_i$use_subunits %||% use_subunits,
decimals = p_i$decimals %||% decimals,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
use_seps = p_i$use_seps %||% use_seps,
accounting = p_i$accounting %||% accounting,
scale_by = p_i$scale_by %||% scale_by,
suffixing = p_i$suffixing %||% suffixing,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign = p_i$force_sign %||% force_sign,
placement = p_i$placement %||% placement,
incl_space = p_i$incl_space %||% incl_space,
system = p_i$system %||% system,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
system <- rlang::arg_match(system)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_currency()` function can only be used on `columns`
with numeric data."
)
}
}
# Resolve the currency either from direct input in `currency` or
# through a locale
if (is.null(currency)) {
currency <- get_locale_currency_code(locale = locale)
}
# Stop function if `currency` does not have a valid value
validate_currency(currency = currency)
# Get the number of decimal places
decimals <-
get_currency_decimals(
currency = currency,
decimals = decimals,
use_subunits = use_subunits
)
# Pass `data`, `columns`, `rows`, and other options to `fmt_symbol()`
fmt_symbol(
data = data,
columns = {{ columns }},
rows = {{ rows }},
symbol = currency,
accounting = accounting,
decimals = decimals,
drop_trailing_zeros = FALSE,
drop_trailing_dec_mark = drop_trailing_dec_mark,
use_seps = use_seps,
scale_by = scale_by,
suffixing = suffixing,
pattern = pattern,
sep_mark = sep_mark,
dec_mark = dec_mark,
force_sign = force_sign,
placement = placement,
incl_space = incl_space,
system = system,
locale = locale
)
}
#' Format values as Roman numerals
#'
#' @description
#'
#' With numeric values in a **gt** table we can transform those to Roman
#' numerals, rounding values as necessary.
#'
#' @inheritParams fmt_number
#'
#' @param case *Use uppercase or lowercase letters*
#'
#' `singl-kw:[upper|lower]` // *default:* `"upper"`
#'
#' Should Roman numerals should be rendered as uppercase (`"upper"`) or
#' lowercase (`"lower"`) letters? By default, this is set to `"upper"`.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_roman()` formatting function is compatible with body cells that are
#' of the `"numeric"` or `"integer"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_roman()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `case`
#' - `pattern`
#'
#' Please note that for both of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Examples:
#'
#' Create a tibble of small numeric values and generate a **gt** table. Format
#' the `roman` column to appear as Roman numerals with `fmt_roman()`.
#'
#' ```r
#' dplyr::tibble(arabic = c(1, 8, 24, 85), roman = arabic) |>
#' gt(rowname_col = "arabic") |>
#' fmt_roman(columns = roman)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_roman_1.png")`
#' }}
#'
#' Formatting values to Roman numerals can be very useful when combining such
#' output with row labels (usually through [cols_merge()]). Here's an example
#' where we take a portion of the [`illness`] dataset and generate some row
#' labels that combine (1) a row number (in lowercase Roman numerals), (2) the
#' name of the test, and (3) the measurement units for the test (nicely
#' formatted by way of [fmt_units()]):
#'
#' ```r
#' illness |>
#' dplyr::slice_head(n = 6) |>
#' gt(rowname_col = "test") |>
#' fmt_units(columns = units) |>
#' cols_hide(columns = starts_with("day")) |>
#' sub_missing(missing_text = "") |>
#' cols_merge_range(col_begin = norm_l, col_end = norm_u) |>
#' cols_add(i = 1:6) |>
#' fmt_roman(columns = i, case = "lower", pattern = "{x}.") |>
#' cols_merge(columns = c(test, i, units), pattern = "{2} {1} ({3})") |>
#' cols_label(norm_l = "Normal Range") |>
#' tab_stubhead(label = "Test")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_roman_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-9
#'
#' @section Function Introduced:
#' `v0.8.0` (November 16, 2022)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_roman()].
#'
#' @import rlang
#' @export
fmt_roman <- function(
data,
columns = everything(),
rows = everything(),
case = c("upper", "lower"),
pattern = "{x}"
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - case
# - pattern
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_roman",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_roman(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
case = p_i$case %||% case,
pattern = p_i$pattern %||% pattern
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
case <- rlang::arg_match(case)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_roman()` function can only be used on `columns`
with numeric data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# Generate an vector of empty strings that will eventually contain
# all of the roman numerals
x_str <- character(length(x))
# Round all values of x to 3 digits with the R-H-U method of
# rounding (for reproducibility purposes)
x <- round_gt(x, 0)
# Determine which of `x` are finite values
x_is_a_number <- is.finite(x)
x[x_is_a_number] <- abs(x[x_is_a_number])
x_is_in_range <- x > 0 & x < 3900
x_str[x_is_a_number & x_is_in_range] <-
as.character(utils::as.roman(x[x_is_a_number & x_is_in_range]))
x_str[x_is_a_number & x == 0] <- if (case == "lower") "n" else "N"
if (case == "lower") {
x_str[x_is_in_range] <- tolower(x_str[x_is_in_range])
}
# In rare cases that Inf or -Inf appear, ensure that these
# special values are printed correctly
x_str[is.infinite(x)] <- x[is.infinite(x)]
x_str[x_is_a_number & x != 0 & !x_is_in_range] <- "ex terminis"
x_str
}
)
)
}
#' Format values to indexed characters
#'
#' @description
#'
#' With numeric values in a **gt** table we can transform those to index values,
#' usually based on letters. These characters can be derived from a specified
#' locale and they are intended for ordering (often leaving out characters with
#' diacritical marks).
#'
#' @inheritParams fmt_number
#'
#' @param case *Use uppercase or lowercase letters*
#'
#' `singl-kw:[upper|lower]` // *default:* `"upper"`
#'
#' Should the resulting index characters be rendered as uppercase (`"upper"`)
#' or lowercase (`"lower"`) letters? By default, this is set to `"upper"`.
#'
#' @param index_algo *Indexing algorithm*
#'
#' `singl-kw:[repeat|excel]` // *default:* `"repeat"`
#'
#' The indexing algorithm handles the recycling of the index character set. By
#' default, the `"repeat"` option is used where characters are doubled,
#' tripled, and so on, when moving past the character set limit. The
#' alternative is the `"excel"` option, where Excel-based column naming is
#' adapted and used here (e.g., `[..., Y, Z, AA, AB, ...]`).
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_index()` formatting function is compatible with body cells that are
#' of the `"numeric"` or `"integer"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_index()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `case`
#' - `index_algo`
#' - `pattern`
#' - `locale`
#'
#' Please note that for all of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Examples:
#'
#' Using a summarized version of the [`towny`] dataset, let's create a **gt**
#' table. Here, the `fmt_index()` function is used to transform incremental
#' integer values into capitalized letters (in the `ranking` column). With
#' [cols_merge()] that formatted column of `"A"` to `"E"` values is merged with
#' the `census_div` column to create an indexed listing of census subdivisions,
#' here ordered by increasing total municipal population.
#'
#' ```r
#' towny |>
#' dplyr::select(name, csd_type, census_div, population_2021) |>
#' dplyr::group_by(census_div) |>
#' dplyr::summarize(
#' population = sum(population_2021),
#' .groups = "drop_last"
#' ) |>
#' dplyr::arrange(population) |>
#' dplyr::slice_head(n = 5) |>
#' dplyr::mutate(ranking = dplyr::row_number()) |>
#' dplyr::select(ranking, dplyr::everything()) |>
#' gt() |>
#' fmt_integer() |>
#' fmt_index(columns = ranking, pattern = "{x}.") |>
#' cols_merge(columns = c(ranking, census_div)) |>
#' cols_align(align = "left", columns = ranking) |>
#' cols_label(
#' ranking = md("Census \nSubdivision"),
#' population = md("Population \nin 2021")
#' ) |>
#' tab_header(title = md("The smallest \ncensus subdivisions")) |>
#' tab_options(table.width = px(325))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_index_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-10
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_index()].
#'
#' @import rlang
#' @export
fmt_index <- function(
data,
columns = everything(),
rows = everything(),
case = c("upper", "lower"),
index_algo = c("repeat", "excel"),
pattern = "{x}",
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - case
# - index_algo
# - pattern
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_index",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_index(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
case = p_i$case %||% case,
index_algo = p_i$index_algo %||% index_algo,
pattern = p_i$pattern %||% pattern,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
case <- rlang::arg_match(case)
index_algo <- rlang::arg_match(index_algo)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Use locale-based `idx_set` if a locale ID is provided
idx_set <- get_locale_idx_set(locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_index()` function can only be used on `columns`
with numeric data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# Generate an vector of empty strings that will eventually contain
# all of the roman numerals
x_str <- character(length(x))
# Round all values of x to 3 digits with the R-H-U method of
# rounding (for reproducibility purposes)
x <- round_gt(x, 0)
# Determine which of `x` are finite values
x_is_a_number <- is.finite(x)
x[x_is_a_number] <- abs(x[x_is_a_number])
# Select the correct indexing function
if (index_algo == "repeat") {
index_fn <- index_repeat
} else {
index_fn <- index_excel
}
x_str[x_is_a_number] <-
vapply(
x[x_is_a_number],
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) index_fn(x, set = idx_set)
)
x_str[x_is_a_number & x == 0] <- ""
if (case == "lower") {
x_str <- tolower(x_str)
}
# In rare cases that Inf or -Inf appear, ensure that these
# special values are printed correctly
x_str[is.infinite(x)] <- x[is.infinite(x)]
x_str
}
)
)
}
index_repeat <- function(x, set) {
marks_rep <- floor((x - 1) / length(set)) + 1
marks_val <- set[(x - 1) %% length(set) + 1]
unname(
mapply(
marks_val, marks_rep,
FUN = function(val_i, rep_i) {
paste(rep(val_i, rep_i), collapse = "")}
)
)
}
index_excel <- function(num, set) {
result <-
vapply(
num,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
get_letters_from_div(x, set = set)
}
)
ifelse(result == "", NA_character_, result)
}
get_letters_from_div <- function(x, set) {
if (is.na(x)) {
return(NA_character_)
}
result <- integer()
while (x > 0) {
remainder <- ((x - 1) %% 26) + 1
result <- c(remainder, result)
x <- (x - remainder) %/% 26
}
paste(set[result], collapse = "")
}
#' Format values to spelled-out numbers
#'
#' @description
#'
#' With numeric values in a **gt** table we can transform those to numbers that
#' are spelled out with the `fmt_spelled_num()` function. Any values from `0` to
#' `100` can be spelled out so, for example, the value `23` will be formatted as
#' `"twenty-three"`. Providing a locale ID will result in the number spelled out
#' in the locale's language rules. For example, should a Swedish locale (`"sv"`)
#' be provided, the input value `23` will yield `"tjugotre"`. In addition to
#' this, we can optionally use the `pattern` argument for decoration of the
#' formatted values.
#'
#' @inheritParams fmt_number
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_spelled_num()` formatting function is compatible with body cells
#' that are of the `"numeric"` or `"integer"` types. Any other types of body
#' cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_spelled_num()` to obtain varying parameter values from a specified
#' column within the table. This means that each row could be formatted a little
#' bit differently. These arguments provide support for [from_column()]:
#'
#' - `pattern`
#' - `locale`
#'
#' Please note that for both of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Supported locales:
#'
#' The following 80 locales are supported in the `locale` argument of
#' `fmt_spelled_num()`: `"af"` (Afrikaans), `"ak"` (Akan), `"am"` (Amharic),
#' `"ar"` (Arabic), `"az"` (Azerbaijani), `"be"` (Belarusian), `"bg"`
#' (Bulgarian), `"bs"` (Bosnian), `"ca"` (Catalan), `"ccp"` (Chakma), `"chr"`
#' (Cherokee), `"cs"` (Czech), `"cy"` (Welsh), `"da"` (Danish), `"de"` (German),
#' `"de-CH"` (German (Switzerland)), `"ee"` (Ewe), `"el"` (Greek), `"en"`
#' (English), `"eo"` (Esperanto), `"es"` (Spanish), `"et"` (Estonian), `"fa"`
#' (Persian), `"ff"` (Fulah), `"fi"` (Finnish), `"fil"` (Filipino), `"fo"`
#' (Faroese), `"fr"` (French), `"fr-BE"` (French (Belgium)), `"fr-CH"` (French
#' (Switzerland)), `"ga"` (Irish), `"he"` (Hebrew), `"hi"` (Hindi), `"hr"`
#' (Croatian), `"hu"` (Hungarian), `"hy"` (Armenian), `"id"` (Indonesian),
#' `"is"` (Icelandic), `"it"` (Italian), `"ja"` (Japanese), `"ka"` (Georgian),
#' `"kk"` (Kazakh), `"kl"` (Kalaallisut), `"km"` (Khmer), `"ko"` (Korean),
#' `"ky"` (Kyrgyz), `"lb"` (Luxembourgish), `"lo"` (Lao), `"lrc"` (Northern
#' Luri), `"lt"` (Lithuanian), `"lv"` (Latvian), `"mk"` (Macedonian), `"ms"`
#' (Malay), `"mt"` (Maltese), `"my"` (Burmese), `"ne"` (Nepali), `"nl"` (Dutch),
#' `"nn"` (Norwegian Nynorsk), `"no"` (Norwegian), `"pl"` (Polish), `"pt"`
#' (Portuguese), `"qu"` (Quechua), `"ro"` (Romanian), `"ru"` (Russian), `"se"`
#' (Northern Sami), `"sk"` (Slovak), `"sl"` (Slovenian), `"sq"` (Albanian),
#' `"sr"` (Serbian), `"sr-Latn"` (Serbian (Latin)), `"su"` (Sundanese), `"sv"`
#' (Swedish), `"sw"` (Swahili), `"ta"` (Tamil), `"th"` (Thai), `"tr"` (Turkish),
#' `"uk"` (Ukrainian), `"vi"` (Vietnamese), `"yue"` (Cantonese), and `"zh"`
#' (Chinese).
#'
#' @section Examples:
#'
#' Let's use a summarized version of the [`gtcars`] dataset to create a
#' **gt** table. The `fmt_spelled_num()` function is used to transform
#' integer values into spelled-out numbering (in the `n` column). That formatted
#' column of numbers-as-words is given cell background colors via [data_color()]
#' (the underlying numerical values are always available).
#'
#' ```r
#' gtcars |>
#' dplyr::select(mfr, ctry_origin) |>
#' dplyr::group_by(mfr, ctry_origin) |>
#' dplyr::count() |>
#' dplyr::ungroup() |>
#' dplyr::arrange(ctry_origin) |>
#' gt(rowname_col = "mfr", groupname_col = "ctry_origin") |>
#' cols_label(n = "No. of Entries") |>
#' fmt_spelled_num() |>
#' tab_stub_indent(rows = everything(), indent = 2) |>
#' data_color(
#' columns = n,
#' method = "numeric",
#' palette = "viridis",
#' alpha = 0.8
#' ) |>
#' opt_all_caps() |>
#' opt_vertical_padding(scale = 0.5) |>
#' cols_align(align = "center", columns = n)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_spelled_num_1.png")`
#' }}
#'
#' With a considerable amount of **dplyr** and **tidyr** work done to the
#' [`pizzaplace`] dataset, we can create a new **gt** table. The
#' `fmt_spelled_num()` function will be used here to transform the integer
#' values in the `rank` column. We'll do so with a special `pattern` that puts
#' the word 'Number' in front of every spelled-out number.
#'
#' ```r
#' pizzaplace |>
#' dplyr::mutate(month = lubridate::month(date, label = TRUE)) |>
#' dplyr::filter(month %in% month.abb[1:6]) |>
#' dplyr::group_by(name, month) |>
#' dplyr::summarize(sum = sum(price), .groups = "drop") |>
#' dplyr::arrange(month, desc(sum)) |>
#' dplyr::group_by(month) |>
#' dplyr::slice_head(n = 5) |>
#' dplyr::mutate(rank = dplyr::row_number()) |>
#' dplyr::ungroup() |>
#' dplyr::select(-sum) |>
#' tidyr::pivot_wider(names_from = month, values_from = c(name)) |>
#' gt() |>
#' fmt_spelled_num(columns = rank, pattern = "Number {x}") |>
#' opt_all_caps() |>
#' cols_align(columns = -rank, align = "center") |>
#' cols_width(
#' rank ~ px(120),
#' everything() ~ px(100)
#' ) |>
#' opt_table_font(stack = "rounded-sans") |>
#' tab_options(table.font.size = px(14))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_spelled_num_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-11
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_spelled_num()].
#'
#' @import rlang
#' @export
fmt_spelled_num <- function(
data,
columns = everything(),
rows = everything(),
pattern = "{x}",
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - pattern
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_spelled_num",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_spelled_num(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
pattern = p_i$pattern %||% pattern,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Obtain a locale-based `num_spellout_set` vector
num_spellout_set <- get_locale_num_spellout(locale = locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_spelled_num()` function can only be used on `columns`
with numeric data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# Generate an vector of empty strings that will eventually contain
# all of the roman numerals
x_str <- character(length(x))
# Round all values of x to 3 digits with the R-H-U method of
# rounding (for reproducibility purposes)
x <- floor(x)
# Determine which of `x` are finite values
x_is_a_number <- is.finite(x)
# x[x_is_a_number] <- abs(x[x_is_a_number])
# The allowed range of numbers that can be spelled out
# is `0` to `100`
x_is_in_range <- x >= 0 & x <= 100
# The `num_spellout_set` vector should always contain 101
# elements; it contains zero then the numbers from 1 to 100
x_str[x_is_a_number & x_is_in_range] <-
num_spellout_set[x[x_is_a_number & x_is_in_range] + 1]
# Ensure that numbers not in range are included as
# floored numeric values
x_str[x_is_a_number & !x_is_in_range] <-
x[x_is_a_number & !x_is_in_range]
# In rare cases that Inf or -Inf appear, ensure that these
# special values are printed correctly
x_str[is.infinite(x)] <- x[is.infinite(x)]
x_str
}
)
)
}
#' Format values as bytes
#'
#' @description
#'
#' With numeric values in a **gt** table, we can transform those to values of
#' bytes with human readable units. The `fmt_bytes()` function allows for the
#' formatting of byte sizes to either of two common representations: (1) with
#' decimal units (powers of 1000, examples being `"kB"` and `"MB"`), and (2)
#' with binary units (powers of 1024, examples being `"KiB"` and `"MiB"`).
#'
#' It is assumed the input numeric values represent the number of bytes and
#' automatic truncation of values will occur. The numeric values will be scaled
#' to be in the range of 1 to <1000 and then decorated with the correct unit
#' symbol according to the standard chosen. For more control over the formatting
#' of byte sizes, we can use the following options:
#'
#' - decimals: choice of the number of decimal places, option to drop
#' trailing zeros, and a choice of the decimal symbol
#' - digit grouping separators: options to enable/disable digit separators
#' and provide a choice of separator symbol
#' - pattern: option to use a text pattern for decoration of the formatted
#' values
#' - locale-based formatting: providing a locale ID will result in number
#' formatting specific to the chosen locale
#'
#' @inheritParams fmt_number
#'
#' @param standard *Standard used to express byte sizes*
#'
#' `singl-kw:[decimal|binary]` // *default:* `"decimal"`
#'
#' The form of expressing large byte sizes is divided between: (1) decimal
#' units (powers of 1000; e.g., `"kB"` and `"MB"`), and (2) binary units
#' (powers of 1024; e.g., `"KiB"` and `"MiB"`).
#'
#' @param decimals *Number of decimal places*
#'
#' `scalar<numeric|integer>(val>=0)` // *default:* `1`
#'
#' This corresponds to the exact number of decimal places to use. A value
#' such as `2.34` can, for example, be formatted with `0` decimal places and
#' it would result in `"2"`. With `4` decimal places, the formatted value
#' becomes `"2.3400"`. The trailing zeros can be removed with
#' `drop_trailing_zeros = TRUE`.
#'
#' @param force_sign *Forcing the display of a positive sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the positive sign be shown for positive numbers (effectively showing
#' a sign for all numbers except zero)? If so, use `TRUE` for this option. The
#' default is `FALSE`, where only negative numbers will display a minus sign.
#'
#' @param incl_space *Include a space between the value and the units*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' An option for whether to include a space between the value and the units.
#' The default is to use a space character for separation.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_bytes()` formatting function is compatible with body cells that are
#' of the `"numeric"` or `"integer"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_bytes()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `standard`
#' - `decimals`
#' - `n_sigfig`
#' - `drop_trailing_zeros`
#' - `drop_trailing_dec_mark`
#' - `use_seps`
#' - `pattern`
#' - `sep_mark`
#' - `dec_mark`
#' - `force_sign`
#' - `incl_space`
#' - `locale`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any values be
#' provided in `sep_mark` or `dec_mark`, they will be overridden by the locale's
#' preferred values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use a single column from the [`exibble`] dataset and create a simple **gt**
#' table. We'll format the `num` column to display as byte sizes in the decimal
#' standard through use of the `fmt_bytes()` function.
#'
#' ```r
#' exibble |>
#' dplyr::select(num) |>
#' gt() |>
#' fmt_bytes()
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_bytes_1.png")`
#' }}
#'
#' Let's create an analogous table again by using the `fmt_bytes()` function,
#' this time showing byte sizes as binary values by using `standard = "binary"`.
#'
#' ```r
#' exibble |>
#' dplyr::select(num) |>
#' gt() |>
#' fmt_bytes(standard = "binary")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_bytes_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-12
#'
#' @section Function Introduced:
#' `v0.3.0` (May 12, 2021)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_bytes()].
#'
#' @import rlang
#' @export
fmt_bytes <- function(
data,
columns = everything(),
rows = everything(),
standard = c("decimal", "binary"),
decimals = 1,
n_sigfig = NULL,
drop_trailing_zeros = TRUE,
drop_trailing_dec_mark = TRUE,
use_seps = TRUE,
pattern = "{x}",
sep_mark = ",",
dec_mark = ".",
force_sign = FALSE,
incl_space = TRUE,
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - standard
# - decimals
# - n_sigfig
# - drop_trailing_zeros
# - drop_trailing_dec_mark
# - use_seps
# - pattern
# - sep_mark
# - dec_mark
# - force_sign
# - incl_space
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_bytes",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_bytes(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
standard = p_i$standard %||% standard,
decimals = p_i$decimals %||% decimals,
n_sigfig = p_i$n_sigfig %||% n_sigfig,
drop_trailing_zeros = p_i$drop_trailing_zeros %||% drop_trailing_zeros,
drop_trailing_dec_mark = p_i$drop_trailing_dec_mark %||% drop_trailing_dec_mark,
use_seps = p_i$use_seps %||% use_seps,
pattern = p_i$pattern %||% pattern,
sep_mark = p_i$sep_mark %||% sep_mark,
dec_mark = p_i$dec_mark %||% dec_mark,
force_sign = p_i$force_sign %||% force_sign,
incl_space = p_i$incl_space %||% incl_space,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
standard <- rlang::arg_match(standard)
# Declare formatting function compatibility
compat <- c("numeric", "integer")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_bytes()` function can only be used on `columns`
with numeric data."
)
}
}
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
dec_mark <- get_locale_dec_mark(locale, dec_mark)
# Set the `formatC_format` option according to whether number
# formatting with significant figures is to be performed
if (!is.null(n_sigfig)) {
# Stop function if `n_sigfig` does not have a valid value
validate_n_sigfig(n_sigfig = n_sigfig)
formatC_format <- "fg"
} else {
formatC_format <- "f"
}
if (standard == "decimal") {
base <- 1000
byte_units <- c("B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
} else {
base <- 1024
byte_units <- c("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB")
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
format_fn = function(x, context) {
# Truncate all byte values
x <- trunc(x)
num_power_idx <- floor(log(abs(x), base = base)) + 1
num_power_idx <- pmax(1, pmin(length(byte_units), num_power_idx))
units_str <- byte_units[num_power_idx]
x <- x / base^(num_power_idx - 1)
# Format numeric values to character-based numbers
x_str <-
format_num_to_str(
x,
context = context,
decimals = decimals,
n_sigfig = n_sigfig,
sep_mark = sep_mark,
dec_mark = dec_mark,
drop_trailing_zeros = drop_trailing_zeros,
drop_trailing_dec_mark = drop_trailing_dec_mark,
format = formatC_format
)
x_str <-
paste_right(x_str, x_right = paste0(if (incl_space) " ", units_str))
# Force a positive sign on certain values if the option is taken
if (force_sign) {
positive_x <- !is.na(x) & x > 0
x_str[positive_x] <- paste_left(x_str[positive_x], x_left = "+")
}
x_str
}
)
)
}
#' Format values as dates
#'
#' @description
#'
#' Format input values to time values using one of 41 preset date styles. Input
#' can be in the form of `POSIXt` (i.e., datetimes), the `Date` type, or
#' `character` (must be in the ISO 8601 form of `YYYY-MM-DD HH:MM:SS` or
#' `YYYY-MM-DD`).
#'
#' @inheritParams fmt_number
#'
#' @param date_style *Predefined style for dates*
#'
#' `scalar<character>|scalar<numeric|integer>(1<=val<=41)` // *default:* `"iso"`
#'
#' The date style to use. By default this is the short name `"iso"` which
#' corresponds to ISO 8601 date formatting. There are 41 date styles in total
#' and their short names can be viewed using [info_date_style()].
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_date()` formatting function is compatible with body cells that are
#' of the `"Date"`, `"POSIXt"` or `"character"` types. Any other types of body
#' cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_date()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `date_style`
#' - `pattern`
#' - `locale`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Formatting with the `date_style` argument:
#'
#' We need to supply a preset date style to the `date_style` argument. The date
#' styles are numerous and can handle localization to any supported locale. A
#' large segment of date styles are termed flexible date formats and this means
#' that their output will adapt to any `locale` provided. That feature makes the
#' flexible date formats a better option for locales other than `"en"` (the
#' default locale).
#'
#' The following table provides a listing of all date styles and their output
#' values (corresponding to an input date of `2000-02-29`).
#'
#' | | Date Style | Output | Notes |
#' |----|-----------------------|-------------------------|---------------|
#' | 1 | `"iso"` | `"2000-02-29"` | ISO 8601 |
#' | 2 | `"wday_month_day_year"`| `"Tuesday, February 29, 2000"` | |
#' | 3 | `"wd_m_day_year"` | `"Tue, Feb 29, 2000"` | |
#' | 4 | `"wday_day_month_year"`| `"Tuesday 29 February 2000"` | |
#' | 5 | `"month_day_year"` | `"February 29, 2000"` | |
#' | 6 | `"m_day_year"` | `"Feb 29, 2000"` | |
#' | 7 | `"day_m_year"` | `"29 Feb 2000"` | |
#' | 8 | `"day_month_year"` | `"29 February 2000"` | |
#' | 9 | `"day_month"` | `"29 February"` | |
#' | 10 | `"day_m"` | `"29 Feb"` | |
#' | 11 | `"year"` | `"2000"` | |
#' | 12 | `"month"` | `"February"` | |
#' | 13 | `"day"` | `"29"` | |
#' | 14 | `"year.mn.day"` | `"2000/02/29"` | |
#' | 15 | `"y.mn.day"` | `"00/02/29"` | |
#' | 16 | `"year_week"` | `"2000-W09"` | |
#' | 17 | `"year_quarter"` | `"2000-Q1"` | |
#' | 18 | `"yMd"` | `"2/29/2000"` | flexible |
#' | 19 | `"yMEd"` | `"Tue, 2/29/2000"` | flexible |
#' | 20 | `"yMMM"` | `"Feb 2000"` | flexible |
#' | 21 | `"yMMMM"` | `"February 2000"` | flexible |
#' | 22 | `"yMMMd"` | `"Feb 29, 2000"` | flexible |
#' | 23 | `"yMMMEd"` | `"Tue, Feb 29, 2000"` | flexible |
#' | 24 | `"GyMd"` | `"2/29/2000 A"` | flexible |
#' | 25 | `"GyMMMd"` | `"Feb 29, 2000 AD"` | flexible |
#' | 26 | `"GyMMMEd"` | `"Tue, Feb 29, 2000 AD"`| flexible |
#' | 27 | `"yM"` | `"2/2000"` | flexible |
#' | 28 | `"Md"` | `"2/29"` | flexible |
#' | 29 | `"MEd"` | `"Tue, 2/29"` | flexible |
#' | 30 | `"MMMd"` | `"Feb 29"` | flexible |
#' | 31 | `"MMMEd"` | `"Tue, Feb 29"` | flexible |
#' | 32 | `"MMMMd"` | `"February 29"` | flexible |
#' | 33 | `"GyMMM"` | `"Feb 2000 AD"` | flexible |
#' | 34 | `"yQQQ"` | `"Q1 2000"` | flexible |
#' | 35 | `"yQQQQ"` | `"1st quarter 2000"` | flexible |
#' | 36 | `"Gy"` | `"2000 AD"` | flexible |
#' | 37 | `"y"` | `"2000"` | flexible |
#' | 38 | `"M"` | `"2"` | flexible |
#' | 39 | `"MMM"` | `"Feb"` | flexible |
#' | 40 | `"d"` | `"29"` | flexible |
#' | 41 | `"Ed"` | `"29 Tue"` | flexible |
#'
#' We can use the [info_date_style()] function within the console to view a
#' similar table of date styles with example output.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). Note that a `locale` value provided here will override any
#' global locale setting performed in [gt()]'s own `locale` argument (it is
#' settable there as a value received by all other functions that have a
#' `locale` argument). As a useful reference on which locales are supported, we
#' can use the [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Let's use the [`exibble`] dataset to create a simple, two-column **gt** table
#' (keeping only the `date` and `time` columns). With the `fmt_date()` function,
#' we'll format the `date` column to display dates formatted with the
#' `"month_day_year"` date style.
#'
#' ```r
#' exibble |>
#' dplyr::select(date, time) |>
#' gt() |>
#' fmt_date(
#' columns = date,
#' date_style = "month_day_year"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_date_1.png")`
#' }}
#'
#' Again using the [`exibble`] dataset, let's format the `date` column to have
#' mixed date formats, where dates after April 1st will be different than the
#' others because of the expressions used in the `rows` argument. This will
#' involve two calls of `fmt_date()` with different statements provided for
#' `rows`. In the first call (dates after the 1st of April) the date style
#' `"m_day_year"` is used; for the second call, `"day_m_year"` is the named
#' date style supplied to `date_style`.
#'
#' ```r
#' exibble |>
#' dplyr::select(date, time) |>
#' gt() |>
#' fmt_date(
#' columns = date,
#' rows = as.Date(date) > as.Date("2015-04-01"),
#' date_style = "m_day_year"
#' ) |>
#' fmt_date(
#' columns = date,
#' rows = as.Date(date) <= as.Date("2015-04-01"),
#' date_style = "day_m_year"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_date_2.png")`
#' }}
#'
#' Use the [`exibble`] dataset to create a single-column **gt** table (with only
#' the `date` column). Format the date values using the `"yMMMEd"` date style
#' (which is one of the 'flexible' styles). Also, we'll set the locale to `"nl"`
#' to get the dates in Dutch.
#'
#' ```r
#' exibble |>
#' dplyr::select(date) |>
#' gt() |>
#' fmt_date(
#' date_style = "yMMMEd",
#' locale = "nl"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_date_3.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-13
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_date()].
#'
#' @import rlang
#' @export
fmt_date <- function(
data,
columns = everything(),
rows = everything(),
date_style = "iso",
pattern = "{x}",
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - date_style
# - pattern
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_date",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_date(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
date_style = p_i$date_style %||% date_style,
pattern = p_i$pattern %||% pattern,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("Date", "POSIXt", "character")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Transform `date_style` to `date_format_str`
date_format_str <- get_date_format(date_style = date_style)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(c(
"The `fmt_date()` function can only be used on `columns` of certain types.",
"*" = "Allowed types are `Date`, `POSIXt`, and `character` (with
ISO-8601 formatted dates)."
))
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# Convert incoming values to POSIXlt but provide a friendly error
# if the values cannot be parsed by `as.POSIXlt()`
date <-
tryCatch(
as.POSIXlt(x, tz = "GMT"),
error = function(cond) {
cli::cli_abort(
"One or more of the provided date/datetime values are invalid."
)
}
)
# Format the date string using `fdt()`
bigD::fdt(
input = as.character(date),
format = date_format_str,
locale = locale
)
}
)
)
}
#' Format values as times
#'
#' @description
#'
#' Format input values to time values using one of 25 preset time styles. Input
#' can be in the form of `POSIXt` (i.e., datetimes), `character` (must be in the
#' ISO 8601 forms of `HH:MM:SS` or `YYYY-MM-DD HH:MM:SS`), or `Date` (which
#' always results in the formatting of `00:00:00`).
#'
#' @inheritParams fmt_number
#'
#' @param time_style *Predefined style for times*
#'
#' `scalar<character>|scalar<numeric|integer>(1<=val<=25)` // *default:* `"iso"`
#'
#' The time style to use. By default this is the short name `"iso"` which
#' corresponds to how times are formatted within ISO 8601 datetime values.
#' There are 25 time styles in total and their short names can be viewed using
#' [info_time_style()].
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_time()` formatting function is compatible with body cells that are
#' of the `"Date"`, `"POSIXt"` or `"character"` types. Any other types of body
#' cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_time()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `time_style`
#' - `pattern`
#' - `locale`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Formatting with the `time_style` argument:
#'
#' We need to supply a preset time style to the `time_style` argument. There are
#' many time styles and all of them can handle localization to any supported
#' locale. Many of the time styles are termed flexible time formats and this
#' means that their output will adapt to any `locale` provided. That feature
#' makes the flexible time formats a better option for locales other than `"en"`
#' (the default locale).
#'
#' The following table provides a listing of all time styles and their output
#' values (corresponding to an input time of `14:35:00`). It is noted which of
#' these represent 12- or 24-hour time.
#'
#' | | Time Style | Output | Notes |
#' |----|---------------|---------------------------------|---------------|
#' | 1 | `"iso"` | `"14:35:00"` | ISO 8601, 24h |
#' | 2 | `"iso-short"` | `"14:35"` | ISO 8601, 24h |
#' | 3 | `"h_m_s_p"` | `"2:35:00 PM"` | 12h |
#' | 4 | `"h_m_p"` | `"2:35 PM"` | 12h |
#' | 5 | `"h_p"` | `"2 PM"` | 12h |
#' | 6 | `"Hms"` | `"14:35:00"` | flexible, 24h |
#' | 7 | `"Hm"` | `"14:35"` | flexible, 24h |
#' | 8 | `"H"` | `"14"` | flexible, 24h |
#' | 9 | `"EHm"` | `"Thu 14:35"` | flexible, 24h |
#' | 10 | `"EHms"` | `"Thu 14:35:00"` | flexible, 24h |
#' | 11 | `"Hmsv"` | `"14:35:00 GMT+00:00"` | flexible, 24h |
#' | 12 | `"Hmv"` | `"14:35 GMT+00:00"` | flexible, 24h |
#' | 13 | `"hms"` | `"2:35:00 PM"` | flexible, 12h |
#' | 14 | `"hm"` | `"2:35 PM"` | flexible, 12h |
#' | 15 | `"h"` | `"2 PM"` | flexible, 12h |
#' | 16 | `"Ehm"` | `"Thu 2:35 PM"` | flexible, 12h |
#' | 17 | `"Ehms"` | `"Thu 2:35:00 PM"` | flexible, 12h |
#' | 18 | `"EBhms"` | `"Thu 2:35:00 in the afternoon"` | flexible, 12h |
#' | 19 | `"Bhms"` | `"2:35:00 in the afternoon"` | flexible, 12h |
#' | 20 | `"EBhm"` | `"Thu 2:35 in the afternoon"` | flexible, 12h |
#' | 21 | `"Bhm"` | `"2:35 in the afternoon"` | flexible, 12h |
#' | 22 | `"Bh"` | `"2 in the afternoon"` | flexible, 12h |
#' | 23 | `"hmsv"` | `"2:35:00 PM GMT+00:00"` | flexible, 12h |
#' | 24 | `"hmv"` | `"2:35 PM GMT+00:00"` | flexible, 12h |
#' | 25 | `"ms"` | `"35:00"` | flexible |
#'
#' We can use the [info_time_style()] function within the console to view a
#' similar table of time styles with example output.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). Note that a `locale` value provided here will override any
#' global locale setting performed in [gt()]'s own `locale` argument (it is
#' settable there as a value received by all other functions that have a
#' `locale` argument). As a useful reference on which locales are supported, we
#' can use the [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Let's use the [`exibble`] dataset to create a simple, two-column **gt** table
#' (keeping only the `date` and `time` columns). Format the `time` column with
#' the `fmt_time()` function to display times formatted with the `"h_m_s_p"`
#' time style.
#'
#' ```r
#' exibble |>
#' dplyr::select(date, time) |>
#' gt() |>
#' fmt_time(
#' columns = time,
#' time_style = "h_m_s_p"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_time_1.png")`
#' }}
#'
#' Again using the [`exibble`] dataset, let's format the `time` column to have
#' mixed time formats, where times after 16:00 will be different than the others
#' because of the expressions used in the `rows` argument. This will involve two
#' calls of `fmt_time()` with different statements provided for `rows`. In the
#' first call (times after 16:00) the time style `"h_m_s_p"` is used; for the
#' second call, `"h_m_p"` is the named time style supplied to `time_style`.
#'
#' ```r
#' exibble |>
#' dplyr::select(date, time) |>
#' gt() |>
#' fmt_time(
#' columns = time,
#' rows = time > "16:00",
#' time_style = "h_m_s_p"
#' ) |>
#' fmt_time(
#' columns = time,
#' rows = time <= "16:00",
#' time_style = "h_m_p"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_time_2.png")`
#' }}
#'
#' Use the [`exibble`] dataset to create a single-column **gt** table (with only
#' the `time` column). Format the time values using the `"EBhms"` time style
#' (which is one of the 'flexible' styles). Also, we'll set the locale to `"sv"`
#' to get the times in Swedish.
#'
#' ```r
#' exibble |>
#' dplyr::select(time) |>
#' gt() |>
#' fmt_time(
#' columns = time,
#' time_style = "EBhms",
#' locale = "sv"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_time_3.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-14
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function: [vec_fmt_time()].
#'
#' @import rlang
#' @export
fmt_time <- function(
data,
columns = everything(),
rows = everything(),
time_style = "iso",
pattern = "{x}",
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - time_style
# - pattern
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_time",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_time(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
time_style = p_i$time_style %||% time_style,
pattern = p_i$pattern %||% pattern,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("Date", "POSIXt", "character")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Transform `time_style` to `time_format_str`
time_format_str <- get_time_format(time_style = time_style)
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(c(
"The `fmt_time()` function can only be used on `columns` of certain types.",
"*" = "Allowed types are `Date`, `POSIXt`, and `character` (in
`HH:MM:SS` format)."
))
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# If the incoming values are strings that adequately represent time
# values, then prepend with the `1970-01-01` dummy date to create an
# input that will works with `strftime()`
if (all(is_string_time(x))) {
x <- paste("1970-01-01", x)
}
# Convert incoming values to POSIXlt but provide a friendly error
# if the values cannot be parsed by `as.POSIXlt()`
time <-
tryCatch(
as.POSIXlt(x, tz = "GMT"),
error = function(cond) {
cli::cli_abort(
"One or more of the provided date/time/datetime values are invalid."
)
}
)
# Format the time string using `fdt()`
bigD::fdt(
input = as.character(time),
format = time_format_str,
locale = locale
)
}
)
)
}
#' Format values as datetimes
#'
#' @description
#'
#' Format input values to datetime values using either presets for the date and
#' time components or a formatting directive (this can either use a *CLDR*
#' datetime pattern or `strptime` formatting). The input values can be in the
#' form of `POSIXct` (i.e., datetimes), the `Date` type, or `character` (must be
#' in the ISO 8601 form of `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD`).
#'
#' @inheritParams fmt_number
#'
#' @inheritParams fmt_date
#'
#' @inheritParams fmt_time
#'
#' @param sep *Separator between date and time components*
#'
#' `scalar<character>` // *default:* `" "`
#'
#' The separator string to use between the date and time components. By
#' default, this is a single space character (`" "`). Only used when not
#' specifying a `format` code.
#'
#' @param format *Date/time formatting string*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' An optional formatting string used for generating custom dates/times. If
#' used then the arguments governing preset styles (`date_style` and
#' `time_style`) will be ignored in favor of formatting via the `format`
#' string.
#'
#' @param tz *Time zone*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' The time zone for printing dates/times (i.e., the output). The
#' default of `NULL` will preserve the time zone of the input data in the
#' output. If providing a time zone, it must be one that is recognized by the
#' user's operating system (a vector of all valid `tz` values can be produced
#' with [OlsonNames()]).
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_datetime()` formatting function is compatible with body cells that
#' are of the `"Date"`, `"POSIXct"` or `"character"` types. Any other types of
#' body cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_datetime()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `date_style`
#' - `time_style`
#' - `sep`
#' - `format`
#' - `tz`
#' - `pattern`
#' - `locale`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Formatting with the `date_style` argument:
#'
#' We can supply a preset date style to the `date_style` argument to separately
#' handle the date portion of the output. The date styles are numerous and can
#' handle localization to any supported locale. A large segment of date styles
#' are termed flexible date formats and this means that their output will adapt
#' to any `locale` provided. That feature makes the flexible date formats a
#' better option for locales other than `"en"` (the default locale).
#'
#' The following table provides a listing of all date styles and their output
#' values (corresponding to an input date of `2000-02-29`).
#'
#' | | Date Style | Output | Notes |
#' |----|-----------------------|-------------------------|---------------|
#' | 1 | `"iso"` | `"2000-02-29"` | ISO 8601 |
#' | 2 | `"wday_month_day_year"`| `"Tuesday, February 29, 2000"` | |
#' | 3 | `"wd_m_day_year"` | `"Tue, Feb 29, 2000"` | |
#' | 4 | `"wday_day_month_year"`| `"Tuesday 29 February 2000"` | |
#' | 5 | `"month_day_year"` | `"February 29, 2000"` | |
#' | 6 | `"m_day_year"` | `"Feb 29, 2000"` | |
#' | 7 | `"day_m_year"` | `"29 Feb 2000"` | |
#' | 8 | `"day_month_year"` | `"29 February 2000"` | |
#' | 9 | `"day_month"` | `"29 February"` | |
#' | 10 | `"day_m"` | `"29 Feb"` | |
#' | 11 | `"year"` | `"2000"` | |
#' | 12 | `"month"` | `"February"` | |
#' | 13 | `"day"` | `"29"` | |
#' | 14 | `"year.mn.day"` | `"2000/02/29"` | |
#' | 15 | `"y.mn.day"` | `"00/02/29"` | |
#' | 16 | `"year_week"` | `"2000-W09"` | |
#' | 17 | `"year_quarter"` | `"2000-Q1"` | |
#' | 18 | `"yMd"` | `"2/29/2000"` | flexible |
#' | 19 | `"yMEd"` | `"Tue, 2/29/2000"` | flexible |
#' | 20 | `"yMMM"` | `"Feb 2000"` | flexible |
#' | 21 | `"yMMMM"` | `"February 2000"` | flexible |
#' | 22 | `"yMMMd"` | `"Feb 29, 2000"` | flexible |
#' | 23 | `"yMMMEd"` | `"Tue, Feb 29, 2000"` | flexible |
#' | 24 | `"GyMd"` | `"2/29/2000 A"` | flexible |
#' | 25 | `"GyMMMd"` | `"Feb 29, 2000 AD"` | flexible |
#' | 26 | `"GyMMMEd"` | `"Tue, Feb 29, 2000 AD"`| flexible |
#' | 27 | `"yM"` | `"2/2000"` | flexible |
#' | 28 | `"Md"` | `"2/29"` | flexible |
#' | 29 | `"MEd"` | `"Tue, 2/29"` | flexible |
#' | 30 | `"MMMd"` | `"Feb 29"` | flexible |
#' | 31 | `"MMMEd"` | `"Tue, Feb 29"` | flexible |
#' | 32 | `"MMMMd"` | `"February 29"` | flexible |
#' | 33 | `"GyMMM"` | `"Feb 2000 AD"` | flexible |
#' | 34 | `"yQQQ"` | `"Q1 2000"` | flexible |
#' | 35 | `"yQQQQ"` | `"1st quarter 2000"` | flexible |
#' | 36 | `"Gy"` | `"2000 AD"` | flexible |
#' | 37 | `"y"` | `"2000"` | flexible |
#' | 38 | `"M"` | `"2"` | flexible |
#' | 39 | `"MMM"` | `"Feb"` | flexible |
#' | 40 | `"d"` | `"29"` | flexible |
#' | 41 | `"Ed"` | `"29 Tue"` | flexible |
#'
#' We can use the [info_date_style()] function within the console to view a
#' similar table of date styles with example output.
#'
#' @section Formatting with the `time_style` argument:
#'
#' We can supply a preset time style to the `time_style` argument to separately
#' handle the time portion of the output. There are many time styles and all of
#' them can handle localization to any supported locale. Many of the time styles
#' are termed flexible time formats and this means that their output will adapt
#' to any `locale` provided. That feature makes the flexible time formats a
#' better option for locales other than `"en"` (the default locale).
#'
#' The following table provides a listing of all time styles and their output
#' values (corresponding to an input time of `14:35:00`). It is noted which of
#' these represent 12- or 24-hour time. Some of the flexible formats (those
#' that begin with `"E"`) include the the day of the week. Keep this in mind
#' when pairing such `time_style` values with a `date_style` so as to avoid
#' redundant or repeating information.
#'
#' | | Time Style | Output | Notes |
#' |----|---------------|---------------------------------|---------------|
#' | 1 | `"iso"` | `"14:35:00"` | ISO 8601, 24h |
#' | 2 | `"iso-short"` | `"14:35"` | ISO 8601, 24h |
#' | 3 | `"h_m_s_p"` | `"2:35:00 PM"` | 12h |
#' | 4 | `"h_m_p"` | `"2:35 PM"` | 12h |
#' | 5 | `"h_p"` | `"2 PM"` | 12h |
#' | 6 | `"Hms"` | `"14:35:00"` | flexible, 24h |
#' | 7 | `"Hm"` | `"14:35"` | flexible, 24h |
#' | 8 | `"H"` | `"14"` | flexible, 24h |
#' | 9 | `"EHm"` | `"Thu 14:35"` | flexible, 24h |
#' | 10 | `"EHms"` | `"Thu 14:35:00"` | flexible, 24h |
#' | 11 | `"Hmsv"` | `"14:35:00 GMT+00:00"` | flexible, 24h |
#' | 12 | `"Hmv"` | `"14:35 GMT+00:00"` | flexible, 24h |
#' | 13 | `"hms"` | `"2:35:00 PM"` | flexible, 12h |
#' | 14 | `"hm"` | `"2:35 PM"` | flexible, 12h |
#' | 15 | `"h"` | `"2 PM"` | flexible, 12h |
#' | 16 | `"Ehm"` | `"Thu 2:35 PM"` | flexible, 12h |
#' | 17 | `"Ehms"` | `"Thu 2:35:00 PM"` | flexible, 12h |
#' | 18 | `"EBhms"` | `"Thu 2:35:00 in the afternoon"` | flexible, 12h |
#' | 19 | `"Bhms"` | `"2:35:00 in the afternoon"` | flexible, 12h |
#' | 20 | `"EBhm"` | `"Thu 2:35 in the afternoon"` | flexible, 12h |
#' | 21 | `"Bhm"` | `"2:35 in the afternoon"` | flexible, 12h |
#' | 22 | `"Bh"` | `"2 in the afternoon"` | flexible, 12h |
#' | 23 | `"hmsv"` | `"2:35:00 PM GMT+00:00"` | flexible, 12h |
#' | 24 | `"hmv"` | `"2:35 PM GMT+00:00"` | flexible, 12h |
#' | 25 | `"ms"` | `"35:00"` | flexible |
#'
#' We can use the [info_time_style()] function within the console to view a
#' similar table of time styles with example output.
#'
#' @section Formatting with a *CLDR* datetime pattern:
#'
#' We can use a *CLDR* datetime pattern with the `format` argument to create
#' a highly customized and locale-aware output. This is a character string that
#' consists of two types of elements:
#'
#' - Pattern fields, which repeat a specific pattern character one or more
#' times. These fields are replaced with date and time data when formatting.
#' The character sets of `A`-`Z` and `a`-`z` are reserved for use as pattern
#' characters.
#' - Literal text, which is output verbatim when formatting. This can include:
#' - Any characters outside the reserved character sets, including
#' spaces and punctuation.
#' - Any text between single vertical quotes (e.g., `'text'`).
#' - Two adjacent single vertical quotes (''), which represent a literal
#' single quote, either inside or outside quoted text.
#'
#' The number of pattern fields is quite sizable so let's first look at how some
#' *CLDR* datetime patterns work. We'll use the datetime string
#' `"2018-07-04T22:05:09.2358(America/Vancouver)"` for all of the examples that
#' follow.
#'
#' - `"mm/dd/y"` -> `"05/04/2018"`
#' - `"EEEE, MMMM d, y"` -> `"Wednesday, July 4, 2018"`
#' - `"MMM d E"` -> `"Jul 4 Wed"`
#' - `"HH:mm"` -> `"22:05"`
#' - `"h:mm a"` -> `"10:05 PM"`
#' - `"EEEE, MMMM d, y 'at' h:mm a"` -> `"Wednesday, July 4, 2018 at 10:05 PM"`
#'
#' Here are the individual pattern fields:
#'
#' ## Year
#'
#' ### Calendar Year
#'
#' This yields the calendar year, which is always numeric. In most cases the
#' length of the `"y"` field specifies the minimum number of digits to display,
#' zero-padded as necessary. More digits will be displayed if needed to show the
#' full year. There is an exception: `"yy"` gives use just the two low-order
#' digits of the year, zero-padded as necessary. For most use cases, `"y"` or
#' `"yy"` should be good enough.
#'
#' | Field Patterns | Output |
#' |------------------------------- |----------------------------------------|
#' | `"y"` | `"2018"` |
#' | `"yy"` | `"18"` |
#' | `"yyy"` to `"yyyyyyyyy"` | `"2018"` to `"000002018"` |
#'
#' ### Year in the Week in Year Calendar
#'
#' This is the year in 'Week of Year' based calendars in which the year
#' transition occurs on a week boundary. This may differ from calendar year
#' `"y"` near a year transition. This numeric year designation is used in
#' conjunction with pattern character `"w"` in the ISO year-week calendar as
#' defined by ISO 8601.
#'
#' | Field Patterns | Output |
#' |--------------------------------|----------------------------------------|
#' | `"Y"` | `"2018"` |
#' | `"YY"` | `"18"` |
#' | `"YYY"` to `"YYYYYYYYY"` | `"2018"` to `"000002018"` |
#'
#' ## Quarter
#'
#' ### Quarter of the Year: formatting and standalone versions
#'
#' The quarter names are identified numerically, starting at `1` and ending at
#' `4`. Quarter names may vary along two axes: the width and the context. The
#' context is either 'formatting' (taken as a default), which the form used
#' within a complete date format string, or, 'standalone', the form for date
#' elements used independently (such as in calendar headers). The standalone
#' form may be used in any other date format that shares the same form of the
#' name. Here, the formatting form for quarters of the year consists of some run
#' of `"Q"` values whereas the standalone form uses `"q"`.
#'
#' | Field Patterns | Output | Notes |
#' |-------------------|-----------------|-----------------------------------|
#' | `"Q"`/`"q"` | `"3"` | Numeric, one digit |
#' | `"QQ"`/`"qq"` | `"03"` | Numeric, two digits (zero padded) |
#' | `"QQQ"`/`"qqq"` | `"Q3"` | Abbreviated |
#' | `"QQQQ"`/`"qqqq"` | `"3rd quarter"` | Wide |
#' | `"QQQQQ"`/`"qqqqq"` | `"3"` | Narrow |
#'
#' ## Month
#'
#' ### Month: formatting and standalone versions
#'
#' The month names are identified numerically, starting at `1` and ending at
#' `12`. Month names may vary along two axes: the width and the context. The
#' context is either 'formatting' (taken as a default), which the form used
#' within a complete date format string, or, 'standalone', the form for date
#' elements used independently (such as in calendar headers). The standalone
#' form may be used in any other date format that shares the same form of the
#' name. Here, the formatting form for months consists of some run of `"M"`
#' values whereas the standalone form uses `"L"`.
#'
#' | Field Patterns | Output | Notes |
#' |-------------------|-----------------|-----------------------------------|
#' | `"M"`/`"L"` | `"7"` | Numeric, minimum digits |
#' | `"MM"`/`"LL"` | `"07"` | Numeric, two digits (zero padded) |
#' | `"MMM"`/`"LLL"` | `"Jul"` | Abbreviated |
#' | `"MMMM"`/`"LLLL"` | `"July"` | Wide |
#' | `"MMMMM"`/`"LLLLL"` | `"J"` | Narrow |
#'
#' ## Week
#'
#' ### Week of Year
#'
#' Values calculated for the week of year range from `1` to `53`. Week `1` for a
#' year is the first week that contains at least the specified minimum number of
#' days from that year. Weeks between week `1` of one year and week `1` of the
#' following year are numbered sequentially from `2` to `52` or `53` (if
#' needed).
#'
#' There are two available field lengths. Both will display the week of year
#' value but the `"ww"` width will always show two digits (where weeks `1` to
#' `9` are zero padded).
#'
#' | Field Patterns | Output | Notes |
#' |------------------|-----------|------------------------------------------|
#' | `"w"` | `"27"` | Minimum digits |
#' | `"ww"` | `"27"` | Two digits (zero padded) |
#'
#' ### Week of Month
#'
#' The week of a month can range from `1` to `5`. The first day of every month
#' always begins at week `1` and with every transition into the beginning of a
#' week, the week of month value is incremented by `1`.
#'
#' | Field Pattern | Output |
#' |------------------|------------------------------------------------------|
#' | `"W"` | `"1"` |
#'
#' ## Day
#'
#' ### Day of Month
#'
#' The day of month value is always numeric and there are two available field
#' length choices in its formatting. Both will display the day of month value
#' but the `"dd"` formatting will always show two digits (where days `1` to `9`
#' are zero padded).
#'
#' | Field Patterns | Output | Notes |
#' |----------------|-----------|--------------------------------------------|
#' | `"d"` | `"4"` | Minimum digits |
#' | `"dd"` | `"04"` | Two digits, zero padded |
#'
#' ### Day of Year
#'
#' The day of year value ranges from `1` (January 1) to either `365` or `366`
#' (December 31), where the higher value of the range indicates that the year is
#' a leap year (29 days in February, instead of 28). The field length specifies
#' the minimum number of digits, with zero-padding as necessary.
#'
#' | Field Patterns | Output | Notes |
#' |-----------------|----------|--------------------------------------------|
#' | `"D"` | `"185"` | |
#' | `"DD"` | `"185"` | Zero padded to minimum width of 2 |
#' | `"DDD"` | `"185"` | Zero padded to minimum width of 3 |
#'
#' ### Day of Week in Month
#'
#' The day of week in month returns a numerical value indicating the number of
#' times a given weekday had occurred in the month (e.g., '2nd Monday in
#' March'). This conveniently resolves to predicable case structure where ranges
#' of day of the month values return predictable day of week in month values:
#'
#' - days `1` - `7` -> `1`
#' - days `8` - `14` -> `2`
#' - days `15` - `21` -> `3`
#' - days `22` - `28` -> `4`
#' - days `29` - `31` -> `5`
#'
#' | Field Pattern | Output |
#' |--------------------------------|----------------------------------------|
#' | `"F"` | `"1"` |
#'
#' ### Modified Julian Date
#'
#' The modified version of the Julian date is obtained by subtracting
#' 2,400,000.5 days from the Julian date (the number of days since January 1,
#' 4713 BC). This essentially results in the number of days since midnight
#' November 17, 1858. There is a half day offset (unlike the Julian date, the
#' modified Julian date is referenced to midnight instead of noon).
#'
#' | Field Patterns | Output |
#' |--------------------------------|----------------------------------------|
#' | `"g"` to `"ggggggggg"` | `"58303"` -> `"000058303"` |
#'
#' ## Weekday
#'
#' ### Day of Week Name
#'
#' The name of the day of week is offered in four different widths.
#'
#' | Field Patterns | Output | Notes |
#' |----------------------------|----------------|---------------------------|
#' | `"E"`, `"EE"`, or `"EEE"` | `"Wed"` | Abbreviated |
#' | `"EEEE"` | `"Wednesday"` | Wide |
#' | `"EEEEE"` | `"W"` | Narrow |
#' | `"EEEEEE"` | `"We"` | Short |
#'
#' ## Periods
#'
#' ### AM/PM Period of Day
#'
#' This denotes before noon and after noon time periods. May be upper or
#' lowercase depending on the locale and other options. The wide form may be
#' the same as the short form if the 'real' long form (e.g. 'ante meridiem') is
#' not customarily used. The narrow form must be unique, unlike some other
#' fields.
#'
#' | Field Patterns | Output | Notes |
#' |--------------------------------|----------|-----------------------------|
#' | `"a"`, `"aa"`, or `"aaa"` | `"PM"` | Abbreviated |
#' | `"aaaa"` | `"PM"` | Wide |
#' | `"aaaaa"` | `"p"` | Narrow |
#'
#' ### AM/PM Period of Day Plus Noon and Midnight
#'
#' Provide AM and PM as well as phrases for exactly noon and midnight. May be
#' upper or lowercase depending on the locale and other options. If the locale
#' doesn't have the notion of a unique 'noon' (i.e., 12:00), then the PM form
#' may be substituted. A similar behavior can occur for 'midnight' (00:00) and
#' the AM form. The narrow form must be unique, unlike some other fields.
#'
#' (a) `input_midnight`: `"2020-05-05T00:00:00"`
#' (b) `input_noon`: `"2020-05-05T12:00:00"`
#'
#' | Field Patterns | Output | Notes |
#' |--------------------------------|--------------------|-------------------|
#' | `"b"`, `"bb"`, or `"bbb"` | (a) `"midnight"` | Abbreviated |
#' | | (b) `"noon"` | |
#' | `"bbbb"` | (a) `"midnight"` | Wide |
#' | | (b) `"noon"` | |
#' | `"bbbbb"` | (a) `"mi"` | Narrow |
#' | | (b) `"n"` | |
#'
#' ### Flexible Day Periods
#'
#' Flexible day periods denotes things like 'in the afternoon', 'in the
#' evening', etc., and the flexibility comes from a locale's language and
#' script. Each locale has an associated rule set that specifies when the day
#' periods start and end for that locale.
#'
#' (a) `input_morning`: `"2020-05-05T00:08:30"`
#' (b) `input_afternoon`: `"2020-05-05T14:00:00"`
#'
#' | Field Patterns | Output | Notes |
#' |----------------------------|--------------------------|-----------------|
#' | `"B"`, `"BB"`, or `"BBB"` | (a) `"in the morning"` | Abbreviated |
#' | | (b) `"in the afternoon"` | |
#' | `"BBBB"` | (a) `"in the morning"` | Wide |
#' | | (b) `"in the afternoon"` | |
#' | `"BBBBB"` | (a) `"in the morning"` | Narrow |
#' | | (b) `"in the afternoon"` | |
#'
#' ## Hours, Minutes, and Seconds
#'
#' ### Hour 0-23
#'
#' Hours from `0` to `23` are for a standard 24-hour clock cycle (midnight plus
#' 1 minute is `00:01`) when using `"HH"` (which is the more common width that
#' indicates zero-padding to 2 digits).
#'
#' Using `"2015-08-01T08:35:09"`:
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"H"` | `"8"` | Numeric, minimum digits |
#' | `"HH"` | `"08"` | Numeric, 2 digits (zero padded) |
#'
#' ### Hour 1-12
#'
#' Hours from `1` to `12` are for a standard 12-hour clock cycle (midnight plus
#' 1 minute is `12:01`) when using `"hh"` (which is the more common width that
#' indicates zero-padding to 2 digits).
#'
#' Using `"2015-08-01T08:35:09"`:
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"h"` | `"8"` | Numeric, minimum digits |
#' | `"hh"` | `"08"` | Numeric, 2 digits (zero padded) |
#'
#' ### Hour 1-24
#'
#' Using hours from `1` to `24` is a less common way to express a 24-hour clock
#' cycle (midnight plus 1 minute is `24:01`) when using `"kk"` (which is the
#' more common width that indicates zero-padding to 2 digits).
#'
#' Using `"2015-08-01T08:35:09"`:
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"k"` | `"9"` | Numeric, minimum digits |
#' | `"kk"` | `"09"` | Numeric, 2 digits (zero padded) |
#'
#' ### Hour 0-11
#'
#' Using hours from `0` to `11` is a less common way to express a 12-hour clock
#' cycle (midnight plus 1 minute is `00:01`) when using `"KK"` (which is the
#' more common width that indicates zero-padding to 2 digits).
#'
#' Using `"2015-08-01T08:35:09"`:
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"K"` | `"7"` | Numeric, minimum digits |
#' | `"KK"` | `"07"` | Numeric, 2 digits (zero padded) |
#'
#' ### Minute
#'
#' The minute of the hour which can be any number from `0` to `59`. Use `"m"` to
#' show the minimum number of digits, or `"mm"` to always show two digits
#' (zero-padding, if necessary).
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"m"` | `"5"` | Numeric, minimum digits |
#' | `"mm"` | `"06"` | Numeric, 2 digits (zero padded) |
#'
#' ### Seconds
#'
#' The second of the minute which can be any number from `0` to `59`. Use `"s"`
#' to show the minimum number of digits, or `"ss"` to always show two digits
#' (zero-padding, if necessary).
#'
#' | Field Patterns | Output | Notes |
#' |------------------------|---------|--------------------------------------|
#' | `"s"` | `"9"` | Numeric, minimum digits |
#' | `"ss"` | `"09"` | Numeric, 2 digits (zero padded) |
#'
#' ### Fractional Second
#'
#' The fractional second truncates (like other time fields) to the width
#' requested (i.e., count of letters). So using pattern `"SSSS"` will display
#' four digits past the decimal (which, incidentally, needs to be added manually
#' to the pattern).
#'
#' | Field Patterns | Output |
#' |--------------------------------|----------------------------------------|
#' | `"S"` to `"SSSSSSSSS"` | `"2"` -> `"235000000"` |
#'
#' ### Milliseconds Elapsed in Day
#'
#' There are 86,400,000 milliseconds in a day and the `"A"` pattern will provide
#' the whole number. The width can go up to nine digits with `"AAAAAAAAA"` and
#' these higher field widths will result in zero padding if necessary.
#'
#' Using `"2011-07-27T00:07:19.7223"`:
#'
#' | Field Patterns | Output |
#' |--------------------------------|----------------------------------------|
#' | `"A"` to `"AAAAAAAAA"` | `"439722"` -> `"000439722"` |
#'
#' ## Era
#'
#' ### The Era Designator
#'
#' This provides the era name for the given date. The Gregorian calendar has two
#' eras: AD and BC. In the AD year numbering system, AD 1 is immediately
#' preceded by 1 BC, with nothing in between them (there was no year zero).
#'
#' | Field Patterns | Output | Notes |
#' |--------------------------------|-----------------|----------------------|
#' | `"G"`, `"GG"`, or `"GGG"` | `"AD"` | Abbreviated |
#' | `"GGGG"` | `"Anno Domini"` | Wide |
#' | `"GGGGG"` | `"A"` | Narrow |
#'
#' ## Time Zones
#'
#' ### TZ // Short and Long Specific non-Location Format
#'
#' The short and long specific non-location formats for time zones are suggested
#' for displaying a time with a user friendly time zone name. Where the short
#' specific format is unavailable, it will fall back to the short localized GMT
#' format (`"O"`). Where the long specific format is unavailable, it will fall
#' back to the long localized GMT format (`"OOOO"`).
#'
#' | Field Patterns | Output | Notes |
#' |----------------------------|---------------------------|----------------|
#' | `"z"`, `"zz"`, or `"zzz"` | `"PDT"` | Short Specific |
#' | `"zzzz"` | `"Pacific Daylight Time"` | Long Specific |
#'
#' ### TZ // Common UTC Offset Formats
#'
#' The ISO8601 basic format with hours, minutes and optional seconds fields is
#' represented by `"Z"`, `"ZZ"`, or `"ZZZ"`. The format is equivalent to RFC 822
#' zone format (when the optional seconds field is absent). This is equivalent
#' to the `"xxxx"` specifier. The field pattern `"ZZZZ"` represents the long
#' localized GMT format. This is equivalent to the `"OOOO"` specifier. Finally,
#' `"ZZZZZ"` pattern yields the ISO8601 extended format with hours, minutes and
#' optional seconds fields. The ISO8601 UTC indicator `Z` is used when local
#' time offset is `0`. This is equivalent to the `"XXXXX"` specifier.
#'
#' | Field Patterns | Output | Notes |
#' |----------------------------|--------------|-----------------------------|
#' | `"Z"`, `"ZZ"`, or `"ZZZ"` | `"-0700"` | ISO 8601 basic format |
#' | `"ZZZZ"` | `"GMT-7:00"` | Long localized GMT format |
#' | `"ZZZZZ"` | `"-07:00"` | ISO 8601 extended format |
#'
#' ### TZ // Short and Long Localized GMT Formats
#'
#' The localized GMT formats come in two widths `"O"` (which removes the minutes
#' field if it's `0`) and `"OOOO"` (which always contains the minutes field).
#' The use of the `GMT` indicator changes according to the locale.
#'
#' | Field Patterns | Output | Notes |
#' |-------------------------|---------------|-------------------------------|
#' | `"O"` | `"GMT-7"` | Short localized GMT format |
#' | `"OOOO"` | `"GMT-07:00"` | Long localized GMT format |
#'
#' ### TZ // Short and Long Generic non-Location Formats
#'
#' The generic non-location formats are useful for displaying a recurring wall
#' time (e.g., events, meetings) or anywhere people do not want to be overly
#' specific. Where either of these is unavailable, there is a fallback to the
#' generic location format (`"VVVV"`), then the short localized GMT format as
#' the final fallback.
#'
#' | Field Patterns | Output | Notes |
#' |-----------------|------------------|------------------------------------|
#' | `"v"` | `"PT"` | Short generic non-location format |
#' | `"vvvv"` | `"Pacific Time"` | Long generic non-location format |
#'
#' ### TZ // Short Time Zone IDs and Exemplar City Formats
#'
#' These formats provide variations of the time zone ID and often include the
#' exemplar city. The widest of these formats, `"VVVV"`, is useful for
#' populating a choice list for time zones, because it supports 1-to-1 name/zone
#' ID mapping and is more uniform than other text formats.
#'
#' | Field Patterns | Output | Notes |
#' |--------------------|-----------------------|----------------------------|
#' | `"V"` | `"cavan"` | Short time zone ID |
#' | `"VV"` | `"America/Vancouver"` | Long time zone ID |
#' | `"VVV"` | `"Vancouver"` | The tz exemplar city |
#' | `"VVVV"` | `"Vancouver Time"` | Generic location format |
#'
#' ### TZ // ISO 8601 Formats with Z for +0000
#'
#' The `"X"`-`"XXX"` field patterns represent valid ISO 8601 patterns for time
#' zone offsets in datetimes. The final two widths, `"XXXX"` and `"XXXXX"` allow
#' for optional seconds fields. The seconds field is *not* supported by the ISO
#' 8601 specification. For all of these, the ISO 8601 UTC indicator `Z` is used
#' when the local time offset is `0`.
#'
#' | Field Patterns | Output | Notes |
#' |----------------|------------|-------------------------------------------|
#' | `"X"` | `"-07"` | ISO 8601 basic format (h, optional m) |
#' | `"XX"` | `"-0700"` | ISO 8601 basic format (h & m) |
#' | `"XXX"` | `"-07:00"` | ISO 8601 extended format (h & m) |
#' | `"XXXX"` | `"-0700"` | ISO 8601 basic format (h & m, optional s) |
#' | `"XXXXX"` | `"-07:00"` | ISO 8601 extended format (h & m, optional s) |
#'
#' ### TZ // ISO 8601 Formats (no use of Z for +0000)
#'
#' The `"x"`-`"xxxxx"` field patterns represent valid ISO 8601 patterns for time
#' zone offsets in datetimes. They are similar to the `"X"`-`"XXXXX"` field
#' patterns except that the ISO 8601 UTC indicator `Z` *will not* be used when
#' the local time offset is `0`.
#'
#' | Field Patterns | Output | Notes |
#' |----------------|------------|-------------------------------------------|
#' | `"x"` | `"-07"` | ISO 8601 basic format (h, optional m) |
#' | `"xx"` | `"-0700"` | ISO 8601 basic format (h & m) |
#' | `"xxx"` | `"-07:00"` | ISO 8601 extended format (h & m) |
#' | `"xxxx"` | `"-0700"` | ISO 8601 basic format (h & m, optional s) |
#' | `"xxxxx"` | `"-07:00"` | ISO 8601 extended format (h & m, optional s) |
#'
#' @section Formatting with a `strptime` format code:
#'
#' Performing custom date/time formatting with the `format` argument can also
#' occur with a `strptime` format code. This works by constructing a string of
#' individual format codes representing formatted date and time elements. These
#' are all indicated with a leading `%`, literal characters are interpreted as
#' any characters not starting with a `%` character.
#'
#' First off, let's look at a few format code combinations that work well
#' together as a `strptime` format. This will give us an intuition on how these
#' generally work. We'll use the datetime `"2015-06-08 23:05:37.48"` for all of
#' the examples that follow.
#'
#' - `"%m/%d/%Y"` -> `"06/08/2015"`
#' - `"%A, %B %e, %Y"` -> `"Monday, June 8, 2015"`
#' - `"%b %e %a"` -> `"Jun 8 Mon"`
#' - `"%H:%M"` -> `"23:05"`
#' - `"%I:%M %p"` -> `"11:05 pm"`
#' - `"%A, %B %e, %Y at %I:%M %p"` -> `"Monday, June 8, 2015 at 11:05 pm"`
#'
#' Here are the individual format codes for the date components:
#'
#' - `"%a"` -> `"Mon"` (abbreviated day of week name)
#' - `"%A"` -> `"Monday"` (full day of week name)
#' - `"%w"` -> `"1"` (day of week number in `0..6`; Sunday is `0`)
#' - `"%u"` -> `"1"` (day of week number in `1..7`; Monday is `1`, Sunday `7`)
#' - `"%y"` -> `"15"` (abbreviated year, using the final two digits)
#' - `"%Y"` -> `"2015"` (full year)
#' - `"%b"` -> `"Jun"` (abbreviated month name)
#' - `"%B"` -> `"June"` (full month name)
#' - `"%m"` -> `"06"` (month number)
#' - `"%d"` -> `"08"` (day number, zero-padded)
#' - `"%e"` -> `"8"` (day number without zero padding)
#' - `"%j"` -> `"159"` (day of the year, always zero-padded)
#' - `"%W"` -> `"23"` (week number for the year, always zero-padded)
#' - `"%V"` -> `"24"` (week number for the year, following the ISO 8601
#' standard)
#' - `"%C"` -> `"20"` (the century number)
#'
#' Here are the individual format codes for the time components:
#'
#' - `"%H"` -> `"23"` (24h hour)
#' - `"%I"` -> `"11"` (12h hour)
#' - `"%M"` -> `"05"` (minute)
#' - `"%S"` -> `"37"` (second)
#' - `"%OS3"` -> `"37.480"` (seconds with decimals; `3` decimal places here)
#' - `%p` -> `"pm"` (AM or PM indicator)
#'
#' Here are some extra formats that you may find useful:
#'
#' - `"%z"` -> `"+0000"` (signed time zone offset, here using UTC)
#' - `"%F"` -> `"2015-06-08"` (the date in the ISO 8601 date format)
#' - `"%%"` -> `"%"` (the literal "`%`" character, in case you need it)
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). Note that a `locale` value provided here will override any
#' global locale setting performed in [gt()]'s own `locale` argument (it is
#' settable there as a value received by all other functions that have a
#' `locale` argument). As a useful reference on which locales are supported, we
#' can use the [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use the [`exibble`] dataset to create a single-column **gt** table (with only
#' the `datetime` column). With `fmt_datetime()` we'll format the `datetime`
#' column to have dates formatted with the `"month_day_year"` style and times
#' with the `"h_m_s_p"` 12-hour time style.
#'
#' ```r
#' exibble |>
#' dplyr::select(datetime) |>
#' gt() |>
#' fmt_datetime(
#' date_style = "month_day_year",
#' time_style = "h_m_s_p"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_datetime_1.png")`
#' }}
#'
#' Using the same input table, we can use `fmt_datetime()` with flexible date
#' and time styles. Two that work well together are `"MMMEd"` and `"Hms"`. These
#' date and time styles will, being flexible, create outputs that conform to the
#' locale value given to the `locale` argument. Let's use two calls of
#' `fmt_datetime()`: the first will format all rows in `datetime` to the Danish
#' locale (with `locale = "da"`) and the second call will target the first three
#' rows with the same formatting, but in the default locale (which is `"en"`).
#'
#' ```r
#' exibble |>
#' dplyr::select(datetime) |>
#' gt() |>
#' fmt_datetime(
#' date_style = "MMMEd",
#' time_style = "Hms",
#' locale = "da"
#' ) |>
#' fmt_datetime(
#' rows = 1:3,
#' date_style = "MMMEd",
#' time_style = "Hms"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_datetime_2.png")`
#' }}
#'
#' It's possible to use the `format` argument and write our own formatting
#' specification. Using the CLDR datetime pattern
#' `"EEEE, MMMM d, y 'at' h:mm a (zzzz)"` gives us datetime outputs with time
#' zone formatting. Let's provide a time zone ID (`"America/Vancouver"`) to the
#' `tz` argument.
#'
#' ```r
#' exibble |>
#' dplyr::select(datetime) |>
#' gt() |>
#' fmt_datetime(
#' format = "EEEE, MMMM d, y 'at' h:mm a (zzzz)",
#' tz = "America/Vancouver"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_datetime_3.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-15
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_datetime()].
#'
#' @import rlang
#' @export
fmt_datetime <- function(
data,
columns = everything(),
rows = everything(),
date_style = "iso",
time_style = "iso",
sep = " ",
format = NULL,
tz = NULL,
pattern = "{x}",
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - date_style
# - time_style
# - sep
# - format
# - tz
# - pattern
# - locale
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_datetime",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_datetime(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
date_style = p_i$date_style %||% date_style,
time_style = p_i$time_style %||% time_style,
sep = p_i$sep %||% sep,
format = p_i$format %||% format,
tz = p_i$tz %||% tz,
pattern = p_i$pattern %||% pattern,
locale = p_i$locale %||% locale
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("Date", "POSIXct", "character")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
if (!is.null(format)) {
# Ensure that the format code meets some basic validation requirements
check_format_code(format = format)
} else {
# Transform `date_style` to `date_format`
date_format_str <- get_date_format(date_style = date_style)
# Transform `time_style` to `time_format`
time_format_str <- get_time_format(time_style = time_style)
}
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(c(
"The `fmt_datetime()` function can only be used on `columns` of certain types.",
"*" = "Allowed types are `Date`, `POSIXct`, and `character` (with
ISO-8601 formatted dates)"
))
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
# If a format string is provided then use that to generate the
# formatted date/time string
if (!is.null(format)) {
# If the incoming values are strings that adequately represent time
# values, then prepend with the `1970-01-01` dummy date to create an
# input that will works with `strftime()`
if (all(is_string_time(x))) {
x <- paste("1970-01-01", x)
}
if (grepl("%", format)) {
if (is.character(x)) {
if (is.null(tz)) {
tz <- "GMT"
}
datetime <-
tryCatch(
as.POSIXlt(x),
error = function(cond) {
cli::cli_abort(
"One or more of the provided date/datetime values are invalid."
)
}
)
attr(datetime, which = "tzone") <- tz
datetime <- as.POSIXct(datetime)
return(strftime(datetime, format = format, tz = tz))
}
# Format the datetime values using `strftime()`
return(strftime(x, format = format, tz = tz))
} else {
if (is.null(tz)) {
tz <- "UTC"
}
dt_str <- strftime(x, format = "%Y-%m-%dT%H:%M:%S%z", tz = tz)
if ("tzone" %in% names(attributes(x))) {
tzone <- attr(x, which = "tzone", exact = TRUE)
dt_str <- paste0(dt_str, "(", tzone, ")")
}
# Format the datetime values using `fdt()`
return(
bigD::fdt(
input = dt_str,
format = format,
locale = locale,
use_tz = tz
)
)
}
}
#
# Format the date and time portions of the datetime value
#
# Convert incoming values to POSIXlt but provide a friendly error
# if the values cannot be parsed by `as.POSIXlt()`
datetime <-
tryCatch(
as.POSIXlt(x),
error = function(cond) {
cli::cli_abort(
"One or more of the provided date/datetime values are invalid."
)
}
)
#
# Separately format the date and time portions using `fdt()`
#
date_str <-
bigD::fdt(
input = as.character(datetime),
format = date_format_str,
locale = locale
)
time_str <-
bigD::fdt(
input = as.character(datetime),
format = time_format_str,
locale = locale
)
paste0(date_str, sep, time_str)
}
)
)
}
#' Format numeric or duration values as styled time duration strings
#'
#' @description
#'
#' Format input values to time duration values whether those input values are
#' numbers or of the `difftime` class. We can specify which time units any
#' numeric input values have (as weeks, days, hours, minutes, or seconds) and
#' the output can be customized with a duration style (corresponding to narrow,
#' wide, colon-separated, and ISO forms) and a choice of output units ranging
#' from weeks to seconds.
#'
#' @section Output units for the colon-separated duration style:
#'
#' The colon-separated duration style (enabled when
#' `duration_style = "colon-sep"`) is essentially a clock-based output format
#' which uses the display logic of chronograph watch functionality. It will, by
#' default, display duration values in the `(D/)HH:MM:SS` format. Any duration
#' values greater than or equal to 24 hours will have the number of days
#' prepended with an adjoining slash mark. While this output format is
#' versatile, it can be changed somewhat with the `output_units` option. The
#' following combinations of output units are permitted:
#'
#' - `c("minutes", "seconds")` -> `MM:SS`
#' - `c("hours", "minutes")` -> `HH:MM`
#' - `c("hours", "minutes", "seconds")` -> `HH:MM:SS`
#' - `c("days", "hours", "minutes")` -> `(D/)HH:MM`
#'
#' Any other specialized combinations will result in the default set being used,
#' which is `c("days", "hours", "minutes", "seconds")`
#'
#' @inheritParams fmt_number
#'
#' @param input_units *Declaration of duration units for numerical values*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' If one or more selected columns contains numeric values (not `difftime`
#' values, which contain the duration units), a keyword must be provided for
#' `input_units` for **gt** to determine how those values are to be
#' interpreted in terms of duration. The accepted units are: `"seconds"`,
#' `"minutes"`, `"hours"`, `"days"`, and `"weeks"`.
#'
#' @param output_units *Choice of output units*
#'
#' `mult-kw:[weeks|days|hours|minutes|seconds]` // *default:* `NULL` (`optional`)
#'
#' Controls the output time units. The default, `NULL`, means that **gt** will
#' automatically choose time units based on the input duration value. To
#' control which time units are to be considered for output (before trimming
#' with `trim_zero_units`) we can specify a vector of one or more of the
#' following keywords: `"weeks"`, `"days"`, `"hours"`, `"minutes"`, or
#' `"seconds"`.
#'
#' @param duration_style *Style for representing duration values*
#'
#' `singl-kw:[narrow|wide|colon-sep|iso]` // *default:* `"narrow"`
#'
#' A choice of four formatting styles for the output duration values. With
#' `"narrow"` (the default style), duration values will be formatted with
#' single letter time-part units (e.g., 1.35 days will be styled as
#' `"1d 8h 24m"`). With `"wide"`, this example value will be expanded to
#' `"1 day 8 hours 24 minutes"` after formatting. The `"colon-sep"` style will
#' put days, hours, minutes, and seconds in the `"([D]/)[HH]:[MM]:[SS]"`
#' format. The `"iso"` style will produce a value that conforms to the ISO
#' 8601 rules for duration values (e.g., 1.35 days will become `"P1DT8H24M"`).
#'
#' @param trim_zero_units *Trimming of zero values*
#'
#' `scalar<logical>|mult-kw:[leading|trailing|internal]` // *default:* `TRUE`
#'
#' Provides methods to remove output time units that have zero values. By
#' default this is `TRUE` and duration values that might otherwise be
#' formatted as `"0w 1d 0h 4m 19s"` with `trim_zero_units = FALSE` are instead
#' displayed as `"1d 4m 19s"`. Aside from using `TRUE`/`FALSE` we could
#' provide a vector of keywords for more precise control. These keywords are:
#' (1) `"leading"`, to omit all leading zero-value time units (e.g., `"0w 1d"`
#' -> `"1d"`), (2) `"trailing"`, to omit all trailing zero-value time units
#' (e.g., `"3d 5h 0s"` -> `"3d 5h"`), and `"internal"`, which removes all
#' internal zero-value time units (e.g., `"5d 0h 33m"` -> `"5d 33m"`).
#'
#' @param max_output_units *Maximum number of time units to display*
#'
#' `scalar<numeric|integer>(val>=1)` // *default:* `NULL` (`optional`)
#'
#' If `output_units` is `NULL`, where the output time units are unspecified
#' and left to **gt** to handle, a numeric value provided for
#' `max_output_units` will be taken as the maximum number of time units to
#' display in all output time duration values. By default, this is `NULL` and
#' all possible time units will be displayed. This option has no effect when
#' `duration_style = "colon-sep"` (only `output_units` can be used to
#' customize that type of duration output).
#'
#' @param force_sign *Forcing the display of a positive sign*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' Should the positive sign be shown for positive values (effectively showing
#' a sign for all values except zero)? If so, use `TRUE` for this option. By
#' default only negative values will display a minus sign.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_duration()` formatting function is compatible with body cells that
#' are of the `"numeric"`, `"integer"`, or `"difftime"` types. Any other types
#' of body cells are ignored during formatting. This is to say that cells of
#' incompatible data types may be targeted, but there will be no attempt to
#' format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Adapting output to a specific `locale`:
#'
#' This formatting function can adapt outputs according to a provided `locale`
#' value. Examples include `"en"` for English (United States) and `"fr"` for
#' French (France). The use of a valid locale ID here means separator and
#' decimal marks will be correct for the given locale. Should any value be
#' provided in `sep_mark`, it will be overridden by the locale's preferred
#' values.
#'
#' Note that a `locale` value provided here will override any global locale
#' setting performed in [gt()]'s own `locale` argument (it is settable there as
#' a value received by all other functions that have a `locale` argument). As a
#' useful reference on which locales are supported, we can use the
#' [info_locales()] function to view an info table.
#'
#' @section Examples:
#'
#' Use part of the `sp500` table to create a **gt** table. Create a
#' `difftime`-based column and format the duration values to be displayed as the
#' number of days since March 30, 2020.
#'
#' ```r
#' sp500 |>
#' dplyr::slice_head(n = 10) |>
#' dplyr::mutate(
#' time_point = lubridate::ymd("2020-03-30"),
#' time_passed = difftime(time_point, date)
#' ) |>
#' dplyr::select(time_passed, open, close) |>
#' gt(rowname_col = "month") |>
#' fmt_duration(
#' columns = time_passed,
#' output_units = "days",
#' duration_style = "wide"
#' ) |>
#' fmt_currency(columns = c(open, close))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_duration_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-16
#'
#' @section Function Introduced:
#' `v0.7.0` (Aug 25, 2022)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_duration()].
#'
#' @import rlang
#' @export
fmt_duration <- function(
data,
columns = everything(),
rows = everything(),
input_units = NULL,
output_units = NULL,
duration_style = c("narrow", "wide", "colon-sep", "iso"),
trim_zero_units = TRUE,
max_output_units = NULL,
pattern = "{x}",
use_seps = TRUE,
sep_mark = ",",
force_sign = FALSE,
system = c("intl", "ind"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
# Ensure that arguments are matched
duration_style <- rlang::arg_match(duration_style)
system <- rlang::arg_match(system)
# Declare formatting function compatibility
compat <- c("numeric", "integer", "difftime")
# Stop function if `locale` does not have a valid value; normalize locale
# and resolve one that might be set globally
validate_locale(locale = locale)
locale <- normalize_locale(locale = locale)
locale <- resolve_locale(data = data, locale = locale)
# Duration values will never have decimal marks
dec_mark <- "unused"
# Use locale-based marks if a locale ID is provided
sep_mark <- get_locale_sep_mark(locale, sep_mark, use_seps)
if (is_true(trim_zero_units)) {
trim_zero_units <- c("leading", "trailing", "internal")
} else if (is_false(trim_zero_units)) {
trim_zero_units <- NULL
} else if (is.character(trim_zero_units) && length(trim_zero_units) > 0) {
# Validate that `trim_zero_units` contains only the allowed keywords
validate_trim_zero_units(trim_zero_units = trim_zero_units)
} else {
cli::cli_abort(c(
"The value provided for `trim_zero_units` is invalid. Either use:",
"*" = "`TRUE` or `FALSE`, or",
"*" = "A vector with any of the keywords \"leading\", \"trailing\", or \"internal\"."
))
}
if (
!is.null(max_output_units) &&
(
!is.numeric(max_output_units) ||
length(max_output_units) != 1 ||
max_output_units < 1
)
) {
cli::cli_abort(c(
"The numeric value supplied for `max_output_units` is invalid.",
"*" = "Must either be `NULL` or an integer value greater than zero."
))
}
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(c(
"The `fmt_duration()` function can only be used on `columns` of certain types.",
"*" = "Allowed types are `numeric` and `difftime`."
))
}
}
# Stop function if any columns have numeric data and `input_units` is NULL
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = "difftime"
) &&
is.null(input_units)
) {
cli::cli_abort(c(
"When there are numeric columns to format, `input_units` must not be `NULL`.",
"*" = "Use one of \"seconds\", \"minutes\", \"hours\", \"days\", or \"weeks\"."
))
}
# Initialize `colon_sep_params` list
colon_sep_params <- list()
# Resolve input units
if (!is.null(input_units)) {
# Stop function if `input_units` isn't a character vector, isn't of
# the right length (1 or greater), and does not contain valid values
validate_duration_input_units(input_units = input_units)
# Normalize the valid set of provided `input_units`
input_units <- normalize_duration_input_units(input_units = input_units)
}
# Resolve output units
if (is.null(output_units)) {
output_units <- c("weeks", "days", "hours", "minutes", "seconds")
} else {
# Stop function if `output_units` isn't a character vector, isn't of
# the right length (1 or greater), and does not contain valid values
validate_duration_output_units(output_units = output_units)
# Normalize the valid set of provided `output_units`
output_units <- normalize_duration_output_units(output_units = output_units)
}
# If `duration_style` is of the "iso" or "colon-sep" types, then
# some options need to be overridden
if (duration_style == "iso") {
output_units <- c("days", "hours", "minutes", "seconds")
max_output_units <- NULL
trim_zero_units <- c("leading", "trailing")
}
if (duration_style == "colon-sep") {
if (
any(
identical(output_units, c("minutes", "seconds")),
identical(output_units, c("hours", "minutes")),
identical(output_units, c("hours", "minutes", "seconds")),
identical(output_units, c("days", "hours", "minutes"))
)
) {
colon_sep_output_units <- output_units
} else {
colon_sep_output_units <- c("days", "hours", "minutes", "seconds")
}
output_units <- c("days", "hours", "minutes", "seconds")
if (identical(trim_zero_units, "leading")) {
colon_sep_trim_zero_units <- "leading"
} else {
colon_sep_trim_zero_units <- FALSE
}
colon_sep_params <-
list(
output_units = colon_sep_output_units,
trim_zero_units = colon_sep_trim_zero_units
)
trim_zero_units <- FALSE
max_output_units <- NULL
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
compat = compat,
fns = num_fmt_factory_multi(
pattern = pattern,
use_latex_math_mode = FALSE,
format_fn = function(x, context) {
if (duration_style %in% c("narrow", "wide")) {
patterns <-
get_localized_duration_patterns(
value = x,
type = duration_style,
locale = locale
)
} else {
patterns <- NULL
}
x_str <-
values_to_durations(
x,
in_units = input_units,
out_units = output_units,
out_style = duration_style,
trim_zero_units = trim_zero_units,
max_output_units = max_output_units,
colon_sep_params = colon_sep_params,
sep_mark = sep_mark,
dec_mark = dec_mark,
system = system,
locale = locale,
patterns = patterns
)
#
# Prefix with plus and minus signs where necessary
#
x_str[x < 0 & !is.infinite(x)] <-
paste0(
context_minus_mark(context = context),
x_str[x < 0 & !is.infinite(x)]
)
if (force_sign) {
x_str[x > 0 & !is.infinite(x)] <-
paste0("+", x_str[x > 0 & !is.infinite(x)])
}
x_str
}
)
)
}
validate_trim_zero_units <- function(trim_zero_units) {
if (!all(trim_zero_units %in% c("leading", "trailing", "internal"))) {
cli::cli_abort(c(
"The character vector provided for `trim_zero_units` is invalid.",
"*" = "It should only contain any of the keywords \"leading\", \"trailing\",
or ", "\"internal\"."
))
}
}
validate_duration_input_units <- function(input_units) {
if (is.null(input_units)) {
return(NULL)
}
if (!is.character(input_units)) {
cli::cli_abort(
"The `input_units` input to `fmt_duration()` must be a character vector."
)
}
time_parts_vec <- c("weeks", "days", "hours", "mins", "minutes", "secs", "seconds")
if (!all(input_units %in% time_parts_vec) || length(input_units) != 1) {
cli::cli_abort(c(
"The value of `input_units` for `fmt_duration()` is invalid.",
"*" = "Only one of the \"weeks\", \"days\", \"hours\", \"minutes\", or
\"seconds\" time parts should be present."
))
}
}
normalize_duration_input_units <- function(input_units) {
# Ensure that key transforms occur
input_units <- tidy_sub(input_units, "secs", "seconds")
input_units <- tidy_sub(input_units, "mins", "minutes")
input_units
}
validate_duration_output_units <- function(output_units) {
if (!is.character(output_units)) {
cli::cli_abort(
"The `output_units` input to `fmt_duration()` must be a character vector."
)
}
if (length(output_units) < 1) {
cli::cli_abort(
"The `output_units` input to `fmt_duration()` must be a vector with at
least one element."
)
}
time_parts_vec <- c("weeks", "days", "hours", "mins", "minutes", "secs", "seconds")
if (!all(output_units %in% time_parts_vec)) {
cli::cli_abort(c(
"There are invalid components in the `output_units` input to `fmt_duration()`.",
"*" = "Only the \"weeks\", \"days\", \"hours\", \"minutes\", and \"seconds\`
time parts should be present."
))
}
}
normalize_duration_output_units <- function(output_units) {
# Ensure that key transforms occur and that the output units are a unique set
output_units <- tidy_sub(output_units, "secs", "seconds")
output_units <- tidy_sub(output_units, "mins", "minutes")
output_units <- unique(output_units)
# Ensure that the order of output units is from greatest to smallest
time_parts <- c("weeks", "days", "hours", "minutes", "seconds")
output_units[order(match(output_units, time_parts))]
}
values_to_durations <- function(
x,
in_units,
out_units,
out_style,
trim_zero_units,
max_output_units,
colon_sep_params,
sep_mark,
dec_mark,
system,
locale,
patterns
) {
# Obtain the units of `x` if it is of the difftime class (and
# drop difftime attrs with `as.numeric()`)
if (inherits(x, "difftime")) {
in_units <- units(x)
x <- as.numeric(x)
}
if (inherits(x, "integer")) {
x <- as.numeric(x)
}
if (in_units == "mins") {
in_units <- "minutes"
}
if (in_units == "secs") {
in_units <- "seconds"
}
second_conversion_factor <-
c(
weeks = 604800L,
days = 86400L,
hours = 3600L,
minutes = 60L,
seconds = 1L
)
# Should `in_units` be anything other than seconds then
# convert all `x` values to seconds
if (in_units != "seconds") {
x <- x * second_conversion_factor[[in_units]]
}
x_str <- character(length(x))
for (i in seq_along(x)) {
x_df_i <-
dplyr::tibble(
value = NA_integer_,
time_part = out_units,
formatted = NA_character_
)
x_rem_i <- abs(x[[i]])
for (j in seq_along(out_units)) {
factor <- second_conversion_factor[[out_units[[j]]]]
x_df_i$value[[j]] <- floor(x_rem_i / factor)
x_rem_i <- x_rem_i %% factor
}
# Remove time parts according to keywords in `trim_zero_units`
total_time_units <- nrow(x_df_i)
first_non_zero_unit_idx <- utils::head(which(x_df_i$value != 0), 1)
last_non_zero_unit_idx <- utils::tail(which(x_df_i$value != 0), 1)
remove_idx <- c()
# Possibly add leading zero time parts to `remove_idx`
if (
"leading" %in% trim_zero_units &&
length(first_non_zero_unit_idx) > 0 &&
first_non_zero_unit_idx > 1
) {
remove_idx <- c(remove_idx, seq(1, first_non_zero_unit_idx - 1))
}
# Possibly add trailing zero time parts to `remove_idx`
if (
"trailing" %in% trim_zero_units &&
length(last_non_zero_unit_idx) > 0 &&
last_non_zero_unit_idx < total_time_units
) {
remove_idx <- c(remove_idx, seq(last_non_zero_unit_idx + 1, total_time_units))
}
# Possibly add internal zero time parts to `remove_idx`
if (
"internal" %in% trim_zero_units &&
length(first_non_zero_unit_idx) > 0
) {
internal_idx <- first_non_zero_unit_idx:last_non_zero_unit_idx
remove_idx <- c(remove_idx, base::intersect(internal_idx, which(x_df_i$value == 0)))
}
# Remove rows from `x_df_i`
if (length(remove_idx) > 0) {
x_df_i <- x_df_i[-remove_idx, ]
}
if (all(x_df_i$value == 0) && length(trim_zero_units) > 0) {
# Remove all but the final row
x_df_i <- utils::tail(x_df_i, n = 1)
}
# Remove units that exceed a maximum number according to `max_output_units`
if (!is.null(max_output_units) && nrow(x_df_i) > max_output_units) {
x_df_i <- x_df_i[seq_len(max_output_units), ]
}
for (j in seq_len(nrow(x_df_i))) {
pattern <-
extract_duration_pattern(
value = x_df_i$value[j],
time_p = x_df_i$time_part[j],
patterns = patterns
)
x_df_i[j, "formatted"] <-
format_time_part(
x = x_df_i$value[j],
time_part = x_df_i$time_part[j],
out_style = out_style,
sep_mark = sep_mark,
dec_mark = dec_mark,
locale = locale,
system = system,
pattern = pattern
)
}
# Handle edge cases where duration is smaller
# than the smallest unit in `out_units`
if (all(x_df_i$value == 0)) {
# Obtain the smallest time unit; `normalize_duration_output_units()`
# ensures that `out_units` is sorted from largest to smallest so the
# last component will always be needed here
time_p <- out_units[length(out_units)]
# If the time duration is zero then use `0` as the value,
# otherwise, use `1` and indicate that the value is less than that
pattern <-
extract_duration_pattern(
value = if (x_rem_i == 0) 0 else 1,
time_p = time_p,
patterns = patterns
)
x_df_i[nrow(x_df_i), "formatted"] <-
format_time_part(
x = if (x_rem_i == 0) 0 else 1,
time_part = time_p,
out_style = out_style,
sep_mark = sep_mark,
dec_mark = dec_mark,
locale = locale,
system = system,
pattern = pattern
)
if (x_rem_i != 0) {
x_df_i[1, "formatted"] <- paste0("<", x_df_i[1, "formatted"])
}
}
if (out_style == "colon-sep") {
colon_sep_output_units <- colon_sep_params$output_units
colon_sep_trim_zero_units <- colon_sep_params$trim_zero_units
# Filter to only the output units needed
x_df_i <- dplyr::filter(x_df_i, time_part %in% colon_sep_output_units)
# If days has a zero value, remove that entry unconditionally
if ("days" %in% x_df_i$time_part && x_df_i[[1, "value"]] == 0) {
x_df_i <- dplyr::filter(x_df_i, time_part != "days")
}
if (colon_sep_trim_zero_units == "leading") {
if (
identical(x_df_i$time_part, c("hours", "minutes", "seconds")) &&
x_df_i[[1, "value"]] == 0
) {
x_df_i <- dplyr::filter(x_df_i, time_part != "hours")
}
}
# Assemble the remaining time parts
hms_part <-
x_df_i %>%
dplyr::filter(time_part %in% c("hours", "minutes", "seconds")) %>%
dplyr::pull(formatted) %>%
paste(collapse = ":")
d_part <-
ifelse("days" %in% x_df_i$time_part, paste0(x_df_i$formatted[1], "/"), "")
x_str[i] <- paste0(d_part, hms_part)
} else if (out_style == "iso") {
x_str[i] <-
paste0("P", paste0(x_df_i$formatted, collapse = "")) %>%
tidy_sub("D", "DT", fixed = TRUE)
} else {
x_str[i] <- paste0(x_df_i$formatted, collapse = " ")
}
}
x_str
}
format_time_part <- function(
x,
time_part,
out_style,
sep_mark,
dec_mark,
locale,
system,
pattern
) {
x_val <-
format_num_to_str(
x,
context = "plain",
decimals = 0,
n_sigfig = NULL,
sep_mark = if (out_style != "iso") sep_mark else "",
dec_mark = dec_mark,
drop_trailing_zeros = TRUE,
drop_trailing_dec_mark = TRUE,
format = "f",
system = system
)
if (out_style %in% c("narrow", "wide")) {
out <- gsub("{0}", x_val, pattern, fixed = TRUE)
} else if (out_style == "iso") {
out <- paste0(x_val, toupper(substr(time_part, 1, 1)))
} else {
if (time_part %in% c("hours", "minutes", "seconds") && x < 10) {
out <- paste0("0", x_val)
} else {
out <- as.character(x_val)
}
}
out
}
get_localized_duration_patterns <- function(
value,
type,
locale
) {
if (is.null(locale)) locale <- "en"
if (type == "wide") type <- "long"
pattern_tbl <-
durations[
durations$locale == locale,
grepl(
"^duration-(week|day|hour|minute|second).unitPattern-count-(zero|one|other)$",
colnames(durations)
) |
grepl("type", colnames(durations), fixed = TRUE)
] %>%
dplyr::filter(type == .env$type) %>%
dplyr::select(-type)
colnames(pattern_tbl) <- gsub("(duration|-|unitPattern-count)", "", colnames(pattern_tbl))
as.list(pattern_tbl)
}
extract_duration_pattern <- function(
value,
time_p,
patterns
) {
x_val_i_type <-
dplyr::case_when(
value == 1 ~ "one",
value == 0 ~ "zero",
TRUE ~ "other"
)
pattern <- patterns[grepl(paste0(gsub("s$", "", time_p), ".*?.", x_val_i_type), names(patterns))][[1]]
if (!is.null(pattern) && is.na(pattern)) {
pattern <- patterns[grepl(paste0(gsub("s$", "", time_p), ".*?.other"), names(patterns))][[1]]
}
pattern
}
#' Format column data containing bin/interval information
#'
#' When using the `cut()` function (or other functions that use it in some way)
#' you get bins that can look like this: `"(0,10]"`, `"(10,15]"`, `"(15,20]"`,
#' `"(20,40]"`. This interval notation expresses the lower and upper limits of
#' each range. The square or round brackets define whether each of the endpoints
#' are included in the range (`[`/`]` for inclusion, `(`/`)` for exclusion).
#' Should bins of this sort be present in a table, the `fmt_bins()` function can
#' be used to format that syntax to a form that presents better in a display
#' table. It's possible to format the values of the intervals with the `fmt`
#' argument, and, the separator can be modified with the `sep` argument.
#'
#' @inheritParams fmt_number
#'
#' @param sep *Separator between values*
#'
#' `scalar<character>` // *default:* `"--"`
#'
#' The separator text that indicates the values are ranged. The default value
#' of `"--"` indicates that an en dash will be used for the range separator.
#' Using `"---"` will be taken to mean that an em dash should be used. Should
#' you want these special symbols to be taken literally, they can be supplied
#' within the base [I()] function.
#'
#' @param fmt *Formatting expressions*
#'
#' `<single expression>` // *default:* `NULL` (`optional`)
#'
#' An optional formatting expression in formula form. If used, the RHS of `~`
#' should contain a formatting call (e.g.,
#' `~ fmt_number(., decimals = 3, use_seps = FALSE`).
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_bins()` formatting function is compatible with body cells that are
#' of the `"character"` or `"factor"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Formatting expressions for `fmt`:
#'
#' We can supply a one-sided (RHS only) expression to `fmt`, and, several can be
#' provided in a list. The expression uses a formatting function (e.g.,
#' [fmt_number()], [fmt_currency()], etc.) and it must contain an initial `.`
#' that stands for the data object. If performing numeric formatting it might
#' look something like this:
#'
#' `fmt = ~ fmt_number(., decimals = 1, use_seps = FALSE)`
#'
#' @section Examples:
#'
#' Use the [`countrypops`] dataset to create a **gt** table. Before even getting
#' to the [gt()] call, we use the `cut()` function in conjunction with the
#' [scales::breaks_log()] function to create some highly customized bins.
#' Consequently each country's population in the 2021 year is assigned to a bin.
#' These bins have a characteristic type of formatting that can be used as input
#' to `fmt_bins()`, and using that formatting function allows us to customize
#' the presentation of those ranges. For instance, here we are formatting the
#' left and right values of the ranges with the [fmt_integer()] function (using
#' formula syntax).
#'
#' ```r
#' countrypops |>
#' dplyr::filter(year == 2021) |>
#' dplyr::select(country_code_2, population) |>
#' dplyr::mutate(population_class = cut(
#' population,
#' breaks = scales::breaks_log(n = 20)(population)
#' )
#' ) |>
#' dplyr::group_by(population_class) |>
#' dplyr::summarize(
#' count = dplyr::n(),
#' countries = paste0(country_code_2, collapse = ",")
#' ) |>
#' dplyr::arrange(desc(population_class)) |>
#' gt() |>
#' fmt_flag(columns = countries) |>
#' fmt_bins(
#' columns = population_class,
#' fmt = ~ fmt_integer(., suffixing = TRUE)
#' ) |>
#' cols_label(
#' population_class = "Population Range",
#' count = "",
#' countries = "Countries"
#' ) |>
#' cols_width(
#' population_class ~ px(150),
#' count ~ px(50)
#' ) |>
#' tab_style(
#' style = cell_text(style = "italic"),
#' locations = cells_body(columns = count)
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_bins_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-17
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @import rlang
#' @export
fmt_bins <- function(
data,
columns = everything(),
rows = everything(),
sep = "--",
fmt = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
# Declare formatting function compatibility
compat <- c("character", "factor")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_bins()` function can only be used on `columns`
with character or factor data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
format_bins_by_context(x, sep = sep, fmt = fmt, context = "html")
},
latex = function(x) {
format_bins_by_context(x, sep = sep, fmt = fmt, context = "latex")
},
rtf = function(x) {
format_bins_by_context(x, sep = sep, fmt = fmt, context = "rtf")
},
word = function(x) {
format_bins_by_context(x, sep = sep, fmt = fmt, context = "word")
},
default = function(x) {
format_bins_by_context(x, sep = sep, fmt = fmt, context = "plain")
}
)
)
}
format_bins_by_context <- function(x, sep, fmt, context) {
# Format `sep` for output context
if (context != "plain") {
sep <- context_dash_mark(sep, context = context)
}
# Generate an vector of empty strings that will eventually
# contain all of the ranged value text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
x_str_non_missing <- as.character(x_str_non_missing)
x_str_is_bin <-
grepl("^(\\(|\\[]).*?,.*?(\\)|\\])$", x_str_non_missing)
x_str_lhs <-
gsub(
"^(\\(|\\[])(.*?),(.*?)(\\)|\\])$",
"\\2",
x_str_non_missing[x_str_is_bin]
)
x_str_rhs <-
gsub(
"^(\\(|\\[])(.*?),(.*?)(\\)|\\])$",
"\\3",
x_str_non_missing[x_str_is_bin]
)
if (!is.null(fmt)) {
# Format the LHS and RHS values
val_tbl <-
dplyr::tibble(
left = as.numeric(x_str_lhs),
right = as.numeric(x_str_rhs)
)
val_tbl_gt <- gt(val_tbl)
# Ensure that the expression (a RHS formula) is made a closure
format_fn <- rlang::as_closure(fmt)
# Perform the formatting on this gt table with closure
val_tbl_gt <- format_fn(val_tbl_gt)
#
# Extract the columns of formatted data
#
x_val_lhs_fmt <-
extract_cells(val_tbl_gt, columns = "left", output = context)
x_val_rhs_fmt <-
extract_cells(val_tbl_gt, columns = "right", output = context)
} else {
x_val_lhs_fmt <- x_str_lhs
x_val_rhs_fmt <- x_str_rhs
}
x_str_non_missing[x_str_is_bin] <-
paste0(x_val_lhs_fmt, sep, x_val_rhs_fmt)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
}
#' Format measurement units
#'
#' @description
#'
#' The `fmt_units()` function lets you better format measurement units in the
#' table body. These must conform to **gt**'s specialized units notation (e.g.,
#' `"J Hz^-1 mol^-1"` can be used to generate units for the
#' *molar Planck constant*) for the best conversion. The notation here provides
#' several conveniences for defining units, so as long as the values to be
#' formatted conform to this syntax, you'll obtain nicely-formatted units no
#' matter what the table output format might be (i.e., HTML, LaTeX, RTF, etc.).
#' Details pertaining to the units notation can be found in the section entitled
#' *How to use **gt**'s units notation*.
#'
#' @inheritParams fmt_number
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section How to use **gt**'s units notation:
#'
#' The units notation involves a shorthand of writing units that feels familiar
#' and is fine-tuned for the task at hand. Each unit is treated as a separate
#' entity (parentheses and other symbols included) and the addition of subscript
#' text and exponents is flexible and relatively easy to formulate. This is all
#' best shown with examples:
#'
#' - `"m/s"` and `"m / s"` both render as `"m/s"`
#' - `"m s^-1"` will appear with the `"-1"` exponent intact
#' - `"m /s"` gives the the same result, as `"/<unit>"` is equivalent to
#' `"<unit>^-1"`
#' - `"E_h"` will render an `"E"` with the `"h"` subscript
#' - `"t_i^2.5"` provides a `t` with an `"i"` subscript and a `"2.5"` exponent
#' - `"m[_0^2]"` will use overstriking to set both scripts vertically
#' - `"g/L %C6H12O6%"` uses a chemical formula (enclosed in a pair of `"%"`
#' characters) as a unit partial, and the formula will render correctly with
#' subscripted numbers
#' - Common units that are difficult to write using ASCII text may be implicitly
#' converted to the correct characters (e.g., the `"u"` in `"ug"`, `"um"`,
#' `"uL"`, and `"umol"` will be converted to the Greek *mu* symbol; `"degC"`
#' and `"degF"` will render a degree sign before the temperature unit)
#' - We can transform shorthand symbol/unit names enclosed in `":"` (e.g.,
#' `":angstrom:"`, `":ohm:"`, etc.) into proper symbols
#' - Greek letters can added by enclosing the letter name in `":"`; you can
#' use lowercase letters (e.g., `":beta:"`, `":sigma:"`, etc.) and uppercase
#' letters too (e.g., `":Alpha:"`, `":Zeta:"`, etc.)
#' - The components of a unit (unit name, subscript, and exponent) can be
#' fully or partially italicized/emboldened by surrounding text with `"*"` or
#' `"**"`
#'
#' @section Examples:
#'
#' Let's use the [`illness`] dataset and create a new **gt** table. The `units`
#' column contains character values in **gt**'s specialized units notation
#' (e.g., `"x10^9 / L"`) so the `fmt_units()` function was used to better format
#' those units.
#'
#' ```r
#' illness |>
#' gt() |>
#' fmt_units(columns = units) |>
#' sub_missing(columns = -starts_with("norm")) |>
#' sub_missing(columns = c(starts_with("norm"), units), missing_text = "") |>
#' sub_large_vals(rows = test == "MYO", threshold = 1200) |>
#' fmt_number(
#' decimals = 2,
#' drop_trailing_zeros = TRUE
#' ) |>
#' tab_header(title = "Laboratory Findings for the YF Patient") |>
#' tab_spanner(label = "Day", columns = starts_with("day")) |>
#' cols_label_with(fn = ~ gsub("day_", "", .)) |>
#' cols_merge_range(col_begin = norm_l, col_end = norm_u) |>
#' cols_label(
#' starts_with("norm") ~ "Normal Range",
#' test ~ "Test",
#' units ~ "Units"
#' ) |>
#' cols_width(
#' starts_with("day") ~ px(80),
#' everything() ~ px(120)
#' ) |>
#' tab_style(
#' style = cell_text(align = "center"),
#' locations = cells_column_labels(columns = starts_with("day"))
#' ) |>
#' tab_style(
#' style = cell_fill(color = "aliceblue"),
#' locations = cells_body(columns = c(test, units))
#' ) |>
#' opt_vertical_padding(scale = 0.4) |>
#' opt_align_table_header(align = "left") |>
#' tab_options(heading.padding = px(10))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_units_1.png")`
#' }}
#'
#' The [`constants`] dataset contains values for hundreds of fundamental
#' physical constants. We'll take a subset of values that have some molar basis
#' and generate a **gt** table from that. Like the [`illness`] dataset, this one
#' has a `units` column so, again, the `fmt_units()` function will be used to
#' format those units. Here, the preference for typesetting measurement units is
#' to have positive and negative exponents (e.g., not `"<unit_1> / <unit_2>"`
#' but rather `"<unit_1> <unit_2>^-1"`).
#'
#' ```r
#' constants |>
#' dplyr::filter(grepl("molar", name)) |>
#' gt() |>
#' cols_hide(columns = c(uncert, starts_with("sf"))) |>
#' fmt_units(columns = units) |>
#' fmt_scientific(columns = value, decimals = 3) |>
#' tab_header(title = "Physical Constants Having a Molar Basis") |>
#' tab_options(column_labels.hidden = TRUE)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_units_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-18
#'
#' @section Function Introduced:
#' *In Development*
#'
#' @import rlang
#' @export
fmt_units <- function(
data,
columns = everything(),
rows = everything()
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
# Declare formatting function compatibility
compat <- c("character", "factor")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_units()` function can only be used on `columns`
with character or factor data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
format_units_by_context(x, context = "html")
},
latex = function(x) {
format_units_by_context(x, context = "latex")
},
rtf = function(x) {
format_units_by_context(x, context = "rtf")
},
word = function(x) {
format_units_by_context(x, context = "word")
},
default = function(x) {
format_units_by_context(x, context = "plain")
}
)
)
}
format_units_by_context <- function(x, context = "html") {
# Generate an vector of empty strings that will eventually
# contain all of the ranged value text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
x_str_non_missing <- as.character(x_str_non_missing)
x_str_non_missing <-
vapply(
seq_along(x_str_non_missing),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
render_units(define_units(x_str_non_missing[x]), context = context)
}
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
}
#' Format URLs to generate links
#'
#' @description
#'
#' Should cells contain URLs, the `fmt_url()` function can be used to make them
#' navigable links. This should be expressly used on columns that contain *only*
#' URL text (i.e., no URLs as part of a larger block of text). Should you have
#' such a column of data, there are options for how the links should be styled.
#' They can be of the conventional style (with underlines and text coloring that
#' sets it apart from other text), or, they can appear to be button-like (with
#' a surrounding box that can be filled with a color of your choosing).
#'
#' URLs in data cells are detected in two ways. The first is using the simple
#' Markdown notation for URLs of the form: `[label](URL)`. The second assumes
#' that the text is the URL. In the latter case the URL is also used as the
#' label but there is the option to use the `label` argument to modify that
#' text.
#'
#' @inheritParams fmt_number
#'
#' @param label *Link label*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' The visible 'label' to use for the link. If `NULL` (the default)
#' the URL will serve as the label. There are two non-`NULL` options: (1) a
#' static text can be used for the label by providing a string, and (2) a
#' function can be provided to fashion a label from every URL.
#'
#' @param as_button *Style link as a button*
#'
#' `scalar<logical>` // *default:* `FALSE`
#'
#' An option to style the link as a button. By default, this is
#' `FALSE`. If this option is chosen then the `button_fill` argument becomes
#' usable.
#'
#' @param color *Link color*
#'
#' `scalar<character>` // *default:* `"auto"`
#'
#' The color used for the resulting link and its underline. This is
#' `"auto"` by default; this allows **gt** to choose an appropriate color
#' based on various factors (such as the background `button_fill` when
#' `as_button` is `TRUE`).
#'
#' @param show_underline *Show the link underline*
#'
#' `scalar<character>|scalar<logical>` // *default:* `"auto"`
#'
#' Should the link be decorated with an underline? By
#' default this is `"auto"` which means that **gt** will choose `TRUE` when
#' `as_button = FALSE` and `FALSE` in the other case. The link underline will
#' be the same color as that set in the `color` option.
#'
#' @param button_fill,button_width,button_outline *Button options*
#'
#' `scalar<character>` // *default:* `"auto"`
#'
#' Options for styling a link-as-button (and only applies if
#' `as_button = TRUE`). All of these options are by default set to `"auto"`,
#' allowing **gt** to choose appropriate fill, width, and outline values.
#'
#' @param target,rel,referrerpolicy,hreflang *Anchor element attributes*
#'
#' `scalar<character>` // *default:* `NULL`
#'
#' Additional anchor element attributes. For descriptions of each attribute
#' and the allowed values, refer to the [MDN Web Docs reference on the anchor
#' HTML element](
#' https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a#attributes).
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_url()` formatting function is compatible with body cells that are
#' of the `"character"` or `"factor"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_url()` to obtain varying parameter values from a specified column within
#' the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `label`
#' - `as_button`
#' - `color`
#' - `show_underline`
#' - `button_fill`
#' - `button_width`
#' - `button_outline`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Examples:
#'
#' Using a portion of the [`towny`] dataset, let's create a **gt** table. We can
#' use the `fmt_url()` function on the `website` column to generate navigable
#' links to websites. By default the links are underlined and the color will be
#' chosen for you (it's dark cyan).
#'
#' ```r
#' towny |>
#' dplyr::filter(csd_type == "city") |>
#' dplyr::arrange(desc(population_2021)) |>
#' dplyr::select(name, website, population_2021) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' tab_header(
#' title = md("The 10 Largest Municipalities in `towny`"),
#' subtitle = "Population values taken from the 2021 census."
#' ) |>
#' fmt_integer() |>
#' fmt_url(columns = website) |>
#' cols_label(
#' name = "Name",
#' website = "Site",
#' population_2021 = "Population"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_url_1.png")`
#' }}
#'
#' Let's try something else. We can set a static text label for the link with
#' the `label` argument (and we'll use the word `"site"` for this). The link
#' underline is removable with `show_underline = FALSE`. With this change, it
#' seems sensible to merge the link to the `"name"` column and enclose the link
#' text in parentheses (the [cols_merge()] function handles all that).
#'
#' ```r
#' towny |>
#' dplyr::filter(csd_type == "city") |>
#' dplyr::arrange(desc(population_2021)) |>
#' dplyr::select(name, website, population_2021) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' tab_header(
#' title = md("The 10 Largest Municipalities in `towny`"),
#' subtitle = "Population values taken from the 2021 census."
#' ) |>
#' fmt_integer() |>
#' fmt_url(
#' columns = website,
#' label = "site",
#' show_underline = FALSE
#' ) |>
#' cols_merge(
#' columns = c(name, website),
#' pattern = "{1} ({2})"
#' ) |>
#' cols_label(
#' name = "Name",
#' population_2021 = "Population"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_url_2.png")`
#' }}
#'
#' The `fmt_url()` function allows for the styling of links as 'buttons'. This
#' is as easy as setting `as_button = TRUE`. Doing that unlocks the ability to
#' set a `button_fill` color. This color can automatically selected by **gt**
#' (this is the default) but here we're using `"steelblue"`. The `label`
#' argument also accepts a function! We can choose to adapt the label text from
#' the URLs by eliminating any leading `"https://"` or `"www."` parts.
#'
#' ```r
#' towny |>
#' dplyr::filter(csd_type == "city") |>
#' dplyr::arrange(desc(population_2021)) |>
#' dplyr::select(name, website, population_2021) |>
#' dplyr::slice_head(n = 10) |>
#' dplyr::mutate(ranking = dplyr::row_number()) |>
#' gt(rowname_col = "ranking") |>
#' tab_header(
#' title = md("The 10 Largest Municipalities in `towny`"),
#' subtitle = "Population values taken from the 2021 census."
#' ) |>
#' fmt_integer() |>
#' fmt_url(
#' columns = website,
#' label = function(x) gsub("https://|www.", "", x),
#' as_button = TRUE,
#' button_fill = "steelblue",
#' button_width = px(150)
#' ) |>
#' cols_move_to_end(columns = website) |>
#' cols_align(align = "center", columns = website) |>
#' cols_width(
#' ranking ~ px(40),
#' website ~ px(200)
#' ) |>
#' tab_options(column_labels.hidden = TRUE) |>
#' tab_style(
#' style = cell_text(weight = "bold"),
#' locations = cells_stub()
#' ) %>%
#' opt_vertical_padding(scale = 0.75)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_url_3.png")`
#' }}
#'
#' It's perhaps inevitable that you'll come across missing values in your column
#' of URLs. The `fmt_url()` function will preserve input `NA` values, allowing
#' you to handle them with [sub_missing()]. Here's an example of that.
#'
#' ```r
#' towny |>
#' dplyr::arrange(population_2021) |>
#' dplyr::select(name, website, population_2021) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' tab_header(
#' title = md("The 10 Smallest Municipalities in `towny`"),
#' subtitle = "Population values taken from the 2021 census."
#' ) |>
#' fmt_integer() |>
#' fmt_url(columns = website) |>
#' cols_label(
#' name = "Name",
#' website = "Site",
#' population_2021 = "Population"
#' ) |>
#' sub_missing()
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_url_4.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-19
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @import rlang
#' @export
fmt_url <- function(
data,
columns = everything(),
rows = everything(),
label = NULL,
as_button = FALSE,
color = "auto",
show_underline = "auto",
button_fill = "auto",
button_width = "auto",
button_outline = "auto",
target = NULL,
rel = NULL,
referrerpolicy = NULL,
hreflang = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - label
# - as_button
# - color
# - show_underline
# - button_fill
# - button_width
# - button_outline
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_url",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_url(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
label = p_i$label %||% label,
as_button = p_i$as_button %||% as_button,
color = p_i$color %||% color,
show_underline = p_i$show_underline %||% show_underline,
button_fill = p_i$button_fill %||% button_fill,
button_width = p_i$button_width %||% button_width,
button_outline = p_i$button_outline %||% button_outline
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("character", "factor")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_url()` function can only be used on `columns`
with character or factor data."
)
}
}
if (as_button) {
#
# All determinations of `color`, `show_underline`, `button_fill` and
# `button_width` for the case where `as_button = TRUE`; each of the
# above arguments are set to "auto" by default
#
# In the button case, we opt to never show an underline unless it's
# requested by the user (i.e., `show_underline = TRUE`)
if (show_underline == "auto") {
show_underline <- FALSE
}
if (button_width == "auto") {
button_width <- NULL
}
button_outline_color <- button_outline
button_outline_style <- "solid"
button_outline_width <- "2px"
# There are various combinations of "auto" or not with `button_fill` and
# `color` that need to be handled delicately so as to ensure contrast
# between foreground text and background fill is maximized
if (button_fill == "auto" && color == "auto") {
# Choose a fixed and standard color combination if both options are
# 'auto'; these will be 'steelblue' and 'white'
button_fill <- "#4682B4"
color <- "#FFFFFF"
} else if (button_fill == "auto" && color != "auto") {
# Case where text color is chosen but background is left to gt
# to determine; will either by light blue or dark blue based on the
# brightness of the text color (can be of poor contrast if user chooses
# a text color somewhere in the mid range of brightness, but nothing
# really can be done there to compensate)
# Ensure that the incoming `color` is transformed to hexadecimal form
color <- html_color(colors = color, alpha = NULL)
# Use `ideal_fgnd_color()` in a backwards manner only to see whether
# the proxy background color is light (#FFFFFF) or dark (#000000)
bgrnd_bw <-
ideal_fgnd_color(
bgnd_color = color,
algo = "apca"
)
if (bgrnd_bw == "#FFFFFF") {
# Background should be light so using 'lightblue'
button_fill <- "#ADD8E6"
} else {
# Background should be dark so using 'darkblue'
button_fill <- "#00008B"
}
if (button_outline == "auto") {
button_outline_color <- "#BEBEBE"
button_outline_style <- "none"
}
} else if (button_fill != "auto" && color == "auto") {
# Ensure that the incoming `button_fill` is transformed
# to hexadecimal form
button_fill <- html_color(colors = button_fill, alpha = NULL)
# Case where background color is chosen for foreground text color is
# not; this is the simple case where `ideal_fgnd_color()` is well suited
# to determine the text color (either black or white)
color <-
ideal_fgnd_color(
bgnd_color = button_fill,
algo = "apca"
)
if (button_outline == "auto") {
button_outline_color <- "#DFDFDF"
if (button_fill %in% c(
"#FFFFFF", "#FFFFFF", "#FAF5EF", "#FAFAFA",
"#FFFEFC", "#FBFCFA", "#FBFAF2"
)) {
button_outline_style <- "solid"
} else {
button_outline_style <- "none"
}
}
} else {
# Ensure that the incoming `color` is transformed to hexadecimal form
color <- html_color(colors = color, alpha = NULL)
}
} else {
if (show_underline == "auto") {
show_underline <- TRUE
}
if (color == "auto") {
color <- "#008B8B"
} else {
# Ensure that the incoming `color` is transformed to hexadecimal form
color <- html_color(colors = color, alpha = NULL)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
# Generate an vector of empty strings that will eventually
# contain all of the link text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
if (!is.null(label)) {
if (rlang::is_function(label)) {
label_str <- label(x_str_non_missing)
} else {
label_str <- label
}
} else {
if (any(grepl("\\[.*?\\]\\(.*?\\)", x_str_non_missing))) {
# Generate labels
label_str <-
vapply(
x_str_non_missing,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl("\\[.*?\\]\\(.*?\\)", x)) {
out <- sub("\\[(.*?)\\]\\(.*?\\)", "\\1", x)
} else {
out <- x
}
out
}
)
# Generate href values
x_str_non_missing <-
vapply(
x_str_non_missing,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl("\\[.*?\\]\\(.*?\\)", x)) {
out <- sub("\\[.*?\\]\\((.*?)\\)", "\\1", x)
} else {
out <- x
}
out
}
)
} else {
label_str <- x_str_non_missing
}
}
add_anchor_attr <- function(
init = NULL,
arg,
nm,
values = NULL,
error_arg = caller_arg(arg),
error_call = caller_env()
) {
if (!is.null(values)) {
arg <-
rlang::arg_match(
arg,
values = values,
error_arg = error_arg,
error_call = error_call
)
}
if (!is_string(arg)) {
cli::cli_abort(
"{.arg {nm}} must be a string, not {.obj_type_friendly {arg}}",
call = error_call
)
}
paste0(init, " ", nm, "=\"", arg, "\"")
}
target <- target %||% "_blank"
target_values <- NULL
if (grepl("^_", target)) {
target_values <- c("_blank", "_self", "_parent", "_top")
}
anchor_attr <-
add_anchor_attr(
arg = target,
nm = "target",
values = target_values
)
if (!is.null(rel)) {
anchor_attr <-
add_anchor_attr(
anchor_attr,
rel,
nm = "rel",
values = c(
"alternate", "author", "bookmark", "external", "help",
"license", "next", "nofollow", "noreferrer", "noopener",
"prev", "search", "tag"
)
)
}
if (!is.null(referrerpolicy)) {
anchor_attr <-
add_anchor_attr(
anchor_attr,
referrerpolicy,
nm = "referrerpolicy",
values = c(
"no-referrer", "no-referrer-when-downgrade", "origin",
"origin-when-cross-origin", "same-origin", "strict-origin",
"strict-origin-when-cross-origin", "unsafe-url"
)
)
}
if (!is.null(hreflang)) {
anchor_attr <-
add_anchor_attr(
anchor_attr,
arg = hreflang,
nm = "hreflang"
)
}
anchor_attr <-
add_anchor_attr(
anchor_attr,
arg = paste0(
"color:", color[1], ";",
"text-decoration:",
if (show_underline) "underline" else "none", ";",
if (show_underline) "text-underline-position: under;" else NULL,
"display: inline-block;",
if (as_button) {
paste0(
"background-color: ", button_fill, ";",
"padding: 8px 12px;",
if (!is.null(button_width)) {
paste0("width: ", button_width, "; text-align: center;")
} else {
NULL
},
"outline-style: ", button_outline_style, "; ",
"outline-color: ", button_outline_color, "; ",
"outline-width: ", button_outline_width, ";"
)
} else {
NULL
}
),
nm = "style"
)
x_str_non_missing <-
paste0(
"<a",
" href=\"", x_str_non_missing, "\"",
anchor_attr,
">",
label_str,
"</a>"
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
},
latex = function(x) {
x
},
rtf = function(x) {
x
},
word = function(x) {
x
},
default = function(x) {
x
}
)
)
}
#' Format image paths to generate images in cells
#'
#' @description
#'
#' To more easily insert graphics into body cells, we can use the `fmt_image()`
#' function. This allows for one or more images to be placed in the targeted
#' cells. The cells need to contain some reference to an image file, either: (1)
#' complete http/https or local paths to the files; (2) the file names, where a
#' common path can be provided via `path`; or (3) a fragment of the file name,
#' where the `file_pattern` helps to compose the entire file name and `path`
#' provides the path information. This should be expressly used on columns that
#' contain *only* references to image files (i.e., no image references as part
#' of a larger block of text). Multiple images can be included per cell by
#' separating image references by commas. The `sep` argument allows for a common
#' separator to be applied between images.
#'
#' @inheritParams fmt_number
#'
#' @param height,width *Height and width of images*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' The absolute height of the image in the table cell. If you set the `width`
#' and `height` remains `NULL` (or vice versa), the width-to-height ratio will
#' be preserved when **gt** calculates the length of the missing dimension. If
#' `width` and `height` are both `NULL`, `height` is set as `"2em"` and
#' `width` will be calculated.
#'
#' @param sep *Separator between images*
#'
#' `scalar<character>` // *default:* `" "`
#'
#' In the output of images within a body cell, `sep` provides the separator
#' between each image.
#'
#' @param path *Path to image files*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' An optional path to local image files (this is combined with all
#' filenames).
#'
#' @param file_pattern *File pattern specification*
#'
#' `scalar<character>` // *default:* `"{x}"`
#'
#' The pattern to use for mapping input values in the body cells to the names
#' of the graphics files. The string supplied should use `"{x}"` in the
#' pattern to map filename fragments to input strings.
#'
#' @param encode *Use Base64 encoding*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' The option to always use Base64 encoding for image paths that are
#' determined to be local. By default, this is `TRUE`.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler
#' **tidyselect**-style expressions (the select helpers should work well here)
#' and we can use quoted row identifiers in `c()`. It's also possible to use row
#' indices (e.g., `c(3, 5, 6)`) though these index values must correspond to the
#' row numbers of the input data (the indices won't necessarily match those of
#' rearranged rows if row groups are present). One more type of expression is
#' possible, an expression that takes column values (can involve any of the
#' available columns in the table) and returns a logical vector. This is nice if
#' you want to base formatting on values in the column or another column, or,
#' you'd like to use a more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_image()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `height`
#' - `width`
#' - `sep`
#' - `path`
#' - `file_pattern`
#' - `encode`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Examples:
#'
#' Using a small portion of [`metro`] dataset, let's create a **gt** table. We
#' will only include a few columns and rows from that table. The `lines` and
#' `connect_rer` columns have comma-separated listings of numbers/letters
#' (corresponding to lines served at each station). We have a directory SVG
#' graphics for all of these lines in the package (the path for the image
#' directory can be accessed via `system.file("metro_svg", package = "gt")`),
#' and the filenames roughly correspond to the data in those two columns. The
#' `fmt_image()` function can be used with these inputs since the `path` and
#' `file_pattern` arguments allow us to compose complete and valid file
#' locations. What you get from this are sequences of images in the table cells,
#' taken from the referenced graphics files on disk.
#'
#' ```r
#' metro |>
#' dplyr::select(name, caption, lines, connect_rer) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' cols_merge(
#' columns = c(name, caption),
#' pattern = "{1}<< ({2})>>"
#' ) |>
#' text_replace(
#' locations = cells_body(columns = name),
#' pattern = "\\((.*?)\\)",
#' replacement = "<br>(<em>\\1</em>)"
#' ) |>
#' sub_missing(columns = connect_rer, missing_text = "") |>
#' fmt_image(
#' columns = lines,
#' path = system.file("metro_svg", package = "gt"),
#' file_pattern = "metro_{x}.svg"
#' ) |>
#' fmt_image(
#' columns = connect_rer,
#' path = system.file("metro_svg", package = "gt"),
#' file_pattern = "rer_{x}.svg"
#' ) |>
#' cols_label(
#' name = "Station",
#' lines = "Lines",
#' connect_rer = "RER"
#' ) |>
#' cols_align(align = "left") |>
#' tab_style(
#' style = cell_borders(
#' sides = c("left", "right"),
#' weight = px(1),
#' color = "gray85"
#' ),
#' locations = cells_body(columns = lines)
#' ) |>
#' opt_stylize(style = 6, color = "blue") |>
#' opt_all_caps() |>
#' opt_horizontal_padding(scale = 1.75)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_image_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-20
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @import rlang
#' @export
fmt_image <- function(
data,
columns = everything(),
rows = everything(),
height = NULL,
width = NULL,
sep = " ",
path = NULL,
file_pattern = "{x}",
encode = TRUE
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - height
# - width
# - sep
# - path
# - file_pattern
# - encode
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_image",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_image(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
height = p_i$height %||% height,
width = p_i$width %||% width,
sep = p_i$sep %||% sep,
path = p_i$path %||% path,
file_pattern = p_i$file_pattern %||% file_pattern,
encode = p_i$encode %||% encode
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# If width & height not provided, default width to '2em' and let width scale
if (is.null(height) & is.null(width)) {
height <- "2em"
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
# Generate an vector of empty strings that will eventually
# contain all of the link text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
x_str_non_missing <-
vapply(
seq_along(x_str_non_missing),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl(",", x_str_non_missing[x])) {
files <- unlist(strsplit(x_str_non_missing[x], ",\\s*"))
} else {
files <- x_str_non_missing[x]
}
# Automatically append `px` length unit when `height`
# is given as a number
if (is.numeric(height)) {
height <- paste0(height, "px")
}
# Handle formatting of `file_pattern`
files <-
apply_pattern_fmt_x(
pattern = file_pattern,
values = files
)
out <- c()
for (y in seq_along(files)) {
if (
(!is.null(path) && grepl("https?://", path)) ||
grepl("https?://", files[y])
) {
if (!is.null(path)) {
# Normalize ending of `path`
path <- gsub("/\\s+$", "", path)
uri <- paste0(path, "/", files[y])
} else {
uri <- files[y]
}
} else {
# Compose and normalize the local file path
filename <- gtsave_filename(path = path, filename = files[y])
filename <- path_expand(filename)
# Create the image URI; this uses the logical value of
# `encode` to either perform or bypass Base64 encoding
if (encode) {
uri <- get_image_uri(filename)
} else {
uri <- filename
}
}
style_string <- paste0(
c(
ifelse(!is.null(height), paste0("height:", height, ";"), ""),
ifelse(!is.null(width), paste0("width:", width, ";"), "")
),
collapse = ""
)
# Place the `uri` value within an <img>, setting the
# height and always preferring vertical alignment as 'middle'
out_y <-
paste0(
"<img src=\"", uri, "\" ",
"style=\"", style_string,
"vertical-align:middle;\">"
)
out <- c(out, out_y)
}
paste0(
"<span style=\"white-space:nowrap;\">",
paste0(out, collapse = sep),
"</span>"
)
}
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
},
latex = function(x) {
x
},
rtf = function(x) {
x
},
word = function(x) {
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
# Automatically append `px` length unit when `height` or `width`
# is given as a number
if (!is.null(height)) {
if (is.numeric(height)) {
height <- paste0(height, "px")
} else {
if (is.character(height)) {
height <- convert_to_px(height)
}
}
}
if (!is.null(width)) {
if (is.numeric(width)) {
width <- paste0(width, "px")
} else {
if (is.character(width)) {
width <- convert_to_px(width)
}
}
}
x_str_non_missing <-
vapply(
seq_along(x_str_non_missing),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl(",", x_str_non_missing[x])) {
files <- unlist(strsplit(x_str_non_missing[x], ",\\s*"))
} else {
files <- x_str_non_missing[x]
}
# Handle formatting of `file_pattern`
files <-
apply_pattern_fmt_x(
pattern = file_pattern,
values = files
)
out <- list()
for (y in seq_along(files)) {
# Handle case where the image is online
if (
(!is.null(path) && grepl("https?://", path)) ||
grepl("https?://", files[y])
) {
if (!is.null(path)) {
# Normalize ending of `path`
path <- gsub("/\\s+$", "", path)
uri <- paste0(path, "/", files[y])
} else {
uri <- files[y]
}
filename <- download_file(uri)
} else {
# Compose and normalize the local file path
filename <- gtsave_filename(path = path, filename = files[y])
filename <- path_expand(filename)
}
if (is.null(height) | is.null(width)) {
hw_ratio <- get_image_hw_ratio(filename)
if (is.null(width)) {
width <- round(height / hw_ratio, 0)
} else {
height <- round(width * hw_ratio, 0)
}
}
out_y <-
xml_r(
xml_rPr(),
xml_image(filename, height = height, width = width, units = "px")
)
out <- c(out, list(out_y))
}
paste0(
"<md_container>",
as.character(xml_p(xml_pPr(), htmltools::tagList(c(out)))),
"</md_container>"
)
}
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
},
default = function(x) {
x
}
)
)
}
download_file <- function(uri) {
filename <- tempfile(fileext = paste0("_", basename(uri)))
utils::download.file(uri, destfile = filename, quiet = TRUE)
filename
}
convert_to_px <- function(x) {
units <- tolower(gsub("\\d+","", x))
value <- as.numeric(gsub(units, "", x))
px_conversion <- c(
"in" = 96,
"cm" = 37.7952755906,
"emu" = 1 / 9525,
"em" = 16 # https://www.w3schools.com/tags/ref_pxtoemconversion.asp
)
if (units %in% c("px", names(px_conversion))) {
if (units == "px") {
value
} else {
round(value * px_conversion[[units]], 0)
}
} else {
rlang::abort(
paste0(
"invalid units provided - `", units,
"`. Must be one of of type ", paste0("`", names(px_conversion), "`", collapse = "")
)
)
}
}
get_image_hw_ratio <- function(filepath) {
if (rlang::is_installed("magick")) {
if (tolower(tools::file_ext(filepath)) == "svg") {
image <- magick::image_read_svg(filepath)
} else if (tolower(tools::file_ext(filepath)) == "pdf") {
image <- magick::image_read_pdf(filepath)
} else {
image <- magick::image_read(filepath)
}
image_dims <- magick::image_info(image)
ratio <- image_dims$height / image_dims$width
} else {
cli::cli_warn("{.pkg magick} must be installed to derive image height/width ratio.")
ratio <- 1
}
ratio
}
#' Generate flag icons for countries from their country codes
#'
#' @description
#'
#' While it is fairly straightforward to insert images into body cells (using
#' [fmt_image()] is one way to it), there is often the need to incorporate
#' specialized types of graphics within a table. One such group of graphics
#' involves iconography representing different countries, and the `fmt_flag()`
#' function helps with inserting a flag icon (or multiple) in body cells. To
#' make this work seamlessly, the input cells need to contain some reference to
#' a country, and this is in the form of a 2-letter ISO 3166-1 country code
#' (e.g., Egypt has the `"EG"` country code). This function will parse the
#' targeted body cells for those codes (and the [countrypops] dataset contains
#' all of them) and insert the appropriate flag graphics. Multiple flags can be
#' included per cell by separating country codes with commas (e.g., `"GB,TT"`).
#' The `sep` argument allows for a common separator to be applied between flag
#' icons.
#'
#' @inheritParams fmt_number
#'
#' @param height *Height of flag*
#'
#' `scalar<character>` // *default:* `"1em"`
#'
#' The absolute height of the flag icon in the table cell. By default, this is
#' set to `"1em"`.
#'
#' @param sep *Separator between flags*
#'
#' `scalar<character>` // *default:* `" "`
#'
#' In the output of flag icons within a body cell, `sep` provides the
#' separator between each icon. By default, this is a single space character
#' (`" "`).
#'
#' @param use_title *Display country name on hover*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' An option to display a tooltip for the country name (in English) when
#' hovering over the flag icon.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_flag()` formatting function is compatible with body cells that are
#' of the `"character"` or `"factor"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_flag()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `height`
#' - `sep`
#' - `use_title`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Flag icons that can be used:
#'
#' You can view the entire set of supported flag icons as an informative table
#' by using the [info_flags()] function. In the information table that is
#' provided, you'll see every flag icon and the associated identifier that can
#' be used with `fmt_flag()`.
#'
#' @section Examples:
#'
#' Use the [`countrypops`] dataset to create a **gt** table. We will only
#' include a few columns and rows from that table. The `country_code_2` column
#' has 2-letter country codes in the format required for `fmt_flag()` and using
#' that function transforms the codes in circular flag icons.
#'
#' ```r
#' countrypops |>
#' dplyr::filter(year == 2021) |>
#' dplyr::filter(grepl("^S", country_name)) |>
#' dplyr::arrange(country_name) |>
#' dplyr::select(-country_code_3, -year) |>
#' dplyr::slice_head(n = 10) |>
#' gt() |>
#' cols_move_to_start(columns = country_code_2) |>
#' fmt_integer() |>
#' fmt_flag(columns = country_code_2) |>
#' cols_label(
#' country_code_2 = "",
#' country_name = "Country",
#' population = "Population (2021)"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_flag_1.png")`
#' }}
#'
#' Using [`countrypops`] we can generate a table that provides populations
#' every five years for the Benelux countries (`"BE"`, `"NL"`, and `"LU"`).
#' This requires some manipulation with **dplyr** and **tidyr** before
#' introducing the table to **gt**. With `fmt_flag()` we can obtain flag icons
#' in the `country_code_2` column. After that, we can merge the flag icons into
#' the stub column, generating row labels that have a combination of icon and
#' text.
#'
#' ```r
#' countrypops |>
#' dplyr::filter(country_code_2 %in% c("BE", "NL", "LU")) |>
#' dplyr::filter(year %% 10 == 0) |>
#' dplyr::select(country_name, country_code_2, year, population) |>
#' tidyr::pivot_wider(names_from = year, values_from = population) |>
#' dplyr::slice(1, 3, 2) |>
#' gt(rowname_col = "country_name") |>
#' tab_header(title = "Populations of the Benelux Countries") |>
#' tab_spanner(columns = everything(), label = "Year") |>
#' fmt_integer() |>
#' fmt_flag(columns = country_code_2) |>
#' cols_merge(
#' columns = c(country_name, country_code_2),
#' pattern = "{2} {1}"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_flag_2.png")`
#' }}
#'
#' The `fmt_flag()` function works well even when there are multiple country
#' codes within the same cell. It can operate on comma-separated codes without
#' issue. When rendered to HTML, hovering over each of the flag icons results in
#' tooltip text showing the name of the country.
#'
#' ```r
#' countrypops |>
#' dplyr::filter(year == 2021, population < 100000) |>
#' dplyr::select(country_code_2, population) |>
#' dplyr::mutate(population_class = cut(
#' population,
#' breaks = scales::breaks_pretty(n = 5)(population)
#' )
#' ) |>
#' dplyr::group_by(population_class) |>
#' dplyr::summarize(
#' countries = paste0(country_code_2, collapse = ",")
#' ) |>
#' dplyr::arrange(desc(population_class)) |>
#' gt() |>
#' tab_header(title = "Countries with Small Populations") |>
#' fmt_flag(columns = countries) |>
#' fmt_bins(
#' columns = population_class,
#' fmt = ~ fmt_integer(., suffixing = TRUE)
#' ) |>
#' cols_label(
#' population_class = "Population Range",
#' countries = "Countries"
#' ) |>
#' cols_width(population_class ~ px(150))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_flag_3.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-21
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @import rlang
#' @export
fmt_flag <- function(
data,
columns = everything(),
rows = everything(),
height = "1em",
sep = " ",
use_title = TRUE
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - height
# - sep
# - use_title
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_flag",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_flag(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
height = p_i$height %||% height,
sep = p_i$sep %||% sep,
use_title = p_i$use_title %||% use_title
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Declare formatting function compatibility
compat <- c("character", "factor")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"{.fn fmt_flag} must be used on `columns` with character or factor data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
# Generate an vector of empty strings that will eventually
# contain all of the link text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
x_str_non_missing <-
vapply(
seq_along(x_str_non_missing),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl(",", x_str_non_missing[x])) {
countries <-
toupper(unlist(strsplit(x_str_non_missing[x], ",\\s*")))
} else {
countries <- toupper(x_str_non_missing[x])
}
# Automatically append `px` length unit when `height`
# is given as a number
if (is.numeric(height)) {
height <- paste0(height, "px")
}
# TODO: Parse to ensure that `country_code` values are valid
out <- c()
for (y in seq_along(countries)) {
flag_svg <-
flag_tbl[
flag_tbl[["country_code_2"]] == countries[y],
][["country_flag"]]
if (use_title) {
flag_title <-
flag_tbl[
flag_tbl[["country_code_2"]] == countries[y],
][["country_name"]]
}
out_y <-
gsub(
"<svg.*?>",
paste0(
"<svg xmlns=\"http://www.w3.org/2000/svg\" ",
"aria-hidden=\"true\" role=\"img\" ",
"width=\"512\" height=\"512\" ",
"viewBox=\"0 0 512 512\" ",
"style=\"vertical-align:-0.125em;",
"image-rendering:optimizeQuality;",
"height:", height, ";",
"width:", height, ";",
"\"",
">",
if (use_title) {
paste0("<title>", flag_title, "</title>")
} else {
NULL
}
),
flag_svg
)
out <- c(out, out_y)
}
paste0(
"<span style=\"white-space:nowrap;\">",
paste0(out, collapse = sep),
"</span>"
)
}
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
},
latex = function(x) {
x
},
rtf = function(x) {
x
},
word = function(x) {
x
},
default = function(x) {
x
}
)
)
}
#' Use icons within a table's body cells
#'
#' @description
#'
#' We can draw from a library of thousands of icons and selectively insert them
#' into a **gt** table. The `fmt_icon()` function makes this possible and it
#' operates a lot like [fmt_flag()] in that input cells need to contain some
#' reference to an icon name. We are exclusively using *Font Awesome* icons here
#' (and we do need to have the **fontawesome** package installed) so the
#' reference is the short icon name. Multiple icons can be included per cell by
#' separating icon names with commas (e.g., `"hard-drive,clock"`). The `sep`
#' argument allows for a common separator to be applied between flag icons.
#'
#' @inheritParams fmt_number
#'
#' @param height *Height of icon*
#'
#' `scalar<character>` // *default:* `"1em"`
#'
#' The absolute height of the icon in the table cell. By default, this is set
#' to `"1em"`.
#'
#' @param sep *Separator between icons*
#'
#' `scalar<character>` // *default:* `" "`
#'
#' In the output of icons within a body cell, `sep` provides the separator
#' between each icon. By default, this is a single space character (`" "`).
#'
#' @param stroke_color *Color of the icon stroke/outline*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' The icon stroke is essentially the outline of the icon. The color of the
#' stroke can be modified by applying a single color here. If not provided
#' then the default value of `"currentColor"` is applied so that the stroke
#' color matches that of the parent HTML element's color attribute.
#'
#' @param stroke_width *Width of the icon stroke/outline*
#'
#' `scalar<character|numeric|integer>` // *default:* `NULL` (`optional`)
#'
#' The `stroke_width` option allows for setting the color of the icon outline
#' stroke. By default, the stroke width is very small at `"1px"` so a size
#' adjustment here can sometimes be useful.
#'
#' @param stroke_alpha *Transparency value for icon stroke/outline*
#'
#' `scalar<numeric>` // *default:* `NULL` (`optional`)
#'
#' The level of transparency for the icon stroke can be controlled with a
#' decimal value between `0` and `1`.
#'
#' @param fill_color *Color of the icon fill*
#'
#' `scalar<character>` // *default:* `NULL` (`optional`)
#'
#' The fill color of the icon can be set with `fill_color`; providing a single
#' color here will change the color of the fill but not of the icon's 'stroke'
#' or outline (use `stroke_color` to modify that). If not provided then the
#' default value of `"currentColor"` is applied so that the fill matches the
#' color of the parent HTML element's color attribute.
#'
#' @param fill_alpha *Transparency value for icon fill*
#'
#' `scalar<numeric|integer>(0>=val>=1)` // *default:* `NULL` (`optional`)
#'
#' The level of transparency for the icon fill can be controlled with a
#' decimal value between `0` and `1`.
#'
#' @param vertical_adj *Vertical adjustment of icon from baseline*
#'
#' `scalar<character|numeric|integer>` // *default:* `NULL` (`optional`)
#'
#' The vertical alignment of the icon. By default, a length of `"-0.125em"`
#' is used.
#'
#' @param margin_left *Margin width left of icon*
#'
#' `scalar<character|numeric|integer>` // *default:* `NULL` (`optional`)
#'
#' The length value for the margin that's to the left of the icon can be set
#' with `margin_left`. By default, `"auto"` is used for this but if space is
#' needed on the left-hand side then a length of `"0.2em"` is recommended as a
#' starting point.
#'
#' @param margin_right *Margin width right of icon*
#'
#' `scalar<character|numeric|integer>` // *default:* `NULL` (`optional`)
#'
#' The length value for the margin that's to the right of the icon can be set
#' with `margin_right`. By default, `"auto"` is used for this but if space is
#' needed on the right-hand side then a length of `"0.2em"` is recommended as
#' a starting point.
#'
#' @param a11y *Accessibility mode for icon*
#'
#' `singl-kw:[semantic|decorative|none]` // *default:* `"semantic"`
#'
#' The accessibility mode for the icon display can be set with the `a11y`
#' argument. Icons can either be `"semantic"` or `"decorative"`. Using
#' `"none"` will result in no accessibility features for the icons.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Compatibility of formatting function with data values:
#'
#' The `fmt_icon()` formatting function is compatible with body cells that are
#' of the `"character"` or `"factor"` types. Any other types of body cells are
#' ignored during formatting. This is to say that cells of incompatible data
#' types may be targeted, but there will be no attempt to format them.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_icon()` to obtain varying parameter values from a specified column
#' within the table. This means that each row could be formatted a little bit
#' differently. These arguments provide support for [from_column()]:
#'
#' - `height`
#' - `sep`
#' - `stroke_color`
#' - `stroke_width`
#' - `stroke_alpha`
#' - `fill_color`
#' - `fill_alpha`
#' - `vertical_adj`
#' - `margin_left`
#' - `margin_right`
#' - `a11y`
#'
#' Please note that for each of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Icons that can be used:
#'
#' The `fmt_icon()` function relies on an installation of the **fontawesome**
#' package to operate and every icon within that package can be accessed here
#' with either an icon name or a full name. For example, the *Arrow Down* icon
#' has an icon name of `"arrow-down"` and its corresponding full name is
#' `"fas fa-arrow-down"`. In most cases you'll want to use the shorter name, but
#' some icons have both a *Solid* (`"fas"`) and a *Regular* (`"far"`) variant so
#' only the full name can disambiguate the pairing. In the latest release of
#' **fontawesome** (`v0.5.2`), there are 2,025 icons and you can view the entire
#' icon listing by using the [info_icons()] function. What you'll get from that
#' is an information table showing every icon and associated set of identifiers.
#'
#' @section Examples:
#'
#' For this first example of generating icons with `fmt_icon()`, let's make a
#' simple tibble that has two columns of *Font Awesome* icon names. We separate
#' multiple icons per cell with commas. By default, the icons are 1 em in
#' height; we're going to make the icons slightly larger here (so we can see the
#' fine details of them) by setting `height = "4em"`.
#'
#' ```r
#' dplyr::tibble(
#' animals = c(
#' "hippo", "fish,spider", "mosquito,locust,frog",
#' "dog,cat", "kiwi-bird"
#' ),
#' foods = c(
#' "bowl-rice", "egg,pizza-slice", "burger,lemon,cheese",
#' "carrot,hotdog", "bacon"
#' )
#' ) |>
#' gt() |>
#' fmt_icon(height = "4em") |>
#' cols_align(align = "center", columns = everything())
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_icon_1.png")`
#' }}
#'
#' Let's take a few rows from the [`towny`] dataset and make it so the
#' `csd_type` column contains *Font Awesome* icon names (we want only the
#' `"city"` and `"house-chimney"` icons here). After using `fmt_icon()` to
#' format the `csd_type` column, we get icons that are representative of the two
#' categories of municipality for this subset of data.
#'
#' ```r
#' towny |>
#' dplyr::select(name, csd_type, population_2021) |>
#' dplyr::filter(csd_type %in% c("city", "town")) |>
#' dplyr::group_by(csd_type) |>
#' dplyr::arrange(desc(population_2021)) |>
#' dplyr::slice_head(n = 5) |>
#' dplyr::ungroup() |>
#' dplyr::mutate(
#' csd_type = ifelse(csd_type == "town", "house-chimney", "city")
#' ) |>
#' gt() |>
#' fmt_integer() |>
#' fmt_icon(columns = csd_type) |>
#' cols_move_to_start(columns = csd_type) |>
#' cols_label(
#' csd_type = "",
#' name = "City/Town",
#' population_2021 = "Population"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_icon_2.png")`
#' }}
#'
#' Let's use a portion of the [`metro`] dataset to create a **gt** table.
#' Depending on which train services are offered at the subset of stations,
#' *Font Awesome* icon names will be applied to cells where the different
#' services exist (the specific names are `"train-subway"`, `"train"`, and
#' `"train-tram"`). With **tidyr**'s `unite()` function, those icon names
#' can be converged into a single column (`services`) with the `NA` values
#' removed. Since the names correspond to icons and they are in the correct
#' format (separated by commas), they can be formatted as *Font Awesome* icons
#' with the `fmt_icon()` function.
#'
#' ```r
#' metro |>
#' dplyr::select(name, lines, connect_rer, connect_tramway, location) |>
#' dplyr::slice_tail(n = 10) |>
#' dplyr::mutate(lines = "train-subway") |>
#' dplyr::mutate(connect_rer = ifelse(!is.na(connect_rer), "train", NA)) |>
#' dplyr::mutate(
#' connect_tramway = ifelse(!is.na(connect_tramway), "train-tram", NA)
#' ) |>
#' tidyr::unite(
#' col = services,
#' lines:connect_tramway,
#' sep = ",",
#' na.rm = TRUE
#' ) |>
#' gt() |>
#' fmt_icon(
#' columns = services,
#' a11y = "decorative"
#' ) |>
#' cols_merge(
#' columns = c(name, services),
#' pattern = "{1} ({2})"
#' ) |>
#' cols_label(
#' name = "Station",
#' location = "Location"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_icon_3.png")`
#' }}
#'
#' Taking a handful of starred reviews from a popular film review website, we
#' will attempt to format a numerical score (0 to 4) to use the `"star"` and
#' `"star-half"` icons. In this case, it is useful to generate the repeating
#' sequence of icon names (separated by commas) in the `rating` column before
#' introducing the table to [gt()]. We can make use of the numerical rating
#' values in `stars` within the `fmt_icon()` function with a little help from
#' the [from_column()] helper. Using that, we can dynamically adjust the icon's
#' `fill_alpha` (i.e., opacity) value and accentuate the films with higher
#' scores.
#'
#' ```r
#' dplyr::tibble(
#' film = c(
#' "The Passengers of the Night", "Serena", "The Father",
#' "Roma", "The Handmaiden", "Violet", "Vice"
#' ),
#' stars = c(3, 1, 3.5, 4, 4, 2.5, 1.5)
#' ) |>
#' dplyr::mutate(rating = dplyr::case_when(
#' stars %% 1 == 0 ~ strrep("star,", stars),
#' stars %% 1 != 0 ~ paste0(strrep("star,", floor(stars)), "star-half")
#' )) |>
#' gt() |>
#' fmt_icon(
#' columns = rating,
#' fill_color = "red",
#' fill_alpha = from_column("stars", fn = function(x) x / 4)
#' ) |>
#' cols_hide(columns = stars) |>
#' tab_source_note(
#' source_note = md(
#' "Data obtained from <https://www.rogerebert.com/reviews>."
#' )
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_icon_4.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-22
#'
#' @section Function Introduced:
#' *In Development*
#'
#' @import rlang
#' @export
fmt_icon <- function(
data,
columns = everything(),
rows = everything(),
height = "1em",
sep = " ",
stroke_color = NULL,
stroke_width = NULL,
stroke_alpha = NULL,
fill_color = NULL,
fill_alpha = NULL,
vertical_adj = NULL,
margin_left = NULL,
margin_right = NULL,
a11y = c("semantic", "decorative", "none")
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
# Determine if the fontawesome package is installed and stop the
# function if it is not present
rlang::check_installed("fontawesome", "to insert icons with `fmt_icons()`.")
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - height
# - sep
# - stroke_color
# - stroke_width
# - stroke_alpha
# - fill_color
# - fill_alpha
# - vertical_adj
# - margin_left
# - margin_right
# - a11y
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_icon",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_icon(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
height = p_i$height %||% height,
sep = p_i$sep %||% sep,
stroke_color = p_i$stroke_color %||% stroke_color,
stroke_width = p_i$stroke_width %||% stroke_width,
stroke_alpha = p_i$stroke_alpha %||% stroke_alpha,
fill_color = p_i$fill_color %||% fill_color,
fill_alpha = p_i$fill_alpha %||% fill_alpha,
vertical_adj = p_i$vertical_adj %||% vertical_adj,
margin_left = p_i$margin_left %||% margin_left,
margin_right = p_i$margin_right %||% margin_right,
a11y = p_i$a11y %||% a11y
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
a11y <- rlang::arg_match(a11y)
if (a11y == "semantic") {
a11y <- "sem"
}
if (a11y == "decorative") {
a11y <- "deco"
}
# Declare formatting function compatibility
compat <- c("character", "factor")
# In this case where strict mode is being used (with the option
# called "gt.strict_column_fmt"), stop the function if any of the
# resolved columns have data that is incompatible with this formatter
if (
!column_classes_are_valid(
data = data,
columns = {{ columns }},
valid_classes = compat
)
) {
if (isTRUE(getOption("gt.strict_column_fmt", TRUE))) {
cli::cli_abort(
"The `fmt_icon()` function can only be used on `columns`
with character or factor data."
)
}
}
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
# Generate an vector of empty strings that will eventually
# contain all of the link text
x_str <- character(length(x))
x_str_non_missing <- x[!is.na(x)]
x_str_non_missing <-
vapply(
seq_along(x_str_non_missing),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
if (grepl(",", x_str_non_missing[x])) {
icons <-
tolower(unlist(strsplit(x_str_non_missing[x], ",\\s*")))
} else {
icons <- tolower(x_str_non_missing[x])
}
# Automatically append `px` length unit when `height`
# is given as a number
if (is.numeric(height)) {
height <- paste0(height, "px")
}
# TODO: Parse to ensure that `icons` values are valid
out <- c()
for (y in seq_along(icons)) {
out_y <-
as.character(
fontawesome::fa(
name = icons[y],
fill = fill_color,
fill_opacity = fill_alpha,
stroke = stroke_color,
stroke_width = stroke_width,
stroke_opacity = stroke_alpha,
height = height,
width = NULL,
margin_left = margin_left,
margin_right = margin_right,
vertical_align = vertical_adj,
position = NULL,
prefer_type = "regular",
a11y = a11y
)
)
out <- c(out, out_y)
}
paste0(
"<span style=\"white-space:nowrap;\">",
paste0(out, collapse = sep),
"</span>"
)
}
)
x_str[!is.na(x)] <- x_str_non_missing
x_str[is.na(x)] <- as.character(NA_character_)
x_str
},
latex = function(x) {
x
},
rtf = function(x) {
x
},
word = function(x) {
x
},
default = function(x) {
x
}
)
)
}
#' Format Markdown text
#'
#' @description
#'
#' Any Markdown-formatted text in the incoming cells will be transformed to the
#' appropriate output type during render when using `fmt_markdown()`.
#'
#' @inheritParams fmt_number
#'
#' @param md_engine *Choice of Markdown engine*
#'
#' `singl-kw:[markdown|commonmark]` // *default:* `"markdown"`
#'
#' The engine preference for Markdown rendering. By default, this is set to
#' `"markdown"` where **gt** will use the **markdown** package for Markdown
#' conversion to HTML and LaTeX. The other option is `"commonmark"` and with
#' that the **commonmark** package will be used.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with the `md_engine` argument
#' of `fmt_markdown()` to obtain varying parameter values from a specified
#' column within the table. This means that each row could be formatted a little
#' bit differently.
#'
#' Please note that for this argument (`md_engine`), a [from_column()] call
#' needs to reference a column that has data of the `character` type. Additional
#' columns for parameter values can be generated with the [cols_add()] function
#' (if not already present). Columns that contain parameter data can also be
#' hidden from final display with [cols_hide()].
#'
#' @section Examples:
#'
#' Create a few Markdown-based text snippets.
#'
#' ```r
#' text_1a <- "
#' ### This is Markdown.
#'
#' Markdown’s syntax is comprised entirely of
#' punctuation characters, which punctuation
#' characters have been carefully chosen so as
#' to look like what they mean... assuming
#' you’ve ever used email.
#' "
#'
#' text_1b <- "
#' Info on Markdown syntax can be found
#' [here](https://daringfireball.net/projects/markdown/).
#' "
#'
#' text_2a <- "
#' The **gt** package has these datasets:
#'
#' - `countrypops`
#' - `sza`
#' - `gtcars`
#' - `sp500`
#' - `pizzaplace`
#' - `exibble`
#' "
#'
#' text_2b <- "
#' There's a quick reference [here](https://commonmark.org/help/).
#' "
#' ```
#'
#' Arrange the text snippets as a tibble using the `dplyr::tribble()` function.
#' then, create a **gt** table and format all columns with `fmt_markdown()`.
#'
#' ```r
#' dplyr::tribble(
#' ~Markdown, ~md,
#' text_1a, text_2a,
#' text_1b, text_2b,
#' ) |>
#' gt() |>
#' fmt_markdown(columns = everything()) |>
#' tab_options(table.width = px(400))
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_markdown_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-23
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @seealso The vector-formatting version of this function:
#' [vec_fmt_markdown()].
#'
#' @import rlang
#' @export
fmt_markdown <- function(
data,
columns = everything(),
rows = everything(),
md_engine = c("markdown", "commonmark")
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - md_engine
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_markdown",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_markdown(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
md_engine = p_i$md_engine %||% md_engine
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Ensure that arguments are matched
md_engine <- rlang::arg_match(md_engine)
# Pass `data`, `columns`, `rows`, and the formatting
# functions as a function list to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
md_to_html(x, md_engine = md_engine)
},
latex = function(x) {
markdown_to_latex(x, md_engine = md_engine)
},
rtf = function(x) {
markdown_to_rtf(x)
},
word = function(x) {
markdown_to_xml(x)
},
default = function(x) {
sub(
"\n$", "",
vapply(
x,
FUN.VALUE = character(1),
USE.NAMES = FALSE,
commonmark::markdown_text
)
)
}
)
)
}
#' Format by simply passing data through
#'
#' @description
#'
#' We can format values with the `fmt_passthrough()` function, which does little
#' more than: (1) coercing to `character` (as all the `fmt_*()` functions do),
#' and (2) applying decorator text via the `pattern` argument (the default is to
#' apply nothing). This foramtting function is useful when don't want to modify
#' the input data other than to decorate it within a pattern.
#'
#' @inheritParams fmt_number
#'
#' @param escape *Text escaping*
#'
#' `scalar<logical>` // *default:* `TRUE`
#'
#' An option to escape text according to the final output format of the table.
#' For example, if a LaTeX table is to be generated then LaTeX escaping would
#' be performed during rendering. By default this is set to `TRUE` but setting
#' as `FALSE` would be useful in the case where text is crafted for a specific
#' output format in mind.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Compatibility of arguments with the `from_column()` helper function:
#'
#' The [from_column()] helper function can be used with certain arguments of
#' `fmt_passthrough()` to obtain varying parameter values from a specified
#' column within the table. This means that each row could be formatted a little
#' bit differently. These arguments provide support for [from_column()]:
#'
#' - `escape`
#' - `pattern`
#'
#' Please note that for both of the aforementioned arguments, a [from_column()]
#' call needs to reference a column that has data of the correct type (this is
#' different for each argument). Additional columns for parameter values can be
#' generated with the [cols_add()] function (if not already present). Columns
#' that contain parameter data can also be hidden from final display with
#' [cols_hide()]. Finally, there is no limitation to how many arguments the
#' [from_column()] helper is applied so long as the arguments belong to this
#' closed set.
#'
#' @section Examples:
#'
#' Let's use the [`exibble`] dataset to create a single-column **gt** table
#' (with only the `char` column). Now we can pass the data in that column
#' through the 'non-formatter' that is `fmt_passthrough()`. While the the
#' function doesn't do any explicit formatting it has a feature common to all
#' other formatting functions: the `pattern` argument. So that's what we'll use
#' in this example, applying a simple pattern to the non-`NA` values that adds
#' an `"s"` character.
#'
#' ```r
#' exibble |>
#' dplyr::select(char) |>
#' gt() |>
#' fmt_passthrough(
#' rows = !is.na(char),
#' pattern = "{x}s"
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_passthrough_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-24
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @import rlang
#' @export
fmt_passthrough <- function(
data,
columns = everything(),
rows = everything(),
escape = TRUE,
pattern = "{x}"
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Begin support for `from_column()` objects passed to compatible arguments
#
# Supports parameters:
#
# - escape
# - pattern
arg_vals <-
mget(
get_arg_names(
function_name = "fmt_passthrough",
all_args_except = c("data", "columns", "rows")
)
)
if (args_have_gt_column_obj(arg_vals = arg_vals)) {
# Resolve the row numbers using the `resolve_vars` function
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
param_tbl <-
generate_param_tbl(
data = data,
arg_vals = arg_vals,
resolved_rows_idx = resolved_rows_idx
)
for (i in seq_len(nrow(param_tbl))) {
p_i <- as.list(param_tbl[i, ])
data <-
fmt_passthrough(
data = data,
columns = {{ columns }},
rows = resolved_rows_idx[i],
escape = p_i$escape %||% escape,
pattern = p_i$pattern %||% pattern
)
}
return(data)
}
#
# End support for `gt_column()` objects passed to compatible arguments
#
# Pass `data`, `columns`, `rows`, and the formatting
# functions (as a function list) to `fmt()`
fmt(
data = data,
columns = {{ columns }},
rows = {{ rows }},
fns = list(
html = function(x) {
# Create `x_str` with same length as `x`
x_str <- rep(NA_character_, length(x))
# Handle formatting of pattern
x_str <-
apply_pattern_fmt_x(
pattern,
values = x
)
if (escape) {
x_str <- process_text(text = x_str, context = "html")
}
x_str
},
latex = function(x) {
# Create `x_str` with same length as `x`
x_str <- rep(NA_character_, length(x))
# Handle formatting of pattern
x_str <-
apply_pattern_fmt_x(
pattern,
values = x
)
if (escape) {
x_str <- process_text(text = x_str, context = "latex")
}
x_str
},
rtf = function(x) {
# Create `x_str` with same length as `x`
x_str <- rep(NA_character_, length(x))
# Handle formatting of pattern
x_str <-
apply_pattern_fmt_x(
pattern,
values = x
)
if (escape) {
x_str <- process_text(text = x_str, context = "rtf")
}
x_str
},
default = function(x) {
# Create `x_str` with same length as `x`
x_str <- rep(NA_character_, length(x))
# Handle formatting of pattern
x_str <-
apply_pattern_fmt_x(
pattern,
values = x
)
x_str
}
)
)
}
#' Automatically format column data according to their values
#'
#' @description
#'
#' The `fmt_auto()` function will automatically apply formatting of various
#' types in a way that best suits the data table provided. The function will
#' attempt to format numbers such that they are condensed to an optimal width,
#' either with scientific notation or large-number suffixing. Currency values
#' are detected by currency codes embedded in the column name and formatted in
#' the correct way. Although the functionality here is comprehensive it's still
#' possible to reduce the scope of automatic formatting with the `scope`
#' argument and also by choosing a subset of columns and rows to which the
#' formatting will be applied.
#'
#' @inheritParams fmt_number
#'
#' @param scope *Scope of automatic formatting*
#'
#' `mult-kw:[numbers|currency]` // *default:* `c("numbers", "currency")`
#'
#' By default, the function will format both `"numbers"`-type values and
#' `"currency"`-type values though the scope can be reduced to a single type
#' of value to format.
#'
#' @param lg_num_pref *Large-number preference*
#'
#' `singl-kw:[sci|suf]` // *default:* `"sci"`
#'
#' When large numbers are present, there can be a fixed preference toward how
#' they are formatted. Choices are scientific notation for very small and very
#' large values (`"sci"`), or, the use of suffixed numbers (`"suf"`, for large
#' values only).
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Examples:
#'
#' Use the [`exibble`] dataset to create a **gt** table. Format all of the
#' columns automatically with the `fmt_auto()` function.
#'
#' ```r
#' exibble |>
#' gt() |>
#' fmt_auto()
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_auto_1.png")`
#' }}
#'
#' Let's now use the [`countrypops`] dataset to create another **gt** table.
#' We'll again use `fmt_auto()` to automatically format all columns but this
#' time the choice will be made to opt for large-number suffixing instead of
#' scientific notation. This is done by using the `lg_num_pref = "suf"` option.
#'
#' ```r
#' countrypops |>
#' dplyr::select(country_code_3, year, population) |>
#' dplyr::filter(country_code_3 %in% c("CHN", "IND", "USA", "PAK", "IDN")) |>
#' dplyr::filter(year > 1975 & year %% 5 == 0) |>
#' tidyr::spread(year, population) |>
#' dplyr::arrange(desc(`2020`)) |>
#' gt(rowname_col = "country_code_3") |>
#' fmt_auto(lg_num_pref = "suf")
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_auto_2.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-25
#'
#' @section Function Introduced:
#' `v0.9.0` (Mar 31, 2023)
#'
#' @import rlang
#' @export
fmt_auto <- function(
data,
columns = everything(),
rows = everything(),
scope = c("numbers", "currency"),
lg_num_pref = c("sci", "suf"),
locale = NULL
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
# Ensure that arguments are matched
lg_num_pref <- rlang::arg_match(lg_num_pref)
# Resolve the `locale` value here with the global locale value
locale <- resolve_locale(data = data, locale = locale)
currency_codes <- tolower(currencies[["curr_code"]])
resolved_columns <-
resolve_cols_c(
expr = {{ columns }},
data = data,
excl_stub = FALSE
)
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
vars_default <- dt_boxhead_get_vars_default(data = data)
# Get the intersection of the resolved columns and the default vars
columns_to_format <- base::intersect(vars_default, resolved_columns)
# Get the internal data table
data_tbl <- dt_data_get(data = data)
for (i in seq_along(columns_to_format)) {
col_name <- columns_to_format[i]
col_vec <- data_tbl[[columns_to_format[i]]]
if (
is.numeric(col_vec) &&
"currency" %in% scope &&
grepl(
paste0("(\\.|_)(", paste0(currency_codes, collapse = "|"), ")$"),
tolower(col_name)
)
) {
# Case where numeric values are inferred to be currency values
# since the column name contains a valid currency code after a
# period or underscore
# Obtain the currency code (which is known to exist and be valid)
# from the column name
currency <- toupper(sub(".*(?=.{3}$)", "", col_name, perl = TRUE))
# Format all values in the selected column as currency values
data <-
fmt_currency(
data = data,
columns = columns_to_format[i],
rows = rows,
currency = currency,
locale = locale
)
} else if (is.numeric(col_vec) && "numbers" %in% scope) {
# Case where column values are numeric or integer values,
# known through inspection of the column class
# Obtain the row series vector which actually just `resolved_rows_idx`
row_series_vec <- resolved_rows_idx
# Create a subset of `col_vec` which should only correspond to the
# resolved rows
col_vec <- col_vec[row_series_vec]
# Determine whether the column class is of the integer type or
# integer-like
is_integer_column <- is.integer(col_vec) || rlang::is_integerish(col_vec)
# Conditions for numbers in `col_vec` to be good candidates for
# a scientific notation representation
rows_sci <- col_vec != 0 & (abs(col_vec) < 1E-3 | abs(col_vec) >= 1E6)
# Conditions for numbers in `col_vec` to be suitable for a
# large-number-suffixing treatment (best in the millions to
# trillions range)
rows_suf <- abs(col_vec) >= 1E6 & col_vec < 1E15
if (lg_num_pref == "sci") {
# In the case where we prefer to have scientific notation
# for very small and very large numbers, we need to partition
# the `row_series_vec` into `rows_num` and `rows_sci` vectors
# of integers; these represent the rows to be formatted in
# the column by either `fmt_number()` or `fmt_scientific()`
# This is the vector of row indices that will be used
# for scientific notation formatting
rows_sci_vec <- row_series_vec[rows_sci]
# The remainder of values in `row_series_vec` will undergo
# numeric formatting
rows_num_vec <- base::setdiff(row_series_vec, rows_sci_vec)
# Set `row_suf_vec` as a zero-length vector because the
# preference is to not have any suffixed numbers at all
rows_suf_vec <- integer(0)
}
if (lg_num_pref == "suf") {
# In the case where we would rather have suffixed numbers
# represent large values (in the millions to trillions range);
# we can't, however, rule out scientific notation for very large
# or very small values though
# This is the vector of row indices that will be used
# for scientific notation formatting
rows_sci_vec <- row_series_vec[rows_sci & !rows_suf]
# If there's an overlapping range then preference is given
# to the suffixing form
rows_suf_vec <- row_series_vec[rows_sci & rows_suf]
# The remainder of values in `row_series_vec` will undergo
# numeric formatting without large number suffixing
rows_num_vec <-
base::setdiff(row_series_vec, c(rows_sci_vec, rows_suf_vec))
}
# Remove NA values from the different `vec` objects
rows_sci_vec <- rows_sci_vec[!is.na(rows_sci_vec)]
rows_suf_vec <- rows_suf_vec[!is.na(rows_suf_vec)]
rows_num_vec <- rows_num_vec[!is.na(rows_num_vec)]
if (length(rows_num_vec) > 0) {
# Format non-scientific, non-suffixed values with
# `fmt_number()` if they aren't integer or integer-like
data <-
fmt_number(
data = data,
columns = columns_to_format[i],
rows = rows_num_vec,
decimals = if (is_integer_column) 0 else 3,
drop_trailing_zeros = TRUE,
locale = locale
)
}
if (length(rows_suf_vec) > 0) {
# Format values with large-number suffixes using
# `fmt_number(..., suffixing = TRUE)`
data <-
fmt_number(
data = data,
columns = columns_to_format[i],
rows = rows_suf_vec,
decimals = 1,
drop_trailing_zeros = TRUE,
suffixing = TRUE,
locale = locale
)
}
if (length(rows_sci_vec) > 0) {
# Format values with in scientific notation using
# `fmt_scientific()`
data <-
fmt_scientific(
data = data,
columns = columns_to_format[i],
rows = rows_sci_vec,
decimals = if (is_integer_column) 0 else 3,
locale = locale
)
}
if (length(rows_sci_vec) < 1 && length(rows_suf_vec) < 1) {
data <-
cols_align_decimal(
data = data,
columns = columns_to_format[i],
locale = locale
)
}
}
}
data
}
#' Set a column format with a formatter function
#'
#' @description
#'
#' The `fmt()` function provides a way to execute custom formatting
#' functionality with raw data values in a way that can consider all output
#' contexts.
#'
#' Along with the `columns` and `rows` arguments that provide some precision in
#' targeting data cells, the `fns` argument allows you to define one or more
#' functions for manipulating the raw data.
#'
#' If providing a single function to `fns`, the recommended format is in the
#' form: `fns = function(x) ...`. This single function will format the targeted
#' data cells the same way regardless of the output format (e.g., HTML, LaTeX,
#' RTF).
#'
#' If you require formatting of `x` that depends on the output format, a list of
#' functions can be provided for the `html`, `latex`, `rtf`, and `default`
#' contexts. This can be in the form of `fns = list(html = function(x) ...,
#' latex = function(x) ..., default = function(x) ...)`. In this
#' multiple-function case, we recommended including the `default` function as a
#' fallback if all contexts aren't provided.
#'
#' @inheritParams fmt_number
#'
#' @param compat *Formatting compatibility*
#'
#' `vector<character>` // *default:* `NULL` (`optional`)
#'
#' An optional vector that provides the compatible classes for the formatting.
#' By default this is `NULL`.
#'
#' @param fns *Formatting functions*
#'
#' `function|list of functions` // **required**
#'
#' Either a single formatting function or a named list of functions.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Targeting cells with `columns` and `rows`:
#'
#' Targeting of values is done through `columns` and additionally by `rows` (if
#' nothing is provided for `rows` then entire columns are selected). The
#' `columns` argument allows us to target a subset of cells contained in the
#' resolved columns. We say resolved because aside from declaring column names
#' in `c()` (with bare column names or names in quotes) we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns and rows are selected (with the `everything()`
#' defaults). Cell values that are incompatible with a given formatting function
#' will be skipped over, like `character` values and numeric `fmt_*()`
#' functions. So it's safe to select all columns with a particular formatting
#' function (only those values that can be formatted will be formatted), but,
#' you may not want that. One strategy is to format the bulk of cell values with
#' one formatting function and then constrain the columns for later passes with
#' other types of formatting (the last formatting done to a cell is what you get
#' in the final output).
#'
#' Once the columns are targeted, we may also target the `rows` within those
#' columns. This can be done in a variety of ways. If a stub is present, then we
#' potentially have row identifiers. Those can be used much like column names in
#' the `columns`-targeting scenario. We can use simpler **tidyselect**-style
#' expressions (the select helpers should work well here) and we can use quoted
#' row identifiers in `c()`. It's also possible to use row indices (e.g.,
#' `c(3, 5, 6)`) though these index values must correspond to the row numbers of
#' the input data (the indices won't necessarily match those of rearranged rows
#' if row groups are present). One more type of expression is possible, an
#' expression that takes column values (can involve any of the available columns
#' in the table) and returns a logical vector. This is nice if you want to base
#' formatting on values in the column or another column, or, you'd like to use a
#' more complex predicate expression.
#'
#' @section Examples:
#'
#' Use the [`exibble`] dataset to create a **gt** table. Using the `fmt()`
#' function, we'll format the numeric values in the `num` column with a function
#' supplied to the `fns` argument. This supplied function will take values in
#' the column (`x`), multiply them by 1000, and exclose them in single quotes.
#'
#' ```r
#' exibble |>
#' dplyr::select(-row, -group) |>
#' gt() |>
#' fmt(
#' columns = num,
#' fns = function(x) {
#' paste0("'", x * 1000, "'")
#' }
#' )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_fmt_1.png")`
#' }}
#'
#' @family data formatting functions
#' @section Function ID:
#' 3-26
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @import rlang
#' @export
fmt <- function(
data,
columns = everything(),
rows = everything(),
compat = NULL,
fns
) {
# Perform input object validation
stop_if_not_gt_tbl(data = data)
#
# Resolution of columns and rows as character vectors
#
resolved_columns <-
resolve_cols_c(
expr = {{ columns }},
data = data,
excl_stub = FALSE
)
resolved_rows_idx <-
resolve_rows_i(
expr = {{ rows }},
data = data
)
# If a single function is supplied to `fns` then
# repackage that into a list as the `default` function
if (is.function(fns)) {
fns <- list(default = fns)
}
# Create the `formatter_list`, which is a bundle of
# formatting functions for specific columns and rows
formatter_list <-
list(
func = fns,
cols = resolved_columns,
rows = resolved_rows_idx,
compat = compat
)
dt_formats_add(
data = data,
formats = formatter_list
)
}
#' Insert separator marks to an integer to conform to Indian numbering system
#'
#' @param integer The integer portion of a numeric value. Should be supplied as
#' a length-1 character vector. The element should only contain numeral
#' characters.
#'
#' @noRd
insert_seps_ind <- function(integer) {
# The `fmt_fraction()` formatter can sometimes generate
# empty strings; if seen here, just return them unchanged
if (integer == "") {
return(integer)
}
# Ensure that integer-based strings only contain numbers
if (!grepl("^[0-9]+?$", integer)) {
cli::cli_abort(
"The `integer` string must only contain numbers."
)
}
# Return integer unchanged if there are no commas to insert
if (nchar(integer) < 4) return(integer)
# Generate an 'insertion sequence' (where to place the separators)
insertion_seq <- cumsum(c(3, rep(2, floor((nchar(integer) - 4) / 2)))) + 1
insertion_seq <- (nchar(integer) - insertion_seq) + 2
split_strings <- split_str_by_index(target = integer, index = insertion_seq)
paste(split_strings, collapse = ",")
}
split_str_by_index <- function(target, index) {
index <- sort(index)
substr(
rep(target, length(index) + 1),
start = c(1, index),
stop = c(index - 1, nchar(target))
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.