#' Restore trailing zeros
#'
#' @description `restore_zeros()` takes a vector with values that might have
#' lost trailing zeros, most likely from being registered as numeric. It turns
#' each value into a string and adds trailing zeros until the mantissa hits
#' some limit.
#'
#' The default for that limit is the number of digits in the longest mantissa
#' of the vector's values. The length of the integer part plays no role.
#'
#' Don't rely on the default limit without checking: The original width could
#' have been larger because the longest extant mantissa might itself have lost
#' trailing zeros.
#'
#' `restore_zeros_df()` is a variant for data frames. It wraps
#' `restore_zeros()` and, by default, applies it to all columns that are
#' coercible to numeric.
#' @details These functions exploit the fact that groups of summary values such
#' as means or percentages are often reported to the same number of decimal
#' places. If such a number is known but values were not entered as strings,
#' trailing zeros will be lost. In this case, `restore_zeros()` or
#' `restore_zeros_df()` will be helpful to prepare data for consistency
#' testing functions such as [`grim_map()`] or [`grimmer_map()`].
#' @section Displaying decimal places: You might not see all decimal places of
#' numeric values in a vector, and consequently wonder if `restore_zeros()`,
#' when applied to the vector, adds too many zeros. That is because displayed
#' numbers, unlike stored numbers, are often rounded.
#'
#' For a vector `x`, you can count the characters of the longest mantissa from
#' among its values like this:
#'
#' `x %>% decimal_places() %>% max()`
#'
#' @param x Numeric (or string coercible to numeric). Vector of numbers that
#' might have lost trailing zeros.
#' @param width Integer. Number of decimal places the mantissas should have,
#' including the restored zeros. Default is `NULL`, in which case the number
#' of characters in the longest mantissa will be used instead.
#' @param sep_in Substring that separates the input's mantissa from its integer
#' part. Default is `"\\."`, which renders a decimal point.
#' @param sep_out Substring that will be returned in the output to separate the
#' mantissa from the integer part. By default, `sep_out` is the same as
#' `sep_in`.
#' @param data Data frame or matrix. Only in `restore_zeros_df()`, and instead
#' of `x`.
#' @param cols Only in `restore_zeros_df()`. Select columns from `data` using
#' \href{https://tidyselect.r-lib.org/reference/language.html}{tidyselect}.
#' Default is `everything()`, which selects all columns that pass the test of
#' `check_numeric_like`.
#' @param check_numeric_like Logical. Only in `restore_zeros_df()`. If `TRUE`
#' (the default), the function will skip columns that are not numeric or
#' coercible to numeric, as determined by [`is_numeric_like()`].
#' @param check_decimals Logical. Only in `restore_zeros_df()`. If set to
#' `TRUE`, the function will skip columns where no values have any decimal
#' places. Default is `FALSE`.
#' @param ... Only in `restore_zeros_df()`. These dots must be empty.
#' @return
#' - For `restore_zeros()`, a string vector. At least some of the strings
#' will have newly restored zeros, unless (1) all input values had the same
#' number of decimal places, and (2) `width` was not specified as a number
#' greater than that single number of decimal places.
#' - For `restore_zeros_df()`, a data frame.
#'
#' @export
#'
#' @include utils.R
#'
#' @seealso Wrapped functions: [`sprintf()`].
#'
#' @examples
#' # By default, the target width is that of
#' # the longest mantissa:
#' vec <- c(212, 75.38, 4.9625)
#' vec %>%
#' restore_zeros()
#'
#' # Alternatively, supply a number via `width`:
#' vec %>%
#' restore_zeros(width = 6)
#'
#' # For better printing:
#' iris <- tibble::as_tibble(iris)
#'
#' # Apply `restore_zeros()` to all numeric
#' # columns, but not to the factor column:
#' iris %>%
#' restore_zeros_df()
#'
#' # Select columns as in `dplyr::select()`:
#' iris %>%
#' restore_zeros_df(starts_with("Sepal"), width = 3)
restore_zeros <- function(x, width = NULL, sep_in = "\\.", sep_out = sep_in) {
# Make sure no whitespace (from values that already were strings) is factored
# into the count:
x <- stringr::str_trim(x)
# Determine the maximal width to which the mantissas should be padded in
# accordance with the `width` argument, the default of which, `NULL`, makes
# the function go by the maximal length of already-present mantissas:
if (is.null(width)) {
# Count characters of the mantissa part:
parts <- stringr::str_split_fixed(x, sep_in, n = 2L)
width_mantissa <- stringr::str_length(parts[, 2L])
# Throw a warning if `x` can't be formatted with the given arguments:
if (length(x) == 1L) {
cli::cli_warn(c(
"No trailing zeros can be restored",
"!" = "`x` has length 1",
">" = "Specify `width` to predetermine a number of decimal places \\
to which `x` values should be padded."
))
} else if (all(width_mantissa == 0L)) {
cli::cli_warn(c(
"No trailing zeros can be restored",
"!" = "None of the {length(x)} `x` values has any decimal places.",
">" = "Specify `width` to predetermine a number of decimal places \\
to which `x` values should be padded."
))
}
# The number of decimal places to which `x` values will be padded with zeros
# is determined by the number of characters in the longest mantissa...
width_target <- max(width_mantissa, na.rm = TRUE)
} else {
# ... unless the user manually specified that target number via `width`:
width_target <- width
}
# In `x`, if integers and mantissas are separated by something other than
# decimal points, these separators need to be temporarily changed to points,
# so that `sprintf()` will be able to operate on `x` below:
if (any(sep_in != "\\.")) {
x <- stringr::str_replace(x, sep_in, "\\.")
}
# Assemble the formatting expression, determined by `width_target` -- the
# desired number of decimal places to which the `x` values should be padded:
out_format <- paste0("%.", width_target, "f")
# Pad `x` with the correct amount of trailing zeros:
out <- sprintf(out_format, as.numeric(x))
# By default, the separator in the output vector should be a decimal point,
# but it might have been overridden -- either directly via `sep_out` or
# indirectly via `sep_in` (because the default for `sep_out` is `sep_in`). If
# so, it now takes its place again. In any case, the output is returned:
if (all(sep_out == "\\.")) {
out
} else {
stringr::str_replace(out, "\\.", sep_out)
}
}
#' @rdname restore_zeros
#' @export
restore_zeros_df <- function(data, cols = everything(),
check_numeric_like = TRUE, check_decimals = FALSE,
width = NULL, sep_in = "\\.", sep_out = NULL,
...) {
# Check whether the user specified any "old" arguments: those starting on a
# dot. This check is now the only remaining purpose of the `...` dots because
# these are no longer meant to be used. Any other arguments passed through
# them should still lead to an error:
check_new_args_without_dots(
data, dots = rlang::enquos(...), old_args = c(
".data", ".check_decimals", ".width", ".sep_in", ".sep_out", ".sep"
), name_fn = "restore_zeros_df"
)
# Check that `data` is a data frame or matrix:
if (!is.data.frame(data)) {
if (is.matrix(data)) {
data <- tibble::as_tibble(data, .name_repair = "unique")
} else {
cli::cli_abort(c(
"!" = "`data` must be a data frame (or a matrix).",
"x" = "It is {an_a_type(data)}.",
">" = "Did you mean `restore_zeros()`, without `_df`?"
))
}
}
# Names of selection-suitable columns:
names_num_cols <- data %>%
dplyr::select(where(is_numeric_like)) %>%
colnames()
# By default, selection is restricted to columns that are numeric or coercible
# to numeric. This is checked with an internal helper from the utils.R file:
if (check_numeric_like) {
selection2 <- rlang::expr(all_of(names_num_cols))
} else {
selection2 <- rlang::expr(dplyr::everything())
}
# If desired by the user, create an additional selection criterion: In each
# numeric-like column, at least one value must have at least one decimal
# place. Otherwise...
if (check_decimals) {
selection3 <- rlang::expr(where(function(x, sep = "\\.") {
!is_numeric_like(x) || !all(decimal_places(x, sep = sep_in) == 0L)
}
))
} else {
# ... the new variable is set up to be evaluated as `everything()`, which is
# an identity element of the `&` operator in tidyselect:
selection3 <- rlang::expr(dplyr::everything())
}
# Column selection is outsourced here so that the result can also be used in a
# check below:
cols_to_select <- rlang::expr({{ cols }} & !!selection2 & !!selection3)
cols_to_select <- tidyselect::eval_select(cols_to_select, data)
# Check whether any selected columns are not numeric-like, in which case they
# can't have any decimal places restored. If so...
names_cols_select <- names(cols_to_select)
names_wrong_cols <- names_cols_select[!names_cols_select %in% names_num_cols]
# ...the user is warned:
if (length(names_wrong_cols) > 0L) {
warn_wrong_columns_selected(
names_wrong_cols,
msg_exclusion = "didn't have any decimal places restored",
msg_reason = "numeric-like",
msg_it_they = c("It isn't", "They aren't")
)
}
# By default, a columns is selected if and only if it's numeric-like.
# Additional constrains might come via `selection2` or `selection3` (see
# `cols_to_select` above; by default, only `selection2` takes effect). The
# `.fns` argument uses an anonymous function to pass on all the named
# arguments to `restore_zeros()`:
dplyr::mutate(data, dplyr::across(
.cols = all_of(cols_to_select),
.fns = function(data_dummy) {
restore_zeros(
x = data_dummy, width = width,
sep_in = sep_in, sep_out = sep_out
)
}
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.