R/summary_rows.R

Defines functions normalize_fmt_fns normalize_summary_fns grand_summary_rows summary_rows

Documented in grand_summary_rows summary_rows

#------------------------------------------------------------------------------#
#
#                /$$
#               | $$
#     /$$$$$$  /$$$$$$
#    /$$__  $$|_  $$_/
#   | $$  \ $$  | $$
#   | $$  | $$  | $$ /$$
#   |  $$$$$$$  |  $$$$/
#    \____  $$   \___/
#    /$$  \ $$
#   |  $$$$$$/
#    \______/
#
#  This file is part of the 'rstudio/gt' project.
#
#  Copyright (c) 2018-2024 gt authors
#
#  For full copyright and license information, please look at
#  https://gt.rstudio.com/LICENSE.html
#
#------------------------------------------------------------------------------#


#' Add group-wise summary rows using aggregation functions
#'
#' @description
#'
#' Add summary rows to one or more row groups by using the table data and any
#' suitable aggregation functions. Multiple summary rows can be added for
#' selected groups via expressions given to `fns`. You can selectively format
#' the values in the resulting summary cells by use of formatting expressions in
#' `fmt`.
#'
#' @inheritParams fmt_number
#'
#' @param groups *Specification of row group IDs*
#'
#'   `<row-group-targeting expression>` // *default:* `everything()`
#'
#'   The row groups to which targeting operations are constrained. Can either be
#'   a series of row group ID values provided in [c()] or a select helper
#'   function. Examples of select helper functions include [starts_with()],
#'   [ends_with()], [contains()], [matches()], [one_of()], [num_range()], and
#'   [everything()]. By default this is set to [everything()], which means that
#'   all available groups will obtain summary rows.
#'
#' @param columns *Columns to target*
#'
#'   `<column-targeting expression>` // *default:* `everything()`
#'
#'   The columns for which the summaries should be calculated. Can either
#'   be a series of column names provided in [c()], a vector of column indices,
#'   or a select helper function. Examples of select helper functions include
#'   [starts_with()], [ends_with()], [contains()], [matches()], [one_of()],
#'   [num_range()], and [everything()].
#'
#' @param fns *Aggregation Expressions*
#'
#'   `<expression|list of expressions>`
#'
#'   Functions used for aggregations. This can include base functions like
#'   `mean`, `min`, `max`, `median`, `sd`, or `sum` or any other user-defined
#'   aggregation function. Multiple functions, each of which would generate a
#'   different row, are to be supplied within a `list()`. We can specify the
#'   functions by use of function names in quotes (e.g., `"sum"`), as bare
#'   functions (e.g., `sum`), or in formula form (e.g., `minimum ~ min(.)`)
#'   where the LHS could be used to supply the summary row label and ID values.
#'   More information on this can be found in the
#'   *Aggregation expressions for `fns`* section.
#'
#' @param fmt *Formatting expressions*
#'
#'   `<expression|list of expressions>`
#'
#'   Formatting expressions in formula form. The RHS of `~` should contain a
#'   formatting call (e.g., `~ fmt_number(., decimals = 3, use_seps = FALSE`).
#'   Optionally, the LHS could contain a group-targeting expression (e.g.,
#'   `"group_a" ~ fmt_number(.)`). More information on this can be found in the
#'   *Formatting expressions for `fmt`* section.
#'
#' @param side *Side used for placement of summary rows*
#'
#'   `singl-kw:[bottom|top]` // *default:* `"bottom"`
#'
#'   Should the summary rows be placed at the `"bottom"` (the default) or the
#'   `"top"` of the row group?
#'
#' @param missing_text *Replacement text for `NA` values*
#'
#'   `scalar<character>` // *default:* `"---"`
#'
#'   The text to be used in place of `NA` values in summary cells with no data
#'   outputs.
#'
#' @param formatter *[Deprecated] Formatting function*
#'
#'   `<expression>`
#'
#'   Deprecated, please use `fmt` instead. This was previously used as a way to
#'   input a formatting function name, which could be any of the `fmt_*()`
#'   functions available in the package (e.g., [fmt_number()], [fmt_percent()],
#'   etc.), or a custom function using [fmt()]. The options of a formatter can
#'   be accessed through `...`.
#'
#' @param ... *[Deprecated] Formatting arguments*
#'
#'   `<Named arguments>`
#'
#'   Deprecated (along with `formatter`) but otherwise used for argument values
#'   for a formatting function supplied in `formatter`. For example, if using
#'   `formatter = fmt_number`, options such as `decimals = 1`, `use_seps =
#'   FALSE`, and the like can be used here.
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Using `columns` to target column data for aggregation:
#'
#' Targeting of column data for which aggregates should be generated is done
#' through the `columns` argument. We can declare column names in `c()` (with
#' bare column names or names in quotes) or we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns are selected (with the `everything()` default). This
#' default may be not what's needed unless all columns can undergo useful
#' aggregation by expressions supplied in `fns`.
#'
#' @section Aggregation expressions for `fns`:
#'
#' There are a number of ways to express how an aggregation should work for
#' each summary row. In addition to that, we have the ability to pass important
#' information such as the summary row ID value and its label (the former
#' necessary for targeting within [tab_style()] or [tab_footnote()] and the
#' latter used for display in the rendered table). Here are a number of
#' instructive examples for how to supply such expressions.
#'
#' ### Double-sided formula with everything supplied
#'
#' We can be explicit and provide a double-sided formula (in the form
#' `<LHS> ~ <RHS>`) that expresses everything about a summary row. That is, it
#' has an aggregation expression (where `.` represents the data in the
#' focused column). Here's an example:
#'
#' `list(id = "minimum", label = "min") ~ min(., na.rm = TRUE)`
#'
#' The left side (the list) contains named elements that identify the `id` and
#' `label` for the summary row. The right side has an expression for obtaining
#' a minimum value (dropping `NA` values in the calculation).
#'
#' The `list()` can be replaced with `c()` but the advantage of a list is
#' allowing the use of the [md()] and [html()] helper functions. The above
#' example can be written as:
#'
#' `list(id = "minimum", label = md("**Minimum**")) ~ min(., na.rm = TRUE)`
#'
#' and we can have that label value interpreted as Markdown text.
#'
#' ### Function names in quotes
#'
#' With `fns = "min"` we get the equivalent of the fuller expression:
#'
#' `list(id = "min", label = "min") ~ min(., na.rm = TRUE)`
#'
#' For sake of convenience, common aggregation functions with the `na.rm`
#' argument will be rewritten with the `na.rm = TRUE` option. These functions
#' are: `"min"`, `"max"`, `"mean"`, `"median"`, `"sd"`, and `"sum"`.
#'
#' Should you need to specify multiple aggregation functions in this way (giving
#' you multiple summary rows), use `c()` or `list()`.
#'
#' ### RHS formula expressions
#'
#' With `fns = ~ min(.)` or `fns = list(~ min(.))`, **gt** will use the function
#' name as the `id` and `label`. The expansion of this shorthand to full form
#' looks like this:
#'
#' `list(id = "min", label = "min") ~ min(.)`
#'
#' The RHS expression is kept as written and the name portion is both the `id`
#' and the `label`.
#'
#' ### Named vector or list with RHS formula expression
#'
#' Using `fns = c(minimum = ~ min(.))` or `fns = list(minimum = ~ min(.))`
#' expands to this:
#'
#' `list(id = "minimum", label = "minimum") ~ min(.)`
#'
#' ### Unnamed vector or list with RHS formula expression
#'
#' With `fns = c("minimum", "min") ~ min(.)` or
#' `fns = list("minimum", "min") ~ min(.)` the LHS contains the `label` and `id`
#' values and, importantly, the order is `label` first and `id` second. This can
#' be rewritten as:
#'
#' `list(id = "min", label = "minimum") ~ min(.)`
#'
#' If the vector or list is partially named, **gt** has enough to go on to
#' disambiguate the unnamed element. So with
#' `fns = c("minimum", label = "min") ~ min(.)`, `"min"` is indeed the `label`
#' and `"minimum"` is taken as the `id` value.
#'
#' ### A fully named list with three specific elements
#'
#' We can avoid using a formula if we are satisfied with the default options of
#' a function (except some of those functions with the `na.rm` options, see
#' above). Instead, a list with the named elements `id`, `label`, and `fn` could
#' be used. It can look like this:
#'
#' `fns = list(id = "mean_id", label = "average", fn = "mean")`
#'
#' which translates to
#'
#' `list(id = "mean_id", label = "average") ~ mean(., na.rm = TRUE)`
#'
#' @section Formatting expressions for `fmt`:
#'
#' Given that we are generating new data in a table, we might also want to
#' take the opportunity to format those new values right away. We can do this
#' in the `fmt` argument, either with a single expression or a number of them
#' in a list.
#'
#' ### Formatting cells across all groups
#'
#' We can supply a one-sided (RHS only) or two-sided expression (targeting
#' groups) to `fmt`, and, several can be provided in a list. The RHS will always
#' contain an expression that uses a formatting function (e.g., [fmt_number()],
#' [fmt_currency()], etc.) and it must contain an initial `.` that stands for
#' the data object. If performing numeric formatting on all columns in the new
#' summary rows, it might look something like this:
#'
#' `fmt = ~ fmt_number(., decimals = 1, use_seps = FALSE)`
#'
#' We can use the `columns` and `rows` arguments that are available in every
#' formatting function. This allows us to format only a subset of columns or
#' rows. Summary rows can be targeted by using their ID values and these are
#' settable within expressions given to `fns` (see the *Aggregation expressions
#' for `fns`* section for details on this). Here's an example with hypothetical
#' column and row names:
#'
#' `fmt = ~ fmt_number(., columns = num, rows = "mean", decimals = 3)`
#'
#' ### Formatting cells in specific groups
#'
#' A two-sided expression is needed for targeting the formatting directives to
#' specific summary row groups. In this format, the LHS should contain an
#' expression that resolves to a set of available groups. We can use a single
#' row group name in quotes, several of those in a vector, or a select helper
#' expression like `starts_with()` or `matches()`.
#'
#' In a situation where summary rows were generated across the row groups named
#' `"group_1"`, `"group_2"`, and `"group_3"`, we could format all summary cells
#' in `"group_2"` with the following:
#'
#' `fmt = "group_2" ~ fmt_number(., decimals = 1, use_seps = FALSE)`
#'
#' If you wanted to target the latter two groups, this can be done:
#'
#' `fmt = matches("2|3") ~ fmt_number(., decimals = 1, use_seps = FALSE)`
#'
#' Should you need to target a single cell, the LHS expression for group
#' targeting could be paired with single values for `columns` and `rows` on the
#' RHS formatting expression. Like this:
#'
#' `fmt = "group_1" ~ fmt_number(., columns = num, rows = "mean")`
#'
#' @section Extraction of summary rows:
#'
#' Should we need to obtain the summary data for external purposes, the
#' [extract_summary()] function can be used with a `gt_tbl` object where summary
#' rows were added via `summary_rows()` or [grand_summary_rows()].
#'
#' @section Examples:
#'
#' Use a modified version of [`sp500`] dataset to create a **gt** table with row
#' groups and row labels. Create the summary rows labeled `min`, `max`, and
#' `avg` by row group (where each each row group is a week number) with the
#' `summary_rows()` function.
#'
#' ```r
#' sp500 |>
#'   dplyr::filter(date >= "2015-01-05" & date <= "2015-01-16") |>
#'   dplyr::arrange(date) |>
#'   dplyr::mutate(week = paste0("W", strftime(date, format = "%V"))) |>
#'   dplyr::select(-adj_close, -volume) |>
#'   gt(
#'     rowname_col = "date",
#'     groupname_col = "week"
#'   ) |>
#'   summary_rows(
#'     fns = list(
#'       "min",
#'       "max",
#'       list(label = "avg", fn = "mean")
#'     ),
#'     fmt = ~ fmt_number(., use_seps = FALSE)
#'   )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_summary_rows_1.png")`
#' }}
#'
#' Using the [`countrypops`] dataset, let's process that a bit before giving it
#' to **gt**. We can create a summary rows with totals that appear at the top of
#' each row group (with `side = "top"`). We can define the aggregation with a
#' list that contains parameters for the summary row label (`md("**ALL**")`),
#' the shared ID value of those rows across groups (`"totals"`), and the
#' aggregation function (expressed as `"sum"`, which **gt** recognizes as the
#' `sum()` function). To top it all off, we'll add background fills to the
#' summary rows with [tab_style()].
#'
#' ```r
#' countrypops |>
#'   dplyr::filter(
#'     country_code_2 %in% c("BR", "RU", "IN", "CN", "FR", "DE", "IT", "GB")
#'   ) |>
#'   dplyr::filter(year %% 10 == 0) |>
#'   dplyr::select(country_name, year, population) |>
#'   tidyr::pivot_wider(names_from = year, values_from = population) |>
#'   gt(rowname_col = "country_name") |>
#'   tab_row_group(
#'     label = md("*BRIC*"),
#'     rows = c("Brazil", "Russian Federation", "India", "China"),
#'     id = "bric"
#'   ) |>
#'   tab_row_group(
#'     label = md("*Big Four*"),
#'     rows = c("France", "Germany", "Italy", "United Kingdom"),
#'     id = "big4"
#'   ) |>
#'   row_group_order(groups = c("bric", "big4")) |>
#'   tab_stub_indent(rows = everything()) |>
#'   tab_header(title = "Populations of the BRIC and Big Four Countries") |>
#'   tab_spanner(columns = everything(), label = "Year") |>
#'   fmt_number(n_sigfig = 3, suffixing = TRUE) |>
#'   summary_rows(
#'     fns =  list(label = md("**ALL**"), id = "totals", fn = "sum"),
#'     fmt = ~ fmt_number(., n_sigfig = 3, suffixing = TRUE),
#'     side = "top"
#'   ) |>
#'   tab_style(
#'     locations = cells_summary(),
#'     style = cell_fill(color = "lightblue" |> adjust_luminance(steps = +1))
#'   )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_summary_rows_2.png")`
#' }}
#'
#' @family row addition/modification functions
#' @section Function ID:
#' 6-1
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
summary_rows <- function(
    data,
    groups = everything(),
    columns = everything(),
    fns = NULL,
    fmt = NULL,
    side = c("bottom", "top"),
    missing_text = "---",
    formatter = NULL,
    ...
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Get the correct `side` value
  side <- rlang::arg_match(side)

  # Collect all provided formatting options in a list
  formatter_options <- list(...)

  # Perform a partial build of the table to obtain the current
  # state of `group_id` values in the table; we should not assign this
  # to `data` but to a new object (`data_built`) so that we do not
  # affect the return value of `data`
  data_built <- dt_body_build_init(data = data)
  data_built <- dt_groups_rows_build(data = data_built, context = "html")
  groups_rows_tbl <- dt_groups_rows_get(data = data_built)

  # Pull a character vector of available groups from `groups_rows_tbl`
  available_groups <- dplyr::pull(groups_rows_tbl, group_id)

  # Return data unchanged if there are no groups and this summary is not
  # a grand summary
  if (length(available_groups) < 1) {
    if (!(
      is.character(groups) &&
      length(groups) == 1 &&
      groups == ":GRAND_SUMMARY:"
    )) {
      return(data)
    }
  }

  # Resolve the group names
  groups <-
    resolve_groups(
      expr = {{ groups }},
      vector = available_groups
    )

  # Return data unchanged if no groups were resolved
  if (is.null(groups)) {
    return(data)
  }

  # Resolve the column names
  columns <-
    resolve_cols_c(
      expr = {{ columns }},
      data = data
    )

  stub_available <- dt_stub_df_exists(data = data)

  # If there isn't a stub available, create an
  # 'empty' stub (populated with empty strings);
  # the stub is necessary for summary row labels
  if (!stub_available) {

    data <-
      dt_boxhead_add_var(
        data = data,
        var = rowname_col_private,
        type = "stub",
        column_label = list(rowname_col_private),
        column_align = "left",
        column_width = list(NULL),
        hidden_px = list(NULL),
        add_where = "bottom"
      )

    # Add the `"::rowname::"` column into `_data`
    data$`_data` <-
      dplyr::mutate(
        data$`_data`,
        !!rowname_col_private := rep("", nrow(data$`_data`))
      )
    data$`_data` <-
      dplyr::select(
        data$`_data`,
        dplyr::everything(), dplyr::all_of(rowname_col_private)
      )

    # Get the `stub_df` object from `data`
    stub_df <- dt_stub_df_get(data = data)

    # Place the `::rowname::` values into `stub_df$row_id`; these are
    # empty strings which will provide an empty stub for locations
    # adjacent to the body rows
    stub_df[["row_id"]] <- ""

    data <-
      dt_stub_df_set(
        data = data,
        stub_df = stub_df
      )
  }

  # With `fns` input, normalize to list of summary functions
  summary_fns <- normalize_summary_fns(fns = fns)

  # If `formatter` has a value but `fmt` does not, adapt the value provided
  # in `formatter` and `...` and warn about their deprecation
  if (is.null(fmt) && !is.null(formatter) && is.function(formatter)) {

    fmt_args <-
      vapply(
        seq_along(formatter_options),
        FUN.VALUE = character(1),
        USE.NAMES = FALSE,
        FUN = function(x) {
          paste0(names(formatter_options[x]), " = ", formatter_options[x])
        }
      )

    fmt_args <-
      paste0(
        ".",
        if (length(fmt_args) > 0) ", " else NULL,
        paste(
          fmt_args, collapse = ", "
        )
      )

    formatter_name <- deparse(substitute(formatter))

    formatter_formula <- paste0("~ ", formatter_name, "(", fmt_args, ")")

    fmt <- list(stats::as.formula(formatter_formula))

    # Provide deprecation warning
    cli::cli_warn(c(
      "Since gt v0.9.0, the `formatter` argument (and associated `...`) has been deprecated.",
      "*" = "Please use the `fmt` argument to provide formatting directives."
    ),
    .frequency = "regularly",
    .frequency_id = "summary_rows_formatter_arg_deprecated"
    )
  }

  # With `fmt` input, normalize to list of formatting functions
  fmt_fns <- normalize_fmt_fns(fmt = fmt)

  summary_list <-
    list(
      groups = groups,
      columns = columns,
      fns = summary_fns,
      fmt = fmt_fns,
      side = side,
      missing_text = missing_text,
      formatter = formatter,
      formatter_options = formatter_options
    )

  dt_summary_add(
    data = data,
    summary = summary_list
  )
}

#' Add grand summary rows using aggregation functions
#'
#' @description
#'
#' Add grand summary rows by using the table data and any suitable aggregation
#' functions. With grand summary rows, all of the available data in the **gt**
#' table is incorporated (regardless of whether some of the data are part of row
#' groups). Multiple grand summary rows can be added via expressions given to
#' `fns`. You can selectively format the values in the resulting grand summary
#' cells by use of formatting expressions in `fmt`.
#'
#' @inheritParams summary_rows
#'
#' @param side *Side used for placement of grand summary rows*
#'
#'   `singl-kw:[bottom|top]` // *default:* `"bottom"`
#'
#'   Should the grand summary rows be placed at the `"bottom"` (the default) or
#'   the `"top"` of the table?
#'
#' @return An object of class `gt_tbl`.
#'
#' @section Using `columns` to target column data for aggregation:
#'
#' Targeting of column data for which aggregates should be generated is done
#' through the `columns` argument. We can declare column names in `c()` (with
#' bare column names or names in quotes) or we can use
#' **tidyselect**-style expressions. This can be as basic as supplying a select
#' helper like `starts_with()`, or, providing a more complex incantation like
#'
#' `where(~ is.numeric(.x) && max(.x, na.rm = TRUE) > 1E6)`
#'
#' which targets numeric columns that have a maximum value greater than
#' 1,000,000 (excluding any `NA`s from consideration).
#'
#' By default all columns are selected (with the `everything()` default). This
#' default may be not what's needed unless all columns can undergo useful
#' aggregation by expressions supplied in `fns`.
#'
#' @section Aggregation expressions for `fns`:
#'
#' There are a number of ways to express how an aggregation should work for
#' each summary row. In addition to that, we have the ability to pass important
#' information such as the summary row ID value and its label (the former
#' necessary for targeting within [tab_style()] or [tab_footnote()] and the
#' latter used for display in the rendered table). Here are a number of
#' instructive examples for how to supply such expressions.
#'
#' ### Double-sided formula with everything supplied
#'
#' We can be explicit and provide a double-sided formula (in the form
#' `<LHS> ~ <RHS>`) that expresses everything about a summary row. That is, it
#' has an aggregation expression (where `.` represents the data in the
#' focused column). Here's an example:
#'
#' `list(id = "minimum", label = "min") ~ min(., na.rm = TRUE)`
#'
#' The left side (the list) contains named elements that identify the `id` and
#' `label` for the summary row. The right side has an expression for obtaining
#' a minimum value (dropping `NA` values in the calculation).
#'
#' The `list()` can be replaced with `c()` but the advantage of a list is
#' allowing the use of the [md()] and [html()] helper functions. The above
#' example can be written as:
#'
#' `list(id = "minimum", label = md("**Minimum**")) ~ min(., na.rm = TRUE)`
#'
#' and we can have that label value interpreted as Markdown text.
#'
#' ### Function names in quotes
#'
#' With `fns = "min"` we get the equivalent of the fuller expression:
#'
#' `list(id = "min", label = "min") ~ min(., na.rm = TRUE)`
#'
#' For sake of convenience, common aggregation functions with the `na.rm`
#' argument will be rewritten with the `na.rm = TRUE` option. These functions
#' are: `"min"`, `"max"`, `"mean"`, `"median"`, `"sd"`, and `"sum"`.
#'
#' Should you need to specify multiple aggregation functions in this way (giving
#' you multiple summary rows), use `c()` or `list()`.
#'
#' ### RHS formula expressions
#'
#' With `fns = ~ min(.)` or `fns = list(~ min(.))`, **gt** will use the function
#' name as the `id` and `label`. The expansion of this shorthand to full form
#' looks like this:
#'
#' `list(id = "min", label = "min") ~ min(.)`
#'
#' The RHS expression is kept as written and the name portion is both the `id`
#' and the `label`.
#'
#' ### Named vector or list with RHS formula expression
#'
#' Using `fns = c(minimum = ~ min(.))` or `fns = list(minimum = ~ min(.))`
#' expands to this:
#'
#' `list(id = "minimum", label = "minimum") ~ min(.)`
#'
#' ### Unnamed vector or list with RHS formula expression
#'
#' With `fns = c("minimum", "min") ~ min(.)` or
#' `fns = list("minimum", "min") ~ min(.)` the LHS contains the `label` and `id`
#' values and, importantly, the order is `label` first and `id` second. This can
#' be rewritten as:
#'
#' `list(id = "min", label = "minimum") ~ min(.)`
#'
#' If the vector or list is partially named, **gt** has enough to go on to
#' disambiguate the unnamed element. So with
#' `fns = c("minimum", label = "min") ~ min(.)`, `"min"` is indeed the `label`
#' and `"minimum"` is taken as the `id` value.
#'
#' ### A fully named list with three specific elements
#'
#' We can avoid using a formula if we are satisfied with the default options of
#' a function (except some of those functions with the `na.rm` options, see
#' above). Instead, a list with the named elements `id`, `label`, and `fn` could
#' be used. It can look like this:
#'
#' `fns = list(id = "mean_id", label = "average", fn = "mean")`
#'
#' which translates to
#'
#' `list(id = "mean_id", label = "average") ~ mean(., na.rm = TRUE)`
#'
#' @section Formatting expressions for `fmt`:
#'
#' Given that we are generating new data in a table, we might also want to
#' take the opportunity to format those new values right away. We can do this
#' in the `fmt` argument, either with a single expression or a number of them
#' in a list.
#'
#' We can supply a one-sided (RHS only) expression to `fmt`, and, several can
#' be provided in a list. The expression uses a formatting function (e.g.,
#' [fmt_number()], [fmt_currency()], etc.) and it must contain an initial `.`
#' that stands for the data object. If performing numeric formatting on all
#' columns in the new grand summary rows, it might look something like this:
#'
#' `fmt = ~ fmt_number(., decimals = 1, use_seps = FALSE)`
#'
#' We can use the `columns` and `rows` arguments that are available in every
#' formatting function. This allows us to format only a subset of columns or
#' rows. Summary rows can be targeted by using their ID values and these are
#' settable within expressions given to `fns` (see the *Aggregation expressions
#' for `fns`* section for details on this). Here's an example with hypothetical
#' column and row names:
#'
#' `fmt = ~ fmt_number(., columns = num, rows = "mean", decimals = 3)`
#'
#' @section Extraction of summary rows:
#'
#' Should we need to obtain the summary data for external purposes, the
#' [extract_summary()] function can be used with a `gt_tbl` object where summary
#' rows were added via `grand_summary_rows()` or [summary_rows()].
#'
#' @section Examples:
#'
#' Use a modified version of the [`sp500`] dataset to create a **gt** table with
#' row groups and row labels. Create the grand summary rows `min`, `max`, and
#' `avg` for the table with the `grand_summary_rows()` function.
#'
#' ```r
#' sp500 |>
#'   dplyr::filter(date >= "2015-01-05" & date <= "2015-01-16") |>
#'   dplyr::arrange(date) |>
#'   dplyr::mutate(week = paste0("W", strftime(date, format = "%V"))) |>
#'   dplyr::select(-adj_close, -volume) |>
#'   gt(
#'     rowname_col = "date",
#'     groupname_col = "week"
#'   ) |>
#'   grand_summary_rows(
#'     columns = c(open, high, low, close),
#'     fns = list(
#'       min ~ min(.),
#'       max ~ max(.),
#'       avg ~ mean(.)
#'     ),
#'     fmt = ~ fmt_number(., use_seps = FALSE)
#'   )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_grand_summary_rows_1.png")`
#' }}
#'
#' Let's take the [`countrypops`] dataset and process that a bit before handing
#' it off to **gt**. We can create a single grand summary row with totals that
#' appears at the top of the table body (with `side = "top"`). We can define the
#' aggregation with a list that contains parameters for the grand summary row
#' label (`"TOTALS"`), the ID value of that row (`"totals"`), and the
#' aggregation function (expressed as `"sum"`, which **gt** recognizes as the
#' `sum()` function). Finally, we'll add a background fill to the grand summary
#' row with [tab_style()].
#'
#' ```r
#' countrypops |>
#'   dplyr::filter(country_code_2 %in% c("BE", "NL", "LU")) |>
#'   dplyr::filter(year %% 10 == 0) |>
#'   dplyr::select(country_name, year, population) |>
#'   tidyr::pivot_wider(names_from = year, values_from = population) |>
#'   gt(rowname_col = "country_name") |>
#'   tab_header(title = "Populations of the Benelux Countries") |>
#'   tab_spanner(columns = everything(), label = "Year") |>
#'   fmt_integer() |>
#'   grand_summary_rows(
#'     fns =  list(label = "TOTALS", id = "totals", fn = "sum"),
#'     fmt = ~ fmt_integer(.),
#'     side = "top"
#'   ) |>
#'   tab_style(
#'     locations = cells_grand_summary(),
#'     style = cell_fill(color = "lightblue" |> adjust_luminance(steps = +1))
#'   )
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_grand_summary_rows_2.png")`
#' }}
#'
#' @family row addition/modification functions
#' @section Function ID:
#' 6-2
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
grand_summary_rows <- function(
    data,
    columns = everything(),
    fns = NULL,
    fmt = NULL,
    side = c("bottom", "top"),
    missing_text = "---",
    formatter = NULL,
    ...
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Get the correct `side` value
  side <- rlang::arg_match(side)

  summary_rows(
    data = data,
    groups = ":GRAND_SUMMARY:",
    columns = {{ columns }},
    fns = fns,
    fmt = fmt,
    side = side,
    missing_text = missing_text,
    formatter = formatter,
    ...
  )
}

# Normalize `fns` input in `summary_rows` and `grand_summary_rows`
#
# Returns a normalized list of summary functions
# list(
#   <id> = list(label = "the **label**", fn = ~mean(., na.rm = TRUE)),
#   <id> = list(label = "another label", fn = ~sum(.))
# )
normalize_summary_fns <- function(fns) {

  if (length(fns) < 1) {
    return(list())
  }

  summary_fns <- list()

  if (rlang::is_formula(fns)) {
    fns <- list(rlang::new_formula(lhs = rlang::f_lhs(fns), rhs = rlang::f_rhs(fns)))
  }

  # Upgrade `fns` to a list if it's a vector
  if (!is.list(fns)) {
    fns <- as.list(fns)
  }

  if (
    is.list(fns) &&
    rlang::is_named(fns) &&
    all(names(fns) %in% c("id", "label", "fn"))
  ) {
    fns <- list(fns)
  }

  for (i in seq_along(fns)) {

    id <- NULL
    label <- NULL

    # Get `id` values if we have a named list
    if (rlang::is_named(fns[i])) {
      id <- names(fns[i])
    }

    # Extract the value of the first list component
    value <- fns[i][[1]]

    # If component is a named list, extract those elements by
    # any recognizable names
    if (
      is.list(value) &&
      rlang::is_named(value) &&
      all(names(value) %in% c("id", "label", "fn"))
    ) {

      id <- value$id
      label <- value$label
      value <- value$fn
    }

    if (is.character(value)) {

      #
      # Handle character vector cases
      #

      if (value %in% c("min", "max", "mean", "median", "sd", "sum")) {
        fn <- stats::as.formula(paste0("~", value, "(., na.rm = TRUE)"))
      } else {
        fn <- stats::as.formula(paste0("~", value, "(.)"))
      }

      if (is.null(id)) {
        id <- value
      }

      if (is.null(label)) {
        label <- value
      }

    } else if (rlang::is_formula(value)) {

      #
      # Handle formula cases
      #

      # Handles the syntax `label ~ fn(.)`

      formula_lhs <- rlang::f_lhs(value)
      formula_rhs <- rlang::f_rhs(value)

      fn <- rlang::new_formula(lhs = NULL, rhs = formula_rhs)

      # Determine if the LHS contains a two-element vector with a label and
      # an ID value; the order if not named is label then ID
      if (
        !is.name(formula_lhs) &&
        is.character(rlang::eval_bare(formula_lhs)) &&
        length(rlang::eval_bare(formula_lhs)) == 2
      ) {

        lhs_vector <- rlang::eval_bare(formula_lhs)

        # Get the names of the vector
        lhs_vector_names <- rlang::names2(lhs_vector)

        # If the vector is partially named, fill in the missing name (but
        # only if it has one recognized name)
        if (
          "" %in% lhs_vector_names &&
          any(lhs_vector_names %in% c("id", "label"))
        ) {
          if ("id" %in% lhs_vector_names) {
            names(lhs_vector)[names(lhs_vector) == ""] <- "label"
          } else {
            names(lhs_vector)[names(lhs_vector) == ""] <- "id"
          }
        }

        if (rlang::is_named(lhs_vector)) {

          vector_names <- names(lhs_vector)

          if (!all(vector_names %in% c("id", "label"))) {

            rlang::abort(c(
              "If using a named vector on the lhs of a formula for a summary
              function label and ID, it must have the correct names:",
              "*" = "The \"label\" and \"id\" names must be used."
            ))
          }

          if (length(unique(vector_names)) == 1) {

            which_duplicate_name <- unique(vector_names)

            rlang::abort(c(
              "If using a named vector on the lhs of a formula for a summary",
              "function label and ID, it must have two distinct names:",
              "*" = paste0("The \"", which_duplicate_name, "\" name was used twice.")
            ))
          }

          label <- unname(lhs_vector["label"])
          id <- unname(lhs_vector["id"])

        } else {

          label <- lhs_vector[1]
          id <- lhs_vector[2]
        }

      } else if (
        is.name(formula_lhs) ||
        !is.list(rlang::eval_bare(formula_lhs))
      ) {

        lhs_chr <- as.character(formula_lhs)

        if (is.null(id)) {
          id <- lhs_chr

          if (length(id) < 1) {

            formula_label <- rlang::as_label(formula_rhs)
            formula_label <- gsub("\\(.*", "", formula_label)
            id <- formula_label
          }
        }

        if (length(lhs_chr) < 1) {
          label <- id
        } else {
          label <- lhs_chr
        }

      } else if (is.list(rlang::eval_bare(formula_lhs))) {

        lhs_list <- rlang::eval_bare(formula_lhs)

        if (length(lhs_list) == 2) {

          if (rlang::is_named(lhs_list)) {

            component_names <- names(lhs_list)

            if (!all(component_names %in% c("id", "label"))) {

              rlang::abort(c(
                "If using a named list on the lhs of a formula for a summary",
                "function label and ID, it must have the correct names:",
                "*" = "The \"label\" and \"id\" names must be used"
              ))
            }

            label <- lhs_list[["label"]]
            id <- lhs_list[["id"]]

          } else {

            label <- lhs_list[[1]]
            id <- lhs_list[[2]]
          }
        }

        if (length(lhs_list) == 1) {

          if (is.null(id)) {

            id <- label <- lhs_list[[1]]

            if (
              inherits(id, "from_markdown") ||
              inherits(id, "html")
            ) {

              id <- create_unique_id_vals(id, simplify = TRUE)

            } else {
              label <- id
            }
          }
        }
      }
    }

    list_i <- list(list(label = label, fn = fn))

    names(list_i) <- id

    summary_fns <- c(summary_fns, list_i)
  }

  summary_fns
}

normalize_fmt_fns <- function(fmt) {

  if (is.null(fmt) || length(fmt) < 1) {
    return(NULL)
  }

  if (rlang::is_formula(fmt)) {
    fmt <- list(rlang::new_formula(lhs = rlang::f_lhs(fmt), rhs = rlang::f_rhs(fmt)))
  }

  fmt
}
rstudio/gt documentation built on April 29, 2024, 10:37 p.m.