#' Aggregate data using summary statistics
#'
#' @description
#' Aggregate data using summary statistics such as mean or median. Can be calculated by group.
#'
#' @param .df A data.frame or data.table
#' @param ... Aggregations to perform
#' @param .by Columns to group by.
#' * A single column can be passed with `.by = d`.
#' * Multiple columns can be passed with `.by = c(c, d)`
#' * `tidyselect` can be used:
#' + Single predicate: `.by = where(is.character)`
#' + Multiple predicates: `.by = c(where(is.character), where(is.factor))`
#' + A combination of predicates and column names: `.by = c(where(is.character), b)`
#' @param .sort _experimental_: Default `TRUE`.
#' If FALSE the original order of the grouping variables will be preserved.
#' @param .groups Grouping structure of the result
#' * "drop_last": Drop the last level of grouping
#' * "drop": Drop all groups
#' * "keep": Keep all groups
#' @param .unpack _experimental_: Default `FALSE`. Should unnamed data frame inputs be unpacked.
#' The user must opt in to this option as it can lead to a reduction in performance.
#'
#' @export
#' @examples
#' df <- data.table(
#' a = 1:3,
#' b = 4:6,
#' c = c("a", "a", "b"),
#' d = c("a", "a", "b")
#' )
#'
#' df %>%
#' summarize(avg_a = mean(a),
#' max_b = max(b),
#' .by = c)
#'
#' df %>%
#' summarize(avg_a = mean(a),
#' .by = c(c, d))
summarize <- function(.df, ...,
.by = NULL,
.sort = TRUE,
.groups = "drop_last",
.unpack = FALSE) {
UseMethod("summarize")
}
#' @export
summarize.tidytable <- function(.df, ...,
.by = NULL,
.sort = TRUE,
.groups = "drop_last",
.unpack = FALSE) {
dots <- enquos(...)
.by <- enquo(.by)
if (length(dots) == 0) {
# Issue #379
out <- distinct(.df, !!.by)
} else {
dt_env <- get_dt_env(dots)
dots <- prep_exprs(dots, .df, !!.by, dt_env = dt_env)
.by <- tidyselect_names(.df, !!.by)
if (is_true(.unpack)) {
# https://github.com/markfairbanks/tidytable/issues/576
j <- call2("df_list", !!!dots, .ns = "vctrs")
} else {
j <- call2(".", !!!dots)
}
dt_expr <- call2_j(.df, j, .by, .sort)
out <- eval_tidy(dt_expr, .df, dt_env)
out <- remove_key(out)
out <- df_name_repair(out, "unique")
}
out
}
#' @export
summarize.grouped_tt <- function(.df, ...,
.by = NULL,
.sort = TRUE,
.groups = "drop_last",
.unpack = FALSE) {
.by <- group_vars(.df)
out <- ungroup(.df)
out <- summarize(out, ...,
.by = {{ .by }},
.sort = .sort,
.groups = .groups,
.unpack = .unpack)
.groups <- arg_match0(.groups, c("drop_last", "drop", "keep"))
if (.groups == "drop_last") {
.by <- .by[-length(.by)]
} else if (.groups == "drop") {
.by <- character()
}
group_by(out, any_of(.by))
}
#' @export
summarize.rowwise_tt <- function(.df, ...,
.by = NULL,
.sort = TRUE,
.groups = "drop_last",
.unpack = FALSE) {
abort("`summarize()` is not yet supported on a rowwise tidytable.")
}
#' @export
summarize.data.frame <- function(.df, ...,
.by = NULL,
.sort = TRUE,
.groups = "drop_last",
.unpack = FALSE) {
.df <- as_tidytable(.df)
summarize(.df, ...,
.by = {{ .by }},
.sort = .sort,
.groups = .groups,
.unpack = .unpack)
}
#' @export
#' @rdname summarize
summarise <- summarize
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.