#' Summarise and mutate multiple columns.
#'
#' \code{summarise_all()} and \code{mutate_all()} apply the functions
#' to all (non-grouping) columns. \code{summarise_at()} and
#' \code{mutate_at()} allow you to select columns
#' using the same name-based \code{\link{select_helpers}} as with
#' \code{\link{select}()}. \code{summarise_if}() and
#' \code{mutate_if}() operate on columns for which a predicate returns
#' \code{TRUE}. Finally, \code{\link{summarise_each}()} and
#' \code{\link{mutate_each}()} are older variants that will be
#' deprecated in the future.
#'
#' @param .tbl a tbl
#' @param .funs List of function calls generated by
#' \code{\link{funs}()}, or a character vector of function names, or
#' simply a function (only for local sources).
#' @param .cols A list of columns generated by \code{\link{vars}()},
#' or a character vector of column names, or a numeric vector of column
#' positions.
#' @param .predicate A predicate function to be applied to the columns
#' or a logical vector. The columns for which \code{.predicate} is
#' or returns \code{TRUE} will be summarised or mutated.
#' @param ... Additional arguments for the function calls. These are
#' evaluated only once.
#' @return A data frame. By default, the newly created columns have the shortest
#' names needed to distinguish the output. To force inclusion of a name,
#' even when not needed, name the input (see examples for details).
#' @seealso \code{\link{vars}()}, \code{\link{funs}()}
#' @examples
#' by_species <- iris %>% group_by(Species)
#'
#' # One function
#' by_species %>% summarise_all(n_distinct)
#' by_species %>% summarise_all(mean)
#'
#' # Use the _at and _if variants for conditional mapping.
#' by_species %>% summarise_if(is.numeric, mean)
#'
#' # summarise_at() can use select() helpers with the vars() function:
#' by_species %>% summarise_at(vars(Petal.Width), mean)
#' by_species %>% summarise_at(vars(matches("Width")), mean)
#'
#' # You can also specify columns with column names or column positions:
#' by_species %>% summarise_at(c("Sepal.Width", "Petal.Width"), mean)
#' by_species %>% summarise_at(c(1, 3), mean)
#'
#' # You can provide additional arguments. Those are evaluated only once:
#' by_species %>% summarise_all(mean, trim = 1)
#' by_species %>% summarise_at(vars(Petal.Width), mean, trim = 1)
#'
#' # You can provide an expression or multiple functions with the funs() helper.
#' by_species %>% mutate_all(funs(. * 0.4))
#' by_species %>% summarise_all(funs(min, max))
#' # Note that output variable name must now include function name, in order to
#' # keep things distinct.
#'
#' # Function names will be included if .funs has names or whenever multiple
#' # functions are used.
#' by_species %>% mutate_all(funs("in" = . / 2.54))
#' by_species %>% mutate_all(funs(rg = diff(range(.))))
#' by_species %>% summarise_all(funs(med = median))
#' by_species %>% summarise_all(funs(Q3 = quantile), probs = 0.75)
#' by_species %>% summarise_all(c("min", "max"))
#'
#' # Two functions, continued
#' by_species %>% summarise_at(vars(Petal.Width, Sepal.Width), funs(min, max))
#' by_species %>% summarise_at(vars(matches("Width")), funs(min, max))
#'
#' @aliases summarise_each_q mutate_each_q
#' @export
summarise_all <- function(.tbl, .funs, ...) {
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, list())
summarise_(.tbl, .dots = vars)
}
#' @rdname summarise_all
#' @export
mutate_all <- function(.tbl, .funs, ...) {
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, list())
mutate_(.tbl, .dots = vars)
}
#' @rdname summarise_all
#' @export
summarise_if <- function(.tbl, .predicate, .funs, ...) {
if (inherits(.tbl, "tbl_lazy")) {
stop("Conditional colwise operations currently require local sources",
call. = FALSE)
}
cols <- probe_colwise_names(.tbl, .predicate)
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, cols)
summarise_(.tbl, .dots = vars)
}
#' @rdname summarise_all
#' @export
mutate_if <- function(.tbl, .predicate, .funs, ...) {
if (inherits(.tbl, "tbl_lazy")) {
stop("Conditional colwise operations currently require local sources",
call. = FALSE)
}
cols <- probe_colwise_names(.tbl, .predicate)
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, cols)
mutate_(.tbl, .dots = vars)
}
probe_colwise_names <- function(tbl, p, ...) {
if (is.logical(p)) {
stopifnot(length(p) == length(tbl))
selected <- p
} else {
selected <- vapply(tbl, p, logical(1), ...)
}
vars <- tbl_vars(tbl)
vars[selected]
}
#' @rdname summarise_all
#' @export
summarise_at <- function(.tbl, .cols, .funs, ...) {
cols <- select_colwise_names(.tbl, .cols)
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, cols)
summarise_(.tbl, .dots = vars)
}
#' @rdname summarise_all
#' @export
mutate_at <- function(.tbl, .cols, .funs, ...) {
cols <- select_colwise_names(.tbl, .cols)
funs <- as.fun_list(.funs, .env = parent.frame(), ...)
vars <- colwise_(.tbl, funs, cols)
mutate_(.tbl, .dots = vars)
}
#' @rdname summarise_all
#' @export
summarize_all <- summarise_all
#' @rdname summarise_all
#' @export
summarize_at <- summarise_at
#' @rdname summarise_all
#' @export
summarize_if <- summarise_if
#' Select columns
#'
#' This helper has equivalent semantics to \code{\link{select}()}. Its
#' purpose is to provide \code{select()} semantics to the colwise
#' summarising and mutating verbs.
#' @param ... Variables to include/exclude in mutate/summarise. You
#' can use same specifications as in \code{\link{select}}. If
#' missing, defaults to all non-grouping variables.
#' @seealso \code{\link{summarise_all}()}
#' @export
vars <- function(...) {
structure(lazyeval::lazy_dots(...),
class = c("col_list", "lazy_dots")
)
}
is_col_list <- function(cols) inherits(cols, "col_list")
select_colwise_names <- function(tbl, cols) {
vars <- tbl_vars(tbl)
if (is.character(cols) || is_col_list(cols)) {
selected <- cols
} else if (is.numeric(cols)) {
selected <- vars[cols]
} else {
stop(".cols should be a character/numeric vector or a columns object",
call. = FALSE)
}
selected
}
colwise_ <- function(tbl, calls, vars) {
stopifnot(is.fun_list(calls))
named_calls <- attr(calls, "has_names")
named_vars <- any(has_names(vars))
if (length(vars) == 0) {
vars <- lazyeval::lazy_dots(everything())
}
vars <- select_vars_(tbl_vars(tbl), vars, exclude = as.character(groups(tbl)))
out <- vector("list", length(vars) * length(calls))
dim(out) <- c(length(vars), length(calls))
vars <- enc2native(vars)
for (i in seq_along(vars)) {
for (j in seq_along(calls)) {
out[[i, j]] <- lazyeval::interp(calls[[j]],
.values = list(. = as.name(vars[i])))
}
}
dim(out) <- NULL
if (length(calls) == 1 && !named_calls) {
names(out) <- names(vars)
} else if (length(vars) == 1 && !named_vars) {
names(out) <- names(calls)
} else {
grid <- expand.grid(var = names(vars), call = names(calls))
names(out) <- paste(grid$var, grid$call, sep = "_")
}
out
}
#' Summarise and mutate multiple columns.
#'
#' Apply one or more functions to one or more columns. Grouping variables
#' are always excluded from modification.
#'
#' In the future \code{mutate_each()} and \code{summarise_each()} will
#' be deprecated in favour of a more featureful family of functions:
#' \code{\link{mutate_all}()}, \code{\link{mutate_at}()},
#' \code{\link{mutate_if}()}, \code{\link{summarise_all}()},
#' \code{\link{summarise_at}()} and \code{\link{summarise_if}()}.
#' @param tbl a tbl
#' @param funs List of function calls, generated by \code{\link{funs}}, or
#' a character vector of function names.
#' @param vars,... Variables to include/exclude in mutate/summarise.
#' You can use same specifications as in \code{\link{select}}. If missing,
#' defaults to all non-grouping variables.
#'
#' For standard evaluation versions (ending in \code{_}) these can
#' be either a list of expressions or a character vector.
#' @export
summarise_each <- function(tbl, funs, ...) {
summarise_each_(tbl, funs, lazyeval::lazy_dots(...))
}
#' @export
#' @rdname summarise_each
summarise_each_ <- function(tbl, funs, vars) {
if (is.character(funs)) {
funs <- funs_(funs)
}
vars <- colwise_(tbl, funs, vars)
summarise_(tbl, .dots = vars)
}
#' @rdname summarise_each
#' @export
summarize_each <- summarise_each
#' @rdname summarise_each
#' @export
summarize_each_ <- summarise_each_
#' @export
#' @rdname summarise_each
mutate_each <- function(tbl, funs, ...) {
if (is.character(funs)) {
funs <- funs_(funs)
}
mutate_each_(tbl, funs, lazyeval::lazy_dots(...))
}
#' @export
#' @rdname summarise_each
mutate_each_ <- function(tbl, funs, vars) {
vars <- colwise_(tbl, funs, vars)
mutate_(tbl, .dots = vars)
}
#' @export
summarise_each_q <- function(...) {
.Deprecated("summarise_all")
summarise_each_(...)
}
#' @export
mutate_each_q <- function(...) {
.Deprecated("mutate_all")
mutate_each_(...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.