R/colwise.R

Defines functions summarise_all mutate_all summarise_if mutate_if probe_colwise_names summarise_at mutate_at vars is_col_list select_colwise_names colwise_ summarise_each summarise_each_ mutate_each mutate_each_ summarise_each_q mutate_each_q

Documented in mutate_all mutate_at mutate_each mutate_each_ mutate_each_q mutate_if summarise_all summarise_at summarise_each summarise_each_ summarise_each_q summarise_if vars

#' Summarise and mutate multiple columns.
#'
#' \code{summarise_all()} and \code{mutate_all()} apply the functions
#' to all (non-grouping) columns. \code{summarise_at()} and
#' \code{mutate_at()} allow you to select columns
#' using the same name-based \code{\link{select_helpers}} as with
#' \code{\link{select}()}. \code{summarise_if}() and
#' \code{mutate_if}() operate on columns for which a predicate returns
#' \code{TRUE}. Finally, \code{\link{summarise_each}()} and
#' \code{\link{mutate_each}()} are older variants that will be
#' deprecated in the future.
#'
#' @param .tbl a tbl
#' @param .funs List of function calls generated by
#'   \code{\link{funs}()}, or a character vector of function names, or
#'   simply a function (only for local sources).
#' @param .cols A list of columns generated by \code{\link{vars}()},
#'   or a character vector of column names, or a numeric vector of column
#'   positions.
#' @param .predicate A predicate function to be applied to the columns
#'   or a logical vector. The columns for which \code{.predicate} is
#'   or returns \code{TRUE} will be summarised or mutated.
#' @param ... Additional arguments for the function calls. These are
#'   evaluated only once.
#' @return A data frame. By default, the newly created columns have the shortest
#'   names needed to distinguish the output. To force inclusion of a name,
#'   even when not needed, name the input (see examples for details).
#' @seealso \code{\link{vars}()}, \code{\link{funs}()}
#' @examples
#' by_species <- iris %>% group_by(Species)
#'
#' # One function
#' by_species %>% summarise_all(n_distinct)
#' by_species %>% summarise_all(mean)
#'
#' # Use the _at and _if variants for conditional mapping.
#' by_species %>% summarise_if(is.numeric, mean)
#'
#' # summarise_at() can use select() helpers with the vars() function:
#' by_species %>% summarise_at(vars(Petal.Width), mean)
#' by_species %>% summarise_at(vars(matches("Width")), mean)
#'
#' # You can also specify columns with column names or column positions:
#' by_species %>% summarise_at(c("Sepal.Width", "Petal.Width"), mean)
#' by_species %>% summarise_at(c(1, 3), mean)
#'
#' # You can provide additional arguments. Those are evaluated only once:
#' by_species %>% summarise_all(mean, trim = 1)
#' by_species %>% summarise_at(vars(Petal.Width), mean, trim = 1)
#'
#' # You can provide an expression or multiple functions with the funs() helper.
#' by_species %>% mutate_all(funs(. * 0.4))
#' by_species %>% summarise_all(funs(min, max))
#' # Note that output variable name must now include function name, in order to
#' # keep things distinct.
#'
#' # Function names will be included if .funs has names or whenever multiple
#' # functions are used.
#' by_species %>% mutate_all(funs("in" = . / 2.54))
#' by_species %>% mutate_all(funs(rg = diff(range(.))))
#' by_species %>% summarise_all(funs(med = median))
#' by_species %>% summarise_all(funs(Q3 = quantile), probs = 0.75)
#' by_species %>% summarise_all(c("min", "max"))
#'
#' # Two functions, continued
#' by_species %>% summarise_at(vars(Petal.Width, Sepal.Width), funs(min, max))
#' by_species %>% summarise_at(vars(matches("Width")), funs(min, max))
#'
#' @aliases summarise_each_q mutate_each_q
#' @export
summarise_all <- function(.tbl, .funs, ...) {
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, list())
  summarise_(.tbl, .dots = vars)
}

#' @rdname summarise_all
#' @export
mutate_all <- function(.tbl, .funs, ...) {
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, list())
  mutate_(.tbl, .dots = vars)
}

#' @rdname summarise_all
#' @export
summarise_if <- function(.tbl, .predicate, .funs, ...) {
  if (inherits(.tbl, "tbl_lazy")) {
    stop("Conditional colwise operations currently require local sources",
      call. = FALSE)
  }
  cols <- probe_colwise_names(.tbl, .predicate)
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, cols)

  summarise_(.tbl, .dots = vars)
}

#' @rdname summarise_all
#' @export
mutate_if <- function(.tbl, .predicate, .funs, ...) {
  if (inherits(.tbl, "tbl_lazy")) {
    stop("Conditional colwise operations currently require local sources",
      call. = FALSE)
  }
  cols <- probe_colwise_names(.tbl, .predicate)
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, cols)

  mutate_(.tbl, .dots = vars)
}

probe_colwise_names <- function(tbl, p, ...) {
  if (is.logical(p)) {
    stopifnot(length(p) == length(tbl))
    selected <- p
  } else {
    selected <- vapply(tbl, p, logical(1), ...)
  }

  vars <- tbl_vars(tbl)
  vars[selected]
}

#' @rdname summarise_all
#' @export
summarise_at <- function(.tbl, .cols, .funs, ...) {
  cols <- select_colwise_names(.tbl, .cols)
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, cols)

  summarise_(.tbl, .dots = vars)
}

#' @rdname summarise_all
#' @export
mutate_at <- function(.tbl, .cols, .funs, ...) {
  cols <- select_colwise_names(.tbl, .cols)
  funs <- as.fun_list(.funs, .env = parent.frame(), ...)
  vars <- colwise_(.tbl, funs, cols)

  mutate_(.tbl, .dots = vars)
}

#' @rdname summarise_all
#' @export
summarize_all <- summarise_all

#' @rdname summarise_all
#' @export
summarize_at <- summarise_at

#' @rdname summarise_all
#' @export
summarize_if <- summarise_if

#' Select columns
#'
#' This helper has equivalent semantics to \code{\link{select}()}. Its
#' purpose is to provide \code{select()} semantics to the colwise
#' summarising and mutating verbs.
#' @param ... Variables to include/exclude in mutate/summarise. You
#'   can use same specifications as in \code{\link{select}}. If
#'   missing, defaults to all non-grouping variables.
#' @seealso \code{\link{summarise_all}()}
#' @export
vars <- function(...) {
  structure(lazyeval::lazy_dots(...),
    class = c("col_list", "lazy_dots")
  )
}
is_col_list <- function(cols) inherits(cols, "col_list")

select_colwise_names <- function(tbl, cols) {
  vars <- tbl_vars(tbl)

  if (is.character(cols) || is_col_list(cols)) {
    selected <- cols
  } else if (is.numeric(cols)) {
    selected <- vars[cols]
  } else {
    stop(".cols should be a character/numeric vector or a columns object",
      call. = FALSE)
  }

  selected
}

colwise_ <- function(tbl, calls, vars) {
  stopifnot(is.fun_list(calls))

  named_calls <- attr(calls, "has_names")
  named_vars <- any(has_names(vars))

  if (length(vars) == 0) {
    vars <- lazyeval::lazy_dots(everything())
  }
  vars <- select_vars_(tbl_vars(tbl), vars, exclude = as.character(groups(tbl)))

  out <- vector("list", length(vars) * length(calls))
  dim(out) <- c(length(vars), length(calls))

  vars <- enc2native(vars)
  for (i in seq_along(vars)) {
    for (j in seq_along(calls)) {
      out[[i, j]] <- lazyeval::interp(calls[[j]],
        .values = list(. = as.name(vars[i])))
    }
  }
  dim(out) <- NULL

  if (length(calls) == 1 && !named_calls) {
    names(out) <- names(vars)
  } else if (length(vars) == 1 && !named_vars) {
    names(out) <- names(calls)
  } else {
    grid <- expand.grid(var = names(vars), call = names(calls))
    names(out) <- paste(grid$var, grid$call, sep = "_")
  }

  out
}

#' Summarise and mutate multiple columns.
#'
#' Apply one or more functions to one or more columns. Grouping variables
#' are always excluded from modification.
#'
#' In the future \code{mutate_each()} and \code{summarise_each()} will
#' be deprecated in favour of a more featureful family of functions:
#' \code{\link{mutate_all}()}, \code{\link{mutate_at}()},
#' \code{\link{mutate_if}()}, \code{\link{summarise_all}()},
#' \code{\link{summarise_at}()} and \code{\link{summarise_if}()}.
#' @param tbl a tbl
#' @param funs List of function calls, generated by \code{\link{funs}}, or
#'   a character vector of function names.
#' @param vars,... Variables to include/exclude in mutate/summarise.
#'   You can use same specifications as in \code{\link{select}}. If missing,
#'   defaults to all non-grouping variables.
#'
#'   For standard evaluation versions (ending in \code{_}) these can
#'   be either a list of expressions or a character vector.
#' @export
summarise_each <- function(tbl, funs, ...) {
  summarise_each_(tbl, funs, lazyeval::lazy_dots(...))
}

#' @export
#' @rdname summarise_each
summarise_each_ <- function(tbl, funs, vars) {
  if (is.character(funs)) {
    funs <- funs_(funs)
  }

  vars <- colwise_(tbl, funs, vars)
  summarise_(tbl, .dots = vars)
}

#' @rdname summarise_each
#' @export
summarize_each <- summarise_each

#' @rdname summarise_each
#' @export
summarize_each_ <- summarise_each_

#' @export
#' @rdname summarise_each
mutate_each <- function(tbl, funs, ...) {
  if (is.character(funs)) {
    funs <- funs_(funs)
  }

  mutate_each_(tbl, funs, lazyeval::lazy_dots(...))
}

#' @export
#' @rdname summarise_each
mutate_each_ <- function(tbl, funs, vars) {
  vars <- colwise_(tbl, funs, vars)
  mutate_(tbl, .dots = vars)
}


#' @export
summarise_each_q <- function(...) {
  .Deprecated("summarise_all")
  summarise_each_(...)
}
#' @export
mutate_each_q <- function(...) {
  .Deprecated("mutate_all")
  mutate_each_(...)
}
sctyner/dplyr050 documentation built on May 17, 2019, 2:22 p.m.