R/colwise.R

#' Operate on a selection of variables
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#'   variables specified with the quoting function [vars()]. This
#'   quoting function accepts [tidyselect::vars_select()] helpers like
#'   [starts_with()]. Instead of a [vars()] selection, you can also
#'   supply an [integerish][rlang::is_integerish] vector of column
#'   positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#'   variables for which a predicate function returns `TRUE`. Instead
#'   of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs A function `fun`, a quosure style lambda `~ fun(.)` or a list of either form.
#'
#' @param .vars A list of columns generated by [vars()],
#'   a character vector of column names, a numeric vector of column
#'   positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#'   or a logical vector. The variables for which `.predicate` is or
#'   returns `TRUE` are selected. This argument is passed to
#'   [rlang::as_function()] and thus supports quosure-style lambda
#'   functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#'   `.funs`. These are evaluated only once, with [tidy
#'   dots][rlang::tidy-dots] support.
#'
#' @section Grouping variables:
#'
#' Most of these operations also apply on the grouping variables when
#' they are part of the selection. This includes:
#'
#' * [arrange_all()], [arrange_at()], and [arrange_if()]
#' * [distinct_all()], [distinct_at()], and [distinct_if()]
#' * [filter_all()], [filter_at()], and [filter_if()]
#' * [group_by_all()], [group_by_at()], and [group_by_if()]
#' * [select_all()], [select_at()], and [select_if()]
#'
#' This is not the case for summarising and mutating variants where
#' operations are *not* applied on grouping variables. The behaviour
#' depends on whether the selection is **implicit** (`all` and `if`
#' selections) or **explicit** (`at` selections). Grouping variables
#' covered by explicit selections (with [summarise_at()],
#' [mutate_at()], and [transmute_at()]) are always an error. For
#' implicit selections, the grouping variables are always ignored. In
#' this case, the level of verbosity depends on the kind of operation:
#'
#' * Summarising operations ([summarise_all()] and [summarise_if()])
#'   ignore grouping variables silently because it is obvious that
#'   operations are not applied on grouping variables.
#'
#' * On the other hand it isn't as obvious in the case of mutating
#'   operations ([mutate_all()], [mutate_if()], [transmute_all()], and
#'   [transmute_if()]). For this reason, they issue a message
#'   indicating which grouping variables are ignored.
#'
#' @name scoped
NULL


#' Select variables
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept a
#' numeric vector of positions or a character vector of column names.
#'
#' @param ... Variables to include/exclude in mutate/summarise. You
#'   can use same specifications as in [select()]. If missing,
#'   defaults to all non-grouping variables.
#'
#'   These arguments are automatically [quoted][rlang::quo] and later
#'   [evaluated][rlang::eval_tidy] in the context of the data
#'   frame. They support [unquoting][rlang::quasiquotation]. See
#'   `vignette("programming")` for an introduction to these concepts.
#' @seealso [all_vars()] and [any_vars()] for other quoting
#'   functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
  quos(...)
}

#' Apply predicate to all variables
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr A predicate expression. This variable supports
#'   [unquoting][rlang::quasiquotation] and will be evaluated in the
#'   context of the data frame. It should return a logical vector.
#'
#'   This argument is automatically [quoted][rlang::quo] and later
#'   [evaluated][rlang::eval_tidy] in the context of the data
#'   frame. It supports [unquoting][rlang::quasiquotation]. See
#'   `vignette("programming")` for an introduction to these concepts.
#' @seealso [vars()] for other quoting functions that you
#'   can use with scoped verbs.
#' @export
all_vars <- function(expr) {
  structure(enquo(expr), class = c("all_vars", "quosure", "formula"))
}
#' @rdname all_vars
#' @export
any_vars <- function(expr) {
  structure(enquo(expr), class = c("any_vars", "quosure", "formula"))
}
#' @export
print.all_vars <- function(x, ...) {
  cat("<predicate intersection>\n")
  NextMethod()
}
#' @export
print.any_vars <- function(x, ...) {
  cat("<predicate union>\n")
  NextMethod()
}


# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(tbl)
  }

  if (is_null(vars)) {
    character()
  } else if (is_integerish(vars)) {
    tibble_vars[vars]
  } else if (is_quosures(vars) || is_character(vars)) {
    out <- tidyselect::vars_select(tibble_vars, !!!vars)
    if (!any(have_name(vars))) {
      names(out) <- NULL
    }
    out
  } else {
    bad_args(".vars", "must be a character/numeric vector or a `vars()` object, ",
      "not {friendly_type_of(vars)}"
    )
  }
}
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE) {
  vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars)
  set_names(syms(vars), names(vars))
}

# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(.tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(.tbl)
  }

  if (is_logical(.p)) {
    stopifnot(length(.p) == length(tibble_vars))
    return(syms(tibble_vars[.p]))
  }

  if (inherits(.tbl, "tbl_lazy")) {
    inform("Applying predicate on the first 100 rows")
    .tbl <- collect(.tbl, n = 100)
  }

  if (is_fun_list(.p) || is_list(.p)) {
    if (length(.p) != 1) {
      bad_args(".predicate", "must have length 1, not {length(.p)}")
    }
    .p <- .p[[1]]
  }
  if (is_quosure(.p)) {
    .p <- quo_as_function(.p)
  } else {
    .p <- as_function(.p, .env)
  }

  n <- length(tibble_vars)
  selected <- new_logical(n)

  for (i in seq_len(n)) {
    column <- .tbl[[tibble_vars[[i]]]]
    selected[[i]] <- eval_tidy(.p(column, ...))
  }

  tibble_vars[selected]
}
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
  syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars))
}

# The lambda must inherit from:
# - Execution environment (bound arguments with purrr lambda syntax)
# - Lexical environment (local variables)
# - Data mask (other columns)
#
# So we need:
# - Inheritance from closure -> lexical
# - A maskable quosure
as_inlined_function <- function(f, env, ...) {
  # Process unquote operator at inlining time
  f <- expr_interp(f)

  # Transform to a purrr-like lambda
  fn <- as_function(f, env = env)

  body(fn) <- expr({
    # Force all arguments
    base::pairlist(...)

    # Transform the lambda body into a maskable quosure inheriting
    # from the execution environment
    `_quo` <- rlang::quo(!!body(fn))

    # Evaluate the quosure in the mask
    rlang::eval_bare(`_quo`, base::parent.frame())
  })

  structure(fn, class = "inline_colwise_function", formula = f)
}

Try the dplyr package in your browser

Any scripts or data that you put into this service are public.

dplyr documentation built on July 4, 2019, 5:08 p.m.