R/colwise.R

#' Operate on a selection of variables
#'
#' The variants suffixed with `_if`, `_at` or `_all` apply an
#' expression (sometimes several) to all variables within a specified
#' subset. This subset can contain all variables (`_all` variants), a
#' [vars()] selection (`_at` variants), or variables selected with a
#' predicate (`_if` variants).
#'
#' The verbs with scoped variants are:
#'
#' * [mutate()], [transmute()] and [summarise()]. See [summarise_all()].
#' * [filter()]. See [filter_all()].
#' * [group_by()]. See [group_by_all()].
#' * [rename()] and [select()]. See [select_all()].
#' * [arrange()]. See [arrange_all()]
#'
#' There are three kinds of scoped variants. They differ in the scope
#' of the variable selection on which operations are applied:
#'
#' * Verbs suffixed with `_all()` apply an operation on all variables.
#'
#' * Verbs suffixed with `_at()` apply an operation on a subset of
#'   variables specified with the quoting function [vars()]. This
#'   quoting function accepts [tidyselect::vars_select()] helpers like
#'   [starts_with()]. Instead of a [vars()] selection, you can also
#'   supply an [integerish][rlang::is_integerish] vector of column
#'   positions or a character vector of column names.
#'
#' * Verbs suffixed with `_if()` apply an operation on the subset of
#'   variables for which a predicate function returns `TRUE`. Instead
#'   of a predicate function, you can also supply a logical vector.
#'
#' @param .tbl A `tbl` object.
#' @param .funs List of function calls generated by [funs()], or a
#'   character vector of function names, or simply a function.
#'
#'   Bare formulas are passed to [rlang::as_function()] to create
#'   purrr-style lambda functions. Note that these lambda prevent
#'   hybrid evaluation from happening and it is thus more efficient to
#'   supply functions like `mean()` directly rather than in a
#'   lambda-formula.
#' @param .vars A list of columns generated by [vars()],
#'   a character vector of column names, a numeric vector of column
#'   positions, or `NULL`.
#' @param .predicate A predicate function to be applied to the columns
#'   or a logical vector. The variables for which `.predicate` is or
#'   returns `TRUE` are selected. This argument is passed to
#'   [rlang::as_function()] and thus supports quosure-style lambda
#'   functions and strings representing function names.
#' @param ... Additional arguments for the function calls in
#'   `.funs`. These are evaluated only once, with [tidy
#'   dots][rlang::tidy-dots] support.
#' @name scoped
NULL


#' Select variables
#'
#' This helper is intended to provide equivalent semantics to
#' [select()]. It is used for instance in scoped summarising and
#' mutating verbs ([mutate_at()] and [summarise_at()]).
#'
#' Note that verbs accepting a `vars()` specification also accept an
#' [integerish][rlang::is_integerish] vector of positions or a
#' character vector of column names.
#'
#' @param ... Variables to include/exclude in mutate/summarise. You
#'   can use same specifications as in [select()]. If missing,
#'   defaults to all non-grouping variables.
#'
#'   These arguments are automatically [quoted][rlang::quo] and later
#'   [evaluated][rlang::eval_tidy] in the context of the data
#'   frame. They support [unquoting][rlang::quasiquotation]. See
#'   `vignette("programming")` for an introduction to these concepts.
#' @seealso [funs()], [all_vars()] and [any_vars()] for other quoting
#'   functions that you can use with scoped verbs.
#' @export
vars <- function(...) {
  quos(...)
}

#' Apply predicate to all variables
#'
#' These quoting functions signal to scoped filtering verbs
#' (e.g. [filter_if()] or [filter_all()]) that a predicate expression
#' should be applied to all relevant variables. The `all_vars()`
#' variant takes the intersection of the predicate expressions with
#' `&` while the `any_vars()` variant takes the union with `|`.
#'
#' @param expr A predicate expression. This variable supports
#'   [unquoting][rlang::quasiquotation] and will be evaluated in the
#'   context of the data frame. It should return a logical vector.
#'
#'   This argument is automatically [quoted][rlang::quo] and later
#'   [evaluated][rlang::eval_tidy] in the context of the data
#'   frame. It supports [unquoting][rlang::quasiquotation]. See
#'   `vignette("programming")` for an introduction to these concepts.
#' @seealso [funs()] and [vars()] for other quoting functions that you
#'   can use with scoped verbs.
#' @export
all_vars <- function(expr) {
  set_attrs(enquo(expr), class = c("all_vars", "quosure", "formula"))
}
#' @rdname all_vars
#' @export
any_vars <- function(expr) {
  set_attrs(enquo(expr), class = c("any_vars", "quosure", "formula"))
}
#' @export
print.all_vars <- function(x, ...) {
  cat("<predicate intersection>\n")
  NextMethod()
}
#' @export
print.any_vars <- function(x, ...) {
  cat("<predicate union>\n")
  NextMethod()
}


# Requires tbl_vars() method
tbl_at_vars <- function(tbl, vars, .include_group_vars = FALSE) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(tbl)
  }

  if (is_null(vars)) {
    character()
  } else if (is_character(vars)) {
    vars
  } else if (is_integerish(vars)) {
    tibble_vars[vars]
  } else if (is_quosures(vars)) {
    out <- tidyselect::vars_select(tibble_vars, !!!vars)
    if (!any(have_name(vars))) {
      names(out) <- NULL
    }
    out
  } else {
    bad_args(".vars", "must be a character/numeric vector or a `vars()` object, ",
      "not {type_of(vars)}"
    )
  }
}
tbl_at_syms <- function(tbl, vars, .include_group_vars = FALSE) {
  vars <- tbl_at_vars(tbl, vars, .include_group_vars = .include_group_vars)
  set_names(syms(vars), names(vars))
}

# Requires tbl_vars(), `[[`() and length() methods
tbl_if_vars <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
  if (.include_group_vars) {
    tibble_vars <- tbl_vars(.tbl)
  } else {
    tibble_vars <- tbl_nongroup_vars(.tbl)
  }

  if (is_logical(.p)) {
    stopifnot(length(.p) == length(tibble_vars))
    return(syms(tibble_vars[.p]))
  }

  if (inherits(.tbl, "tbl_lazy")) {
    inform("Applying predicate on the first 100 rows")
    .tbl <- collect(.tbl, n = 100)
  }

  if (is_fun_list(.p)) {
    if (length(.p) != 1) {
      bad_args(".predicate", "must have length 1, not {length(.p)}")
    }
    .p <- .p[[1]]
  }
  if (is_quosure(.p)) {
    .p <- quo_as_function(.p)
  } else {
    .p <- as_function(.p, .env)
  }

  n <- length(tibble_vars)
  selected <- lgl_len(n)
  for (i in seq_len(n)) {
    selected[[i]] <- .p(.tbl[[tibble_vars[[i]]]], ...)
  }

  tibble_vars[selected]
}
tbl_if_syms <- function(.tbl, .p, .env, ..., .include_group_vars = FALSE) {
  syms(tbl_if_vars(.tbl, .p, .env, ..., .include_group_vars = .include_group_vars))
}
olascodgreat/samife documentation built on May 13, 2019, 6:11 p.m.