#' Create or transform variables
#'
#' `mutate()` adds new variables and preserves existing ones; `transmute()` adds new variables and drops existing ones.
#' Both functions preserve the number of rows of the input. New variables overwrite existing variables of the same name.
#' Variables can be removed by setting their value to `NULL`.
#'
#' @section Useful mutate functions:
#'
#' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings
#'
#' * [lead()], [lag()]
#'
#' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()], [cume_dist()], [ntile()]
#'
#' * [cumsum()], [cummin()], [cummax()]
#'
#' * [na_if()], [coalesce()]
#'
#' * [if_else()], [recode()], [case_when()]
#'
#' @param .data A `data.frame`.
#' @param ... Name-value pairs of expressions, each with length `1L`. The name of each argument will be the name of a
#' new column and the value will be its corresponding value. Use a `NULL` value in `mutate` to drop a variable. New
#' variables overwrite existing variables of the same name.
#'
#' @examples
#' mutate(mtcars, mpg2 = mpg * 2)
#' mtcars %>% mutate(mpg2 = mpg * 2)
#' mtcars %>% mutate(mpg2 = mpg * 2, cyl2 = cyl * 2)
#'
#' # Newly created variables are available immediately
#' mtcars %>% mutate(mpg2 = mpg * 2, mpg4 = mpg2 * 2)
#'
#' # You can also use mutate() to remove variables and modify existing variables
#' mtcars %>% mutate(
#' mpg = NULL,
#' disp = disp * 0.0163871 # convert to litres
#' )
#'
#' # By default, new columns are placed on the far right.
#' # You can override this with `.before` or `.after`.
#' df <- data.frame(x = 1, y = 2)
#' df %>% mutate(z = x + y)
#' df %>% mutate(z = x + y, .before = 1)
#' df %>% mutate(z = x + y, .after = x)
#'
#' # By default, mutate() keeps all columns from the input data.
#' # You can override with `.keep`
#' df <- data.frame(
#' x = 1, y = 2, a = "a", b = "b",
#' stringsAsFactors = FALSE
#' )
#' df %>% mutate(z = x + y, .keep = "all") # the default
#' df %>% mutate(z = x + y, .keep = "used")
#' df %>% mutate(z = x + y, .keep = "unused")
#' df %>% mutate(z = x + y, .keep = "none") # same as transmute()
#'
#' # mutate() vs transmute --------------------------
#' # mutate() keeps all existing variables
#' mtcars %>%
#' mutate(displ_l = disp / 61.0237)
#'
#' # transmute keeps only the variables you create
#' mtcars %>%
#' transmute(displ_l = disp / 61.0237)
#'
#' @name mutate
#' @export
mutate <- function(.data, ...) {
UseMethod("mutate")
}
#' @rdname mutate
#' @param .keep This argument allows you to control which columns from `.data` are retained in the output:
#'
#' * `"all"`, the default, retains all variables.
#' * `"used"` keeps any variables used to make new variables; it's useful for checking your work as it displays inputs
#' and outputs side-by-side.
#' * `"unused"` keeps only existing variables **not** used to make new variables.
#' * `"none"`, only keeps grouping keys (like [transmute()]).
#'
#' Grouping variables are always kept, unconditional to `.keep`.
#' @param .before,.after <[`poor-select`][select_helpers]> Optionally, control where new columns should appear (the
#' default is to add to the right hand side). See [relocate()] for more details.
#' @export
mutate.data.frame <- function(
.data,
...,
.keep = c("all", "used", "unused", "none"),
.before = NULL,
.after = NULL
) {
keep <- match.arg(arg = .keep, choices = c("all", "used", "unused", "none"), several.ok = FALSE)
res <- mutate_df(.data = .data, ...)
data <- res$data
new_cols <- res$new_cols
.before <- substitute(.before)
.after <- substitute(.after)
if (!is.null(.before) || !is.null(.after)) {
new <- setdiff(new_cols, names(.data))
data <- do.call(relocate, c(list(.data = data), new, .before = .before, .after = .after))
}
if (keep == "all") {
data
} else if (keep == "unused") {
unused <- setdiff(colnames(.data), res$used_cols)
keep <- intersect(colnames(data), c(group_vars(.data), unused, new_cols))
select(.data = data, keep)
} else if (keep == "used") {
keep <- intersect(colnames(data), c(group_vars(.data), res$used_cols, new_cols))
select(.data = data, keep)
} else if (keep == "none") {
keep <- c(setdiff(group_vars(.data), new_cols), intersect(new_cols, colnames(data)))
select(.data = data, keep)
}
}
#' @export
mutate.grouped_df <- function(.data, ...) {
context$group_env <- parent.frame(n = 1)
on.exit(rm(list = c("group_env"), envir = context), add = TRUE)
rows <- rownames(.data)
res <- apply_grouped_function("mutate", .data, drop = TRUE, ...)
res[rows, , drop = FALSE]
}
## -- Helpers ----------------------------------------------------------------------------------------------------------
mutate_df <- function(.data, ...) {
conditions <- dotdotdot(..., .impute_names = TRUE)
cond_nms <- names(dotdotdot(..., .impute_names = FALSE))
if (length(conditions) == 0L) {
return(list(
data = .data,
used_cols = NULL,
new_cols = NULL
))
}
used <- unname(do.call(c, lapply(conditions, find_used)))
used <- used[used %in% colnames(.data)]
context$setup(.data)
on.exit(context$clean(), add = TRUE)
for (i in seq_along(conditions)) {
not_named <- (is.null(cond_nms) || cond_nms[i] == "")
res <- eval(
conditions[[i]],
envir = context$as_env(),
enclos = if (!is.null(context$group_env)) context$group_env else parent.frame(n = 2)
)
res_nms <- names(res)
if (is.data.frame(res)) {
if (not_named) {
context$.data[, res_nms] <- res
} else {
context$.data[[cond_nms[i]]] <- res
}
} else if (is.atomic(res)) {
cond_nms[i] <- names(conditions)[[i]]
context$.data[[cond_nms[i]]] <- res
} else {
context$.data[[names(conditions)[[i]]]] <- res
}
}
list(
data = context$.data,
used_cols = used,
new_cols = cond_nms
)
}
#' Recursively find used variables in an expression
#'
#' @param expr An expression.
#'
#' @return `character(n)`.
#'
#' @examples
#' \dontrun{
#' expr <- quote(x * var + ifelse(x < y, 1, 0) + n())
#' find_used(expr)
#' }
#'
#' @author Mark T Fairbanks, \email{mark.t.fairbanks@@gmail.com}
#'
#' @noRd
find_used <- function(expr) {
if (is.symbol(expr)) {
as.character(expr)
} else {
unique(unlist(lapply(expr[-1], find_used)))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.