#' Pivot data from wide to long
#'
#' @description
#' `pivot_longer()` "lengthens" data, increasing the number of rows and
#' decreasing the number of columns. The inverse transformation is
#' [tidyr::pivot_wider()].
#'
#' Learn more in `vignette("pivot", "tidyr")`.
#'
#' While most functionality is identical there are some differences to
#' `pivot_longer()` on local data frames:
#' * the output is sorted differently/not explicitly,
#' * the coercion of mixed column types is left to the database,
#' * `values_ptypes` NOT supported.
#'
#' Note that `build_longer_spec()` and `pivot_longer_spec()` do not work with
#' remote tables.
#'
#' @details
#' The SQL translation basically works as follows:
#'
#' 1. split the specification by its key columns i.e. by variables crammed
#' into the column names.
#' 2. for each part in the split specification `transmute()` `data` into the
#' following columns
#' * id columns i.e. columns that are not pivotted
#' * key columns
#' * value columns i.e. columns that are pivotted
#' 3. combine all the parts with `union_all()`
#'
#' @param data A data frame to pivot.
#' @param cols Columns to pivot into longer format.
#' @param ... Additional arguments passed on to methods.
#' @param cols_vary Unsupported; included for compatibility with the generic.
#' @param names_to A string specifying the name of the column to create
#' from the data stored in the column names of `data`.
#' @param names_prefix A regular expression used to remove matching text
#' from the start of each variable name.
#' @param names_sep,names_pattern If `names_to` contains multiple values,
#' these arguments control how the column name is broken up.
#' @param names_repair What happens if the output has invalid column names?
#' @param values_to A string specifying the name of the column to create
#' from the data stored in cell values. If `names_to` is a character
#' containing the special `.value` sentinel, this value will be ignored,
#' and the name of the value column will be derived from part of the
#' existing column names.
#' @param values_drop_na If `TRUE`, will drop rows that contain only `NA`s
#' in the `value_to` column.
#' @param names_transform,values_transform A list of column name-function pairs.
#' @param names_ptypes A list of column name-prototype pairs.
#' @param values_ptypes Not supported.
#' @examplesIf rlang::is_installed("tidyr", version = "1.0.0")
#' # See vignette("pivot") for examples and explanation
#'
#' # Simplest case where column names are character data
#' memdb_frame(
#' id = c("a", "b"),
#' x = 1:2,
#' y = 3:4
#' ) %>%
#' tidyr::pivot_longer(-id)
#' @exportS3Method tidyr::pivot_longer
pivot_longer.tbl_lazy <- function(data,
cols,
...,
cols_vary,
names_to = "name",
names_prefix = NULL,
names_sep = NULL,
names_pattern = NULL,
names_ptypes = NULL,
names_transform = NULL,
names_repair = "check_unique",
values_to = "value",
values_drop_na = FALSE,
values_ptypes,
values_transform = NULL) {
check_unsupported_arg(cols_vary)
check_unsupported_arg(values_ptypes)
rlang::check_dots_empty()
cols <- enquo(cols)
spec <- tidyr::build_longer_spec(tidyselect_data_proxy(data), !!cols,
names_to = names_to,
values_to = values_to,
names_prefix = names_prefix,
names_sep = names_sep,
names_pattern = names_pattern,
names_ptypes = names_ptypes,
names_transform = names_transform,
error_call = current_env()
)
dbplyr_pivot_longer_spec(data, spec,
names_repair = names_repair,
values_drop_na = values_drop_na,
values_transform = values_transform
)
}
dbplyr_pivot_longer_spec <- function(data,
spec,
names_repair = "check_unique",
values_drop_na = FALSE,
values_transform = NULL) {
spec <- tidyr::check_pivot_spec(spec)
# .seq col needed if different input columns are mapped to the same output
# column
spec <- deduplicate_spec(spec, data)
id_cols <- syms(setdiff(colnames(data), spec$.name))
repair_info <- apply_name_repair_pivot_longer(id_cols, spec, names_repair)
id_cols <- repair_info$id_cols
spec <- repair_info$spec
spec_split <- vctrs::vec_split(spec, spec[, -(1:2)])
call <- current_env()
value_names <- unique(spec$.value)
values_transform <- check_list_of_functions(values_transform, value_names, call = call)
nms_map <- tibble(
name = colnames(spec_split$key),
name_mapped = ifelse(
name %in% unlist(spec_split$key),
paste0("..", name),
name
)
)
spec_split$key <- set_names(spec_split$key, nms_map$name_mapped)
data_long_list <- purrr::map(
vctrs::vec_seq_along(spec_split),
function(idx) {
row <- spec_split$val[[idx]][, 1:2]
keys <- spec_split$key[idx, ]
keys$.seq <- NULL
measure_cols_exprs <- get_measure_column_exprs(
row[[".name"]],
row[[".value"]],
values_transform,
call = call
)
transmute(
data,
!!!id_cols,
!!!keys,
!!!measure_cols_exprs
)
}
)
data_long <- purrr::reduce(data_long_list, union_all)
if (values_drop_na) {
value_cols <- unique(spec$.value)
data_long <- dplyr::filter_at(
data_long,
value_cols,
dplyr::all_vars(!is.na(.))
)
}
data_long %>%
rename(!!!tibble::deframe(nms_map))
}
get_measure_column_exprs <- function(name, value, values_transform, call) {
measure_cols <- set_names(syms(name), value)
purrr::imap(
measure_cols,
~ {
f_trans <- values_transform[[.y]]
if (is_null(f_trans)) {
.x
} else {
resolve_fun(f_trans, .x, call)
}
}
)
}
apply_name_repair_pivot_longer <- function(id_cols, spec, names_repair) {
# Calculates how the column names in `pivot_longer()` need to be repaired
# and applies this to the `id_cols` and the `spec` argument:
# * The names of `id_cols` are the repaired id column names
# * The `spec` columns after the third column are renamed to the repaired name
# * The entries in the `value` column of `spec` are changed to the repaired name
nms_map_df <- vctrs::vec_rbind(
tibble(from = "id_cols", name = as.character(id_cols)),
tibble(from = "measure_cols", name = colnames(spec)[-(1:2)]),
tibble(from = "value_cols", name = unique(spec[[".value"]]))
) %>%
mutate(name_rep = vctrs::vec_as_names(name, repair = names_repair))
nms_map <- split(nms_map_df, nms_map_df$from)
id_cols <- purrr::set_names(id_cols, nms_map$id_cols$name_rep)
colnames(spec)[-(1:2)] <- nms_map$measure_cols$name_rep
value_nms_map <- purrr::set_names(
nms_map$value_cols$name_rep,
nms_map$value_cols$name
)
spec$.value <- dplyr::recode(spec$.value, !!!value_nms_map)
list(id_cols = id_cols, spec = spec)
}
# The following is copy-pasted from `tidyr`
# nocov start
# Ensure that there's a one-to-one match from spec to data by adding
# a special .seq variable which is automatically removed after pivotting.
deduplicate_spec <- function(spec, df) {
# COPIED FROM tidyr
# Ensure each .name has a unique output identifier
key <- spec[setdiff(names(spec), ".name")]
if (vctrs::vec_duplicate_any(key)) {
pos <- vctrs::vec_group_loc(key)$loc
seq <- vector("integer", length = nrow(spec))
for (i in seq_along(pos)) {
seq[pos[[i]]] <- seq_along(pos[[i]])
}
spec$.seq <- seq
}
# Match spec to data, handling duplicated column names
col_id <- vctrs::vec_match(colnames(df), spec$.name)
has_match <- !is.na(col_id)
if (!vctrs::vec_duplicate_any(col_id[has_match])) {
return(spec)
}
spec <- vctrs::vec_slice(spec, col_id[has_match])
# Need to use numeric indices because names only match first
spec$.name <- seq_along(df)[has_match]
pieces <- vctrs::vec_split(seq_len(nrow(spec)), col_id[has_match])
copy <- integer(nrow(spec))
for (i in seq_along(pieces$val)) {
idx <- pieces$val[[i]]
copy[idx] <- seq_along(idx)
}
spec$.seq <- copy
spec
}
check_unique_names <- function(x, arg = caller_arg(x), call = caller_env()) {
# COPIED FROM tidyr
if (length(x) > 0L && !is_named(x)) {
cli::cli_abort("All elements of {.arg {arg}} must be named.", call = call)
}
if (vctrs::vec_duplicate_any(names(x))) {
cli::cli_abort("The names of {.arg {arg}} must be unique.", call = call)
}
}
check_list_of_functions <- function(x, names, arg = caller_arg(x), call = caller_env()) {
# mostly COPIED FROM tidyr
if (is.null(x)) {
x <- set_names(list(), character())
} else if (is.function(x) || is_formula(x)) {
x <- rep_named(names, list(x))
} else if (!vctrs::vec_is_list(x)) {
cli::cli_abort(
"{.arg {arg}} must be `NULL`, a function, or a named list of functions.",
call = call
)
}
check_unique_names(x, arg = arg, call = call)
x_names <- names(x)
# Silently drop user supplied names not found in the data
x <- x[intersect(x_names, names)]
x
}
# nocov end
utils::globalVariables(".")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.