#' Legacy versions of `nest()` and `unnest()`
#'
#' @description
#' `r lifecycle::badge("superseded")`
#'
#' tidyr 1.0.0 introduced a new syntax for [nest()] and [unnest()]. The majority
#' of existing usage should be automatically translated to the new syntax with a
#' warning. However, if you need to quickly roll back to the previous behaviour,
#' these functions provide the previous interface. To make old code work as is,
#' add the following code to the top of your script:
#'
#' ```
#' library(tidyr)
#' nest <- nest_legacy
#' unnest <- unnest_legacy
#' ```
#'
#' @param data A data frame.
#' @param .key The name of the new column, as a string or symbol. This argument
#' is passed by expression and supports
#' [quasiquotation][rlang::quasiquotation] (you can unquote strings and
#' symbols). The name is captured from the expression with [rlang::ensym()]
#' (note that this kind of interface where symbols do not represent actual
#' objects is now discouraged in the tidyverse; we support it here for
#' backward compatibility).
#' @param ... Specification of columns to unnest. Use bare variable names or
#' functions of variables. If omitted, defaults to all list-cols.
#' @param .drop Should additional list columns be dropped? By default,
#' `unnest()` will drop them if unnesting the specified columns requires the
#' rows to be duplicated.
#' @param .preserve Optionally, list-columns to preserve in the output. These
#' will be duplicated in the same way as atomic vectors. This has
#' [dplyr::select()] semantics so you can preserve multiple variables with
#' `.preserve = c(x, y)` or `.preserve = starts_with("list")`.
#' @param .id Data frame identifier - if supplied, will create a new column with
#' name `.id`, giving a unique identifier. This is most useful if the list
#' column is named.
#' @param .sep If non-`NULL`, the names of unnested data frame columns will
#' combine the name of the original list-col with the names from the nested
#' data frame, separated by `.sep`.
#' @export
#' @examples
#' # Nest and unnest are inverses
#' df <- tibble(x = c(1, 1, 2), y = 3:1)
#' df %>% nest_legacy(y)
#' df %>% nest_legacy(y) %>% unnest_legacy()
#'
#' # nesting -------------------------------------------------------------------
#' as_tibble(iris) %>% nest_legacy(!Species)
#' as_tibble(chickwts) %>% nest_legacy(weight)
#'
#' # unnesting -----------------------------------------------------------------
#' df <- tibble(
#' x = 1:2,
#' y = list(
#' tibble(z = 1),
#' tibble(z = 3:4)
#' )
#' )
#' df %>% unnest_legacy(y)
#'
#' # You can also unnest multiple columns simultaneously
#' df <- tibble(
#' a = list(c("a", "b"), "c"),
#' b = list(1:2, 3),
#' c = c(11, 22)
#' )
#' df %>% unnest_legacy(a, b)
#' # If you omit the column names, it'll unnest all list-cols
#' df %>% unnest_legacy()
nest_legacy <- function(data, ..., .key = "data") {
UseMethod("nest_legacy")
}
#' @importFrom utils packageVersion
#' @export
nest_legacy.tbl_df <- function(data, ..., .key = "data") {
key_var <- as_string(ensym(.key))
nest_vars <- unname(tidyselect::vars_select(names(data), ...))
if (is_empty(nest_vars)) {
nest_vars <- names(data)
}
if (dplyr::is_grouped_df(data)) {
group_vars <- dplyr::group_vars(data)
} else {
group_vars <- setdiff(names(data), nest_vars)
}
nest_vars <- setdiff(nest_vars, group_vars)
data <- dplyr::ungroup(data)
if (is_empty(group_vars)) {
return(tibble(!!key_var := list(data)))
}
out <- dplyr::select(data, !!!syms(group_vars))
grouped_data <- dplyr::group_by(data, !!!syms(group_vars), .drop = TRUE)
idx <- dplyr::group_indices(grouped_data)
representatives <- which(!duplicated(idx))
out <- dplyr::slice(out, representatives)
out[[key_var]] <- unname(split(data[nest_vars], idx))[unique(idx)]
out
}
#' @export
nest_legacy.data.frame <- function(data, ..., .key = "data") {
.key <- enquo(.key)
# Don't transform subclasses
if (identical(class(data), "data.frame")) {
data <- tibble::as_tibble(data)
}
nest_legacy.tbl_df(data, ..., .key = !!.key)
}
#' @export
#' @rdname nest_legacy
unnest_legacy <- function(data, ..., .drop = NA, .id = NULL, .sep = NULL, .preserve = NULL) {
UseMethod("unnest_legacy")
}
#' @export
unnest_legacy.data.frame <- function(data, ..., .drop = NA, .id = NULL,
.sep = NULL, .preserve = NULL) {
preserve <- tidyselect::vars_select(names(data), !!enquo(.preserve))
quos <- quos(...)
if (is_empty(quos)) {
list_cols <- names(data)[map_lgl(data, is_list)]
list_cols <- setdiff(list_cols, preserve)
quos <- syms(list_cols)
}
if (length(quos) == 0) {
return(data)
}
nested <- dplyr::transmute(dplyr::ungroup(data), !!!quos)
n <- map(nested, function(x) unname(map_int(x, NROW)))
if (length(unique(n)) != 1) {
cli::cli_abort("All nested columns must have the same number of elements.")
}
types <- map_chr(nested, list_col_type)
nest_types <- split.default(nested, types)
if (length(nest_types$mixed) > 0) {
probs <- names(nest_types$mixed)
cli::cli_abort(c(
"Each column must either be a list of vectors or a list of data frames.",
i = "Problems in: {.var {probs}}"
))
}
unnested_atomic <- imap(nest_types$atomic %||% list(), enframe, .id = .id)
if (length(unnested_atomic) > 0) {
unnested_atomic <- dplyr::bind_cols(unnested_atomic)
}
unnested_dataframe <- map(nest_types$dataframe %||% list(), function(.) {
if (length(.) == 0L) {
attr(., "ptype") %||% data.frame()
} else {
dplyr::bind_rows(., .id = .id)
}
})
if (!is_null(.sep)) {
unnested_dataframe <- imap(
unnested_dataframe,
function(df, name) {
set_names(df, paste(name, names(df), sep = .sep))
}
)
}
if (length(unnested_dataframe) > 0) {
unnested_dataframe <- dplyr::bind_cols(unnested_dataframe)
}
# Keep list columns by default, only if the rows aren't expanded
if (identical(.drop, NA)) {
n_in <- nrow(data)
if (length(unnested_atomic)) {
n_out <- nrow(unnested_atomic)
} else {
n_out <- nrow(unnested_dataframe)
}
.drop <- n_out != n_in
}
if (.drop) {
is_atomic <- map_lgl(data, is_atomic)
group_vars <- names(data)[is_atomic]
} else {
group_vars <- names(data)
}
group_vars <- setdiff(group_vars, names(nested))
# Add list columns to be preserved
group_vars <- union(group_vars, preserve)
rest <- data[rep(seq_nrow(data), n[[1]]), group_vars, drop = FALSE]
out <- dplyr::bind_cols(rest, unnested_atomic, unnested_dataframe)
reconstruct_tibble(data, out)
}
list_col_type <- function(x) {
is_data_frame <- is.data.frame(attr(x, "ptype", exact = TRUE)) || (is.list(x) && all(map_lgl(x, is.data.frame)))
is_atomic <- all(map_lgl(x, function(x) is_atomic(x) || (is_list(x) && !is.object(x))))
if (is_data_frame) {
"dataframe"
} else if (is_atomic) {
"atomic"
} else {
"mixed"
}
}
enframe <- function(x, col_name, .id = NULL) {
if (!is_list(x)) {
x <- list(x)
}
col <- unname(x)
col <- list_unchop(col)
out <- set_names(list(col), col_name)
out <- as_tibble(out)
if (!is_null(.id)) {
out[[.id]] <- id_col(x)
}
out
}
id_col <- function(x) {
stopifnot(is_list(x))
ids <- if (is_null(names(x))) seq_along(x) else names(x)
lengths <- map_int(x, length)
ids[rep(seq_along(ids), lengths)]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.