Nothing
#' Pivot data from wide to long
#'
#' @description
#' This is a method for the tidyr `pivot_longer()` generic. It is translated to
#' [data.table::melt()]
#'
#' @param data A [lazy_dt()].
#' @inheritParams tidyr::pivot_longer
#' @param names_ptypes,names_transform,values_ptypes,values_transform
#' Not currently supported by dtplyr.
#' @examples
#' library(tidyr)
#'
#' # Simplest case where column names are character data
#' relig_income_dt <- lazy_dt(relig_income)
#' relig_income_dt %>%
#' pivot_longer(!religion, names_to = "income", values_to = "count")
#'
#' # Slightly more complex case where columns have common prefix,
#' # and missing missings are structural so should be dropped.
#' billboard_dt <- lazy_dt(billboard)
#' billboard %>%
#' pivot_longer(
#' cols = starts_with("wk"),
#' names_to = "week",
#' names_prefix = "wk",
#' values_to = "rank",
#' values_drop_na = TRUE
#' )
#'
#' # Multiple variables stored in column names
#' lazy_dt(who) %>%
#' pivot_longer(
#' cols = new_sp_m014:newrel_f65,
#' names_to = c("diagnosis", "gender", "age"),
#' names_pattern = "new_?(.*)_(.)(.*)",
#' values_to = "count"
#' )
#'
#' # Multiple observations per row
#' anscombe_dt <- lazy_dt(anscombe)
#' anscombe_dt %>%
#' pivot_longer(
#' everything(),
#' names_to = c(".value", "set"),
#' names_pattern = "(.)(.)"
#' )
# exported onLoad
pivot_longer.dtplyr_step <- function(data,
cols,
names_to = "name",
names_prefix = NULL,
names_sep = NULL,
names_pattern = NULL,
names_ptypes = NULL,
names_transform = NULL,
names_repair = "check_unique",
values_to = "value",
values_drop_na = FALSE,
values_ptypes = NULL,
values_transform = NULL,
...) {
if (!is.null(names_ptypes)) {
abort("`names_ptypes` is not supported by dtplyr")
}
if (!is.null(names_transform)) {
abort("`names_transform` is not supported by dtplyr")
}
if (!is.null(values_ptypes)) {
abort("`values_ptypes` is not supported by dtplyr")
}
if (!is.null(values_transform)) {
abort("`values_transform` is not supported by dtplyr")
}
measure_vars <- names(tidyselect::eval_select(enquo(cols), data))
if (length(measure_vars) == 0) {
abort("`cols` must select at least one column.")
}
multiple_names_to <- length(names_to) > 1
uses_dot_value <- ".value" %in% names_to
variable_name <- "variable"
if (uses_dot_value) {
if (!is.null(names_sep)) {
names_to_setup <- str_separate(measure_vars, into = names_to, sep = names_sep)
} else if (!is.null(names_pattern)) {
names_to_setup <- str_extract(measure_vars, into = names_to, names_pattern)
} else {
abort("If you use '.value' in `names_to` you must also supply
`names_sep' or `names_pattern")
}
.value <- names_to_setup$.value
v_fct <- factor(.value, levels = unique(.value))
measure_vars <- split(measure_vars, v_fct)
values_to <- names(measure_vars)
names(measure_vars) <- NULL
if (multiple_names_to) {
variable_name <- names_to[!names_to == ".value"]
.value_ids <- split(names_to_setup[[variable_name]], v_fct)
.value_id <- .value_ids[[1]]
# Make sure data is "balanced"
# https://github.com/Rdatatable/data.table/issues/2575
# The list passed to measure.vars also needs the same number of column names per element
equal_ids <- map_lgl(
.value_ids[-1],
function(.x) isTRUE(all.equal(.value_id, .x))
)
if (all(equal_ids)) {
.value_id <- vctrs::vec_rep_each(.value_id, length(pull(data)))
} else {
abort("`data.table::melt()` doesn't currently support melting of unbalanced datasets.")
}
}
} else if (multiple_names_to) {
if (is.null(names_sep) && is.null(names_pattern)) {
abort("If you supply multiple names in `names_to` you must also
supply `names_sep` or `names_pattern`")
} else if (!is.null(names_sep) && !is.null(names_pattern)) {
abort("only one of names_sep or names_pattern should be provided")
}
} else {
variable_name <- names_to
}
args <- list(
measure.vars = measure_vars,
variable.name = variable_name,
value.name = values_to,
na.rm = values_drop_na,
variable.factor = FALSE
)
# Clean up call args if defaults are used
if (variable_name == "variable") {
args$variable.name <- NULL
}
if (identical(values_to, "value")) {
args$value.name <- NULL
}
if (is_false(values_drop_na)) {
args$na.rm <- NULL
}
id_vars <- setdiff(data$vars, unlist(measure_vars))
out <- step_call(
data,
"melt",
args = args,
vars = c(id_vars, variable_name, values_to)
)
if (!is.null(names_prefix)) {
out <- mutate(out, !!variable_name := gsub(paste0("^", names_prefix), "", !!sym(variable_name)))
}
if (multiple_names_to && uses_dot_value) {
out <- mutate(out, !!variable_name := !!.value_id)
} else if (multiple_names_to && !uses_dot_value) {
if (!is.null(names_sep)) {
into_cols <- str_separate(pull(out, !!sym(variable_name)), names_to, sep = names_sep)
} else {
into_cols <- str_extract(pull(out, !!sym(variable_name)), into = names_to, regex = names_pattern)
}
out <- mutate(out, !!!into_cols)
# Need to drop variable_name and move names_to vars to correct position
# Recreates relocate logic so only select is necessary, not relocate + select
out_vars <- out$vars
var_idx <- which(out_vars == variable_name)
before_vars <- out_vars[seq_along(out_vars) < var_idx]
after_vars <- out_vars[seq_along(out_vars) > var_idx]
out <- select(out, !!!syms(before_vars), !!!syms(names_to), !!!syms(after_vars))
} else if (!multiple_names_to && uses_dot_value) {
out <- mutate(out, variable = NULL)
}
step_repair(out, repair = names_repair)
}
# ==============================================================================
# inlined from tidyr
# https://github.com/tidyverse/tidyr/issues/1103
# ==============================================================================
# nocov start
# str_extract() -----------------------------------------------------------------
str_extract <- function(x, into, regex, convert = FALSE) {
stopifnot(
is_string(regex),
is_character(into)
)
out <- str_match_first(x, regex)
if (length(out) != length(into)) {
stop(
"`regex` should define ", length(into), " groups; ", ncol(out), " found.",
call. = FALSE
)
}
# Handle duplicated names
if (anyDuplicated(into)) {
pieces <- split(out, into)
into <- names(pieces)
out <- lapply(pieces, pmap_chr, paste0, sep = "")
}
into <- as_utf8_character(into)
non_na_into <- !is.na(into)
out <- out[non_na_into]
names(out) <- into[non_na_into]
if (convert) {
out[] <- lapply(out, utils::type.convert, as.is = TRUE)
}
out
}
str_match_first <- function(string, regex) {
loc <- regexpr(regex, string, perl = TRUE)
loc <- group_loc(loc)
out <- lapply(
seq_len(loc$matches),
function(i) substr(string, loc$start[, i], loc$end[, i])
)
out[-1]
}
group_loc <- function(x) {
start <- cbind(as.vector(x), attr(x, "capture.start"))
end <- start + cbind(attr(x, "match.length"), attr(x, "capture.length")) - 1L
no_match <- start == -1L
start[no_match] <- NA
end[no_match] <- NA
list(matches = ncol(start), start = start, end = end)
}
# str_separate() -----------------------------------------------------------------
str_separate <- function(x, into, sep, convert = FALSE, extra = "warn", fill = "warn") {
if (!is.character(into)) {
abort("`into` must be a character vector")
}
if (is.numeric(sep)) {
out <- strsep(x, sep)
} else if (is_character(sep)) {
out <- data.table::tstrsplit(x, sep, fixed = TRUE, names = TRUE)
out <- as_tibble(out)
} else {
abort("`sep` must be either numeric or character")
}
names(out) <- as_utf8_character(into)
out <- out[!is.na(names(out))]
if (convert) {
out[] <- lapply(out, utils::type.convert, as.is = TRUE)
}
out
}
strsep <- function(x, sep) {
nchar <- nchar(x)
pos <- lapply(sep, function(i) {
if (i >= 0) return(i)
pmax(0, nchar + i)
})
pos <- c(list(0), pos, list(nchar))
lapply(1:(length(pos) - 1), function(i) {
substr(x, pos[[i]] + 1, pos[[i + 1]])
})
}
str_split_n <- function(x, pattern, n_max = -1) {
if (is.factor(x)) {
x <- as.character(x)
}
m <- gregexpr(pattern, x, perl = TRUE)
if (n_max > 0) {
m <- lapply(m, function(x) slice_match(x, seq_along(x) < n_max))
}
regmatches(x, m, invert = TRUE)
}
slice_match <- function(x, i) {
structure(
x[i],
match.length = attr(x, "match.length")[i],
index.type = attr(x, "index.type"),
useBytes = attr(x, "useBytes")
)
}
list_indices <- function(x, max = 20) {
if (length(x) > max) {
x <- c(x[seq_len(max)], "...")
}
paste(x, collapse = ", ")
}
# pmap_chr() -----------------------------------------------------------------
pmap_chr <- function(.l, .f, ...) {
as.character(pmap(.l, .f, ...))
}
# nocov end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.