#' Pivot data from long to wide
#'
#' `pivot_wider()` "widens" data, increasing the number of columns and decreasing the number of rows. The inverse
#' transformation is [pivot_longer()].
#'
#' @param data `data.frame`. The data to pivot.
#' @param id_cols `character(1)`. The name of the column that identifies the rows. If `NULL`, it will use all the unique
#' rows.
#' @param names_from `character(n)`. The name of the column(s) that contains the levels to be used as future column
#' names.
#' @param names_prefix `character(1)`. String added to the start of every variable name. This is particularly useful if
#' `names_from` is a numeric vector and you want to create syntactic variable names.
#' @param names_sep `character(1)`. If `names_from` or `values_from` contains multiple variables, this will be used to
#' join their values together into a single string to use as a column name.
#' @param names_glue `character(1)`. Instead of `names_sep` and `names_prefix`, you can supply a
#' [glue specification](https://glue.tidyverse.org/index.html) that uses the `names_from` columns to create custom
#' column names. Note that the only delimiters supported by `names_glue` are curly brackets, `{` and `}`.
#' @param values_from `character(n)`. The name of the column that contains the values to be used as future variable
#' values.
#' @param values_fill `numeric(n)`. Optionally, a (scalar) value that will be used to replace missing values in the new
#' columns created.
#' @param ... Not used for now.
#'
#' @return If a tibble was provided as input, `pivot_wider()` also returns a
#' tibble. Otherwise, it returns a data frame.
#'
#' @examples
#' data_long <- read.table(header = TRUE, text = "
#' subject sex condition measurement
#' 1 M control 7.9
#' 1 M cond1 12.3
#' 1 M cond2 10.7
#' 2 F control 6.3
#' 2 F cond1 10.6
#' 2 F cond2 11.1
#' 3 F control 9.5
#' 3 F cond1 13.1
#' 3 F cond2 13.8
#' 4 M control 11.5
#' 4 M cond1 13.4
#' 4 M cond2 12.9")
#'
#'
#' pivot_wider(
#' data_long,
#' id_cols = "subject",
#' names_from = "condition",
#' values_from = "measurement"
#' )
#'
#' pivot_wider(
#' data_long,
#' id_cols = "subject",
#' names_from = "condition",
#' values_from = "measurement",
#' names_prefix = "Var.",
#' names_sep = "."
#' )
#'
#' production <- expand.grid(
#' product = c("A", "B"),
#' country = c("AI", "EI"),
#' year = 2000:2014
#' ) %>%
#' filter((product == "A" & country == "AI") | product == "B") %>%
#' mutate(production = rnorm(nrow(.)))
#'
#' pivot_wider(
#' production,
#' names_from = c("product", "country"),
#' values_from = "production",
#' names_glue = "prod_{product}_{country}"
#' )
#'
#' @export
pivot_wider <- function(
data,
id_cols = NULL,
values_from = "Value",
names_from = "Name",
names_sep = "_",
names_prefix = "",
names_glue = NULL,
values_fill = NULL,
...
) {
old_names <- names(data)
names_from <- names(eval_select_pos(data, substitute(names_from)))
values_from <- names(eval_select_pos(data, substitute(values_from)))
# Preserve attributes
variable_attr <- lapply(data, attributes)
# Create an id for stats::reshape
if (is.null(id_cols)) {
row_index <- do.call(
paste,
c(data[, !names(data) %in% c(values_from, names_from), drop = FALSE], sep = "_")
)
if (length(row_index) == 0) row_index <- rep("", nrow(data))
data[["_Rows"]] <- row_index
id_cols <- "_Rows"
}
# create pattern of column names - stats::reshape renames columns that concatenates "v.names" + values - we only want
# values
current_colnames <- colnames(data)
current_colnames <- current_colnames[current_colnames != "_Rows"]
if (is.null(names_glue)) {
future_colnames <- unique(do.call(paste, c(data[, names_from, drop = FALSE], sep = names_sep)))
} else {
vars <- regmatches(names_glue, gregexpr("\\{\\K[^{}]+(?=\\})", names_glue, perl = TRUE))[[1]]
tmp_data <- unique(data[, vars])
future_colnames <- unique(apply(tmp_data, 1, function(x) {
tmp_vars <- list()
for (i in seq_along(vars)) {
tmp_vars[[i]] <- x[vars[i]]
}
tmp_colname <- gsub("\\{\\K[^{}]+(?=\\})", "", names_glue, perl = TRUE)
tmp_colname <- gsub("\\{\\}", "%s", tmp_colname)
do.call(sprintf, c(fmt = tmp_colname, tmp_vars))
}))
}
# stop if some column names would be duplicated (follow tidyr workflow)
if (any(future_colnames %in% current_colnames)) {
stop(
paste0(
"Some values of the columns specified in 'names_from' are already present
as column names. Either use `name_prefix` or rename the following columns: ",
paste(current_colnames[which(current_colnames %in% future_colnames)], sep = ", ")
),
call. = FALSE
)
}
# stats::reshape works strangely when several variables are in idvar/timevar so we unite all ids in a single temporary
# column that will be used by stats::reshape
data$new_time <- do.call(paste, c(data[, names_from, drop = FALSE], sep = "_"))
data[, names_from] <- NULL
wide <- stats::reshape(
data,
v.names = values_from,
idvar = id_cols,
timevar = "new_time",
sep = names_sep,
direction = "wide"
)
# Clean
if ("_Rows" %in% names(wide)) wide[["_Rows"]] <- NULL
rownames(wide) <- NULL
if (length(values_from) == 1) {
to_rename <- which(startsWith(names(wide), paste0(values_from, names_sep)))
names(wide)[to_rename] <- future_colnames
}
# Order columns as in tidyr
if (length(values_from) > 1) {
for (i in values_from) {
tmp1 <- wide[, which(!startsWith(names(wide), i))]
tmp2 <- wide[, which(startsWith(names(wide), i))]
wide <- cbind(tmp1, tmp2)
# TODO: Fix relocate() when using numeric values for .before and .after
# wide <- relocate(wide, starts_with(i), .after = -1)
}
}
new_cols <- setdiff(names(wide), old_names)
names(wide)[which(names(wide) %in% new_cols)] <- paste0(names_prefix, new_cols)
# Fill missing values
if (!is.null(values_fill)) {
if (length(values_fill) == 1) {
if (is.numeric(wide[[new_cols[1]]])) {
if (!is.numeric(values_fill)) {
stop(paste0("`values_fill` must be of type numeric."), call. = FALSE)
} else {
for (i in new_cols) {
wide[[i]] <- replace_na(wide[[i]], replace = values_fill)
}
}
} else if (is.character(wide[[new_cols[1]]])) {
if (!is.character(values_fill)) {
stop(paste0("`values_fill` must be of type character."), call. = FALSE)
} else {
for (i in new_cols) {
wide[[i]] <- replace_na(wide[[i]], replace = values_fill)
}
}
} else if (is.factor(wide[[new_cols[1]]])) {
if (!is.factor(values_fill)) {
stop(paste0("`values_fill` must be of type factor."), call. = FALSE)
} else {
for (i in new_cols) {
wide[[i]] <- replace_na(wide[[i]], replace = values_fill)
}
}
}
} else {
stop("`values_fill` must be of length 1.", call. = FALSE)
}
}
# Remove reshape attributes
attributes(wide)$reshapeWide <- NULL
# add back attributes where possible
for (i in colnames(wide)) {
attributes(wide[[i]]) <- variable_attr[[i]]
}
wide
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.