to_long: Convert data to long or to wide form

View source: R/to_wide.R

to_longR Documentation

Convert data to long or to wide form

Description

to_long increases number of rows in the dataset and reduce number of columns. to_wide makes invert transformation. You can use cols for selecting variables in the arguments. See examples.

Usage

to_long(
  data,
  columns = NULL,
  keep = NULL,
  names_in = "variable",
  values_in = "value",
  drop_na = FALSE,
  names_factor = TRUE,
  value_factor = FALSE,
  ...
)

to_wide(
  data,
  keep = NULL,
  names_in = variable,
  values_in = value,
  fun = identity,
  sep = "_",
  fill = NA,
  missing_comb = c("none", "rows", "columns", "all"),
  ...
)

Arguments

data

A data.frame to convert

columns

unquoted names of variables for stacking. When missing, we will stack all columns outside keep columns.

keep

unquoted names of columns which will be kept as is, e. g. only recycled or deduplicated. If missing, it is all columns except stacked or unstacked. If FALSE then nothing will be kept.

names_in

name of the stacked variable names column. The default name is 'variable'. It is quoted in the to_long and unquoted in to_wide. If FALSE in the to_wide than nothing will be widening.

values_in

name(-s) of the stacked data values column(s). The default name is 'value'. Multiple names can be provided here for the case when columns is a list, though note well that the names provided in columns take precedence. It is quoted in the to_long and unqoted in to_wide

drop_na

If TRUE, NA values will be removed from the stacked data.

names_factor

If TRUE, the column with names will be converted to factor, else it will be a character column. TRUE by default.

value_factor

If TRUE, the value column will be converted to factor, else the stacked values type is left unchanged. FALSE by default.

...

other arguments passed to data.table::melt/data.table::dcast

fun

Should the data be aggregated before casting? By default, it is identity - no aggregation. To use multiple aggregation functions, pass a list; see Examples.

sep

Character vector of length 1, indicating the separating character in variable names generated during casting. Default is "_".

fill

Value with which to fill missing cells. NA by default. If fun is present, takes the value by applying the function on a 0-length vector.

missing_comb

One of "none" (the default), "rows" - include missing combinations in rows, "columns" - include missing combinations in columns, and "all" include all missing combinations.

Value

data.table in the wide or long form.

Examples

data(iris)

# 'to_long'

long_iris = iris %>%
    to_long(keep = Species)

long_iris

iris_with_stat = long_iris %>%
    take(mean = mean(value),
         sd = sd(value),
         n = .N*1.0,
         by = .(Species, variable)
    ) %>%
    to_long(columns = c(mean, sd, n), names_in = "stat")

# 'to_wide' - table with multiple stats
iris_with_stat %>%
    to_wide()


iris_with_stat %>%
    to_wide(names_in = c(variable, stat))

iris_with_stat %>%
    to_wide(names_in = c(variable, Species))

# 'to_wide' - aggregation function
long_iris %>%
    to_wide(fun = list(Mean = mean, SD = sd, N = length))

# multiple variables
iris %>%
    to_long(list(Sepal = cols("^Sepal"), Petal = cols("^Petal"))) %>%
    let(
        variable = factor(variable, levels = 1:2, labels = c("Length", "Width"))
    ) %>%
    to_wide(values_in = c(Sepal, Petal))

# '%to%' selector - example from tidyr::pivot_longer

data(anscombe)
anscombe %>%
    to_long(
        list(x = x1 %to% x4, y = y1 %to% y4),
        names_in = "set"
    )

######################################
## Examples from data.table melt/dcast
######################################

set.seed(45)
DT = data.table(
    i_1 = c(1:5, NA)*1.0,
    i_2 = c(NA,6,7,8,9,10)*1.0,
    f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
    f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
    c_1 = sample(c(letters[1:3], NA), 6, TRUE),
    d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
    d_2 = as.Date(6:1, origin="2012-01-01")
)

# id, values as character/integer/numeric vectors

to_long(DT, f_1, keep = 1:2)
to_long(DT, f_1, keep = c(i_1, i_2))
to_long(DT, f_1, keep = i_1 %to% i_2)
to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE)
to_long(DT, f_1, keep = cols("i_{1:2}"))
to_long(DT, f_1, keep = cols("^i_"))
to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val")

col_var = "^i_"
to_long(DT, 3, keep = cols(col_var))

to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE)

to_long(mtcars)
to_long(mtcars, keep = am)
to_long(mtcars, columns = c(am, vs, mpg))
to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE)
to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)
to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE)

data("ChickWeight")
names(ChickWeight) = tolower(names(ChickWeight))
DT = to_long(ChickWeight, keep=2:4)

to_wide(DT, keep = time, fun = mean)
to_wide(DT, keep = FALSE, fun = mean)
to_wide(DT, keep = diet, fun = mean)
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all")
to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0)
to_wide(DT, chick, time, fun = mean)



# using FALSE
DT = data.table(v1 = rep(1:2, each = 6),
                v2 = rep(rep(1:3, 2), each = 2),
                v3 = rep(1:2, 6),
                v4 = rnorm(6))

## for each combination of (v1, v2), add up all values of v4
to_wide(DT,
        cols("^v(1|2)"),
        names_in = FALSE,
        values_in = v4,
        fun = sum
)

# multiple values_in and multiple fun
DT = data.table(x=sample(5,20,TRUE),
                y=sample(2,20,TRUE),
                z=sample(letters[1:2], 20,TRUE),
                d1 = runif(20),
                d2=1L)

# multiple values_in
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = sum,
        fill = 0)

# multiple funs
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = d1,
        fun = list(sum = sum, mean = mean),
        fill = NULL)

# multiple fun and values_in (all combinations)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = c(d1, d2),
        fun = list(sum = sum, mean = mean)
)

# multiple fun and values_in (one-to-one)
to_wide(DT,
        keep = c(x, y),
        names_in = z,
        values_in = list(d1, d2),
        fun = list(sum = sum, mean = mean)
)

maditr documentation built on May 29, 2024, 3:55 a.m.