Subassignment

# Vignette-wide knitr chunk options:
# - error = TRUE: a chunk that raises an error shows the error instead of
#   aborting the render (several examples below presumably fail on purpose).
# - collapse = TRUE: source and output of a chunk are shown in one block.
# - comment = "#>": prefix used for printed output lines.
knitr::opts_chunk$set(
  error = TRUE,
  collapse = TRUE,
  comment = "#>"
)

# Internal tibble helper (note the `:::`). NOTE(review): presumably registers
# the knitr hooks that render data frame and tibble results side by side --
# confirm against the tibble sources.
tibble:::set_dftbl_hooks()

# lifecycle options: keep deprecation warnings enabled, make soft deprecations
# verbose, and repeat warnings (as the option names suggest; see the lifecycle
# package documentation for the exact semantics).
options(
  lifecycle_disable_warnings = FALSE,
  lifecycle_verbose_soft_deprecation = TRUE,
  lifecycle_repeat_warnings = TRUE
)

This vignette attempts to provide a comprehensive overview of all subassignment operations, highlighting where the tibble implementation differs from the data frame implementation.

library(tibble)

# Build the example data frame used throughout: four rows with an integer
# column `a`, a character column `b`, and a list column `cd` whose cells have
# different lengths and types.
new_df <- function() {
  example_data <- data.frame(a = 1:4)
  example_data[["b"]] <- letters[5:8]
  # The list column is added after construction so each element stays one cell.
  example_data[["cd"]] <- list(9, 10:11, 12:14, "text")
  example_data
}

# Tibble counterpart of new_df(): the same example data converted with
# as_tibble().
new_tbl <- function() {
  base_frame <- new_df()
  as_tibble(base_frame)
}

Results of the same code for data frames and tibbles are presented side by side:

# Print the example data.
new_df()

In the following, if the results are identical (after converting to a data frame if necessary), only the tibble result is shown, as in the example below. This makes differences easier to spot.

# Print the example data again.
new_df()

For subassignment, we need a fresh copy of the data for each test. The with_*() functions allow for a more concise notation (with_tbl() omitted here for brevity):

# Run an assignment expression against a fresh copy of the example data frame.
#
# `code` is captured unevaluated and spliced between creating `df` via
# new_df() and returning `df`, so it can refer to `df` directly.
# With `verbose = TRUE` the assembled expression is printed before it runs.
# Returns the modified `df`.
with_df <- function(code, verbose = FALSE) {
  code <- rlang::enexpr(code)

  wrapped <- rlang::quo({
    df <- new_df()
    !!code
    df
  })

  if (verbose) {
    # Show only the bare expression, not the quosure wrapper.
    rlang::expr_print(rlang::quo_get_expr(wrapped))
  }

  rlang::eval_tidy(wrapped)
}
# Run an assignment expression against a fresh copy of the example tibble.
#
# `code` is captured unevaluated and spliced between creating `tbl` via
# new_tbl() and returning `tbl`, so it can refer to `tbl` directly.
# With `verbose = TRUE` the assembled expression is printed before it runs.
# Returns the modified `tbl`.
with_tbl <- function(code, verbose = FALSE) {
  code <- rlang::enexpr(code)

  wrapped <- rlang::quo({
    tbl <- new_tbl()
    !!code
    tbl
  })

  if (verbose) {
    # Show only the bare expression, not the quosure wrapper.
    rlang::expr_print(rlang::quo_get_expr(wrapped))
  }

  rlang::eval_tidy(wrapped)
}

This function takes an assignment expression and executes it on a fresh copy of the data. The first example prints what's really executed, further examples omit this output.

# Reverse column `a`; verbose = TRUE also prints the expression that is run.
with_df(df$a <- rev(df$a), verbose = TRUE)

$<-

Scalars and full length

Assigning a scalar or a full-length vector to a column consistently overwrites existing data or appends a new column at the end. Partial matching doesn't happen:

# Length-1 value assigned to the existing columns a, b, cd and to the new
# name c (a prefix of cd, so this also exercises partial matching).
with_df(df$a <- 1)
with_df(df$b <- 1)
with_df(df$c <- 1)
with_df(df$cd <- 1)
# Full-length (4-element) value assigned to the same targets.
with_df(df$a <- 4:1)
with_df(df$b <- 4:1)
with_df(df$c <- 4:1)
with_df(df$cd <- 4:1)

Recycling

Tibbles allow recycling only for vectors of length 1 or of the same length as the data.

# Values of length 2, 3 and 5 assigned to a column of the 4-row data.
with_df(df$a <- 1:2)
with_df(df$a <- 1:3)
with_df(df$a <- 1:5)
# Length-2 value assigned to a column name that does not exist yet.
with_df(df$c <- 1:2)

Subset assignment

Updating parts of a column extracted by $ is the responsibility of the column vector. Tibbles can't control what happens after $ has returned.

# Replace the first two elements of the vector extracted by `$`:
# existing columns a, b, cd, and the non-existent name c.
with_df(df$a[1:2] <- 4:3)
with_df(df$b[1:2] <- 4:3)
with_df(df$c[1:2] <- 4:3)
with_df(df$cd[1:2] <- 4:3)
# Two replacement values for a selection of three and of four elements.
with_df(df$a[1:3] <- 4:3)
with_df(df$a[1:4] <- 4:3)

For columns of the stricter "vctrs_vctr" class, this class implements the check, which then works identically for data frames and tibbles:

# Add a column of class "vctrs_vctr", then replace its first two elements:
# first with a vctr built from integers, then with one built from characters.
with_df({ df$v <- vctrs::new_vctr(1:4); df$v[1:2] <- vctrs::new_vctr(4:3) })
with_df({ df$v <- vctrs::new_vctr(1:4); df$v[1:2] <- vctrs::new_vctr(letters[4:3]) })

[[<-

Scalars and full length

As with $ subsetting, columns are consistently overwritten, and partial matching doesn't occur. Numeric indexing is supported, but tibbles don't support creation of new numbered columns for a good reason.

# Length-1 value assigned by column name: existing columns a, b, cd and the
# new name c.
with_df(df[["a"]] <- 1)
with_df(df[["a"]] <- "x")
with_df(df[["b"]] <- "x")
with_df(df[["c"]] <- "x")
with_df(df[["cd"]] <- "x")
# Length-1 value assigned by position: existing columns 1 and 2, the next
# free position 4, and position 5 (which would leave a gap after 3 columns).
with_df(df[[1]] <- "x")
with_df(df[[2]] <- "x")
with_df(df[[4]] <- "x")
with_df(df[[5]] <- "x")
# Full-length (4-element) values assigned by column name.
with_df(df[["a"]] <- 4:1)
with_df(df[["a"]] <- letters[4:1])
with_df(df[["b"]] <- letters[4:1])
with_df(df[["c"]] <- letters[4:1])
with_df(df[["cd"]] <- letters[4:1])

Cells

Tibbles are stricter when updating single cells: the value must be coercible to the existing contents. Updating a list column requires the contents to be wrapped in a list, consistent with [[ subsetting, which returns a list if a cell in a list column is accessed:

# Single cell in the integer column a: whole-number double, fractional double,
# and character value.
with_df(df[[2, "a"]] <- 1)
with_df(df[[2, "a"]] <- 1.5)
with_df(df[[2, "a"]] <- "x")
# Single cell in the character column b.
with_df(df[[2, "b"]] <- "x")
# The same cells addressed by column position.
with_df(df[[2, 1]] <- "x")
with_df(df[[2, 2]] <- "x")
# Cell in the list column cd: bare value vs. value wrapped in a list.
with_df(df[[2, "cd"]] <- "x")
with_df(df[[2, "cd"]] <- list("x"))
# Column name c does not exist in the data.
with_df(df[[2, "c"]] <- "x")
# More than one row index, or more than one column index, passed to `[[`.
with_df(df[[1:2, "cd"]] <- "x")
with_df(df[[1:2, "c"]] <- "x")
with_df(df[[2, c("cd", "d")]] <- "x")

[<-

Scalars

# Single cell in the integer column a: whole-number double, fractional double,
# and character value.
with_df(df[2, "a"] <- 1)
with_df(df[2, "a"] <- 1.5)
with_df(df[2, "a"] <- "x")
# Single cell in the character column b.
with_df(df[2, "b"] <- "x")
# Cell in the list column cd: bare value vs. value wrapped in a list.
with_df(df[2, "cd"] <- "x")
with_df(df[2, "cd"] <- list("x"))
# Column name c does not exist in the data.
with_df(df[2, "c"] <- "x")
# Cells addressed by column position: existing columns 1-3 and position 4,
# which is one past the last column.
with_df(df[2, 1] <- "x")
with_df(df[2, 2] <- "x")
with_df(df[2, 3] <- "x")
with_df(df[2, 4] <- "x")

Full length columns

# Full-length (4-element) value assigned to one column, selected by name
# (existing a, b, cd; new c) ...
with_df(df[, "a"] <- 4:1)
with_df(df[, "b"] <- 4:1)
with_df(df[, "c"] <- 4:1)
with_df(df[, "cd"] <- 4:1)
# ... and by position (existing 1-3; 4 is one past the last column).
with_df(df[, 1] <- 4:1)
with_df(df[, 2] <- 4:1)
with_df(df[, 3] <- 4:1)
with_df(df[, 4] <- 4:1)
# Length-1 value assigned to the same targets, by name ...
with_df(df[, "a"] <- 1)
with_df(df[, "b"] <- 1)
with_df(df[, "c"] <- 1)
with_df(df[, "cd"] <- 1)
# ... and by position.
with_df(df[, 1] <- 1)
with_df(df[, 2] <- 1)
with_df(df[, 3] <- 1)
with_df(df[, 4] <- 1)

Multiple full length columns

# Two target columns, atomic value: full length and length 1.
with_df(df[, c("a", "b")] <- 4:1)
with_df(df[, c("a", "b")] <- 1)
# Two target columns, data frame value with matching, swapped, and unrelated
# column names.
with_df(df[, c("a", "b")] <- data.frame(a = 4:1, b = letters[4:1]))
with_df(df[, c("a", "b")] <- data.frame(b = 4:1, a = letters[4:1]))
with_df(df[, c("a", "b")] <- data.frame(c = 4:1, d = letters[4:1]))
# Value with fewer (one) and more (three) columns than targets.
with_df(df[, c("a", "b")] <- data.frame(a = 4:1))
with_df(df[, c("a", "b")] <- data.frame(a = 4:1, b = letters[4:1], c = 1:4))
# Value built from unnamed arguments.
with_df(df[, c("a", "b")] <- data.frame(4:1, 1))
# Three target columns (including the new name c, or the list column cd) but
# only two value columns.
with_df(df[, c("a", "b", "c")] <- data.frame(4:1, letters[4:1]))
with_df(df[, c("a", "b", "cd")] <- data.frame(4:1, letters[4:1]))

Full length rows

# Whole row 2 replaced by a single atomic value.
with_df(df[2, ] <- 1)
with_df(df[2, ] <- "x")
# Whole row 2 replaced by a one-row tibble with two, three, and four columns
# (the data has three; note the value's third column is named c, not cd).
with_df(df[2, ] <- tibble(a = 1, b = "x"))
with_df(df[2, ] <- tibble(a = 1, b = "x", c = list("y")))
with_df(df[2, ] <- tibble(a = 1, b = "x", c = list("y"), d = "z"))
# Row index 0, and row index 5 (one past the last of the four rows).
with_df(df[0, ] <- tibble(a = 1, b = "x", c = list("y")))
with_df(df[5, ] <- tibble(a = 1, b = "x", c = list("y")))

Multiple full length rows

# Rows 2 and 3 replaced by atomic values of length 1 and 2.
with_df(df[2:3, ] <- 1)
with_df(df[2:3, ] <- 1:2)
with_df(df[2:3, ] <- c("x", "y"))
# Rows 2 and 3 replaced by tibbles: two columns; three columns with one row
# and with two rows; four columns.
with_df(df[2:3, ] <- tibble(a = 1:2, b = c("x", "y")))
with_df(df[2:3, ] <- tibble(a = 1, b = "x", c = list("y")))
with_df(df[2:3, ] <- tibble(a = 1:2, b = "x", c = list("y")))
with_df(df[2:3, ] <- tibble(a = 1, b = "x", c = list("y"), d = "z"))
# Other row selections: negative indices (all rows except 1 and 2), an index
# vector containing 0, and a range extending one past the last row.
with_df(df[-(1:2), ] <- tibble(a = 1:2, b = "x", c = list("y")))
with_df(df[0:1, ] <- tibble(a = 1:2, b = "x", c = list("y")))
with_df(df[4:5, ] <- tibble(a = 1:2, b = "x", c = list("y")))

Unspecified

# `df[] <-` with atomic values: length 1, full length (4), shorter (3), and
# longer (5) than the number of rows.
with_df(df[] <- 1)
with_df(df[] <- 4:1)
with_df(df[] <- 3:1)
with_df(df[] <- 5:1)
# `df[] <-` with data frames: two columns; three columns with four rows and
# with one row; four columns; and the data itself.
with_df(df[] <- data.frame(1, "x"))
with_df(df[] <- data.frame(4:1, "x", 2))
with_df(df[] <- data.frame(1, "x", 2))
with_df(df[] <- data.frame(1, "x", 2, 3))
with_df(df[] <- df)
# The same values with both indices left empty (`df[,] <-`).
with_df(df[,] <- 1)
with_df(df[,] <- 4:1)
with_df(df[,] <- 3:1)
with_df(df[,] <- 5:1)
with_df(df[,] <- data.frame(1, "x"))
with_df(df[,] <- data.frame(4:1, "x", 2))
with_df(df[,] <- data.frame(1, "x", 2))
with_df(df[,] <- data.frame(1, "x", 2, 3))
with_df(df[,] <- df)

Subset assignment

Due to tibble's default of drop = FALSE, updating a portion of a [ subset is still safe, because tibble is still in control. Only one example is given here.

# Chained assignment: select column a with `[`, then replace row 1 of that
# subset with a character value.
with_df(df["a"][1, ] <- "b")


Try the tibble package in your browser

Any scripts or data that you put into this service are public.

tibble documentation built on March 31, 2023, 11 p.m.