tests/testthat/test-data_modify.R

test_that("data_modify works", {
  data(iris)
  out <- data_modify(
    iris,
    Sepal_W_z = standardize(Sepal.Width),
    Sepal_Wz_double = 2 * Sepal_W_z
  )
  expect_equal(
    out$Sepal_W_z,
    as.vector(scale(iris$Sepal.Width)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    out$Sepal_Wz_double,
    2 * as.vector(scale(iris$Sepal.Width)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify works with strings", {
  data(iris)
  out <- data_modify(
    iris,
    "Sepal_W_z = standardize(Sepal.Width)"
  )
  expect_equal(
    out$Sepal_W_z,
    as.vector(scale(iris$Sepal.Width)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  out <- data_modify(
    iris,
    c(
      "Sepal_W_z = standardize(Sepal.Width)",
      "Sepal_Wz_double = 2 * Sepal_W_z"
    )
  )
  expect_equal(
    out$Sepal_Wz_double,
    2 * as.vector(scale(iris$Sepal.Width)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify preserves labels", {
  data(efc)
  out <- data_modify(
    efc,
    c12hour_c = center(c12hour),
    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)
  )
  expect_identical(
    attributes(out$c12hour_c)$label,
    attributes(efc$c12hour)$label
  )
  expect_identical(
    attributes(out$c12hour_z)$label,
    attributes(efc$c12hour)$label
  )
  out <- data_modify(
    efc,
    c(
      "c12hour_c = center(c12hour)",
      "c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)"
    )
  )
  expect_identical(
    attributes(out$c12hour_c)$label,
    attributes(efc$c12hour)$label
  )
  expect_identical(
    attributes(out$c12hour_z)$label,
    attributes(efc$c12hour)$label
  )
})


test_that("data_modify recycling works", {
  data(iris)
  out <- data_modify(iris, x = 1)
  expect_equal(out$x, rep(1, nrow(iris)), ignore_attr = TRUE)
  out <- data_modify(iris, x = c(1, 2))
  expect_equal(out$x, rep(c(1, 2), nrow(iris) / 2), ignore_attr = TRUE)
  expect_error(data_modify(iris, x = 1:4), regex = "same length")
  out <- data_modify(iris, x = "a")
  expect_equal(out$x, rep("a", nrow(iris)), ignore_attr = TRUE)
})


test_that("data_modify recycling works", {
  data(iris)
  d <- data_group(iris, "Species")
  expect_silent(data_modify(d, x = 1, test = 1:2))
})


test_that("data_modify expression in character vector", {
  data(iris)
  x <- "var_a = Sepal.Width"
  out <- data_modify(iris, x)
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a"
    )
  )
})


test_that("data_modify expression in character vector", {
  data(iris)
  foo <- function(data) {
    y <- "var_a = Sepal.Width"
    head(data_modify(data, y))
  }
  out <- foo(iris)
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a"
    )
  )
  expect_identical(out$var_a, out$Sepal.Width)

  foo2 <- function(data, z) {
    head(data_modify(data, z))
  }
  out <- foo2(iris, "var_a = Sepal.Width")
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a"
    )
  )
  expect_identical(out$var_a, out$Sepal.Width)
})


test_that("data_modify expression in character vector", {
  data(iris)
  aa <- "2 * Sepal.Width"
  out <- data_modify(iris, new_var = aa)
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "new_var"
    )
  )
  expect_identical(out$new_var, 2 * out$Sepal.Width)

  foo_nv <- function(data, z) {
    head(data_modify(data, new_var = z))
  }
  out <- foo_nv(iris, "2 * Sepal.Width")
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "new_var"
    )
  )
  expect_identical(out$new_var, 2 * out$Sepal.Width)
})


test_that("data_modify expression as character vector", {
  data(iris)
  x <- "var_a = Sepal.Width"
  y <- "Sepal_Wz_double = 2 * var_a"
  out <- data_modify(iris, c(x, y))
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a", "Sepal_Wz_double"
    )
  )
  expect_identical(out$var_a, out$Sepal.Width)
  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)

  foo1 <- function(data) {
    x1 <- "var_a = Sepal.Width"
    y1 <- "Sepal_Wz_double = 2 * var_a"
    data_modify(iris, c(x1, y1))
  }
  out <- foo1(iris)
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a", "Sepal_Wz_double"
    )
  )
  expect_identical(out$var_a, out$Sepal.Width)
  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)

  foo2 <- function(data, z3) {
    data_modify(data, z3)
  }
  out <- foo2(iris, c("var_a = Sepal.Width", "Sepal_Wz_double = 2 * var_a"))
  expect_identical(
    colnames(out),
    c(
      "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
      "Species", "var_a", "Sepal_Wz_double"
    )
  )
  expect_identical(out$var_a, out$Sepal.Width)
  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)

  # works with separated strings
  data(iris)
  out <- data_modify(iris, "var_a = Sepal.Width", "Sepal_Wz_double = 2 * var_a")
  expect_identical(out$var_a, out$Sepal.Width)
  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)
})


test_that("data_modify remove variables with NULL", {
  data(iris)
  out <- data_modify(iris, PL_new = 2 * Petal.Length, Petal.Length = NULL)
  expect_identical(colnames(out), c("Sepal.Length", "Sepal.Width", "Petal.Width", "Species", "PL_new"))
  expect_identical(out$PL_new, 2 * iris$Petal.Length)
})


test_that("data_modify works on grouped data", {
  data(efc)
  grouped_efc <- data_group(efc, "c172code")
  out <- data_modify(
    grouped_efc,
    c12hour_c = center(c12hour),
    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),
    c12hour_z2 = standardize(c12hour)
  )
  out2 <- lapply(by(efc["c12hour"], efc$c172code, scale), as.vector)
  expect_equal(
    na.omit(out$c12hour_z2[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify works on grouped data, with character vectors", {
  data(efc)
  grouped_efc <- data_group(efc, "c172code")
  out <- data_modify(
    grouped_efc,
    c(
      "c12hour_c = center(c12hour)",
      "c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)",
      "c12hour_z2 = standardize(c12hour)"
    )
  )
  out2 <- lapply(by(efc["c12hour"], efc$c172code, scale), as.vector)
  expect_equal(
    na.omit(out$c12hour_z2[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    na.omit(out$c12hour_z[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify works on grouped data, preserves attributes and labels", {
  data(efc)
  grouped_efc <- data_group(efc, "c172code")
  out <- data_modify(
    grouped_efc,
    c12hour_c = center(c12hour)
  )
  expect_identical(
    attributes(out$c12hour)$label,
    attributes(efc$c12hour)$label
  )
})


test_that("data_modify works on grouped data, inside functions", {
  data(efc)
  foo4 <- function(data) {
    data_modify(
      data,
      c(
        "c12hour_c = center(c12hour)",
        "c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)",
        "c12hour_z2 = standardize(c12hour)"
      )
    )
  }
  out <- foo4(data_group(efc, "c172code"))
  out2 <- lapply(by(efc["c12hour"], efc$c172code, scale), as.vector)
  expect_equal(
    na.omit(out$c12hour_z2[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    na.omit(out$c12hour_z[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )

  foo5 <- function(data, rec) {
    data_modify(data, rec)
  }
  out <- foo5(
    data_group(efc, "c172code"),
    c(
      "c12hour_c = center(c12hour)",
      "c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)",
      "c12hour_z2 = standardize(c12hour)"
    )
  )
  out2 <- lapply(by(efc["c12hour"], efc$c172code, scale), as.vector)
  expect_equal(
    na.omit(out$c12hour_z2[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    na.omit(out$c12hour_z[out$c172code == 1]),
    out2[[1]],
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify errors for non df", {
  expect_error(data_modify(iris$Sepal.Length, Sepal_W_z = standardize(Sepal.Width)))
})


test_that("data_modify errors for non df", {
  data(efc)
  a <- "center(c22hour)" # <---------------- error in variable name
  b <- "c12hour_c / sd(c12hour, na.rm = TRUE)"
  expect_error(
    data_modify(efc, c12hour_c = a, c12hour_z = b),
    regex = "c22hour"
  )

  a <- "center(c12hour)"
  b <- "c12hour_c / sd(c21hour, na.rm = TRUE)"
  expect_error(
    data_modify(efc, c12hour_c = a, c12hour_z = b),
    regex = "c12hour_c"
  )
  expect_error(
    data_modify(efc, c12hour_c = a, c12hour_z = b),
    regex = "second expression"
  )
})


test_that("data_modify message about recycling values", {
  expect_snapshot(head(data_modify(iris, Sepal.Width = 1)))
  expect_snapshot(head(data_modify(iris, Sepal.Width = 1:2)))
  expect_snapshot(head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1)))
  expect_snapshot(head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1:2)))
  expect_snapshot(head(data_modify(iris, Petal.Length = 2, Sepal.Width = 2)))
})


test_that("data_modify message about modified variables", {
  expect_snapshot(head(data_modify(iris, Sepal.Width = 2 * Sepal.Width)))
  expect_snapshot(head(data_modify(iris, Petal.Length = Sepal.Length, Sepal.Width = Petal.Width)))
})


test_that("data_modify works with character variables, and inside functions", {
  data(efc)
  a <- "center(c12hour)"
  b <- "c12hour_c / sd(c12hour, na.rm = TRUE)"
  d <- "standardize(c12hour)"
  out <- data_modify(
    efc,
    c12hour_c = a,
    c12hour_z = b,
    c12hour_z2 = d
  )
  expect_equal(
    out$c12hour_z2,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    out$c12hour_z,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )

  # when calling functions
  a1 <- "center(c12hour)"
  b1 <- "c12hour_c / sd(c12hour, na.rm = TRUE)"
  d1 <- "standardize(c12hour)"
  foo <- function(data, x1, x2, x3) {
    data_modify(
      efc,
      c12hour_c = x1,
      c12hour_z = x2,
      c12hour_z2 = x3
    )
  }
  out <- foo(efc, a1, b1, d1)
  expect_equal(
    out$c12hour_z2,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    out$c12hour_z,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )

  # when calling functions, arguments inside function defined
  foo2 <- function(data) {
    a2 <- "center(c12hour)"
    b2 <- "c12hour_c / sd(c12hour, na.rm = TRUE)"
    d2 <- "standardize(c12hour)"

    data_modify(
      efc,
      c12hour_c = a2,
      c12hour_z = b2,
      c12hour_z2 = d2
    )
  }
  out <- foo2(efc)
  expect_equal(
    out$c12hour_z2,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
  expect_equal(
    out$c12hour_z,
    as.vector(scale(efc$c12hour)),
    ignore_attr = TRUE,
    tolerance = 1e-3
  )
})


test_that("data_modify works with grouped df when overwriting existing variables", {
  data(iris)
  iris_grp <- data_group(iris, "Species")
  out <- data_modify(iris_grp, Sepal.Length = normalize(Sepal.Length))
  expect_equal(head(out$Sepal.Length), c(0.53333, 0.4, 0.26667, 0.2, 0.46667, 0.73333), tolerance = 1e-3)

  out <- data_modify(
    iris_grp,
    Sepal.Length = normalize(Sepal.Length),
    Sepal.Length2 = 2 * Sepal.Length
  )
  expect_equal(head(out$Sepal.Length2), 2 * c(0.53333, 0.4, 0.26667, 0.2, 0.46667, 0.73333), tolerance = 1e-3)
})


test_that("data_modify works with functions that return character vectors", {
  data(iris)
  set.seed(123)
  out <- data_modify(iris, grp = sample(letters[1:3], nrow(iris), TRUE))
  expect_identical(head(out$grp), c("c", "c", "c", "b", "c", "b"))
})

Try the datawizard package in your browser

Any scripts or data that you put into this service are public.

datawizard documentation built on Sept. 15, 2023, 9:06 a.m.