tests/testthat/test_collapse_groups_by.R

library(groupdata2)
context("collapse_groups_by_*()")

# These tests only test that no errors are thrown, as
# the functionality is tested in collapse_groups()

test_that("testing collapse_groups_by_size()", {

  # Set seed
  xpectr::set_test_seed(42)

  # Create data frame
  df <- data.frame(
    "participant" = factor(rep(1:20, 3)),
    "participant_2" = factor(rep(1:20, 3)),
    "age" = rep(sample(c(1:100), 20), 3),
    "answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
    "score" = sample(c(1:100), 20 * 3)
  )
  df <- df %>% dplyr::arrange(participant)
  df$session <- rep(c("1", "2", "3"), 20)

  # Sample rows to get unequal sizes per participant
  df <- dplyr::sample_n(df, size = 15)

  # Create the initial groups (to be collapsed)
  df <- fold(
    data = df,
    k = 8,
    method = "n_dist"
  )

  # Ungroup the data frame
  # Otherwise `collapse_groups()` would be
  # applied to each fold separately!
  df <- dplyr::ungroup(df)

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_size(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE
  )


  ## Testing 'df_coll'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df_coll),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    df_coll[["participant"]],
    structure(c(3L, 2L, 9L, 15L, 14L, 19L, 4L, 17L, 6L, 3L, 12L, 18L,
      13L, 7L, 11L), .Label = c("1", "2", "3", "4", "5", "6", "7",
      "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
      "19", "20"), class = "factor"))
  expect_equal(
    df_coll[["participant_2"]],
    structure(c(3L, 2L, 9L, 15L, 14L, 19L, 4L, 17L, 6L, 3L, 12L, 18L,
      13L, 7L, 11L), .Label = c("1", "2", "3", "4", "5", "6", "7",
      "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
      "19", "20"), class = "factor"))
  expect_equal(
    df_coll[["age"]],
    c(29, 93, 61, 40, 23, 39, 81, 88, 50, 29, 91, 10, 83, 70, 42),
    tolerance = 1e-4)
  expect_equal(
    df_coll[["answer"]],
    structure(c(4L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 1L, 1L, 2L,
      2L, 2L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    df_coll[["score"]],
    c(34, 18, 13, 40, 45, 33, 63, 90, 54, 17, 77, 43, 52, 22, 83),
    tolerance = 1e-4)
  expect_equal(
    df_coll[["session"]],
    c("3", "2", "3", "3", "3", "3", "1", "3", "1", "2", "3", "1", "2",
      "1", "2"),
    fixed = TRUE)
  expect_equal(
    df_coll[[".folds"]],
    structure(c(7L, 2L, 1L, 2L, 5L, 6L, 3L, 5L, 8L, 3L, 7L, 4L, 6L,
      8L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    df_coll[[".coll_groups"]],
    structure(c(2L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 2L, 2L, 1L,
      3L, 2L), .Label = c("1", "2", "3"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df_coll),
    c("participant", "participant_2", "age", "answer", "score", "session",
      ".folds", ".coll_groups"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df_coll),
    c("factor", "factor", "integer", "factor", "integer", "character",
      "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df_coll),
    c("integer", "integer", "integer", "integer", "integer", "character",
      "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df_coll),
    c(15L, 8L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df_coll)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df_coll'                                             ####

  summ <- df_coll %>%
    summarize_balances(group_cols = ".coll_groups")


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]], # Decent
    c(5, 6, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 3L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####


  # Method ascending
  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_size(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    method = "ascending"
  )

  summ <- df_coll %>%
    summarize_balances(group_cols = ".coll_groups")


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(3, 6, 6),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 3L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####

})

test_that("testing collapse_groups_by_numeric()", {

  # Set seed
  xpectr::set_test_seed(42)

  # Create data frame
  df <- data.frame(
    "participant" = factor(rep(1:20, 3)),
    "participant_2" = factor(rep(1:20, 3)),
    "age" = rep(sample(c(1:100), 20), 3),
    "answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
    "score" = sample(c(1:100), 20 * 3)
  )
  df <- df %>% dplyr::arrange(participant)
  df$session <- rep(c("1", "2", "3"), 20)

  # Sample rows to get unequal sizes per participant
  df <- dplyr::sample_n(df, size = 47)

  # Create the initial groups (to be collapsed)
  df <- fold(
    data = df,
    k = 8,
    method = "n_dist"
  )

  # Ungroup the data frame
  # Otherwise `collapse_groups()` would be
  # applied to each fold separately!
  df <- dplyr::ungroup(df)

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_numeric(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    num_cols = c("age", "score")
  )


  ## Testing 'df_coll'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df_coll),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    xpectr::smpl(df_coll[["participant"]], n = 30),
    structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
      8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
      16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["participant_2"]], n = 30),
    structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
      8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
      16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["age"]], n = 30),
    c(93, 40, 39, 50, 10, 83, 70, 91, 46, 93, 80, 13, 13, 70, 29, 88,
      81, 39, 83, 81, 65, 62, 23, 88, 62, 50, 80, 42, 46, 46),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["answer"]], n = 30),
    structure(c(2L, 1L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 4L, 3L, 4L, 3L,
      4L, 4L, 1L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L, 3L, 3L,
      3L, 3L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["score"]], n = 30),
    c(18, 40, 33, 54, 43, 52, 22, 58, 26, 25, 27, 9, 95, 55, 36, 91,
      30, 38, 67, 69, 89, 75, 31, 71, 72, 60, 35, 61, 86, 51),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["session"]], n = 30),
    c("2", "3", "3", "1", "1", "2", "1", "2", "3", "3", "3", "1", "2",
      "2", "1", "1", "2", "2", "3", "3", "2", "1", "2", "2", "2",
      "3", "2", "1", "2", "1"),
    fixed = TRUE)
  expect_equal(
    xpectr::smpl(df_coll[[".folds"]], n = 30),
    structure(c(2L, 2L, 2L, 7L, 5L, 5L, 6L, 5L, 3L, 8L, 8L, 8L, 6L,
      3L, 2L, 1L, 2L, 2L, 5L, 8L, 5L, 3L, 7L, 6L, 1L, 4L, 8L, 3L,
      6L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
    structure(c(1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
      1L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 2L, 3L, 2L, 2L, 1L,
      2L, 2L), .Label = c("1", "2", "3"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df_coll),
    c("participant", "participant_2", "age", "answer", "score", "session",
      ".folds", ".coll_groups"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df_coll),
    c("factor", "factor", "integer", "factor", "integer", "character",
      "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df_coll),
    c("integer", "integer", "integer", "integer", "integer", "character",
      "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df_coll),
    c(47L, 8L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df_coll)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df_coll'                                             ####

  summ <- summarize_balances(df_coll, group_cols = ".coll_groups", num_cols = c("age", "score"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(18, 18, 11),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(age)"]],
    c(59.83333, 59.5, 60.36364),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(age)"]],
    c(1077, 1071, 664),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(score)"]],
    c(45.72222, 49.11111, 58.36364),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(score)"]],
    c(823, 884, 642),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
      "sum(score)"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "numeric", "integer", "numeric",
      "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "double", "integer", "double",
      "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 7L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####


  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_numeric(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    num_cols = c("age", "score"),
    group_aggregation_fn = sum,
    balance_size = FALSE,
    col_name = ".lol_coups"
  )


  ## Testing 'df_coll'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df_coll),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    xpectr::smpl(df_coll[["participant"]], n = 30),
    structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
      8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
      16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["participant_2"]], n = 30),
    structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
      8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
      16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["age"]], n = 30),
    c(93, 40, 39, 50, 10, 83, 70, 91, 46, 93, 80, 13, 13, 70, 29, 88,
      81, 39, 83, 81, 65, 62, 23, 88, 62, 50, 80, 42, 46, 46),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["answer"]], n = 30),
    structure(c(2L, 1L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 4L, 3L, 4L, 3L,
      4L, 4L, 1L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L, 3L, 3L,
      3L, 3L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["score"]], n = 30),
    c(18, 40, 33, 54, 43, 52, 22, 58, 26, 25, 27, 9, 95, 55, 36, 91,
      30, 38, 67, 69, 89, 75, 31, 71, 72, 60, 35, 61, 86, 51),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["session"]], n = 30),
    c("2", "3", "3", "1", "1", "2", "1", "2", "3", "3", "3", "1", "2",
      "2", "1", "1", "2", "2", "3", "3", "2", "1", "2", "2", "2",
      "3", "2", "1", "2", "1"),
    fixed = TRUE)
  expect_equal(
    xpectr::smpl(df_coll[[".folds"]], n = 30),
    structure(c(2L, 2L, 2L, 7L, 5L, 5L, 6L, 5L, 3L, 8L, 8L, 8L, 6L,
      3L, 2L, 1L, 2L, 2L, 5L, 8L, 5L, 3L, 7L, 6L, 1L, 4L, 8L, 3L,
      6L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[[".lol_coups"]], n = 30),
    structure(c(1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
      2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 3L, 2L, 1L, 2L,
      2L, 2L), .Label = c("1", "2", "3"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df_coll),
    c("participant", "participant_2", "age", "answer", "score", "session",
      ".folds", ".lol_coups"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df_coll),
    c("factor", "factor", "integer", "factor", "integer", "character",
      "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df_coll),
    c("integer", "integer", "integer", "integer", "integer", "character",
      "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df_coll),
    c(47L, 8L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df_coll)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df_coll'                                             ####


  summ <- summarize_balances(
    df_coll, group_cols = ".lol_coups", num_cols = c("age", "score"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".lol_coups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(18, 18, 11),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(age)"]],
    c(62.83333, 56.5, 60.36364),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(age)"]],
    c(1131, 1017, 664),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(score)"]],
    c(44.72222, 50.11111, 58.36364),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(score)"]],
    c(805, 902, 642),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
      "sum(score)"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "numeric", "integer", "numeric",
      "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "double", "integer", "double",
      "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 7L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####

  # Method ascending
  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_numeric(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    num_cols = c("age", "score"),
    method = "ascending"
  )

  summ <- summarize_balances(
    df_coll, group_cols = ".coll_groups", num_cols = c("age", "score"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(12, 18, 17),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(age)"]],
    c(48.83333, 56.5, 71.11765),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(age)"]],
    c(586, 1017, 1209),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["mean(score)"]],
    c(38.08333, 50.11111, 58.23529),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["sum(score)"]],
    c(457, 902, 990),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
      "sum(score)"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "numeric", "integer", "numeric",
      "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "double", "integer", "double",
      "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 7L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####


})

test_that("testing collapse_groups_by_levels()", {

  # Set seed
  xpectr::set_test_seed(42)

  # Create data frame
  df <- data.frame(
    "participant" = factor(rep(1:20, 3)),
    "participant_2" = factor(sample(rep(1:20, 3), 60)),
    "age" = rep(sample(c(1:100), 20), 3),
    "answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
    "answer_2" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
    "score" = sample(c(1:100), 20 * 3)
  )
  df <- df %>% dplyr::arrange(participant)
  df$session <- rep(c("1", "2", "3"), 20)

  # Sample rows to get unequal sizes per participant
  df <- dplyr::sample_n(df, size = 47)

  # Create the initial groups (to be collapsed)
  df <- fold(
    data = df,
    k = 8,
    method = "n_dist"
  )

  # Ungroup the data frame
  # Otherwise `collapse_groups()` would be
  # applied to each fold separately!
  df <- dplyr::ungroup(df)

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_levels(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    cat_cols = c("answer", "answer_2")
  )

  ## Testing 'df_coll'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df_coll),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    xpectr::smpl(df_coll[["participant"]], n = 30),
    structure(c(3L, 18L, 20L, 17L, 13L, 8L, 17L, 6L, 5L, 7L, 20L, 1L,
      19L, 4L, 2L, 6L, 1L, 11L, 9L, 16L, 3L, 11L, 10L, 14L, 5L, 2L,
      4L, 18L, 12L, 12L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["participant_2"]], n = 30),
    structure(c(18L, 8L, 7L, 4L, 14L, 8L, 11L, 9L, 7L, 5L, 10L, 17L,
      20L, 18L, 9L, 18L, 15L, 4L, 12L, 6L, 20L, 1L, 19L, 13L, 3L,
      16L, 8L, 6L, 4L, 11L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["age"]], n = 30),
    c(75, 32, 84, 1, 20, 78, 1, 19, 82, 26, 84, 68, 43, 55, 98, 19,
      68, 4, 64, 62, 75, 4, 22, 42, 82, 98, 55, 32, 13, 13),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["answer"]], n = 30),
    structure(c(1L, 1L, 4L, 2L, 3L, 1L, 1L, 3L, 3L, 4L, 3L, 3L, 3L,
      3L, 2L, 4L, 3L, 4L, 1L, 3L, 3L, 3L, 2L, 4L, 4L, 1L, 3L, 3L,
      4L, 1L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["answer_2"]], n = 30),
    structure(c(3L, 1L, 4L, 1L, 1L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 1L,
      2L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 4L, 1L,
      2L, 2L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["score"]], n = 30),
    c(95, 76, 26, 90, 57, 46, 87, 97, 31, 9, 73, 40, 45, 13, 34, 62,
      89, 17, 61, 36, 60, 25, 92, 6, 94, 52, 43, 88, 11, 58),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["session"]], n = 30),
    c("2", "3", "2", "1", "3", "1", "2", "1", "3", "3", "1", "2", "1",
      "3", "3", "2", "1", "3", "3", "3", "3", "2", "1", "1", "2",
      "1", "1", "1", "2", "1"),
    fixed = TRUE)
  expect_equal(
    xpectr::smpl(df_coll[[".folds"]], n = 30),
    structure(c(3L, 1L, 4L, 7L, 6L, 6L, 8L, 8L, 7L, 5L, 4L, 4L, 5L,
      1L, 7L, 6L, 8L, 7L, 2L, 4L, 6L, 6L, 1L, 4L, 5L, 7L, 1L, 3L,
      7L, 2L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
    structure(c(3L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 3L, 3L, 2L,
      1L, 1L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 1L, 3L,
      1L, 3L), .Label = c("1", "2", "3"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df_coll),
    c("participant", "participant_2", "age", "answer", "answer_2", "score",
      "session", ".folds", ".coll_groups"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df_coll),
    c("factor", "factor", "integer", "factor", "factor", "integer",
      "character", "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df_coll),
    c("integer", "integer", "integer", "integer", "integer", "integer",
      "character", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df_coll),
    c(47L, 9L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df_coll)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df_coll'                                             ####

  summ <- summarize_balances(df_coll, group_cols = ".coll_groups", cat_cols = c("answer", "answer_2"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(17, 12, 18),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__a"]],
    c(8, 4, 6),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__b"]],
    c(3, 3, 3),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__c"]],
    c(2, 2, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__d"]],
    c(4, 3, 4),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_a"]],
    c(4, 2, 4),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_b"]],
    c(3, 1, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_c"]],
    c(8, 6, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_d"]],
    c(2, 3, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "# answer__a", "# answer__b",
      "# answer__c", "# answer__d", "# answer_a", "# answer_b", "# answer_c",
      "# answer_d"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "double", "double", "double",
      "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 11L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####

  # Method ascending

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_levels(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    cat_cols = c("answer", "answer_2"),
    method = "ascending"
  )

  summ <- summarize_balances(df_coll, group_cols = ".coll_groups", cat_cols = c("answer", "answer_2"))

  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(11, 18, 18),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__a"]],
    c(6, 6, 6),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__b"]],
    c(2, 3, 4),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__c"]],
    c(1, 3, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer__d"]],
    c(2, 6, 3),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_a"]],
    c(2, 3, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_b"]],
    c(1, 3, 5),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_c"]],
    c(7, 8, 4),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# answer_d"]],
    c(1, 4, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "# answer__a", "# answer__b",
      "# answer__c", "# answer__d", "# answer_a", "# answer_b", "# answer_c",
      "# answer_d"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "double", "double", "double",
      "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 11L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####


})

test_that("testing collapse_groups_by_ids()", {

  # Set seed
  xpectr::set_test_seed(42)

  # Create data frame
  df <- data.frame(
    "participant" = factor(rep(1:20, 3)),
    "participant_2" = factor(sample(rep(1:20, 3), 60)),
    "age" = rep(sample(c(1:100), 20), 3),
    "answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
    "score" = sample(c(1:100), 20 * 3)
  )
  df <- df %>% dplyr::arrange(participant)
  df$session <- rep(c("1", "2", "3"), 20)

  # Sample rows to get unequal sizes per participant
  df <- dplyr::sample_n(df, size = 47)

  # Create the initial groups (to be collapsed)
  df <- fold(
    data = df,
    k = 8,
    method = "n_dist"
  )

  # Ungroup the data frame
  # Otherwise `collapse_groups()` would be
  # applied to each fold separately!
  df <- dplyr::ungroup(df)

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_ids(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    id_cols = c("participant", "participant_2")
  )


  ## Testing 'df_coll'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df_coll),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    xpectr::smpl(df_coll[["participant"]], n = 30),
    structure(c(11L, 9L, 9L, 5L, 11L, 6L, 1L, 13L, 20L, 8L, 7L, 3L,
      16L, 4L, 10L, 4L, 13L, 3L, 12L, 2L, 19L, 1L, 15L, 16L, 19L,
      5L, 17L, 14L, 2L, 8L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["participant_2"]], n = 30),
    structure(c(3L, 15L, 15L, 3L, 1L, 18L, 13L, 11L, 2L, 10L, 14L, 17L,
      3L, 18L, 6L, 9L, 14L, 18L, 11L, 17L, 2L, 15L, 1L, 1L, 20L, 16L,
      4L, 13L, 16L, 7L), .Label = c("1", "2", "3", "4", "5", "6",
      "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
      "18", "19", "20"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["age"]], n = 30),
    c(4, 64, 64, 82, 4, 19, 68, 20, 84, 78, 26, 75, 62, 55, 22, 55,
      20, 75, 13, 98, 43, 68, 17, 62, 43, 82, 1, 42, 98, 78),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["answer"]], n = 30),
    structure(c(3L, 3L, 1L, 4L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 2L, 3L,
      3L, 1L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 4L, 2L, 4L,
      1L, 4L), .Label = c("a", "b", "c", "d"), class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[["score"]], n = 30),
    c(65, 9, 28, 41, 52, 91, 81, 93, 30, 24, 34, 46, 66, 100, 80, 84,
      70, 47, 29, 44, 50, 15, 68, 72, 12, 71, 94, 35, 8, 43),
    tolerance = 1e-4)
  expect_equal(
    xpectr::smpl(df_coll[["session"]], n = 30),
    c("1", "2", "1", "2", "2", "2", "3", "2", "3", "2", "2", "1", "1",
      "3", "2", "2", "3", "2", "1", "2", "2", "1", "2", "2", "1",
      "1", "1", "1", "1", "3"),
    fixed = TRUE)
  expect_equal(
    xpectr::smpl(df_coll[[".folds"]], n = 30),
    structure(c(4L, 3L, 3L, 6L, 4L, 7L, 5L, 2L, 6L, 1L, 2L, 5L, 8L,
      4L, 7L, 1L, 4L, 5L, 5L, 8L, 7L, 3L, 7L, 2L, 5L, 1L, 3L, 3L,
      6L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
    structure(c(2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 1L,
      2L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 1L, 3L, 3L,
      2L, 1L), .Label = c("1", "2", "3"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df_coll),
    c("participant", "participant_2", "age", "answer", "score", "session",
      ".folds", ".coll_groups"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df_coll),
    c("factor", "factor", "integer", "factor", "integer", "character",
      "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df_coll),
    c("integer", "integer", "integer", "integer", "integer", "character",
      "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df_coll),
    c(47L, 8L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df_coll)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df_coll'                                             ####

  summ <- summarize_balances(df_coll,
                             group_cols = ".coll_groups",
                             id_cols = c("participant", "participant_2"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(11, 18, 18),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# participant"]],
    c(10, 12, 12),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# participant_2"]],
    c(10, 12, 12),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "# participant", "# participant_2"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "integer", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 5L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####

  # Method descending

  xpectr::set_test_seed(42)
  df_coll <- collapse_groups_by_ids(
    data = df,
    n = 3,
    group_cols = ".folds",
    auto_tune = FALSE,
    id_cols = c("participant", "participant_2"),
    method = "descending"
  )

  summ <- summarize_balances(df_coll,
                             group_cols = ".coll_groups",
                             id_cols = c("participant", "participant_2"))


  ## Testing 'summ$Groups'                                                  ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(summ$Groups),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    summ$Groups[[".group_col"]],
    structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
  expect_equal(
    summ$Groups[[".group"]],
    structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
  expect_equal(
    summ$Groups[["# rows"]],
    c(18, 18, 11),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# participant"]],
    c(13, 12, 10),
    tolerance = 1e-4)
  expect_equal(
    summ$Groups[["# participant_2"]],
    c(12, 14, 7),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(summ$Groups),
    c(".group_col", ".group", "# rows", "# participant", "# participant_2"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(summ$Groups),
    c("factor", "factor", "integer", "integer", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(summ$Groups),
    c("integer", "integer", "integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(summ$Groups),
    c(3L, 5L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(summ$Groups)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'summ$Groups'                                         ####


})
LudvigOlsen/R-splitters documentation built on March 7, 2024, 6:59 p.m.