library(groupdata2)
context("collapse_groups_by_*()")
# These tests only test that no errors are thrown, as
# the functionality is tested in collapse_groups()
test_that("testing collapse_groups_by_size()", {
# Set seed
xpectr::set_test_seed(42)
# Create data frame
df <- data.frame(
"participant" = factor(rep(1:20, 3)),
"participant_2" = factor(rep(1:20, 3)),
"age" = rep(sample(c(1:100), 20), 3),
"answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
"score" = sample(c(1:100), 20 * 3)
)
df <- df %>% dplyr::arrange(participant)
df$session <- rep(c("1", "2", "3"), 20)
# Sample rows to get unequal sizes per participant
df <- dplyr::sample_n(df, size = 15)
# Create the initial groups (to be collapsed)
df <- fold(
data = df,
k = 8,
method = "n_dist"
)
# Ungroup the data frame
# Otherwise `collapse_groups()` would be
# applied to each fold separately!
df <- dplyr::ungroup(df)
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_size(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE
)
## Testing 'df_coll' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(df_coll),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
df_coll[["participant"]],
structure(c(3L, 2L, 9L, 15L, 14L, 19L, 4L, 17L, 6L, 3L, 12L, 18L,
13L, 7L, 11L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20"), class = "factor"))
expect_equal(
df_coll[["participant_2"]],
structure(c(3L, 2L, 9L, 15L, 14L, 19L, 4L, 17L, 6L, 3L, 12L, 18L,
13L, 7L, 11L), .Label = c("1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20"), class = "factor"))
expect_equal(
df_coll[["age"]],
c(29, 93, 61, 40, 23, 39, 81, 88, 50, 29, 91, 10, 83, 70, 42),
tolerance = 1e-4)
expect_equal(
df_coll[["answer"]],
structure(c(4L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 3L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
df_coll[["score"]],
c(34, 18, 13, 40, 45, 33, 63, 90, 54, 17, 77, 43, 52, 22, 83),
tolerance = 1e-4)
expect_equal(
df_coll[["session"]],
c("3", "2", "3", "3", "3", "3", "1", "3", "1", "2", "3", "1", "2",
"1", "2"),
fixed = TRUE)
expect_equal(
df_coll[[".folds"]],
structure(c(7L, 2L, 1L, 2L, 5L, 6L, 3L, 5L, 8L, 3L, 7L, 4L, 6L,
8L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
class = "factor"))
expect_equal(
df_coll[[".coll_groups"]],
structure(c(2L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 2L, 2L, 1L,
3L, 2L), .Label = c("1", "2", "3"), class = "factor"))
# Testing column names
expect_equal(
names(df_coll),
c("participant", "participant_2", "age", "answer", "score", "session",
".folds", ".coll_groups"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(df_coll),
c("factor", "factor", "integer", "factor", "integer", "character",
"factor", "factor"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(df_coll),
c("integer", "integer", "integer", "integer", "integer", "character",
"integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(df_coll),
c(15L, 8L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(df_coll)),
character(0),
fixed = TRUE)
## Finished testing 'df_coll' ####
summ <- df_coll %>%
summarize_balances(group_cols = ".coll_groups")
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]], # Decent
c(5, 6, 4),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 3L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
# Method ascending
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_size(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
method = "ascending"
)
summ <- df_coll %>%
summarize_balances(group_cols = ".coll_groups")
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(3, 6, 6),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 3L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
})
test_that("testing collapse_groups_by_numeric()", {
# Set seed
xpectr::set_test_seed(42)
# Create data frame
df <- data.frame(
"participant" = factor(rep(1:20, 3)),
"participant_2" = factor(rep(1:20, 3)),
"age" = rep(sample(c(1:100), 20), 3),
"answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
"score" = sample(c(1:100), 20 * 3)
)
df <- df %>% dplyr::arrange(participant)
df$session <- rep(c("1", "2", "3"), 20)
# Sample rows to get unequal sizes per participant
df <- dplyr::sample_n(df, size = 47)
# Create the initial groups (to be collapsed)
df <- fold(
data = df,
k = 8,
method = "n_dist"
)
# Ungroup the data frame
# Otherwise `collapse_groups()` would be
# applied to each fold separately!
df <- dplyr::ungroup(df)
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_numeric(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
num_cols = c("age", "score")
)
## Testing 'df_coll' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(df_coll),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
xpectr::smpl(df_coll[["participant"]], n = 30),
structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["participant_2"]], n = 30),
structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["age"]], n = 30),
c(93, 40, 39, 50, 10, 83, 70, 91, 46, 93, 80, 13, 13, 70, 29, 88,
81, 39, 83, 81, 65, 62, 23, 88, 62, 50, 80, 42, 46, 46),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["answer"]], n = 30),
structure(c(2L, 1L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 4L, 3L, 4L, 3L,
4L, 4L, 1L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L, 3L, 3L,
3L, 3L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["score"]], n = 30),
c(18, 40, 33, 54, 43, 52, 22, 58, 26, 25, 27, 9, 95, 55, 36, 91,
30, 38, 67, 69, 89, 75, 31, 71, 72, 60, 35, 61, 86, 51),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["session"]], n = 30),
c("2", "3", "3", "1", "1", "2", "1", "2", "3", "3", "3", "1", "2",
"2", "1", "1", "2", "2", "3", "3", "2", "1", "2", "2", "2",
"3", "2", "1", "2", "1"),
fixed = TRUE)
expect_equal(
xpectr::smpl(df_coll[[".folds"]], n = 30),
structure(c(2L, 2L, 2L, 7L, 5L, 5L, 6L, 5L, 3L, 8L, 8L, 8L, 6L,
3L, 2L, 1L, 2L, 2L, 5L, 8L, 5L, 3L, 7L, 6L, 1L, 4L, 8L, 3L,
6L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
class = "factor"))
expect_equal(
xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
structure(c(1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L,
1L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 2L, 3L, 2L, 2L, 1L,
2L, 2L), .Label = c("1", "2", "3"), class = "factor"))
# Testing column names
expect_equal(
names(df_coll),
c("participant", "participant_2", "age", "answer", "score", "session",
".folds", ".coll_groups"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(df_coll),
c("factor", "factor", "integer", "factor", "integer", "character",
"factor", "factor"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(df_coll),
c("integer", "integer", "integer", "integer", "integer", "character",
"integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(df_coll),
c(47L, 8L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(df_coll)),
character(0),
fixed = TRUE)
## Finished testing 'df_coll' ####
summ <- summarize_balances(df_coll, group_cols = ".coll_groups", num_cols = c("age", "score"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(18, 18, 11),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(age)"]],
c(59.83333, 59.5, 60.36364),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(age)"]],
c(1077, 1071, 664),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(score)"]],
c(45.72222, 49.11111, 58.36364),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(score)"]],
c(823, 884, 642),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
"sum(score)"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "numeric", "integer", "numeric",
"integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "double", "integer", "double",
"integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 7L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_numeric(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
num_cols = c("age", "score"),
group_aggregation_fn = sum,
balance_size = FALSE,
col_name = ".lol_coups"
)
## Testing 'df_coll' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(df_coll),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
xpectr::smpl(df_coll[["participant"]], n = 30),
structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["participant_2"]], n = 30),
structure(c(2L, 15L, 19L, 6L, 18L, 13L, 7L, 12L, 20L, 2L, 16L, 8L,
8L, 7L, 3L, 17L, 4L, 19L, 13L, 4L, 10L, 5L, 14L, 17L, 5L, 6L,
16L, 11L, 20L, 20L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["age"]], n = 30),
c(93, 40, 39, 50, 10, 83, 70, 91, 46, 93, 80, 13, 13, 70, 29, 88,
81, 39, 83, 81, 65, 62, 23, 88, 62, 50, 80, 42, 46, 46),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["answer"]], n = 30),
structure(c(2L, 1L, 3L, 3L, 1L, 2L, 2L, 2L, 1L, 4L, 3L, 4L, 3L,
4L, 4L, 1L, 4L, 2L, 1L, 3L, 3L, 1L, 4L, 3L, 2L, 1L, 3L, 3L,
3L, 3L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["score"]], n = 30),
c(18, 40, 33, 54, 43, 52, 22, 58, 26, 25, 27, 9, 95, 55, 36, 91,
30, 38, 67, 69, 89, 75, 31, 71, 72, 60, 35, 61, 86, 51),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["session"]], n = 30),
c("2", "3", "3", "1", "1", "2", "1", "2", "3", "3", "3", "1", "2",
"2", "1", "1", "2", "2", "3", "3", "2", "1", "2", "2", "2",
"3", "2", "1", "2", "1"),
fixed = TRUE)
expect_equal(
xpectr::smpl(df_coll[[".folds"]], n = 30),
structure(c(2L, 2L, 2L, 7L, 5L, 5L, 6L, 5L, 3L, 8L, 8L, 8L, 6L,
3L, 2L, 1L, 2L, 2L, 5L, 8L, 5L, 3L, 7L, 6L, 1L, 4L, 8L, 3L,
6L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
class = "factor"))
expect_equal(
xpectr::smpl(df_coll[[".lol_coups"]], n = 30),
structure(c(1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 2L, 3L, 2L, 1L, 2L,
2L, 2L), .Label = c("1", "2", "3"), class = "factor"))
# Testing column names
expect_equal(
names(df_coll),
c("participant", "participant_2", "age", "answer", "score", "session",
".folds", ".lol_coups"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(df_coll),
c("factor", "factor", "integer", "factor", "integer", "character",
"factor", "factor"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(df_coll),
c("integer", "integer", "integer", "integer", "integer", "character",
"integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(df_coll),
c(47L, 8L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(df_coll)),
character(0),
fixed = TRUE)
## Finished testing 'df_coll' ####
summ <- summarize_balances(
df_coll, group_cols = ".lol_coups", num_cols = c("age", "score"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".lol_coups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(18, 18, 11),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(age)"]],
c(62.83333, 56.5, 60.36364),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(age)"]],
c(1131, 1017, 664),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(score)"]],
c(44.72222, 50.11111, 58.36364),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(score)"]],
c(805, 902, 642),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
"sum(score)"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "numeric", "integer", "numeric",
"integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "double", "integer", "double",
"integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 7L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
# Method ascending
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_numeric(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
num_cols = c("age", "score"),
method = "ascending"
)
summ <- summarize_balances(
df_coll, group_cols = ".coll_groups", num_cols = c("age", "score"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(12, 18, 17),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(age)"]],
c(48.83333, 56.5, 71.11765),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(age)"]],
c(586, 1017, 1209),
tolerance = 1e-4)
expect_equal(
summ$Groups[["mean(score)"]],
c(38.08333, 50.11111, 58.23529),
tolerance = 1e-4)
expect_equal(
summ$Groups[["sum(score)"]],
c(457, 902, 990),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "mean(age)", "sum(age)", "mean(score)",
"sum(score)"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "numeric", "integer", "numeric",
"integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "double", "integer", "double",
"integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 7L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
})
test_that("testing collapse_groups_by_levels()", {
# Set seed
xpectr::set_test_seed(42)
# Create data frame
df <- data.frame(
"participant" = factor(rep(1:20, 3)),
"participant_2" = factor(sample(rep(1:20, 3), 60)),
"age" = rep(sample(c(1:100), 20), 3),
"answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
"answer_2" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
"score" = sample(c(1:100), 20 * 3)
)
df <- df %>% dplyr::arrange(participant)
df$session <- rep(c("1", "2", "3"), 20)
# Sample rows to get unequal sizes per participant
df <- dplyr::sample_n(df, size = 47)
# Create the initial groups (to be collapsed)
df <- fold(
data = df,
k = 8,
method = "n_dist"
)
# Ungroup the data frame
# Otherwise `collapse_groups()` would be
# applied to each fold separately!
df <- dplyr::ungroup(df)
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_levels(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
cat_cols = c("answer", "answer_2")
)
## Testing 'df_coll' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(df_coll),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
xpectr::smpl(df_coll[["participant"]], n = 30),
structure(c(3L, 18L, 20L, 17L, 13L, 8L, 17L, 6L, 5L, 7L, 20L, 1L,
19L, 4L, 2L, 6L, 1L, 11L, 9L, 16L, 3L, 11L, 10L, 14L, 5L, 2L,
4L, 18L, 12L, 12L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["participant_2"]], n = 30),
structure(c(18L, 8L, 7L, 4L, 14L, 8L, 11L, 9L, 7L, 5L, 10L, 17L,
20L, 18L, 9L, 18L, 15L, 4L, 12L, 6L, 20L, 1L, 19L, 13L, 3L,
16L, 8L, 6L, 4L, 11L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["age"]], n = 30),
c(75, 32, 84, 1, 20, 78, 1, 19, 82, 26, 84, 68, 43, 55, 98, 19,
68, 4, 64, 62, 75, 4, 22, 42, 82, 98, 55, 32, 13, 13),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["answer"]], n = 30),
structure(c(1L, 1L, 4L, 2L, 3L, 1L, 1L, 3L, 3L, 4L, 3L, 3L, 3L,
3L, 2L, 4L, 3L, 4L, 1L, 3L, 3L, 3L, 2L, 4L, 4L, 1L, 3L, 3L,
4L, 1L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["answer_2"]], n = 30),
structure(c(3L, 1L, 4L, 1L, 1L, 4L, 4L, 4L, 3L, 4L, 3L, 4L, 1L,
2L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 4L, 1L,
2L, 2L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["score"]], n = 30),
c(95, 76, 26, 90, 57, 46, 87, 97, 31, 9, 73, 40, 45, 13, 34, 62,
89, 17, 61, 36, 60, 25, 92, 6, 94, 52, 43, 88, 11, 58),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["session"]], n = 30),
c("2", "3", "2", "1", "3", "1", "2", "1", "3", "3", "1", "2", "1",
"3", "3", "2", "1", "3", "3", "3", "3", "2", "1", "1", "2",
"1", "1", "1", "2", "1"),
fixed = TRUE)
expect_equal(
xpectr::smpl(df_coll[[".folds"]], n = 30),
structure(c(3L, 1L, 4L, 7L, 6L, 6L, 8L, 8L, 7L, 5L, 4L, 4L, 5L,
1L, 7L, 6L, 8L, 7L, 2L, 4L, 6L, 6L, 1L, 4L, 5L, 7L, 1L, 3L,
7L, 2L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
class = "factor"))
expect_equal(
xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
structure(c(3L, 1L, 3L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 3L, 3L, 2L,
1L, 1L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 1L, 3L,
1L, 3L), .Label = c("1", "2", "3"), class = "factor"))
# Testing column names
expect_equal(
names(df_coll),
c("participant", "participant_2", "age", "answer", "answer_2", "score",
"session", ".folds", ".coll_groups"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(df_coll),
c("factor", "factor", "integer", "factor", "factor", "integer",
"character", "factor", "factor"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(df_coll),
c("integer", "integer", "integer", "integer", "integer", "integer",
"character", "integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(df_coll),
c(47L, 9L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(df_coll)),
character(0),
fixed = TRUE)
## Finished testing 'df_coll' ####
summ <- summarize_balances(df_coll, group_cols = ".coll_groups", cat_cols = c("answer", "answer_2"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(17, 12, 18),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__a"]],
c(8, 4, 6),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__b"]],
c(3, 3, 3),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__c"]],
c(2, 2, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__d"]],
c(4, 3, 4),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_a"]],
c(4, 2, 4),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_b"]],
c(3, 1, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_c"]],
c(8, 6, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_d"]],
c(2, 3, 4),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "# answer__a", "# answer__b",
"# answer__c", "# answer__d", "# answer_a", "# answer_b", "# answer_c",
"# answer_d"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "double", "double", "double",
"double", "double", "double", "double", "double"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 11L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
# Method ascending
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_levels(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
cat_cols = c("answer", "answer_2"),
method = "ascending"
)
summ <- summarize_balances(df_coll, group_cols = ".coll_groups", cat_cols = c("answer", "answer_2"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(11, 18, 18),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__a"]],
c(6, 6, 6),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__b"]],
c(2, 3, 4),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__c"]],
c(1, 3, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer__d"]],
c(2, 6, 3),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_a"]],
c(2, 3, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_b"]],
c(1, 3, 5),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_c"]],
c(7, 8, 4),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# answer_d"]],
c(1, 4, 4),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "# answer__a", "# answer__b",
"# answer__c", "# answer__d", "# answer_a", "# answer_b", "# answer_c",
"# answer_d"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric", "numeric", "numeric"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "double", "double", "double",
"double", "double", "double", "double", "double"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 11L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
})
test_that("testing collapse_groups_by_ids()", {
# Set seed
xpectr::set_test_seed(42)
# Create data frame
df <- data.frame(
"participant" = factor(rep(1:20, 3)),
"participant_2" = factor(sample(rep(1:20, 3), 60)),
"age" = rep(sample(c(1:100), 20), 3),
"answer" = factor(sample(c("a", "b", "c", "d"), 60, replace = TRUE)),
"score" = sample(c(1:100), 20 * 3)
)
df <- df %>% dplyr::arrange(participant)
df$session <- rep(c("1", "2", "3"), 20)
# Sample rows to get unequal sizes per participant
df <- dplyr::sample_n(df, size = 47)
# Create the initial groups (to be collapsed)
df <- fold(
data = df,
k = 8,
method = "n_dist"
)
# Ungroup the data frame
# Otherwise `collapse_groups()` would be
# applied to each fold separately!
df <- dplyr::ungroup(df)
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_ids(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
id_cols = c("participant", "participant_2")
)
## Testing 'df_coll' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(df_coll),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
xpectr::smpl(df_coll[["participant"]], n = 30),
structure(c(11L, 9L, 9L, 5L, 11L, 6L, 1L, 13L, 20L, 8L, 7L, 3L,
16L, 4L, 10L, 4L, 13L, 3L, 12L, 2L, 19L, 1L, 15L, 16L, 19L,
5L, 17L, 14L, 2L, 8L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["participant_2"]], n = 30),
structure(c(3L, 15L, 15L, 3L, 1L, 18L, 13L, 11L, 2L, 10L, 14L, 17L,
3L, 18L, 6L, 9L, 14L, 18L, 11L, 17L, 2L, 15L, 1L, 1L, 20L, 16L,
4L, 13L, 16L, 7L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["age"]], n = 30),
c(4, 64, 64, 82, 4, 19, 68, 20, 84, 78, 26, 75, 62, 55, 22, 55,
20, 75, 13, 98, 43, 68, 17, 62, 43, 82, 1, 42, 98, 78),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["answer"]], n = 30),
structure(c(3L, 3L, 1L, 4L, 3L, 4L, 2L, 4L, 3L, 3L, 3L, 2L, 3L,
3L, 1L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 4L, 2L, 4L,
1L, 4L), .Label = c("a", "b", "c", "d"), class = "factor"))
expect_equal(
xpectr::smpl(df_coll[["score"]], n = 30),
c(65, 9, 28, 41, 52, 91, 81, 93, 30, 24, 34, 46, 66, 100, 80, 84,
70, 47, 29, 44, 50, 15, 68, 72, 12, 71, 94, 35, 8, 43),
tolerance = 1e-4)
expect_equal(
xpectr::smpl(df_coll[["session"]], n = 30),
c("1", "2", "1", "2", "2", "2", "3", "2", "3", "2", "2", "1", "1",
"3", "2", "2", "3", "2", "1", "2", "2", "1", "2", "2", "1",
"1", "1", "1", "1", "3"),
fixed = TRUE)
expect_equal(
xpectr::smpl(df_coll[[".folds"]], n = 30),
structure(c(4L, 3L, 3L, 6L, 4L, 7L, 5L, 2L, 6L, 1L, 2L, 5L, 8L,
4L, 7L, 1L, 4L, 5L, 5L, 8L, 7L, 3L, 7L, 2L, 5L, 1L, 3L, 3L,
6L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
class = "factor"))
expect_equal(
xpectr::smpl(df_coll[[".coll_groups"]], n = 30),
structure(c(2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 3L, 3L, 1L,
2L, 2L, 1L, 2L, 3L, 3L, 1L, 2L, 3L, 2L, 3L, 3L, 1L, 3L, 3L,
2L, 1L), .Label = c("1", "2", "3"), class = "factor"))
# Testing column names
expect_equal(
names(df_coll),
c("participant", "participant_2", "age", "answer", "score", "session",
".folds", ".coll_groups"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(df_coll),
c("factor", "factor", "integer", "factor", "integer", "character",
"factor", "factor"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(df_coll),
c("integer", "integer", "integer", "integer", "integer", "character",
"integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(df_coll),
c(47L, 8L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(df_coll)),
character(0),
fixed = TRUE)
## Finished testing 'df_coll' ####
summ <- summarize_balances(df_coll,
group_cols = ".coll_groups",
id_cols = c("participant", "participant_2"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(11, 18, 18),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# participant"]],
c(10, 12, 12),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# participant_2"]],
c(10, 12, 12),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "# participant", "# participant_2"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "integer", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 5L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
# Method descending
xpectr::set_test_seed(42)
df_coll <- collapse_groups_by_ids(
data = df,
n = 3,
group_cols = ".folds",
auto_tune = FALSE,
id_cols = c("participant", "participant_2"),
method = "descending"
)
summ <- summarize_balances(df_coll,
group_cols = ".coll_groups",
id_cols = c("participant", "participant_2"))
## Testing 'summ$Groups' ####
## Initially generated by xpectr
xpectr::set_test_seed(42)
# Testing class
expect_equal(
class(summ$Groups),
c("tbl_df", "tbl", "data.frame"),
fixed = TRUE)
# Testing column values
expect_equal(
summ$Groups[[".group_col"]],
structure(c(1L, 1L, 1L), .Label = ".coll_groups", class = "factor"))
expect_equal(
summ$Groups[[".group"]],
structure(1:3, .Label = c("1", "2", "3"), class = "factor"))
expect_equal(
summ$Groups[["# rows"]],
c(18, 18, 11),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# participant"]],
c(13, 12, 10),
tolerance = 1e-4)
expect_equal(
summ$Groups[["# participant_2"]],
c(12, 14, 7),
tolerance = 1e-4)
# Testing column names
expect_equal(
names(summ$Groups),
c(".group_col", ".group", "# rows", "# participant", "# participant_2"),
fixed = TRUE)
# Testing column classes
expect_equal(
xpectr::element_classes(summ$Groups),
c("factor", "factor", "integer", "integer", "integer"),
fixed = TRUE)
# Testing column types
expect_equal(
xpectr::element_types(summ$Groups),
c("integer", "integer", "integer", "integer", "integer"),
fixed = TRUE)
# Testing dimensions
expect_equal(
dim(summ$Groups),
c(3L, 5L))
# Testing group keys
expect_equal(
colnames(dplyr::group_keys(summ$Groups)),
character(0),
fixed = TRUE)
## Finished testing 'summ$Groups' ####
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.