sample_groups <- c("A", "B", "A", "B", "C", "C", "A", "A", "D")
test_that("get_partition_indices() works", {
set.seed(0)
outcomes <- c(
"normal", "cancer", "normal", "normal", "cancer", "cancer",
"normal", "normal", "normal", "cancer"
)
expect_equal(
get_partition_indices(outcomes,
training_frac = 0.8,
groups = sample_groups
),
c(1L, 3L, 5L, 6L, 7L, 8L, 9L)
)
set.seed(0)
expect_equal(
get_partition_indices(outcomes,
training_frac = 0.5,
groups = NULL
),
c(1L, 2L, 3L, 5L, 7L)
)
})
check_simple_grouped_partition <- function(groups, train_indices) {
expect_false(any(groups[train_indices] %in% groups[-train_indices]))
expect_false(any(groups[-train_indices] %in% groups[train_indices]))
expect_true(length(train_indices) / length(groups) <= 0.8)
}
test_that("create_grouped_data_partition() works with entire groups in train/test splits", {
set.seed(0)
train_ind <- create_grouped_data_partition(sample_groups, training_frac = 0.8)
expect_equal(train_ind, c(1L, 3L, 5L, 6L, 7L, 8L, 9L))
check_simple_grouped_partition(sample_groups, train_ind)
})
check_custom_grouped_partition <- function(groups, train_indices, group_partitions) {
unique_groups <- unique(groups)
in_train_only <-
setdiff(group_partitions$train, group_partitions$test)
in_test_only <-
setdiff(group_partitions$test, group_partitions$train)
in_both <-
intersect(group_partitions$test, group_partitions$train)
in_neither <-
setdiff(
unique_groups,
union(group_partitions$test, group_partitions$train)
)
train_groups <- groups[train_indices]
test_groups <- groups[-train_indices]
expect_false(any(test_groups %in% in_train_only))
expect_false(any(train_groups %in% in_test_only))
}
test_that("create_grouped_data_partition() works with custom group partitions", {
set.seed(20211102)
sample_groups <- rep.int(c("A", "B", "C", "D"), 3)
group_part <- list(train = c("A"), test = c("A", "B", "C"))
train_ind <- create_grouped_data_partition(sample_groups,
group_partitions = group_part,
training_frac = 0.33
)
expect_equal(train_ind, c(1L, 5L, 12L, 8L))
check_custom_grouped_partition(sample_groups, train_ind, group_part)
set.seed(2019)
sample_groups <- sample(LETTERS[1:8], nrow(otu_mini_bin), replace = TRUE)
group_part <- list(train = c("A", "B"), test = c("C", "D"))
train_ind <- create_grouped_data_partition(sample_groups,
group_partitions = group_part,
training_frac = 0.8
)
check_custom_grouped_partition(sample_groups, train_ind, group_part)
group_part <- list(
train = c("A", "B"),
test = c("A", "B", "C", "D", "E", "F", "G", "H")
)
train_ind <- create_grouped_data_partition(sample_groups,
group_partitions = group_part,
training_frac = 0.2
)
check_custom_grouped_partition(sample_groups, train_ind, group_part)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.