library(testthat)
library(purrr)
library(rsamplestudy)
seed <- 196
seed <- floor(runif(1, min = 0, max = 1000))
cat(paste('Using seed:', seed, '\n\n'))
set.seed(seed)
context("test-ref-quest-split")
set.seed(seed)
# Replication -------------------------------------------------------------
# Rerun tests multiple times
n_replicate_offline <- 20
n_replicate_online <- 5
if (exists('TRAVIS') && exists('NOT_CRAN')) {
# Fast tests on TRAVIS or CRAN
if (TRAVIS) {
n_replicate <- n_replicate_online
} else {
if (!NOT_CRAN) {
# on CRAN
n_replicate <- 1
} else {
# somewhere else
n_replicate <- n_replicate_online
}
}
} else {
# Slow tests offline
n_replicate <- n_replicate_offline
}
replicate(n_replicate, {
# Parameters --------------------------------------------------------------
# Number of sources
n <- 10
# Number of items per source
m <- 20
sources <- as.numeric(sapply(seq(n), function(s){rep(s, m)}))
sources_all <- 1:n
# Sampling properties
# Number of reference and questioned items
k_ref <- 5
k_quest <- 4
# Number of different questioned sources
# n_quest_diff <- sample.int(n - 1, 1) # from 0 to m-2 background sources
n_quest_diff <- sample.int(n - 1 - 1, 1) # guarantee that there is at least one background source
# n_quest_diff <- n - 1 # no background
# Pick out the reference source
s_ref <- sample(sources_all, 1)
s_quest_same <- s_ref
# and the different questioned source(s)
s_quest_diff_candidates <- setdiff(unique(sources), s_ref)
s_quest_diff <- sort(sample_safe(s_quest_diff_candidates, n_quest_diff))
is_background_empty <- isTRUE(all.equal(unique(sources), unique(sort(union(s_ref, s_quest_diff)))))
# Source restriction (new in 0.3)
n_allowed <- floor(n/2)
# Restricted sources
s_ref_allowed <- sort(union(sample(sources_all, n_allowed), s_ref))
s_quest_allowed_same <- sort(union(sample(sources_all, n_allowed), s_quest_same))
s_quest_allowed_diff <- sort(union(sample(sources_all, n_allowed), s_quest_diff))
# Sources which are not in the restricted set
s_ref_forbidden <- sort(setdiff(sources_all, s_ref_allowed))
s_quest_forbidden_same <- sort(setdiff(sources_all, s_quest_allowed_same))
s_quest_forbidden_diff <- sort(setdiff(sources_all, s_quest_allowed_diff))
# Wrong candidates: explicit sources are not allowed
s_ref_allowed_wrong <- sort(setdiff(sample(sources_all, n_allowed), s_ref))
s_quest_allowed_same_wrong <- sort(setdiff(sample(sources_all, n_allowed), s_quest_same))
s_quest_allowed_diff_wrong <- sort(setdiff(sample(sources_all, n_allowed), s_quest_diff))
# Error expectations
# expect only errors which can be expected
# tests still fail on failed assertions (explicitly: class `fatal`)
# expect_error_all_but_fatal <- partial(expect_error, class = c('simpleError', 'serious'))
expect_error_all_but_fatal <- partial(expect_error, class = c('serious', 'assertError', 'simpleError'))
# make_idx_splits: explicit sources -------------------------------------------------------------------
# Source not found
test_that("make_idx_splits: missing reference or questioned source", {
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_ref = n + 1, source_quest = s_quest_same))
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_ref = n + 1, source_quest = s_quest_diff))
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = n + 1))
})
# Same source
test_that("make_idx_splits: quest=ref", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_same))
})
test_that("make_idx_splits: quest=ref, same_source = TRUE", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, same_source = TRUE))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_ref))
})
# Different sources
test_that("make_idx_splits: reference source selection, different", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
expect_equal(unique(sources[splits$idx_reference]), s_ref)
expect_true(all(sources[splits$idx_questioned] %in% s_quest_diff))
})
test_that("make_idx_splits: quest!=ref", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
expect_true(unique(sources[splits$idx_reference] == s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_diff))
expect_length(intersect(sources[splits$idx_reference], sources[splits$idx_questioned]), 0)
})
test_that("make_idx_splits: quest!=ref, same_source = FALSE", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = FALSE))
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_true(source_ref_this == s_ref)
expect_true(all(source_quest_this %in% s_quest_diff))
expect_length(intersect(source_ref_this, source_quest_this), 0)
})
test_that("make_idx_splits: quest!=ref, same_source = TRUE (WARNING)", {
# Should complain about overriding
splits <- expect_warning(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = TRUE))
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = TRUE, strict = TRUE))
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_true(source_ref_this == s_ref)
expect_true(all(source_quest_this %in% s_ref))
})
test_that("make_idx_splits: quest==ref, same_source = FALSE (ERROR)", {
expect_s3_class(expect_error_all_but_fatal(
make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_ref, same_source = FALSE)
), 'serious')
})
# make_idx_splits: with restrictions -------------------------------------------------------------------
# Generic: forbid not allowed explicit sources
test_that("make_idx_splits: forbid restricted explicit sources", {
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_ref_allowed = s_ref_allowed_wrong))
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_quest = s_quest_same, source_quest_allowed = s_quest_allowed_same_wrong))
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, k_quest, source_quest = s_quest_diff, source_quest_allowed = s_quest_allowed_diff_wrong))
})
# With specified candidate sources
# Same source
test_that("make_idx_splits: quest=ref, restricted ref", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same, source_ref_allowed = s_ref_allowed))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_same))
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref_allowed))
expect_true(!any(unique(sources[splits$idx_reference]) %in% s_ref_forbidden))
})
test_that("make_idx_splits: quest=ref, restricted quest", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same, source_quest_allowed = s_quest_allowed_same))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_same))
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_allowed_same))
expect_true(!any(unique(sources[splits$idx_questioned]) %in% s_quest_forbidden_same))
})
# Different sources
test_that("make_idx_splits: quest!=ref, restricted ref", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest,
source_ref = s_ref,
source_quest = s_quest_diff,
source_ref_allowed = s_ref_allowed))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_diff))
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref_allowed))
expect_true(!any(unique(sources[splits$idx_reference]) %in% s_ref_forbidden))
})
test_that("make_idx_splits: quest!=ref, restricted quest", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest,
source_ref = s_ref,
source_quest = s_quest_diff,
source_quest_allowed = s_quest_allowed_diff))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_diff))
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_allowed_diff))
expect_true(!any(unique(sources[splits$idx_questioned]) %in% s_quest_forbidden_diff))
})
# make_idx_splits: with restrictions, same_source override -------------------------------------------------------------------
test_that("make_idx_splits: quest=ref, restricted quest, same_source = TRUE", {
splits <- expect_silent(
make_idx_splits(sources, k_ref, k_quest,
source_ref = s_ref,
source_quest = s_quest_same,
source_quest_allowed = s_quest_allowed_same,
same_source = TRUE))
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_same))
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_allowed_same))
expect_true(!any(unique(sources[splits$idx_questioned]) %in% s_quest_forbidden_same))
})
test_that("make_idx_splits: quest!=ref, restricted ref, same_source = FALSE", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = FALSE, source_ref_allowed = s_ref_allowed))
expect_true(unique(sources[splits$idx_reference] == s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_diff))
expect_length(intersect(sources[splits$idx_reference], sources[splits$idx_questioned]), 0)
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_reference]) %in% s_ref_allowed))
expect_true(!any(unique(sources[splits$idx_reference]) %in% s_ref_forbidden))
})
test_that("make_idx_splits: quest!=ref, restricted quest, same_source = FALSE", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = FALSE, source_quest_allowed = s_quest_allowed_diff))
expect_true(unique(sources[splits$idx_reference] == s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_diff))
expect_length(intersect(sources[splits$idx_reference], sources[splits$idx_questioned]), 0)
# Check candidate restriction
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_quest_allowed_diff))
expect_true(!any(unique(sources[splits$idx_questioned]) %in% s_quest_forbidden_diff))
})
test_that("make_idx_splits: quest!=ref, same_source = TRUE (WARNING)", {
splits <- expect_warning(
make_idx_splits(sources, k_ref, k_quest,
source_ref = s_ref,
source_quest = s_quest_diff,
same_source = TRUE),
regexp = 'Honoring')
expect_true(unique(sources[splits$idx_reference] == s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_ref))
})
test_that("make_idx_splits: quest!=ref, restricted ref+quest, same_source = TRUE (WARNING)", {
## Should complain about overriding
splits <- expect_warning(make_idx_splits(sources, k_ref, k_quest,
source_ref = s_ref,
source_quest = s_quest_diff,
same_source = TRUE,
source_ref_allowed = s_ref_allowed,
source_quest_allowed = s_quest_allowed_diff),
regexp = 'Honoring')
expect_true(unique(sources[splits$idx_reference] == s_ref))
expect_true(all(unique(sources[splits$idx_questioned]) %in% s_ref))
})
test_that("make_idx_splits: quest!=ref, ref free, restricted ref+quest, same_source = TRUE (WARNING)", {
## Should complain about overriding
splits <- expect_warning(make_idx_splits(sources, k_ref, k_quest,
source_quest = s_quest_diff,
same_source = TRUE,
source_ref_allowed = s_ref_allowed,
source_quest_allowed = s_quest_allowed_diff),
regexp = 'Honoring')
source_ref_this <- unique(sources[splits$idx_reference])
expect_true(unique(sources[splits$idx_reference] == source_ref_this))
expect_true(all(unique(sources[splits$idx_questioned]) %in% source_ref_this))
})
test_that("make_idx_splits: quest, ref free, restricted ref+quest, same_source = FALSE (WARNING if ref=quest)", {
## source_ref (randomly picked) = source_quest: ERROR
splits <- expect_error(make_idx_splits(sources, k_ref, k_quest,
source_quest = s_quest_same, # must be 1D
same_source = FALSE,
# source_ref_allowed = s_ref_allowed,
source_ref_allowed = s_quest_same,
source_quest_allowed = s_quest_same),
class = 'serious')
## source_ref (randomly picked) != source_quest: just a message
splits <- expect_message(make_idx_splits(sources, k_ref, k_quest,
source_quest = s_quest_diff, # must be >1D
same_source = FALSE,
# source_ref_allowed = s_ref_allowed,
source_ref_allowed = s_quest_same,
source_quest_allowed = s_quest_diff),
regexp = 'Honoring')
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_true(!identical(source_ref_this, source_quest_this))
})
# make_idx_splits: with restrictions, no explicit sources, only same_source ------------------------------------
# With specified candidate sources
test_that("make_idx_splits: restricted ref, same_source = TRUE", {
splits <- make_idx_splits(sources, k_ref, k_quest, source_ref_allowed = s_ref_allowed, same_source = TRUE)
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_equal(source_ref_this, source_quest_this)
# Check candidate restriction
expect_true(all(source_ref_this %in% s_ref_allowed))
expect_true(!any(source_ref_this %in% s_ref_forbidden))
})
test_that("make_idx_splits: restricted ref, same_source = FALSE", {
splits <- make_idx_splits(sources, k_ref, k_quest, source_ref_allowed = s_ref_allowed, same_source = FALSE)
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_true(!any(source_ref_this %in% source_quest_this))
# Check candidate restriction
expect_true(all(source_ref_this %in% s_ref_allowed))
expect_true(!any(source_ref_this %in% s_ref_forbidden))
})
test_that("make_idx_splits: restricted quest, same_source = FALSE", {
splits <- make_idx_splits(sources, k_ref, k_quest, source_quest_allowed = s_quest_allowed_diff, same_source = FALSE)
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_true(!any(source_ref_this %in% source_quest_this))
# Check candidate restriction
expect_true(all(source_quest_this %in% s_quest_allowed_diff))
expect_true(!any(source_quest_this %in% s_quest_forbidden_diff))
})
test_that("make_idx_splits: restricted quest, same_source = TRUE", {
splits <- make_idx_splits(sources, k_ref, k_quest, source_quest_allowed = s_quest_allowed_diff, same_source = TRUE)
source_ref_this <- unique(sources[splits$idx_reference])
source_quest_this <- unique(sources[splits$idx_questioned])
expect_identical(source_ref_this, source_quest_this)
# Check candidate restriction
# these may fail as questioned restriction is not honored
# expect_true(all(source_quest_this %in% s_quest_allowed_diff))
# expect_true(!any(source_quest_this %in% s_quest_forbidden_diff))
})
# make_idx_splits: Sample with replacement -----------------------------------------------------
test_that("make_idx_splits: quest=ref, not enough samples, replace", {
# Not enough reference items
expect_error_all_but_fatal(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_same, replace = FALSE))
expect_message(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_same, replace = TRUE))
expect_message(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_same))
# Not enough questioned items
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, m + 1, source_ref = s_ref, source_quest = s_quest_same, replace = FALSE))
expect_message(make_idx_splits(sources, k_ref, m + 1, source_ref = s_ref, source_quest = s_quest_same, replace = TRUE))
expect_message(make_idx_splits(sources, k_ref, m + 1, source_ref = s_ref, source_quest = s_quest_same))
# quest=ref: one sample is already taken
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, m, source_ref = s_ref, source_quest = s_quest_same, replace = FALSE))
expect_message(make_idx_splits(sources, k_ref, m, source_ref = s_ref, source_quest = s_quest_same, replace = TRUE))
expect_message(make_idx_splits(sources, k_ref, m, source_ref = s_ref, source_quest = s_quest_same))
})
test_that("make_idx_splits: quest!=ref, not enough samples, replace", {
# Not enough reference items
expect_error_all_but_fatal(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_diff, replace = FALSE))
expect_message(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_diff, replace = TRUE))
expect_message(make_idx_splits(sources, m + 1, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
# Not enough questioned items
expect_error_all_but_fatal(make_idx_splits(sources, k_ref, m*n_quest_diff + 1, source_ref = s_ref, source_quest = s_quest_diff, replace = FALSE))
expect_message(make_idx_splits(sources, k_ref, m*n_quest_diff + 1, source_ref = s_ref, source_quest = s_quest_diff, replace = TRUE))
expect_message(make_idx_splits(sources, k_ref, m*n_quest_diff + 1, source_ref = s_ref, source_quest = s_quest_diff))
# quest!=ref: should be silent
expect_silent(make_idx_splits(sources, k_ref, m*n_quest_diff, source_ref = s_ref, source_quest = s_quest_diff, replace = FALSE))
expect_silent(make_idx_splits(sources, k_ref, m*n_quest_diff, source_ref = s_ref, source_quest = s_quest_diff, replace = TRUE))
expect_silent(make_idx_splits(sources, k_ref, m*n_quest_diff, source_ref = s_ref, source_quest = s_quest_diff))
})
# make_idx_splits: Sample intersections -----------------------------------------------------
test_that("make_idx_splits: verify that background is non-intersecting, outside", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, background = 'outside'))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
})
# background: others
test_that("make_idx_splits: others (sampling from other sources), no background (WARNING)", {
expect_warning(make_idx_splits(sources, k_ref, k_quest, background = 'others'))
})
if (!is_background_empty) {
test_that("make_idx_splits: verify that background is non-intersecting, others (sampling from other sources), with background", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, background = 'others', source_ref = s_ref, source_quest = s_quest_diff))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
source_ref <- unique(sources[splits$idx_reference])
source_quest <- unique(sources[splits$idx_questioned])
source_background <- unique(sources[splits$idx_background])
expect_length(intersect(source_ref, source_background), 0)
expect_length(intersect(source_quest, source_background), 0)
})
}
# background: unobserved
test_that("make_idx_splits: verify that background is non-intersecting, unobserved", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, background = 'unobserved'))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
})
if (!is_background_empty) {
test_that("make_idx_splits: verify that background is non-intersecting, unobserved (sampling from unseen sources), with actual background", {
splits <- expect_silent(make_idx_splits(sources, k_ref, k_quest, background = 'unobserved', source_ref = s_ref))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
source_ref <- unique(sources[splits$idx_reference])
source_background <- unique(sources[splits$idx_background])
source_quest_actual <- unique(sources[splits$idx_questioned])
source_quest_potential <- setdiff(sources_all, s_ref)
source_quest_in_background <- setdiff(source_quest_potential, source_quest_actual)
expect_length(intersect(source_ref, source_background), 0)
expect_length(intersect(source_ref, source_quest_actual), 0)
expect_length(intersect(source_quest_actual, source_background), 0)
expect_gte(length(intersect(source_quest_in_background, source_background)), 0)
})
}
# make_dataset_splits: df tests ----------------------------------------------------------------
df <- data.frame(source = sources, x = rnorm(length(sources)))
df_item <- data.frame(item = sources, x = rnorm(length(sources)))
# Source not found
test_that("make_idx_splits: missing reference or questioned source", {
expect_error_all_but_fatal(make_dataset_splits(df, k_ref, k_quest, source_ref = n + 1, source_quest = s_quest_same))
expect_error_all_but_fatal(make_dataset_splits(df, k_ref, k_quest, source_ref = n + 1, source_quest = s_quest_diff))
expect_error_all_but_fatal(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = n + 1))
})
test_that("make_dataset_splits: quest=ref", {
splits <- make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same)
expect_true(all(unique(splits$df_reference$source) %in% s_ref))
expect_true(all(unique(splits$df_questioned$source) %in% s_quest_same))
})
test_that("make_dataset_splits: quest~=ref", {
splits <- make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff)
expect_true(unique(splits$df_reference$source) == s_ref)
expect_true(all(unique(splits$df_questioned$source) %in% s_quest_diff))
expect_length(intersect(splits$df_reference$source, splits$df_questioned$source), 0)
})
test_that("make_dataset_splits: quest~=ref, renamed column", {
expect_error_all_but_fatal(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, col_source = 'item'))
expect_error_all_but_fatal(make_dataset_splits(df_item, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
col_source <- 'item'
splits <- expect_silent(make_dataset_splits(df_item, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, col_source = 'item'))
expect_true(unique(splits$df_reference[[col_source]]) == s_ref)
expect_true(all(unique(splits$df_questioned[[col_source]]) %in% s_quest_diff))
expect_length(intersect(splits$df_reference[[col_source]], splits$df_questioned[[col_source]]), 0)
})
# make_dataset_splits: idx tests -------------------------------------------------------------------
# Same source
test_that("make_dataset_splits: reference source selection, same", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same))
expect_equal(unique(df$source[splits$idx_reference]), s_ref)
expect_equal(unique(df$source[splits$idx_questioned]), s_quest_same)
})
test_that("make_dataset_splits: quest=ref", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_same))
expect_true(all(unique(df$source[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(df$source[splits$idx_questioned]) %in% s_quest_same))
})
test_that("make_dataset_splits: quest=ref, same_source = TRUE", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, same_source = TRUE))
expect_true(all(unique(df$source[splits$idx_reference]) %in% s_ref))
expect_true(all(unique(df$source[splits$idx_questioned]) %in% s_ref))
})
# Different sources
test_that("make_dataset_splits: reference source selection, different", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
expect_equal(unique(df$source[splits$idx_reference]), s_ref)
expect_true(all(df$source[splits$idx_questioned] %in% s_quest_diff))
})
test_that("make_dataset_splits: quest~=ref", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff))
expect_true(unique(df$source[splits$idx_reference] == s_ref))
expect_true(all(unique(df$source[splits$idx_questioned]) %in% s_quest_diff))
expect_length(intersect(df$source[splits$idx_reference], df$source[splits$idx_questioned]), 0)
})
test_that("make_dataset_splits: quest~=ref, same_source = FALSE", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, source_ref = s_ref, source_quest = s_quest_diff, same_source = FALSE))
expect_true(unique(df$source[splits$idx_reference] == s_ref))
expect_true(all(unique(df$source[splits$idx_questioned]) %in% s_quest_diff))
expect_length(intersect(df$source[splits$idx_reference], df$source[splits$idx_questioned]), 0)
})
# make_dataset_splits: Sample intersections -----------------------------------------------------
test_that("make_dataset_splits: verify that background is non-intersecting, outside", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, background = 'outside'))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
})
test_that("make_dataset_splits: others (sampling from other sources), no background (WARNING)", {
expect_warning(make_dataset_splits(df, k_ref, k_quest, background = 'others'))
})
if (!is_background_empty) {
test_that("make_dataset_splits: verify that background is non-intersecting, others (sampling from other sources), with background", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, background = 'others', source_ref = s_ref, source_quest = s_quest_diff))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
source_ref <- unique(df$source[splits$idx_reference])
source_quest <- unique(df$source[splits$idx_questioned])
source_background <- unique(df$source[splits$idx_background])
expect_length(intersect(source_ref, source_background), 0)
expect_length(intersect(source_quest, source_background), 0)
})
}
# background: unobserved
test_that("make_dataset_splits: verify that background is non-intersecting, unobserved", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, background = 'unobserved'))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
})
if (!is_background_empty) {
test_that("make_dataset_splits: verify that background is non-intersecting, unobserved (sampling from unseen sources), with actual background", {
splits <- expect_silent(make_dataset_splits(df, k_ref, k_quest, background = 'unobserved', source_ref = s_ref))
expect_length(intersect(splits$idx_reference, splits$idx_questioned), 0)
expect_length(intersect(splits$idx_reference, splits$idx_background), 0)
expect_length(intersect(splits$idx_questioned, splits$idx_background), 0)
source_ref <- unique(splits$df_reference$source)
source_background <- unique(splits$df_background$source)
source_quest_actual <- unique(splits$df_questioned$source)
source_quest_potential <- setdiff(sources_all, s_ref)
source_quest_in_background <- setdiff(source_quest_potential, source_quest_actual)
expect_length(intersect(source_ref, source_background), 0)
expect_length(intersect(source_ref, source_quest_actual), 0)
expect_length(intersect(source_quest_actual, source_background), 0)
expect_gte(length(intersect(source_quest_in_background, source_background)), 0)
})
}
# End replicate -----------------------------------------------------------
}) # /Rerun tests multiple times
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.