Nothing
# ===========================================================================
# Tests for C++ functions
# ===========================================================================
# ---------------------------------------------------------------------------
# checkTemporalOrder
# ---------------------------------------------------------------------------
test_that("checkTemporalOrder detects look-ahead bias", {
# Timestamps: 1-10 for rows
timestamps <- as.numeric(1:10)
train_idx <- 1:5 # timestamps 1-5
test_idx <- 6:10 # timestamps 6-10
# No violations - test is after train
result <- checkTemporalOrder(timestamps, train_idx, test_idx)
expect_false(result$has_violations)
expect_equal(result$n_violations, 0)
expect_equal(result$max_train_time, 5)
})
test_that("checkTemporalOrder finds test data before train data", {
timestamps <- as.numeric(1:10)
train_idx <- 5:10 # timestamps 5-10
test_idx <- 1:4 # timestamps 1-4 (BEFORE train!)
result <- checkTemporalOrder(timestamps, train_idx, test_idx)
expect_true(result$has_violations)
expect_equal(result$n_violations, 4)
expect_equal(sort(result$violation_indices), 1:4)
expect_equal(result$max_train_time, 10)
})
test_that("checkTemporalOrder handles mixed temporal violations", {
timestamps <- as.numeric(c(1, 5, 2, 8, 3, 9, 4, 10, 6, 7))
train_idx <- c(2, 4, 6, 8) # timestamps 5, 8, 9, 10
test_idx <- c(1, 3, 5, 7, 9, 10) # timestamps 1, 2, 3, 4, 6, 7
result <- checkTemporalOrder(timestamps, train_idx, test_idx)
expect_true(result$has_violations)
# All test timestamps < max_train_time (10)
expect_equal(result$n_violations, 6)
expect_equal(result$max_train_time, 10)
})
test_that("checkTemporalOrder handles NA timestamps", {
timestamps <- as.numeric(c(1, 2, NA, 4, 5, NA, 7, 8, 9, 10))
train_idx <- 1:5
test_idx <- 6:10
result <- checkTemporalOrder(timestamps, train_idx, test_idx)
# max_train_time should be 5 (ignoring NA at position 3)
expect_equal(result$max_train_time, 5)
# Violations: positions 7,8,9,10 have times 7,8,9,10 > 5, but position 6 is NA
# So no violations since test times are after train
expect_false(result$has_violations)
})
test_that("checkTemporalOrder with partial look-ahead", {
timestamps <- as.numeric(1:10)
train_idx <- 3:7 # timestamps 3-7
test_idx <- c(1, 2, 8, 9, 10) # 1,2 are before, 8,9,10 are after
result <- checkTemporalOrder(timestamps, train_idx, test_idx)
expect_true(result$has_violations)
expect_equal(result$n_violations, 2) # positions 1 and 2
expect_equal(sort(result$violation_indices), c(1, 2))
expect_equal(result$max_train_time, 7)
})
# ---------------------------------------------------------------------------
# checkGroupOverlap
# ---------------------------------------------------------------------------
test_that("checkGroupOverlap detects no overlap when groups are separate", {
groups <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)
train_idx <- c(1, 2, 3, 4, 5, 6) # groups 1 and 2
test_idx <- c(7, 8, 9, 10, 11, 12) # groups 3 and 4
result <- checkGroupOverlap(groups, train_idx, test_idx)
expect_false(result$has_overlap)
expect_equal(result$n_overlapping_groups, 0)
expect_equal(result$n_affected_rows, 0)
})
test_that("checkGroupOverlap detects group overlap", {
groups <- c(1, 1, 1, 2, 2, 2, 1, 1, 3, 3) # Group 1 in both train and test
train_idx <- 1:6 # groups 1, 1, 1, 2, 2, 2
test_idx <- 7:10 # groups 1, 1, 3, 3
result <- checkGroupOverlap(groups, train_idx, test_idx)
expect_true(result$has_overlap)
expect_equal(result$n_overlapping_groups, 1) # group 1
expect_true(1 %in% result$overlapping_groups)
expect_equal(result$n_affected_rows, 2) # rows 7 and 8
expect_equal(sort(result$affected_indices), c(7, 8))
})
test_that("checkGroupOverlap handles multiple overlapping groups", {
groups <- c(1, 2, 3, 4, 1, 2, 5, 6)
train_idx <- 1:4 # groups 1, 2, 3, 4
test_idx <- 5:8 # groups 1, 2, 5, 6
result <- checkGroupOverlap(groups, train_idx, test_idx)
expect_true(result$has_overlap)
expect_equal(result$n_overlapping_groups, 2) # groups 1 and 2
expect_true(all(c(1, 2) %in% result$overlapping_groups))
expect_equal(result$n_affected_rows, 2) # rows 5 and 6
})
test_that("checkGroupOverlap handles complete overlap", {
groups <- rep(1:2, each = 5) # All same groups
train_idx <- 1:5 # group 1s
test_idx <- 6:10 # group 2s... wait, let me fix
# Actually test complete overlap
groups <- rep(1, 10) # All same group
train_idx <- 1:5
test_idx <- 6:10
result <- checkGroupOverlap(groups, train_idx, test_idx)
expect_true(result$has_overlap)
expect_equal(result$n_overlapping_groups, 1)
expect_equal(result$n_affected_rows, 5)
})
# ---------------------------------------------------------------------------
# computeCorrelation
# ---------------------------------------------------------------------------
test_that("computeCorrelation computes Pearson correlation correctly", {
x <- c(1, 2, 3, 4, 5)
y <- c(2, 4, 6, 8, 10) # Perfect positive correlation
result <- computeCorrelation(x, y)
expect_equal(result, 1.0, tolerance = 1e-10)
})
test_that("computeCorrelation handles negative correlation", {
x <- c(1, 2, 3, 4, 5)
y <- c(10, 8, 6, 4, 2) # Perfect negative correlation
result <- computeCorrelation(x, y)
expect_equal(result, -1.0, tolerance = 1e-10)
})
test_that("computeCorrelation handles zero correlation", {
set.seed(42)
x <- rnorm(1000)
y <- rnorm(1000)
result <- computeCorrelation(x, y)
# Should be close to 0 for independent random variables
expect_true(abs(result) < 0.1)
})
test_that("computeCorrelation handles NA values", {
x <- c(1, 2, NA, 4, 5)
y <- c(2, 4, 6, 8, 10)
result <- computeCorrelation(x, y)
# Should compute correlation excluding NA
expect_true(!is.na(result))
expect_equal(result, 1.0, tolerance = 1e-10)
})
test_that("computeCorrelation handles NA in both vectors", {
x <- c(1, NA, 3, 4, 5)
y <- c(2, 4, NA, 8, 10)
result <- computeCorrelation(x, y)
# Should exclude rows where either is NA
expect_true(!is.na(result))
})
test_that("computeCorrelation returns NA for constant vectors", {
x <- c(5, 5, 5, 5, 5) # Zero variance
y <- c(1, 2, 3, 4, 5)
result <- computeCorrelation(x, y)
expect_true(is.na(result))
})
test_that("computeCorrelation returns NA with insufficient data", {
x <- c(1, NA, NA, NA, NA)
y <- c(2, NA, NA, NA, NA)
result <- computeCorrelation(x, y)
expect_true(is.na(result)) # Only 1 complete case
})
test_that("computeCorrelation matches base R cor", {
set.seed(123)
x <- rnorm(100)
y <- x * 2 + rnorm(100, sd = 0.5)
cpp_result <- computeCorrelation(x, y)
r_result <- cor(x, y)
expect_equal(cpp_result, r_result, tolerance = 1e-10)
})
test_that("computeCorrelation matches cor with NAs", {
set.seed(456)
x <- rnorm(100)
y <- x * 2 + rnorm(100, sd = 0.5)
x[sample(100, 10)] <- NA
y[sample(100, 10)] <- NA
cpp_result <- computeCorrelation(x, y)
r_result <- cor(x, y, use = "pairwise.complete.obs")
expect_equal(cpp_result, r_result, tolerance = 1e-10)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.