tests/testthat/test-kld-uncertainty.R

test_that("kld_ci_subsampling has the correct coverage on an easy example", {

    # input parameters
    set.seed(123456)
    n <- 100
    alpha <- 0.4
    KL_true <- kld_gaussian(mu1 = 0, sigma1 = 1, mu2 = 1, sigma2 = 2^2)
    nrep <- 20

    # 1-/2-sample cases
    q <- function(x) dnorm(x, mean =1, sd = 2)
    cov_1s <- numeric(nrep)
    cov_2s <- numeric(nrep)
    cov_1s_se <- numeric(nrep)
    cov_2s_se <- numeric(nrep)
    for (i in 1:nrep) {
        X <- rnorm(n)
        Y <- rnorm(n, mean = 1, sd = 2)
        ci_1s <- kld_ci_subsampling(X, q = q, B = 100, alpha = alpha)$ci
        ci_2s <- kld_ci_subsampling(X, Y = Y, B = 100, alpha = alpha)$ci
        ci_1s_se <- kld_ci_subsampling(X, q = q, B = 100, alpha = alpha, method = "se")$ci
        ci_2s_se <- kld_ci_subsampling(X, Y = Y, B = 100, alpha = alpha, method = "se")$ci
        cov_1s[i] <- ci_1s[1] <= KL_true & ci_1s[2] >= KL_true
        cov_2s[i] <- ci_2s[1] <= KL_true & ci_2s[2] >= KL_true
        cov_1s_se[i] <- ci_1s_se[1] <= KL_true & ci_1s_se[2] >= KL_true
        cov_2s_se[i] <- ci_2s_se[1] <= KL_true & ci_2s_se[2] >= KL_true
    }
    # 20% tolerance since nrep and B are very small
    expect_equal(mean(cov_2s), 1-alpha, tolerance = 0.2)
    expect_equal(mean(cov_1s), 1-alpha, tolerance = 0.2)
    expect_equal(mean(cov_2s_se), 1-alpha, tolerance = 0.2)
    expect_equal(mean(cov_1s_se), 1-alpha, tolerance = 0.2)

})


test_that("kld_ci_subsampling parallelization works", {

    # NOTE: there is still randomness in the test due to different seeds used in
    # the parallel processes. I use a huge value of B and a large tolerance to
    # compensate for this. But it would be better to have a truly reproducible
    # way of handling this

    # input parameters
    set.seed(123456)
    n <- 100
    alpha <- 0.4
    B <- 5000
    X <- rnorm(n)

    # 1-sample case only
    q <- function(x) dnorm(x, mean =1, sd = 2)

    ci_1s_seq <- kld_ci_subsampling(X, q = q, B = B, alpha = alpha, n.cores = 1)$ci
    ci_1s_par <- kld_ci_subsampling(X, q = q, B = B, alpha = alpha, n.cores = 2)$ci

    expect_equal(ci_1s_par,ci_1s_seq, tolerance = 0.05)

})


test_that("kld_ci_subsampling works with mixed data", {

    # input parameters
    set.seed(123456)
    n <- 100
    alpha <- 0.4

    # 2-sample case only
    X <- data.frame(c = rnorm(n),
                    d = rbinom(n, size = 1, prob = 0.5))
    Y <- data.frame(c = rnorm(n, mean = 1, sd = 2),
                    d = rbinom(n, size = 1, prob = 0.3))

    # only check that subsampling runs without errors, no reference value
    expect_no_error(kld_ci_subsampling(X, Y = Y, B = 50, alpha = alpha, estimator = kld_est,
                                       vartype = c("c","d")))

})


test_that("kld_ci_bootstrap has the correct coverage on an easy example", {

    # input parameters
    set.seed(123456)
    n <- 100
    alpha <- 0.4
    KL_true <- kld_gaussian(mu1 = 0, sigma1 = 1, mu2 = 1, sigma2 = 2^2)
    nrep <- 20

    # 2-sample case only
    cov_2s <- numeric(nrep)
    for (i in 1:nrep) {
        X <- rnorm(n)
        Y <- rnorm(n, mean = 1, sd = 2)
        ci_2s <- kld_ci_bootstrap(X, Y, B = 100, alpha = alpha)$ci
        cov_2s[i] <- ci_2s[1] <= KL_true & ci_2s[2] >= KL_true
    }
    expect_equal(mean(cov_2s), 1-alpha, tolerance = 0.1)

})

test_that("include_boot option works", {

    set.seed(123456)

    X <- 1:10
    Y <- 11:20
    B <- 20
    KL_boot <- kld_ci_bootstrap(X, Y, B = B, include.boot = TRUE)
    KL_subs <- kld_ci_subsampling(X, Y, B = B, include.boot = TRUE)

    expect_length(KL_boot$boot, n = B)
    expect_length(KL_subs$boot, n = B)
})


test_that("kld_ci_bootstrap can deal with duplicates", {

    # input parameters
    set.seed(123456)

    X <- rep(0:10,2)
    Y <- X
    KL_se <- kld_ci_bootstrap(X, Y, method = "se")
    KL_q  <- kld_ci_bootstrap(X, Y, method = "quantile")

    expect_equal(KL_se$est, 0)
    expect_equal(KL_q$est, 0)
    expect_false(any(is.na(KL_se$ci)))
    expect_false(any(is.na(KL_q$ci)))

})


test_that("kld_ci_subsampling handles convergence.rate = NULL correctly", {

    set.seed(123456)

    X <- 1:10
    Y <- 11:20
    B <- 20
    KL_rate <- kld_ci_subsampling(X, Y, B = B, convergence.rate = NULL)
    KL_half <- kld_ci_subsampling(X, Y, B = B)

    expect_equal(KL_rate$est, KL_half$est)
    expect_equal(KL_rate$ci, KL_half$ci, tolerance = 0.1)
})

Try the kldest package in your browser

Any scripts or data that you put into this service are public.

kldest documentation built on May 29, 2024, 3 a.m.