tests/testthat/test-relativeimportance.R

context("Relative importance analysis")

data(bank, package = "flipExampleData")

X <- structure(list(v1 = c(9, 8,
    7, 8, 9, 9, 5, 7, 5, 7, 8, 6, 5, 6, 9, 8, 5, 7, 7, 4, 6, 9, 9,
    5, 9, 9, 7, 9, 6, 5, 5, 7, 7, 9, 5, 9, 9, 7, 9, 9, 9, 6, 9, 8,
    7, 7, 7, 5, 9, 9, 9, 9, 9, 7, 6, 6, 9, 7, 6, 8, 9, 4, 8, 7, 8,
    8, 6, 9, 9, 6, 7, 9, 4, 7, 9, 8, 9, 5, 8, 8, 9, 4, 4, 6, 6, 9,
    6, 7, 9, 4, 6, 8, 7, 6, 7, 5, 6, 7, 9, 9, 9, 9, 8, 4, 5, 4, 9,
    5, 8, 4, 5, 9, 8, 4, 5, 8, 9, 6, 7, 7, 6, 9, 7, 5, 4, 4, 9, 5,
    9, 8, 5, 9, 9, 9, 8, 9, 7, 7, 7, 9, 5, 5, 8, 5, 7, 8, 7, 9, 7,
    9, 7, 6, 5, 7, 6, 8, 8, 9, 9, 6, 9, 9, 7, 9, 7, 7, 7, 5, 9, 6,
    8, 5, 6, 9, 5, 9, 4, 9, 9, 7, 9, 7, 7, 6, 5, 7, 8, 7, 4, 6, 4,
    4, 9, 6, 7, 6, 6, 5, 7, 6, 9, 9, 4, 7, 4, 7, 9, 7, 4, 9, 6, 6,
    9, 6, 4, 5, 7, 7, 8, 7, 5, 9, 6, 8, 8, 7, 8, 6, 6, 4, 7, 7, 5,
    8, 9, 7, 8, 7, 7, 9, 9, 6, 6, 7, 6, 9, 6, 7, 9, 6, 8, 9, 8, 7,
    6, 9, 6, 6, 9, 9, 9, 9, 7, 7, 9, 7, 9, 6, 8, 9, 9, 9, 7, 6, 6,
    7, 7, 9, 4, 7, 6, 5, 8, 4, 9, 6, 9, 7, 7, 4, 6, 6, 9, 8, 9, 7,
    4, 5, 7, 7, 9, 8, 9, 9, 6, 7, 9, 9, 9, 8, 9, 6, 6, 5, 9, 8, 4,
    6, 4, 9, 8, 7, 7, 9, 7, 6, 4),
    v2 = c(7,
    9, 3, 1, 3, 1, 8, 4, 3, 4, 3, 6, 2, 4, 2, 1, 1, 7, 1, 3, 4, 9,
    2, 2, 6, 4, 4, 2, 1, 7, 2, 4, 9, 3, 9, 4, 3, 1, 5, 2, 4, 6, 4,
    7, 7, 1, 3, 3, 3, 6, 1, 2, 1, 7, 4, 7, 7, 5, 8, 3, 4, 2, 8, 3,
    5, 8, 4, 6, 8, 6, 8, 3, 4, 6, 1, 4, 4, 8, 6, 9, 7, 1, 6, 1, 1,
    4, 5, 9, 5, 4, 7, 4, 6, 4, 4, 2, 3, 6, 1, 1, 9, 6, 4, 6, 3, 3,
    4, 5, 6, 2, 3, 4, 7, 6, 1, 8, 6, 1, 6, 1, 6, 2, 5, 2, 5, 1, 4,
    1, 2, 4, 4, 6, 2, 4, 3, 2, 4, 1, 1, 2, 3, 4, 4, 4, 6, 5, 6, 6,
    6, 3, 2, 3, 5, 4, 1, 5, 4, 5, 1, 1, 1, 2, 1, 4, 3, 6, 1, 4, 1,
    2, 6, 1, 5, 3, 5, 2, 6, 4, 4, 1, 3, 6, 5, 7, 4, 3, 9, 1, 5, 1,
    2, 4, 3, 1, 2, 5, 2, 6, 4, 7, 8, 3, 5, 2, 5, 1, 1, 1, 4, 1, 4,
    6, 3, 3, 2, 6, 6, 4, 1, 4, 1, 3, 7, 2, 1, 4, 3, 9, 3, 3, 5, 5,
    2, 4, 1, 7, 1, 3, 2, 8, 2, 5, 2, 4, 4, 1, 1, 2, 1, 2, 2, 5, 5,
    1, 3, 2, 2, 1, 7, 9, 4, 4, 3, 2, 3, 5, 2, 5, 5, 9, 1, 4, 1, 2,
    6, 6, 1, 4, 1, 4, 9, 2, 1, 1, 3, 6, 2, 1, 2, 7, 3, 4, 2, 6, 4,
    3, 2, 7, 4, 1, 3, 3, 3, 7, 5, 2, 7, 5, 6, 5, 6, 1, 1, 6, 3, 9,
    1, 1, 1, 1, 8, 5, 3, 1, 1, 9, 3),
    v3 = c(7, 9, 9, 8, 1, 1,
    9, 9, 9, 9, 9, 8, 1, 5, 6, 9, 1, 1, 9, 1, 9, 2, 1, 9, 6, 1, 9,
    1, 1, 8, 9, 9, 9, 5, 9, 9, 8, 1, 6, 9, 9, 9, 9, 9, 9, 5, 9, 9,
    9, 7, 1, 9, 7, 1, 9, 1, 9, 6, 9, 8, 9, 9, 6, 7, 6, 8, 9, 9, 9,
    9, 9, 1, 9, 7, 9, 9, 5, 4, 9, 9, 2, 4, 7, 5, 1, 4, 9, 9, 9, 8,
    9, 8, 9, 1, 4, 9, 8, 9, 7, 9, 9, 9, 9, 7, 7, 5, 1, 2, 1, 9, 9,
    9, 9, 9, 9, 9, 9, 7, 9, 9, 9, 1, 9, 1, 9, 1, 7, 9, 9, 9, 2, 9,
    3, 9, 9, 2, 7, 7, 9, 9, 9, 1, 9, 1, 9, 1, 9, 6, 9, 1, 9, 9, 9,
    9, 9, 7, 1, 9, 8, 6, 1, 4, 9, 9, 1, 1, 9, 9, 2, 9, 9, 6, 9, 9,
    9, 5, 9, 9, 1, 9, 9, 1, 9, 1, 9, 1, 8, 9, 3, 9, 9, 8, 9, 2, 1,
    7, 9, 5, 9, 9, 8, 5, 7, 9, 9, 3, 8, 1, 4, 9, 9, 6, 4, 9, 5, 9,
    6, 9, 9, 5, 1, 3, 9, 9, 9, 9, 1, 9, 1, 9, 3, 9, 9, 9, 5, 9, 7,
    1, 1, 9, 1, 9, 7, 9, 9, 1, 9, 3, 9, 9, 9, 1, 1, 7, 1, 9, 9, 9,
    9, 9, 1, 1, 9, 9, 9, 9, 1, 9, 9, 1, 9, 3, 7, 9, 2, 9, 9, 2, 9,
    9, 6, 9, 9, 9, 1, 9, 2, 1, 1, 9, 3, 3, 9, 5, 9, 9, 9, 9, 8, 6,
    1, 8, 7, 1, 9, 1, 9, 1, 9, 9, 1, 1, 9, 9, 9, 9, 5, 9, 1, 7, 8,
    1, 9, 9, 8, 8, 5)),
    .Names = c("v1", "v2", "v3"),
    row.names = c(NA, 327L),
    questiontype = "PickOneMulti",
    question = "Q4. Frequency of drinking", class = "data.frame")

y <- c(3, 7, 3, 3, 9, 9, 8, 5, 10, 7, 7, 9, 9, 4, 10, 3, 4, 8, 5,
       2, 8, 7, 9, 8, 4, 3, 5, 6, 3, 9, 8, 9, 4, 2, 9, 4, 9, 7, 2, 6,
       9, 7, 9, 6, 7, 3, 5, 6, 6, 7, 2, 9, 5, 3, 6, 4, 9, 4, 10, 2,
       5, 6, 2, 7, 2, 4, 10, 5, 3, 5, 5, 2, 4, 6, 7, 8, 6, 9, 9, 10,
       8, 4, 5, 2, 3, 2, 8, 9, 4, 2, 2, 10, 7, 4, 2, 8, 9, 9, 5, 9,
       2, 2, 7, 5, 2, 4, 2, 2, 4, 10, 8, 7, 5, 6, 6, 5, 2, 6, 9, 8,
       8, 5, 3, 6, 3, 5, 4, 10, 3, 2, 2, 10, 4, 2, 8, 6, 9, 8, 9, 9,
       4, 9, 2, 2, 4, 10, 6, 2, 6, 2, 2, 10, 5, 7, 5, 2, 8, 6, 2, 2,
       4, 3, 3, 3, 3, 4, 4, 7, 6, 5, 8, 9, 8, 8, 8, 9, 6, 5, 3, 3, 6,
       2, 5, 9, 6, 5, 6, 3, 3, 3, 9, 3, 9, 3, 2, 2, 7, 4, 6, 9, 2, 10,
       3, 8, 9, 4, 7, 8, 4, 9, 9, 9, 2, 3, 6, 8, 10, 7, 3, 3, 4, 5,
       3, 10, 10, 6, 6, 10, 2, 10, 2, 8, 6, 9, 2, 9, 9, 8, 9, 5, 9,
       3, 9, 2, 5, 3, 10, 6, 7, 8, 9, 5, 2, 3, 6, 8, 6, 5, 6, 8, 9,
       5, 2, 9, 3, 5, 8, 10, 3, 7, 7, 8, 6, 9, 7, 7, 5, 8, 7, 8, 9,
       2, 3, 10, 7, 8, 4, 10, 9, 10, 3, 4, 9, 4, 4, 9, 9, 8, 6, 5, 7,
       9, 5, 6, 5, 3, 8, 6, 7, 5, 8, 2, 3, 9, 5, 8, 8, 8, 5, 3, 4, 4,
       8, 4, 2, 4, 8)

w <- structure(c(1.02849002849003, 0.587708587708588, 0.587708587708588,
                 1.61619861619862, 1.02849002849003, 0.293854293854294, 0.440781440781441,
                 1.46927146927147, 0.440781440781441, 1.02849002849003, 1.02849002849003,
                 1.02849002849003, 0.440781440781441, 0.734635734635735, 0.587708587708588,
                 0.734635734635735, 0.587708587708588, 0.587708587708588, 1.02849002849003,
                 0.293854293854294, 0.734635734635735, 0.293854293854294, 1.02849002849003,
                 0.881562881562882, 0.587708587708588, 0.734635734635735, 0.587708587708588,
                 0.587708587708588, 0.293854293854294, 1.17541717541718, 0.881562881562882,
                 0.734635734635735, 0.734635734635735, 0.587708587708588, 0.734635734635735,
                 0.440781440781441, 0.587708587708588, 1.61619861619862, 0.293854293854294,
                 0.734635734635735, 0.293854293854294, 0.881562881562882, 0.734635734635735,
                 1.32234432234432, 1.61619861619862, 0.881562881562882, 0.734635734635735,
                 0.734635734635735, 1.61619861619862, 0.734635734635735, 0.587708587708588,
                 1.02849002849003, 1.61619861619862, 1.46927146927147, 0.587708587708588,
                 1.61619861619862, 0.587708587708588, 0.587708587708588, 0.293854293854294,
                 0.440781440781441, 0.734635734635735, 1.46927146927147, 0.440781440781441,
                 0.587708587708588, 1.02849002849003, 0.881562881562882, 0.587708587708588,
                 0.146927146927147, 0.587708587708588, 0.587708587708588, 1.17541717541718,
                 0.146927146927147, 0.587708587708588, 0.293854293854294, 0.734635734635735,
                 0.734635734635735, 1.61619861619862, 0.734635734635735, 0.587708587708588,
                 1.61619861619862, 0.734635734635735, 0.734635734635735, 1.61619861619862,
                 0.587708587708588, 0.293854293854294, 0.881562881562882, 0.587708587708588,
                 1.02849002849003, 1.17541717541718, 0.146927146927147, 1.32234432234432,
                 0.587708587708588, 0.734635734635735, 0.881562881562882, 1.61619861619862,
                 0.734635734635735, 0.881562881562882, 0.293854293854294, 0.440781440781441,
                 1.32234432234432, 0.293854293854294, 0.440781440781441, 1.61619861619862,
                 0.734635734635735, 0.293854293854294, 0.734635734635735, 1.61619861619862,
                 1.46927146927147, 1.02849002849003, 0.587708587708588, 0.881562881562882,
                 0.587708587708588, 0.587708587708588, 1.46927146927147, 0.587708587708588,
                 0.293854293854294, 0.440781440781441, 1.32234432234432, 0.734635734635735,
                 0.734635734635735, 1.02849002849003, 1.17541717541718, 1.17541717541718,
                 0.293854293854294, 1.32234432234432, 0.440781440781441, 0.734635734635735,
                 0.293854293854294, 0.734635734635735, 0.734635734635735, 0.734635734635735,
                 0.293854293854294, 1.61619861619862, 1.17541717541718, 1.02849002849003,
                 0.734635734635735, 0.146927146927147, 0.734635734635735, 0.440781440781441,
                 0.440781440781441, 0.587708587708588, 1.17541717541718, 1.32234432234432,
                 0.587708587708588, 1.02849002849003, 0.146927146927147, 0.734635734635735,
                 0.293854293854294, 1.02849002849003, 0.146927146927147, 0.881562881562882,
                 0.587708587708588, 0.881562881562882, 0.734635734635735, 0.293854293854294,
                 0.440781440781441, 0.587708587708588, 0.293854293854294, 0.293854293854294,
                 0.734635734635735, 0.881562881562882, 1.32234432234432, 1.61619861619862,
                 1.61619861619862, 0.881562881562882, 1.02849002849003, 0.440781440781441,
                 0.440781440781441, 1.61619861619862, 0.881562881562882, 1.61619861619862,
                 0.293854293854294, 1.17541717541718, 0.293854293854294, 1.32234432234432,
                 0.440781440781441, 0.293854293854294, 1.17541717541718, 0.881562881562882,
                 0.734635734635735, 0.293854293854294, 1.32234432234432, 0.587708587708588,
                 1.61619861619862, 1.17541717541718, 0.881562881562882, 0.881562881562882,
                 1.61619861619862, 0.881562881562882, 0.734635734635735, 0.440781440781441,
                 0.734635734635735, 0.440781440781441, 0.881562881562882, 0.587708587708588,
                 0.440781440781441, 1.61619861619862, 1.32234432234432, 0.734635734635735,
                 0.734635734635735, 0.146927146927147, 0.587708587708588, 1.17541717541718,
                 0.440781440781441, 0.734635734635735, 0.881562881562882, 1.17541717541718,
                 0.293854293854294, 1.32234432234432, 1.02849002849003, 1.32234432234432,
                 1.61619861619862, 1.02849002849003, 0.440781440781441, 0.881562881562882,
                 1.61619861619862, 0.587708587708588, 1.17541717541718, 0.881562881562882,
                 0.734635734635735, 1.61619861619862, 1.17541717541718, 0.587708587708588,
                 1.61619861619862, 0.293854293854294, 0.440781440781441, 1.02849002849003,
                 1.61619861619862, 0.587708587708588, 0.734635734635735, 0.881562881562882,
                 0.734635734635735, 0.734635734635735, 0.881562881562882, 0.293854293854294,
                 0.146927146927147, 0.734635734635735, 0.587708587708588, 0.734635734635735,
                 0.881562881562882, 0.146927146927147, 0.881562881562882, 1.17541717541718,
                 1.61619861619862, 1.61619861619862, 1.02849002849003, 1.61619861619862,
                 0.734635734635735, 0.440781440781441, 0.881562881562882, 1.17541717541718,
                 0.881562881562882, 0.734635734635735, 0.734635734635735, 0.734635734635735,
                 1.32234432234432, 0.734635734635735, 1.17541717541718, 0.587708587708588,
                 1.61619861619862, 0.587708587708588, 1.32234432234432, 0.734635734635735,
                 0.293854293854294, 0.734635734635735, 0.440781440781441, 0.440781440781441,
                 0.293854293854294, 0.587708587708588, 0.734635734635735, 0.734635734635735,
                 0.881562881562882, 0.734635734635735, 0.734635734635735, 1.61619861619862,
                 0.587708587708588, 0.881562881562882, 0.293854293854294, 1.02849002849003,
                 0.881562881562882, 0.734635734635735, 0.587708587708588, 0.881562881562882,
                 0.440781440781441, 0.734635734635735, 0.734635734635735, 0.881562881562882,
                 1.61619861619862, 0.440781440781441, 0.587708587708588, 0.587708587708588,
                 0.734635734635735, 0.440781440781441, 0.587708587708588, 1.17541717541718,
                 0.734635734635735, 0.293854293854294, 0.734635734635735, 0.734635734635735,
                 0.734635734635735, 0.587708587708588, 1.61619861619862, 0.881562881562882,
                 0.587708587708588, 0.587708587708588, 0.734635734635735, 0.734635734635735,
                 1.02849002849003, 1.17541717541718, 0.734635734635735, 0.881562881562882,
                 0.587708587708588, 0.881562881562882, 0.881562881562882, 0.881562881562882,
                 0.293854293854294, 0.440781440781441, 0.881562881562882, 0.734635734635735,
                 0.881562881562882, 1.02849002849003, 0.587708587708588, 0.587708587708588,
                 0.587708587708588, 0.734635734635735, 0.587708587708588, 0.440781440781441),
               name = "Q32", label = "Q32. Income")

dat <- cbind(y, X)

test_that("Relative importance linear", {
    ria <- flipRegression:::estimateRelativeImportance(y ~ v1 + v2 + v3, data = dat, weights = NULL,
                                                       type = "Linear", signs = c(1, 1 ,1),
                                                       r.square = 0.0409055316886271,
                                                       variable.names = LETTERS[1:3], robust.se = FALSE,
                                                       show.warnings = TRUE, correction = "None")
    expect_equal(unname(ria$importance[3]), 84.254254422183)
    expect_equal(unname(ria$raw.importance[1]), 0.00427583141764991)
    expect_equal(unname(ria$standard.errors[2]), 0.00639974583223708)
    expect_equal(unname(ria$statistics[3]), 1.67709811051115)
    expect_equal(unname(ria$p.values[1]), 0.601689357057393)
})

test_that("Relative importance linear weighted", {
    ria <- flipRegression:::estimateRelativeImportance(y ~ v1 + v2 + v3, data= dat, weights = w,
                                                       type = "Linear", signs = c(1, 1, 1),
                                                       r.square = 0.0488985219292419, variable.names = LETTERS[1:3],
                                                       robust.se = FALSE, outlier.proportion = 0,
                                                       show.warnings = TRUE, correction = "None")
    expect_equal(unname(ria$importance[3]), 80.657438103125)
    expect_equal(unname(ria$raw.importance[1]), 0.00356269285452153)
    expect_equal(unname(ria$standard.errors[2]), 0.00922220001471266)
    expect_equal(unname(ria$statistics[3]), 1.80434200504385)
    expect_equal(unname(ria$p.values[1]), 0.63974959606131)
})

types <- c("Linear", "Binary Logit", "Ordered Logit", "Poisson", "Quasi-Poisson", "NBD")
output <- "Relative Importance Analysis"

data(bank, package = "flipExampleData")
bank <- transform(bank, o2 = Overall >= 4)
o2.formula <- o2 ~ Fees + Interest + Phone + Branch + Online + ATM
bank.formula <- Overall ~ Fees + Interest + Phone + Branch + Online + ATM

for (type in types)
    test_that(paste("Relative importance", type), {
        # Prevent pop-ups
        mockery::stub(print.Regression, "print.htmlwidget", NULL)
        test.formula <- if (type == "Binary Logit") o2.formula else bank.formula
        expect_warning(reg <- Regression(test.formula, data = bank, type = type, output = output),
                       "70% of the data is missing")
        expect_error(suppressWarnings(print(reg)), NA)
})
test_that("Relative importance Multinomial Logit", {
        # Prevent pop-ups
        mockery::stub(print.Regression, "print.htmlwidget", NULL)
        expect_error(suppressWarnings(print(Regression(Overall ~ Fees + Interest + Phone + Branch + Online + ATM,
                     data = bank, type = "Multinomial Logit", output = output))),
                                        "Type not handled:  Multinomial Logit")
})

# Weights
for (type in types)
    test_that(paste("Relative importance weighted", type), {
        # Prevent pop-ups
        mockery::stub(print.Regression, "print.htmlwidget", NULL)
        test.formula <- if (type == "Binary Logit") o2.formula else bank.formula
        expect_warning(reg <- Regression(test.formula, data = bank, type = type, output = output,
                                         weights = bank$weight), "73% of the data is missing")
        expect_error(suppressWarnings(print(reg)), NA)
})
test_that("Relative importance weighted Multinomial Logit", {
    # Prevent pop-ups
    mockery::stub(print.Regression, "print.htmlwidget", NULL)
    expect_error(suppressWarnings(
        print(
            Regression(bank.formula, data = bank, type = "Multinomial Logit", output = output,
                       weights = bank$weight)
            )
        ),
        "Type not handled:  Multinomial Logit"
    )
})

# Filter
test_that("Relative importance filtered", {
    # Prevent pop-ups
    mockery::stub(print.Regression, "print.htmlwidget", NULL)
    expect_error(suppressWarnings(print(Regression(bank.formula, data = bank, type = "Linear", output = output,
                                                   subset = bank$ID < 100))), NA)
})

# Robust standard error
test_that("Relative importance robust SE", {
    # Prevent pop-ups
    mockery::stub(print.Regression, "print.htmlwidget", NULL)
    expect_error(suppressWarnings(print(Regression(bank.formula, data = bank, type = "Linear", output = output,
                                                   robust.se = FALSE))), NA)
})

# Negative sign warning
test_that("Relative importance negative sign",
{
    expect_warning(
        flipRegression:::estimateRelativeImportance(
            formula = y ~ v1 + v2 + v3,
            data = dat,
            weights = NULL,
            type = "Linear",
            signs = c(1, -1, 1),
            r.square = 0.0409055316886271,
            variable.names = LETTERS[1:3],
            correction = "None"
        ),
        paste0("Negative signs in Relative Importance scores were applied from coefficient signs",
               " in Linear Regression. To disable this feature, check the Absolute importance",
               " scores option.")
    )

    res <- Regression(formula = y ~ v1 + v2 + v3, data = dat, output = "Relative Importance Analysis",
                      missing = "Multiple imputation", importance.absolute = TRUE)
    expect_true(all(res$importance$importance > 0))
})

X.factor <- X
X.factor[[1]] <- as.factor(X.factor[[1]])
X.factor[[2]] <- as.factor(X.factor[[2]])
X.factor[[3]] <- as.factor(X.factor[[3]])
dat.factor <- cbind(y, X.factor)

# Factor warning
test_that("Relative importance ordered factor",
    expect_warning(
        flipRegression:::estimateRelativeImportance(
            formula = y ~ v1 + v2 + v3,
            data = dat.factor,
            weights = NULL, type = "Linear",
            signs = c(1, -1 ,1),
            r.square = 0.0409055316886271,
            variable.names = LETTERS[1:3],
            correction = "None"
        ),
        "The following variables have been treated as categorical: v1,v2,v3. This may over-inflate their effects.")
)

test_that("Relative importance robust SE, dot in formula",
{
    bank$ID <- bank$weight <- NULL
    out <- suppressWarnings(Regression(Overall ~ .,
                                               data = bank, type = "Linear", output = output,
                                       robust.se = F))
    expect_equal(attr(out$terms, "term.labels"), names(bank)[-1L])
})

test_that("Shapley",
{
    bank.no.missing <- bank[!is.na(rowSums(bank)), ]
    bank.no.missing$Interest <- -bank.no.missing$Interest # reverse sign to test signs

    warning.msg <- paste0("Negative signs in Relative Importance scores were applied ",
                          "from coefficient signs in Linear Regression. ",
                          "To disable this feature, check the Absolute importance ",
                          "scores option.")

    expect_warning(result <- computeShapleyImportance(Overall ~ Fees + Interest + Phone + Branch + Online + ATM,
                                      data = bank.no.missing,
                                      weights = rep(1, nrow(bank.no.missing)),
                                      variable.names = c("Fees", "Interest", "Phone",
                                                         "Branch", "Online", "ATM"),
                                      signs = NULL,
                                      correction = "None"), warning.msg)

    expect_equal(result$raw.importance[1], c(Fees = 0.115570678291964))
    expect_equal(sum(result$raw.importance), 0.4988654351715)
    expect_equal(result$importance[1], c(Fees = 23.1667039133))
    expect_equal(result$standard.errors[1], c(Fees = 0.0311308198242954))
    expect_equal(result$statistics[1], c(Fees = 3.71242000513488))
    expect_equal(result$p.values[1], c(Fees = 0.000255937393846806))
    expect_equal(result$importance[2], c(Interest = -15.1397373957463))

    expect_warning(result <- computeShapleyImportance(Overall ~ Fees + Interest + Phone + Branch + Online + ATM,
                                                      data = bank.no.missing,
                                                      weights = bank.no.missing$weight,
                                                      variable.names = c("Fees", "Interest", "Phone",
                                                                         "Branch", "Online", "ATM"),
                                                      signs = NULL,
                                                      correction = "None"), warning.msg)
    expect_equal(result$raw.importance[1], c(Fees = 0.1090244509126))
    expect_equal(sum(result$raw.importance), 0.4936505179077)
    expect_equal(result$importance[1], c(Fees = 22.08535126726))
    expect_equal(result$standard.errors[1], c(Fees = 0.0302987299497523))
    expect_equal(result$statistics[1], c(Fees = 3.59831752332123))
    expect_equal(result$p.values[1], c(Fees = 0.000389870122508642))

    # Prevent pop-ups
    mockery::stub(print.Regression, "print.htmlwidget", NULL)
    suppressWarnings(print(Regression(Overall ~ Fees + Interest + Phone + Branch + Online + ATM,
                     data = bank, type = "Linear", output = "Shapley Regression")))

    expect_error(Regression(Overall ~ Fees + Interest + Phone + Branch + Online + ATM,
                            data = bank, type = "Binary Logit", output = "Shapley Regression"),
                 "Shapley requires Regression type to be Linear. Set the output to Relative Importance Analysis instead.")

    many.variables <- matrix(rnorm(3000), ncol = 30)
    colnames(many.variables) <- paste0("v", 1:30)
    many.variables <- data.frame(many.variables)
    frml <- formula(paste("v1", "~", paste0("v", 2:30, collapse = " + ")))
    expect_error(result <- computeShapleyImportance(frml,
                                                   data = many.variables,
                                                   weights = NULL,
                                                   variable.names = paste0("v", 2:30),
                                                   signs = NULL,
                                                   correction = "None"),
        "Shapley can run with a maximum of 27 predictors. Set the output to Relative Importance Analysis instead.")
})

test_that("Dummy variable adjustment valid", {
    # Simulate correlated predictors
    set.seed(12321)
    X <- MASS::mvrnorm(n = 200, mu = rep(0, 3), Sigma = matrix(c(1, 0.2, 0.3, 0.2, 1, 0.3, 0.2, 0.2, 1), ncol = 3))
    beta <- c(0.4, 0.3, 0.25)
    r2 <- 0.30

    Y <- X %*% beta + rnorm(n = 200)

    not.missing.data <- data.frame(Y, X)
    Xm <- X
    missing <- sample(seq_len(nrow(X)), size = 50, replace = FALSE)
    Xm[, 2][missing] <- NA
    missing.data <- data.frame(Y, Xm)
    # Prevent pop-ups
    mockery::stub(print.Regression, "print.htmlwidget", NULL)
    dummy.adj <- "Dummy variable adjustment"
    for (out in c("Relative Importance Analysis", "Shapley Regression"))
    {
        expect_warning(z <- Regression(Y ~ X1 + X2 + X3, data = not.missing.data, output = out),
                       NA)
        expect_warning(print(z), "Unusual observations detected")
        expect_warning(z <- Regression(Y ~ X1 + X2 + X3, data = missing.data, output = out, missing = dummy.adj),
                       NA)
        expect_warning(print(z), "Unusual observations detected")
    }
})


test_that("DS-2876: Jaccard Output", {
    # Basic Jaccard Coefficient tests
    set.seed(123)
    n <- 10
    x <- rbinom(n, size = 1, prob = 0.5)
    y <- rbinom(n, size = 1, prob = 0.5)
    weights <- w <- runif(n)
    p.x <- mean(x); p.y <- mean(y)

    computed.jaccard <- sum(x & y)/sum(x | y)
    weighted.jaccard <- sum(w[x & y])/sum(w[x | y])
    x.missing <- x; x.missing[5] <- NA; y.missing <- y; y.missing[6] <- NA
    px.missing <- mean(x.missing, na.rm = TRUE); py.missing <- mean(y.missing, na.rm = TRUE);

    # Computed Jaccard tests
    computed.missing.jaccard <- sum(x.missing & y.missing, na.rm = TRUE)/sum(x.missing | y.missing, na.rm = TRUE)
    expect_equal(flipRegression:::singleJaccardCoefficient(x, y), computed.jaccard)
    expect_equal(flipRegression:::singleJaccardCoefficient(x.missing, y.missing), computed.missing.jaccard)
    expect_equal(flipRegression:::singleJaccardCoefficient(x, y, weights = weights), weighted.jaccard)
    # Expectation tests
    computed.mean <- p.x * p.y/(p.x + p.y - p.x * p.y)
    expect_equal(flipRegression:::singleJaccardExpectation(x, y), computed.mean)
    centered.computed <- computed.jaccard - computed.mean
    # Centered values tests
    expect_equal(flipRegression:::singleJaccardCoefficient(x, y, centered = TRUE), centered.computed)
    # Larger test
    set.seed(12321)
    n <- 100
    X <- lapply(1:3, function(x) rbinom(n = n, size = 1, prob = 0.5))
    X <- as.matrix(as.data.frame(X))
    beta <- matrix(c(1, 4, 9, 2), ncol = 1)
    # Expect X2 and Y to be the same value, hence small p-value
    Y <- as.numeric(cbind(1, X) %*% beta >= 12)
    dat <- data.frame(Y, X)
    subset <- sample(c(TRUE, FALSE), size = n, replace = TRUE)
    weights <- runif(n)
    names(dat) <- c("Y", paste0("X", 1:3))
    new.names <- c("Super Y", "Oranges", "Apples", "Grapes")
    names.with.prefix <- paste0("Q4:", new.names)
    # Create dataframe with label attributes for questions
    dat.with.names <- as.data.frame(mapply(function(x, y) {
        attr(x, "label") <- y
        x
    }, x = dat, y = new.names, SIMPLIFY = FALSE))
    dat.with.prefix.names <- as.data.frame(mapply(function(x, y) {
        attr(x, "label") <- y
        x
    }, x = dat, y = names.with.prefix, SIMPLIFY = FALSE))
    # Check Importance output correct
    expect_error(model <- Regression(Y ~ X1 + X2 + X3, data = dat,
                                     output = "Jaccard Coefficient", type = "Linear"),
                 NA)
    expect_error(corrected.model <- Regression(Y ~ X1 + X2 + X3, data = dat,
                                               output = "Jaccard Coefficient", type = "Linear",
                                               correction = "False Discovery Rate"),
                 NA)
    expect_error(weighted.model <- Regression(Y ~ X1 + X2 + X3, data = dat, weights = weights,
                                              output = "Jaccard Coefficient", type = "Linear"),
                 NA)
    expect_error(subsetted.model <- Regression(Y ~ X1 + X2 + X3, data = dat, subset = subset,
                                               output = "Jaccard Coefficient", type = "Linear"),
                 NA)
    expect_error(fancy.names.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.names, show.labels = TRUE,
                                                 output = "Jaccard Coefficient", type = "Linear"),
                 NA)
    expect_error(prefix.fancy.names.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.prefix.names, show.labels = TRUE,
                                                        output = "Jaccard Coefficient", type = "Linear"),
                 NA)
    # Check relative importance element exists
    expect_identical(model$importance, model$relative.importance)
    # Model output the same
    expect_equal(model$importance, fancy.names.model$importance, check.attributes = FALSE)
    # Names are used
    expect_identical(names(fancy.names.model$importance$raw.importance), new.names[-1])
    expect_identical(names(fancy.names.model$importance$importance), new.names[-1])
    # Prefix is removed
    expect_identical(names(prefix.fancy.names.model$importance$raw.importance), new.names[-1])
    expect_identical(names(prefix.fancy.names.model$importance$importance), new.names[-1])
    # Expect jaccard coefficients to be the same as computed manually
    expect_equal(model$importance$raw.importance,
                 vapply(dat[-1], flipRegression:::singleJaccardCoefficient, numeric(1), y = dat[[1]]))
    # Relative importance values computed correctly as the relative sizes of the t-statistics
    tests <- lapply(dat[-1], flipRegression:::jaccardTest, y = dat[[1]], weights = rep(1, nrow(dat)))
    t.stats <- vapply(tests, "[[", numeric(1), "t")
    expect_equal(model$importance$importance, 100 * prop.table(t.stats))
    # Check weights applied to coefficients
    expect_equal(weighted.model$importance$raw.importance,
                 vapply(dat[-1], flipRegression:::singleJaccardCoefficient, numeric(1), y = dat[[1]], weights = CalibrateWeight(weights)))
    # subsetted model has correct coefficients
    expect_equal(subsetted.model$importance$raw.importance,
                 vapply(dat[subset, -1], flipRegression:::singleJaccardCoefficient, numeric(1), y = dat[[1]][subset]))
    # Expect p-values to be significant when predictor used in regression
    expect_equal(model$importance$p.values, c(X1 = 0.074807, X2 = 0, X3 = 0.000411), tolerance = 1e-6)
    expect_equal(corrected.model$importance$p.values, flipRegression:::pvalAdjust(c(X1 = 0.074807, X2 = 0, X3 = 0.000411),
                                                                                  "False Discovery Rate"),
                 tolerance = 1e-6, check.attributes = FALSE)
    # Expect p-values to be insignificant when predictor not used
    set.seed(12321)
    insig.dat <- as.data.frame(lapply(rep(100, 4), rbinom, size = 1, prob = 0.5))
    names(insig.dat) <- c("Y", paste0("X", 1:3))
    expect_error(insig.model <- Regression(Y ~ ., data = insig.dat, output = "Jaccard Coefficient"), NA)
    expect_true(all(insig.model$importance$p.values > 0.05))
    expect_equal(insig.model$importance$p.values, c(X1 = 0.594830988348535, X2 = 0.62906440688862, X3 = 0.249175392296114))
    expect_equal(model$importance$p.values, c(X1 = 0.074807, X2 = 0, X3 = 0.000411), tolerance = 1e-6)
    # Missing data tests (see above Jaccard computations and the following)
    dat.with.missing <- dat
    dat.with.missing[1, 2] <- NA
    expect_error(Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                            output = "Jaccard Coefficient", missing = "Error if missing data"),
                 "The data contains missing values. Change the 'missing' option to run the analysis.", fixed = TRUE)
    expect_error(excluded.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                                                      output = "Jaccard Coefficient", missing = "Exclude cases with missing data"), NA)
    expect_equal(excluded.missing.model$importance$sample.size, c(X1 = n - 1, X2 = n - 1, X3 = n - 1))
    expect_error(partial.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                                                     output = "Jaccard Coefficient", missing = "Use partial data (pairwise correlations)"), NA)
    expect_equal(partial.missing.model$importance$sample.size, c(X1 = n - 1, X2 = n, X3 = n))
    dat.with.missing <- as.data.frame(lapply(dat, function(x) {
       x[sample(c(TRUE, FALSE), size = length(x), replace = TRUE)] <- NA
       x
    }))
    expect_error(Regression(Y ~ X1 + X2 + X3, data = dat.with.missing, missing = "Error if missing data"),
                 "The data contains missing values. Change the 'missing' option to run the analysis.", fixed = TRUE)
    # Check predict method works with missing data
    expect_error(predictions <- predict(excluded.missing.model), NA)
    expect_true(sum(is.na(predictions)) == 1)
    expected.error <- paste0(sQuote("predict"), " not available when ", sQuote("missing"), " ",
                             "= ", dQuote("Use partial data (pairwise correlations)"), ".")
    expect_error(predictions <- predict(partial.missing.model),
                 expected.error, fixed = TRUE)
    # Check model works with interaction
    dat.with.interaction <- dat
    dat.with.interaction$int <- factor(sample(LETTERS[1:3], size = n, replace = TRUE))
    expect_error(int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.interaction,
                                         interaction = int, output = "Jaccard Coefficient"),
                 NA)
    # Check interaction with missing values is handled
    dat.with.missing.interaction <- dat.with.interaction
    missing.int <- dat.with.interaction$int
    missing.int[sample(c(TRUE, FALSE), size = nrow(dat.with.missing.interaction), replace = TRUE, prob = c(1, 5))] <- NA
    dat.with.missing.interaction$int <- missing.int
    expect_error(int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing.interaction,
                                         interaction = int, output = "Jaccard Coefficient"),
                 NA)
    # Allow categorical outcome variables for Jaccard
    dat$Y <- factor(sample(LETTERS[1:2], size = nrow(dat), replace = TRUE))
    dat$Y3 <- factor(sample(LETTERS[1:3], size = nrow(dat), replace = TRUE))
    # Allow categorical variable with two levels
    expect_error(category.2 <- Regression(Y ~ X1 + X2, data = dat, output = "Jaccard Coefficient"), NA)
    # Check the output equivalent where outcome is numeric binary
    dat$Yb <- unclass(dat$Y) - 1
    expect_error(num.binary <- Regression(Yb ~ X1 + X2, data = dat, output = "Jaccard Coefficient"), NA)
    expect_identical(category.2$importance, num.binary$importance)
    # Check the outcome variable is dichotomized into a numeric binary variable
    expect_warning(dichot <- Regression(Y3 ~ X1 + X2, data = dat, output = "Jaccard Coefficient"),
                   "Y3 has been dichotimized into <= A & >= B", fixed = TRUE)
    expect_setequal(dichot$model$Y3, c(0, 1))
    # Check output equivalent to numeric binary outcome
    dat$Yb <- as.numeric(dat$Y3 %in% LETTERS[2:3])
    expect_error(num.binary <- Regression(Yb ~ X1 + X2, data = dat, output = "Jaccard Coefficient"), NA)
    expect_identical(dichot$importance, num.binary$importance)
})

test_that("DS-2876: Correlation Output", {
    # Output is computed using the flipStatistics::CorrelationsWithSignificance function
    set.seed(12321)
    n <- 100
    X <- lapply(1:3, function(x) rnorm(n = n))
    X <- as.matrix(as.data.frame(X))
    beta <- matrix(c(1, 4, 9, 2), ncol = 1)
    # Expect X2 and Y to be the same value, hence small p-value
    Y <- cbind(1, X) %*% beta + rnorm(n)
    dat <- data.frame(Y, X)
    subset <- sample(c(TRUE, FALSE), size = n, replace = TRUE)
    weights <- runif(n)
    names(dat) <- c("Y", paste0("X", 1:3))
    new.names <- c("Super Y", "Oranges", "Apples", "Grapes")
    names.with.prefix <- paste0("Q4:", new.names)
    # Create dataframe with label attributes for questions
    dat.with.names <- as.data.frame(mapply(function(x, y) {
        attr(x, "label") <- y
        x
    }, x = dat, y = new.names, SIMPLIFY = FALSE))
    dat.with.prefix.names <- as.data.frame(mapply(function(x, y) {
        attr(x, "label") <- y
        x
    }, x = dat, y = names.with.prefix, SIMPLIFY = FALSE))
    # Check Importance output correct
    expect_error(model <- Regression(Y ~ X1 + X2 + X3, data = dat,
                                     output = "Correlation", type = "Linear"),
                 NA)
    expect_error(corrected.model <- Regression(Y ~ X1 + X2 + X3, data = dat,
                                               output = "Correlation", type = "Linear",
                                               correction = "False Discovery Rate"),
                 NA)
    expect_error(weighted.model <- Regression(Y ~ X1 + X2 + X3, data = dat, weights = weights,
                                              output = "Correlation", type = "Linear"),
                 NA)
    expect_error(subsetted.model <- Regression(Y ~ X1 + X2 + X3, data = dat, subset = subset,
                                               output = "Correlation", type = "Linear"),
                 NA)
    expect_error(sub.and.weighted.model <- Regression(Y ~ X1 + X2 + X3, data = dat, subset = subset, weights = weights,
                                                      output = "Correlation", type = "Linear"),
                 NA)
    expect_error(fancy.names.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.names, show.labels = TRUE,
                                                 output = "Correlation", type = "Linear"),
                 NA)
    expect_error(prefix.fancy.names.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.prefix.names, show.labels = TRUE,
                                                        output = "Correlation", type = "Linear"),
                 NA)
    # Check relative importance element exists
    expect_identical(model$importance, model$relative.importance)
    # Model output the same
    expect_equal(model$importance, fancy.names.model$importance, check.attributes = FALSE)
    # Names are used
    expect_identical(names(fancy.names.model$importance$raw.importance), new.names[-1])
    expect_identical(names(fancy.names.model$importance$importance), new.names[-1])
    # Prefix is removed
    expect_identical(names(prefix.fancy.names.model$importance$raw.importance), new.names[-1])
    expect_identical(names(prefix.fancy.names.model$importance$importance), new.names[-1])
    # Check caclulated values agree with dependency
    basic.correlation.output <- flipStatistics::CorrelationsWithSignificance(dat, weights = rep(1, n), spearman = FALSE)
    subsetted.correlation.output <- flipStatistics::CorrelationsWithSignificance(dat[subset, ], weights = rep(1, sum(subset)), spearman = FALSE)
    weighted.correlation.output <- flipStatistics::CorrelationsWithSignificance(dat, weights = weights, spearman = FALSE)
    sub.and.weighted.correlation.output <- flipStatistics::CorrelationsWithSignificance(dat[subset, ], weights = weights[subset], spearman = FALSE)
    # Expect Correlation output to be the same as computed manually using the helper function above
    expect_equal(model$importance$raw.importance, basic.correlation.output$cor[-1, 1])
    # Relative importance values computed correctly
    expect_equal(model$importance$importance, 100 * prop.table(model$importance$raw.importance))
    # Check weights, subsets and both applied to coefficients
    expect_equal(weighted.model$importance$raw.importance, weighted.correlation.output$cor[-1, 1])
    expect_equal(subsetted.model$importance$raw.importance, subsetted.correlation.output$cor[-1, 1])
    expect_equal(sub.and.weighted.model$importance$raw.importance, sub.and.weighted.correlation.output$cor[-1, 1])
    # Check p-values are consistent
    expect_equal(model$importance$p.values, basic.correlation.output$p[-1, 1])
    expect_equal(weighted.model$importance$p.values, weighted.correlation.output$p[-1, 1])
    expect_equal(subsetted.model$importance$p.values, subsetted.correlation.output$p[-1, 1])
    expect_equal(sub.and.weighted.model$importance$p.values, sub.and.weighted.correlation.output$p[-1, 1])
    expect_equal(corrected.model$importance$p.values, flipRegression:::pvalAdjust(basic.correlation.output$p[-1, 1], "False Discovery Rate"))
    # Expect p-values to be insignificant when predictor not used
    set.seed(12321)
    insig.dat <- as.data.frame(lapply(rep(100, 4), rnorm))
    names(insig.dat) <- c("Y", paste0("X", 1:3))
    expect_error(insig.model <- Regression(Y ~ ., data = insig.dat, output = "Correlation"), NA)
    expect_true(all(insig.model$importance$p.values > 0.05))
    expect_equal(insig.model$importance$p.values, c(X1 = 0.809554824521213, X2 = 0.0616307880261012, X3 = 0.22312934720278))
    # Missing data tests
    dat.with.missing <- dat
    dat.with.missing[1, 2] <- NA
    subset <- rep(c(TRUE, FALSE), c(90,  10))
    expect_error(Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                            output = "Correlation", missing = "Error if missing data"),
                 "The data contains missing values. Change the 'missing' option to run the analysis.", fixed = TRUE)
    expect_error(excluded.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                                                      output = "Correlation", missing = "Exclude cases with missing data"), NA)
    expect_equal(excluded.missing.model$importance$sample.size, c(X1 = n - 1, X2 = n - 1, X3 = n - 1))
    expect_error(partial.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                                                     output = "Correlation", missing = "Use partial data (pairwise correlations)"), NA)
    expect_error(subset.partial.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing, subset = subset,
                                                             output = "Correlation", missing = "Use partial data (pairwise correlations)"), NA)
    expect_warning(weighted.subset.partial.missing.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing,
                                                                     subset = subset, weights = weights,
                                                                     output = "Correlation",
                                                                     missing = "Use partial data (pairwise correlations)"), "MCAR")
    expect_equal(partial.missing.model$importance$sample.size, c(X1 = n - 1, X2 = n, X3 = n))
    dat.with.missing <- as.data.frame(lapply(dat, function(x) {
        x[sample(c(TRUE, FALSE), size = length(x), replace = TRUE)] <- NA
        x
    }))
    expect_error(Regression(Y ~ X1 + X2 + X3, data = dat.with.missing, missing = "Error if missing data"),
                 "The data contains missing values. Change the 'missing' option to run the analysis.", fixed = TRUE)
    # Check predict method works with missing data
    expect_error(predictions <- predict(excluded.missing.model), NA)
    expect_true(sum(is.na(predictions)) == 1)
    expected.error <- paste0(sQuote("predict"), " not available when ", sQuote("missing"), " ",
                             "= ", dQuote("Use partial data (pairwise correlations)"), ".")
    expect_error(predictions <- predict(partial.missing.model),
                 expected.error, fixed = TRUE)
    # Check model works with interaction
    dat.with.interaction <- dat
    dat.with.interaction$int <- factor(sample(LETTERS[1:3], size = n, replace = TRUE))
    expect_error(int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.interaction,
                                         interaction = int, output = "Correlation"),
                 NA)
    expect_error(subset.int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.interaction,
                                                subset = subset,
                                                interaction = int, output = "Correlation"),
                 NA)
    expect_warning(subset.weight.int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.interaction,
                                                       subset = subset, weights = weights,
                                                       interaction = int, output = "Correlation",
                                                       missing = "Use partial data (pairwise correlations)"),
                 "MCAR")
    # Check interaction with missing values is handled
    dat.with.missing.interaction <- dat.with.interaction
    missing.int <- dat.with.interaction$int
    missing.int[sample(c(TRUE, FALSE), size = nrow(dat.with.missing.interaction), replace = TRUE, prob = c(1, 5))] <- NA
    dat.with.missing.interaction$int <- missing.int
    expect_error(int.model <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing.interaction,
                                         interaction = int, output = "Correlation"),
                 NA)
    expect_error(int.model.with.partial.data <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing.interaction,
                                                           interaction = int, output = "Correlation",
                                                           missing = "Use partial data (pairwise correlations)"),
                 NA)
    expect_error(sub.int.model.with.partial.data <- Regression(Y ~ X1 + X2 + X3, data = dat.with.missing.interaction,
                                                           interaction = int, output = "Correlation", subset = subset,
                                                           missing = "Use partial data (pairwise correlations)"),
                 NA)
    # Check categorical variables are coerced to numeric in Correlation outputs
    dat.with.categ <- dat
    dat.with.categ$X4 <- factor(sample(1:10, replace = TRUE, size = nrow(dat)))
    expect_warning(categ.pred <- Regression(Y ~ X1 + X2 + X3 + X4, data = dat.with.categ,
                                            type = "Linear", output = "Correlation"),
                   "The variable X4 has been converted\\.$", perl = TRUE)
    # Computed importance uses factor coerced to numeric
    dat.categ.to.numeric <- lapply(dat.with.categ, unclass)
    expected.cor <- vapply(dat.categ.to.numeric[-1], function(x) cor(x, dat.categ.to.numeric[[1L]]), numeric(1))
    expect_equal(categ.pred$importance$raw.importance, expected.cor)
    categ.pred$importance$raw.importance
    # Labels shown in warning during conversion
    dat.with.fancy.categ <- dat.with.categ
    attr(dat.with.fancy.categ$X4, "label") <- "Some factor"
    expect_warning(Regression(Y ~ X1 + X2 + X3 + X4, data = dat.with.fancy.categ,
                              type = "Linear", output = "Correlation"),
                   "The variable Some factor \\(X4\\) has been converted\\.$", perl = TRUE)
})

test_that("Check error message when Categorical predictors used in RIA", {
    data(bank, package = "flipExampleData")
    # Expect no error if numeric predictors used
    expect_error(Regression(Overall ~ Fees + Interest, data = bank,
                            output = "Relative Importance Analysis",
                            missing = "Dummy variable adjustment"),
                 NA)
    # Expect error if factor or ordered factor used
    bank$Interest <- factor(bank$Interest)
    expect_error(Regression(Overall ~ Fees + Interest, data = bank,
                            output = "Relative Importance Analysis",
                            missing = "Dummy variable adjustment"),
                 paste0("Dummy variable adjustment method for missing data is not supported for categorical ",
                        "predictor variables in Relative Importance Analysis. Please remove the categorical ",
                        "predictors: 'Interest' and re-run the analysis"), fixed = TRUE)
    attr(bank$Interest, "label") <- "Interest charged by the bank"
    expect_error(Regression(Overall ~ Fees + Interest, data = bank,
                            output = "Relative Importance Analysis",
                            missing = "Dummy variable adjustment",
                            show.labels = TRUE),
                 paste0("Dummy variable adjustment method for missing data is not supported for categorical ",
                        "predictor variables in Relative Importance Analysis. Please remove the categorical ",
                        "predictors: 'Interest charged by the bank' and re-run the analysis"), fixed = TRUE)
    bank2 <- bank[complete.cases(bank), ]
    # Should not error but throw a warning when complete cases are used (dummy variable adjustment redundant)
    expect_warning(Regression(Overall ~ Fees + Interest, data = bank2,
                              output = "Relative Importance Analysis",
                              missing = "Dummy variable adjustment"),
                   "The following variables have been treated as categorical: Interest", fixed = TRUE)
    # Should not throw error if factor used but not dummy adjusted.
    bank2 <- bank
    bank2 <- bank2[!is.na(bank2$Interest), ]
    expect_warning(Regression(Overall ~ Fees + Interest, data = bank2,
                              output = "Relative Importance Analysis",
                              missing = "Dummy variable adjustment"),
                   paste0("The following variables have been treated as categorical: Interest. ",
                          "This may over-inflate their effects."),
                   fixed = TRUE)
})


test_that("DS-2990: Check aliased predictors removed before being passed to RIA/Shapley", {
    set.seed(1)
    sigma.mat <- matrix(c(1, 0.5, 0.3,
                          0.5, 1, 0.4,
                          0.3, 0.4, 1), byrow = TRUE, ncol = 3)
    X <- MASS::mvrnorm(n = 100, mu = rep(0, 3), Sigma = sigma.mat)
    beta <- 1:3
    Y <- X %*% beta + rnorm(100)
    dat <- data.frame(Y = Y, X = X)
    # add aliased predictors, single numeric and categorical
    dat$X.4 <- dat$X.3
    dat$cat1 <- factor(rep(c(1, 2, 3, 4), c(10, 10, 20, 60)), labels = LETTERS[1:4])
    dat$cat2 <- factor(rep(c(1, 2, 3, 4, 5), c(10, 10, 20, 30, 30)), labels = LETTERS[1:5])
    fancy.label.dat <- dat
    for (i in seq_along(dat))
        attr(fancy.label.dat[[i]], "label") <- paste0("Fancy label of ", names(dat)[i])
    # Expect error thrown about aliasing, use recursive call to prevent the warning being thrown before the error.
    expect_error(Regression(Y ~ ., data = dat, output = "Relative Importance Analysis", recursive.call = TRUE),
                 "^Some predictors are linearly dependent")
    # Same for Shapley Regression
    expect_error(Regression(Y ~ ., data = dat, output = "Shapley Regression", recursive.call = TRUE),
                 "^Some predictors are linearly dependent")
    ordered.logit.dat <- dat
    y.to.ord <- factor(cut(dat$Y, breaks = c(-Inf, quantile(dat$Y, prob = c(0.25, 0.5, 0.75)), Inf),
                           labels = LETTERS[1:4]), ordered = TRUE)
    ordered.logit.dat$Y <- y.to.ord
    expect_error(expect_warning(Regression(Y ~ ., data = ordered.logit.dat, recursive.call = TRUE,
                                           output = "Relative Importance Analysis", type = "Ordered Logit"),
                                "^Some variable\\(s\\) are colinear"),
                 "^Some predictors are linearly dependent")
})
Displayr/flipRegression documentation built on March 2, 2024, 3:51 a.m.