tests/testthat/test-explanation.R

context("test-explanation.R")

# For using same Random numer generator as CircelCI (R version 3.5.x)
suppressWarnings(RNGversion(vstr = "3.5.0"))

test_that("Test get_list_approaches", {
  m <- 4
  n_features <- c(0, 1, 1, 1, 2, 2, 2, 3)
  approach <- c("gaussian", "copula", "copula")
  l <- get_list_approaches(n_features, approach)

  expect_true(is.list(l))
  expect_equal(names(l), c("gaussian", "copula"))
  expect_equal(l$gaussian, 1:4)
  expect_equal(l$copula, 5:8)
})

test_that("Test functions in explanation.R", {

  # Load data -----------
  if (requireNamespace("MASS", quietly = TRUE)) {
    data("Boston", package = "MASS")
    x_var <- c("lstat", "rm", "dis", "indus")
    y_var <- "medv"

    y_train <- tail(Boston[, y_var], 50)
    x_test <- as.matrix(head(Boston[, x_var], 2))

    # Prepare the data for explanation. Path needs to be relative to testthat directory in the package
    explainer <- readRDS(file = "test_objects/shapley_explainer_obj.rds")

    # Creating list with lots of different explainer objects
    p0 <- mean(y_train)
    ex_list <- list()

    # Ex 1: Explain predictions (gaussian)
    ex_list[[1]] <- explain(x_test, explainer, approach = "gaussian", prediction_zero = p0)

    # Ex 2: Explain predictions (copula)
    ex_list[[2]] <- explain(x_test, explainer, approach = "copula", prediction_zero = p0)

    # Ex 3: Explain predictions (empirical, independence):
    ex_list[[3]] <- explain(x_test, explainer, approach = "empirical", prediction_zero = p0, type = "independence")

    # Ex 4: Explain predictions (empirical, fixed sigma)
    ex_list[[4]] <- explain(x_test, explainer, approach = "empirical", prediction_zero = p0, type = "fixed_sigma")

    # Ex 5: Explain predictions (empirical, AICc)
    ex_list[[5]] <- explain(x_test, explainer, approach = "empirical", prediction_zero = p0, type = "AICc_each_k")

    # Ex 6: Explain predictions (empirical, AICc full)
    ex_list[[6]] <- explain(x_test, explainer, approach = "empirical", prediction_zero = p0, type = "AICc_full")

    # Ex 7: Explain combined - empirical and gaussian
    ex_list[[7]] <- explain(x_test, explainer, approach = c("empirical", rep("gaussian", 3)), prediction_zero = p0)

    # Ex 8: Explain combined II - all gaussian
    ex_list[[8]] <- explain(x_test, explainer, approach = c(rep("gaussian", 4)), prediction_zero = p0)

    # Ex 9: Explain combined III - all copula
    ex_list[[9]] <- explain(x_test, explainer, approach = rep("copula", 4), prediction_zero = p0)

    # Ex 10: gaussian and copula XX (works with seed)
    approach <- c(rep("gaussian", 2), rep("copula", 2))
    ex_list[[10]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 11: empirical and gaussian
    approach <- c(rep("empirical", 2), rep("gaussian", 2))
    ex_list[[11]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 12: empirical and copula
    approach <- c(rep("empirical", 2), rep("copula", 2))
    ex_list[[12]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 13: copula and empirical XX (works now)
    approach <- c(rep("copula", 2), rep("empirical", 2))
    ex_list[[13]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 14: gaussian and copula XX (works with seed)
    approach <- c(rep("gaussian", 1), rep("copula", 3))
    ex_list[[14]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 15: empirical and copula
    approach <- c(rep("empirical", 1), rep("copula", 3))
    ex_list[[15]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 16: gaussian and empirical XX (works now)
    approach <- c(rep("gaussian", 1), rep("empirical", 3))
    ex_list[[16]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 17: gaussian and empirical XX (works now!)
    approach <- c(rep("gaussian", 2), rep("empirical", 2))
    ex_list[[17]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    # Ex 18: Explain combined II - all empirical
    approach <- c(rep("empirical", 4))
    ex_list[[18]] <- explain(x_test, explainer, approach = approach, prediction_zero = p0)

    if (requireNamespace("party", quietly = TRUE)) {

      # Ex 19: Explain predictions (ctree, sample = FALSE, default parameters)
      ex_list[[19]] <- explain(x_test, explainer, approach = "ctree", prediction_zero = p0, sample = FALSE)

      # Ex 20: Explain predictions (ctree, sample = TRUE, default parameters)
      ex_list[[20]] <- explain(x_test, explainer, approach = "ctree", prediction_zero = p0, sample = TRUE)

      # Ex 21: Explain predictions (ctree, sample = FALSE, other ctree parameters)
      ex_list[[21]] <- explain(x_test, explainer,
        approach = "ctree", prediction_zero = p0, sample = FALSE,
        mincriterion = 0.9, minsplit = 20, minbucket = 25
      )

      # Ex 22: Explain predictions (ctree, sample = TRUE, other ctree parameters)
      ex_list[[22]] <- explain(x_test, explainer,
        approach = "ctree", prediction_zero = p0, sample = TRUE,
        mincriterion = 0.9, minsplit = 20, minbucket = 25
      )

      # Ex 23: Explain combined - ctree and gaussian, sample = FALSE
      ex_list[[23]] <- explain(x_test, explainer,
        approach = c("ctree", rep("gaussian", 3)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 24: Explain combined II - ctree and gaussian, sample = FALSE
      ex_list[[24]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 2), rep("gaussian", 2)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 25: Explain combined III - ctree and gaussian, sample = FALSE
      ex_list[[25]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 3), rep("gaussian", 1)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 26: Explain combined IV - ctree all, sample = FALSE
      ex_list[[26]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 4)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 27: Explain combined - ctree and empirical, sample = FALSE
      ex_list[[27]] <- explain(x_test, explainer,
        approach = c("ctree", rep("empirical", 3)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 28: Explain combined II - ctree and empirical, sample = FALSE
      ex_list[[28]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 2), rep("empirical", 2)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 29: Explain combined III - ctree and empirical, sample = FALSE
      ex_list[[29]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 3), rep("empirical", 1)),
        prediction_zero = p0, sample = FALSE
      )

      # Ex 30: Explain combined - ctree and gaussian, sample = TRUE
      ex_list[[30]] <- explain(x_test, explainer,
        approach = c("ctree", rep("gaussian", 3)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 31: Explain combined II - ctree and gaussian, sample = TRUE
      ex_list[[31]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 2), rep("gaussian", 2)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 32: Explain combined III - ctree and gaussian, sample = TRUE
      ex_list[[32]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 3), rep("gaussian", 1)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 33: Explain combined IV - ctree all, sample = TRUE
      ex_list[[33]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 4)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 34: Explain combined - ctree and empirical, sample = TRUE
      ex_list[[34]] <- explain(x_test, explainer,
        approach = c("ctree", rep("empirical", 3)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 35: Explain combined II - ctree and empirical, sample = TRUE
      ex_list[[35]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 2), rep("empirical", 2)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 36: Explain combined III - ctree and empirical, sample = TRUE
      ex_list[[36]] <- explain(x_test, explainer,
        approach = c(rep("ctree", 3), rep("empirical", 1)),
        prediction_zero = p0, sample = TRUE
      )

      # Ex 37: Explain different ctree mincriterion for different number of dependent variables, sample = TRUE
      ex_list[[37]] <- explain(x_test, explainer,
        approach = "ctree", prediction_zero = p0, sample = TRUE,
        mincriterion = c(0.05, 0.05, 0.95, 0.95)
      )

      # Ex 38: Explain different ctree mincriterion for different number of dependent variables, sample = TRUE
      ex_list[[38]] <- explain(x_test, explainer,
        approach = "ctree", prediction_zero = p0, sample = TRUE,
        mincriterion = rep(0.95, 4)
      )

      # Ex 39: Test that ctree with mincriterion equal to same probability four times gives the same as only passing one
      # probability to mincriterion
      expect_equal(
        (explain(x_test, explainer,
          approach = "ctree", prediction_zero = p0, sample = TRUE,
          mincriterion = rep(0.95, 4)
        ))$dt,
        (explain(x_test, explainer,
          approach = "ctree", prediction_zero = p0, sample = TRUE,
          mincriterion = 0.95
        ))$dt
      )


      # Ex 40: Test that ctree with the same mincriterion repeated four times is the same as passing mincriterion once
      expect_equal(
        (explain(x_test, explainer,
          approach = "ctree", prediction_zero = p0, sample = FALSE,
          mincriterion = c(rep(0.95, 2), rep(0.95, 2))
        ))$dt,
        (explain(x_test, explainer,
          approach = "ctree", prediction_zero = p0, sample = FALSE,
          mincriterion = 0.95
        ))$dt
      )

      # Checking that explanations with different paralellizations gives the same result (only unix systems!)

      if (.Platform$OS.type == "unix") {
        explain_base_nosample <- explain(x_test, explainer, approach = "ctree", prediction_zero = p0, sample = FALSE)

        multicore <- 2

        expect_equal(
          explain_base_nosample,
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = FALSE,
            mc_cores = multicore
          )
        )

        expect_equal(
          explain_base_nosample,
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = FALSE,
            mc_cores_create_ctree = 1, mc_cores_sample_ctree = multicore
          )
        )

        expect_equal(
          explain_base_nosample,
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = FALSE,
            mc_cores_create_ctree = multicore, mc_cores_sample_ctree = 1
          )
        )

        explain_base_sample <- explain(x_test, explainer, approach = "ctree", prediction_zero = p0, sample = TRUE)

        # Consistent results when only paralellizing create_ctree, and not sample_ctree
        expect_equal(
          explain_base_sample,
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = TRUE,
            mc_cores_create_ctree = multicore, mc_cores_sample_ctree = 1
          )
        )

        # Consistent results when ran twice with same seed
        expect_equal(
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = TRUE,
            mc_cores = multicore
          ),
          explain(x_test, explainer,
            approach = "ctree", prediction_zero = p0, sample = TRUE,
            mc_cores = multicore
          )
        )
      }
      # Checking that all explain objects produce the same as before
      expect_known_value(ex_list,
        file = "test_objects/explanation_explain_obj_list.rds",
        update = F
      )
    } else {
      # Tests using only the first 17 elements of explanation_explain_obj_list.rds
      expect_known_value(ex_list,
        file = "test_objects/explanation_explain_obj_list_no_ctree.rds",
        update = F
      )
    }


    ### Additional test to test that only the produced shapley values are the same as before
    fixed_explain_obj_list <- readRDS("test_objects/explanation_explain_obj_list_fixed.rds")
    for (i in 1:length(ex_list)) {
      expect_equal(ex_list[[i]]$dt, fixed_explain_obj_list[[i]]$dt)
    }


    # Checks that an error is returned
    expect_error(
      explain(1, explainer, approach = "gaussian", prediction_zero = p0)
    )
    expect_error(
      explain(list(), explainer, approach = "gaussian", prediction_zero = p0)
    )
    expect_error(
      explain(x_test, explainer, approach = "Gaussian", prediction_zero = p0)
    )
    expect_error(
      explain(x_test, explainer, approach = rep("gaussian", ncol(x_test) + 1), prediction_zero = p0)
    )
  }
})

test_that("Testing data input to explain in explanation.R", {

  # Setup for training data and explainer object
  if (requireNamespace("MASS", quietly = TRUE)) {
    data("Boston", package = "MASS")
    x_var <- c("lstat", "rm", "dis", "indus")
    y_var <- "medv"

    # Training data
    x_train <- as.matrix(tail(Boston[, x_var], -6))
    y_train <- tail(Boston[, y_var], -6)
    xy_train_full_df <- tail(Boston[, ], -6)

    # Test data
    x_test <- as.matrix(head(Boston[, x_var], 6))
    x_test_full <- as.matrix(head(Boston[, ], 6))
    x_test_reordered <- as.matrix(head(Boston[, rev(x_var)], 6))
    xy_test_full_df <- head(Boston[, ], 6)
    xy_test_missing_lstat_df <- xy_test_full_df[, !(colnames(xy_test_full_df) == "lstat")]
    xy_test_full_df_no_colnames <- xy_test_full_df
    colnames(xy_test_full_df_no_colnames) <- NULL

    formula <- as.formula(paste0("medv ~ ", paste0(x_var, collapse = "+")))
    p0 <- mean(y_train)

    # Test data
    all_test_data <- list(
      x_test,
      x_test_reordered,
      x_test_full
    )

    # Linear model
    list_models <- list(
      lm(
        formula = formula,
        data = xy_train_full_df
      )
    )

    all_explainers <- list(
      shapr(x_train, list_models[[1]])
    )

    # explainer 1
    # Expect message due to no label/factor checking
    list_explanation <- list()
    list_explanation[[1]] <- expect_silent(
      explain(
        all_test_data[[1]],
        all_explainers[[1]],
        approach = "empirical",
        prediction_zero = p0,
        n_samples = 1e2
      )
    )
    # Expect message due to no label/factor checking
    list_explanation[[2]] <- expect_silent(
      explain(
        all_test_data[[2]],
        all_explainers[[1]],
        approach = "empirical",
        prediction_zero = p0,
        n_samples = 1e2
      )
    )
    # Expect message due to removal of data
    list_explanation[[3]] <- expect_message(
      explain(
        all_test_data[[3]],
        all_explainers[[1]],
        approach = "empirical",
        prediction_zero = p0,
        n_samples = 1e2
      )
    )
    for (i in 2:length(list_explanation)) {
      expect_equal(list_explanation[[i - 1]], list_explanation[[i]])
    }


    if (requireNamespace("xgboost", quietly = TRUE)) {
      list_models[[length(list_models) + 1]] <- xgboost::xgboost(
        data = x_train,
        label = y_train,
        nround = 5,
        verbose = FALSE
      )

      all_explainers[[length(all_explainers) + 1]] <- shapr(x_train, list_models[[length(list_models)]])

      # explainer 2
      # Expect silent
      list_explanation <- list()
      list_explanation[[1]] <- expect_silent(
        explain(
          all_test_data[[1]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      # Expect silent
      list_explanation[[2]] <- expect_silent(
        explain(
          all_test_data[[2]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      # Expect message due to removal of data
      list_explanation[[3]] <- expect_message(
        explain(
          all_test_data[[3]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      for (i in 2:length(list_explanation)) {
        expect_equal(list_explanation[[i - 1]], list_explanation[[i]])
      }
    }


    if (requireNamespace("ranger", quietly = TRUE)) {
      list_models[[length(list_models) + 1]] <- ranger::ranger(
        formula = formula,
        data = xy_train_full_df,
        num.trees = 50
      )

      all_explainers[[length(all_explainers) + 1]] <- shapr(x_train, list_models[[length(list_models)]])

      # explainer 3
      # Expect silent
      list_explanation <- list()
      list_explanation[[1]] <- expect_silent(
        explain(
          all_test_data[[1]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      # Expect silent
      list_explanation[[2]] <- expect_silent(
        explain(
          all_test_data[[2]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      # Expect message due removal of data
      list_explanation[[3]] <- expect_message(
        explain(
          all_test_data[[3]],
          all_explainers[[length(all_explainers)]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
      for (i in 2:length(list_explanation)) {
        expect_equal(list_explanation[[i - 1]], list_explanation[[i]])
      }
    }

    for (i in seq_along(all_explainers)) {

      # Expect error when test data misses used variable
      expect_error(
        explain(
          xy_test_missing_lstat_df,
          all_explainers[[i]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )

      # Expect error when test data misses column names
      expect_error(
        explain(
          xy_test_full_df_no_colnames,
          all_explainers[[i]],
          approach = "empirical",
          prediction_zero = p0,
          n_samples = 1e2
        )
      )
    }
  }
})

Try the shapr package in your browser

Any scripts or data that you put into this service are public.

shapr documentation built on May 4, 2023, 5:10 p.m.