tests/testthat/testCCI.R

# Test script for the CCI package
devtools::check()
devtools::check_win_devel()
devtools::document()
devtools::clean_dll()
devtools::build(path = "C:/Users/chris/Documents/GitHub/CCI")

devtools::install()
devtools::load_all()
library(CCI)
citation("CCI")
#-------------------------------------------------------------------------------
# Basic tests CCI.test()
#-------------------------------------------------------------------------------
dat <- NormalData(500)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat)
summary(result)
plot(result)
result

dat <- NormalData(500)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, method = 'xgboost')
summary(result)

dat <- NormalData(250)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, method = 'svm')
summary(result)

#-------------------------------------------------------------------------------
# Basic tests CCI.pretuner
#-------------------------------------------------------------------------------
dat <- NormalData(500)
CCI.pretuner(formula = Y ~ X + Z1 + Z2, data = dat, method = 'xgboost', samples = 5)
CCI.pretuner(formula = Y ~ X + Z1 + Z2, data = dat, method = 'rf')


#-------------------------------------------------------------------------------
# Basic tests QQplot
#-------------------------------------------------------------------------------
dat <- NormalData(100)
cci_obj <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, nperm = 500)
QQplot(cci_obj)


#--------------------
# With tuning
#--------------------
dat <- NonLinNormal(500)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, interaction = F, method = 'rf', tune = TRUE)
summary(result)

dat <- NonLinNormal(200)
result <- CCI.test(formula = Y ~ X + Z1, interaction = F, nperm = 60, data = dat, method = 'xgboost', tune = TRUE)

dat <- NonLinNormal(200)
result <- CCI.test(formula = Y ~ X + Z1, interaction = F, nperm = 60, data = dat, method = 'svm', tune = TRUE)
summary(result)


#-------------------------------------------------------------------------------
# Basic tests Binary outcome
#-------------------------------------------------------------------------------
  set.seed(1985)
  dat <- BinaryData(500)
  result <- CCI.test(formula = Y ~ X + Z1, data = dat, method = 'xgboost', parametric = T)
  summary(result)
  plot(result)


set.seed(1985)
dat <- BinaryData(500)
result <- CCI.test(formula = Y ~ X + Z1, data = dat, interaction = F, method = 'xgboost', metric = 'Kappa')
summary(result)
plot(result)

set.seed(1985)
dat <- BinaryData(500)
result <- CCI.test(formula = Y ~ X + Z1, interaction = F, data = dat, method = 'svm', metric = 'Kappa')
summary(result)
plot(result)

# -----------------------------------------------------------------------------
# Basic tests Categorical outcome
#-------------------------------------------------------------------------------
dat <- NonLinearCategorization(600, d = 2)
dat$Y <- as.factor(dat$Y)
result <- CCI.test(formula = Y ~ X + Z, data = dat, method = 'rf', interaction = F)
summary(result)

dat <- NonLinearCategorization(800, d = 2)
dat$Y <- as.factor(dat$Y)
result <- CCI.test(formula = Y ~ X + Z,
                     data = dat,
                     interaction = F,
                     method = 'xgboost')
summary(result)

dat <- NonLinearCategorization(800, d = 2)
dat$Y <- as.factor(dat$Y)
result <- CCI.test(formula = Y ~ X + Z,
                     data = dat,
                     method = 'svm')

summary(result)
#-----------------------------------------------------
# Direction function
#-----------------------------------------------------

dat <- SineGaussianBiv(N = 500, a = 2, d = 1)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, choose_direction = T)
summary(result)


dat <- SineGaussianBiv(N = 500, a = 2, d = 1)
CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, method = 'xgboost', choose_direction = TRUE)
summary(result)

dat <- SineGaussianBiv(N = 500, a = 2, d = 1)
result <- CCI.test(formula = Y ~ X + Z1 + Z2, data = dat, method = 'svm', choose_direction = TRUE)
summary(result)

#-------------------------------------------------------------------------------
# Testing the tuning function
#-------------------------------------------------------------------------------

data <- TrigData(700)
data$Y <- as.numeric(as.factor(data$Y)) - 1
data$X <- as.numeric(as.factor(data$X)) - 1
parameter <- CCI.pretuner(formula = Y ~ X + Z1 + Z2,
                            data = data,
                            method = 'xgboost',
                            verboseIter = F)



  
#-------------------------------------------------------------------------------
clean_formula <- clean_formula(y ~ x | z + v)
clean_formula
class(clean_formula)
#-------------------------------------------------------------------------------
test_that("clean_formula outputs correct formula", {
  clean_formula <- clean_formula(y ~ x + z + v)
  expect_true(class(clean_formula) == "formula")
  expect_equal(clean_formula, y ~ x | z + v)
})
#-------------------------------------------------------------------------------
# Get pvalues
test_that("get_pvalues outputs p-values", {
  dist <- rnorm(100)
  test_statistic <- rnorm(1)
  p_value <- get_pvalues(dist = dist, test_statistic = test_statistic, tail = "right")
  p_value
  expect_lt(p_value,1)
  expect_gt(p_value, 0)
})

#-------------------------------------------------------------------------------
test_that("get_pvalues outputs p-values", {
  dist <- rnorm(100)
  test_statistic <- rnorm(1)
  p_value <- get_pvalues(dist = dist, test_statistic = test_statistic, parametric = TRUE, tail = "right")
  expect_lt(p_value,1)
  expect_gt(p_value, 0)
})






#-------------------------------------------------------------------------------
# Testing of test.gen function. The test.gen function is the function which creates the
# null distribution
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for continuous data, default method is random forest (Ranger)", {

  data <- NonLinNormal(200)
  result <- test.gen(formula = Y ~ X | Z1 + Z2, data = data, metric = "RMSE")
  expect_true(class(result) == "list")
  expect_true(class(mean(unlist(result))) == "numeric")

})
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for continuous data, default method is random forest (Ranger) with poly turned off", {
  data <- NormalData(200)
  result <- test.gen(formula = Y ~ X | Z1 + Z2, data = data, poly = FALSE, metric = 'RMSE')
  expect_true(class(result) == "list")
  expect_true(class(mean(unlist(result))) == "numeric")

})
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for continuous data, default method is random forest (Ranger)
          various parameter settings", {
            data <- NormalData(300)
            result <- test.gen(formula = Y ~ X | Z1 + Z2,
                               data = data,
                               permutation = TRUE,
                               nperm = 50,
                               degree = 5,
                               nrounds = 600,
                               max.depth = 6,
                               mtry = 1,
                               metric = "RMSE")
            expect_true(class(result) == "list")
            expect_true(class(mean(unlist(result))) == "numeric")

          })
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for binary data, default method is random forest (Ranger)", {
  data <- BinaryData(200)
  result <- test.gen(formula = Y ~ X | Z1 + Z2,
                     data = data,
                     nperm = 40,
                     data_type = "binary",
                     permutation = TRUE,
                     degree = 3,
                     nrounds = 600)

  expect_true(class(result) == "list")
  expect_true(class(mean(unlist(result))) == "numeric")

})
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for categorical data, default method is random forest (Ranger)", {
  data <- InteractiondData(200)
  result <- test.gen(formula = Y ~ X | Z1 + Z2,
                     data = data,
                     nperm = 50,
                     data_type = "categorical",
                     permutation = TRUE,
                     degree = 3,
                     nrounds = 400)

  expect_true(class(result) == "list")
  expect_true(class(mean(unlist(result))) == "numeric")

})
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for continuous data, with Xgboost various parameter settings", {
            data <- NormalData(800)
            result <- test.gen(formula = Y ~ X | Z1 + Z2,
                               data = data,
                               method = "xgboost",
                               permutation = TRUE,
                               degree = 3,
                               nperm = 40,
                               nrounds = 100,
                               max.depth = 6)
            expect_true(class(result) == "list")
            expect_true(class(mean(unlist(result))) == "numeric")

          })
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for categorical data, with Xgboost
          setting the num_class parameter", {
            data <- InteractiondData(400)
            result <- test.gen(formula = Y ~ X | Z1 + Z2,
                               data = data,
                               method = "xgboost",
                               permutation = TRUE,
                               degree = 3,
                               nperm = 40,
                               nrounds = 100,
                               num_class = 4)
            expect_true(class(result) == "list")
            expect_true(class(mean(unlist(result))) == "numeric")

          })
#-------------------------------------------------------------------------------
test_that("test.gen works correctly for binary outcome data using Xgboost", {
            data <- BinaryData(800)
            result <- test.gen(formula = Y ~ X | Z1 + Z2,
                               data = data,
                               method = "xgboost",
                               nperm = 40,
                               permutation = TRUE,
                               degree = 3,
                               nrounds = 120)
            expect_true(class(result) == "list")
            expect_true(class(mean(unlist(result))) == "numeric")

          })
#-------------------------------------------------------------------------------
            data <- BinaryData(500)
            result <- test.gen(formula = Y ~ X | Z1 + Z2,
                               data = data,
                               nperm = 40,
                               method = "xgboost",
                               metric = "Kappa",
                               permutation = TRUE,
                               degree = 5,
                               nrounds = 220)
            result

#-------------------------------------------------------------------------------
# Creating a wrapper function using the caret package with cross-validation
bagging_wrapper <- function(formula,
                          data,
                          train_indices,
                          test_indices,
                          ...) {
  training <- data[train_indices, ]
  testing <- data[test_indices, ]

  model <- ipred::bagging(form = formula,
                        data = training,
                            ...)

  actual <- testing[['Y']]
  pred <- predict(model, newdata = testing)
  sst <- sum((actual - mean(actual))^2)
  ssr <- sum((actual - pred)^2)
  metric <- 1 - (ssr / sst)
  return(metric)
}

#-------------------------------------------------------------------------------
  data <- NormalData(500)
  result <- CCI.test(formula = Y ~ X | Z1 + Z2 ,
                     data = data,
                     nperm = 50,
                     nbag = 50,
                     mlfunc = bagging_wrapper,
                     tail = 'right')
  summary(result)

#-------------------------------------------------------------------------------
rSquared <- function(data, model, test_indices) {
  actual <- data[test_indices,][['Y']]
  pred <- predict(model, data = data[test_indices,])$predictions
  sst <- sum((actual - mean(actual))^2)
  ssr <- sum((actual - pred)^2)
  metric <- 1 - (ssr / sst)
  return(metric)
}
  data <- NonLinNormal(500)

  result <- CCI.test(formula = Y ~ X | Z1 + Z2 ,
                     data = data,
                     nperm = 50,
                     nbag = 50,
                     metricfunc = rSquared,
                     tail = 'right')
  summary(result)

#-------------------------------------------------------------------------------
################## Troubleshooting perm.test() #################################
#-------------------------------------------------------------------------------

test_that("perm.test works correctly in the simplest case", {
  data <- NonLinNormal(500)
  result <- perm.test(Y ~ X | Z1 + Z2, data = data, nperm = 40)
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("perm.test works with metricfunc", {
  data <- NonLinNormal(200)
  result <- perm.test(Y ~ X | Z1 + Z2,
                      data = data,
                      nperm = 40,
                      metricfunc = rSquared,
                      tail = 'right')
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("perm.test works with mlfunc", {
  data <- PoissonNoise(300)
  result <- perm.test(Y ~ X + Z1 + Z2,
                      data = data,
                      coob = T,
                      nperm = 25,
                      mlfunc = bagging_wrapper,
                      tail = "right")
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("perm.test works correctly", {
  data <- NonLinNormal(500)
  result <- perm.test(Y ~ X | Z1 + Z2,
                      data = data,
                      nperm = 25,
                      method = "xgboost",
                      nrounds = 90)
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("perm.test works correctly", {
  data <- NonLinNormal(500)
  result <- perm.test(formula = Y ~ X + Z1 + Z2,
                      data = data,
                      p = 0.7,
                      nperm = 25,
                      method = "xgboost",
                      nrounds = 40,
                      parametric = TRUE,
                      poly = FALSE,
                      objective = "reg:pseudohubererror")
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("perm.test works correctly with dagitty object", {
  data <- NonLinNormal(500)
  dag <- dagitty::dagitty('dag {
  X
  Y
  Z1
  Z2
  Z1 -> X
  Z1 -> Y
  Z2 -> X
  Z2 -> Y
}')

  result <- perm.test(formula = NULL,
                      dag = dag,
                      dag_n = 1,
                      data = data,
                      p = 0.7,
                      nperm = 40,
                      parametric = TRUE)
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------
##################### Troubleshooting CCI.test() ###############################
#-------------------------------------------------------------------------------

test_that("CII.test works correctly basic usage", {

  data <- NormalData(500)
  result <- CCI.test(formula = Y ~ X |  Z2,
                     data = data,
                     nperm = 25,
                     parametric = F)
  expect_is(result, "CCI")
})

test_that("CII.test works correctly with pre tuning", {

  data <- NormalData(700)
  result <- CCI.test(formula = Y ~ X |  Z2 + Z1,
                     data = data,
                     nperm = 100,
                     tune = T,
                     tune_length = 2,
                     parametric = T)
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------
test_that("CCI.test works binary data", {

  data <- BinaryData(500)
  result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     data = data,
                     nperm = 25,
                     parametric = F
  )
  summary(result)
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------

test_that("CCI.test works categorical data", {
  data <- TrigData(800)
  data$Y <- as.numeric(as.factor(data$Y)) - 1
  data$X <- as.numeric(as.factor(data$X)) - 1
  result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     data = data,
                     nperm = 50,
                     metric = "Kappa",
                     parametric = F
  )
  summary(result)
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------

test_that("CII.test works correctly with pre tuning", {

  data <- ExponentialNoise(500)
  result <- CCI.test(formula = Y ~ X |  Z2,
                     data = data,
                     choose_direction = TRUE,
                     method = "rf",
                     parametric = T)
  warningexpect_is(result, "CCI")
})

#-------------------------------------------------------------------------------

test_that("CII.test works correctly with pre tuning", {

  data <- BinaryData(700)
  data$Y <- as.numeric(as.factor(data$Y))-1
  result <- CCI.test(formula = Y ~ X |  Z2,
                     data = data,
                     nperm = 10,
                     tune = T,
                     tune_length = 10,
                     method = "xgboost",
                     metric = "RMSE",
                     parametric = T)
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------

test_that("CII.test works correctly basic usage", {
  set.seed(11)
  data <- NormalData(500)
  result <- CCI.test(formula = Y ~ X |  Z2,
                     data = data)
  expect_is(result, "CCI")
})


#-------------------------------------------------------------------------------

test_that("CCI.test works categorical data with xgboost", {
  data <- TrigData(500)
  data$Y <- as.factor(data$Y)
  data$X <- as.factor(data$X)
  result <- CCI.test(formula = Y ~ X | Z2,
                     data = data,
                     p = 0.7,
                     method = "xgboost",
                     nperm = 10,
                     metric = 'Kappa',
                     num_class = 3,
                     parametric = F
  )
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works basic", {
  data <- NormalData(5000)
  result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     data = data,
                     p = 0.7,
                     subsample = 0.2,
                     method = "svm",
                     nperm = 100,
                     parametric = T
  )
  summary(result)
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
# HER
test_that("CCI.test works categorical data", {
  data <- TrigData(500)

  data$Y <- as.factor(data$Y)
  data$X <- as.factor(data$X)

  result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     data = data,
                     nperm = 100,
                     metric = 'Kappa',
                     parametric = T,
                     tune = T,
                     verboseIter = T
  )
  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------
test_that("CCI.test works rejecting a wrong null with xgboost", {
  data <- NonLinNormal(500)
  result <- CCI.test(formula = Y ~ X | Z2,
                     p = 0.7,
                     data = data,
                     method = 'xgboost',
                     parametric = TRUE
  )
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works categorical data, wrong null", {
  data <- ExpLogData(500)
  data$Y <- as.numeric(as.factor(data$Y)) - 1
  data$X <- as.numeric(as.factor(data$X)) - 1
  result <- CCI.test(formula = Y ~ X | Z1,
                     p = 0.7,
                     data = data,
                     nperm = 40,
                     method = "xgboost",
                     parametric = FALSE
  )
  summary(result)
  plot(result)
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works categorical data, wrong null", {
  data <- ExpLogData(1000)
  result <- CCI.test(formula = Y ~ X | Z2,
                     p = 0.7,
                     data = data,
                     nperm = 40,
                     method = "lm",
                     data_type = 'categorical',
                     parametric = F
  )
  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works with custom performance metric", {
  data <- SinusoidalData(500)
  rSquared <- function(data, model, test_indices) {
    actual <- data[test_indices,][['Y']]
    pred <- predict(model, data = data[test_indices,])$predictions
    sst <- sum((actual - mean(actual))^2)
    ssr <- sum((actual - pred)^2)
    metric <- 1 - (ssr / sst)
    return(metric)
  }
  result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     p = 0.7,
                     data = data,
                     nperm = 40,
                     parametric = T,
                     metricfunc = rSquared,
                     tail = 'right'
  )

  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works with custom performance metric, wrong null", {
  data <- SinusoidalData(200)
  rSquared <- function(data, model, test_indices) {
    actual <- data[test_indices,][['Y']]
    pred <- predict(model, data = data[test_indices,])$predictions
    sst <- sum((actual - mean(actual))^2)
    ssr <- sum((actual - pred)^2)
    metric <- 1 - (ssr / sst)
    return(metric)
  }
  result <- CCI.test(formula = Y ~ X | Z2,
                     p = 0.7,
                     data = data,
                     nperm = 40,
                     parametric = T,
                     metricfunc = rSquared,
                     tail = 'right'
  )

  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------
test_that("CCI.test works with dagitty", {
  data <- SinusoidalData(200)
  dag <- dagitty::dagitty('dag {
  X
  Y
  Z1
  Z2
  Z1 -> X
  Z1 -> Y
  Z2 -> X
  Z2 -> Y
}')
plot(dag)

result <- CCI.test(formula = Y ~ X | Z2 + Z1,
                     p = 0.7,
                     data = data,
                     dag = dag,
                     dag_n = 1,
                     nperm = 50,
                     parametric = T
  )

  expect_is(result, "CCI")
})
#-------------------------------------------------------------------------------

test_that("CCI.test works with custom wrapper function, calculating R-squared", {
  data <- SinusoidalData(400)
  bagging_wrapper <- function(formula,
                              data,
                              train_indices,
                              test_indices,
                              ...) {
    training <- data[train_indices, ]
    testing <- data[test_indices, ]

    model <- ipred::bagging(form = formula,
                            data = training,
                            ...)

    actual <- testing[['Y']]
    pred <- predict(model, newdata = testing)
    sst <- sum((actual - mean(actual))^2)
    ssr <- sum((actual - pred)^2)
    metric <- 1 - (ssr / sst)
    return(metric)
  }


  result <- CCI.test(formula = Y ~ X | Z1 + Z2,
                     p = 0.7,
                     data = data,
                     nperm = 60,
                     parametric = F,
                     seed = 9,
                     mlfunc = bagging_wrapper,
                     tail = "right"
  )

  expect_is(result, "CCI")
})

#-------------------------------------------------------------------------------
##################### QQplot() ###############################
#-------------------------------------------------------------------------------

data <- SinusoidalData(1000)
result <- CCI.test(formula = Y ~ X | Z1 + Z2,
                   p = 0.7,
                   data = data,
                   nperm = 500,
                   parametric = T
)
QQplot(result)


result <- CCI.test(formula = Y ~ X | Z2,
                   p = 0.7,
                   data = data,
                   nperm = 100,
                   parametric = T
)
QQplot(result)





devtools::load_all()
library(CCI)
library(caret)
caret_wrapper <- function(formula,
                          data,
                          train_indices,
                          test_indices,
                          caret_method,
                          caret_metric,
                          ...) {

  training_data <- data[train_indices, ]
  test_data <- data[test_indices, ]
  ctrl <- caret::trainControl(method = "none")
  model <- caret::train(formula,
                        data = training_data,
                        method = caret_method,
                        trControl = ctrl,
                        verbose = F,
                        trace = F,
                        ...)

  predictions <- predict(model, newdata = test_data)
  actual <- data[test_indices, ][[all.vars(formula)[1]]]
  if (caret_metric =="RMSE") {
    metric <- sqrt(mean((predictions - actual)^2))
  } else if (caret_metric == "Kappa") {
    actual <- test_data[[all.vars(formula)[1]]]
    metric <- sum(predictions == actual) / length(actual)
  } else {
    stop("Unsupported data type for caret")
  }
  return(metric)
}

Try the CCI package in your browser

Any scripts or data that you put into this service are public.

CCI documentation built on Aug. 29, 2025, 5:17 p.m.