tests/testthat/test-select_models.R

test_that("select_models works for valid approaches and directions", {
  library(testthat)
  library(gtregression)
  library(dplyr)
  library(mlbench)
  library(MASS)
  data("PimaIndiansDiabetes2", package = "mlbench")
  pima_data <- PimaIndiansDiabetes2 |>
    mutate(
      diabetes = ifelse(diabetes == "pos", 1, 0),
      bmi = factor(case_when(
        mass < 25 ~ "Normal",
        mass >= 25 & mass < 30 ~ "Overweight",
        mass >= 30 ~ "Obese"
      )),
      age_cat = factor(case_when(
        age < 30 ~ "Young",
        age < 50 ~ "Middle-aged",
        TRUE ~ "Older"
      )),
      npreg_cat = factor(ifelse(pregnant > 2, "High", "Low")),
      glucose_cat = factor(ifelse(glucose >= 140, "High", "Normal")),
      dpf_cat = factor(ifelse(pedigree >= 0.5, "High", "Low")),
      insulin_cat = factor(ifelse(insulin >= 100, "High", "Normal"))
    )

  outcome <- "diabetes"
  exposures <- c("bmi", "age_cat", "npreg_cat", "glucose_cat",
                 "insulin_cat", "dpf_cat")

  approaches <- c("logit", "robpoisson")
  directions <- c("forward", "backward", "both")

  for (appr in approaches) {
    for (dir in directions) {
      result <- select_models(
        data = pima_data, outcome = outcome,
        exposures = exposures, approach = appr, direction = dir
      )
      expect_true("results_table" %in% names(result))
      expect_true("best_model" %in% names(result))
      expect_s3_class(result$results_table, "tbl_df")
      expect_s3_class(result$best_model, "glm")
    }
  }
})

test_that("select_models works for linear regression and returns adjusted R2", {
  set.seed(123)
  df <- data.frame(
    y = rnorm(100),
    x1 = rnorm(100),
    x2 = sample(letters[1:3], 100, replace = TRUE),
    x3 = sample(LETTERS[1:2], 100, replace = TRUE)
  )
  df$x2 <- factor(df$x2)
  df$x3 <- factor(df$x3)

  result <- select_models(df,
    outcome = "y",
    exposures = c("x1", "x2", "x3"),
    approach = "linear", direction = "forward"
  )
  expect_true("adj_r2" %in% colnames(result$results_table))
})

test_that("select_models validates outcome types appropriately", {
  df <- data.frame(
    y_bin = sample(c(0, 1), 100, replace = TRUE),
    y_cont = rnorm(100),
    y_count = rpois(100, lambda = 2),
    x = sample(letters[1:3], 100, replace = TRUE)
  )
  df$x <- factor(df$x)

  # Should pass (no error expected)
  expect_error(select_models(df, outcome = "y_bin", exposures = "x",
                             approach = "logit"), NA)
  expect_error(select_models(df, outcome = "y_count", exposures = "x",
                             approach = "poisson"), NA)
  expect_error(select_models(df, outcome = "y_cont", exposures = "x",
                             approach = "linear"), NA)

  # Should fail (wrong outcome type for given model)
  expect_error(select_models(df, outcome = "y_bin", exposures = "x",
                             approach = "poisson"), NA)
  expect_error(select_models(df, outcome = "y_cont", exposures = "x",
                             approach = "logit"))
  expect_error(select_models(df, outcome = "y_count", exposures = "x",
                             approach = "linear"), NA)
})

test_that("select_models supports negative binomial regression", {
  data("quine", package = "MASS")
  quine <- quine |>
    mutate(across(c(Eth, Sex, Age, Lrn), as.factor))

  result <- select_models(quine,
    outcome = "Days",
    exposures = c("Eth", "Sex", "Age", "Lrn"),
    approach = "negbin", direction = "forward"
  )

  expect_true("results_table" %in% names(result))
  expect_s3_class(result$best_model, "glm") # MASS::glm.nb returns class 'glm'
})

Try the gtregression package in your browser

Any scripts or data that you put into this service are public.

gtregression documentation built on Aug. 18, 2025, 5:23 p.m.