tests/testthat/test-dissect.R

test_that("dissect works correctly on Pima dataset", {
  skip_if_not_installed("gtregression")
  skip_if_not_installed("tibble")
  skip_if_not_installed("purrr")

  library(gtregression)
  data("data_PimaIndiansDiabetes", package = "gtregression")

  # categorize and transform the dataset
  pima_data <- data_PimaIndiansDiabetes |>
    mutate(diabetes = ifelse(diabetes == "pos", 1, 0)) |> # Convert outcome to numeric b                                                                                                                                                                                                                                                                     inary
    mutate(bmi = case_when(
      mass < 25 ~ "Normal",
      mass >= 25 & mass < 30 ~ "Overweight",
      mass >= 30 ~ "Obese",
      TRUE ~ NA_character_),
      bmi = factor(bmi, levels = c("Normal", "Overweight", "Obese")),
      age_cat = case_when(
        age < 30 ~ "Young",
        age >= 30 & age < 50 ~ "Middle-aged",
        age >= 50 ~ "Older"),
      age_cat = factor(age_cat, levels = c("Young", "Middle-aged", "Older")),
      npreg_cat = ifelse(pregnant > 2, "High parity", "Low parity"),
      npreg_cat = factor(npreg_cat, levels = c("Low parity", "High parity")),
      glucose_cat= case_when(glucose<=140~ "Normal", glucose>140~"High"),
      glucose_cat= factor(glucose_cat, levels = c("Normal", "High")),
      bp_cat = case_when(
        pressure < 80 ~ "Normal",
        pressure >= 80 ~ "High"
      ),
      bp_cat= factor(bp_cat, levels = c("Normal", "High")),
      triceps_cat = case_when(
        triceps < 23 ~ "Normal",
        triceps >= 23 ~ "High"
      ),
      triceps_cat= factor(triceps_cat, levels = c("Normal", "High")),
      insulin_cat = case_when(
        insulin < 30 ~ "Low",
        insulin >= 30 & insulin < 150 ~ "Normal",
        insulin >= 150 ~ "High"
      ),
      insulin_cat = factor(insulin_cat, levels = c("Low", "Normal", "High"))
    ) |>
    mutate(
      dpf_cat = case_when(
        pedigree <= 0.2 ~ "Low Genetic Risk",
        pedigree > 0.2 & pedigree <= 0.5 ~ "Moderate Genetic Risk",
        pedigree > 0.5 ~ "High Genetic Risk"
      )
    ) |>
    mutate(dpf_cat = factor(dpf_cat, levels = c("Low Genetic Risk", "Moderate Genetic Risk", "High Genetic Risk"))) |>
    mutate(diabetes_cat= case_when(diabetes== 1~ "Diabetes positive", TRUE~ "Diabetes negative")) |>
    mutate(diabetes_cat= factor(diabetes_cat, levels = c("Diabetes negative","Diabetes positive" )))
  # Run dissect
  result <- dissect(pima_data)

  # Test structure
  expect_s3_class(result, "tbl_df")
  expect_true(all(c("Variable", "Type", "Missing (%)", "Unique", "Levels", "Compatibility") %in% names(result)))

  # Check at least one of each compatibility type exists
  expect_true(any(result$Compatibility == "compatible"))
  expect_true(any(result$Compatibility == "maybe"))
  expect_true(any(result$Compatibility == "incompatible") | TRUE)  # Optional, allow none

  # Check types are as expected
  expect_true(all(result$Type %in% c("numeric", "factor", "character", "logical", "integer", "Date", "POSIXct")))

  # Check % missing is correctly formatted
  expect_true(all(grepl("^\\d+(\\.\\d)?%$", result$`Missing (%)`)))

  # Snapshot for visual inspection
  expect_snapshot(print(result, n = nrow(result)))
})

Try the gtregression package in your browser

Any scripts or data that you put into this service are public.

gtregression documentation built on Aug. 18, 2025, 5:23 p.m.