# tests/testthat/test-cdisc_validate.R

test_that("detect_cdisc_domain identifies SDTM DM", {
  # One-row fixture with a DOMAIN column set to "DM" plus subject identifier,
  # reference-date, site, demographic, arm, and country columns.
  dm <- data.frame(
    STUDYID = "STUDY01", DOMAIN = "DM", USUBJID = "SUBJ01",
    SUBJID = "01", RFSTDTC = "2024-01-01", RFENDTC = "2024-06-01",
    SITEID = "SITE01", SEX = "M", AGE = 45, AGEU = "YEARS",
    ARMCD = "TRT", ARM = "Treatment", COUNTRY = "USA",
    ACTARMCD = "TRT", ACTARM = "Treatment",
    stringsAsFactors = FALSE
  )
  result <- detect_cdisc_domain(dm)

  # The detector must report both the standard and the domain.
  expect_equal(result$standard, "SDTM")
  expect_equal(result$domain, "DM")

  # expect_gt() prints the observed confidence when the check fails, whereas
  # expect_true(x > 0.5) would only report that the condition was not TRUE.
  expect_gt(result$confidence, 0.5)
})

test_that("detect_cdisc_domain identifies ADaM ADSL", {
  # One-row subject-level fixture; column order is kept exactly as listed.
  adsl_data <- data.frame(
    # Identifiers
    STUDYID = "STUDY01",
    USUBJID = "SUBJ01",
    SUBJID = "01",
    SITEID = "SITE01",
    # Demographics
    AGE = 45,
    AGEU = "YEARS",
    SEX = "M",
    RACE = "WHITE",
    # Planned / actual arm
    ARM = "Treatment",
    ARMCD = "TRT",
    ACTARM = "Treatment",
    ACTARMCD = "TRT",
    # Treatment variables (character and numeric pairs)
    TRT01P = "Drug A",
    TRT01PN = 1,
    TRT01A = "Drug A",
    TRT01AN = 1,
    # Numeric treatment start / end dates
    TRTSDT = 22000,
    TRTEDT = 22100,
    # Reference dates as character strings
    RFSTDTC = "2024-01-01",
    RFENDTC = "2024-06-01",
    # Population flags
    SAFFL = "Y",
    ITTFL = "Y",
    stringsAsFactors = FALSE
  )

  detected <- detect_cdisc_domain(adsl_data)

  expect_equal(detected$standard, "ADaM")
  expect_equal(detected$domain, "ADSL")
})

test_that("detect_cdisc_domain returns Unknown for non-CDISC data", {
  # Generic three-column frame whose column names (x, y, z) are not any of
  # the CDISC-style names used by the other fixtures in this file.
  plain_df <- data.frame(
    x = seq_len(5),
    y = head(letters, 5),
    z = rnorm(5)
  )

  detected <- detect_cdisc_domain(plain_df)

  # No standard is recognised, and the domain is reported as missing.
  expect_equal(detected$standard, "Unknown")
  expect_true(is.na(detected$domain))
})

test_that("validate_cdisc detects missing required SDTM variables", {
  # DM dataset missing SEX (required) and ETHNIC (expected).
  # The fixture also has no AGE or AGEU columns.
  dm <- data.frame(
    STUDYID = "STUDY01", DOMAIN = "DM", USUBJID = "SUBJ01",
    SUBJID = "01", RFSTDTC = "2024-01-01", RFENDTC = "2024-06-01",
    SITEID = "SITE01",
    ARMCD = "TRT", ARM = "Treatment", COUNTRY = "USA",
    ACTARMCD = "TRT", ACTARM = "Treatment",
    stringsAsFactors = FALSE
  )
  result <- validate_cdisc(dm, domain = "DM", standard = "SDTM")
  expect_s3_class(result, "data.frame")

  # Rows are selected with %in% rather than ==: an NA severity compared with
  # == yields NA, and indexing a data frame with NA produces an all-NA row,
  # which would let the row-count checks below pass without a real finding.

  # Should have errors for missing required variables
  errors <- result[result$severity %in% "ERROR", ]
  expect_gt(nrow(errors), 0)
  expect_true("SEX" %in% errors$variable)

  # Should have warnings for missing expected variables
  warnings_df <- result[result$severity %in% "WARNING", ]
  expect_gt(nrow(warnings_df), 0)
})

test_that("validate_sdtm detects type mismatches", {
  # DM fixture in which AGE is deliberately stored as a character string.
  dm <- data.frame(
    STUDYID = "STUDY01", DOMAIN = "DM", USUBJID = "SUBJ01",
    SUBJID = "01", RFSTDTC = "2024-01-01", RFENDTC = "2024-06-01",
    SITEID = "SITE01", SEX = "M",
    AGE = "45",  # Should be numeric!
    AGEU = "YEARS",
    ARMCD = "TRT", ARM = "Treatment", COUNTRY = "USA",
    ACTARMCD = "TRT", ACTARM = "Treatment",
    stringsAsFactors = FALSE
  )
  result <- validate_sdtm(dm, "DM")

  # %in% never returns NA, so findings with a missing category cannot leak
  # into the subset as all-NA rows and inflate the row count.
  type_issues <- result[result$category %in% "Type Mismatch", ]
  expect_gt(nrow(type_issues), 0)
  expect_true("AGE" %in% type_issues$variable)
})

test_that("validate_cdisc detects non-standard variables", {
  # DM fixture with one extra, made-up column appended at the end.
  dm_with_extra <- data.frame(
    STUDYID = "STUDY01",
    DOMAIN = "DM",
    USUBJID = "SUBJ01",
    SUBJID = "01",
    RFSTDTC = "2024-01-01",
    RFENDTC = "2024-06-01",
    SITEID = "SITE01",
    SEX = "M",
    AGE = 45,
    AGEU = "YEARS",
    ARMCD = "TRT",
    ARM = "Treatment",
    COUNTRY = "USA",
    ACTARMCD = "TRT",
    ACTARM = "Treatment",
    MY_CUSTOM_VAR = "custom",  # the column the validator should flag
    stringsAsFactors = FALSE
  )

  findings <- validate_cdisc(dm_with_extra, domain = "DM", standard = "SDTM")

  # Keep only the findings filed under the non-standard-variable category.
  is_non_standard <- findings$category == "Non-Standard Variable"
  non_standard_rows <- findings[is_non_standard, ]

  expect_true("MY_CUSTOM_VAR" %in% non_standard_rows$variable)
})

test_that("validate_adam validates ADSL correctly", {
  # One-row ADSL-style fixture; column order is kept exactly as listed.
  adsl_data <- data.frame(
    STUDYID = "STUDY01",
    USUBJID = "SUBJ01",
    SUBJID = "01",
    SITEID = "SITE01",
    AGE = 45,
    AGEU = "YEARS",
    SEX = "M",
    RACE = "WHITE",
    ARM = "Treatment",
    ARMCD = "TRT",
    ACTARM = "Treatment",
    ACTARMCD = "TRT",
    TRT01P = "Drug A",
    TRT01PN = 1,
    TRT01A = "Drug A",
    TRT01AN = 1,
    TRTSDT = 22000,
    TRTEDT = 22100,
    RFSTDTC = "2024-01-01",
    RFENDTC = "2024-06-01",
    SAFFL = "Y",
    ITTFL = "Y",
    stringsAsFactors = FALSE
  )

  findings <- validate_adam(adsl_data, "ADSL")

  # The result is a data frame exposing at least these four columns.
  expect_s3_class(findings, "data.frame")
  required_cols <- c("category", "variable", "message", "severity")
  expect_true(all(required_cols %in% names(findings)))
})

test_that("validate_cdisc errors on bad inputs", {
  # A NULL dataset must raise an error.
  expect_error(validate_cdisc(NULL))

  # An unrecognised standard name must raise an error.
  # NOTE(review): the dataset here is an empty data frame, so this call may
  # error because of the empty input rather than the invalid standard;
  # confirm against validate_cdisc() and consider a populated fixture.
  empty_df <- data.frame()
  expect_error(
    validate_cdisc(empty_df, domain = "DM", standard = "INVALID")
  )
})

test_that("cdisc_compare returns comprehensive results", {
  # Baseline DM fixture.
  base_dm <- data.frame(
    STUDYID = "STUDY01",
    DOMAIN = "DM",
    USUBJID = "SUBJ01",
    SUBJID = "01",
    RFSTDTC = "2024-01-01",
    RFENDTC = "2024-06-01",
    SITEID = "SITE01",
    SEX = "M",
    AGE = 45,
    AGEU = "YEARS",
    ARMCD = "TRT",
    ARM = "Treatment",
    COUNTRY = "USA",
    ACTARMCD = "TRT",
    ACTARM = "Treatment",
    stringsAsFactors = FALSE
  )

  # Second dataset is a copy that differs only in AGE (45 -> 46).
  changed_dm <- base_dm
  changed_dm[["AGE"]] <- 46

  comparison <- cdisc_compare(
    base_dm, changed_dm,
    domain = "DM", standard = "SDTM"
  )

  expect_type(comparison, "list")

  # Every one of these named components must be present in the result.
  expected_parts <- c(
    "comparison",
    "variable_comparison",
    "cdisc_validation_df1",
    "cdisc_validation_df2",
    "cdisc_conformance_comparison"
  )
  for (part in expected_parts) {
    expect_true(part %in% names(comparison))
  }
})

# Try the clinCompare package in your browser
#
# Any scripts or data that you put into this service are public.
#
# clinCompare documentation built on Feb. 19, 2026, 1:07 a.m.