test-roc.test.R
In pROC: Display and Analyze ROC Curves

library(pROC)
data(aSAH)

context("roc.test")

# define variables shared among multiple tests here
roc.test_env <- environment()

test_that("roc.test works", {
  roc.test_env$t1 <- roc.test(r.wfns, r.s100b)
  roc.test_env$t2 <- roc.test(r.wfns, r.ndka)
  roc.test_env$t3 <- roc.test(r.ndka, r.s100b)
  expect_is(t1, "htest")
  expect_is(t2, "htest")
  expect_is(t3, "htest")
})


test_that("DeLong roc.test works when curves are identical", {
  t4 <- roc.test(r.wfns, r.wfns)
  expect_is(t4, "htest")
  expect_equal(t4$p.value, 1)
  expect_equal(t4$statistic, c(Z = 0))
})


test_that("roc.test statistic and p are as expected with defaults", {
  expect_equal(t1$statistic, c(Z = 2.20898359144091))
  expect_equal(t1$p.value, 0.0271757822291882)
  expect_equal(t1$conf.int[[1]], 0.0104061769564846)
  expect_equal(t1$conf.int[[2]], 0.174214419249478)
  expect_match(t1$method, "DeLong")
  expect_match(t1$method, "correlated")
  expect_identical(t1$alternative, "two.sided")
  expect_identical(attr(t1$conf.int, "conf.level"), 0.95)

  expect_equal(t2$statistic, c(Z = 2.79777591868904))
  expect_equal(t2$p.value, 0.00514557970691098)
  expect_equal(t2$conf.int[[1]], 0.0634011709339876)
  expect_equal(t2$conf.int[[2]], 0.3600405634833566)
  expect_match(t2$method, "DeLong")
  expect_match(t2$method, "correlated")
  expect_identical(t2$alternative, "two.sided")
  expect_identical(attr(t2$conf.int, "conf.level"), 0.95)

  expect_equal(t3$statistic, c(Z = -1.39077002573558))
  expect_equal(t3$p.value, 0.164295175223054)
  expect_equal(t3$conf.int[[1]], -0.2876917446341914)
  expect_equal(t3$conf.int[[2]], 0.0488706064228094)
  expect_match(t3$method, "DeLong")
  expect_match(t3$method, "correlated")
  expect_identical(t3$alternative, "two.sided")
  expect_identical(attr(t3$conf.int, "conf.level"), 0.95)
})

test_that("two.sided roc.test produces identical p values when roc curves are reversed", {
  t1b <- roc.test(r.s100b, r.wfns)
  expect_equal(t1b$p.value, t1$p.value)
  expect_equal(t1b$statistic, -t1$statistic)

  t2b <- roc.test(r.ndka, r.wfns)
  expect_equal(t2b$p.value, t2$p.value)
  expect_equal(t2b$statistic, -t2$statistic)

  t3b <- roc.test(r.s100b, r.ndka)
  expect_equal(t3b$p.value, t3$p.value)
  expect_equal(t3b$statistic, -t3$statistic)
})

test_that("unpaired roc.test works", {
  # Warns about pairing
  expect_warning(roc.test_env$t1up <- roc.test(r.wfns, r.s100b, paired = FALSE))
  expect_warning(roc.test_env$t2up <- roc.test(r.wfns, r.ndka, paired = FALSE))
  expect_warning(roc.test_env$t3up <- roc.test(r.ndka, r.s100b, paired = FALSE))
})

test_that("unpaired roc.test statistic and p are as expected", {
  expect_equal(t1up$statistic, c(D = 1.43490640926908))
  expect_equal(t1up$p.value, 0.152825378808796)
  expect_match(t1up$method, "DeLong")
  expect_identical(t1up$alternative, "two.sided")

  expect_equal(t2up$statistic, c(D = 3.10125096778969))
  expect_equal(t2up$p.value, 0.00220950791756457)
  expect_match(t2up$method, "DeLong")
  expect_identical(t2up$alternative, "two.sided")

  expect_equal(t3up$statistic, c(D = -1.55995743389685))
  expect_equal(t3up$p.value, 0.120192832430845)
  expect_match(t3up$method, "DeLong")
  expect_identical(t3up$alternative, "two.sided")
})

test_that("unpaired two.sided roc.test produces identical p values when roc curves are reversed", {
  expect_warning(t1upb <- roc.test(r.s100b, r.wfns, paired = FALSE))
  expect_equal(t1upb$p.value, t1up$p.value)
  expect_equal(t1upb$statistic, -t1up$statistic)

  expect_warning(t2upb <- roc.test(r.ndka, r.wfns, paired = FALSE))
  expect_equal(t2upb$p.value, t2up$p.value)
  expect_equal(t2upb$statistic, -t2up$statistic)

  expect_warning(t3upb <- roc.test(r.s100b, r.ndka, paired = FALSE))
  expect_equal(t3upb$p.value, t3up$p.value)
  expect_equal(t3upb$statistic, -t3up$statistic)
})


test_that("one-sided roc.test work and produce expected results", {
  t1gt <- roc.test(r.wfns, r.s100b, alternative = "greater")
  t1lt <- roc.test(r.wfns, r.s100b, alternative = "less")

  expect_equal(t1gt$statistic, t1$statistic)
  expect_equal(t1lt$statistic, t1$statistic)

  expect_equal(t1gt$p.value, 0.0135878911145941)
  expect_equal(t1lt$p.value, 0.986412108885406)

  expect_match(t1gt$method, "DeLong")
  expect_match(t1gt$method, "correlated")
  expect_identical(t1gt$alternative, "greater")
  expect_match(t1lt$method, "DeLong")
  expect_match(t1lt$method, "correlated")
  expect_identical(t1lt$alternative, "less")
})

test_that("unpaired one-sided roc.test work and produce expected results", {
  expect_warning(t1upgt <- roc.test(r.wfns, r.s100b, alternative = "greater", paired = FALSE))
  expect_warning(t1uplt <- roc.test(r.wfns, r.s100b, alternative = "less", paired = FALSE))

  expect_equal(t1upgt$statistic, t1up$statistic)
  expect_equal(t1uplt$statistic, t1up$statistic)

  expect_equal(t1upgt$p.value, 0.076412689404398)
  expect_equal(t1uplt$p.value, 0.923587310595602)

  expect_match(t1upgt$method, "DeLong")
  expect_identical(t1upgt$alternative, "greater")
  expect_match(t1uplt$method, "DeLong")
  expect_identical(t1uplt$alternative, "less")
})

test_that("roc.formula works", {
  expect_silent(t1c <- roc.test(aSAH$outcome ~ aSAH$wfns + aSAH$s100b, quiet = TRUE)) # make sure silent is passed
  expect_equal(t1c$statistic, t1$statistic)
  expect_equal(t1c$p.value, t1$p.value)
  expect_match(t1$method, "DeLong")
  expect_match(t1$method, "correlated")
  expect_identical(t1$alternative, "two.sided")

  expect_warning(t1upc <- roc.test(aSAH$outcome ~ aSAH$wfns + aSAH$s100b, quiet = TRUE, paired = FALSE))
  expect_equal(t1upc$statistic, t1up$statistic)
  expect_equal(t1upc$p.value, t1up$p.value)
  expect_match(t1upc$method, "DeLong")
  expect_identical(t1upc$alternative, "two.sided")
})


test_that("roc.formula supports subset and na.omit", {
  check.only.items <- c("p.value", "statistic")

  expect_identical(
    roc.test(outcome ~ wfns + ndka, data = aSAH, subset = (gender == "Female"), quiet = TRUE)[check.only.items],
    roc.test(aSAH$outcome[aSAH$gender == "Female"], aSAH$wfns[aSAH$gender == "Female"], aSAH$ndka[aSAH$gender == "Female"], quiet = TRUE)[check.only.items]
  )

  # Generate missing values
  aSAH.missing <- aSAH
  aSAH.missing$wfns[1:20] <- NA
  aSAH.missing$ndka[1:20] <- NA
  expect_warning(roctest1 <- roc.test(outcome ~ wfns + ndka, data = aSAH.missing, na.action = na.omit, quiet = TRUE), "na.omit")
  roctest2 <- roc.test(aSAH$outcome[21:113], aSAH$wfns[21:113], aSAH$ndka[21:113], quiet = TRUE)
  expect_identical(
    roctest1[check.only.items],
    roctest2[check.only.items]
  )
  # na.fail should fail
  expect_error(roc.test(outcome ~ wfns + ndka, data = aSAH.missing, na.action = na.fail, quiet = TRUE))
  # weights should fail too
  expect_error(roc.test(outcome ~ wfns + ndka, data = aSAH, weights = seq_len(nrow(aSAH))), regexp = "weights are not supported")

  # Both na.action and subset
  expect_warning(roctest1 <- roc.test(outcome ~ wfns + ndka, data = aSAH.missing, na.action = na.omit, subset = (gender == "Female"), quiet = TRUE), "na.omit")
  roctest2 <- roc.test(aSAH$outcome[21:113][aSAH[21:113, ]$gender == "Female"], aSAH$wfns[21:113][aSAH[21:113, ]$gender == "Female"], aSAH$ndka[21:113][aSAH[21:113, ]$gender == "Female"], quiet = TRUE)
  expect_identical(
    roctest1[check.only.items],
    roctest2[check.only.items]
  )
})

test_that("paired tests don't work on unpaired curves", {
  # Make an unpaired ROC curve
  up.r.ndka <- roc(controls = aSAH$ndka[aSAH$outcome == "Good"], cases = aSAH$ndka[aSAH$outcome == "Poor"], quiet = TRUE)
  # unpaired by default
  t4 <- roc.test(r.wfns, up.r.ndka)
  expect_false(grepl("correlated", t4$method))
  # Shoud be an error:
  expect_error(roc.test(r.wfns, up.r.ndka, paired = TRUE))
})

test_that("one-sided roc.test work with direction='>' and produce expected results", {
  r.mwfns <- roc(aSAH$outcome, -as.numeric(aSAH$wfns))
  r.ms100b <- roc(aSAH$outcome, -aSAH$s100b)
  ## We already tested those before:
  # t1gt <- roc.test(r.wfns, r.s100b, alternative = "greater")
  # t1lt <- roc.test(r.wfns, r.s100b, alternative = "less")
  # Test with inverted direction
  m1gt <- roc.test(r.mwfns, r.ms100b, alternative = "greater")
  m1lt <- roc.test(r.mwfns, r.ms100b, alternative = "less")

  expect_equal(m1gt$statistic, t1$statistic)
  expect_equal(m1lt$statistic, t1$statistic)

  expect_equal(m1gt$p.value, 0.0135878911145941)
  expect_equal(m1lt$p.value, 0.986412108885406)
})

test_that("paired roc.test works with bootstrap", {
  skip_slow()
  ht <- roc.test(r.wfns, r.s100b, method = "bootstrap", boot.n = 12, paired = TRUE)
  expect_bootstrap_htest(ht)
  expect_equal(ht$alternative, "two.sided")
  expect_equal(ht$method, "Bootstrap test for two correlated ROC curves")
  expect_equal(unname(ht$parameter), c(12, 1))
})

test_that("unpaired roc.test works with bootstrap", {
  skip_slow()
  expect_warning(ht <- roc.test(r.s100b, r.wfns, method = "bootstrap", boot.n = 12, paired = FALSE), "paired")
  expect_bootstrap_htest(ht)
  expect_equal(ht$alternative, "two.sided")
  expect_equal(ht$method, "Bootstrap test for two ROC curves")
  expect_equal(unname(ht$parameter), c(12, 1))
})

test_that("paired, non stratified roc.test works with bootstrap", {
  skip_slow()
  ht <- roc.test(r.s100b, r.wfns, method = "bootstrap", boot.n = 12, paired = TRUE, boot.stratified = FALSE)
  expect_bootstrap_htest(ht)
  expect_equal(ht$alternative, "two.sided")
  expect_equal(ht$method, "Bootstrap test for two correlated ROC curves")
  expect_equal(unname(ht$parameter), c(12, 0))
})

test_that("unpaired, non stratified roc.test works with bootstrap", {
  skip_slow()
  expect_warning(ht <- roc.test(r.s100b, r.wfns, method = "bootstrap", boot.n = 12, paired = FALSE, boot.stratified = FALSE), "paired")
  expect_bootstrap_htest(ht)
  expect_equal(ht$alternative, "two.sided")
  expect_equal(ht$method, "Bootstrap test for two ROC curves")
  expect_equal(unname(ht$parameter), c(12, 0))
})

test_that("bootstrap roc.test works with mixed roc, auc and smooth.roc objects", {
  skip_slow()
  for (roc1 in list(r.s100b, auc(r.s100b), smooth(r.s100b), r.s100b.partial2, r.s100b.partial2$auc)) {
    for (roc2 in list(r.wfns, auc(r.wfns), smooth(r.wfns), r.wfns.partial1, r.wfns.partial1$auc)) {
      n <- round(runif(1, 3, 9)) # keep boot.n small
      stratified <- sample(c(TRUE, FALSE), 1)
      paired <- sample(c(TRUE, FALSE), 1)
      alternative <- sample(c("two.sided", "less", "greater"), 1)
      suppressWarnings( # All sorts of warnings are expected
        ht <- roc.test(roc1, roc2,
          method = "bootstrap",
          boot.n = n, paired = paired, boot.stratified = stratified,
          alternative = alternative
        )
      )
      expect_bootstrap_htest(ht)
      expect_equal(ht$alternative, alternative)
      if (paired) {
        expect_equal(ht$method, "Bootstrap test for two correlated ROC curves")
      } else {
        expect_equal(ht$method, "Bootstrap test for two ROC curves")
      }
      expect_equal(unname(ht$parameter), c(n, as.integer(stratified)))
    }
  }
})

test_that("se/sp roc.test works with mixed roc, auc and smooth.roc objects", {
  skip_slow()
  for (roc1 in list(r.s100b, auc(r.s100b), smooth(r.s100b), r.s100b.partial2, r.s100b.partial2$auc)) {
    for (roc2 in list(r.wfns, auc(r.wfns), smooth(r.wfns), r.wfns.partial1, r.wfns.partial1$auc)) {
      for (method in c("sensitivity", "specificity")) {
        n <- round(runif(1, 3, 9)) # keep boot.n small
        stratified <- sample(c(TRUE, FALSE), 1)
        paired <- sample(c(TRUE, FALSE), 1)
        alternative <- sample(c("two.sided", "less", "greater"), 1)
        suppressWarnings( # All sorts of warnings are expected
          ht <- roc.test(roc1, roc2,
            method = method,
            sensitivity = 0.8,
            specificity = 0.8,
            boot.n = n, paired = paired, boot.stratified = stratified,
            alternative = alternative
          )
        )
        expect_bootstrap_htest(ht)
        expect_equal(ht$alternative, alternative)
        if (paired) {
          expect_equal(ht$method, sprintf("%s test for two correlated ROC curves", tools::toTitleCase(method)))
        } else {
          expect_equal(ht$method, sprintf("%s test for two ROC curves", tools::toTitleCase(method)))
        }
        expect_equal(unname(ht$parameter), c(n, as.integer(stratified)))
      }
    }
  }
})