tests/testthat/test-mnr.R

suppressMessages(library(dplyr))

cmh_17_cv <- tribble(
  ~n, ~c,
  3, 1.154,
  4, 1.481,
  5, 1.715,
  6, 1.887,
  7, 2.02,
  8, 2.127,
  9, 2.215,
  10, 2.29,
  11, 2.355,
  12, 2.412,
  13, 2.462,
  14, 2.507,
  15, 2.548,
  16, 2.586,
  17, 2.62,
  18, 2.652,
  19, 2.681,
  20, 2.708,
  21, 2.734,
  22, 2.758,
  23, 2.78,
  24, 2.802,
  25, 2.822,
  26, 2.841,
  27, 2.859,
  28, 2.876,
  29, 2.893,
  30, 2.908,
  31, 2.924,
  32, 2.938,
  33, 2.952,
  34, 2.965,
  35, 2.978,
  36, 2.991,
  37, 3.003,
  38, 3.014,
  39, 3.025,
  40, 3.036,
  41, 3.047,
  42, 3.057,
  43, 3.067,
  44, 3.076,
  45, 3.085,
  46, 3.094,
  47, 3.103,
  48, 3.112,
  49, 3.12,
  50, 3.128,
  51, 3.136,
  52, 3.144,
  53, 3.151,
  54, 3.159,
  55, 3.166,
  56, 3.173,
  57, 3.18,
  58, 3.187,
  59, 3.193,
  60, 3.2,
  61, 3.206,
  62, 3.212,
  63, 3.218,
  64, 3.224,
  65, 3.23,
  66, 3.236,
  67, 3.241,
  68, 3.247,
  69, 3.252,
  70, 3.258,
  71, 3.263,
  72, 3.268,
  73, 3.273,
  74, 3.278,
  75, 3.283,
  76, 3.288,
  77, 3.292,
  78, 3.297,
  79, 3.302,
  80, 3.306,
  81, 3.311,
  82, 3.315,
  83, 3.319,
  84, 3.323,
  85, 3.328,
  86, 3.332,
  87, 3.336,
  88, 3.34,
  89, 3.344,
  90, 3.348,
  91, 3.352,
  92, 3.355,
  93, 3.359,
  94, 3.363,
  95, 3.366,
  96, 3.37,
  97, 3.374,
  98, 3.377,
  99, 3.381,
  100, 3.384,
  101, 3.387,
  102, 3.391,
  103, 3.394,
  104, 3.397,
  105, 3.401,
  106, 3.404,
  107, 3.407,
  108, 3.41,
  109, 3.413,
  110, 3.416,
  111, 3.419,
  112, 3.422,
  113, 3.425,
  114, 3.428,
  115, 3.431,
  116, 3.434,
  117, 3.437,
  118, 3.44,
  119, 3.442,
  120, 3.445,
  121, 3.448,
  122, 3.451,
  123, 3.453,
  124, 3.456,
  125, 3.459,
  126, 3.461,
  127, 3.464,
  128, 3.466,
  129, 3.469,
  130, 3.471,
  131, 3.474,
  132, 3.476,
  133, 3.479,
  134, 3.481,
  135, 3.483,
  136, 3.486,
  137, 3.488,
  138, 3.491,
  139, 3.493,
  140, 3.495,
  141, 3.497,
  142, 3.5,
  143, 3.502,
  144, 3.504,
  145, 3.506,
  146, 3.508,
  147, 3.511,
  148, 3.513,
  149, 3.515,
  150, 3.517,
  151, 3.519,
  152, 3.521,
  153, 3.523,
  154, 3.525,
  155, 3.527,
  156, 3.529,
  157, 3.531,
  158, 3.533,
  159, 3.535,
  160, 3.537,
  161, 3.539,
  162, 3.541,
  163, 3.543,
  164, 3.545,
  165, 3.547,
  166, 3.549,
  167, 3.551,
  168, 3.552,
  169, 3.554,
  170, 3.556,
  171, 3.558,
  172, 3.56,
  173, 3.561,
  174, 3.563,
  175, 3.565,
  176, 3.567,
  177, 3.568,
  178, 3.57,
  179, 3.572,
  180, 3.574,
  181, 3.575,
  182, 3.577,
  183, 3.579,
  184, 3.58,
  185, 3.582,
  186, 3.584,
  187, 3.585,
  188, 3.587,
  189, 3.588,
  190, 3.59,
  191, 3.592,
  192, 3.593,
  193, 3.595,
  194, 3.596,
  195, 3.598,
  196, 3.599,
  197, 3.601,
  198, 3.603,
  199, 3.604,
  200, 3.606
)

test_that(
  "Critical MNR values match those published in CMH-17-1G, Table 8.5.7", {
    cmh_17_cv %>%
      rowwise() %>%
      mutate(calc_mnr_crit = maximum_normed_residual_crit(n, 0.05)) %>%
      mutate(check = expect_lte(abs(calc_mnr_crit - c), 0.001))
})

test_that(
  "MNR calculation matches example in CMH-17-1G Section 8.3.11.1.1", {
    # What follows is part of the ETW data from Section 8.3.11.1.1
    df <- tribble(
      ~batch, ~strength,
      2, 99.3207107,
      2, 115.86177,
      2, 82.6133082,
      2, 85.3690411,
      2, 115.801622,
      2, 44.3217741,
      2, 117.328077,
      2, 88.6782903,
      3, 107.676986,
      3, 108.960241,
      3, 116.12264,
      3, 80.2334815,
      3, 106.14557,
      3, 104.667866,
      3, 104.234953
    )

    # first do some checks to ensure that the above data has been typed
    # correctly
    df %>%
      filter(batch == 2) %>%
      summarise(check_n = expect_equal(n(), 8),
                check_mean = expect_lte(abs(mean(strength) - 93.662), 0.001),
                check_sd = expect_lte(abs(sd(strength) - 24.568), 0.001))

    # Check that the MNR test results match
    res <- df %>%
      filter(batch == 2) %>%
      maximum_normed_residual(strength, alpha = 0.05)
    expect_lte(abs(res$mnr - 2.008), 0.001)
    expect_lte(abs(res$crit - 2.127), 0.001)
    expect_equal(nrow(res$outliers), 0)  # no outliers for this batch
    expect_equal(res$n_outliers, 0)

    # check the print function
    expect_output(print(res), "no outliers", ignore.case = TRUE)
    expect_output(print(res), "MNR.*2.008", ignore.case = TRUE)
    expect_output(print(res), ".*crit.*2\\.12", ignore.case = TRUE)
    expect_output(print(res), ".*alpha.*0\\.05", ignore.case = TRUE)

    # check for typographical errors in the data above
    df %>%
      filter(batch == 3) %>%
      summarise(check = expect_equal(n(), 7),
                check_mean = expect_lte(abs(mean(strength) - 104.006), 0.001),
                check_sd = expect_lte(abs(sd(strength) - 11.218), 0.001))

    # Check that the MNR test results match
    res <- df %>%
      filter(batch == 3) %>%
      maximum_normed_residual(strength, alpha = 0.05)
    expect_lte(abs(res$mnr - 2.119), 0.001)
    expect_lte(abs(res$crit - 2.02), 0.001)
    expect_equal(nrow(res$outliers), 1)  # one outlier for this batch
    expect_equal(res$n_outliers, 1)

    # check the print function
    expect_output(print(res), "outliers", ignore.case = TRUE)
    expect_output(print(res), "MNR.*2.119", ignore.case = TRUE)
    expect_output(print(res), ".*crit.*2\\.01", ignore.case = TRUE)
    expect_output(print(res), ".*alpha.*0\\.05", ignore.case = TRUE)
    # check that the outlier was shown in the print statement
    expect_output(print(res), "4.*80\\.23348", ignore.case = TRUE)

})

test_that("Datasets with repeated value non-outliers work", {
  # It was found that for small data sets where there are three
  # outliers and two duplicate values, the MNR calculation
  # would fail. This is a regression test against this.

  # This has always worked
  d1 <- c(673, 621, 690, 689, 689.5)
  res <- maximum_normed_residual(x = d1)
  expect_equal(res$n_outliers, 2)

  # This would previously fail
  d2 <- c(673, 621, 690, 689, 689)
  res <- maximum_normed_residual(x = d2)
  expect_equal(res$n_outliers, 3)
})

test_that("Vectors with no variance produce reasonable results", {
  x <- rep(100, 10)
  res <- maximum_normed_residual(x = x)
  expect_equal(res$n_outliers, 0)
})

test_that("Both vectors and data.frames can be passed to the MNR function", {
  df <- tribble(
    ~batch, ~strength,
    3, 107.676986,
    3, 108.960241,
    3, 116.12264,
    3, 80.2334815,
    3, 106.14557,
    3, 104.667866,
    3, 104.234953
  )

  # check that passing a data.frame works
  res1 <- df %>%
    maximum_normed_residual(strength, alpha = 0.05)
  expect_lte(abs(res1$mnr - 2.119), 0.001)
  expect_lte(abs(res1$crit - 2.02), 0.001)
  expect_equal(nrow(res1$outliers), 1)  # one outlier for this batch
  expect_equal(res1$n_outliers, 1)

  # check that passing a vector works
  res2 <- maximum_normed_residual(x = df$strength, alpha = 0.05)
  expect_lte(abs(res2$mnr - 2.119), 0.001)
  expect_lte(abs(res2$crit - 2.02), 0.001)
  expect_equal(nrow(res2$outliers), 1)  # one outlier for this batch
  expect_equal(res2$n_outliers, 1)
})

test_that("Glance returns correct indicies for multiple outliers", {
  x <- c(129.18348, 131.07326, 122.68332, 123.06133, 126.82286,
         133.25628, 123.55778, 125.46771, 134.88326, 137.98354,
         128.62570, 125.07538, 130.88193, 128.33410, 130.94715,
         135.19539, 136.42983, 135.83982, 130.80670, 126.02690,
         138.88892, 124.68059, 131.11826, 120.19239, 125.54239,
         124.95325, 139.76737, 136.47803, 134.99572, 84.27198,
         86.77162, 86.48636
  )

  res <- maximum_normed_residual(x = x)

  expect_equal(res$n_outliers, 3)
  expect_equal(res$outliers$index, c(30, 32, 31))
})

test_that("Glance works with repeated values", {
  x <- c(129.18348, 131.07326, 122.68332, 123.06133, 126.82286,
         133.25628, 123.55778, 125.46771, 134.88326, 137.98354,
         128.62570, 125.07538, 130.88193, 128.33410, 130.94715,
         135.19539, 136.42983, 135.83982, 130.80670, 126.02690,
         138.88892, 124.68059, 131.11826, 120.19239, 125.54239,
         124.95325, 139.76737, 136.47803, 134.99572, 64.27198,
         64.27198, 64.27198
  )

  res <- maximum_normed_residual(x = x)

  expect_equal(res$n_outliers, 3)
  expect_equal(res$outliers$index, c(30, 31, 32))
})

test_that("glance method returns expected results", {
  df <- tribble(
    ~batch, ~strength,
    3, 107.676986,
    3, 108.960241,
    3, 116.12264,
    3, 80.2334815,
    3, 106.14557,
    3, 104.667866,
    3, 104.234953
  )

  mnr_res <- df %>%
    maximum_normed_residual(strength, alpha = 0.05)
  glance_res <- glance(mnr_res)

  expect_equal(glance_res$mnr, 2.119, tolerance = 0.001)
  expect_equal(glance_res$crit, 2.02, tolerance = 0.001)
  expect_equal(glance_res$alpha, 0.05, tolerance = 0.00001)
  expect_equal(glance_res$n_outliers, 1)
})


test_that("augment method returns expected results", {
  df <- tribble(
    ~batch, ~strength,
    3, 107.676986,
    3, 108.960241,
    3, 116.12264,
    3, 80.2334815,
    3, 106.14557,
    3, 104.667866,
    3, 204.234953
  )

  mnr_res <- df %>%
    maximum_normed_residual(strength, alpha = 0.05)

  # not passing along the original data
  augment_res <- augment(mnr_res)
  expect_equal(df$strength, augment_res$values)
  expect_equal(augment_res$.outlier,
               c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE))

  # passing along the original data.frame
  augment_res <- augment(mnr_res, df)
  expect_equal(df$strength, augment_res$strength)
  expect_equal(df$batch, augment_res$batch)
  expect_equal(augment_res$.outlier,
               c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE))
})

test_that("messages for small samples are sensible", {
  expect_error(
    maximum_normed_residual(x = c()),
    "at least 3"
  )
  expect_error(
    maximum_normed_residual(x = c(1)),
    "at least 3"
  )
  expect_error(
    maximum_normed_residual(x = c(1, 2)),
    "at least 3"
  )
  maximum_normed_residual(x = c(1, 2, 3))
})
ComtekAdvancedStructures/cmstatr documentation built on Nov. 20, 2024, 6:39 p.m.