tests/testthat/testA_Counts.R

context("Count Output")
#For testing if the functions that calculate the count and other reporting
#measures work properly

data.table::setDTthreads(2)  #only needed for CRAN checks

#Process some raw data
dat <- data.frame(
  var1 = c("product_A", "product_B", "product_B", "product_B", "product_A",
           "product_A", "product_A", "product_A", "product_A", "product_B",
           "product_B", "product_A", "product_A", "product_B", "product_A",
           "product_A", "product_C", "product_C"),
  var2 = c("event_2", "event_1", "event_1", "event_2", "event_1", "event_1",
           "event_1", "event_1", "event_1", "event_1", "event_1", "event_1",
           "event_2", "event_2", "event_2", "event_1", "event_1", "event_3"),
  strat1 = c("M", "M", "F", "F", "F", "F", "M", "F", "F", "M", "M", "M", "M",
             "M", "F", "M", "M", "M"),
  strat2 = c("age_cat1", "age_cat2", "age_cat2", "age_cat2", "age_cat1",
             "age_cat1", "age_cat1", "age_cat2", "age_cat2", "age_cat2",
             "age_cat1", "age_cat2", "age_cat1", "age_cat1", "age_cat1",
             "age_cat2", "age_cat2", "age_cat1"),
  stringsAsFactors = FALSE
)
dat$id <- 1:nrow(dat)

dat_no_strat <- dat[, !grepl("strat", names(dat))]
dat_no_id    <- dat[, !grepl("id", names(dat))]
dat_missing1 <- dat
dat_missing1[3, "var1"] <- ""
dat_missing2 <- dat
dat_missing2[1, "var2"] <- NA
dat_missing3 <- dat
dat_missing3[2, "id"] <- NA
dat_missing4 <- dat
dat_missing4[2, "strat2"] <- ""

testthat::test_that("correct number of rows and columns from processRaw", {
  expect_equal(nrow(processRaw(dat)), 6)
  expect_equal(nrow(suppressWarnings(processRaw(dat, stratify = TRUE))), 6)
  expect_equal(nrow(processRaw(dat, zeroes = TRUE)), 9)
  expect_equal(nrow(suppressWarnings(processRaw(dat, zeroes = TRUE, stratify = TRUE))), 9)

  expect_equal(ncol(processRaw(dat)), 6)
  expect_equal(ncol(suppressWarnings(processRaw(dat, stratify = TRUE))), 6)
  expect_equal(ncol(processRaw(dat, zeroes = TRUE)), 6)
  expect_equal(ncol(suppressWarnings(processRaw(dat, zeroes = TRUE, stratify = TRUE))), 6)
})

testthat::test_that("correct column names from processRaw", {
  expect_match(paste0(names(processRaw(dat)), collapse = ""),
               "var1var2NERRPRR")
  expect_match(
    paste0(names(suppressWarnings(processRaw(dat, stratify = TRUE))), collapse = ""),
               "var1var2NERRPRR"
  )
  expect_match(
    paste0(names(processRaw(dat, zeroes = TRUE)), collapse = ""),
               "var1var2NERRPRR"
  )
  expect_match(
    paste0(names(suppressWarnings(processRaw(dat, zeroes = TRUE, stratify = TRUE))), collapse = ""),
               "var1var2NERRPRR"
  )
})

testthat::test_that("values returned from processRaw make sense", {
  expect_equal(max(processRaw(dat)$N), 7)
  expect_equal(max(suppressWarnings(processRaw(dat, stratify = TRUE))$N), 7)
  expect_equal(max(processRaw(dat, zeroes = TRUE)$N), 7)
  expect_equal(max(suppressWarnings(processRaw(dat, zeroes = TRUE, stratify = TRUE))$N), 7)
})

strat_error <- paste("at least one stratification variable contains more than 1 categories --",
                     "\n  did you remember to categorize stratification variables?",
                     "\n  if you really need more categories, increase 'max_cats'",
                     sep = "")

testthat::test_that("do warnings/errors get correctly printed?", {
  expect_error(processRaw(as.matrix(dat)),
               "'data' must be a data frame",
               fixed = TRUE)
  expect_error(processRaw(dat, stratify = 4),
               "'stratify', 'zeroes', & 'list_ids' must be logical values",
               fixed = TRUE)
  expect_error(processRaw(dat, zeroes = 4),
               "'stratify', 'zeroes', & 'list_ids' must be logical values",
               fixed = TRUE)
  expect_error(processRaw(dat, list_ids = 4),
               "'stratify', 'zeroes', & 'list_ids' must be logical values",
               fixed = TRUE)
  expect_error(processRaw(dat_no_id),
               "missing the appropriate column names (need 'id', 'var1', & 'var2')",
               fixed = TRUE)
  expect_error(processRaw(dat_missing1),
               "missing values are not allowed for 'id', 'var1', or 'var2'",
               fixed = TRUE)
  expect_error(processRaw(dat_missing2),
               "missing values are not allowed for 'id', 'var1', or 'var2'",
               fixed = TRUE)
  expect_error(processRaw(dat_missing3),
               "missing values are not allowed for 'id', 'var1', or 'var2'",
               fixed = TRUE)
  expect_error(processRaw(dat_no_strat, stratify = TRUE),
               "no stratification variables found",
               fixed = TRUE)
  expect_error(processRaw(dat_missing4, stratify = TRUE),
               "missing values are not allowed for stratification variables",
               fixed = TRUE)
  expect_message(suppressWarnings(processRaw(dat, stratify = TRUE)),
                 "stratification variables used: strat1, strat2",
                 fixed = TRUE)
  expect_error(processRaw(dat, stratify = TRUE, max_cats = 1),
               strat_error,
               fixed = TRUE)
  expect_message(suppressWarnings(processRaw(dat, stratify = TRUE)),
                 "there were 4 strata",
                 fixed = TRUE)
  expect_warning(processRaw(dat, stratify = TRUE),
                 "at least one stratum contains less than 50 unique IDs",
                 fixed = TRUE)
})

testthat::test_that("are PRR results the same regardless of stratification?", {
  expect_equal(processRaw(dat, digits = 3)$PRR,
               suppressWarnings(
                 processRaw(dat, stratify = TRUE, digits = 3))$PRR
               )
  expect_equal(processRaw(dat, zeroes = TRUE, digits = 3)$PRR,
               suppressWarnings(
                 processRaw(dat, stratify = TRUE, zeroes = TRUE, digits = 3))$PRR
               )
})

testthat::test_that("works with a single stratification variable?", {
  expect_error(suppressWarnings(processRaw(dat[, -4], stratify = TRUE)), NA)
})

#Examine if results are correct
results_unstr <- processRaw(dat, zeroes = TRUE, digits = 3)
results_str   <- suppressWarnings(
  suppressMessages(processRaw(dat, stratify = TRUE, zeroes = TRUE, digits = 3))
)

#Manually build unstratified results
var1s    <- sort(unique(dat$var1))
var2s    <- sort(unique(dat$var2))
num_var1 <- length(var1s)
num_var2 <- length(var2s)
dat_man <- data.frame(
  matrix(data = NA, nrow = num_var1 * num_var2, ncol = ncol(results_unstr)),
  stringsAsFactors = FALSE
)
colnames(dat_man) <- colnames(results_unstr)
dat_man$var1 <- sort(rep(var1s, num_var2))
dat_man$var2 <- rep(sort(var2s), num_var1)
tot_sum <- length(unique(dat$id))
for (i in var1s) {
  row_sum <- nrow(dat[dat$var1 == i, ])
  for (j in var2s) {
    col_sum <- nrow(dat[dat$var2 == j, ])
    N <- nrow(dat[dat$var1 == i & dat$var2 == j, ])
    dat_man[dat_man$var1 == i & dat_man$var2 == j, "N"] <- N
    dat_man[dat_man$var1 == i & dat_man$var2 == j, "E"] <- (row_sum * col_sum) / tot_sum
    PRR_num <- N / row_sum
    PRR_den <- (col_sum - N) / (tot_sum - row_sum)
    dat_man[dat_man$var1 == i & dat_man$var2 == j, "PRR"] <- round(PRR_num / PRR_den, 3)
  }
}
dat_man$RR   <- round(dat_man$N / dat_man$E, 3)
dat_man$RR   <- ifelse(is.nan(dat_man$RR), 0, dat_man$RR)
dat_man$var1 <- factor(dat_man$var1, levels = sort(unique(dat_man$var1)))
dat_man$var2 <- factor(dat_man$var2, levels = sort(unique(dat_man$var2)))

dat_man_no_zero <- dat_man[dat_man$N !=0, ]
rownames(dat_man_no_zero) <- NULL

#Manually build stratified results
strat1s    <- sort(unique(dat$strat1))
strat2s    <- sort(unique(dat$strat2))
num_strat1 <- length(strat1s)
num_strat2 <- length(strat2s)
num_strats <- num_strat1 * num_strat2
num_rows   <- num_var1 * num_var2 * num_strat1 * num_strat2
cols <- c("strat1", "strat2", "var1", "var2", "N", "E")
dat_man2 <- data.frame(matrix(data = NA, nrow = num_rows, ncol = length(cols)),
                       stringsAsFactors = FALSE)
colnames(dat_man2) <- cols
dat_man2$strat1 <- sort(rep(strat1s, num_strat2 * num_var1 * num_var2))
dat_man2$strat2 <- rep(sort(rep(strat2s, num_var1 * num_var2)), num_strat1)
dat_man2$var1 <- rep(sort(rep(var1s, num_var2)), num_strats)
dat_man2$var2 <- rep(var2s, num_var1 * num_strats)

for (si in strat1s) {
  for (sj in strat2s) {
    dat_strat <- dat[dat$strat1 == si & dat$strat2 == sj, ]
    strat_sum <- nrow(dat_strat)
    for (i in var1s) {
      row_sum <- nrow(dat_strat[dat_strat$var1 == i, ])
      for (j in var2s) {
        col_sum <- nrow(dat_strat[dat_strat$var2 == j, ])
        N <- nrow(dat_strat[dat_strat$var1 == i & dat_strat$var2 == j, ])
        dat_man2[dat_man2$strat1 == si & dat_man2$strat2 == sj &
                 dat_man2$var1 == i & dat_man2$var2 == j, "N"] <- N
        E <- (row_sum * col_sum) / strat_sum
        dat_man2[dat_man2$strat1 == si & dat_man2$strat2 == sj &
                 dat_man2$var1 == i & dat_man2$var2 == j, "E"] <- E
      }
    }
  }
}

dat_man2_N   <- aggregate(N ~ var1 + var2, data = dat_man2, FUN = sum)
dat_man2_E   <- aggregate(E ~ var1 + var2, data = dat_man2, FUN = sum)
dat_man2_all <- merge(dat_man2_N, dat_man2_E, by = c("var1", "var2"))
dat_man2_all$RR   <- round(dat_man2_all$N / dat_man2_all$E, 3)
dat_man2_all$RR   <- ifelse(is.nan(dat_man2_all$RR), 0, dat_man2_all$RR)
dat_man2_all$var1 <- factor(dat_man2_all$var1, levels = sort(unique(dat_man2_all$var1)))
dat_man2_all$var2 <- factor(dat_man2_all$var2, levels = sort(unique(dat_man2_all$var2)))

dat_man2_all_no_zero <- dat_man2_all[dat_man2_all$N !=0, ]
rownames(dat_man2_all_no_zero) <- NULL

testthat::test_that("does processRaw() give the expected results?", {
  expect_equal(dat_man,
               processRaw(dat, stratify = FALSE, zeroes = TRUE, digits = 3))
  expect_equal(dat_man_no_zero,
               processRaw(dat, stratify = FALSE, zeroes = FALSE, digits = 3))
  expect_equal(dat_man2_all,
               suppressWarnings(
                 processRaw(dat, stratify = TRUE, zeroes = TRUE, digits = 3))[, 1:5]
               )
  expect_equal(dat_man2_all_no_zero,
               suppressWarnings(
                 processRaw(dat, stratify = TRUE, zeroes = FALSE, digits = 3))[, 1:5]
               )
})

dat_table <- data.table::as.data.table(dat)
dat_tib <- dplyr::as_tibble(dat)

testthat::test_that("does processRaw() work with data.tables and tibble_dfs?", {
    expect_equal(processRaw(dat), processRaw(dat_table))
    expect_equal(suppressWarnings(processRaw(dat, stratify = TRUE)),
                 suppressWarnings(processRaw(dat_table, stratify = TRUE)))
    expect_equal(suppressWarnings(processRaw(dat, stratify = TRUE, zeroes = TRUE)),
                 suppressWarnings(processRaw(dat_table, stratify = TRUE, zeroes = TRUE)))

    expect_equal(processRaw(dat), processRaw(dat_tib))
    expect_equal(suppressWarnings(processRaw(dat, stratify = TRUE)),
                 suppressWarnings(processRaw(dat_tib, stratify = TRUE)))
    expect_equal(suppressWarnings(processRaw(dat, stratify = TRUE, zeroes = TRUE)),
                 suppressWarnings(processRaw(dat_tib, stratify = TRUE, zeroes = TRUE)))
})

#Test to see if simple example gives expected results from hand calculation
dat2 <- data.frame(var1 = c("product_A", rep("product_B", 3), "product_C",
                            rep("product_A", 2), rep("product_B", 2), "product_C"),
                   var2 = c("event_1", rep("event_2", 2), rep("event_3", 2),
                            "event_2", rep("event_3", 3), "event_1"),
                   strat1 = c(rep("Male", 5), rep("Female", 3), rep("Male", 2)),
                   strat2 = c(rep("age_cat1", 5), rep("age_cat1", 3),
                              rep("age_cat2", 2)),
                   stringsAsFactors = FALSE
)
dat2$id <- 1:nrow(dat2)

var1_hand <- c(rep("product_A", 3), rep("product_B", 3), rep("product_C", 3))
var2_hand <- c(rep(c("event_1", "event_2", "event_3"), 3))
hand <- data.frame(var1 = factor(var1_hand, levels = sort(unique(var1_hand))),
                   var2 = factor(var2_hand, levels = sort(unique(var2_hand))),
                   N    = c(1, 1, 1, 0, 2, 3, 1, 0, 1),
                   E    = c(0.6, 0.9, 1.5, 1, 1.5, 2.5, 0.4, 0.6, 1),
                   RR   = c(1.6667, 1.1111, 0.6667, 0, 1.3333, 1.2, 2.5, 0, 1),
                   PRR  = c(2.3333, 1.1667, 0.5833, 0, 2, 1.5, 4, 0, 1),
                   stringsAsFactors = FALSE
)

testthat::test_that("results match hand calculations, unstratified", {
  expect_equal(processRaw(dat2, stratify = FALSE, zeroes = TRUE, digits = 4),
               hand)
  expect_equivalent(processRaw(dat2, stratify = FALSE, zeroes = FALSE, digits = 4),
               hand[hand$N != 0, ])
})

var1_hand_s <- c(rep("product_A", 3), rep("product_B", 3), rep("product_C", 3))
var2_hand_s <- c(rep(c("event_1", "event_2", "event_3"), 3))
hand_s <- data.frame(
  var1 = factor(var1_hand_s, levels = sort(unique(var1_hand_s))),
  var2 = factor(var2_hand_s, levels = sort(unique(var2_hand_s))),
  N    = c(1, 1, 1, 0, 2, 3, 1, 0, 1),
  E    = c(0.2, 0.4+2/3, 0.4+4/3, 1.1, 1.2+1/3, 1.7+2/3, 0.7, 0.4, 0.9),
  RR   = c(5, 0.9375, 0.5769, 0, 1.3043, 1.2676, 1.4286, 0, 1.1111),
  PRR  = c(2.3333, 1.1667, 0.5833, 0, 2, 1.5, 4, 0, 1),
  stringsAsFactors = FALSE
)

testthat::test_that("results match hand calculations, stratified", {
  suppressWarnings(suppressMessages(
    expect_equal(processRaw(dat2, stratify = TRUE, zeroes = TRUE, digits = 4),
                 hand_s)
  ))
  suppressWarnings(suppressMessages(
    expect_equivalent(processRaw(dat2, stratify = TRUE, zeroes = FALSE, digits = 4),
                      hand_s[hand_s$N != 0, ])
  ))
})

Try the openEBGM package in your browser

Any scripts or data that you put into this service are public.

openEBGM documentation built on Sept. 15, 2023, 1:08 a.m.