tests/testthat/test-LinkedSuppression.R

#
# d53.rds is generated by code below with data_1000000 from 
# https://github.com/statisticsnorway/sdc-census-2021-hypercubes/blob/main/data/data_1000000.RData
# and f1, f2, f3, f4 as below
# set.seed(53)
# d53 <- GaussSuppressionFromData(data = data_1000000[sample.int(1e+06, size = 10000), ], 
#             formula = SSBtools::combine_formulas(list(table_1 = f1, 
#                                                       table_2 = f2,
#                                                       table_3 = f3, 
#                                                       table_4 = f4)), 
#             output = "inner")

test_that("SuppressLinkedTables", {
  skip("since more advanced tests below")
  f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) 
  f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) 
  f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) 
  f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h)
  d53 <- readRDS(testthat::test_path("testdata", "d53.rds"))
  
  linkedGauss <- "consistent"
  recordAware <- TRUE
  
  #asum = NULL
  
  sum1 <- list(local_FALSE = c(1282693L, 2221804L, 4755062L, 20531L), 
               local_TRUE = c(1282693L, 2221804L, 4755062L, 20531L), 
               consistent_FALSE = c(1282693L, 2219402L, 4755062L, 20531L), 
               consistent_TRUE = c(1353576L, 2305412L, 5071277L, 20531L), 
               `back-tracking_FALSE` = c(1282693L, 2219402L, 4755062L, 20531L), 
               `back-tracking_TRUE` = c(1445094L, 2768849L, 5378354L, 20531L))
  
  
  sum2 <- list(local_FALSE = 20753475L, 
               local_TRUE = 22023863L, 
               consistent_FALSE = 20747225L, 
               consistent_TRUE = 22078558L, 
               `back-tracking_FALSE` = 20747225L, 
               `back-tracking_TRUE` = 24137217L, 
               global_FALSE = 21619689L, 
               global_TRUE = 21619689L)
  
  sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]]
  sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]]
  sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]]
  sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]]
  
  
  
  for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag"))
    for(recordAware in c(FALSE, TRUE)) {
      
      cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      
      a <- SuppressLinkedTables(data = d53,
                             freqVar = "freq",
                             fun = SuppressSmallCounts,
                             withinArg = list(list(formula = f1),
                                              list(formula = f2),
                                              list(formula = f3),
                                              list(formula = f4)), 
                             recordAware =  recordAware,
                             preAggregate = TRUE,
                             maxN = 3, 
                             protectZeros = FALSE,   extend0 = FALSE, 
                             printXdim = TRUE,
                             singletonMethod = "none",
                             linkedGauss = linkedGauss)
      
      expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))),
                       sum1[[paste(linkedGauss, recordAware, sep = "_")]])
                       
    }
  
  
  
  for(linkedGauss in c("local", "consistent", "back-tracking", "global", "local-bdiag"))
    for(recordAware in c(FALSE, TRUE)) {
      
      cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- tables_by_formulas(data = d53,
                              freqVar = "freq",
                              table_fun = SuppressSmallCounts,
                              table_formulas = list(table_1 = f1,
                                                    table_2 = f2,
                                                    table_3 = f3,
                                                    table_4 = f4),
                              recordAware =  recordAware,
                              maxN = 3, 
                              protectZeros = FALSE,   extend0 = FALSE, 
                              printXdim = TRUE,
                              singletonMethod = "none",
                              linkedGauss = linkedGauss)
      
      expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)),
                       sum2[[paste(linkedGauss, recordAware, sep = "_")]])
    }
})




test_that("SuppressLinkedTables with forced", {
  f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) 
  f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) 
  f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) 
  f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h)
  d53 <- readRDS(testthat::test_path("testdata", "d53.rds"))
  
  printXdim <- FALSE
  printInc <- FALSE
  
  # In order for the candidates order to be the same
  # But what should be the same will not be exactly the same anyway. 
  # This is because the common candidates order is not 
  # input to SuppressLinkedTables() 
  set.seed(123)
  d53$w = d53$freq + runif(nrow(d53))/nrow(d53)
  
  
  # forced increases complexity and results in unsafe in output
  # May give GaussSuppression-warning:
  #   "some cell grouping ignored due to forced cells"
  # Thus, it can still be inconsistent suppression and warning: 
  #   "Inconsistent suppression across common cells within the algorithm"
  forced <- function(freq, crossTable, ...) freq>500 & crossTable["sex"] == "Total"
  
  
  sum1 <- list(local_FALSE = c(1268238, 2177150, 4681831, 19221), 
               local_TRUE = c(1268238, 2177150, 4681831, 19221), 
               consistent_FALSE = c(1265297, 2174748, 4681439, 22860), 
               consistent_TRUE = c(1342807, 2222294, 4977701, 22860), 
               `super-consistent_TRUE` = c(1339256, 2161034, 4887808, 17202),
               `back-tracking_FALSE` = c(1273760, 2174748, 4681439, 22477), 
               `back-tracking_TRUE` = c(1427836, 2710850, 5364422, 22477))
                                         
                                         
  sum2 <- list(local_FALSE = c(1269555, 2177278, 4681933, 19665), 
               local_TRUE = c(1365557, 2363893, 4950102, 19677), 
               consistent_FALSE = c(1265297, 2174748, 4681413, 22864), 
               consistent_TRUE = c(1343774, 2222294, 5000442, 22876),
               `super-consistent_TRUE` = c(1355825, 2161034, 4877349, 17202),
               `back-tracking_FALSE` = c(1273760, 2174748, 4681413, 22481), 
               `back-tracking_TRUE` = c(1427836, 2712055, 5364396, 22493))
  
  sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]]
  sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]]
  sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]]
  sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]]
  
  
  # Copy of PxWebApiData:::WithWarningsAsMessages 
  WithWarningsAsMessages <- function(expr, classes = "warning") {
    withCallingHandlers(
      expr,
      warning = function(w) {
        if (inherits(w, classes)) {
          message("Warning converted to message: ", w$message)
          tryInvokeRestart("muffleWarning")
        }
      }
    )
  }
  
  # Choose not to use this now
  # WithWarningsAsMessages <- suppressWarnings
  
  # Choose not to suppress messages now
  WithWarningsAsMessages_ <- WithWarningsAsMessages
  WithWarningsAsMessages <- function(...) suppressMessages(WithWarningsAsMessages_(...))
  
  As4list <- function(a){
    list(a[a$table_1, ], a[a$table_2, ], a[a$table_3, ], a[a$table_4, ])
  }
  
  
  
  for(linkedGauss in c("local", "consistent", "super-consistent", "back-tracking",  "local-bdiag"))
    for(recordAware in TRUE) { #for(recordAware in c(FALSE, TRUE)) {
      
      if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- WithWarningsAsMessages(SuppressLinkedTables(data = d53,
                                                    freqVar = "freq",
                                                    fun = SuppressSmallCounts,
                                                    withinArg = list(list(formula = f1),
                                                                     list(formula = f2),
                                                                     list(formula = f3),
                                                                     list(formula = f4)), 
                                                    recordAware =  recordAware,
                                                    preAggregate = TRUE,
                                                    maxN = 3, 
                                                    protectZeros = FALSE,   extend0 = FALSE, 
                                                    printXdim = printXdim,
                                                    printInc = printInc,
                                                    singletonMethod = "none", 
                                                    forced = forced,
                                                    linkedGauss = linkedGauss,  
                                                    numVar = "w",
                                                    candidates = CandidatesNum))
      
      expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed) + 2 * as.integer(x$unsafe))),
                       sum1[[paste(linkedGauss, recordAware, sep = "_")]])
      
      
      a <- As4list(WithWarningsAsMessages(tables_by_formulas(data = d53,
                                                    freqVar = "freq",
                                                    table_fun = SuppressSmallCounts,
                                                    table_formulas = list(table_1 = f1,
                                                                          table_2 = f2,
                                                                          table_3 = f3,
                                                                          table_4 = f4),
                                                    recordAware =  recordAware,
                                                    preAggregate = TRUE,
                                                    maxN = 3, 
                                                    protectZeros = FALSE,   extend0 = FALSE, 
                                                    printXdim = printXdim,
                                                    printInc = printInc,
                                                    singletonMethod = "none", 
                                                    forced = forced,
                                                    linkedGauss = linkedGauss,  
                                                    numVar = "w",
                                                    candidates = CandidatesNum)))
    
      expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed) + 2 * as.integer(x$unsafe))),
                       sum2[[paste(linkedGauss, recordAware, sep = "_")]])
        
    }
})



test_that("SuppressLinkedTables with freq-singleton", {
  f1 <- ~sex * (age_l + age_m + age_h) * (lms_l + lms_h) 
  f2 <- ~sex * (age_l + age_m + age_h) * (hst_l + hst_m + hst_h) 
  f3 <- ~sex * (age_l + age_m + age_h) * (fst_l + fst_m + fst_h) 
  f4 <- ~sex * (lms_l + lms_h) * (hst_l + hst_m + hst_h)
  d53 <- readRDS(testthat::test_path("testdata", "d53.rds"))
  
  printXdim <- FALSE
  printInc <- FALSE
  
  sum1 <- 
    list(local_FALSE = c(1367202L, 2440464L, 5143007L, 20531L), 
         local_TRUE = c(1367202L, 2440464L, 5143007L, 20531L), 
         consistent_FALSE = c(1365362L, 2430673L, 5138722L, 21524L), 
         consistent_TRUE = c(1431584L, 2431583L, 5229043L, 21524L),
         `back-tracking_FALSE` = c(1367329L, 2430673L, 5138722L, 21524L), 
         `back-tracking_TRUE` = c(1525658L, 2962522L, 5706756L, 21524L), 
         `local-bdiag_FALSE` = c(1367202L, 2440464L, 5143007L, 20531L), 
         `local-bdiag_TRUE` = c(1367202L, 2440464L, 5143007L, 20531L))
  
  
  sum2 <- list(local_FALSE = 23753179L, 
               local_TRUE = 24928316L, 
               consistent_FALSE = 23680330L, 
               consistent_TRUE = 24242046L, 
               `back-tracking_FALSE` = 23743652L, 
               `back-tracking_TRUE` = 27124781L, 
               global_FALSE = 26316146L, 
               global_TRUE = 26316146L, 
               `local-bdiag_FALSE` = 23753179L, 
               `local-bdiag_TRUE` = 24928316L)
  
  sum1[["local-bdiag_FALSE"]] <- sum1[["local_FALSE"]]
  sum1[["local-bdiag_TRUE"]] <- sum1[["local_TRUE"]]
  sum2[["local-bdiag_FALSE"]] <- sum2[["local_FALSE"]]
  sum2[["local-bdiag_TRUE"]] <- sum2[["local_TRUE"]]
  
  
  for(linkedGauss in "consistent") #for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag"))
    for(recordAware in TRUE) { #for(recordAware in c(FALSE, TRUE)) {
      
      if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- SuppressLinkedTables(data = d53,
                             freqVar = "freq",
                             fun = SuppressSmallCounts,
                             withinArg = list(list(formula = f1),
                                              list(formula = f2),
                                              list(formula = f3),
                                              list(formula = f4)), 
                             recordAware =  recordAware,
                             preAggregate = TRUE,
                             maxN = 3, 
                             protectZeros = FALSE,   extend0 = FALSE, 
                             printXdim = printXdim,
                             printInc = printInc,
                             linkedGauss = linkedGauss)
      
      expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))),
                       sum1[[paste(linkedGauss, recordAware, sep = "_")]])
      
    }
  
  
  
  for(linkedGauss in "consistent") # for(linkedGauss in c("local", "consistent", "back-tracking", "global", "local-bdiag"))
    for(recordAware in FALSE) { #for(recordAware in c(FALSE, TRUE)) {
      
      if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- tables_by_formulas(data = d53,
                              freqVar = "freq",
                              table_fun = SuppressSmallCounts,
                              table_formulas = list(table_1 = f1,
                                                    table_2 = f2,
                                                    table_3 = f3,
                                                    table_4 = f4),
                              recordAware =  recordAware,
                              maxN = 3, 
                              protectZeros = FALSE,   extend0 = FALSE, 
                              printXdim = printXdim,
                              printInc = printInc,
                              linkedGauss = linkedGauss)
      
      expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)),
                       sum2[[paste(linkedGauss, recordAware, sep = "_")]])
    }
})




test_that("SuppressLinkedTables with num-singleton", {
  f1 <- ~(age_l + age_m) * (lms_l) 
  f2 <- ~(age_l + age_m) * (hst_l + hst_m) 
  f3 <- ~(age_l + age_m) * (fst_l + fst_m) 
  f4 <- ~(lms_l + lms_h) * (hst_l + hst_m)
  d53 <- readRDS(testthat::test_path("testdata", "d53.rds"))
  z <- SSBtools::MakeMicro(d53, "freq")
  z <- z[z$sex == 2, ]
  set.seed(123)
  z$char <- sample(paste0("char", seq_len(nrow(z)/2)), nrow(z), replace = TRUE)
  z$value <- rnorm(nrow(z))^2
  
  printXdim <- FALSE
  printInc <- FALSE
  
  sum1 <- list(consistent_FALSE = c(1362L, 6656L, 11724L, 993L))
  
  
  sum2 <- c(consistent_FALSE = 64962L, 
            consistent_TRUE = 71056L, 
            `super-consistent_FALSE` = 64962L, 
            `super-consistent_TRUE` = 71157L)
  
  
  
  for(linkedGauss in "consistent") # for(linkedGauss in c("local", "consistent", "back-tracking", "local-bdiag"))
    for(recordAware in FALSE) { #for(recordAware in c(FALSE, TRUE)) {
      
      if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- SuppressLinkedTables(data = z,
                                fun = SuppressDominantCells,
                                dominanceVar = "value",
                                contributorVar = "char",
                                withinArg = list(list(formula = f1),
                                                 list(formula = f2),
                                                 list(formula = f3),
                                                 list(formula = f4)), 
                                recordAware =  recordAware,
                                pPercent = 10,
                                printXdim = printXdim,
                                printInc = printInc,
                                linkedGauss = linkedGauss)
      
      expect_identical(sapply(a, function(x) sum(seq_len(nrow(x)) * as.integer(x$suppressed))),
                       sum1[[paste(linkedGauss, recordAware, sep = "_")]])
      
    }
  
  
  for(linkedGauss in c("consistent", "super-consistent")) 
    for(collapseAware in c(FALSE, TRUE)) {
      
      if (printInc) cat("\n------------", paste(linkedGauss, recordAware, sep = "_"), "--------------\n")
      
      a <- tables_by_formulas(data = z,
                              table_fun = SuppressDominantCells,
                              dominanceVar = "value",
                              contributorVar = "char",
                              table_formulas = list(table_1 = f1,
                                                    table_2 = f2,
                                                    table_3 = f3,
                                                    table_4 = f4),
                              collapseAware =  collapseAware,
                              recordAware = FALSE, 
                              pPercent = 50,
                              printXdim = printXdim,
                              printInc = printInc,
                              linkedGauss = linkedGauss)
      
      expect_identical(sum(seq_len(nrow(a)) * as.integer(a$suppressed)),
                       sum2[[paste(linkedGauss, collapseAware, sep = "_")]])
    }
  
  
  lpPackage <- "highs"
  
  if (requireNamespace(lpPackage, quietly = TRUE)) {
    capture.output({
      a <- SuppressLinkedTables(data = z,
                              fun = SuppressDominantCells,
                              dominanceVar = "value",
                              contributorVar = "char",
                              withinArg = list(list(formula = f1),
                                               list(formula = f2),
                                               list(formula = f3),
                                               list(formula = f4)), 
                              recordAware =  TRUE,
                              pPercent = 50,
                              printXdim = printXdim,
                              printInc = printInc,
                              linkedGauss = "super-consistent", 
                              linkedIntervals = c("super-consistent", "local-bdiag"),
                              lpPackage = lpPackage, rangePercent = 100)
      })
    
    
    expect_equal(c(sapply(a, function(x) sum(x$up_1 - x$lo_1, na.rm = TRUE)), 
                   sapply(a, function(x) sum(x$up - x$lo, na.rm = TRUE)),
                   sapply(a, function(x) sum(x$up_lb - x$lo_lb, na.rm = TRUE))),
                 c(53.6501570133984, 215.174034096945, 438.489020508674, 4951.02367010228, 
                   63.0146389958929, 220.445112972418, 454.045573572603, 4951.02367010228, 
                   99.8724522094226, 293.380040891785, 804.740113843912, 4951.02367010228))
    
    capture.output({
      b <- tables_by_formulas(data = z,
                            table_fun = SuppressDominantCells,
                            dominanceVar = "value",
                            contributorVar = "char",
                            table_formulas = list(table_1 = f1,
                                                  table_2 = f2,
                                                  table_3 = f3,
                                                  table_4 = f4),
                            collapseAware =  TRUE,
                            pPercent = 30,
                            printXdim = printXdim,
                            printInc = printInc,
                            linkedGauss = "super-consistent", linkedIntervals = c("super-consistent", "local-bdiag", "global"),
                            lpPackage = lpPackage, rangePercent = 100)
      })
    
    expect_equal(c(sum(b$up_1 - b$lo_1, na.rm = TRUE), 
                   sum(b$up - b$lo, na.rm = TRUE), 
                   sum(b$up_lb - b$lo_lb, na.rm = TRUE), 
                   sum(b$up_global - b$lo_global, na.rm = TRUE)),
                 c(1366.98220095118, 
                   1438.33109987627, 
                   1597.84174633637, 
                   1016.64535487785))

  }
  
  
  
})

Try the GaussSuppression package in your browser

Any scripts or data that you put into this service are public.

GaussSuppression documentation built on Aug. 25, 2025, 5:12 p.m.