R/SuppressionFromDecimals.R

Defines functions PrimaryDecimals SuppressionFromDecimals

Documented in SuppressionFromDecimals

#' Cell suppression from synthetic decimal numbers
#' 
#' Decimal numbers, as calculated by \code{\link{GaussSuppressDec}}, are used to decide suppression (whole numbers or not). 
#' Technically, the calculations are done via \code{\link{GaussSuppressionFromData}}, 
#' but without running \code{\link[SSBtools]{GaussSuppression}}. 
#' All suppressed cells are primary suppressed.  
#' 
#' Several decimal number variables reduce the probability of obtaining whole numbers by chance. 
#'
#' @param data    Input data as a data frame 
#' @param decVar One ore several (`nRep>1`) decimal number variables. 
#' @param freqVar A single variable holding counts (not needed)
#' @param numVar Other numerical variables to be aggregated
#' @param preAggregate  Parameter to \code{\link{GaussSuppressionFromData}}
#' @param digits Parameter to \code{\link[SSBtools]{RoundWhole}}. Values close to whole numbers will be rounded.
#' @param ...   Other parameters to \code{\link{GaussSuppressionFromData}}
#'
#' @return Aggregated data with suppression information
#' @export
#' @importFrom stats t.test
#' 
#' @author Øyvind Langsrud 
#'
#' @examples
#' z2 <- SSBtoolsData("z2")
#' 
#' # Find suppression and decimal numbers with "fylke" in model 
#' a1 <- GaussSuppressDec(z2,
#'                        fun = SuppressSmallCounts,   
#'                        dimVar = c("region", "fylke", "hovedint"), 
#'                        freqVar = "ant", protectZeros = FALSE, maxN = 2, 
#'                        output = "inner")
#' 
#' # Add decimal numbers to data 
#' z2$freqDec <- a1$freqDec
#' 
#' # Find suppression with "kostragr" in model 
#' a2 <- SuppressionFromDecimals(z2, dimVar = c("region", "kostragr", "hovedint"), 
#'                               freqVar = "ant", decVar = "freqDec")
#' tail(a2)
#' 
#' b1 <- GaussSuppressDec(data = SSBtoolsData("magnitude1"), 
#'                        fun = SuppressDominantCells, 
#'                        numVar = "value", 
#'                        formula = ~sector2 * geo + sector4 * eu,
#'                        contributorVar = "company", k = c(80, 99))
#'  
#' b2 <- SuppressionFromDecimals(b1[b1$isInner, ], 
#'                               formula = ~(sector2 + sector4) * eu, 
#'                               numVar = "value", 
#'                               decVar = "freqDec")
#' FormulaSelection(b2, ~sector2 * eu)                                 
#'                               
SuppressionFromDecimals = function(data, decVar, freqVar = NULL, numVar = NULL,
                                   preAggregate = FALSE, digits = 9,
                                   ...){

   GaussSuppressionFromData(data, freqVar = freqVar, 
                            numVar = c(decVar, numVar), nDec = length(decVar),
                            preAggregate = preAggregate, 
                            candidates = integer(0),
                            primary = PrimaryDecimals,
                            singleton = NULL,
                            singletonMethod = "none",
                            digitsPrimary = digits,
                            ...) 
}
                                        
                   
PrimaryDecimals <- function(freq, num, nDec, digitsPrimary, onlyZerosRoundWhole = NA, ...) {
  
  if(nDec<ncol(num)){
    num = num[,seq_len(nDec), drop=FALSE]
  }
  
  if (is.na(onlyZerosRoundWhole)) {   # See comments below 
    t_test <- t.test(as.vector(as.matrix(num)))
    conf_int <- t_test$conf.int
    if (anyNA(conf_int)) {     # NA occurs when 0 variance 
      conf_int <- range(num)
      p_value <- as.numeric(conf_int[2] == 0)
    } else {
      p_value <- t_test$p.value
    }
    onlyZerosRoundWhole <- (max(abs(conf_int)) < 0.1) & (length(unique(sign(conf_int))) == 2)
    sureNotOnlyZeros <- p_value < 1e-12
  } else {
    sureNotOnlyZeros <- !onlyZerosRoundWhole
  }
  
  numRW = RoundWhole(num, digits = digitsPrimary, onlyZeros = onlyZerosRoundWhole)
  
  primary = !(rowSums(numRW == round(num)) == nDec)
  
  
  if (!is.null(freq) & sureNotOnlyZeros) {
    if(any(freq[!primary] != numRW[!primary,1])){
      warning("Mismatch between aggregated frequencies and decimals aggregated to whole numbers") 
    }
  }
  
  primary
}


#
# Automatic onlyZeros to minimize the probability of error
# 
# - When decimal numbers are generated with only 0s, 
#   the check can focus on zeros instead of all possible whole numbers.
# - The automation verifies whether a small confidence interval exists around 0.
# - Failing to detect such a confidence interval is not problematic, 
#   as it confirms the dataset is small.
# - Special cases may occur where non-zero frequencies exist, 
#   but the majority of frequencies are 0. Thus, the check is deliberately strict.
#

Try the GaussSuppression package in your browser

Any scripts or data that you put into this service are public.

GaussSuppression documentation built on June 8, 2025, 10:43 a.m.