R/NContributorsRule.R

Defines functions NContributorsRule2 NContributorsRule

Documented in NContributorsRule

#' Number of contributors suppression rule
#' 
#' The number of contributors is the number unique contributing 'charVar' codes. 
#' 
#' When several `charVar` variables, the rule is applied independently to each variable.
#' Primary suppression in at least one case results in  primary suppression in the output.
#' As described in the documentation for the `maxN` and `removeCodes` parameters, 
#' it is possible to specify them independently for each `charVar`.
#' 
#'
#' @param data  Input data as a data frame
#' @param freq  Vector of aggregate frequencies 
#' @param numVar Numerical variables. When several variables, only first is used. 
#' @param x Model matrix generated by parent function
#' @param maxN Suppression threshold. Cells where the number of unique contributors 
#'             (based on `charVar`) is less than or equal to `maxN` are marked as primary suppressed.  
#'             Can be specified as a single numeric value, or as a named list or named vector.  
#'             When named, the value matching the `charVar` name will be used.  
#'             If `charVar` contains multiple variables and you want different thresholds for each, 
#'             `maxN` must be a named list or vector with one value per variable.  
#'             For example: `maxN = list(char1 = 3, char2 = 2)`.
#' @param protectZeros Suppression parameter. Only TRUE (default) is used implemented. 
#' @param charVar Variable(s) with contributor codes. 
#'                When empty, unique contributor in each row is assumed.
#'                When several variables, see details.  
#' @param removeCodes Codes to exclude when counting contributors.  
#'                    Can be specified as a character vector (applied to all `charVar` variables),  
#'                    or as a named list of vectors to use different codes per variable.  
#'                    When using a list, its names must match the variables in `charVar`.  
#'                    If `charVar` is empty, codes are interpreted as row indices and converted to integers.
#' @param remove0 When set to `TRUE` (default), data rows in which the first `numVar` (if any) is zero 
#'               are excluded from the count of contributors. 
#'               Alternatively, `remove0` can be specified as one or more variable names. 
#'               In this case, all data rows with a zero in any of the specified variables 
#'               are omitted from the contributor count. 
#'               Specifying `remove0` as variable name(s) is useful for avoiding warning when there 
#'               are multiple `numVar` variables.
#' @param ... unused parameters
#'
#' @return List where first element is logical vector defining primary suppressions.
#'         The second element is data frame where `nRule` is number contributors used 
#'         in rule and where `nAll` is similar, but without omitting codes in `removeCodes`. 
#' @export
#'
NContributorsRule <- function(data, freq, numVar, x, 
                              maxN = 3, 
                              protectZeros = FALSE, 
                              charVar = NULL, 
                              removeCodes = character(0), 
                              remove0 = TRUE, 
                              ...) {
  maxN <- get_numeric_item(maxN, charVar)
  
  if (length(charVar)>1) {
    return(NContributorsRule2(data = data, freq = freq, numVar = numVar, x = x, 
                      maxN = maxN, 
                      protectZeros = protectZeros, 
                      charVar = charVar, 
                      removeCodes = removeCodes, 
                      remove0 = remove0, 
                      ...))
    #stop("Only single charVar implemented in suppression rule")
  }
  
  if (is.character(remove0)) {
    ma <- match(remove0, names(data))
    if (anyNA(ma)) {
      stop("remove0 as character must be variable name(s) in data")
    }
  } else {
    if (remove0) {
      if (length(numVar) > 1) {
        warning("Multiple numVar were supplied, only the first is used. Specify remove0 as variable name(s)?")
      }
      if (!length(numVar)) {
        remove0 <- NULL
      } else {
        remove0 <- numVar[1]
      }
    } else {
      remove0 <- NULL
    }
  }

  if (protectZeros) {
    stop("TRUE protectZeros not implemented")
  }
  if (length(charVar)) {
    y <- data[[charVar]]
  } else {
    y <- seq_len(nrow(data))
    removeCodes <- as.integer(removeCodes)
  }
  if (length(remove0)) {
    for (i in seq_along(remove0)) {
      y[data[[remove0[i]]] == 0] <- NA
    }
  }
  
  nAll <- Ncontributors(x, y)
  y[y %in% removeCodes] <- NA
  nRule <- Ncontributors(x, y)
  primary <- (nRule <= maxN) & (nRule > 0)
  list(primary = primary, numExtra = data.frame(nRule = nRule, nAll = nAll))
}



NContributorsRule2 <- function(data, freq, numVar, x, 
                               maxN, 
                               protectZeros, 
                               charVar, 
                               removeCodes, 
                               remove0, 
                               ...) {
  for (i in seq_along(charVar)) {
    if (length(maxN) > 1) {
      maxN_ <- maxN[charVar[i]]
    } else {
      maxN_ <- maxN
    }
    if (is.list(removeCodes)) {
      removeCodes_ <- removeCodes[[charVar[i]]]
    } else {
      removeCodes_ <- removeCodes
    }
    rulei <- NContributorsRule(data = data, freq = freq, numVar = numVar, x = x, 
                               maxN = maxN_, 
                               protectZeros = protectZeros, 
                               charVar = charVar[i], 
                               removeCodes = removeCodes_, 
                               remove0 = remove0, 
                               ...)
    names(rulei$numExtra) <- paste(names(rulei$numExtra), charVar[i], sep = "_")
    if (i == 1) {
      primary <- rulei$primary
      numExtra <- rulei$numExtra
    } else {
      primary <- primary | rulei$primary
      numExtra <- cbind(numExtra, rulei$numExtra)
    }
  }
  list(primary = primary, numExtra = numExtra)
}


NContributorsRule_identical <- NContributorsRule


#' Identical to ´NContributorsRule´
#' 
#' The function is included for compatibility after changing the name to 
#' \code{\link{NContributorsRule}}
#' 
#' @rdname NcontributorsRule_identical
#' 
#' @inheritParams NContributorsRule
#' 
#' @export
#' @keywords internal
#'
NcontributorsRule <- NContributorsRule_identical


# Without @rdname NcontributorsRule_identical:
# Default NcontributorsRule overwrites NContributorsRule.Rd  

# With 
# NcontributorsRule <- NContributorsRule 
# it is impossible to hide  function in separate rd-file with keywords internal

# With 
# NcontributorsRule <- function(...) NContributorsRule(...)   
# test fails since default values cannot be found in function

Try the GaussSuppression package in your browser

Any scripts or data that you put into this service are public.

GaussSuppression documentation built on Aug. 25, 2025, 5:12 p.m.