R/IntervalLimits.R
In GaussSuppression: Tabular Data Suppression using Gaussian Elimination

Documented in IntervalLimits

#' Construct feasibility interval requirements
#'
#' Creates one or more sets of requirements for feasibility intervals of cell
#' values in protected tables. These can include lower bound requirements
#' ("lomax_*"), upper bound requirements ("upmin_*"), and/or minimum width
#' requirements ("rlim_*"). The requirements may be specified as relative
#' percentages or as absolute distances, and can be set symmetrically around
#' the true cell value or separately for lower and upper bounds.
#' 
#' @note
#' Interval requirements can also be generated by the primary suppression
#' functions when `protectionIntervals = TRUE` is specified (see examples
#' below). More details are given in the description of `protectionIntervals`
#' in the [MagnitudeRule()] and [PrimaryDefault()] functions.
#' 
#' 
#' @param ... Unused parameters 
#' @param protectionPercent
#'   Required distance in percent between the true cell value and the upper
#'   bound of the feasibility interval. If `loProtectionPercent` is not set,
#'   the same percentage distance is also required to the lower bound.
#'
#' @param protectionLimit
#'   Required absolute distance between the true cell value and the upper
#'   bound of the feasibility interval. If `loProtectionLimit` is not set,
#'   the same absolute distance is also required to the lower bound.
#'
#' @param loProtectionPercent
#'   Required distance in percent between the true cell value and the lower
#'   bound of the feasibility interval. Is by default set to `protectionPercent`,
#'   i.e. symmetrical requirements.
#'
#' @param loProtectionLimit
#'   Required absolute distance between the true cell value and the lower
#'   bound of the feasibility interval. Is by default set to `protectionLimit`,
#'   i.e. symmetrical requirements.
#'   
#' @param rangePercent
#'   Minimum required width of the feasibility interval expressed as a
#'   percentage of the true cell value.
#'
#' @param rangeMin
#'   Minimum required width of the feasibility interval expressed as an
#'   absolute value.
#'   
#' @param intervalVar
#'   Numerical variable(s) used for interval calculations.
#'   If `NULL`, the variable names may be inferred from the `num` data frame
#'   as a consequence of interval requirements created by the primary
#'   function(s). If no variables can be inferred this way, 
#'  `dominanceVar`, first `numVar` or `freqVar` will be used.
#'   
#' @param primary The `primary` vector generated by parent function
#' @param num The `num` data frame generated by parent function
#' @param freq The `freq` vector generated by parent function
#' @param freqVar As input to e.g. [SuppressSmallCounts()]
#' @param dominanceVar As input to e.g. [SuppressDominantCells()]
#'
#' @return A matrix with column names constructed from the type of 
#'         requirement and the associated variable name.
#' 
#' @export
#' 
#' @examples
#' dat <- SSBtoolsData("magnitude1")
#' dat["num2"] <- 1:nrow(dat)
#' 
#' SuppressDominantCells(data = dat, 
#'     numVar = "value", 
#'     formula = ~sector2 * geo + sector4 * eu, 
#'     contributorVar = "company", 
#'     k = c(80, 99), 
#'     rangePercent = 10, rangeMin = 1,
#'     protectionPercent = 3, 
#'     protectionLimit = 5, loProtectionLimit = 4)
#' 
#' SuppressDominantCells(data = dat, 
#'     dominanceVar = "value",
#'     numVar = "num2", 
#'     formula = ~sector2 * geo + sector4 * eu, 
#'     contributorVar = "company", 
#'     pPercent = 10,
#'     intervalVar = c("value","freq", "num2"), 
#'     rangePercent = c(10, 10, 30), rangeMin = c(1, 0.2222, 2.222))
#'     
#' 
#'     
#' ## Below are two alternative ways of calculating interval requirements. 
#' ## In these cases, the requirements are generated by the primary suppression 
#' ## functions when the parameter `protectionIntervals = TRUE` is specified.
#' 
#' 
#' # See ?MagnitudeRule
#' SuppressDominantCells(data = dat, 
#'                       dominanceVar = "value",
#'                       formula = ~sector2 * geo + sector4 * eu, 
#'                       contributorVar = "company", 
#'                       pPercent = 10,
#'                       protectionIntervals = TRUE)
#' 
#' # See ?PrimaryDefault
#' SuppressSmallCounts(data = dat, 
#'                     formula = ~sector2 * geo + sector4 * eu, 
#'                     maxN = 3,
#'                     protectionIntervals = TRUE)     
#'                     
#'                     
#'                     
#' ## Combining IntervalLimits arguments 
#' ## with protectionIntervals = TRUE also works
#' 
#' SuppressSmallCounts(data = dat, 
#'                     formula = ~sector2 * geo + sector4 * eu, 
#'                     maxN = 3,
#'                     protectionIntervals = TRUE,
#'                     protectionPercent = 50)
#'
IntervalLimits <- function(..., 
                           protectionPercent = 0,
                           protectionLimit = 0,
                           loProtectionPercent = protectionPercent, 
                           loProtectionLimit = protectionLimit,
                           rangePercent = 0,
                           rangeMin = 0,
                           primary, 
                           num, 
                           freq,
                           freqVar,
                           dominanceVar = NULL, 
                           intervalVar = NULL) {
  
  rlim  <- any(rangePercent != 0) | any(rangeMin != 0)
  lomax <- any(loProtectionPercent != 0) | any(loProtectionLimit != 0)
  upmin <- any(protectionPercent != 0)   | any(protectionLimit != 0)
  
  if (!any(rlim | lomax | upmin)) {
    return(NULL)
  }
  
  if (is.null(intervalVar)) {
    intervalVar <- extract_intervalVar(colnames(num))
  }
  
  if (!length(intervalVar)) {
    if (is.null(dominanceVar)) {
      if (ncol(num) == sum(grepl("^(rlim_|lomax_|upmin_)", colnames(num)))) {
        intervalVar <- freqVar
      } else {
        intervalVar <- names(num)[1]
      }
    } else {
      intervalVar <- dominanceVar
    }
  }
  
  if (rlim) {
    rangePercent <- rep_len(rangePercent, length(intervalVar))
    rangeMin     <- rep_len(rangeMin,     length(intervalVar))
    rangeLimits  <- matrix(0, nrow(num), length(intervalVar))
    colnames(rangeLimits) <- paste("rlim", intervalVar, sep = "_")
  } else {
    rangeLimits <- matrix(0, nrow(num), 0)
  }
  
  if (lomax) {
    loProtectionPercent <- rep_len(loProtectionPercent, length(intervalVar))
    loProtectionLimit   <- rep_len(loProtectionLimit,   length(intervalVar))
    loLimits <- matrix(0, nrow(num), length(intervalVar))
    colnames(loLimits) <- paste("lomax", intervalVar, sep = "_")
  } else {
    loLimits <- matrix(0, nrow(num), 0)
  }
  
  if (upmin) {
    protectionPercent <- rep_len(protectionPercent, length(intervalVar))
    protectionLimit   <- rep_len(protectionLimit,   length(intervalVar))
    upLimits <- matrix(0, nrow(num), length(intervalVar))
    colnames(upLimits) <- paste("upmin", intervalVar, sep = "_")
  } else {
    upLimits <- matrix(0, nrow(num), 0)
  }
  
  for (i in seq_along(intervalVar)) {
    if (intervalVar[i] == c(freqVar, "")[1]) { # since freqVar may be NULL
      z <- freq
    } else {
      z <- num[[intervalVar[i]]]
    }
    z[!primary] <- NA
    
    if (rlim) {
      w <- z * rangePercent[i] / 100
      rangeLimits[, i] <- pmax(w, rangeMin[i])
    }
    
    if (lomax) {
      dev <- z * loProtectionPercent[i] / 100
      dev <- pmax(dev, loProtectionLimit[i])
      loLimits[, i] <- pmax(0, z - dev)
    }
    
    if (upmin) {
      dev <- z * protectionPercent[i] / 100
      dev <- pmax(dev, protectionLimit[i])
      upLimits[, i] <- z + dev
    }
    
  }
  
  
  cbind(rangeLimits, loLimits, upLimits)
}