R/SuppressKDisclosure.R

Defines functions FindDifferenceCells KDisclosurePrimary SuppressKDisclosure

Documented in KDisclosurePrimary SuppressKDisclosure

#' K-disclosure suppression
#'
#' A function for suppressing frequency tables using the k-disclosure method.
#'
#' @param data a data.frame representing the data set
#' @param coalition numeric vector of length one, representing possible size of an
#' attacking coalition. This parameter corresponds to the parameter k in the
#' definition of k-disclosure.
#' @param dimVar The main dimensional variables and additional aggregating
#' variables. This parameter can be  useful when hierarchies and formula are
#' unspecified.
#' @param formula A model formula
#' @param hierarchies List of hierarchies, which can be converted by
#' \code{\link[SSBtools]{AutoHierarchies}}. Thus, the variables can also be coded by
#' `"rowFactor"` or `""`, which correspond to using the categories in the data.
#' @param freqVar name of the frequency variable in `data`
#' @param mc_hierarchies a hierarchy representing meaningful combinations to be
#' protected. Default value is `NULL`.
#' @param upper_bound numeric value representing minimum count considered safe.
#' Default set to `Inf`
#' @param ... parameters passed to children functions
#' @inheritParams GaussSuppressionFromData 
#'
#' @return A data.frame containing the publishable data set, with a boolean
#' variable `$suppressed` representing cell suppressions.
#' @export
#'
#' @author Daniel P. Lupp

#' @examples
#' # data
#' data <- SSBtools::SSBtoolsData("mun_accidents")
#'
#' # hierarchies as DimLists
#' mun <- data.frame(levels = c("@@", rep("@@@@", 6)),
#' codes = c("Total", paste("k", 1:6, sep = "")))
#' inj <- data.frame(levels = c("@@", "@@@@" ,"@@@@", "@@@@", "@@@@"),
#' codes = c("Total", "serious", "light", "none", "unknown"))
#' dimlists <- list(mun = mun, inj = inj)
#'
#' inj2 <- data.frame(levels = c("@@", "@@@@", "@@@@@@" ,"@@@@@@", "@@@@", "@@@@"),
#' codes = c("Total", "injured", "serious", "light", "none", "unknown"))
#' inj3 <- data.frame(levels = c("@@", "@@@@", "@@@@" ,"@@@@", "@@@@"),
#' codes = c( "shadowtotal", "serious", "light", "none", "unknown"))
#' mc_dimlist <- list(inj = inj2)
#' mc_nomargs <- list(inj = inj3)
#'
#' #' # Example with formula, no meaningful combination
#' out <- SuppressKDisclosure(data, coalition = 1, freqVar = "freq", formula = ~mun*inj)
#'
#' # Example with hierarchy and meaningful combination
#' out2 <- SuppressKDisclosure(data, coalition = 1, freqVar = "freq",
#' hierarchies = dimlists, mc_hierarchies = mc_dimlist)
#'
#' #' # Example of table without mariginals, and mc_hierarchies to protect
#' out3 <- SuppressKDisclosure(data, coalition = 1, freqVar = "freq",
#' formula = ~mun:inj, mc_hierarchies = mc_nomargs )
SuppressKDisclosure <- function(data,
                                coalition = 0,
                                mc_hierarchies = NULL,
                                upper_bound = Inf,
                                dimVar = NULL,
                                formula = NULL,
                                hierarchies = NULL,
                                freqVar = NULL,
                                ...,
                                spec = PackageSpecs("kDisclosureSpec")) {
  additional_params <- list(...)
  if (length(additional_params)) {
    if ("singletonMethod" %in% names(additional_params) &
        "none" %in% additional_params[["singletonMethod"]])
      warning(
        "SuppressKDisclosure should use a singleton method for protecting the zero singleton problem. The output might not be safe, consider rerunning with a singleton method (default)."
      )
  }
  GaussSuppressionFromData(
    data,
    hierarchies = hierarchies,
    formula = formula,
    dimVar = dimVar,
    freqVar = freqVar,
    coalition = coalition,
    mc_hierarchies = mc_hierarchies,
    upper_bound = upper_bound,
    spec = spec,
    ...
  )
}

#' Construct primary suppressed difference matrix
#'
#' Function for constructing model matrix columns representing primary suppressed
#' difference cells
#'
#' @inheritParams SuppressKDisclosure
#' @param crossTable crossTable generated by parent function
#' @param x ModelMatrix generated by parent function
#'
#' @return dgCMatrix corresponding to primary suppressed cells
#' @export
#'
#' @author Daniel P. Lupp
KDisclosurePrimary <- function(data,
                               x,
                               crossTable,
                               freqVar,
                               mc_hierarchies = NULL,
                               coalition = 1,
                               upper_bound = Inf,
                               ...) {
  x <- cbind(
    x,
    X_from_mc(
      data = data,
      x = x,
      crossTable = crossTable,
      mc_hierarchies = mc_hierarchies,
      freqVar = freqVar,
      coalition = coalition,
      upper_bound = upper_bound,
      ...
    )
  )
  x <- x[,!SSBtools::DummyDuplicated(x, rnd = TRUE), drop = FALSE]
  freq <- as.vector(crossprod(x, data[[freqVar]]))
  FindDifferenceCells(
    x = x,
    freq = freq,
    coalition = coalition,
    upper_bound = upper_bound,
    crossTable = crossTable
  )
}

FindDifferenceCells <- function(x,
                                  freq,
                                  coalition,
                                  upper_bound = Inf,
                                  crossTable) {
  publ_x <- crossprod(x)
  publ_x <-
    As_TsparseMatrix(publ_x)
  colSums_x <- colSums(x)
  # row i is child of column j in r
  r <-
    colSums_x[publ_x@i + 1] == publ_x@x &
    colSums_x[publ_x@j + 1] != publ_x@x
  publ_x@x <- publ_x@x[r]
  publ_x@j <- publ_x@j[r]
  publ_x@i <- publ_x@i[r]
  child_parent <- cbind(child = publ_x@i + 1,
                        parent = publ_x@j + 1,
                        diff = freq[publ_x@j + 1] - freq[publ_x@i + 1])
  child_parent <- child_parent[freq[child_parent[, 2]] > 0 &
                                 freq[child_parent[, 1]] > 0 &
                                 freq[child_parent[, 1]] <= upper_bound, ]
  disclosures <- child_parent[child_parent[, 3] <= coalition, , drop = FALSE]
  if (nrow(disclosures))
    primary_matrix <- As_TsparseMatrix(apply(disclosures,
                                             1,
                                             function(row)
                                               x[, row[2]] - x[, row[1]]))
  else
    return(rep(FALSE, nrow(crossTable)))
  primary_matrix
}

Try the GaussSuppression package in your browser

Any scripts or data that you put into this service are public.

GaussSuppression documentation built on Sept. 24, 2024, 5:07 p.m.