Nothing
#' Cell suppression with synthetic decimal numbers
#'
#' \code{\link{GaussSuppressionFromData}}, or one of its wrappers, is run and decimal numbers are added to output by
#' executing \code{\link[RegSDC]{SuppressDec}}.
#'
#' @param data Input daata as a data frame
#' @param ... Further parameters to \code{\link{GaussSuppressionFromData}}
#' @param fun A function: \code{\link{GaussSuppressionFromData}} or one of its wrappers such as
#' \code{\link{SuppressSmallCounts}} and \code{\link{SuppressDominantCells}}.
#' @param output NULL (default), `"publish"`, `"inner"`, `"publish_inner"`, or `"publish_inner_x"` (x also).
#' @param use_freqVar Logical (`TRUE`/`FALSE`) with a default value of `NA`. Determines whether the variable
#' `freqVar` is used as the basis for generating decimal numbers.
#' If `NA`, the parameter is set to `TRUE`, except in the following cases, where it is set to `FALSE`:
#' - If `freqVar` is not available.
#' - If `runIpf` is `FALSE` and `fun` is one of the functions `SuppressFewContributors` or `SuppressDominantCells`.
#'
#' When `use_freqVar` is `FALSE`, only zeros are used instead. This approach is more robust in practice,
#' as decimal numbers can then be stored more accurately.
#' The default value is chosen to ensure compatibility with existing code and to allow for the use of `freqVar`
#' when dealing with frequency tables, which may be useful.
#' @param digits Parameter to \code{\link[SSBtools]{RoundWhole}}. Values close to whole numbers will be rounded.
#' @param nRep NULL or an integer. When >1, several decimal numbers will be generated.
#' @param rmse Desired root mean square error of decimal numbers.
#' Variability around the expected, according to the linear model, inner frequencies.
#' The expected frequencies are calculated from the non-suppressed publishable frequencies.
#' @param sparseLimit Limit for the number of rows of a reduced x-matrix within the algorithm. When exceeded, a new sparse algorithm is used.
#' @param rndSeed If non-NULL, a random generator seed to be used locally within the function without affecting the random value stream in R.
#' @param runIpf When TRUE, additional frequencies are generated by iterative proportional fitting using \code{\link[SSBtools]{Mipf}}.
#' @param eps Parameter to \code{\link[SSBtools]{Mipf}}.
#' @param iter Parameter to \code{\link[SSBtools]{Mipf}}.
#' @param mismatchWarning Whether to produce the warning "`Mismatch between whole numbers and suppression`", when relevant.
#' When `nRep>1`, all replicates must satisfy the whole number requirement for non-suppressed cells.
#' When `mismatchWarning` is integer (`>0`), this will be used as parameter `digits` to \code{\link[SSBtools]{RoundWhole}}
#' when doing mismatch checking (can be quite low when `nRep>1`).
#' @param whenDuplicatedInner Function to be called when default output and when cells marked as inner correspond to
#' several input cells (aggregated) since they correspond to published cells.
#' @param whenMixedDuplicatedInner Function to be called in the case above when some inner cells correspond
#' to published cells (aggregated) and some not (not aggregated).
#' @return A data frame where inner cells and cells to be published are combined or output according to parameter `output`.
#'
#' @importFrom SSBtools RoundWhole Match Mipf RbindAll
#' @importFrom RegSDC SuppressDec
#' @importFrom Matrix crossprod
#' @importFrom stats runif
#' @export
#'
#' @author Øyvind Langrsud
#'
#' @seealso [SuppressionFromDecimals()]
#'
#'
#' @examples
#' a <- GaussSuppressDec(data = SSBtoolsData("example1"),
#' fun = SuppressSmallCounts,
#' dimVar = c("age", "geo"),
#' preAggregate = TRUE,
#' freqVar = "freq", maxN = 3)
#' a
#'
#'
#' b <- GaussSuppressDec(data = SSBtoolsData("magnitude1"),
#' fun = SuppressDominantCells,
#' numVar = "value",
#' formula = ~sector2 * geo + sector4 * eu,
#' contributorVar = "company", k = c(80, 99))
#' b
#'
#' # FormulaSelection() works on this output as well
#' FormulaSelection(b, ~sector2 * geo)
#'
GaussSuppressDec = function(data,
...,
fun = GaussSuppressionFromData,
output = NULL,
use_freqVar = NA,
digits = 9,
nRep = NULL,
rmse = pi/3,
sparseLimit = 500,
rndSeed = 123,
runIpf = FALSE,
eps = 0.01,
iter = 100,
mismatchWarning = TRUE,
whenDuplicatedInner = NULL,
whenMixedDuplicatedInner = warning){
if (!is.null(rndSeed)) {
if (!exists(".Random.seed"))
if (runif(1) < 0)
stop("Now seed exists")
exitSeed <- .Random.seed
on.exit(.Random.seed <<- exitSeed)
set.seed(rndSeed)
}
if(!is.null(output)){
if(!(output %in% c("publish_inner_x", "publish_inner", "inner", "publish")))
stop('Allowed non-NULL values of parameter output are "publish_inner_x", "publish_inner", "inner" and "publish".')
} else {
output <- ""
}
if (is.null(nRep)) {
freqDecNames <- "freqDec"
nRep <- 1
} else {
freqDecNames <- paste0("freqDec", paste(seq_len(nRep)))[seq_len(nRep)]
}
a <- fun(data, ..., output = "publish_inner_x")
startRow <- attr(a$publish, "startRow", exact = TRUE)
freqVar <- attr(a$inner, "freqVar")
weightVar <- attr(a$inner, "weightVar")
numVar <- attr(a$inner, "numVar")
dimVarPub <- colnames(a$publish)
dimVarPub <- dimVarPub[!(dimVarPub %in% c(freqVar, "primary", "suppressed", weightVar, numVar, MoreVars(...)))]
dimVarPub <- dimVarPub[(dimVarPub %in% colnames(a$inner))]
if (is.na(use_freqVar)) {
use_freqVar <- TRUE
if (!length(freqVar)) {
use_freqVar <- FALSE
} else {
if (!runIpf) {
if (identical(fun, SuppressFewContributors))
use_freqVar <- FALSE
if (identical(fun, SuppressDominantCells))
use_freqVar <- FALSE
}
}
}
if (use_freqVar) {
freq_for_dec_publish <- a$publish[[freqVar]]
freq_for_dec_inner <- a$inner[[freqVar]]
} else {
freq_for_dec_publish <- rep(0L, nrow(a$publish))
freq_for_dec_inner <- rep(0L, nrow(a$inner))
}
z <- as.matrix(freq_for_dec_publish)
y <- as.matrix(freq_for_dec_inner)
if (nRep) {
yDec <- SuppressDec(a$x, z = z, y = y, suppressed = a$publish$suppressed, digits = digits, nRep = nRep, rmse = rmse, sparseLimit = sparseLimit)
zDec <- RoundWhole(as.matrix(Matrix::crossprod(a$x, yDec)), digits = digits)
} else {
yDec <- matrix(0, nrow(y),0)
zDec <- matrix(0, nrow(z),0)
}
if(runIpf){
freqDecNames <- c(freqDecNames, "freqIpf")
yIpf <- Mipf(x= a$x[,!a$publish$suppressed, drop=FALSE], z = z[!a$publish$suppressed, 1, drop=FALSE], iter = iter, eps = eps)
cat("\n")
yDec <- cbind(yDec, as.matrix(yIpf))
zDec <- cbind(zDec, as.matrix(Matrix::crossprod(a$x, yIpf)))
}
colnames(yDec) <- freqDecNames
colnames(zDec) <- freqDecNames
a$publish <- cbind(a$publish, zDec)
rownames(a$publish) <- NULL
a$inner <- cbind(a$inner, yDec)
if (nRep & mismatchWarning) {
if (is.numeric(mismatchWarning)) {
digitsPrimary <- mismatchWarning
} else {
digitsPrimary <- digits
}
# Re-use primary-function originally made for SuppressionFromDecimals
suppressionFromDecimals <- PrimaryDecimals(freq = freq_for_dec_publish, num = a$publish[freqDecNames[1:nRep]], nDec = nRep, digitsPrimary = digitsPrimary)
if (any(a$publish$suppressed != suppressionFromDecimals)) {
incorrectly_suppressed <- !a$publish$suppressed & suppressionFromDecimals
incorrectly_unsuppressed <- a$publish$suppressed & !suppressionFromDecimals
n_incorrectly_suppressed <- sum(incorrectly_suppressed)
n_incorrectly_unsuppressed <- sum(incorrectly_unsuppressed)
if (n_incorrectly_suppressed) {
show_var <- c(freqVar, numVar)[1]
max_incorrect_value <- round(max(a$publish[[show_var]][incorrectly_suppressed]), 1)
parenthesis1 <- paste0(" (", max_incorrect_value, " max ", show_var, ")")
} else {
parenthesis1 <- ""
}
if (n_incorrectly_unsuppressed) {
n_incorrectly_primary <- sum(incorrectly_unsuppressed & a$publish$primary)
parenthesis2 <- paste0(" (", n_incorrectly_primary, " primary)")
} else {
parenthesis2 <- ""
}
warning(paste0(
"Mismatch between whole numbers and suppression: ",
n_incorrectly_suppressed, " incorrectly suppressed",
parenthesis1, ", ",
n_incorrectly_unsuppressed, " incorrectly unsuppressed",
parenthesis2))
}
}
if (!is.null(startRow)) {
attr(a$publish, "startRow") <- startRow
}
if (output == "publish_inner_x")
return(a)
if (output == "publish_inner")
return(a[c("publish", "inner")])
if (output == "publish")
return(a$publish)
if (output == "inner")
return(a$inner)
ma <- Match(a$inner[dimVarPub], a$publish[dimVarPub])
anyDuplicated_ma <- anyDuplicated(ma[!is.na(ma)])
a$publish$isPublish <- TRUE
a$publish$isInner <- FALSE
a$publish$isInner[ma[!is.na(ma)]] <- TRUE
if (!anyNA(ma)) {
if (anyDuplicated_ma & !is.null(whenDuplicatedInner)) {
whenDuplicatedInner("Duplicated inner rows identified. Aggregation applied to some variables.")
}
} else {
if (anyDuplicated_ma & !is.null(whenMixedDuplicatedInner)) {
whenMixedDuplicatedInner("Duplicated inner rows identified. Aggregation applied to some variables in some rows.")
}
}
extra_inner <- which(!(names(a$inner) %in% names(a$publish)))
extra_inner_to_publish <- integer(0)
if (length(extra_inner)) {
ma_rows <- !is.na(ma)
ma_ok <- ma[ma_rows]
ma_unique <- unique(ma_ok)
n_ma_unique <- length(ma_unique)
if (anyDuplicated_ma) {
for (i in extra_inner) {
if (nrow(unique(cbind(ma_ok, a$inner[ma_rows, i]))) == n_ma_unique) {
# Variable is ok when equal value in duplicate rows.
# Only one of them will be copied to a$publish
extra_inner_to_publish <- c(extra_inner_to_publish, i)
}
}
} else {
extra_inner_to_publish <- extra_inner
}
}
extra_inner_to_remove <- extra_inner[!(extra_inner %in% extra_inner_to_publish)]
if (length(extra_inner_to_publish)) {
extra_inner_to_publish <- names(a$inner)[extra_inner_to_publish]
a$publish[extra_inner_to_publish] <- NA
if (length(ma_unique)) {
a$publish[ma_unique, extra_inner_to_publish] <- a$inner[match(ma_unique, ma), extra_inner_to_publish]
}
}
if (length(extra_inner_to_remove)) {
a$inner <- a$inner[-extra_inner_to_remove]
}
if (!anyNA(ma)) {
return(a$publish)
}
a$inner <- a$inner[is.na(ma), , drop = FALSE]
a$inner$isPublish <- FALSE
a$inner$isInner <- TRUE
if (!is.null(startRow)) {
startRow <- c(startRow, StaRt_InneR = nrow(a$publish) + 1L)
}
a <- RbindAll(a$publish, a$inner)
if (!is.null(startRow)) {
attr(a, "startRow") <- startRow
}
a
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.