Nothing
#' Dominance `(n,k)` or p% rule for magnitude tables
#'
#' Supports application of multiple values for `n` and `k`. The function works
#' on magnitude tables containing negative cell values by calculating
#' contribution based on absolute values.
#'
#' This method only supports suppressing a single numeric variable. There are
#' multiple ways of handling sampling weights in the dominance rule. the default
#' method implemented here compares unweighted sample values with the corresponding
#' weighted cell totals. if `domWeightMethod` is set to `"tauargus"`, the
#' method implemented in tauArgus is used. For more information on this
#' method, see "Statistical Disclosure Control" by Hundepool et al (2012,
#' p. 151).
#'
#' @param data the dataset
#' @param x ModelMatrix generated by parent function
#' @param numVar vector containing numeric values in the data set
#' @param n Parameter `n` in dominance rule.
#' @param k Parameter `k` in dominance rule.
#' @param pPercent Parameter in the p% rule, when non-NULL.
#' Parameters `n` and `k` will then be ignored.
#' Technically, calculations are performed internally as if
#' `n = 1:2`. The results of these intermediate calculations can
#' be viewed by setting `allDominance = TRUE`.
#' @param protectZeros parameter determining whether cells with value 0 should
#' be suppressed.
#' @param charVar Variable in data holding grouping information. Dominance will
#' be calculated after aggregation within these groups.
#' @param removeCodes A vector of `charVar` codes that are to be excluded when
#' calculating dominance percentages. Essentially, the corresponding numeric
#' values from `dominanceVar` or `numVar` are set to zero before proceeding
#' with the dominance calculations. With empty `charVar` row indices are
#' assumed and conversion to integer is performed.
#' @param sWeightVar variable with sampling weights to be used in dominance rule
#' @param domWeightMethod character representing how weights should be treated
#' in the dominance rule. See Details.
#' @param allDominance Logical parameter. If `TRUE`, adds primary columns for each
#' pair of parameters n,k in the dominance rules
#' @param outputWeightedNum logical value to determine whether weighted numerical
#' value should be included in output. Default is `TRUE` if `sWeightVar` is provided.
#' @param dominanceVar When specified, `dominanceVar` is used in place of `numVar`.
#' Specifying `dominanceVar` is beneficial for avoiding warnings when there
#' are multiple `numVar` variables. Typically, `dominanceVar` will be one
#' of the variables already included in `numVar`.
#' @param ... unused parameters
#'
#' @return logical vector that is `TRUE` in positions corresponding to cells
#' breaching the dominance rules.
#' @export
#'
#' @examples
#' set.seed(123)
#' z <- SSBtools::MakeMicro(SSBtoolsData("z2"), "ant")
#' z$value <- sample(1:1000, nrow(z), replace = TRUE)
#'
#' GaussSuppressionFromData(z, dimVar = c("region", "fylke", "kostragr", "hovedint"),
#' numVar = "value", candidates = CandidatesNum, primary = DominanceRule, preAggregate = FALSE,
#' singletonMethod = "sub2Sum", n = c(1, 2), k = c(65, 85), allDominance = TRUE)
#'
#'
#'num <- c(100,
#' 90, 10,
#' 80, 20,
#' 70, 30,
#' 50, 25, 25,
#' 40, 20, 20, 20,
#' 25, 25, 25, 25)
#' v1 <- c("v1",
#' rep(c("v2", "v3", "v4"), each = 2),
#' rep("v5", 3),
#' rep(c("v6", "v7"), each = 4))
#' sw <- c(1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1)
#' d <- data.frame(v1 = v1, num = num, sw = sw)
#'
#' # without weights
#' GaussSuppressionFromData(d, formula = ~v1 - 1,
#' numVar = "num", n = c(1,2), k = c(80,70),
#' preAggregate = FALSE, allDominance = TRUE, candidates = CandidatesNum,
#' primary = DominanceRule)
#'
#' # with weights, standard method
#' GaussSuppressionFromData(d, formula = ~v1 - 1,
#' numVar = "num", n = c(1,2), k = c(80,70), sWeightVar = "sw",
#' preAggregate = FALSE, allDominance = TRUE, candidates = CandidatesNum,
#' primary = DominanceRule)
#'
#' # with weights, tauargus method
#' GaussSuppressionFromData(d, formula = ~v1 - 1,
#' numVar = "num", n = c(1,2), k = c(80,70), sWeightVar = "sw",
#' preAggregate = FALSE, allDominance = TRUE, candidates = CandidatesNum,
#' primary = DominanceRule, domWeightMethod = "tauargus")
#'
#' @author Daniel Lupp and Øyvind Langsrud
#'
MagnitudeRule <- function(data,
x,
numVar,
n = NULL,
k = NULL,
pPercent = NULL,
protectZeros = FALSE,
charVar = NULL,
removeCodes = character(0),
sWeightVar = NULL,
domWeightMethod = "default",
allDominance = FALSE,
outputWeightedNum = !is.null(sWeightVar),
dominanceVar = NULL,
...) {
if (!is.null(pPercent)) {
n <- 1:2
k <- c(0, 0)
}
if (length(n) != length(k))
stop("You must provide an equal number of inputs for n and k.")
if(length(dominanceVar)){
if(length(dominanceVar) != 1){
stop("dominanceVar must be a single variable")
}
numVar <- dominanceVar
}
if (is.null(numVar))
stop("You must provide a numeric variable numVar to use the dominance rule.")
tauArgusDominance <- as.character(domWeightMethod) == "tauargus"
if (length(charVar) & tauArgusDominance)
stop("the tauArgus weight method does not work with charVar.")
if (length(numVar) > 1) {
warning("Multiple numVar were supplied, only the first is suppressed.")
numVar <- numVar[1]
}
if (!is.null(sWeightVar) & tauArgusDominance) {
if (any(data[[sWeightVar]] < 1))
warning("Some sample weights are < 1. Consider using other weighted domininace method.")
}
abs_inputnum <- abs(data[, numVar, drop = FALSE])
if (length(charVar)) {
if (length(charVar) == 1) {
charVar_groups <- data[[charVar]]
sweight <- NULL
} else {
stop("Only single charVar implemented")
}
} else {
charVar_groups <- NULL
if (is.null(sWeightVar))
sweight <- as.matrix(rep(1, nrow(data)))
else
sweight <- as.matrix(data[, sWeightVar, drop = FALSE])
}
if (length(removeCodes)) {
if (length(charVar)) {
abs_inputnum[charVar_groups %in% removeCodes, ] <- 0
} else {
abs_inputnum[as.integer(removeCodes), ] <- 0
}
}
abs_num <- as.data.frame(as.matrix(crossprod(x, as.matrix(abs_inputnum))))
abs_inputnum <- abs_inputnum[[numVar]]
prim <-
mapply(
function (a, b)
FindDominantCells(
x,
abs_inputnum,
abs_num,
a,
b,
charVar_groups = charVar_groups,
samplingWeight = sweight,
tauArgusDominance = tauArgusDominance,
returnContrib = TRUE
),
n,
k
)
if (is.null(pPercent)) {
primary <- sapply(seq_len(ncol(prim)), function(x) prim[, x] >= k[x]/100)
dominant <- apply(primary, 1, function(x) Reduce(`|`, x))
} else {
dominant <- abs(1 - prim[, 2]) < abs(pPercent/100 * prim[, 1])
}
colnames(prim) <- paste0("primary.", paste(n, k, sep = ":"))
if (!protectZeros)
output <- list(primary = dominant)
else
output <- list(primary = (dominant | (abs_num == 0)))
if (outputWeightedNum) {
wnum <- data.frame(v1 = as.vector(crossprod(x, sweight)),
v2 = as.vector(crossprod(x, sweight * data[[numVar]])))
names(wnum) <- c(sWeightVar, paste0("weighted.", numVar))
output[["numExtra"]] <- wnum
}
if (allDominance) {
if ("numExtra" %in% names(output))
output[["numExtra"]] <- cbind(output[["numExtra"]], as.data.frame(prim))
else
output[["numExtra"]] <- as.data.frame(prim)
}
if (length(names(output)) == 1)
output <- unlist(output)
output
}
#' @rdname MagnitudeRule
#' @note Explicit `protectZeros` in wrappers
#' since default needed by \code{\link{GaussSuppressionFromData}}
#' @export
DominanceRule <- function(data, n, k,
protectZeros = FALSE, ...) {
MagnitudeRule(data = data, n = n, k = k,
protectZeros = protectZeros, ...)
}
#' @rdname MagnitudeRule
#' @export
PPercentRule <- function(data, pPercent,
protectZeros = FALSE, ...) {
MagnitudeRule(data = data, pPercent = pPercent,
protectZeros = protectZeros, ...)
}
#' Method for finding dominant cells according to (possibly multiple) n,k
#' dominance rules.
#'
#' Supports functionality for grouping contributions according to holding
#' variables, as well as calculating dominance in surveys with a given sampling
#' weight. Two methods are implemented, depending on whether the sampling
#' weights sum to total population. The parameter `tauArgusDominance`
#' determines this. If `FALSE`, unweighted contributions are compared to weighted
#' cell values. If `TRUE`, the method described in in the
#' book "Statistical Disclosure Control" (Hundepool et al 2012, p. 151) is used.
#'
#' @param x model matrix describing relationship between input and published
#' cells
#' @param inputnum vector of numeric contributions for each of the input records
#' @param num vector of numeric values for each of the published cells
#' @param n vector of integers describing n parameters in n,k rules. Must be
#' same length as `k` parameter.
#' @param k vector of numeric values describing k parameters in n,k rules, where
#' percentages are described as numbers less than 100. Must be same length as
#' `n` parameter.
#' @param charVar_groups vector describing which input records should be grouped
#' @param samplingWeight vector of sampling weights associated to input records
#' @param tauArgusDominance logical value, default `FALSE`. determines how to
#' handle sampling weights in the dominance rule (see details).
#' @param returnContrib logical value, default `FALSE`. If `TRUE` return value is
#' the percentage of the first n contributors
#'
#' @return logical vector describing which publish-cells need to be suppressed.
#'
FindDominantCells <- function(x,
inputnum,
num,
n,
k,
charVar_groups,
samplingWeight,
tauArgusDominance = FALSE,
returnContrib = FALSE) {
if (is.null(samplingWeight)) {
# without sampling weight, calculate dominance directly from numerical values
max_cont <-
MaxContribution(x, inputnum, n = n, groups = charVar_groups)
max_cont[is.na(max_cont)] <- 0
if (returnContrib) {
out <- as.vector(rowSums(max_cont)/unlist(num))
out[is.nan(out)] <- 0
return(out)
} else {
return(as.vector(num > 0 & rowSums(max_cont) > num * k / 100))
}
} else {
# with sampling weights, need to weight the numerical values
max_cont_index <-
MaxContribution(x,
inputnum,
n = n,
groups = charVar_groups,
index = TRUE)
max_cont <-
apply(max_cont_index, 2, function(t)
ifelse(is.na(t), 0, inputnum[t]))
if (!tauArgusDominance) {
weighted_num <- crossprod(x, inputnum * samplingWeight)
ncontributions <- rowSums(max_cont)
}
else {
cont_weights <-
apply(max_cont_index, 2, function(t)
samplingWeight[t])
# last_index_t describes cumulative sum of contributing weights, used to
# calculate which of the contributions need to be considered
if (n == 1)
last_index_t <- cont_weights
else
last_index_t <-
t(apply(cont_weights, 1, function(t)
cumsum(t)))
# index of last contribution to be added to upsampled contribution
last_index <- apply(last_index_t, 1,
function(t) {
ind <- which(t >= n)[1]
ifelse(!length(ind), sum(!is.na(t)), ind)
})
# only keep the first n contributors (counting weights) and ensure
# rowSums(cont_weights) == n
for (ind in seq(length(last_index))) {
cont_weights[ind, last_index[ind]] <- ifelse(last_index[ind] == 1,
n,
n - last_index_t[ind, last_index[ind] - 1])
cont_weights[ind, 1:ncol(cont_weights) > last_index[ind]] <-
0
}
cont_weights[is.na(cont_weights)] <- 0
# sampling weights multiplied with contributions and added up
ncontributions <- rowSums(max_cont * cont_weights)
weighted_num <- crossprod(x, inputnum * samplingWeight)
}
if (returnContrib) {
out <- as.vector(ncontributions/weighted_num)
out[is.nan(out)] <- 0
return(out)
}
return(as.vector(weighted_num > 0 &
ncontributions >= weighted_num * k / 100))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.