#' @title Modify a ptable suitable for frequency count variables
#'
#' @description [modify_cnt_ptable()] is a function to modify the standard
#' ptable for count variables that is generated by [create_cnt_ptable()] or
#' within the 'cellKey'-package. The noise intervals in
#' the standard ptable are ordered from -D to D. A modified ptable still has
#' the same properties as the standard ptable but can ensure a higher
#' protection of perturbed frequency tables since the noise probabilities are
#' split and the intervals are rearranged.
#'
#' @details
#' In a first step, the noise probabilities larger than a threshold value
#' will be split. Then, the split noise probabilities are randomly rearranged
#' using a seed (the modifications is replicable). Finally, the intervals of
#' the ptable will be adjusted.
#'
#' @seealso [create_ptable()]
#'
#' @param input The ptable-object of class 'ptable', 'ck_params' or data.table
#' @param threshold The maximum width of the intervals after modification
#' @param seed A seed for the rearrangement of the split intervals
#' @return Returns an object of class [ptable-class] or a data.table.
#' @author Tobias Enderle, \email{tobias.enderle@@destatis.de}
#' @examples
#' # Original ptable
#' ptab <- create_cnt_ptable(3, 1)
#'
#' # modified ptable
#' ptab_mod <- modify_cnt_ptable(ptab, 0.3, seed = 5467)
#' ptab_mod@pTable
#'
#' @export
#' @rdname modify_cnt_ptable
#' @md
modify_cnt_ptable <- function(input,
threshold = 0.2,
seed = NULL){
. <- ptab_obj_new <- ptab <- NULL
if (!(threshold <= 1 & threshold > 0))
stop("Please, specify a threshold larger than 0 and less equal 1.")
# Check Input Type: ptable-object or ptable
if (class(input)[1] == "ptable"){
if (!(input@table == "cnts"))
stop("Only works with ptables for frequency count tables!")
ptab_obj_new <- copy(input)
ptab <- ptab_obj_new@pTable
} else if (class(input)[1] == "data.table") {
stopifnot(identical(
names(input),
c("i", "j", "p", "v", "p_int_lb", "p_int_ub", "type")
))
ptab <- input
} else if (class(input)[1] == "ck_params") {
if (!(input$type == "cnts"))
stop("Only works with ptables for frequency count tables!")
stopifnot(identical(
names(input$params$ptable),
c("i", "j", "p", "v", "lb", "ub", "type")
))
ck_obj_new <- copy(input)
ptab <- ck_obj_new$params$ptable
setnames(ptab, "lb", "p_int_lb")
setnames(ptab, "ub", "p_int_ub")
} else {
stop("Input must be an object of class 'ptable', 'ck_params' or a ptable
of class 'data.table'.")
}
modify <- function(ptab, seed, threshold){
. <- p <- i <- p_int_ub <- p_int_lb <- v <- NULL
# Function to split the probabilities into smaller parts
# (of size 'threshold' for maximum)
# - each interval must be at least of size 'threshold'
# - only split intervals that are smaller than 1
# (e.g. i=0 has p=1 and doesn't have to be splitted)
splitter <- function(x, threshold){
if (x >= threshold && x != 1){
times <- floor(x / threshold)
y <- c(rep(threshold, times))
y <- c(y, x-sum(y))
} else {
y <- x
}
return(y)
}
# split the probabilities of each noise into smaller parts
# (according to the threshold value)
erg <- lapply(ptab$p, function(x) splitter(x = x, threshold = threshold ))
# enlarge the ptable using the split noise probabilities
ptab_mod <- ptab[rep(1:.N, lapply(erg, length)),]
# add the new probabilities to the enlarged ptable
ptab_mod[, p:= unlist(erg)][]
# reorder the noises the within each group 'i' using a seed for replication
set.seed(seed)
ptab_mod[, order := sample(1:.N, .N), by = list(i)]
ptab_mod <- ptab_mod[order(i,order)]
# new computation of intervals using the split and reordered noise probs
ptab_mod[, p_int_ub := cumsum(p), by = list(i)]
ptab_mod[, p_int_lb := p_int_ub - p]
# rounding to have 'ndigits'
ndigits <- 8
ptab_mod[, p_int_lb := round(p_int_lb, ndigits)]
ptab_mod[, p_int_ub := round(p_int_ub, ndigits)]
# IMPORTANT step: Due to rounding errors, 'p' is replaced
# by the differences of the rounded intervals
ptab_mod[, p := p_int_ub - p_int_lb][]
# help variable: consecutive intervals with identical noise
ptab_mod[, diff:= c(NA, diff(v)), by = list(i)]
# meta information about consecutive intervals with identical noise
#ptab_mod[, sum(diff==0, na.rm = TRUE), by = list(i)]
cnt_cons_intervals <- ptab_mod[, sum(diff==0, na.rm = TRUE)]
if (cnt_cons_intervals > 0)
message(
paste0(
"There are ",
cnt_cons_intervals,
" consecutive intervals with identical noise. You can try another
'seed' or proceed. At least, check whether the modified ptable has
sufficiently rearranged sub-intervals."
)
)
ptab_mod[, c("diff", "order") := NULL][]
return(ptab_mod)
}
ptab_mod <- modify(ptab = ptab, seed = seed, threshold = threshold)
attr(ptab_mod, "intervals") <- "modified"
if (class(input)[1] == "ptable"){
ptab_obj_new@pTable <- ptab_mod
output <- ptab_obj_new
} else if (class(input)[1] == "ck_params") {
setnames(ptab_mod, "p_int_ub", "ub")
setnames(ptab_mod, "p_int_lb", "lb")
ck_obj_new$params$ptable <- ptab_mod
output <- ck_obj_new
} else {
output <- ptab_mod
}
message("\n NOTE for Tau-Argus:
Please use a new Tau-Argus Release (>= 4.2.3).")
return(output)
}
#' @rdname modify_cnt_ptable
#' @usage NULL
#' @export
modify_cnts_ptable <- modify_cnt_ptable
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.