#' Protect one table by suppressing cells with Tau-Argus
#'
#' The function prepares all the files needed by Tau-Argus and launches the
#' software with the good settings and gets back the result.
#'
#' @inheritParams tab_rda
#' @inheritParams tab_arb
#' @inheritParams run_arb
#'
#' @param files_name string used to name all the files needed to process.
#' All files will have the same name, only their extension will be different.
#' @param dir_name string indicated the path of the directory in which to save
#' all the files (.rda, .hst, .txt, .arb, .csv) generated by the function.
#' @param unif_labels boolean, if explanatory variables have to be standardized
#' @param split_tab `r lifecycle::badge("experimental")` boolean,
#' whether to reduce dimension to 3 while treating a table of dimension 4 or 5
#' (default to `FALSE`)
#' @param limit `r lifecycle::badge("experimental")` numeric, used to choose
#' which variable to merge (if nb_tab_option = 'smart')
#' and split table with a number of row above this limit in order to avoid
#' tauargus failures
#' @param nb_tab_option `r lifecycle::badge("experimental")` strategy to follow
#' to choose variables automatically while splitting:
#' \itemize{
#' \item{`"min"`: minimize the number of tables;}
#' \item{`"max"`: maximize the number of tables;}
#' \item{`"smart"`: minimize the number of tables under the constraint
#' of their row count.}
#' }
#' @param ... any parameter of the tab_rda, tab_arb or run_arb functions, relevant
#' for the treatment of tabular.
#'
#' @return
#' If output_type equals to 4 and split_tab = FALSE,
#' then the original tabular is returned with a new
#' column called Status, indicating the status of the cell coming from Tau-Argus :
#' "A" for a primary secret due to frequency rule, "B" for a primary secret due
#' to dominance rule, "D" for secondary secret and "V" for no secret cell.
#'
#' If split_tab = TRUE,
#' then the original tabular is returned with some new columns which are boolean
#' variables indicating the status of a cell at each iteration of the protection
#' process as we get with `tab_multi_manager()` function. `TRUE`
#' denotes a cell that have to be suppressed. The last column is then the
#' final status of the suppression process of the original table.
#'
#' If `split_tab = FALSE` and `output_type` doesn't equal to `4`,
#' then the raw result from tau-argus is returned.
#'
#' @section Standardization of explanatory variables and hierarchies:
#'
#' The boolean argument `unif_labels` is useful to
#' prevent some common errors in using Tau-Argus. Indeed, Tau-Argus needs that,
#' within a same level of a hierarchy, the labels have the same number of
#' characters. When the argument is set to TRUE, `tab_rtauargus`
#' standardizes the explanatory variables to prevent this issue.
#' Hierarchical explanatory variables (explanatory variables associated to
#' a hrc file) are then modified in the tabular data and an another hrc file is
#' created to be relevant with the tabular. In the output, these modifications
#' are removed.
#'
#' @examples
#'\dontrun{
#' library(dplyr)
#' data(turnover_act_size)
#'
#' # Prepare data with primary secret ----
#' turnover_act_size <- turnover_act_size %>%
#' mutate(
#' is_secret_freq = N_OBS > 0 & N_OBS < 3,
#' is_secret_dom = ifelse(MAX == 0, FALSE, MAX/TOT>0.85),
#' is_secret_prim = is_secret_freq | is_secret_dom
#' )
#'
#' # Make hrc file of business sectors ----
#' data(activity_corr_table)
#' hrc_file_activity <- activity_corr_table %>%
#' write_hrc2(file_name = "hrc/activity")
#'
#' # Compute the secondary secret ----
#' options(
#' rtauargus.tauargus_exe =
#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe"
#' )
#'
#' res <- tab_rtauargus(
#' tabular = turnover_act_size,
#' files_name = "turn_act_size",
#' dir_name = "tauargus_files",
#' explanatory_vars = c("ACTIVITY", "SIZE"),
#' hrc = c(ACTIVITY = hrc_file_activity),
#' totcode = c(ACTIVITY = "Total", SIZE = "Total"),
#' secret_var = "is_secret_prim",
#' value = "TOT",
#' freq = "N_OBS",
#' verbose = FALSE
#' )
#'
#' # Reduce dims feature
#'
#' data(datatest1)
#' res_dim4 <- tab_rtauargus(
#' tabular = datatest1,
#' dir_name = "tauargus_files",
#' explanatory_vars = c("A10", "treff","type_distrib","cj"),
#' totcode = rep("Total", 4),
#' secret_var = "is_secret_prim",
#' value = "pizzas_tot_abs",
#' freq = "nb_obs_rnd",
#' split_tab = TRUE
#' )
#' }
#' @export
tab_rtauargus <- function(
tabular,
explanatory_vars,
files_name = NULL,
dir_name = NULL,
totcode = getOption("rtauargus.totcode"),
hrc = NULL,
secret_var = NULL,
secret_no_pl = NULL,
cost_var = NULL,
value = "value",
freq = "freq",
ip = 10,
maxscore = NULL,
suppress = "MOD(1,5,1,0,0)",
safety_rules = paste0("MAN(",ip,")"),
show_batch_console = FALSE,
output_type = 4,
output_options = "",
unif_labels = TRUE,
split_tab = FALSE,
nb_tab_option = "smart",
limit = 14700,
...
){
.dots <- list(...)
## 0. CONFLITS PARAMETRES .................
# tabular not a data.frame
if(!is.data.frame(tabular)){
stop("tabular has to be a dataframe.")
}
if(any(!explanatory_vars %in% names(tabular))){
stop("At least one of the explanatory vars is not a tabular's column name")
}
if(any(!c(value, freq) %in% names(tabular))){
stop(paste0(value, " or ", freq, " is not a tabular's column name"))
}
if(!is.null(maxscore)){
if(!maxscore %in% names(tabular)){
stop(paste0(maxscore, " is not a tabular's column name"))
}
}
if(!is.null(cost_var)){
if(!cost_var %in% names(tabular)){
stop(paste0(cost_var, " is not a tabular's column name"))
}
}
if(!is.null(secret_var)){
if(!secret_var %in% names(tabular)){
stop(paste0(secret_var, " is not a tabular's column name"))
}
}
if(length(totcode) < length(explanatory_vars)){
stop("totcode must have the same length as explanatory_vars")
}
if(length(names(totcode)) < length(explanatory_vars)){
names(totcode) <- explanatory_vars
}
if(is.null(files_name)) files_name <- "targus_file"
if(is.null(dir_name)) dir_name <- getwd()
if (split_tab){
# detect secret_var = NULL
# We want to split the table but the primary secret have not been posed
if ( !grepl("MAN", safety_rules) ){
stop("While using split_tab = TRUE, you can't use tauargus to put primary secret")
}
if ( is.null(secret_var) ){
stop("While using split_tab = TRUE, a secret_var has to be provided")
}
# split_tab strategy only work with dimension 4 or 5 tables
if ( ! length(explanatory_vars) %in% c(4,5) ){
stop(
"You use split_tab = TRUE. However it only works with 4 or 5 dimensions
tables."
)
}
}
if (length(explanatory_vars) %in% c(4,5)){
if (split_tab){
params_rt4 <- formals(fun = "tab_rtauargus4")
params_rt4 <- params_rt4[1:(length(params_rt4)-1)]
call <- sys.call(); call[[1]] <- as.name('list')
new_params <- eval.parent(call)
for(param in intersect(names(params_rt4), names(new_params))){
params_rt4[[param]] <- new_params[[param]]
}
params_rt4$tabular <- tabular
params_rt4$totcode <- totcode
params_rt4$dir_name <- dir_name
params_rt4$files_name <- files_name
return(do.call("tab_rtauargus4", params_rt4))
} else {
message("Warning :
It is highly recommended to use split_tab = TRUE when using rtauargus with 4 or 5 dimensions tables.
It allows to split the table in several tables with 3 dimensions.
With split_tab = FALSE, tauargus treats the table in 4 or 5 dimensions.
In this case, the secondary secret may not being optimal according to tauargus itself
and the process may take longer.")
}
}
## 1. TAB_RDA .....................
tabular_original <- tabular
# uniformisation des chaines de caractères des variables catégorielles, hors total
# tabular ......................
if(unif_labels){
res_unif <- uniformize_labels(tabular, explanatory_vars, hrc, totcode)
tabular <- res_unif$data
if(!is.null(hrc)) hrc <- res_unif$hrc_unif
}
# parametres
param_tab_rda <- param_function(tab_rda, .dots)
param_tab_rda$tabular <- tabular
param_tab_rda$tab_filename <- file.path(dir_name, paste0(files_name, ".tab"))
param_tab_rda$rda_filename <- file.path(dir_name, paste0(files_name, ".rda"))
param_tab_rda$hst_filename <- if(is.null(secret_var) & is.null(cost_var)) NULL else file.path(dir_name, paste0(files_name, ".hst"))
param_tab_rda$explanatory_vars <- explanatory_vars
param_tab_rda$hrc <- hrc
param_tab_rda$totcode <- totcode
param_tab_rda$secret_var <- secret_var
param_tab_rda$secret_no_pl <- secret_no_pl
param_tab_rda$cost_var <- cost_var
param_tab_rda$value <- value
param_tab_rda$freq <- freq
param_tab_rda$ip <- ip
param_tab_rda$maxscore <- maxscore
# appel (+ récuperation noms tab hst et rda)
input <- do.call(tab_rda, param_tab_rda)
## 2. TAB_ARB .........................
# parametres
param_arb <- param_function(tab_arb, .dots)
param_arb$tab_filename <- input$tab_filename
param_arb$rda_filename <- input$rda_filename
param_arb$hst_filename <- input$hst_filename
param_arb$arb_filename <- file.path(dir_name, paste0(files_name, ".arb"))
param_arb$output_names <- file.path(dir_name, paste0(files_name, ".csv"))
#TODO : generaliser le choix de l'extension
param_arb$output_type <- output_type
param_arb$output_options <- output_options
param_arb$explanatory_vars <- explanatory_vars
param_arb$value <- value
param_arb$safety_rules <- safety_rules
param_arb$suppress <- suppress
# appel (+ récupération nom batch)
batch <- do.call(tab_arb, param_arb)
## 3. RUN_ARB ...........................
# parametres
param_run0 <- param_function(run_arb, .dots)
param_system <- param_function(system, .dots)
param_run <- c(param_run0, param_system)
param_run$arb_filename <- param_arb$arb_filename
param_run$logbook <- file.path(dir_name, paste0(files_name, ".txt"))
param_run$is_tabular <- TRUE
param_run$show_batch_console <- show_batch_console
# appel
res <- do.call(run_arb, param_run)
# RESULTAT .............................
if(output_type == 4){
res_import <- utils::read.csv(
param_arb$output_names,
header = FALSE,
col.names = c(explanatory_vars, value, freq, "Status","Dom"),
colClasses = c(rep("character", length(explanatory_vars)), rep("numeric",2), "character", "numeric"),
stringsAsFactors = FALSE,
na.strings = ""
)
if(unif_labels){
res_import <- cbind.data.frame(
apply(res_import[,explanatory_vars,drop=FALSE], 2, rev_var_pour_tau_argus),
res_import[, !names(res_import) %in% explanatory_vars]
)
}
mask <- merge(tabular_original, res_import[,c(explanatory_vars,"Status")], by = explanatory_vars, all = TRUE)
utils::write.csv(
res_import,
file = param_arb$output_names,
row.names = FALSE
)
return(mask)
}else{
if(unif_labels){
res <- cbind.data.frame(
apply(res[,explanatory_vars,drop=FALSE], 2, rev_var_pour_tau_argus),
res[, !names(res) %in% explanatory_vars]
)
}
return(res)
}
}
#' Wrapper of tab_rtauargus adapted for `tab_multi_manager` function.
#'
#' @inheritParams tab_rtauargus
#' @param ip Interval Protection Level (10 by default)
#' @param ... Other arguments of `tab_rtauargus` function
#'
#' @return
#' The original tabular is returned with a new
#' column called Status, indicating the status of the cell coming from Tau-Argus :
#' "A" for a primary secret due to frequency rule, "B" for a primary secret due
#' to dominance rule, "D" for secondary secret and "V" for no secret cell.
#'
#' @seealso `tab_rtauargus`
#'
#' @export
#'
#' @examples
#'\dontrun{
#' library(dplyr)
#' data(turnover_act_size)
#'
#' # Prepare data with primary secret ----
#' turnover_act_size <- turnover_act_size %>%
#' mutate(
#' is_secret_freq = N_OBS > 0 & N_OBS < 3,
#' is_secret_dom = ifelse(MAX == 0, FALSE, MAX/TOT>0.85),
#' is_secret_prim = is_secret_freq | is_secret_dom
#' )
#'
#' # Make hrc file of business sectors ----
#' data(activity_corr_table)
#' hrc_file_activity <- activity_corr_table %>%
#' write_hrc2(file_name = "hrc/activity")
#'
#' # Compute the secondary secret ----
#' options(
#' rtauargus.tauargus_exe =
#' "Y:/Logiciels/TauArgus/TauArgus4.2.3/TauArgus.exe"
#' )
#'
#' res <- tab_rtauargus2(
#' tabular = turnover_act_size,
#' files_name = "turn_act_size",
#' dir_name = "tauargus_files",
#' explanatory_vars = c("ACTIVITY", "SIZE"),
#' hrc = c(ACTIVITY = hrc_file_activity),
#' totcode = c(ACTIVITY = "Total", SIZE = "Total"),
#' secret_var = "is_secret_prim",
#' value = "TOT",
#' freq = "N_OBS"
#' )
#'
#' # reduce dimensions feature
#' data(datatest1)
#' res_dim4 <- tab_rtauargus2(
#' tabular = datatest1,
#' dir_name = "tauargus_files",
#' explanatory_vars = c("A10", "treff","type_distrib","cj"),
#' totcode = rep("Total", 4),
#' secret_var = "is_secret_prim",
#' value = "pizzas_tot_abs",
#' freq = "nb_obs_rnd",
#' split_tab = TRUE
#' )
#' }
tab_rtauargus2 <- function(
tabular,
files_name = NULL,
dir_name = NULL,
explanatory_vars,
totcode,
hrc = NULL,
secret_var = NULL,
secret_no_pl = NULL,
cost_var = NULL,
value = "value",
freq = "freq",
ip = 10,
suppress = "MOD(1,5,1,0,0)",
split_tab = TRUE,
nb_tab_option = "smart",
limit = 14700,
...
){
.dots = list(...)
params <- param_function(tab_rtauargus, .dots)
params$tabular = tabular
params$files_name = files_name
params$dir_name = dir_name
params$explanatory_vars = explanatory_vars
params$totcode = totcode
params$hrc = hrc
params$secret_var = secret_var
params$secret_no_pl = secret_no_pl
params$cost_var = cost_var
params$value = value
params$freq = freq
if(is.null(ip)){
if(!"safety_rules" %in% names(params)){
stop("Either ip or safety_rules has to be set.")
}
}else{
params$safety_rules = "MAN(0)" #paste0("MAN(",ip,")")
}
params$ip = ip
params$suppress = suppress
params$split_tab = if(length(explanatory_vars) > 3) split_tab else FALSE
params$dir_name = if(params$split_tab) file.path(dir_name, files_name) else dir_name
params$nb_tab_option = nb_tab_option
params$limit = limit
params$show_batch_console = FALSE
params$output_type = 4
params$output_options = ""
params$unif_labels = TRUE
params$separator = ","
params$verbose = FALSE
res <- do.call("tab_rtauargus", params)
if(params$split_tab & length(explanatory_vars) > 3){
vars_secret_iteration <-
names(res)[grepl("^is_secret_[1-9].*$", names(res), perl=TRUE)]
last_iteration <- names(res)[length(names(res))]
res$Status <- ifelse(res[[secret_var]], "B",
ifelse(res[[last_iteration]], "D", "V")
)
res <- res[,c(setdiff(names(res), vars_secret_iteration))]
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.