restore_format: Function to reverse the process of dimension reduction

View source: R/sp_restore_format.R

restore_formatR Documentation

Function to reverse the process of dimension reduction

Description

Function to reverse the process of dimension reduction

Usage

restore_format(masq, res)

Arguments

masq

a list of data.frames on which the secret has been applied

res

the result of the dimension reduction function (to retrieve the merged variables) and the separator (sep).

Value

the original dataframe with 4 or 5 dimensions

Examples

# Examples with dimension 4
library(dplyr)
data <- expand.grid(
  ACT = c("Total", "A", "B", "A1", "A2","A3", "B1", "B2","B3","B4","C",
          "name_non_changed_vars","E","F","G","B5"),
  GEO = c("Total", "G1", "G2"),
  SEX = c("Total", "F", "M"),
  AGE = c("Total", "AGE1", "AGE2"),
  stringsAsFactors = FALSE
) %>%
  as.data.frame()

data <- data %>% mutate(VALUE = 1)

hrc_act <- "hrc_ACT.hrc"

sdcHierarchies::hier_create(
  root = "Total",
  nodes = c("A","B","C","name_non_changed_vars","E","F","G")
) %>%
  sdcHierarchies::hier_add(root = "A", nodes = c("A1","A2","A3")) %>%
  sdcHierarchies::hier_add(root = "B", nodes = c("B1","B2","B3","B4","B5")) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level,name),3)) %>%
  select(levels) %>%
  write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE)

# Result of the function by forcing some variables to be merged
res_red_dim <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX="Total",AGE="Total", GEO="Total", ACT="Total"),
  hrcfiles = c(ACT = hrc_act),
  sep_dir = TRUE,
  hrc_dir = "output",
  vars_to_merge = c("ACT","GEO")
)

res1 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim)
dim(setdiff(res1,data))[1] == 0

# return TRUE
# We have exactly the sames cases in the datatable after splitting and unsplitting data

# Exemple dimension 5

data <- expand.grid(
  ACT = c("Total_A", paste0("A", seq(1,5),"_"),paste0("A1_", seq(1,7)),
          paste0("A2_", seq(1,9))),
  GEO = c("Total_G", "GA", "GB", "GA1", "GA2", "GB1", "GB2","GA3","GB3","GB4"),
  SEX = c("Total_S", "F", "M","F1","F2","M1","M2"),
  AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"),
  ECO = c("PIB","Ménages","Entreprises"),
  stringsAsFactors = FALSE,
  KEEP.OUT.ATTRS = FALSE
) %>%
  as.data.frame()

data <- data %>% mutate(VALUE = 1:n())

hrc_act <- "hrc_ACT.hrc"
sdcHierarchies::hier_create(root = "Total_A", nodes = paste0("A", seq(1,5),"_")) %>%
  sdcHierarchies::hier_add(root = "A1_", nodes = paste0("A1_", seq(1,7))) %>%
  sdcHierarchies::hier_add(root = "A2_", nodes = paste0("A2_", seq(1,9))) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level,name),3)) %>%
  select(levels) %>%
  write.table(file = hrc_act, row.names = FALSE, col.names = FALSE, quote = FALSE)

hrc_geo <- "hrc_GEO.hrc"
sdcHierarchies::hier_create(root = "Total_G", nodes = c("GA","GB")) %>%
  sdcHierarchies::hier_add(root = "GA", nodes = c("GA1","GA2","GA3")) %>%
  sdcHierarchies::hier_add(root = "GB", nodes = c("GB1","GB2","GB3","GB4")) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level,name),3)) %>%
  select(levels) %>%
  write.table(file = hrc_geo, row.names = FALSE, col.names = FALSE, quote = FALSE)

# function's result

res_red_dim <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX="Total_S",AGE="Ensemble", GEO="Total_G", ACT="Total_A", ECO = "PIB"),
  hrcfiles = c(ACT = hrc_act, GEO = hrc_geo),
  sep_dir = TRUE,
  hrc_dir = "output"
)

res2 <- restore_format(masq = res_red_dim$tabs, res = res_red_dim)

InseeFrLab/rtauargus documentation built on Feb. 25, 2025, 6:32 a.m.