View source: R/sp_reduce_dims.R
reduce_dims | R Documentation |
General function that selects the appropriate separator and applies dimension reduction.
reduce_dims(
dfs,
dfs_name,
totcode,
hrcfiles = NULL,
sep_dir = FALSE,
hrc_dir = "hrc_alt",
vars_to_merge = NULL,
nb_tab_option = "min",
limit = NULL,
over_split = FALSE,
vec_sep = c("___", "_XXX_", "_YYY_", "_TTT_", "_UVW_"),
verbose = FALSE
)
dfs |
data.frame with 4 or 5 categorical variables |
dfs_name |
name of the data.frame in the list provided by the user |
totcode |
named vector of totals for categorical variables |
hrcfiles |
named vector indicating the hrc files of hierarchical variables among the categorical variables of dfs |
sep_dir |
allows forcing the writing of hrc into a separate folder, default is FALSE |
hrc_dir |
folder to write hrc files if writing to a new folder is forced or if no folder is specified in hrcfiles |
vars_to_merge |
NULL or vector of variables to be merged: 2 in dimension 4; 3 or 4 in dimension 5 |
nb_tab_option |
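strategy to follow for choosing the variables to merge automatically: "min" (the default) minimizes the number of created tables, "max" maximizes the number of created tables, and "smart" takes the limit argument (maximum number of rows) into account
|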
limit |
maximum number of rows allowed in a table; used when nb_tab_option = "smart" or over_split = TRUE |
over_split |
if TRUE, the tables that still have more rows than limit at the end of the reduction process are split into several smaller tables; this reduces the number of hierarchies in these tables |
vec_sep |
vector of candidate separators to use |
verbose |
print the different steps of the function to inform the user of progress |
A list containing:
tabs
: named list of 3-dimensional dataframes
with nested hierarchies
alt_hrc
: named list of hrc specific to the variables created
during merging to go to dimension 3
alt_totcode
: named list of totals specific to the variables
created during merging to go to dimension 3
vars
: categorical variables of the output dataframes
sep
: separator used to link the variables
totcode
: named vector of totals for all categorical variables
hrcfiles
: named vector of hrc for categorical variables
(except the merged one)
fus_vars
: named vector of vectors representing the merged
variables during dimension reduction
library(dplyr)
# Examples for dimension 4 ----
# Cross every level of the four categorical variables (ACT, GEO, SEX, AGE)
# to obtain a complete table.
data <- expand.grid(
  ACT = c(
    "Total", "A", "B", "A1", "A2", "A3", "B1", "B2",
    "B3", "B4", "C", "D", "E", "F", "G", "B5"
  ),
  GEO = c("Total", "G1", "G2"),
  SEX = c("Total", "F", "M"),
  AGE = c("Total", "AGE1", "AGE2"),
  stringsAsFactors = FALSE
)
# Every cell of the table gets the same value, 1.
data <- mutate(as.data.frame(data), VALUE = 1)
# Make sure the folder receiving the example hierarchy file exists.
if (!dir.exists("hrc")) {
  dir.create("hrc")
}

# Hierarchy of ACT: "Total" has the children A to G; A and B are themselves
# broken down into A1-A3 and B1-B5.
hrc_act <- "hrc/hrc_ACT4.hrc"
sdcHierarchies::hier_create(
  root = "Total",
  nodes = c("A", "B", "C", "D", "E", "F", "G")
) %>%
  sdcHierarchies::hier_add(
    root = "A",
    nodes = c("A1", "A2", "A3")
  ) %>%
  sdcHierarchies::hier_add(
    root = "B",
    nodes = c("B1", "B2", "B3", "B4", "B5")
  ) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  # Drop the first row (presumably the root "Total" -- confirm against the
  # sdcHierarchies output).
  slice(-1) %>%
  # Concatenate the level marker and the name, then remove the first two
  # characters of the result.
  mutate(levels = substring(paste0(level, name), 3)) %>%
  select(levels) %>%
  # Write one node per line, without header, row names or quotes.
  write.table(
    file = hrc_act,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
# Reduce the dimension while imposing the pair of variables to merge
# (ACT and GEO); hrc files are written to the separate "output" folder.
res1 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"),
  hrcfiles = c(ACT = hrc_act),
  sep_dir = TRUE,
  hrc_dir = "output",
  vars_to_merge = c("ACT", "GEO"),
  verbose = TRUE
)
# Split the output so that tables stay under the limit (100 rows).
# No vars_to_merge is supplied here: the variables to merge are chosen
# automatically with the "smart" strategy.
res1b <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"),
  hrcfiles = c(ACT = hrc_act),
  sep_dir = TRUE,
  hrc_dir = "output",
  nb_tab_option = "smart",
  limit = 100,
  over_split = TRUE,
  verbose = TRUE
)
# Default behaviour: nb_tab_option is left at "min", which minimizes the
# number of created tables.
res2 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"),
  hrcfiles = c(ACT = hrc_act),
  sep_dir = TRUE,
  hrc_dir = "output",
  verbose = TRUE
)

# Same call, but maximizing the number of created tables.
res3 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(SEX = "Total", AGE = "Total", GEO = "Total", ACT = "Total"),
  hrcfiles = c(ACT = hrc_act),
  sep_dir = TRUE,
  hrc_dir = "output",
  nb_tab_option = "max",
  verbose = TRUE
)
# Example for dimension 5 ----
# Cross every level of the five categorical variables
# (ACT, GEO, SEX, AGE, ECO) to obtain a complete table.
data <- expand.grid(
  ACT = c(
    "Total_A",
    paste0("A", seq_len(5), "_"),
    paste0("A1_", seq_len(7)),
    paste0("A2_", seq_len(9))
  ),
  GEO = c(
    "Total_G", "GA", "GB", "GA1", "GA2",
    "GB1", "GB2", "GA3", "GB3", "GB4"
  ),
  SEX = c("Total_S", "F", "M", "F1", "F2", "M1", "M2"),
  AGE = c("Ensemble", "AGE1", "AGE2", "AGE11", "AGE12", "AGE21", "AGE22"),
  ECO = c("PIB", "Ménages", "Entreprises"),
  stringsAsFactors = FALSE,
  KEEP.OUT.ATTRS = FALSE
)
# VALUE is the row number, so every cell holds a distinct value.
data <- mutate(as.data.frame(data), VALUE = seq_len(n()))
# Hierarchy files for the dimension-5 example. They are written into the
# "hrc" folder, which must already exist at this point.
# Each pipeline builds the tree, converts it to the "argus" layout, drops the
# first row, concatenates level marker and name (minus the first two
# characters) and writes one node per line without header or quotes.

# ACT: "Total_A" > A1_ ... A5_; A1_ and A2_ are broken down further.
hrc_act <- "hrc/hrc_ACT5.hrc"
sdcHierarchies::hier_create(
  root = "Total_A",
  nodes = paste0("A", seq_len(5), "_")
) %>%
  sdcHierarchies::hier_add(
    root = "A1_",
    nodes = paste0("A1_", seq_len(7))
  ) %>%
  sdcHierarchies::hier_add(
    root = "A2_",
    nodes = paste0("A2_", seq_len(9))
  ) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level, name), 3)) %>%
  select(levels) %>%
  write.table(
    file = hrc_act,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )

# AGE: "Ensemble" > AGE1, AGE2; each of them has two children.
hrc_age <- "hrc/hrc_AGE5.hrc"
sdcHierarchies::hier_create(
  root = "Ensemble",
  nodes = c("AGE1", "AGE2")
) %>%
  sdcHierarchies::hier_add(
    root = "AGE1",
    nodes = c("AGE11", "AGE12")
  ) %>%
  sdcHierarchies::hier_add(
    root = "AGE2",
    nodes = c("AGE21", "AGE22")
  ) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level, name), 3)) %>%
  select(levels) %>%
  write.table(
    file = hrc_age,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )

# GEO: "Total_G" > GA, GB; GA has three children and GB has four.
hrc_geo <- "hrc/hrc_GEO5.hrc"
sdcHierarchies::hier_create(
  root = "Total_G",
  nodes = c("GA", "GB")
) %>%
  sdcHierarchies::hier_add(
    root = "GA",
    nodes = c("GA1", "GA2", "GA3")
  ) %>%
  sdcHierarchies::hier_add(
    root = "GB",
    nodes = c("GB1", "GB2", "GB3", "GB4")
  ) %>%
  sdcHierarchies::hier_convert(as = "argus") %>%
  slice(-1) %>%
  mutate(levels = substring(paste0(level, name), 3)) %>%
  select(levels) %>%
  write.table(
    file = hrc_geo,
    row.names = FALSE,
    col.names = FALSE,
    quote = FALSE
  )
# Results of the function on the 5-variable table ----

# Default options, with hierarchy files supplied for ACT, GEO and AGE.
res4 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(
    SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G",
    ACT = "Total_A", ECO = "PIB"
  ),
  hrcfiles = c(ACT = hrc_act, GEO = hrc_geo, AGE = hrc_age),
  sep_dir = TRUE,
  hrc_dir = "output",
  verbose = TRUE
)

# "smart" strategy with a limit of 1300 rows; only the ACT and GEO
# hierarchy files are supplied.
res5 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(
    SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G",
    ACT = "Total_A", ECO = "PIB"
  ),
  hrcfiles = c(ACT = hrc_act, GEO = hrc_geo),
  sep_dir = TRUE,
  hrc_dir = "output",
  nb_tab_option = "smart",
  limit = 1300,
  verbose = TRUE
)

# "min" strategy, then splitting of the tables that exceed 4470 rows.
res6 <- reduce_dims(
  dfs = data,
  dfs_name = "tab",
  totcode = c(
    SEX = "Total_S", AGE = "Ensemble", GEO = "Total_G",
    ACT = "Total_A", ECO = "PIB"
  ),
  hrcfiles = c(ACT = hrc_act, GEO = hrc_geo),
  sep_dir = TRUE,
  hrc_dir = "output",
  nb_tab_option = "min",
  limit = 4470,
  over_split = TRUE,
  verbose = TRUE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.