#' Transform Structured Data Lists into a Tidy Data Frame for Plotting
#'
#' This function takes a nested list of data (with up to three possible structures)
#' and transforms it into a tidy `data.frame` suitable for visualization in LISSY.
#' It supports three input structures, typically produced by the weighted summary functions in lissyrtools, such as
#' `run_weighted_mean()`, `run_weighted_percentiles()`, and `run_weighted_count()`.
#'
#' @param data_list A named list containing the data to be transformed. The structure and naming
#' conventions of this list determine how the data is processed.
#'
#' - **1st structure:** List with country names as keys and named vectors of year-values.
#' Expected when `names(data_list)` are country codes matching `get_countries_lis()` or `get_countries_lws()`.
#'
#' - **2nd structure:** List with `ccyy` abbreviations as keys, and named numeric/integer vectors, where names represent categories.
#' Typically output from functions grouped by a categorical variable, e.g. `"educ"`.
#'
#' - **3rd structure:** List with `ccyy` abbreviations as keys, each containing sublists of named vectors.
#' Represents more complex summaries with multiple grouping variables, percentiles, or shares.
#'
#' @return A tidy `data.frame` with the following columns (depending on input structure):
#' - `cname`: Country name.
#' - `year`: Year.
#' - `dname`: Dataset country-year code in the `ccyy` form.
#' - `category`: (2nd and 3rd structure) Grouping variable categories.
#' - `share` / `percentile` / `by_var`: (3rd structure) Variable describing the vector names.
#' - `value`: Numeric values from the input list.
#'
#' @details
#' Depending on the summary statistics computed before, the function renames one of the columns in the 3rd structure
#' based on pattern matching against the first vector name:
#' - If it contains both "%" and "-", the column is renamed to `share`.
#' - If it contains "%" only, the column is renamed to `percentile`.
#' - Otherwise, the column is renamed to `by_var`.
#'
#' @export
#'
#' @examples
#' \dontrun{
#'
#' library(ggplot2)
#' library(lissyrtools)
#' library(RColorBrewer)
#' library(ggthemes)
#' library(purrr)
#' library(forcats)
#'
#' data <- lissyrtools::lissyuse(data = c("es", "de"), vars = c("dhi", "educ", "pi11", "rural"), from = 2016)
#'
#' # Example usage for 1st structure
#' weighted_means <- run_weighted_mean(data, "pi11")
#' df1 <- structure_to_plot(weighted_means)
#'
#' # Example usage for 2nd structure
#' weighted_means_educ <- run_weighted_mean(data, "pi11", by = "educ")
#' df2 <- structure_to_plot(weighted_means_educ)
#'
#' # Example usage for 3rd structure
#' weighted_percentiles <- run_weighted_percentiles(data, "pi11", by = "educ")
#' df3 <- structure_to_plot(weighted_percentiles)
#'
#' # Example usage for 3rd structure but with shares
#' weighted_percentiles <- run_weighted_percentiles(data, "pi11", by = "educ", share = TRUE)
#' df3 <- structure_to_plot(weighted_percentiles)
#'
#'
#'
#' # Chart example: Plotting education group shares over years by country
#'
#'
#' run_weighted_count(data, "educ", percent = TRUE, na.rm = TRUE) %>%
#' structure_to_plot() %>%
#' ggplot(aes(x = year, y = value, color = cname, group = interaction(cname, category))) +
#' geom_line(linewidth = 1.2) +
#' geom_point(size = 0.6) +
#' scale_color_stata() +
#' labs(
#' x = "Year",
#' y = "Share of Education Group (%)",
#' color = "Country"
#' ) +
#' expand_limits(y = 0) +
#' facet_grid(~fct_relevel(category, 'low', 'medium', 'high')) +
#' theme_bw() +
#' theme(axis.text.x = element_text(angle = 25, hjust = 1))
#'
#'
#' # Another example: plotting weighted mean of 'dhi' over years by country
#' data %>%
#' map(~ .x %>% filter(relation == 1000) %>% mutate(new_wgt = nhhmem * hwgt)) %>%
#' apply_ppp_adjustment("dhi", "lis", "lisppp") %>%
#' run_weighted_mean("dhi", "new_wgt") %>%
#' structure_to_plot() %>%
#' ggplot(aes(x = year, y = value, color = cname, group = cname)) +
#' geom_point() +
#' geom_line() +
#' labs(
#' title = "dhi trend",
#' caption = "Source: Luxembourg Income Study"
#' ) +
#' scale_color_stata() +
#' scale_y_continuous(labels = scales::comma) +
#' theme_minimal() +
#' theme(axis.text.x = element_text(angle = 25, hjust = 1))
#' }
structure_to_plot <- function(data_list) {
  # Calls that produce each accepted shape, assuming
  # data <- lissyrtools::lissyuse(data = c("es", "de"),
  #   vars = c("dhi", "educ", "pi11", "rural"), from = 2016)
  #
  # 1st structure (country name -> named vector whose names are years):
  #   - run_weighted_mean(data, "pi11")
  #   - run_weighted_percentiles(data, "dhi", probs = 0.5)
  #
  # 2nd structure (ccyy -> named numeric/integer vector):
  #   - run_weighted_mean(data, "pi11", by = "educ")
  #   - run_weighted_percentiles(data, "dhi", probs = 0.5, share = TRUE)
  #   - run_weighted_percentiles(data, "dhi", probs = seq(0.1, 0.9, 0.1))
  #   - run_weighted_count(data, "educ", na.rm = TRUE)
  #   - run_weighted_count(data, "educ", na.rm = TRUE, percent = TRUE)
  #
  # 3rd structure (ccyy -> list of named vectors):
  #   - run_weighted_percentiles(data, "pi11", by = "educ")
  #   - run_weighted_percentiles(data, "pi11", by = "educ", share = TRUE)
  #   - run_weighted_count(data, "educ", by = "rural", na.rm = TRUE)
  #   - run_weighted_count(data, "educ", by = "rural", na.rm = TRUE, percent = TRUE)

  # Fail early with a clear message: every branch below dispatches on the
  # element names, so an empty, unnamed or partially named input cannot be
  # classified.
  list_names <- names(data_list)
  if (!is.list(data_list) || length(data_list) == 0L || is.null(list_names) ||
      anyNA(list_names) || !all(nzchar(list_names))) {
    stop("`data_list` must be a non-empty named list.", call. = FALSE)
  }

  # Drops a leading bracketed number such as "[1]" from a label.
  strip_index_prefix <- function(labels) {
    stringr::str_remove(labels, "^\\[\\d+\\]")
  }

  # First class of every element; decides between the 2nd and 3rd structure.
  element_class <- vapply(data_list, function(x) class(x)[1], character(1))

  # `ccyy` keys are exactly four characters long. The check must be on the
  # number of characters of each name, not on how many names there are.
  has_ccyy_names <- all(nchar(list_names) == 4L)

  # Country name -> country code lookup. LIS entries come first, so names
  # present in both tables resolve exactly as a LIS-only lookup would.
  country_codes <- c(get_countries_lis(), get_countries_lws())

  if (all(list_names %in% names(country_codes))) {
    # 1st structure: one row per (country, year).
    result_df <- purrr::imap(data_list, function(values, country) {
      dplyr::mutate(
        tibble::enframe(values, name = "year", value = "value"),
        cname = country
      )
    }) %>%
      purrr::list_rbind() %>%
      dplyr::mutate(
        # Build the dataset code from the country code and the last two
        # digits of the year, while `year` is still a string.
        dname = paste0(country_codes[cname], stringr::str_sub(year, 3, 4)),
        year = as.integer(year)
      ) %>%
      dplyr::select(cname, year, dname, value)
  } else if (has_ccyy_names &&
             all(element_class %in% c("numeric", "integer"))) {
    # 2nd structure: one row per (dataset, category).
    result_df <- purrr::imap(data_list, function(values, ccyy) {
      dplyr::mutate(
        tibble::enframe(values, name = "category", value = "value"),
        dname = ccyy
      )
    }) %>%
      purrr::list_rbind() %>%
      dplyr::mutate(
        cname = ccyy_to_cname(dname),
        year = as.integer(ccyy_to_yyyy(dname)),
        category = strip_index_prefix(category)
      ) %>%
      dplyr::select(cname, year, dname, category, value)
  } else if (has_ccyy_names && all(element_class == "list")) {
    # 3rd structure: one row per (dataset, category, vector name).
    result_df <- purrr::imap(data_list, function(groups, ccyy) {
      # `subgroup` is the name of each sublist (the grouping category);
      # the names of the inner vector are percentiles, shares, or the
      # levels of the `by` variable of run_weighted_count().
      purrr::list_rbind(purrr::imap(groups, function(values, subgroup) {
        dplyr::mutate(
          tibble::enframe(values, name = "vector_names", value = "value"),
          dname = ccyy,
          category = strip_index_prefix(subgroup),
          name = strip_index_prefix(vector_names)
        )
      }))
    }) %>%
      purrr::list_rbind() %>%
      dplyr::mutate(
        cname = ccyy_to_cname(dname),
        year = as.integer(ccyy_to_yyyy(dname))
      ) %>%
      dplyr::select(cname, year, dname, category, name, value)

    # Rename `name` according to what the first label looks like.
    # isTRUE() turns a missing first label (empty result) into "no match",
    # so the generic `by_var` label is used instead of `if (NA)` failing.
    first_label <- result_df$name[1]
    has_percent <- isTRUE(stringr::str_detect(first_label, "%"))
    has_dash <- isTRUE(stringr::str_detect(first_label, "-"))
    new_label <- if (has_percent && has_dash) {
      "share"
    } else if (has_percent) {
      "percentile"
    } else {
      "by_var"
    }
    names(result_df)[names(result_df) == "name"] <- new_label
  } else {
    stop(
      "`data_list` does not match any supported structure: its names must ",
      "be either country names known to get_countries_lis() / ",
      "get_countries_lws(), or four-character `ccyy` codes whose elements ",
      "are all numeric/integer vectors or all lists.",
      call. = FALSE
    )
  }

  # Tell the user which columns are available for plotting.
  cat("The resulting data frame's column names are:\n")
  print(names(result_df))

  result_df
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.