knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", out.width = "100%" ) knitr::opts_chunk$set(message = F, warning = F)
note: see original discussion here: https://evamaerey.github.io/mytidytuesday/2022-02-14-tables/tables.html and thoughtful contributions from @shannonpileggi and @brshallow https://github.com/EvaMaeRey/mytidytuesday/issues/3
And, you know, I'd get a dataset. And, in my head I could very clearly kind of picture, I want to put this on the x-axis. Let's put this on the y-axis, draw a line, put some points here, break it up by this variable. And then, like, getting that vision out of my head, and into reality, it's just really, really hard. Just, like, felt harder than it should be. Like, there's a lot of custom programming involved, where I just felt, like, to me, I just wanted to say, like, you know, this is what I'm thinking, this is how I'm picturing this plot. Like you're the computer 'Go and do it'. ... and I'd also been reading about the Grammar of Graphics by Leland Wilkinson, I got to meet him a couple of times and ... I was, like, this book has been, like, written for me. - https://www.trifacta.com/podcast/tidy-data-with-hadley-wickham/
library(ggplot2) StatSum$default_aes <- aes(label = after_stat(n)) # I want to put this on the x-axis (cols) tidytitanic::tidy_titanic |> ggplot( # I want to put this on the x-axis (cols) aes(x = sex, # I want to put this on the y- axis (rows) y = survived) ) # grouping and computation happen in one step, filling in 'table' last_plot() + stat_sum(geom = "text")
Under the hood:
But API:
data_filter <- function(data, filter = TRUE){ data <- data |> dplyr::filter({{filter}}) data } data_define_value <- function(data, value = NULL, wt = NULL){ value_quo <- rlang::enquo(value) wt_quo <- rlang::enquo(wt) if(rlang::quo_is_null(value_quo) ){ ## adding a value as 1 if there is none data <- data |> dplyr::mutate(value = 1) }else{ data <- data |> dplyr::mutate(value = {{value}}) } #### weighting #### if(!rlang::quo_is_null(wt_quo) ){ data <- data |> dplyr::mutate(value = .data$value * {{wt}}) } data } data_to_grouped <- function(data, cols, rows){ ### grouping by tabulation vars col and row data |> dplyr::group_by(dplyr::across(c({{cols}}, {{rows}})), .drop = TRUE) } data_grouped_to_summarized <- function(data, fun = NULL){ if(is.null(fun)) {fun <- sum} ## adding a value as 1 if there is none ### summarizing #### data |> dplyr::summarise(summary = fun(.data$value)) } data_summarized_to_proportioned <- function(data, prop = F, percent = F, within = NULL, round = 2){ # proportion case or percent within_quo <- rlang::enquo(within) # totals_within_quo <- rlang::enquo(totals_within) if(is.null(prop)) {prop <- FALSE} if(prop|percent){ mult <- ifelse(percent, 100, 1) if(is.null(round)){round <- ifelse(percent, 1, 3)} # prop is across all data if(rlang::quo_is_null(within_quo) ){ data <- data |> dplyr::ungroup() |> dplyr::mutate(prop = round(.data$summary*mult/sum(.data$summary), round)) # prop is within categories specified by within variable }else{ data <- data |> dplyr::ungroup() |> dplyr::group_by(dplyr::across(c({{within}})), .drop = FALSE) |> dplyr::mutate(prop = round(.data$summary*mult/sum(.data$summary), round)) } } if(prop|percent){data$display <- data$prop}else{data$display <- data$summary} data } data_proportioned_to_pivoted <- function(data, pivot = TRUE, cols = NULL){ cols_quo <- rlang::enquo(cols) tidy <- data |> dplyr::ungroup() # do not pivot if argument pivot false or if no columns specified if(pivot == F){ tidy # otherwise pivot by columns }else if(rlang::quo_is_null(cols_quo) & pivot){ tidy <- tidy |> dplyr::select(-summary) if(!is.null(data$prop)|!is.null(data$percent)){ tidy <- tidy |> dplyr::select(-prop) } tidy |> dplyr::rename(value = display) } else if(!rlang::quo_is_null(cols_quo) & pivot){ # keep only display column, and tabulation vars tidy <- tidy |> dplyr::select(-summary) if(!is.null(data$prop)|!is.null(data$percent)){ tidy <- tidy |> dplyr::select(-prop) } tidy |> tidyr::pivot_wider(names_from = {{cols}}, values_from = display) } }
tidytitanic::flat_titanic |> dplyr::filter(freq > 30) |> data_define_value(value = freq) |> data_to_grouped(rows = survived, cols = sex) |> data_grouped_to_summarized() |> data_summarized_to_proportioned(percent = T, within = survived) |> data_proportioned_to_pivoted(cols = sex)
pivotr <- function(data, rows = NULL, cols = NULL, value = NULL, wt = NULL, fun = NULL, filter = TRUE, prop = FALSE, percent = FALSE, round = NULL, within = NULL, pivot = TRUE ){ data |> data_filter({{filter}}) |> data_define_value(value = {{value}}, wt = {{wt}}) |> data_to_grouped(rows = {{rows}}, cols = {{cols}}) |> data_grouped_to_summarized(fun = fun) |> data_summarized_to_proportioned(prop = prop, percent = percent, within = {{within}}, round = round) |> data_proportioned_to_pivoted(pivot = pivot, cols = {{cols}}) }
tidytitanic::flat_titanic |> pivotr(value = freq, rows = survived, cols = sex, percent = T, within = survived) tidytitanic::flat_titanic |> pivotr(value = freq, rows = survived, cols = sex, filter = sex == "Female")
library(tidytitanic) tidy_titanic |> pivotr() tidy_titanic |> pivotr(rows = sex, cols = survived) tidy_titanic |> pivotr(rows = c(sex, age), cols = survived) tidy_titanic |> pivotr(rows = sex, cols = survived, pivot = F) flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE) flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE) flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)
https://evamaerey.github.io/mytidytuesday/2024-07-02-s3-tables/s3-tables-tidypivot.html
new_tidypivot <- function(data = data.frame(), rows = NULL, cols = NULL, value = NULL, wt = NULL, fun = NULL, filter = TRUE, prop = FALSE, percent = FALSE, round = NULL, within = NULL, pivot = TRUE) { # table specification components ! tp <- list( data = data, rows = rows, cols = cols, value = value, wt = wt, fun = fun, prop = prop, percent = percent, round = round, within = within, pivot = pivot # more 'slots' to be added ) # declare class 'tidypivot' class(tp) <- "tidypivot" # Return the created object invisible(tp) } print.tidypivot <- function(tp){ print(do.call(pivotr, tp)) invisible(tp) } #' @export ggtable <- function(data = NULL){ # thedata <<- data # don't love this data <- data %||% data.frame() tp <- new_tidypivot() tp$data <- data last_tp <<- tp tp } #' @export last_table <- function(){ last_tp }
ggtable() tidytitanic::tidy_titanic |> head() ggtable(tidytitanic::tidy_titanic) last_table()
#' @export set_rows <- function(tp, rows = NULL){ tp$rows <- enquo(rows) last_tp <<- tp tp } #' @export set_cols <- function(tp, cols = NULL){ tp$cols <- enquo(cols) last_tp <<- tp tp } #' @export set_filter <- function(tp, filter = TRUE){ if(!filter){tp$filter <- !!rlang::enquo(filter)} last_tp <<- tp tp }
ggtable(tidytitanic::tidy_titanic) |> set_rows(sex) |> set_cols(survived) |> set_filter(TRUE) # last_table |> # set_filter(sex == "Female")
#' @export set_fun <- function(tp, fun = sum){ tp$fun <- fun last_tp <<- tp tp } #' @export set_value <- function(tp, value = NULL){ tp$value <- enquo(value) last_tp <<- tp tp } #' @export set_wt <- function(tp, wt = NULL){ tp$wt <- enquo(wt) last_tp <<- tp tp } #' @export set_weight <- function(tp, weight = NULL){ tp$weight <- enquo(weight) last_tp <<- tp tp } #' @export set_prop <- function(tp, within = NULL){ tp$percent <- FALSE tp$prop <- TRUE tp$within <- enquo(within) last_tp <<- tp tp } #' @export set_percent <- function(tp, within = NULL){ tp$prop <- FALSE tp$percent <- TRUE tp$within <- enquo(within) last_tp <<- tp tp } #' @export set_within <- function(tp, within = NULL){ tp$within <- enquo(within) last_tp <<- tp tp } #' @export no_pivot <- function(tp){ tp$pivot <- FALSE last_tp <<- tp tp } collect <- function(tp){ do.call(pivotr, tp) }
tidytitanic::flat_titanic ggtable(tidytitanic::flat_titanic) |> set_value(freq) |> set_rows(sex) |> set_cols(survived) last_table() |> set_percent() last_table() |> set_prop() last_table() |> set_prop(within = sex) # a null table... ggtable(tidytitanic::flat_titanic) |> set_value(NA) |> set_rows(sex) ggtable(tidytitanic::flat_titanic) |> set_wt(freq) |> set_rows(sex) |> set_cols(survived) last_table() |> set_fun(mean) last_table() |> no_pivot() last_table() |> collect()
# knitrExtra::chunk_names_get() knitrExtra::chunk_to_dir("helpers") knitrExtra::chunk_to_dir("pivotr") knitrExtra::chunk_to_dir("piping")
library(tidyverse) library(tidypivot) ext_exports <- read_csv("https://raw.githubusercontent.com/EvaMaeRey/mytidytuesday/refs/heads/main/2024-11-19-gg-prefixes/exported_funs_exts_ggplot2_tidyverse_org.csv") %>% mutate(prefix = str_extract(fun_exported, ".*?_")) %>% mutate(prefix_long = str_extract(fun_exported, ".+_")) %>% mutate(ind_classic_prefix = prefix %in% c("stat_", "geom_", "theme_", "scale_", "coord_", "facet_")) ggtable(ext_exports) ggtable(ext_exports |> filter(ind_classic_prefix)) last_table() |> set_rows(user) last_table() |> set_cols(prefix) # last_table() |> # set_rows(c(user, repo)) read_csv("https://raw.githubusercontent.com/EvaMaeRey/mytidytuesday/refs/heads/main/2024-12-10-ggplot2-layer-composition/ggplot2_exported_layer_fun_composition.csv") %>% rename(prefix = fun_prefix) -> ggplot2_layers_definers ggplot2_layers_definers |> ggtable() last_table() |> set_rows(type) last_table() |> set_rows(type) |> set_cols(default_or_fixed) |> set_rows(c(prefix, type))
devtools::check() devtools::install(pkg = ".", upgrade = "never")
Here are some examples where you might have derivative functions
pivot_count <- function(...){ # maybe a wt version... pivotr(fun = length, ...) } pivot_average <- function(...){ mean_na_rm <- function(x){mean(x, na.rm = T)} pivotr(fun = mean_na_rm, ...) } pivot_sum <- function(...){ pivotr(fun = sum, ...) } pivot_empty <- function(...){ nar <- function(x) return(NA) pivotr(fun = nar, ...) }
library(magrittr) library(tidytitanic) passengers <- readr::read_csv("https://raw.githubusercontent.com/clauswilke/dviz.supp/master/data-raw/titanic/Titanic.csv") head(passengers) tidy_titanic |> pivot_count(rows = sex) tidy_titanic |> pivot_count(rows = sex, col = survived) flat_titanic |> pivot_sum(rows = survived, value = freq) flat_titanic |> pivot_sum(rows = sex, cols = survived, value = freq) flat_titanic |> pivot_average(rows = sex, cols = survived, value = freq) flat_titanic |> pivot_empty(rows = survived, cols = age) passengers |> pivot_average(rows = c(Sex, PClass), cols = Survived, value = Age)
pivot_example <- function(...){ sample1 <- function(x) sample(x, 1) pivotr(fun = sample1, ...) } pivot_samplen <- function(..., n = 3, sep = "; "){ samplen <- function(x) paste(sample(x, n, replace = F), collapse = sep) pivotr(fun = samplen, ...) } pivot_list <- function(..., sep = "; "){ paste_collapse <- function(x) paste (x, collapse = sep) pivotr(fun = paste_collapse, ...) }
flat_titanic |> pivot_example(rows = sex, value = freq) flat_titanic |> pivot_samplen(rows = sex, value = freq) flat_titanic |> pivot_list(rows = sex, cols = survived, value = freq) set.seed(12345) passengers |> pivot_example(rows = Survived, cols = Sex, value = Name) passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Name, n = 2, sep = "; ") passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Age, n = 7) passengers |> dplyr::sample_n(20) |> pivot_list(rows = Sex, cols = Survived, value = Age)
library(tidytitanic) # pivot_prop flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE) # pivot_prop flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE) flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex) # pivot_percent flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, percent = TRUE, within = sex)
knitr::knit_exit()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.