In EvaMaeRey/tidypivot: Tidy table calculations with visual api

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
knitr::opts_chunk$set(message = F, warning = F)

{tidypivot} allows you to create tables by describing them (like ggplot2 plot description/declaration)

note: see original discussion here: https://evamaerey.github.io/mytidytuesday/2022-02-14-tables/tables.html and thoughtful contributions from @shannonpileggi and @brshallow https://github.com/EvaMaeRey/mytidytuesday/issues/3

And, you know, I'd get a dataset. And, in my head I could very clearly kind of picture, I want to put this on the x-axis. Let's put this on the y-axis, draw a line, put some points here, break it up by this variable. And then, like, getting that vision out of my head, and into reality, it's just really, really hard. Just, like, felt harder than it should be. Like, there's a lot of custom programming involved, where I just felt, like, to me, I just wanted to say, like, you know, this is what I'm thinking, this is how I'm picturing this plot. Like you're the computer 'Go and do it'. ... and I'd also been reading about the Grammar of Graphics by Leland Wilkinson, I got to meet him a couple of times and ... I was, like, this book has been, like, written for me. - https://www.trifacta.com/podcast/tidy-data-with-hadley-wickham/

declarative table creation with ggplot2

library(ggplot2)
StatSum$default_aes <- aes(label = after_stat(n))

# I want to put this on the x-axis (cols)
tidytitanic::tidy_titanic |>
  ggplot(
      # I want to put this on the x-axis (cols)
  aes(x = sex, 
      # I want to put this on the y- axis (rows)
      y = survived)
  )

# grouping and computation happen in one step, filling in 'table'
last_plot() + 
  stat_sum(geom = "text")

Status quo table creation: Harder than it should be?

1. grouping
1. compute
1. pivot

pivotr function: toward declarative table generation

Under the hood:

group by rows and columns
value in data to consider (1 if not specified)
wt, weight the value (1 if not specified)
fun - do an operation (on value) within group

But API:

describe layout of table (rows and cols) and compute (default to count records)

data_filter <- function(data, filter = TRUE){

  data <- data |>
      dplyr::filter({{filter}})

  data

}

data_define_value <- function(data, value = NULL, wt = NULL){

    value_quo <- rlang::enquo(value)
    wt_quo    <- rlang::enquo(wt)

      if(rlang::quo_is_null(value_quo) ){

      ## adding a value as 1 if there is none
      data <- data |>
        dplyr::mutate(value = 1)

    }else{

      data <- data |> 
          dplyr::mutate(value = {{value}})

    }

    #### weighting ####

    if(!rlang::quo_is_null(wt_quo) ){

      data <- data |>
        dplyr::mutate(value = .data$value * {{wt}}) 
   }

    data

}


data_to_grouped <- function(data, cols, rows){

    ### grouping by tabulation vars col and row
    data |>
      dplyr::group_by(dplyr::across(c({{cols}}, {{rows}})),
                      .drop = TRUE)


}


data_grouped_to_summarized <- function(data, fun = NULL){

      if(is.null(fun))  {fun <- sum}

    ## adding a value as 1 if there is none

    ### summarizing ####

    data |>
        dplyr::summarise(summary = fun(.data$value))


}


data_summarized_to_proportioned <- function(data, prop = F, percent = F, within = NULL, round = 2){ 
    # proportion case or percent

    within_quo        <- rlang::enquo(within)
    # totals_within_quo <- rlang::enquo(totals_within)

    if(is.null(prop)) {prop <- FALSE}

    if(prop|percent){

      mult <- ifelse(percent, 100, 1)
      if(is.null(round)){round <- ifelse(percent, 1, 3)}

      # prop is across all data
        if(rlang::quo_is_null(within_quo) ){

            data <- data |>
              dplyr::ungroup() |>
              dplyr::mutate(prop = round(.data$summary*mult/sum(.data$summary), round))

        # prop is within categories specified by within variable
        }else{

              data <- data |>
                dplyr::ungroup() |>
                dplyr::group_by(dplyr::across(c({{within}})),
                                .drop = FALSE) |>
                dplyr::mutate(prop = round(.data$summary*mult/sum(.data$summary), round))

        }
    }

  if(prop|percent){data$display <- data$prop}else{data$display <- data$summary}

  data

}


data_proportioned_to_pivoted <- function(data, pivot = TRUE, cols = NULL){

    cols_quo  <- rlang::enquo(cols)

    tidy <- data |>
      dplyr::ungroup()


    # do not pivot if argument pivot false or if no columns specified
    if(pivot == F){

      tidy 

      # otherwise pivot by columns
    }else


    if(rlang::quo_is_null(cols_quo) & pivot){

      tidy <- tidy |> dplyr::select(-summary)
      if(!is.null(data$prop)|!is.null(data$percent)){
        tidy <- tidy |>  dplyr::select(-prop)
      }

      tidy |>
        dplyr::rename(value = display)

    } else


    if(!rlang::quo_is_null(cols_quo) & pivot){
      # keep only display column, and tabulation vars
      tidy <- tidy |> dplyr::select(-summary)
      if(!is.null(data$prop)|!is.null(data$percent)){
        tidy <- tidy |>  dplyr::select(-prop)
      }

      tidy |>
        tidyr::pivot_wider(names_from = {{cols}}, values_from = display)

    }

  }

tidytitanic::flat_titanic |> 
  dplyr::filter(freq > 30) |>
  data_define_value(value = freq) |> 
  data_to_grouped(rows = survived, cols = sex) |>
  data_grouped_to_summarized() |>
  data_summarized_to_proportioned(percent = T, within = survived) |>
  data_proportioned_to_pivoted(cols = sex)

pivotr <- function(data,
                   rows = NULL,
                   cols = NULL,

                   value = NULL,
                   wt = NULL,

                   fun = NULL,

                   filter = TRUE,
                   prop = FALSE,
                   percent = FALSE,
                   round = NULL,

                   within = NULL,

                   pivot = TRUE
){


  data |> 
  data_filter({{filter}}) |>
  data_define_value(value = {{value}}, wt = {{wt}}) |> 
  data_to_grouped(rows = {{rows}}, cols = {{cols}}) |>
  data_grouped_to_summarized(fun = fun) |>
  data_summarized_to_proportioned(prop = prop, percent = percent, within = {{within}}, round = round) |>
  data_proportioned_to_pivoted(pivot = pivot, cols = {{cols}})

}

tidytitanic::flat_titanic |> 
  pivotr(value = freq, rows = survived, cols = sex, percent = T, within = survived)

tidytitanic::flat_titanic |> 
  pivotr(value = freq, rows = survived, cols = sex, filter = sex == "Female")

library(tidytitanic)

tidy_titanic |> pivotr()

tidy_titanic |> pivotr(rows = sex, cols = survived)

tidy_titanic |> pivotr(rows = c(sex, age), cols = survived)

tidy_titanic |> pivotr(rows = sex, cols = survived, pivot = F)

flat_titanic |> pivotr(rows = sex, value = freq, prop = TRUE)

flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE)

flat_titanic |> pivotr(rows = sex, cols = survived, value = freq, prop = TRUE, within = sex)

toward a piped workflow

https://evamaerey.github.io/mytidytuesday/2024-07-02-s3-tables/s3-tables-tidypivot.html

new_tidypivot <- function(data = data.frame(),
                          rows = NULL,
                          cols = NULL,
                          value = NULL,
                          wt = NULL,
                          fun = NULL,
                          filter = TRUE,
                          prop = FALSE,
                          percent = FALSE,
                          round = NULL,
                          within = NULL,
                          pivot = TRUE) {

  # table specification components !
  tp <- list(
    data = data,
    rows = rows,
    cols = cols,
    value = value,
    wt = wt,
    fun = fun,
    prop = prop,
    percent = percent,
    round = round,
    within = within,
    pivot = pivot
    # more 'slots' to be added
  )

  # declare class 'tidypivot'
  class(tp) <- "tidypivot"

  # Return the created object
  invisible(tp)

}


print.tidypivot <- function(tp){

  print(do.call(pivotr, tp))

  invisible(tp)

}

#' @export
ggtable <- function(data = NULL){

  # thedata <<- data # don't love this
  data <- data %||% data.frame()

  tp <- new_tidypivot()

  tp$data <- data

  last_tp <<- tp

  tp

}


#' @export
last_table <- function(){

  last_tp

}

ggtable() 


tidytitanic::tidy_titanic |> head()

ggtable(tidytitanic::tidy_titanic) 


last_table()

#' @export
set_rows <- function(tp, rows = NULL){

  tp$rows <- enquo(rows)

  last_tp <<- tp

  tp


}


#' @export
set_cols <- function(tp, cols = NULL){

  tp$cols <- enquo(cols)

  last_tp <<- tp

  tp


}

#' @export
set_filter <- function(tp, filter = TRUE){

  if(!filter){tp$filter <- !!rlang::enquo(filter)}

  last_tp <<- tp

  tp


}

ggtable(tidytitanic::tidy_titanic) |>
  set_rows(sex) |>
  set_cols(survived) |>
  set_filter(TRUE)

# last_table |>
#   set_filter(sex == "Female")

#' @export
set_fun <- function(tp, fun = sum){

  tp$fun <- fun

  last_tp <<- tp

  tp

}


#' @export
set_value <- function(tp, value = NULL){

  tp$value <- enquo(value)

  last_tp <<- tp

  tp

}


#' @export
set_wt <- function(tp, wt = NULL){

  tp$wt <- enquo(wt)

  last_tp <<- tp

  tp

}


#' @export
set_weight <- function(tp, weight = NULL){

  tp$weight <- enquo(weight)

  last_tp <<- tp

  tp

}


#' @export
set_prop <- function(tp, within = NULL){

  tp$percent <- FALSE
  tp$prop <- TRUE
  tp$within <- enquo(within)

  last_tp <<- tp

  tp

}

#' @export
set_percent <- function(tp, within = NULL){

  tp$prop <- FALSE
  tp$percent <- TRUE
  tp$within <- enquo(within)

  last_tp <<- tp

  tp

}


#' @export
set_within <- function(tp, within = NULL){

  tp$within <- enquo(within)

  last_tp <<- tp

  tp

}

#' @export
no_pivot <- function(tp){

  tp$pivot <- FALSE

  last_tp <<- tp

  tp  

}

collect <- function(tp){

  do.call(pivotr, tp)

}

tidytitanic::flat_titanic 

ggtable(tidytitanic::flat_titanic) |>
  set_value(freq) |>
  set_rows(sex) |>
  set_cols(survived)

last_table() |>
  set_percent()

last_table() |>
  set_prop()

last_table() |>
  set_prop(within = sex)

# a null table...
ggtable(tidytitanic::flat_titanic) |>
  set_value(NA) |>
  set_rows(sex)

ggtable(tidytitanic::flat_titanic) |>
  set_wt(freq) |>
  set_rows(sex) |>
  set_cols(survived)

last_table() |>
  set_fun(mean)

last_table() |> 
  no_pivot()


last_table() |>
  collect()

# knitrExtra::chunk_names_get()

knitrExtra::chunk_to_dir("helpers")
knitrExtra::chunk_to_dir("pivotr")
knitrExtra::chunk_to_dir("piping")

library(tidyverse)
library(tidypivot)
ext_exports <- read_csv("https://raw.githubusercontent.com/EvaMaeRey/mytidytuesday/refs/heads/main/2024-11-19-gg-prefixes/exported_funs_exts_ggplot2_tidyverse_org.csv") %>% 
  mutate(prefix = str_extract(fun_exported, ".*?_")) %>% 
  mutate(prefix_long = str_extract(fun_exported, ".+_")) %>% 
  mutate(ind_classic_prefix = prefix %in% c("stat_", "geom_", "theme_", "scale_", "coord_", "facet_"))


ggtable(ext_exports)

ggtable(ext_exports |> filter(ind_classic_prefix))

last_table() |>
  set_rows(user) 

last_table() |>
  set_cols(prefix)

# last_table() |>
#   set_rows(c(user, repo))


read_csv("https://raw.githubusercontent.com/EvaMaeRey/mytidytuesday/refs/heads/main/2024-12-10-ggplot2-layer-composition/ggplot2_exported_layer_fun_composition.csv") %>% 
  rename(prefix = fun_prefix) ->
ggplot2_layers_definers

ggplot2_layers_definers |>
  ggtable()

last_table() |>
  set_rows(type)

last_table() |>
  set_rows(type) |>
  set_cols(default_or_fixed) |>
  set_rows(c(prefix, type))

devtools::check()
devtools::install(pkg = ".", upgrade = "never")

examples/derivative

Here are some examples where you might have derivative functions

pivot_count <- function(...){

  # maybe a wt version...
  pivotr(fun = length, ...) 

  }

pivot_average <- function(...){

  mean_na_rm <- function(x){mean(x, na.rm = T)}

  pivotr(fun = mean_na_rm, ...) 

}

pivot_sum <- function(...){

  pivotr(fun = sum, ...)

}

pivot_empty <- function(...){

nar <- function(x) return(NA)

  pivotr(fun = nar, ...)

}

library(magrittr)
library(tidytitanic)

passengers <- readr::read_csv("https://raw.githubusercontent.com/clauswilke/dviz.supp/master/data-raw/titanic/Titanic.csv")

head(passengers)

tidy_titanic |> pivot_count(rows = sex)
tidy_titanic |> pivot_count(rows = sex, col = survived)
flat_titanic |> pivot_sum(rows = survived, value = freq)
flat_titanic |> pivot_sum(rows = sex,  cols = survived, value = freq)

flat_titanic |> pivot_average(rows = sex,  cols = survived, value = freq)
flat_titanic |> pivot_empty(rows = survived, cols = age)

passengers |> pivot_average(rows = c(Sex, PClass), cols = Survived, value = Age)

filling cells with examples from data.

pivot_example <- function(...){

  sample1 <- function(x) sample(x, 1)
  pivotr(fun = sample1, ...)

}


pivot_samplen <- function(..., n = 3, sep = "; "){

  samplen <- function(x) paste(sample(x, n, replace = F), collapse = sep)

  pivotr(fun = samplen, ...) 

}

pivot_list <- function(..., sep = "; "){

  paste_collapse <- function(x) paste (x, collapse = sep)
  pivotr(fun = paste_collapse, ...) 

}

flat_titanic |> pivot_example(rows = sex, value = freq)

flat_titanic |> pivot_samplen(rows = sex, value = freq)

flat_titanic |> pivot_list(rows = sex, cols = survived, value = freq)

set.seed(12345)
passengers |> pivot_example(rows = Survived, cols = Sex, value = Name)
passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Name, n = 2, sep = "; ") 

passengers |> pivot_samplen(rows = Survived, cols = Sex, value = Age, n = 7) 

passengers |> dplyr::sample_n(20) |> pivot_list(rows = Sex, cols = Survived, value = Age)

proportions helpers

library(tidytitanic)
# pivot_prop
flat_titanic |> pivotr(rows = sex, 
                       value = freq, 
                       prop = TRUE) # pivot_prop

flat_titanic |> 
  pivotr(rows = sex, cols = survived, 
         value = freq, prop = TRUE)

flat_titanic |> 
  pivotr(rows = sex, cols = survived, 
         value = freq, prop = TRUE, within = sex)

# pivot_percent
flat_titanic |> 
  pivotr(rows = sex, cols = survived, 
         value = freq, percent = TRUE, within = sex)