knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
Show percentiles, mean, sd, min, max, first, last, and unique counts across quantitative variables in a dataset. This file works out how the ff_summ_bygroup function works from Fan's REconTools Package.
There is a quantitative variable, summarize this quantitative variable by multiple categorical groups, show a variety of statistics. including:
rm(list = ls(all.names = TRUE)) library(tibble) library(tidyr) library(dplyr) library(purrr) # library(ggplot2) library(kableExtra)
data(mtcars) df_mtcars <- mtcars df <- df_mtcars vars.group <- c('am', 'vs') var.numeric <- 'mpg' str.stats.group <- 'allperc' ar.perc <- c(0.10, 0.25, 0.5, 0.75, 0.9) boo.overall.stats <- TRUE
# List of statistics # https://rdrr.io/cran/dplyr/man/summarise.html strs.center <- c('mean', 'median') strs.spread <- c('sd', 'IQR', 'mad') strs.range <- c('min', 'max') strs.pos <- c('first', 'last') strs.count <- c('n_distinct') # Grouping of Statistics if (str.stats.group == 'main') { strs.all <- c('mean', 'min', 'max', 'sd') } if (str.stats.group == 'all') { strs.all <- c(strs.center, strs.spread, strs.range, strs.pos, strs.count) } if (str.stats.group == 'allperc') { ar_st_percentile_func_names <- paste0(ar.perc*100, "%") funs_percentiles <- map(ar.perc, ~partial(quantile, probs = .x, na.rm = TRUE)) %>% set_names(nm = ar_st_percentile_func_names) strs.all <- c(strs.center, strs.spread, funs_percentiles, strs.range, strs.pos, strs.count) }
# Start Transform df <- df %>% drop_na() %>% mutate(!!(var.numeric) := as.numeric(!!sym(var.numeric))) # Overall Statistics if (boo.overall.stats) { df.overall.stats <- df %>% summarize_at(vars(var.numeric), funs(!!!strs.all)) if (length(strs.all) == 1) { # give it a name, otherwise if only one stat, name of stat not saved df.overall.stats <- df.overall.stats %>% rename(!!strs.all := !!sym(var.numeric)) } names(df.overall.stats) <- paste0(var.numeric, '.', names(df.overall.stats)) } # Display Results kable(df.overall.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# Group Sort df.select <- df %>% group_by(!!!syms(vars.group)) %>% arrange(!!!syms(c(vars.group, var.numeric))) # Table of Statistics df.table.grp.stats <- df.select %>% summarize_at(vars(var.numeric), funs(!!!strs.all)) # Display Results kable(df.table.grp.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) # Add Stat Name if (length(strs.all) == 1) { # give it a name, otherwise if only one stat, name of stat not saved df.table.grp.stats <- df.table.grp.stats %>% rename(!!strs.all := !!sym(var.numeric)) } # Display Results kable(df.table.grp.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# Row of Statistics str.vars.group.combine <- paste0(vars.group, collapse='_') if (length(vars.group) == 1) { df.row.grp.stats <- df.table.grp.stats %>% mutate(!!(str.vars.group.combine) := paste0(var.numeric, '.', vars.group, '.g', (!!!syms(vars.group)))) %>% gather(variable, value, -one_of(vars.group)) %>% unite(str.vars.group.combine, c(str.vars.group.combine, 'variable')) %>% spread(str.vars.group.combine, value) } else { df.row.grp.stats <- df.table.grp.stats %>% mutate(vars.groups.combine := paste0(paste0(vars.group, collapse='.')), !!(str.vars.group.combine) := paste0(interaction(!!!(syms(vars.group))))) %>% mutate(!!(str.vars.group.combine) := paste0(var.numeric, '.', vars.groups.combine, '.', (!!sym(str.vars.group.combine)))) %>% ungroup() %>% select(-vars.groups.combine, -one_of(vars.group)) %>% gather(variable, value, -one_of(str.vars.group.combine)) %>% unite(str.vars.group.combine, c(str.vars.group.combine, 'variable')) %>% spread(str.vars.group.combine, value) } # Display Results kable(df.row.grp.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
# Clean up name strings names(df.table.grp.stats) <- gsub(x = names(df.table.grp.stats),pattern = "_", replacement = "\\.") names(df.row.grp.stats) <- gsub(x = names(df.row.grp.stats),pattern = "_", replacement = "\\.") # Return list.return <- list(df_table_grp_stats = df.table.grp.stats, df_row_grp_stats = df.row.grp.stats) # Overall Statistics, without grouping if (boo.overall.stats) { df.row.stats.all <- c(df.row.grp.stats, df.overall.stats) list.return <- append(list.return, list(df_overall_stats = df.overall.stats, df_row_stats_all = df.row.stats.all)) } # Display Results kable(df.table.grp.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) kable(df.row.grp.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) kable(df.overall.stats) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.