pacman::p_load(numform, xtable, wakefield, ggplot2, knitr, gridExtra, viridis, maps) rinline <- function(code) { sprintf('<code class="r">``` `r %s` ```</code>', code) } desc <- suppressWarnings(readLines("DESCRIPTION")) regex <- "(^Version:\\s+)(\\d+\\.\\d+\\.\\d+)" loc <- grep(regex, desc) ver <- gsub(regex, "\\2", desc[loc]) # verbadge <- sprintf('<a href="https://img.shields.io/badge/Version-%s-orange.svg"><img src="https://img.shields.io/badge/Version-%s-orange.svg" alt="Version"/></a></p>', ver, ver) verbadge <- '' ```` ```r library(dplyr) library(pacman) options(scipen=999) knit_hooks$set(htmlcap = function(before, options, envir) { if(!before) { paste('<p class="caption"><b><em>',options$htmlcap,"</em></b></p>",sep="") } }) knitr::opts_knit$set(self.contained = TRUE, cache = FALSE) knitr::opts_chunk$set(fig.path = "tools/figure/", warning = FALSE)
numform contains tools to assist in the formatting of numbers and plots for publication. Tools include the removal of leading zeros, standardization of number of digits, addition of affixes, and a p-value formatter. These tools combine the functionality of several 'base' functions such as paste()
, format()
, and sprintf()
into specific use case functions that are named in a way that is consistent with usage, making their names easy to remember and easy to deploy.
To download the development version of numform:
Download the zip ball or tar ball, decompress and run R CMD INSTALL
on it, or use the pacman package to install the development version:
if (!require("pacman")) install.packages("pacman") pacman::p_load_current_gh("trinker/numform") pacman::p_load(tidyverse, gridExtra)
You are welcome to: submit suggestions and bug-reports at: https://github.com/trinker/numform/issues send a pull request on: https://github.com/trinker/numform * compose a friendly e-mail to: tyler.rinker@gmail.com
Below is a table of available numform functions. Note that f_
is read as "format" whereas fv_
is read as "format vector". The former formats individual values in the vector while the latter uses the vector to compute a calculation on each of the values and then formats them. Additionally, all numform f_
functions have a closure, function retuning, version that is prefixed with an additional f
(read "format function"). For example, f_num
has ff_num
which has the same arguments but returns a function instead. This is useful for passing in to ggplot2 scale_x/y_type
functions (see Plotting for usage).
p_funs(numform) %>% {grep("^ff", ., invert = TRUE, value = TRUE)} %>% wakefield:::variables_as_matrix() %>% xtable::xtable() %>% print(type = 'html', include.colnames = FALSE, include.rownames = FALSE, html.table.attributes = '')
if (!require("pacman")) install.packages("pacman") pacman::p_load_gh("trinker/numform") pacman::p_load(dplyr)
f_num(c(0.0, 0, .2, -00.02, 1.122222, pi, "A"))
f_thous(1234) f_thous(12345) f_thous(123456) f_mills(1234567) f_mills(12345678) f_mills(123456789) f_bills(1234567891) f_bills(12345678912) f_bills(123456789123)
...or auto-detect:
f_denom(1234) f_denom(12345) f_denom(123456) f_denom(1234567) f_denom(12345678) f_denom(123456789) f_denom(1234567891) f_denom(12345678912) f_denom(123456789123)
f_comma(c(1234.12345, 1234567890, .000034034, 123000000000, -1234567))
f_percent(c(30, 33.45, .1), digits = 1) f_percent(c(0.0, 0, .2, -00.02, 1.122222, pi)) f_prop2percent(c(.30, 1, 1.01, .33, .222, .01)) f_prop2percent(c(.30, 1, 1.01, .33, .222, .01), digits = 0) f_pp(c(.30, 1, 1.01, .33, .222, .01)) # same as f_prop2percent(digits = 0)
f_dollar(c(0, 30, 33.45, .1)) f_dollar(c(0.0, 0, .2, -00.02, 1122222, pi)) %>% f_comma()
Sometimes one wants to lop off digits of money in order to see the important digits, the real story. The f_denom
family of functions can do job.
f_denom(c(12345267, 98765433, 658493021), prefix = '$') f_denom(c(12345267, 98765433, 658493021), relative = 1, prefix = '$')
Notice the use of the alignment
function to detect the column alignment.
pacman::p_load(dplyr, pander) set.seed(10) dat <- data_frame( Team = rep(c("West Coast", "East Coast"), each = 4), Year = rep(2012:2015, 2), YearStart = round(rnorm(8, 2e6, 1e6) + sample(1:10/100, 8, TRUE), 2), Won = round(rnorm(8, 4e5, 2e5) + sample(1:10/100, 8, TRUE), 2), Lost = round(rnorm(8, 4.4e5, 2e5) + sample(1:10/100, 8, TRUE), 2), WinLossRate = Won/Lost, PropWon = Won/YearStart, PropLost = Lost/YearStart ) dat %>% group_by(Team) %>% mutate( `%ΔWinLoss` = fv_percent_diff(WinLossRate, 0), `ΔWinLoss` = f_sign(Won - Lost, '<b>+</b>', '<b>–</b>') ) %>% ungroup() %>% mutate_at(vars(Won:Lost), .funs = ff_denom(relative = -1, prefix = '$')) %>% mutate_at(vars(PropWon, PropLost), .funs = ff_prop2percent(digits = 0)) %>% mutate( YearStart = f_denom(YearStart, 1, prefix = '$'), Team = fv_runs(Team), WinLossRate = f_num(WinLossRate, 1) ) %>% data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>% pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
pacman::p_load(dplyr, pander) data_frame( Event = c('freezing water', 'room temp', 'body temp', 'steak\'s done', 'hamburger\'s done', 'boiling water', 'sun surface', 'lighting'), F = c(32, 70, 98.6, 145, 160, 212, 9941, 50000) ) %>% mutate( Event = f_title(Event), C = (F - 32) * (5/9) ) %>% mutate( F = f_degree(F, measure = 'F', type = 'string'), C = f_degree(C, measure = 'C', type = 'string', zero = '0.0') ) %>% data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>% pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
if (!require("pacman")) install.packages("pacman") pacman::p_load(tidyverse) set.seed(11) data_frame( date = sample(seq(as.Date("1990/1/1"), by = "day", length.out = 2e4), 12) ) %>% mutate( year_4 = f_year(date, 4), year_2 = f_year(date, 2), quarter = f_quarter(date), month_name = f_month_name(date) %>% numform::as_factor(), month_abbreviation = f_month_abbreviation(date) %>% numform::as_factor(), month_short = f_month(date), weekday_name = f_weekday_name(date), weekday_abbreviation = f_weekday_abbreviation(date), weekday_short = f_weekday(date), weekday_short_distinct = f_weekday(date, distinct = TRUE) ) %>% data.frame(stringsAsFactors = FALSE, check.names = FALSE) %>% pander::pander(split.tables = Inf, justify = alignment(.), style = 'simple')
mtcars %>% count(cyl, gear) %>% group_by(cyl) %>% mutate( p = numform::f_pp(n/sum(n)) ) %>% ungroup() %>% mutate( cyl = numform::fv_runs(cyl), ` ` = f_text_bar(n) ## Overall ) %>% as.data.frame()
library(tidyverse); library(viridis)
set.seed(10) data_frame( revenue = rnorm(10000, 500000, 50000), date = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 10000, TRUE), site = sample(paste("Site", 1:5), 10000, TRUE) ) %>% mutate( dollar = f_comma(f_dollar(revenue, digits = -3)), thous = f_denom(revenue), thous_dollars = f_denom(revenue, prefix = '$'), abb_month = f_month(date), abb_week = numform::as_factor(f_weekday(date, distinct = TRUE)) ) %>% group_by(site, abb_week) %>% mutate(revenue = {if(sample(0:1, 1) == 0) `-` else `+`}(revenue, sample(1e2:1e5, 1))) %>% ungroup() %T>% print() %>% ggplot(aes(abb_week, revenue)) + geom_jitter(width = .2, height = 0, alpha = .2, aes(color = revenue)) + scale_y_continuous(label = ff_denom(prefix = '$'))+ facet_wrap(~site) + theme_bw() + scale_color_viridis() + theme( strip.text.x = element_text(hjust = 0, color = 'grey45'), strip.background = element_rect(fill = NA, color = NA), panel.border = element_rect(fill = NA, color = 'grey75'), panel.grid = element_line(linetype = 'dotted'), axis.ticks = element_line(color = 'grey55'), axis.text = element_text(color = 'grey55'), axis.title.x = element_text(color = 'grey55', margin = margin(t = 10)), axis.title.y = element_text(color = 'grey55', angle = 0, margin = margin(r = 10)), legend.position = 'none' ) + labs( x = 'Day of Week', y = 'Revenue', title = 'Site Revenue by Day of Week', subtitle = f_wrap(c( 'This faceted dot plot shows the distribution of revenues within sites', 'across days of the week. Notice the consistently increasing revenues for', 'Site 2 across the week.' ), width = 85, collapse = TRUE) )
library(tidyverse); library(viridis)
set.seed(10) dat <- data_frame( revenue = rnorm(144, 500000, 10000), date = seq(as.Date('2005/01/01'), as.Date('2016/12/01'), by="month") ) %>% mutate( quarter = f_quarter(date), year = f_year(date, 4) ) %>% group_by(year, quarter) %>% summarize(revenue = sum(revenue)) %>% ungroup() %>% mutate(quarter = as.integer(gsub('Q', '', quarter))) year_average <- dat %>% group_by(year) %>% summarize(revenue = mean(revenue)) %>% mutate(x1 = .8, x2 = 4.2) dat %>% ggplot(aes(quarter, revenue, group = year)) + geom_segment( linetype = 'dashed', data = year_average, color = 'grey70', size = 1, aes(x = x1, y = revenue, xend = x2, yend = revenue) ) + geom_line(size = .85, color = '#009ACD') + geom_point(size = 1.5, color = '#009ACD') + facet_wrap(~year, nrow = 2) + scale_y_continuous(label = ff_denom(relative = 2)) + scale_x_continuous(breaks = 1:4, label = f_quarter) + theme_bw() + theme( strip.text.x = element_text(hjust = 0, color = 'grey45'), strip.background = element_rect(fill = NA, color = NA), panel.border = element_rect(fill = NA, color = 'grey75'), panel.grid.minor = element_blank(), panel.grid.major = element_line(linetype = 'dotted'), axis.ticks = element_line(color = 'grey55'), axis.text = element_text(color = 'grey55'), axis.title.x = element_text(color = 'grey55', margin = margin(t = 10)), axis.title.y = element_text(color = 'grey55', angle = 0, margin = margin(r = 10)), legend.position = 'none' ) + labs( x = 'Quarter', y = 'Revenue ($)', title = 'Quarterly Revenue Across Years', subtitle = f_wrap(c( 'This faceted line plot shows the change in quarterly revenue across', 'years.' ), width = 85, collapse = TRUE) )
library(tidyverse); library(gridExtra)
set.seed(10) dat <- data_frame( level = c("not_involved", "somewhat_involved_single_group", "somewhat_involved_multiple_groups", "very_involved_one_group", "very_involved_multiple_groups" ), n = sample(1:10, length(level)) ) %>% mutate( level = factor(level, levels = unique(level)), `%` = n/sum(n) ) gridExtra::grid.arrange( gridExtra::arrangeGrob( dat %>% ggplot(aes(level, `%`)) + geom_col() + labs(title = 'Very Sad', y = NULL) + theme( axis.text = element_text(size = 7), title = element_text(size = 9) ), dat %>% ggplot(aes(level, `%`)) + geom_col() + scale_x_discrete(labels = function(x) f_replace(x, '_', '\n')) + scale_y_continuous(labels = ff_prop2percent(digits = 0)) + labs(title = 'Underscore Split (Readable)', y = NULL) + theme( axis.text = element_text(size = 7), title = element_text(size = 9) ), ncol = 2 ), gridExtra::arrangeGrob( dat %>% ggplot(aes(level, `%`)) + geom_col() + scale_x_discrete(labels = function(x) f_title(f_replace(x))) + scale_y_continuous(labels = ff_prop2percent(digits = 0)) + labs(title = 'Underscore Replaced & Title (Capitalized Sadness)', y = NULL) + theme( axis.text = element_text(size = 7), title = element_text(size = 9) ), dat %>% ggplot(aes(level, `%`)) + geom_col() + scale_x_discrete(labels = function(x) f_wrap(f_title(f_replace(x)))) + scale_y_continuous(labels = ff_prop2percent(digits = 0)) + labs(title = 'Underscore Replaced, Title, & Wrapped (Happy)', y = NULL) + theme( axis.text = element_text(size = 7), title = element_text(size = 9) ), ncol = 2 ), ncol = 1 )
set.seed(10) dat <- data_frame( state = sample(state.name, 10), value = sample(10:20, 10) ^ (7), cols = sample(colors()[1:150], 10) ) %>% arrange(desc(value)) %>% mutate(state = factor(state, levels = unique(state))) dat %>% ggplot(aes(state, value, fill = cols)) + geom_col() + scale_x_discrete(labels = f_state) + scale_fill_identity() + scale_y_continuous(labels = ff_denom(prefix = '$'), expand = c(0, 0), limits = c(0, max(dat$value) * 1.05) ) + theme_minimal() + theme( panel.grid.major.x = element_blank(), axis.title.y = element_text(angle = 0) ) + labs(x = 'State', y = 'Cash\nFlow', title = f_title("look at how professional i look"), subtitle = 'Subtitles: For that extra professional look.' )
library(tidyverse); library(viridis)
data_frame( Event = c('freezing water', 'room temp', 'body temp', 'steak\'s done', 'hamburger\'s done', 'boiling water'), F = c(32, 70, 98.6, 145, 160, 212) ) %>% mutate( C = (F - 32) * (5/9), Event = f_title(Event), Event = factor(Event, levels = unique(Event)) ) %>% ggplot(aes(Event, F, fill = F)) + geom_col() + geom_text(aes(y = F + 4, label = f_fahrenheit(F, digits = 1, type = 'text')), parse = TRUE, color = 'grey60') + scale_y_continuous( labels = f_fahrenheit, limits = c(0, 220), expand = c(0, 0), sec.axis = sec_axis(trans = ~(. - 32) * (5/9), labels = f_celcius, name = f_celcius(prefix = 'Temperature ', type = 'title')) ) + scale_x_discrete(labels = ff_replace(pattern = ' ', replacement = '\n')) + scale_fill_viridis(option = "magma", labels = f_fahrenheit, name = NULL) + theme_bw() + labs( y = f_fahrenheit(prefix = 'Temperature ', type = 'title'), title = f_fahrenheit(prefix = 'Temperature of Common Events ', type = 'title') ) + theme( axis.ticks.x = element_blank(), panel.border = element_rect(fill = NA, color = 'grey80'), panel.grid.minor.x = element_blank(), panel.grid.major.x = element_blank() )
library(tidyverse); library(maps)
world <- map_data(map="world") ggplot(world, aes(map_id = region, x = long, y = lat)) + geom_map(map = world, aes(map_id = region), fill = "grey40", colour = "grey70", size = 0.25) + scale_y_continuous(labels = f_latitude) + scale_x_continuous(labels = f_longitude)
mtcars %>% mutate(mpg2 = cut(mpg, 10, right = FALSE)) %>% ggplot(aes(mpg2)) + geom_bar(fill = '#33A1DE') + scale_x_discrete(labels = function(x) f_wrap(f_bin_text_right(x, l = 'up to'), width = 8)) + scale_y_continuous(breaks = seq(0, 14, by = 2), limits = c(0, 7)) + theme_minimal() + theme( panel.grid.major.x = element_blank(), axis.text.x = element_text(size = 14, margin = margin(t = -12)), axis.text.y = element_text(size = 14), plot.title = element_text(hjust = .5) ) + labs(title = 'Histogram', x = NULL, y = NULL)
dat <- data_frame( Value = c(111, 2345, 34567, 456789, 1000001, 1000000001), Time = 1:6 ) gridExtra::grid.arrange( ggplot(dat, aes(Time, Value)) + geom_line() + scale_y_continuous(labels = ff_denom( prefix = '$')) + labs(title = "Single Denominational Unit"), ggplot(dat, aes(Time, Value)) + geom_line() + scale_y_continuous( labels = ff_denom(mix.denom = TRUE, prefix = '$', pad.char = '') ) + labs(title = "Mixed Denominational Unit"), ncol = 2 )
We can see its use in actual model reporting as well:
mod1 <- t.test(1:10, y = c(7:20)) sprintf( "t = %s (%s)", f_num(mod1$statistic), f_pval(mod1$p.value) )
mod2 <- t.test(1:10, y = c(7:20, 200)) sprintf( "t = %s (%s)", f_num(mod2$statistic, 2), f_pval(mod2$p.value, digits = 2) )
We can build a function to report model statistics:
report <- function(mod, stat = NULL, digits = c(0, 2, 2)) { stat <- if (is.null(stat)) stat <- names(mod[["statistic"]]) sprintf( "%s(%s) = %s, %s", gsub('X-squared', 'Χ<sup>2</sup>', stat), paste(f_num(mod[["parameter"]], digits[1]), collapse = ", "), f_num(mod[["statistic"]], digits[2]), f_pval(mod[["p.value"]], digits = digits[3]) ) } report(mod1) report(oneway.test(count ~ spray, InsectSprays)) report(chisq.test(matrix(c(12, 5, 7, 7), ncol = 2)))
This enables in-text usage as well. First set up the models in a code chunk:
mymod <- oneway.test(count ~ spray, InsectSprays) mymod2 <- chisq.test(matrix(c(12, 5, 7, 7), ncol = 2))
And then use r rinline("report(mymod)")
resulting in a report that looks like this: r report(mymod)
. For Χ2 using proper HTML leads to r report(mymod2)
.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.