# Global knitr chunk options for this vignette.
# Background on figure sizing:
#   https://www.jumpingrivers.com/blog/knitr-rmarkdown-image-size/
#   https://github.com/r-lib/pkgdown/issues/920
# Commented-out alternatives kept for reference:
#   dpi <- 96
#   fig_width <- 7.2916667
#   fig_height <- 4.479167
# Rendered image size: 696 x 430
knitr::opts_chunk$set(
  # Text output
  collapse = TRUE,
  comment = "#>",
  # Graphics device and figure geometry
  dev = "ragg_png",
  dpi = 96,
  fig.retina = 1,
  fig.width = 7.25,
  fig.height = 4.479167
)
# Setup
library(RUBer)

# register_font_df() attempts to register the RubFlama font family; if that
# fails, it registers a system-dependent font for the alias "sans" instead.
# The resulting family is passed to every plot below.
font_family <- RUBer::register_font_df()

# showtext must be switched on for custom fonts to be used.
showtext::showtext_auto(enable = TRUE)
# Font diagnostics: show which family was registered and which fonts are
# available on the machine rendering this document.
options(max.print = 1000)

# Family selected during setup
font_family

# Families currently known to sysfonts
sysfonts::font_families()

# First five columns of the system font table, up to 500 rows.
# select() is called directly instead of through `%>%`: only RUBer is attached
# here, so the magrittr pipe is not guaranteed to be on the search path.
print(
  dplyr::select(systemfonts::system_fonts(), 1:5),
  n = 500
)
# Run the packaged example scripts through RUBer's internal helper, one per
# figure type. The captured results are emitted further down with cat().
type_1_example_1 <- RUBer:::run_example_as_chunk(
  chunk_name = "rub-plot-type-1",
  example_file = "rub_plot_type_1.R"
)

type_2_example_1 <- RUBer:::run_example_as_chunk(
  chunk_name = "rub-plot-type-2",
  example_file = "rub_plot_type_2.R"
)

type_3_example_1 <- RUBer:::run_example_as_chunk(
  chunk_name = "rub-plot-type-3",
  example_file = "rub_plot_type_3.R"
)

type_4_example_1 <- RUBer:::run_example_as_chunk(
  chunk_name = "rub-plot-type-4",
  example_file = "rub_plot_type_4.R"
)

type_1_and_4_example_1 <- RUBer:::run_example_as_chunk(
  chunk_name = "rub-plot-type-1-and-4",
  example_file = "rub_plot_type_1_and_4.R"
)
The plotting functions in RUBer do not extend the
functionality provided by {ggplot2}
itself. By preconfiguring a lot of
the parameters, though, RUBer's plotting functions are meant to be more
accessible and easier to use. One of the inspirations for this was the
bbplot
package used by the data team at BBC News.
Available figure types:
The default settings of the plot functions are optimized for the
corporate design font, RUB Flama, and Windows Enhanced Metafile
(EMF) as
output format. Unfortunately, RUB Flama is not publicly available and cannot be made accessible to the GitHub server rendering this document and creating the example figures. In addition, the images in this document use a raster format, PNG, created by the {ragg}
device, rather than the EMF vector format created by {devEMF}
. Practically, all of this means that the default settings of the RUBer package do not work well for HTML output, including this document. You are not seeing the intended font, nor do the text sizes match what one would get in Microsoft Word.
For plotting vertical stacked bar charts, we use
RUBer::rub_plot_type_1
. Three variable names are mandatory: x_var
for the x-coordinate, y_var
for the y-coordinate and fill_var
for
the fill variable, which determines the groups to be stacked. Consider
this example:
# Emit the first type 1 example, one element per line.
cat(type_1_example_1, sep = "\n")
Next a more complex example, in which we additionally provide the label
for the y-axis, y_axis_label
and a caption indicating the source of the
data, caption
. By default, the caption has the German prefix "Quelle:",
which we change to English "Source:" using the parameter
caption_prefix
. We also want to suppress the value label for Master
students in the spring term of 2017, because the value is so small. By
default, labels for values accounting for less than 4% of the total
value are suppressed. In this case, the seven students account for
7/(7+122) = 5.4% of the total value, so we increase the value for
filter_cutoff
from the default of 0.04 to 0.06.
# Test data for the three mandatory variables (x_var, y_var, fill_var),
# built column by column: five spring terms for each of two degree programs.
df_t1_ex2 <- tibble::tibble(
  term = rep(
    c("Spring '13", "Spring '14", "Spring '15", "Spring '16", "Spring '17"),
    times = 2
  ),
  students = c(
    120, 105, 124, 114, 122, # Bachelor 1-Subject
    121, 129, 122, 168, 7    # Master 1-Subject
  ),
  degree = rep(c("Bachelor 1-Subject", "Master 1-Subject"), each = 5)
)

# Besides the mandatory variables, set the y-axis title, caption the data
# source, and suppress small value labels (all labels below 6% of the stacked
# total).
RUBer::rub_plot_type_1(
  df = df_t1_ex2,
  x_var = term,
  y_var = students,
  fill_var = degree,
  filter_cutoff = 0.06,
  caption_prefix = "Source:",
  caption = "Vignette example data",
  y_axis_label = stringr::str_wrap(
    string = "Students (1st degree program, 1st and 2nd field of study)",
    width = 35
  ),
  base_family = font_family,
  base_size = 14
)
The third example adds even more parameters. You can facet the figure by
a discrete variable, facet_var
, in order to make direct comparisons
between groups, e.g. different departments. You can also change a
figure's default color with color
, the default font with
base_family
, and the size of all text elements with base_size
(see
the documentation for RUBer::theme_rub()
).
# Test data for the three mandatory variables (x_var, y_var, fill_var) plus
# the optional facet variable (facet_var), built column by column:
# 2 departments x 2 degree programs x 5 spring terms.
df_t1_ex3 <- tibble::tibble(
  term = rep(
    c("Spring '13", "Spring '14", "Spring '15", "Spring '16", "Spring '17"),
    times = 4
  ),
  students = c(
    120, 105, 124, 114, 122, # Mathematics and Statistics, Bachelor
    121, 129, 122, 168, 7,   # Mathematics and Statistics, Master
    44, 55, 60, 40, 35,      # Philosophy, Bachelor
    90, 95, 88, 85, 92       # Philosophy, Master
  ),
  degree = rep(
    rep(c("Bachelor 1-Subject", "Master 1-Subject"), each = 5),
    times = 2
  ),
  department = rep(
    c(
      "Department of Mathematics and Statistics",
      "Department of Philosophy"
    ),
    each = 10
  )
)

# Faceting by department effectively yields two plots in one figure.
# The main color is changed from RUB blue to dark red, and the text size is
# increased from 11 to 14.
rub_plot_type_1(
  df = df_t1_ex3,
  x_var = term,
  y_var = students,
  fill_var = degree,
  facet_var = department,
  filter_cutoff = 0.06,
  color = RUB_colors["dark red"],
  caption_prefix = "Source:",
  caption = "Vignette example data",
  y_axis_label = stringr::str_wrap(
    string = "Students (1st degree program, 1st and 2nd field of study)",
    width = 35
  ),
  base_size = 14,
  base_family = font_family
)
Figure type 2, plotted with RUBer::rub_plot_type_2
, is very similar to
type 1. The main differences are that the y-axis uses a percentage scale
rather than an absolute one and that all stacked bars are scaled to
100%. Like for figure type 1, three variable names are mandatory:
x_var
for the x-coordinate, y_var
for the y-coordinate and
fill_var
for the fill variable, which determines the groups to be
stacked.
# Emit the first type 2 example, one element per line.
cat(type_2_example_1, sep = "\n")
Now let us say we are unhappy with the default ordering of the stacked
bars, because we want the largest cohort group, those still studying, on
top. We can use the boolean parameter fill_reverse
to do just that. We
also provide a discrete faceting variable, facet_var
, a text for
caption
and caption_prefix
, and a higher threshold for the
filter_cutoff
, leading to fewer labels being displayed.
# Status shares of two fictitious starting cohorts, observed in their 2nd,
# 4th, 6th and 8th cohort term. Within each cohort term the five shares are
# listed in the order given by `cohort_status`.
#
# The table is built column by column so that every term label is a single
# intact string. Two "8. cohort term" literals had been split by a line wrap,
# which embeds a newline in the label and creates a stray x-axis category.
df_t2_ex2 <- tibble::tibble(
  cohort_term = rep(
    rep(paste0(c(2, 4, 6, 8), ". cohort term"), each = 5),
    times = 2
  ),
  status_percentage = c(
    # Starting cohort fall 2011
    0.9513551740, 0.0029748098, 0.0004673679, 0.0186648938, 0.0265377545,
    0.8896149868, 0.0616919929, 0.0016484686, 0.0201024499, 0.0269421019,
    0.7901183540, 0.1502641318, 0.0074548056, 0.0243490259, 0.0278136827,
    0.6115873010, 0.2961468339, 0.0104080044, 0.0274549015, 0.0544029593,
    # Starting cohort fall 2012
    0.769899396, 0.173399178, 0.034702328, 0.006062833, 0.015936266,
    0.769421630, 0.173700319, 0.034742910, 0.006156721, 0.015978420,
    0.667217426, 0.228271544, 0.065634578, 0.010729695, 0.028146757,
    0.511075289, 0.353166732, 0.073315208, 0.026374195, 0.036068576
  ),
  cohort_status = rep(
    c(
      "Studying",
      "Changed subject",
      "Graduated",
      "Disenrolled without degree",
      "Dropped subject"
    ),
    times = 8
  ),
  cohort_label = rep(
    c(
      "Bachelor 1-Subject: Starting cohort fall 2011 (n=222)",
      "Bachelor 1-Subject: Starting cohort fall 2012 (n=240)"
    ),
    each = 20
  )
)

# Facet by cohort, reverse the fill order, and suppress labels below 6%.
rub_plot_type_2(
  df = df_t2_ex2,
  x_var = cohort_term,
  y_var = status_percentage,
  fill_var = cohort_status,
  facet_var = cohort_label,
  fill_reverse = TRUE,
  filter_cutoff = 0.06,
  caption_prefix = "Source:",
  caption = "Comparison of two fictitious cohorts",
  base_family = font_family,
  base_size = 14
)
Figure type 3, plotted with RUBer::rub_plot_type_3
, is very similar to
figure type 2, with the exception that the stacked bar charts are
plotted horizontally. As with the first two figure types, three variable
names are mandatory: x_var
for the x-coordinate, y_var
for the
y-coordinate and fill_var
for the fill variable, which determines the
groups to be stacked.
# Emit the first type 3 example, one element per line.
cat(type_3_example_1, sep = "\n")
The second example once again adds a faceting variable, facet_var
, for
plotting direct comparisons between groups. We also use caption
,
caption_prefix
, a filter_cutoff
that suppresses all values below
20%, and increase the text size to 14.
# Survey data: share of graduates within / exceeding the prescribed period of
# study, for four survey groups in two degree programs. Built column by
# column; each survey group contributes two consecutive rows.
# NOTE(review): the shares of "Bachelor 1-Subject (n=400)" sum to 0.90, while
# the other groups sum to 1 - confirm whether that is intended.
df_t3_ex2 <- tibble::tibble(
  survey_group = rep(
    c(
      "Bachelor 1-Subject (n=400)",
      "SG Bachelor 1-Subject (n=726)",
      "Master 1-Subject (n=369)",
      "SG Master 1-Subject (n=669)"
    ),
    each = 2
  ),
  item_value = rep(
    c(
      "Exceeded prescribed period of study",
      "Within prescribed period of study"
    ),
    times = 4
  ),
  item_value_percentage = c(
    0.30, 0.60,
    0.1122486, 0.8877514,
    0.1416009, 0.8583991,
    0.4417682, 0.5582318
  ),
  degree = rep(c("Bachelor 1-Subject", "Master 1-Subject"), each = 4)
)

# Facet by degree, caption the source, and suppress labels below 20%.
rub_plot_type_3(
  df = df_t3_ex2,
  x_var = item_value_percentage,
  y_var = survey_group,
  fill_var = item_value,
  facet_var = degree,
  filter_cutoff = 0.20,
  caption_prefix = "Source:",
  caption = "Graduate survey 2017/18",
  base_family = font_family,
  base_size = 14
)
In the last example above, the ordering of the fill variable,
item_value, is going from bad ("Exceeded prescribed period of study") to
good ("Within prescribed period of study"). This is because, by default,
the fill variable will be ordered alphabetically if not a factor. So
what if we want the fill variable ordered the other way? We can use the
optional boolean parameter fill_reverse
to change the ordering of the
fill variable and of the legend.
# Same figure as before, with both the fill order and the legend reversed.
rub_plot_type_3(
  df = df_t3_ex2,
  x_var = item_value_percentage,
  y_var = survey_group,
  fill_var = item_value,
  facet_var = degree,
  fill_reverse = TRUE,
  filter_cutoff = 0.20,
  caption_prefix = "Source:",
  caption = "Graduate survey 2017/18",
  base_family = font_family,
  base_size = 14
)
If we simply want to reverse the order of the legend items, without
actually reversing the order of the stacked bar segments, we can use the
optional parameter legend_reverse
instead.
# Same figure again, this time reversing only the legend.
rub_plot_type_3(
  df = df_t3_ex2,
  x_var = item_value_percentage,
  y_var = survey_group,
  fill_var = item_value,
  facet_var = degree,
  legend_reverse = TRUE,
  filter_cutoff = 0.20,
  caption_prefix = "Source:",
  caption = "Graduate survey 2017/18",
  base_family = font_family,
  base_size = 14
)
What if we want to use a custom ordering of the fill variable? Consider the following example:
# Survey data with a third answer category, "Unknown". Built column by
# column; each survey group contributes three consecutive rows.
df_t3_ex4 <- tibble::tibble(
  survey_group = rep(
    c(
      "Bachelor 1-Subject (n=400)",
      "SG Bachelor 1-Subject (n=726)",
      "Master 1-Subject (n=369)",
      "SG Master 1-Subject (n=669)"
    ),
    each = 3
  ),
  item_value = rep(
    c(
      "Exceeded prescribed period of study",
      "Within prescribed period of study",
      "Unknown"
    ),
    times = 4
  ),
  item_value_percentage = c(
    0.30, 0.60, 0.10,
    0.11, 0.77, 0.12,
    0.12, 0.83, 0.04,
    0.44, 0.50, 0.05
  ),
  degree = rep(c("Bachelor 1-Subject", "Master 1-Subject"), each = 6)
)

# Plot with the default ordering of the fill variable.
rub_plot_type_3(
  df = df_t3_ex4,
  x_var = item_value_percentage,
  y_var = survey_group,
  fill_var = item_value,
  facet_var = degree,
  caption_prefix = "Source:",
  caption = "Graduate survey 2017/18",
  base_family = font_family,
  base_size = 14
)
Here, we neither want alphabetic ordering, nor its reverse. Instead, we
would prefer an alphabetic ordering with the item_value "Unknown" always
coming last. For this, we can explicitly turn the fill_var
into a
factor with a predetermined ordering. The plotting function will respect
this ordering.
# Start from the data of the previous example
df_t3_ex5 <- df_t3_ex4

# Convert the fill column to a factor
df_t3_ex5$item_value <- factor(df_t3_ex5$item_value)

# Inspect the levels; the default ordering is alphabetic
levels(df_t3_ex5$item_value)

# Move the level "Unknown" to the very end
df_t3_ex5$item_value <- forcats::fct_relevel(
  df_t3_ex5$item_value,
  "Unknown",
  after = Inf
)

# Inspect the new ordering
levels(df_t3_ex5$item_value)

# Plot
rub_plot_type_3(
  df = df_t3_ex5,
  x_var = item_value_percentage,
  y_var = survey_group,
  fill_var = item_value,
  facet_var = degree,
  caption_prefix = "Source:",
  caption = "Graduate survey 2017/18",
  base_family = font_family,
  base_size = 14
)
# Emit the type 4 example, one element per line.
cat(type_4_example_1, sep = "\n")

# Emit the combined type 1 and 4 example, one element per line.
cat(type_1_and_4_example_1, sep = "\n")
The level of flexibility and customizability achievable through the
plotting functions of RUBer is limited. At some point, it makes more
sense to properly learn {ggplot2}
and to use the individual components
directly (scales, colors, etc.). See the resources section
for help on getting started with {ggplot2}
.
The function theme_rub
is a normal ggplot2::theme()
function based
on ggplot2::theme_minimal()
. You can use it for any ggplot object:
# Base plot: an ordinary ggplot scatter plot with theme_rub() added.
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = as.factor(carb))
) +
  ggplot2::geom_point() +
  theme_rub(base_family = font_family)
The RUB palettes are used through corresponding scale functions.
Currently, (1) scale_color_rub
(a wrapper for
ggplot2::discrete_scale()
if discrete, or for
ggplot2::scale_color_gradientn()
if continuous) and (2)
scale_fill_rub
(a wrapper for ggplot2::discrete_scale()
if discrete,
or for ggplot2::scale_fill_gradientn()
if continuous) are implemented.
Use these scale functions to ensure that appropriate RUB palettes and
colors are used.
# Basic themed plot with the discrete scale function
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = as.factor(carb))
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "discrete") +
  theme_rub(base_size = 14, base_family = font_family)

# By default, the theme shows no axis labels and no legend title. Use the
# boolean parameters x_axis_label, y_axis_label and legend_title to activate
# them. Here only the axis labels are switched on.
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = as.factor(carb))
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "discrete") +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = FALSE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )

# Reverse the color palette by setting reverse to TRUE in the scale function
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = as.factor(carb))
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "discrete", reverse = TRUE) +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = FALSE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )
Continuous scales are used by setting discrete
to FALSE
and by
providing the name of a continuous palette to palette
(see
vignette("RUB_colors")
).
# Basic plot with the continuous scale function
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = disp)
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "continuous", discrete = FALSE) +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = TRUE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )

# Reverse the color palette by setting reverse to TRUE in the scale function
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = disp)
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "continuous", discrete = FALSE, reverse = TRUE) +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = TRUE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )

# Example of the continuous_diverging palette
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = mpg, y = disp, color = disp)
) +
  ggplot2::geom_point(size = 3) +
  scale_color_rub(palette = "continuous_diverging", discrete = FALSE) +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = TRUE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )
# Bar chart of gear counts, filled by vs, using the RUB fill scale.
ggplot2::ggplot(
  data = mtcars,
  mapping = ggplot2::aes(x = as.factor(gear), fill = as.factor(vs))
) +
  ggplot2::geom_bar() +
  scale_fill_rub(palette = "discrete_2") +
  theme_rub(
    base_size = 14,
    base_family = font_family,
    legend_title = TRUE,
    x_axis_label = TRUE,
    y_axis_label = TRUE
  )
# Plotting is done: switch showtext off again.
showtext::showtext_auto(enable = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.