Plots gallery

# Silence the tidyverse start-up banner when the package is attached.
options(tidyverse.quiet = TRUE)

# Default knitr chunk options applied to every code chunk of this document:
# source and output are collapsed into one block, warnings are not printed,
# and printed output lines are prefixed with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  warning = FALSE,
  comment = "#>"
)

This article is a how-to-plot page that covers the most frequently used charts. It uses the Profinit color theme, of course. We start with displaying distributions, then proportions and relations. Each topic has an initial setup followed by a couple of collapsed sections describing various use-cases. Code for ggplot2 is provided; some of the charts are covered by base R graphics code, too.

If you find a bug or would like to suggest edits or contributions, feel free to either create a pull request or raise an issue in the issue tracker.

It is not the purpose of this page to cover all the use-cases, though. For a more detailed guide on how to design a good chart, take a look at the Fundamentals of data visualization (either online or in Profinit's library).

Setup

As a toy dataset, let's use the dplyr::starwars dataset of Star Wars characters. Beware, it contains information from the first 7 films in the series only.

# load packages
library(tidyverse)
library(profiplots)
library(ggalluvial)
library(ggrepel)

# Switch the plotting defaults to the Profinit look: "blue-red" is passed as
# the general palette name and "discrete" as the discrete palette name.
profiplots::set_theme(
  pal_name_discrete = "discrete",
  pal_name = "blue-red"
)

# Film titles in the order used throughout this article; the position of a
# title in this vector serves as its ordering key.
movie_series <- c(
  "The Phantom Menace",
  "Attack of the Clones",
  "Revenge of the Sith",
  "A New Hope",
  "The Empire Strikes Back",
  "Return of the Jedi",
  "The Force Awakens"
)

#' Map film titles to their position in `movie_series`.
#'
#' @param movie_names Character vector of film titles.
#' @return Numeric vector of the same length as `movie_names` holding the
#'   1-based position of each title in `movie_series`; `NA` for a title that
#'   is not listed there.
get_movie_order <- function(movie_names) {
  # `match()` is vectorised and yields NA for an unknown title, whereas an
  # element-wise `which()` lookup returns a zero-length result that cannot be
  # stored in a numeric vector.
  as.numeric(match(movie_names, movie_series))
}

# Prepare the dataset: Star Wars characters plus a few derived columns used
# by the plots below.
sw <-
  dplyr::starwars %>%
  mutate(
    # body mass index; height is divided by 100 before squaring
    bmi = mass / (height / 100)^2,
    # "Droid" when sex == "none", "Other" otherwise, explicit "N/A" level for missing sex
    is_droid = forcats::fct_explicit_na(if_else(sex == "none", "Droid", "Other"), "N/A"),
    # of all the films a character appears in, pick the one that comes first in `movie_series`
    first_film = purrr::map_chr(films, function(movies) {
      movie_ord <- get_movie_order(movies)
      movies[which.min(movie_ord)]
    }),
    # `levels` (not `labels`) must carry the film order: with `labels` only,
    # factor() sorts the observed titles alphabetically and then renames them
    # one by one, which attaches wrong film names to the rows.
    first_film = factor(first_film, levels = movie_series, ordered = TRUE),
    been_in_jedi = purrr::map_lgl(films, ~"Return of the Jedi" %in% .),
    # number of films the character appears in
    n_films = purrr::map_dbl(films, length)
  )

Distributions

Barplot

Use-case: Visualization of discrete variables distributions.

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Basic barplot: number of characters per gender.
# Missing genders become an explicit level, then the levels are ordered by
# frequency (the usual choice for nominal values).
plt <-
  sw %>%
  mutate(gender = forcats::fct_infreq(forcats::fct_explicit_na(gender))) %>%
  ggplot(mapping = aes(x = gender)) +
  stat_count(geom = "bar") +
  labs(
    title = "Gender distribution among StarWars characters",
    x = "Character gender",
    y = "Count"
  )
plt

plt

See also

Colored barplot

  • Avoid multiple colors (except highlighting)
  • Prefer colors from profinit_cols() palette, e.g. profinit blue (r profinit_cols("blue")) and profinit red (r profinit_cols("red")).
sw %>%
  mutate(gender = forcats::fct_infreq(forcats::fct_explicit_na(gender))) %>%
  ggplot() +
  aes(x = gender) +
  # a constant `fill` (set outside of aes) colors all the bars alike
  stat_count(fill = profinit_cols("blue"), geom = "bar") +
  ggtitle("Gender distribution among StarWars characters") +
  xlab("Character gender") +
  ylab("Count")

Horizontal barplot

sw %>%
  mutate(
    # explicit NA level -> order by frequency -> reverse the order, so that
    # the most frequent category ends up on top once the axes are flipped
    gender = gender %>%
      forcats::fct_explicit_na() %>%
      forcats::fct_infreq() %>%
      forcats::fct_rev()
  ) %>%
  ggplot(aes(x = gender)) +
  stat_count(fill = profinit_cols("blue"), geom = "bar") +
  labs(
    title = "Gender distribution among StarWars characters",
    y = "Count",
    x = NULL        # drop the axis title when the plot title is self-explanatory
  ) +
  coord_flip()      # flipping the coordinates makes the barplot horizontal

Highlighting a category

# A highlighting scale keyed by the string form of a logical indicator.
# Define it once and reuse it, so that highlighting stays consistent within a report.
higlighting_cols <- stats::setNames(
  profinit_cols("grey", "red"),
  c("FALSE", "TRUE")
)

sw %>%
  mutate(gender = forcats::fct_infreq(forcats::fct_explicit_na(gender))) %>%
  ggplot() +
  # the logical `gender == "feminine"` drives the fill, i.e. the highlighted category
  aes(x = gender, fill = gender == "feminine") +
  stat_count(geom = "bar") +
  labs(
    title = "Gender distribution among StarWars characters",
    x = "Character gender",
    y = "Count"
  ) +
  guides(fill = "none") +                         # the x axis already names the categories
  scale_fill_manual(values = higlighting_cols)    # indicator value -> color

Ordinal variables

sw %>%
  # Ordinal values keep their natural order: the film counts are turned into
  # text labels and the factor levels are then sorted by their numeric value,
  # which gives a discrete, well-ordered x axis.
  mutate(n_films = forcats::fct_inseq(as.character(n_films))) %>%
  ggplot() +
  aes(x = n_films) +
  stat_count(geom = "bar") +
  labs(
    title = "What is the character durability in StarWars films?",
    x = "In how many films is the character present?",
    y = "Character count"
  )

Too high numbers (truncating the y axis)

# Fixed seed, so the artificial sample is reproducible.
set.seed(123)

data.frame(
  x = sample(LETTERS[1:7], size = 1e5, replace = TRUE, prob = 1/(1 + 1/(1:7)))
) %>%
  ggplot() +
  aes(x = x) +
  stat_count(geom = "point") +           # swap the geom of the count stat: bar -> point
  labs(
    title = "Frequency of artificial categories",
    x = "Category",
    y = "Number of observations"
  ) +
  scale_y_continuous(
    labels = scales::number,             # nicer numbers on the y axis
    limits = c(7000, 17000)              # truncated y axis
  )

baseR

CODE

# TODO

# TODO

Tips

:::

Histogram

Use case: Visualization of continuous variables distribution.

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Basic histogram: character heights split into 20 bins.
plt <-
  ggplot(data = sw, mapping = aes(x = height)) +
  stat_bin(bins = 20, geom = "bar") +
  labs(
    title = "Height distribution of StarWars characters",
    x = "Height [cm]",
    y = "Count"
  )
plt

plt

See also

Colored histogram

  • Avoid multiple colors
  • Prefer colors from profinit_cols() palette, e.g. profinit blue (r profinit_cols("blue")) and profinit red (r profinit_cols("red")).
ggplot(sw) +
  aes(x = height) +
  # a constant `fill` colors all the bins alike
  stat_bin(fill = profinit_cols("blue"), bins = 20, geom = "bar") +
  ggtitle("Height distribution of StarWars characters") +
  xlab("Height [cm]") +
  ylab("Count")

Tweak bin sizes

  • You can specify either number of bins or bin width.
  • Beware of the defaults; bin sizes might significantly change the message!
ggplot(sw, aes(x = height)) +
  stat_bin(binwidth = 25, geom = "bar") +     # bin width given instead of the number of bins
  labs(
    title = "Height distribution of StarWars characters",
    subtitle = "Binwidth set to 25",
    x = "Height [cm]",
    y = "Count"
  )

Multiple sub-populations

  • You can use colors to distinguish sub-populations.
  • Do not stack nor dodge the bars. Use transparency.
  • Avoid in case of multiple subgroups (e.g., 4+), use KDE instead (maybe as a linechart, not an areachart).
  • Anyway, KDE could be a better way if the audience is skilled enough.
sw %>%
  tidyr::drop_na(sex) %>%
  ggplot() +
  aes(x = height, fill = is_droid) +
  # overlaid (not stacked, not dodged), semi-transparent bars
  stat_bin(alpha = .7, position = "identity", bins = 20, geom = "bar") +
  scale_fill_profinit(palette = "discrete-full", exact = TRUE) +
  theme(legend.position = "bottom") +           # legend below, so the distribution gets the full plot width
  labs(
    title = "Height distribution of StarWars characters",
    fill = "Character type",
    x = "Height [cm]",
    y = "Count"
  )

Fixing the color mapping

  • For reporting purpose, it's better to fix the mapping instead of relying on color scale defaults.
  • Creating a named vector of colors, you're able to use it elsewhere throughout the document.
# Fixed level -> color mapping for `is_droid`; it is reused by the later plots
# in this document to keep the colors consistent.
is_droid_color_mapping <- stats::setNames(
  profinit_pal("discrete-full")(3),
  c("Droid", "Other", "N/A")
)

sw %>%
  tidyr::drop_na(sex) %>%
  ggplot() +
  aes(x = height, fill = is_droid) +
  stat_bin(alpha = .7, position = "identity", bins = 20, geom = "bar") +
  scale_fill_manual(values = is_droid_color_mapping) +
  theme(legend.position = "bottom") +
  labs(
    title = "Height distribution of StarWars characters",
    fill = "Character type",
    x = "Height [cm]",
    y = "Count"
  )

baseR

# Base R histogram of character heights.
# TODO: change axes style
# TODO: add grid
hist(
  x = sw$height,
  main = "Distribution of heights of StarWars characters",
  xlab = "Height [cm]",        # always state the units
  ylab = "Count",
  breaks = 20,                 # (optional) override the default number of bins
  col = profinit_cols("blue"), # fill color of the bins; `blue`, `red` or `grey` from `profinit_cols()` are preferable
  border = NA                  # color of the bin borders; NA turns them off
)

Tips

:::

KDE

Use case: Continuous variables distribution for skilled audience. Esp. useful in case of multiple subgroups to be plotted on one chart.

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Basic KDE: BMI density of the characters with mass below 1000.
plt <-
  sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = bmi) +
  stat_density() +
  labs(
    title = "BMI distribution of StarWars characters",
    caption = "Characters under 1000kg  only.",
    x = "Body mass index",
    y = "Density"
  )
plt

plt

See also

Colored KDE

  • Avoid multiple colors.
  • Avoid color line.
  • Prefer colors from profinit_cols() palette, e.g. profinit blue (r profinit_cols("blue")) and profinit red (r profinit_cols("red")).
sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = bmi) +
  stat_density(fill = profinit_cols("blue")) +     # a constant fill colors the whole density area
  ggtitle("BMI distribution of StarWars characters") +
  xlab("Body mass index") +
  ylab("Density") +
  labs(caption = "Characters under 1000kg  only.")

Subgroups (small number)

  • Use transparency low enough to let reader spot the individual subgroups overlapping.
  • Map the grouping variable to colors.
  • In case of multiple groups you may run out of colors in the palette (then it's better to avoid fill, or make it very transparent, and use the outline instead; see below). In that case you can use scale_fill_profinit(palette = "discrete", exact = FALSE) to get some colors interpolated from the palette.
sw %>%
  tidyr::drop_na(sex) %>%
  ggplot() +
  aes(x = height, fill = is_droid) +
  stat_density(
    position = "identity",   # overlay the groups; the default would stack them
    alpha = .8               # transparency keeps overlapping groups readable
  ) +
  # the fixed mapping keeps the colors consistent with the other plots
  scale_fill_manual(values = is_droid_color_mapping) +
  theme(legend.position = "bottom") +
  labs(
    title = "Height distribution of StarWars characters",
    x = "Height [cm]",
    y = "Density",
    fill = "Character type"
  )

Subgroups (high number)

  • Takeaway: it could be clumsy; consider whether it fits your needs. Instead, you can:
  • Use subplots (see below)
  • Focus on the most relevant categories & put others to background via highlighting (see below).
  • In case you still want to have a KDE plot with multiple individual categories you can use the following:
  • Make them transparent enough to not overlap each other completely.
  • Use
ggplot(sw) +
  aes(x = height, fill = first_film) +
  # a very transparent fill lets all the overlaid densities stay visible
  stat_density(alpha = .3, position = "identity") +
  scale_fill_profinit_d("blue-red") +
  guides(
    color = "none",                                         # avoid a duplicated legend
    fill = guide_legend(override.aes = list(alpha = .8))    # legend keys drawn less transparent than the areas
  ) +
  labs(
    title = "Height distribution of StarWars characters",
    subtitle = "Given the first films the character played in",
    fill = "First film of the character",
    x = "Height [cm]",
    y = "Density"
  )

Subgroups - highlighting

  • In case of a large number of subgroups it may not be necessary to highlight all of them at once.
  • Pick the one you're telling the story about and make it stand out.
  • Set a fill mapping. Use a manual scale. Hide the fill legend.
  • Set an alpha mapping. Use a manual scale. Hide the alpha legend.
  • Annotate directly in the graph to explain what you're highlighting.

# Fill and transparency of the highlighted ("TRUE") vs. background ("FALSE")
# groups; the keys are the string form of the logical indicator built below.
higlight_fill_mapping <- c("TRUE" = profinit_cols("red"), "FALSE" = profinit_cols("grey"))
higlight_alpha_mapping <- c("TRUE" = .75, "FALSE" = .25)

# The `first_film` level to be highlighted.
higlight_cat <-  "Revenge of the Sith"

# NOTE(review): the annotation coordinates and the claim made in the title
# look tuned by eye against the rendered densities -- re-check them whenever
# `first_film` or `higlight_cat` changes.
sw %>% 
  mutate(
    higlight_group = as.character(first_film == higlight_cat)
  ) %>% 
  ggplot(
    aes(
      x = height, 
      group = first_film,          # one density per film ...
      fill = higlight_group,       # ... styled by the highlighting indicator
      alpha = higlight_group
    )
  ) + 
  stat_density(
    position = "identity"
  ) + 
  scale_fill_manual(values = higlight_fill_mapping) + 
  scale_alpha_manual(values = higlight_alpha_mapping) + 
  # direct annotations replace the legends
  annotate(x = 135, y = 0.014, geom = "text", label = "Revenge\nof the Sith", color = profinit_cols("red")) + 
  annotate(x = 240, y = 0.014, geom = "text", label = "A New Hope", color = profinit_cols("grey"), alpha = .4) + 
  annotate(x = 210, y = 0.045, geom = "text", label = "The Phantom\nMenace", color = profinit_cols("grey"), alpha = .4) + 
  guides(alpha =  "none", fill = "none") +       # hide both legends
  labs(
    x = "Height [cm]",
    y = "Density",
    title = paste0("Characters in SW: ", higlight_cat, " tend to be smaller"),
    subtitle = "Characters grouped by the first SW films they played in"
  )

baseR

CODE

# TODO: provide more straightforward approach

# Kernel density estimate of the heights, missing values dropped.
height_density <- density(sw$height, na.rm = TRUE)

# First draw an invisible curve (col = NA) to set up the axes and titles ...
plot(
  height_density,
  main = "Height distribution of StarWars characters",
  xlab = "Height [cm]",
  ylab = "Density",
  col = NA
)
# ... then paint the density as a filled polygon without a border.
polygon(
  x = height_density,
  border = NA,
  col = profinit_cols("blue")
)

plt

Tips

:::

Proportions

Single Variable

Use-case: Visualizing proportions of category levels. (Avoiding pie-chart).

::: {.panel-tabset .nav-pills}

ggplot

CODE

  • Use stat_count to get summary stats out of the raw dataset.
  • Otherwise, aggregate the dataset upfront & use stat_identity.
  • Use bar geom (default, therefore I'm not specifying it here).
  • Get rid of the x axis (redundant). Still, there needs to be mapping, so we use x = 1 here.
  • Use color mapping.
  • Here we're using a droid color mapping introduced above (for consistency reasons).
  • You can annotate the sections directly (will be shown in one of the use-cases below). You can get rid of the legend completely.
  • Again, you can highlight just one level etc. (See the sections above for more customization ideas).
# Single stacked bar: droid vs. other character counts (known status only).
plt <- 
  sw %>% 
  filter(is_droid != "N/A") %>% 
  ggplot() +
  # `after_stat(count)` replaces the deprecated `..count..` notation;
  # x = 1 is a dummy position, a single bar is all we need
  aes(fill = is_droid, x =  1, y = after_stat(count)) + 
  stat_count(position = "stack") + 
  guides(x = "none") +                                        # the x axis carries no information
  scale_fill_manual(values = is_droid_color_mapping) + 
  scale_y_continuous(breaks = seq(0, 100, 10)) +              # customize Y axis ticks position
  labs(
    x = NULL,
    y = "Character count",
    fill = "Character type",
    title = "Proportion of droids among SW characters",
    subtitle = "Based on dplyr::starwars dataset",
    caption = "Characters with known status only"
  ) + 
  theme(
    legend.position = "bottom"
  )

plt

See also

Add labels

  • Specify label aesthetics to set what should be displayed on the label.
  • Add another layer via stat_count
  • Pick either label or text to be geom.
  • Use position_stack instead of "stack" to be able to fine-tune the chart. In this case, I'm using vjust to center the label vertically.
sw %>% 
  filter(is_droid != "N/A") %>% 
  ggplot() +
  # `after_stat(count)` replaces the deprecated `..count..` notation;
  # the same computed count is used as bar height and as label text
  aes(fill = is_droid, x =  1, y = after_stat(count), label = after_stat(count)) +      
  stat_count(position = "stack") + 
  # second layer: text labels, centered vertically within each stacked segment
  stat_count(position = position_stack(vjust = 0.5), geom = "text") + 
  guides(x = "none") + 
  scale_fill_manual(values = is_droid_color_mapping) + 
  scale_y_continuous(breaks = seq(0, 100, 10)) + 
  labs(
    x = NULL,
    y = "Character count",
    fill = "Character type",
    title = "Proportion of droids among SW characters",
    subtitle = "Based on dplyr::starwars dataset",
    caption = "Characters with known status only"
  ) + 
  theme(
    legend.position = "bottom"
  )

Relative numbers

  • Change position from stack to fill to get relative numbers on the y axis.
  • Do not forget to adjust the scale_y_continuous breakpoints appropriately (if in use).
  • You may utilize scales::percent for nice-looking axis labels as well.
  • To adjust the labels to relative numbers you can use after_stat(count / sum(count)).
  • But it might be easier to aggregate upfront and use stat_identity instead...
sw %>% 
  filter(is_droid != "N/A") %>% 
  ggplot() +
  # `after_stat(count)` replaces the deprecated `..count..` notation;
  # y stays a plain count, position "fill" rescales it to proportions
  aes(fill = is_droid, x =  1, y = after_stat(count)) + 
  stat_count(position = "fill") + 
  guides(x = "none") + 
  scale_fill_manual(values = is_droid_color_mapping) +  # to fix the color mapping
  scale_y_continuous(
    breaks = seq(0, 1, .1),                             # customize Y axis ticks position
    labels = scales::percent_format(suffix = " %")) +   # customize Y axis ticks labels, use ` %` (CZ) or `%` (EN)
  labs(
    x = NULL,
    y = "Proportion of characters",
    fill = "Character type",
    title = "Proportion of droids among SW characters",
    subtitle = "Based on dplyr::starwars dataset",
    caption = "Characters with known status only"
  ) + 
  theme(
    legend.position = "bottom"                          # to have the legend below the chart
  )

Horizontal bar

  • You can either change the aes() mapping OR use coord_flip()
  • You need to adjust
  • Turn off the y axis legend (instead of x).
  • Set the x axis customization (instead of y).
sw %>% 
  filter(is_droid != "N/A") %>% 
  ggplot() +
  # `after_stat(count)` replaces the deprecated `..count..` notation
  aes(fill = is_droid, x =  1, y = after_stat(count)) +       # You can change the x and y mapping (not shown)
  coord_flip() +                                              # ... or just flip the axes
  stat_count(position = "stack") + 
  # NOTE(review): with coord_flip() the aesthetics keep their names (the counts
  # are still `y`), so verify on the rendered plot that the two lines below
  # address the intended axes.
  guides(y = "none") +                                        # Turn off the 'primary' axis, y in this case
  scale_fill_manual(values = is_droid_color_mapping) +        # Set the color mapping to be consistent
  scale_x_continuous(breaks = seq(0, 100, 10)) +              # customize x axis ticks position
  labs(
    x = NULL,
    y = "Character count",
    fill = "Character type",
    title = "Proportion of droids among SW characters",
    subtitle = "Based on dplyr::starwars dataset",
    caption = "Characters with known status only"
  ) + 
  theme(
    legend.position = "bottom"                                # Set the legend position
  )

base R

TODO

Tips

:::

Two variables, proportion of two categories

Use-case: Visualizing proportions of a category levels in different subgroups based on another variable.

In this case, the best way is to use side-by-side stacked barplots (with fill option).

::: {.panel-tabset .nav-pills}

ggplot

# Side-by-side filled bars: share of droids within each gender.
sw %>% 
  filter(!is.na(gender)) %>% 
  mutate(is_droid = forcats::fct_rev(is_droid)) %>%    # reverse the level order, which also reverses the stacking order
  ggplot() + 
  aes(x = gender, fill = is_droid) + 
  stat_count(position = position_fill()) +             # "fill" rescales every bar to 100 %
  scale_y_continuous(breaks = seq(0, 1, .1), labels = scales::percent) + 
  scale_fill_manual(values = is_droid_color_mapping) + 
  labs(
    title = "Droid proportion is the same across Gender",
    x = "Gender",
    y = "Proportion of droids",
    fill = "Character type",
    caption = "Characters with known Gender only"
  )

Add horizontal line

  • E.g., to highlight the population mean.
# Share of droids among all characters of `sw`, and its text label.
droid_prop_overall <- mean(sw$is_droid == "Droid", na.rm = TRUE)
droid_prop_overall_label <- paste0("Overall mean: ", scales::percent(droid_prop_overall, accuracy = .01))

sw %>% 
  filter(!is.na(gender)) %>% 
  mutate(is_droid = forcats::fct_rev(is_droid)) %>% 
  ggplot() + 
  aes(x = gender, fill = is_droid) + 
  stat_count(position = position_fill()) + 
  # geom_hline() draws a single reference line from the constant; an identity
  # stat with geom "hline" would draw one identical line per data row
  geom_hline(yintercept = droid_prop_overall, linetype = "dashed", color = profinit_cols("grey")) +
  # text label placed slightly below the line
  annotate(x = 2.1, y = droid_prop_overall - .01, geom = "text", label = droid_prop_overall_label, size = 2.5) + 
  scale_y_continuous(breaks = seq(0, 1, .1), labels = scales::percent) + 
  scale_fill_manual(values = is_droid_color_mapping) + 
  labs(
    title = "Droid proportion is the same across Gender",
    x = "Gender",
    y = "Proportion of droids",
    fill = "Character type",
    caption = "Characters with known Gender only"
  )

base R

TODO

Tips

:::

Two variables, proportion of 3+ categories

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Dodged barplot: per gender, how many characters first appeared in each film.
plt <-
  sw %>%
  tidyr::drop_na(gender) %>%
  ggplot(aes(x = gender, fill = first_film)) +
  # preserve = "single" keeps the width of a single bar constant across groups
  stat_count(position = position_dodge(preserve = "single")) +
  scale_fill_profinit() +
  labs(
    title = "At what film first star the most characters of given gender",
    fill = "First SW film of the character",
    x = "Gender of the character",
    y = "Count"
  )
plt

plt

See also:

Continuous X variable

  • Use stacked (with opt. fill) KDE plot instead:
sw %>%
  tidyr::drop_na(gender) %>%
  mutate(is_droid = forcats::fct_rev(is_droid)) %>%       # the more important level comes first
  ggplot(aes(x = height, fill = is_droid)) +
  stat_density(position = position_fill(), geom = "area") +   # stacked densities rescaled to proportions
  scale_y_continuous(breaks = seq(0, 1, .1), labels = scales::percent) +   # pleasant y-axis labels
  scale_fill_manual(values = is_droid_color_mapping) +    # consistent colors across the report
  theme(legend.position = "bottom") +
  labs(
    title = "Proportion of DROIDS among SW characters of a given height",
    caption = "Characters with known gender only",        # describe the population in use
    fill = "Character type",
    x = "Height [cm]",
    y = "Proportion of droids"
  )

Alluvial plots

  • Alluvial plots (přelivové grafy in CZ) allow you to show how the proportions are interconnected.
  • It's especially useful for repeated measures (e.g., elections etc.)
  • How to construct:
  • Pre-calculate a contingency table (frequency of all categories combination)
  • Assign mapping:
    • y = group size (you might use a proportional value, e.g., n/sum(n) as well).
    • axis1, axis2, ... = individual variables to be compared
    • add geom_stratum -- this makes the bars
    • add geom_alluvium -- this makes the stream between
    • you may add geom_text(stat = "stratum", aes(label = after_stat(stratum))) as well to add individual stratum descriptions
#' Flag which films of `movie_series` are contained in `films`.
#'
#' @param films Vector of film titles.
#' @return Numeric 0/1 vector with one element per title in `movie_series`,
#'   named by those titles.
is_movie_present <- function(films) {
  presence_flags <- as.numeric(movie_series %in% films)
  stats::setNames(presence_flags, movie_series)
}



# Contingency table for the alluvial plot: the number of characters for each
# combination of presence in the three selected films.
sw_wide_agg <- 
  sw %>% 
  mutate(
    x = purrr::map(films, is_movie_present)     # named 0/1 flags, one per film
  ) %>% 
  unnest_wider(x) %>%                           # one flag column per film
  # keep the characters present in at least one of the three films
  filter(`A New Hope` == 1 | `Return of the Jedi` == 1 | `The Force Awakens` == 1) %>% 
  group_by(`A New Hope`, `Return of the Jedi`, `The Force Awakens`) %>% 
  summarise(n = n(), .groups = "drop") %>% 
  # across() replaces the superseded mutate_at(): 0/1 flags -> readable labels
  mutate(
    across(
      c(`A New Hope`, `Return of the Jedi`, `The Force Awakens`),
      ~ ifelse(.x == 1, "Present", "Skipped")
    )
  )

ggplot(sw_wide_agg) +
  aes(
    axis1 = `A New Hope`,
    axis2 = `Return of the Jedi`,
    axis3 = `The Force Awakens`,
    y = n/sum(n)                                  # group size as a share of all the characters in the table
  ) +
  geom_alluvium(aes()) +                          # the streams between the bars
  geom_stratum(aes(fill = after_stat(stratum)), color = "#00000000") +   # the bars, with a fully transparent outline
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(limits = c("A New Hope", "Return of the Jedi", "The Force Awakens")) +
  theme(legend.position = "bottom") +
  labs(
    title = "Characters being recycled in last 3 movies",
    caption = "Characters present in at least one of the movies",
    fill = "Character in the movie",
    y = "Proportion of characters",
    x = NULL
  )

base R

TODO

Tips

:::

Relations

Scatterplot

Use-case: Visualizing relationship of two numeric variables. Visualizing trend (target ~ regressor).

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Basic scatterplot: height vs. mass of the characters with mass below 1000.
plt <-
  sw %>% 
  filter(mass < 1e3) %>% 
  ggplot(aes(x = height, y = mass)) + 
  geom_point() + 
  labs(
    x = "Height [cm]",
    y = "Weight [kg]",
    title = "Height ~ weight relation of StarWars characters",
    # `caption` is a label ggplot2 actually renders; a `note` entry is not shown
    caption = "Characters weighing less than 1 t"          # Indicate population filters!
  )
plt

plt

See Also

Highlighting

# Fixed color mapping of the BMI status; reused by the labelled scatterplot.
obesity_color_mapping <- c("Overweight" = profinit_cols("red"), "Slim" = profinit_cols("grey"))
sw %>% 
  filter(mass < 1e3) %>% 
  mutate(
    # BMI above 33 is flagged as "Overweight", everything else as "Slim"
    higlight = if_else(bmi > 33, "Overweight", "Slim")
  ) %>% 
  ggplot(aes(x = height, y = mass, color = higlight)) + 
  scale_color_manual(values = obesity_color_mapping) + 
  geom_point() + 
  labs(
    x = "Height [cm]",
    y = "Weight [kg]",
    title = "StarWars characters with obesity problem",
    subtitle = "Height ~ weight relation of StarWars characters",
    caption = "Characters weighing less than 1000kg only\nBMI = weight[kg]/(height[m])^2"            # Indicate population filters!
  ) + 
  theme(
    legend.position = "bottom"
  )

Adding labels

  • Use either geom_text (without frame) or geom_label.
  • Filter down the dataset to limit number of labels. You can utilize data argument to provide filtering.
  • In case of overlapping you can use either:
  • nudging (moving the label; in units of the respective axis)
  • ggrepel's functions geom_text_repel and geom_label_repel, which try to place the labels so that they do not overlap.
sw %>% 
  filter(mass < 1e3) %>% 
  mutate(
    higlight = if_else(bmi > 33, "Overweight", "Slim")
  ) %>% 
  ggplot(aes(x = height, y = mass, color = higlight)) + 
  geom_text_repel(                                                    # text labels without a frame
    data = function(d) filter(d, mass/(height/100)^2 > 33),           # You can use `data` to provide filtering
    aes(label = name),                                                # Specify the label (text) mapping
    size = 2.2
  ) + 
  geom_point() + 
  scale_color_manual(values = obesity_color_mapping) + 
  labs(
    x = "Height [cm]",
    y = "Weight [kg]",
    color = "BMI status", 
    title = "Overweighted characters in StarWars",
    subtitle = "Characters with BMI > 33",
    # `caption` is a label ggplot2 actually renders; a `note` entry is not shown
    caption = "Characters weighing less than 1 t"          # Indicate population filters!
  ) + 
  theme(
    legend.position = "bottom"
  )

Adding fitted lines

sw %>% 
  filter(mass < 1e3, is_droid != "N/A") %>% 
  ggplot(aes(x = height, y = mass, color = is_droid)) + 
  geom_point(size = 1) + 
  geom_smooth(method = "lm", se = FALSE, formula = "y~x") + # THIS way you introduce best LM fit y~x without error bound
  labs(
    x = "Height [cm]",
    y = "Weight [kg]",
    title = "Height ~ weight relation of StarWars characters",
    # `caption` is a label ggplot2 actually renders; a `note` entry is not shown
    caption = "Characters weighing less than 1 t\nTrend line of `y ~ x`"
  )

Adding arbitrary lines

  • Use colors from Profinit palette for added lines (either profinit red (r profinit_cols("red")) or profinit blue (r profinit_cols("blue")) depending on your report theme).
# The row(s) of R2-D2; its height positions the vertical reference line.
r2d2 <-
  sw %>% 
  filter(name == "R2-D2")

sw %>% 
  filter(mass < 1e3) %>% 
  ggplot(aes(x = height, y = mass, color = name == "R2-D2")) + 
  geom_point(size = 1) + 
  geom_abline(intercept = 150, slope = -.05, color = profinit_cols("blue")) +   # THIS is the way to add arbitrary line
  annotate(x = 220, y = 135, label = "Arbitrary line", color = profinit_cols("blue"), geom = "text", size = 2.7) + 
  geom_vline(xintercept = r2d2$height, linetype = "dashed", color = profinit_cols("red")) +                       # THIS way you introduce vertical lines & customize their line style
  annotate(x = r2d2$height, y = 135, label = "R2-D2", geom = "text", color = profinit_cols("red"), hjust=1.2, size = 2.7) + 
  scale_color_manual(values = c("TRUE" = profinit_cols("red"), "FALSE" = profinit_cols("grey"))) + 
  labs(
    x = "Height [cm]",
    y = "Weight [kg]",
    title = "Height ~ weight relation of StarWars characters",
    subtitle = "In comparison with R2-D2",
    # `caption` is a label ggplot2 actually renders; a `note` entry is not shown
    caption = "Characters weighing less than 1 t"
  )

Scaling axes

  • You can use custom scales via scale_y_continuous(trans = <your_fun>).
  • Be sure to inform your reader about the manipulation (both in the plot and in the paragraph where you describe the plot insights).
ggplot(sw, aes(x = height, y = mass)) +
  geom_point() +
  labs(
    title = "Height ~ weight relation of StarWars characters",
    x = "Height [cm]",
    y = "Weight [kg], log10 scaled"       # tell the reader that the axis is transformed
  ) +
  scale_y_log10()                         # switching the y scale is all it takes

base R

CODE

# TODO

# TODO

Tips

:::

2D Density

Use-case: Visualizing relationship of two numeric variables with too many observations.

With too many observations, the details are hidden in the tons of spots. You can try to set transparency low enough and use scatterplot anyway (see above). But it's quite convenient to rely on 2D Density plot.

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Filled 2D density of height vs. mass for the characters with mass below 1000.
plt <-
  sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = height, y = mass) +
  stat_density2d_filled() +
  scale_fill_profinit("blues", reverse = TRUE) +
  labs(
    title = "Height ~ Mass relationship among SW Characters",
    caption = "Characters below 1000kg only",
    x = "Height [cm]",
    y = "Mass [kg]"
  )

plt

See also:

Contour plot

  • You may combine this layer with a scatterplot.
sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = height, y = mass) +
  # contour lines, colored by the computed density level
  stat_density2d(aes(color = after_stat(level)), geom = "path") +
  scale_color_profinit_c("reds-dark", reverse = TRUE, labels = scales::number) +
  labs(
    title = "Height ~ Mass relationship among SW Characters",
    caption = "Characters below 1000kg only",
    color = "Density",
    x = "Height [cm]",
    y = "Mass [kg]"
  )

2D histogram

  • Warning: The data are too sparse for this use case.
sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = height, y = mass) +
  stat_bin_2d() +                  # rectangular 2D bins
  scale_fill_profinit_c() +
  coord_equal() +
  labs(
    title = "Height ~ Mass relationship among SW Characters",
    caption = "Characters below 1000kg only",
    x = "Height [cm]",
    y = "Mass [kg]"
  )

2D histogram using hexes

  • Why: Hexagon bins avoid the visual artefacts sometimes generated by the very regular alignment of geom_bin2d().
  • Warning: The data are too sparse for this use case.
sw %>%
  filter(mass < 1000) %>%
  ggplot() +
  aes(x = height, y = mass) +
  stat_bin_hex() +                 # hexagonal 2D bins
  scale_fill_profinit_c() +
  coord_equal() +
  labs(
    title = "Height ~ Mass relationship among SW Characters",
    caption = "Characters below 1000kg only",
    x = "Height [cm]",
    y = "Mass [kg]"
  )

base R

CODE

# TODO

# TODO

Tips

:::

Heatmap

Use-case: Visualizing relationship of two numeric variables. Visualizing trend (target ~ regressor).

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Heatmap: number of characters per (first film, gender) combination.
plt <- sw %>% 
  group_by(first_film, gender) %>% 
  # drop the grouping explicitly: no leftover groups, no summarise() message
  summarise(n = n(), .groups = "drop") %>% 
  ggplot(aes(x = gender, y = first_film, fill=n)) + 
  stat_identity(geom = "tile") +                       # one tile per pre-aggregated row
  scale_fill_profinit_c("blues", reverse = TRUE) + 
  labs(
    x = "Character gender",
    y = "First film of the character",
    fill = "Count", 
    title = "Where do the characters of given gender mostly start?"
  )

plt

See also:

Ratio / differences

  • In case of visualizing ratios or differences (that is, having some natural base level to compare the values with), use blue-white-red color palette (or create another diverging color palette from Profinit's colors).
sw %>% 
  filter(!is.na(gender)) %>% 
  group_by(gender, is_droid) %>% 
  summarise(
    # Only characters with a known BMI enter the odds: the numerator already
    # ignores missing BMI (na.rm = TRUE), so the denominator must do the same,
    # otherwise unknown BMI would silently count as "not overweight".
    n_total = sum(!is.na(bmi)),
    n_overweight = sum(bmi > 30, na.rm = TRUE),
    # odds = overweight / not overweight
    odds_overweight = n_overweight/(n_total - n_overweight),
    .groups = "drop"
  ) %>% 
  ggplot(aes(x = gender, y = is_droid, fill = odds_overweight)) + 
  stat_identity(geom = "tile") + 
  # diverging palette centered at odds = 1
  scale_fill_gradient2(low = profinit_cols("red"), mid = "white", high = profinit_cols("blue"), midpoint = 1) + 
  labs(
    x = "Character gender",
    y = "Character type",
    fill = "Overweight\nOdds", 
    title = "What are the odds of being overweight?",
    subtitle = "Based on Gender and being droid in SW",
    caption = "Characters with known gender only"
  )

base R

CODE

# TODO

# TODO

Tips

:::

Extra: Odds ratio visualization

::: {.panel-tabset .nav-pills}

ggplot

CODE

# Empty ggplot object -- presumably a placeholder, as the rest of this section is marked TODO.
plt <- ggplot()

plt

base R

CODE

# TODO

# TODO

Tips

TODO

:::



Try the profiplots package in your browser

Any scripts or data that you put into this service are public.

profiplots documentation built on Nov. 16, 2023, 5:07 p.m.