knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(cjmr)
library(tidyverse)
setwd("C:/Users/chris/Desktop/Data Analysis Projects")
devtools::install("cjmr")

Some charts to work with

A column chart

Based on a chart produced by John Burn Murdoch at the Financial Times

# **************************************************************************
# set up for plotting
# **************************************************************************

library(lubridate)  # for date manipulation
library(slider)     # for calculating moving averages

# read in covid-19 case data
cases <- read_csv("../inst/extdata/utla_2021-05-19.csv") %>%
  # standardise naming
  janitor::clean_names()

# define areas of particular interest
areas_of_interest = c("Bedford", "Bolton", "Sefton", "Blackburn with Darwen")


# **************************************************************************
# process data for plotting
# **************************************************************************
areas_focus <- cases %>%

  # focus down on areas and timeframe of interest
  filter(area_name %in% areas_of_interest,
         date %within% interval(ymd("2021-04-01"), ymd("2021-05-14"))) %>% 

  # remove variables which won't be used
  select(-area_code, -area_type) %>%

  # calculate seven day moving average number of new cases
  group_by(area_name) %>%
  arrange(date) %>% 
  mutate(seven_day_mean = slide_dbl(new_cases_by_specimen_date_rolling_rate, 
                                    ~mean(.x), .before = 3, .after = 3)) %>% 

  # classify each day for each area of interest according to growth trend
  mutate(rate = (seven_day_mean - lag(seven_day_mean)) / lag(seven_day_mean),
         trend = case_when(
           rate <= 0 ~ "declining",
           rate >= lag(rate) ~ "accelerating",
           rate >= 0 ~ "stabilising",
           TRUE ~ "NA"
         ))

# **************************************************************************
# producing the plot
# **************************************************************************

p1 <- ggplot(data = areas_focus, mapping = aes(date, seven_day_mean,
                                         fill = trend, colour = trend)) +
  geom_col() +

  facet_wrap(~area_name, ncol = 4)

p1

A bubble chart

Also based on a chart produced by John Burn Murdoch at the Financial Times

# **************************************************************************
# set up for plotting
# **************************************************************************

# read in additional data on prevalance of covid-19 variants in upper tier
# local authorities
variants <- read_tsv("../inst/extdata/lineages_by_ltla_and_week_2021_05_17.tsv") %>% 
  janitor::clean_names()


# **************************************************************************
# process data for plotting
# **************************************************************************

# process variant data
latest_variants <- variants %>% 

  # focused down on the latest weeks data and
  # only include local authorities where more 20 test have been sequenced
  # to identify the variants
  filter(week_end_date == "2021-05-08",
         count >= 20) %>% 

  # transform data to create one variable per variant
  pivot_wider(names_from = lineage, values_from = count,
              values_fill = 0) %>%

  # calculate the perecentage of tests showing B.1.617.2 in each local authority
  mutate(count = `B.1.1.7` + `B.1.617.2`) %>% 
  mutate(perc_var_concern = `B.1.617.2` / count) %>% 

  # drop variables that are not needed for plotting
  select(ltla, perc_var_concern)

# process case data 
weekly_change_df <- cases %>%

  # add in variant data
  inner_join(latest_variants, by = c("area_code" = "ltla")) %>% 

  # remove unnessary variables
  select(-area_code, -area_type) %>%

  # focus on a specific week window of interest
  filter(date == ymd("2021-05-12") | 
           date == ymd("2021-05-05")) %>% 

  # rename to simplify variable name
  rename(rolling_rate = new_cases_by_specimen_date_rolling_rate) %>%

  # calculate percentage change in cases over the week
  pivot_wider(names_from = date, values_from = rolling_rate) %>% 
  mutate(weekly_change = `2021-05-12` / `2021-05-05`)


# **************************************************************************
# produce the plot
# **************************************************************************

p2 <- ggplot(weekly_change_df, aes(perc_var_concern, weekly_change)) +
  geom_point(aes(size = `2021-05-12`)) +
  ggrepel::geom_label_repel(aes(label = area_name)) # add labels for interpretation

p2 

An area chart

Also based on a chart produced by John Burn Murdoch at the Financial Times

# **************************************************************************
# process data for plotting
# **************************************************************************

# create a lookup mapping area_code to area_name
# as variant date does not include area_name
la_lookup <- select(cases, area_code, area_name) %>% 
  distinct()

# focus down on subset of variant data
variants_simplified <- variants %>%

  # focus on recent data (after a specified data)
  filter(week_end_date >= ymd("2021-04-01")) %>%

  # add in area_names
  left_join(la_lookup, by = c("ltla" = "area_code")) %>% 

  # focus on areas of interest
  filter(area_name %in% areas_of_interest) %>% 

  # simplify variants to compare B.1.617.2 to other variants
  mutate(lineage = case_when(
    lineage == "B.1.617.2" ~ "B.1.617.2",
    TRUE ~ "Other")) %>% 
  group_by(area_name, week_end_date, lineage) %>% 
  summarise(count = sum(count)) %>% 
  ungroup()

# there are some weeks which missing either for "B.1.617.2" or "Other" observation
# for some local authorities, this causes problems (i.e. gaps) in the area plot
# so I needed to create rows for the missing observations with count of zero
# to do this I need to work out all combinations of area_name, week_end_date and
# lineage

# select simplified variant classifications
variant_names <- distinct(variants_simplified, lineage)$lineage

# select all week_end_date s in the data set
weeks <- variants_simplified %>% 
  group_by(week_end_date) %>% 
  summarise(uniqueid = n_distinct(week_end_date)) %>% 
  select(week_end_date)

# create a grid of all combinations of area_name, week_end_date and lineage
wks_line_comb <- expand.grid(area_name = areas_of_interest, 
                             week_end_date = weeks$week_end_date, 
                             lineage = variant_names)

# join the grid with the variant data so all rows needed for plotting are
# present 
variants_simp_comp <- variants_simplified %>% 
  full_join(wks_line_comb) %>% 
  mutate(count = replace_na(count, 0))

# enable the order of facets to be specified
variants_simp_comp <- variants_simp_comp %>% 
  mutate(area_name = factor(area_name,
                            levels = c("Bolton","Blackburn with Darwen", 
                                       "Sefton", "Bedford")))



# **************************************************************************
# produce the plot
# **************************************************************************

p3 <- ggplot(variants_simp_comp, aes(week_end_date, count,
                                fill = lineage,
                                colour = lineage)) +
  geom_area()

p3 +
  facet_wrap(~area_name)
sysfonts::font_add_google("Roboto", "robo")
sysfonts::font_add_google("Roboto Slab", "slab")
# , lineheight = .4


explanatory_theme <- function(){  

  list(
    ggplot2::theme_light(),

    ggplot2::theme(

    # format text
    text = ggplot2::element_text(family = "robo", size = 14),
    plot.title = ggplot2::element_text(hjust = 0.5, 
                                       family = "slab",
                                       size = 18),
    plot.subtitle = ggplot2::element_text(hjust = -0.05, 
                                          size = 14),
    plot.caption = ggplot2::element_text(size = 12, hjust = 0),

    # format legend
    legend.position = "top",
    legend.background = ggplot2::element_blank(),
    legend.title = ggplot2::element_blank(),

    # format axis
    axis.title.y = ggplot2::element_blank(),
    axis.line.y = ggplot2::element_blank(),
    axis.line.x = ggplot2::element_line(colour = "black", size = 1),
    axis.ticks.x = ggplot2::element_line(colour = "black", size = 1),
    axis.text.x = ggplot2::element_text(margin=ggplot2::margin(t = 7.5, b = 10)),

    # format plot gridlines
    panel.grid.minor = ggplot2::element_blank(),
    panel.grid.major.x = ggplot2::element_blank(),
    panel.grid.major.y = ggplot2::element_line(colour = grid_line_colour),

    # format plot background
    panel.background = ggplot2::element_blank(),

    # format overall background (i.e. background behind the entire figure
    # including the plot, titles etc.)
    plot.background = element_rect(fill = background_colour),

    # facet labels background
    strip.background = ggplot2::element_rect(fill=background_colour),
    strip.text = ggplot2::element_text(colour = text_colour, face = "bold",
                                       size = 14),
    panel.border = ggplot2::element_blank()
    )
  )
}  


binary_pallette <- ggthemes::fivethirtyeight_pal()(2)
binary_pallette_subtle <- c(nord::nord("lumina")[3], nord::nord("lumina")[2])

# Automatically use showtext to render text
showtext::showtext_auto()
showtext::showtext_opts(dpi = 300)

grid_line_colour <- "grey"
text_colour <- "black"
background_colour <- "grey98"


p3_formatted <- p3 +

  labs(title = "Holding title",
       subtitle = "Number of cases\nsequenced",
       x = NULL,
       caption = "Source:") +

  # remove gaps between the axis and the start of plot
  # see https://ggplot2.tidyverse.org/reference/expansion.html
  ggplot2::scale_x_date(expand = expansion(mult = c(0, 0))) +
  ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +

  scale_fill_manual(values = binary_pallette_subtle) +
  scale_colour_manual(values = binary_pallette_subtle) +


  lemon::facet_rep_wrap(~area_name, ncol = 1, repeat.tick.labels = "bottom") +
  explanatory_theme() +
  theme(legend.position = "none")

#+
  # patchwork::plot_annotation(
  #   caption = "Data Viz: @analytics_urban",
  #   theme = theme(
  #     text = ggplot2::element_text(family = "robo", size = 12),
  #     plot.caption = element_text(hjust = 0)
  #   ))



ggsave("test1.png", plot = p3_formatted, width = 7, height = 10, unit = "in", dpi = 300)
showtext::showtext_auto(FALSE)

magick::image_read("test1.png")
#showtext::showtext_end()

A line graph

Making use of data to hand to create a simple line graph.

# **************************************************************************
# process data for plotting
# **************************************************************************

all_areas <- cases %>% 

  # focus down on a date window of interest
  filter(date %within% interval(ymd("2021-04-01"), ymd("2021-05-14")))

# **************************************************************************
# produce the plot
# **************************************************************************
showtext::showtext.auto()
showtext::showtext_opts(dpi = 300)

p4 <- ggplot(all_areas, aes(date, new_cases_by_specimen_date_rolling_rate, group = area_name)) +
  geom_line()

p4_formatted <- p4 +
  explanatory_theme() +
  ggplot2::scale_x_date(expand = expansion(mult = c(0, 0))) +
  ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.1))) 

finalise_plot <- function(f_name, plot, width = 7, height = 4.5){

  ggsave(f_name, plot = plot, 
         width = width, height = height, unit = "in", 
         dpi = 300)

  magick::image_read(f_name)
}

finalise_plot("p4.png", p4_formatted)



showtext::showtext.auto(FALSE)

A heatmap

Finally for now, a heatmap as shown on the UK Government Covid-19 data portal.

# **************************************************************************
# set up for plotting
# **************************************************************************

# read in additional covid-19 case data by age (for Bolton only)
bolton_cases_by_age <- read_csv("../inst/extdata/utla_E08000001_2021-05-20.csv") %>%
  # standardise naming
  janitor::clean_names()

# **************************************************************************
# process data for plotting
# **************************************************************************

bolton_cases_by_age <- bolton_cases_by_age %>% 

  # following the example plot aggregate rolling rates of 800+ into one group
  mutate(rolling_rate = if_else(rolling_rate >= 800, 800, rolling_rate)) %>% 

  # remove an NA age catergory
  filter(age != "unassigned")

# **************************************************************************
# produce the plot
# **************************************************************************

p5 <- ggplot(bolton_cases_by_age, aes(date, age,
                                fill = rolling_rate)) +
  geom_tile() +
  scale_fill_viridis_c(direction = -1)

p5

Plot anatomy

abcd

Dimensions

Title

Subtitle

Axis titles

Axis labels

Axis lines

Gridlines

Legend

Data

Sources and notes

Colour pallettes

A long list of colour palettes - https://github.com/EmilHvitfeldt/r-color-palettes

Binary - bold

scales::show_col(ggthemes::fivethirtyeight_pal()(2))

Binary - subtle

nord::nord_show_palette("lumina")

Sequential

Diverging

Categorical

Highlighting



tbk03/cjmr documentation built on Dec. 23, 2021, 8:40 a.m.