knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(cjmr) library(tidyverse)
setwd("C:/Users/chris/Desktop/Data Analysis Projects") devtools::install("cjmr")
Based on a chart produced by John Burn Murdoch at the Financial Times
# ************************************************************************** # set up for plotting # ************************************************************************** library(lubridate) # for date manipulation library(slider) # for calculating moving averages # read in covid-19 case data cases <- read_csv("../inst/extdata/utla_2021-05-19.csv") %>% # standardise naming janitor::clean_names() # define areas of particular interest areas_of_interest = c("Bedford", "Bolton", "Sefton", "Blackburn with Darwen") # ************************************************************************** # process data for plotting # ************************************************************************** areas_focus <- cases %>% # focus down on areas and timeframe of interest filter(area_name %in% areas_of_interest, date %within% interval(ymd("2021-04-01"), ymd("2021-05-14"))) %>% # remove variables which won't be used select(-area_code, -area_type) %>% # calculate seven day moving average number of new cases group_by(area_name) %>% arrange(date) %>% mutate(seven_day_mean = slide_dbl(new_cases_by_specimen_date_rolling_rate, ~mean(.x), .before = 3, .after = 3)) %>% # classify each day for each area of interest according to growth trend mutate(rate = (seven_day_mean - lag(seven_day_mean)) / lag(seven_day_mean), trend = case_when( rate <= 0 ~ "declining", rate >= lag(rate) ~ "accelerating", rate >= 0 ~ "stabilising", TRUE ~ "NA" )) # ************************************************************************** # producing the plot # ************************************************************************** p1 <- ggplot(data = areas_focus, mapping = aes(date, seven_day_mean, fill = trend, colour = trend)) + geom_col() + facet_wrap(~area_name, ncol = 4) p1
Also based on a chart produced by John Burn Murdoch at the Financial Times
# ************************************************************************** # set up for plotting # ************************************************************************** # read in additional data on prevalance of covid-19 variants in upper tier # local authorities variants <- read_tsv("../inst/extdata/lineages_by_ltla_and_week_2021_05_17.tsv") %>% janitor::clean_names() # ************************************************************************** # process data for plotting # ************************************************************************** # process variant data latest_variants <- variants %>% # focused down on the latest weeks data and # only include local authorities where more 20 test have been sequenced # to identify the variants filter(week_end_date == "2021-05-08", count >= 20) %>% # transform data to create one variable per variant pivot_wider(names_from = lineage, values_from = count, values_fill = 0) %>% # calculate the perecentage of tests showing B.1.617.2 in each local authority mutate(count = `B.1.1.7` + `B.1.617.2`) %>% mutate(perc_var_concern = `B.1.617.2` / count) %>% # drop variables that are not needed for plotting select(ltla, perc_var_concern) # process case data weekly_change_df <- cases %>% # add in variant data inner_join(latest_variants, by = c("area_code" = "ltla")) %>% # remove unnessary variables select(-area_code, -area_type) %>% # focus on a specific week window of interest filter(date == ymd("2021-05-12") | date == ymd("2021-05-05")) %>% # rename to simplify variable name rename(rolling_rate = new_cases_by_specimen_date_rolling_rate) %>% # calculate percentage change in cases over the week pivot_wider(names_from = date, values_from = rolling_rate) %>% mutate(weekly_change = `2021-05-12` / `2021-05-05`) # ************************************************************************** # produce the plot # ************************************************************************** p2 <- ggplot(weekly_change_df, aes(perc_var_concern, weekly_change)) + geom_point(aes(size = `2021-05-12`)) + ggrepel::geom_label_repel(aes(label = area_name)) # add labels for interpretation p2
Also based on a chart produced by John Burn Murdoch at the Financial Times
# ************************************************************************** # process data for plotting # ************************************************************************** # create a lookup mapping area_code to area_name # as variant date does not include area_name la_lookup <- select(cases, area_code, area_name) %>% distinct() # focus down on subset of variant data variants_simplified <- variants %>% # focus on recent data (after a specified data) filter(week_end_date >= ymd("2021-04-01")) %>% # add in area_names left_join(la_lookup, by = c("ltla" = "area_code")) %>% # focus on areas of interest filter(area_name %in% areas_of_interest) %>% # simplify variants to compare B.1.617.2 to other variants mutate(lineage = case_when( lineage == "B.1.617.2" ~ "B.1.617.2", TRUE ~ "Other")) %>% group_by(area_name, week_end_date, lineage) %>% summarise(count = sum(count)) %>% ungroup() # there are some weeks which missing either for "B.1.617.2" or "Other" observation # for some local authorities, this causes problems (i.e. gaps) in the area plot # so I needed to create rows for the missing observations with count of zero # to do this I need to work out all combinations of area_name, week_end_date and # lineage # select simplified variant classifications variant_names <- distinct(variants_simplified, lineage)$lineage # select all week_end_date s in the data set weeks <- variants_simplified %>% group_by(week_end_date) %>% summarise(uniqueid = n_distinct(week_end_date)) %>% select(week_end_date) # create a grid of all combinations of area_name, week_end_date and lineage wks_line_comb <- expand.grid(area_name = areas_of_interest, week_end_date = weeks$week_end_date, lineage = variant_names) # join the grid with the variant data so all rows needed for plotting are # present variants_simp_comp <- variants_simplified %>% full_join(wks_line_comb) %>% mutate(count = replace_na(count, 0)) # enable the order of facets to be specified variants_simp_comp <- variants_simp_comp %>% mutate(area_name = factor(area_name, levels = c("Bolton","Blackburn with Darwen", "Sefton", "Bedford"))) # ************************************************************************** # produce the plot # ************************************************************************** p3 <- ggplot(variants_simp_comp, aes(week_end_date, count, fill = lineage, colour = lineage)) + geom_area() p3 + facet_wrap(~area_name)
sysfonts::font_add_google("Roboto", "robo") sysfonts::font_add_google("Roboto Slab", "slab")
# , lineheight = .4 explanatory_theme <- function(){ list( ggplot2::theme_light(), ggplot2::theme( # format text text = ggplot2::element_text(family = "robo", size = 14), plot.title = ggplot2::element_text(hjust = 0.5, family = "slab", size = 18), plot.subtitle = ggplot2::element_text(hjust = -0.05, size = 14), plot.caption = ggplot2::element_text(size = 12, hjust = 0), # format legend legend.position = "top", legend.background = ggplot2::element_blank(), legend.title = ggplot2::element_blank(), # format axis axis.title.y = ggplot2::element_blank(), axis.line.y = ggplot2::element_blank(), axis.line.x = ggplot2::element_line(colour = "black", size = 1), axis.ticks.x = ggplot2::element_line(colour = "black", size = 1), axis.text.x = ggplot2::element_text(margin=ggplot2::margin(t = 7.5, b = 10)), # format plot gridlines panel.grid.minor = ggplot2::element_blank(), panel.grid.major.x = ggplot2::element_blank(), panel.grid.major.y = ggplot2::element_line(colour = grid_line_colour), # format plot background panel.background = ggplot2::element_blank(), # format overall background (i.e. background behind the entire figure # including the plot, titles etc.) plot.background = element_rect(fill = background_colour), # facet labels background strip.background = ggplot2::element_rect(fill=background_colour), strip.text = ggplot2::element_text(colour = text_colour, face = "bold", size = 14), panel.border = ggplot2::element_blank() ) ) } binary_pallette <- ggthemes::fivethirtyeight_pal()(2) binary_pallette_subtle <- c(nord::nord("lumina")[3], nord::nord("lumina")[2]) # Automatically use showtext to render text showtext::showtext_auto() showtext::showtext_opts(dpi = 300) grid_line_colour <- "grey" text_colour <- "black" background_colour <- "grey98" p3_formatted <- p3 + labs(title = "Holding title", subtitle = "Number of cases\nsequenced", x = NULL, caption = "Source:") + # remove gaps between the axis and the start of plot # see https://ggplot2.tidyverse.org/reference/expansion.html ggplot2::scale_x_date(expand = expansion(mult = c(0, 0))) + ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.1))) + scale_fill_manual(values = binary_pallette_subtle) + scale_colour_manual(values = binary_pallette_subtle) + lemon::facet_rep_wrap(~area_name, ncol = 1, repeat.tick.labels = "bottom") + explanatory_theme() + theme(legend.position = "none") #+ # patchwork::plot_annotation( # caption = "Data Viz: @analytics_urban", # theme = theme( # text = ggplot2::element_text(family = "robo", size = 12), # plot.caption = element_text(hjust = 0) # )) ggsave("test1.png", plot = p3_formatted, width = 7, height = 10, unit = "in", dpi = 300) showtext::showtext_auto(FALSE) magick::image_read("test1.png") #showtext::showtext_end()
Making use of data to hand to create a simple line graph.
# ************************************************************************** # process data for plotting # ************************************************************************** all_areas <- cases %>% # focus down on a date window of interest filter(date %within% interval(ymd("2021-04-01"), ymd("2021-05-14"))) # ************************************************************************** # produce the plot # ************************************************************************** showtext::showtext.auto() showtext::showtext_opts(dpi = 300) p4 <- ggplot(all_areas, aes(date, new_cases_by_specimen_date_rolling_rate, group = area_name)) + geom_line() p4_formatted <- p4 + explanatory_theme() + ggplot2::scale_x_date(expand = expansion(mult = c(0, 0))) + ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.1))) finalise_plot <- function(f_name, plot, width = 7, height = 4.5){ ggsave(f_name, plot = plot, width = width, height = height, unit = "in", dpi = 300) magick::image_read(f_name) } finalise_plot("p4.png", p4_formatted) showtext::showtext.auto(FALSE)
Finally for now, a heatmap as shown on the UK Government Covid-19 data portal.
# ************************************************************************** # set up for plotting # ************************************************************************** # read in additional covid-19 case data by age (for Bolton only) bolton_cases_by_age <- read_csv("../inst/extdata/utla_E08000001_2021-05-20.csv") %>% # standardise naming janitor::clean_names() # ************************************************************************** # process data for plotting # ************************************************************************** bolton_cases_by_age <- bolton_cases_by_age %>% # following the example plot aggregate rolling rates of 800+ into one group mutate(rolling_rate = if_else(rolling_rate >= 800, 800, rolling_rate)) %>% # remove an NA age catergory filter(age != "unassigned") # ************************************************************************** # produce the plot # ************************************************************************** p5 <- ggplot(bolton_cases_by_age, aes(date, age, fill = rolling_rate)) + geom_tile() + scale_fill_viridis_c(direction = -1) p5
abcd
Markers
Labels
Series
Sources and notes
A long list of colour palettes - https://github.com/EmilHvitfeldt/r-color-palettes
scales::show_col(ggthemes::fivethirtyeight_pal()(2))
nord::nord_show_palette("lumina")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.