factsheet.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  eval = FALSE,
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo=FALSE, warning=FALSE, error=FALSE, message=FALSE-------------
# 
# # https://www.gov.uk/government/statistics/reported-road-casualties-great-britain-pedestrian-factsheet-2023/reported-road-casualties-in-great-britain-pedestrian-factsheet-2023
# 
# #library(stats19)
# library(stats19)
# library(sf)
# library(dplyr)
# library(lubridate)
# library(reshape2)
# library(ggplot2)
# library(knitr)
# library(readODS)
# library(gt)
# library(clock)
# library(stringr)
# library(tidyr)
# 
# # define plot dimensions
# knitr::opts_chunk$set(
#   out.width = "100%"  # scale relative to text width
# )
# 
#   # what casualty is the report for? options Pedestrian, Cyclist, escooters
#   report_casualty <- "Pedestrian"
# 
#   # stats19 usually updated in September, so if it is October last years data should be there
#   yr2calc <- 2024
# 
#   # request collision data (entering 2004 results in a table with all years)
#   crashes = get_stats19(year = "2004", type = "collision", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004)
# 
#   # import the adjusted casualty data ready to join to the original
#   #adj <- get_stats19_adjustments()
# 
# 
#   ## request casualty
#   casualties = get_stats19(year = "2004", type = "casualty", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004) |>
#     mutate(fatal_count = if_else(casualty_severity == "Fatal", 1, 0)) # add a column for fatal tally to enable same method to be used for serious and slight
# 
#   ## request vehicle
#   vehicles = get_stats19(year = "2004", type = "vehicle", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004)
# 
# 
# # get population data from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
#   uk_pop <- read.csv("https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatestimeseriesdataset/current/pop.csv", skip = 7)
#   uk_pop <- uk_pop[,c(1,6)]
#   names(uk_pop) <- c("Year", "Population")
# 
# 
#   # get trip data NTS0303 https://www.gov.uk/government/statistical-data-sets/nts03-modal-comparisons
#   download.file("https://assets.publishing.service.gov.uk/media/66ce0f118e33f28aae7e1f75/nts0303.ods", destfile = "nts0303.ods", mode = "wb")
#   # dataset links to this sheet, but it is total distance and not dissagregated by travel mode
#   #download.file("https://assets.publishing.service.gov.uk/media/66ce0e818e33f28aae7e1f71/nts0101.ods", destfile = "nts0101.ods", mode = "wb")
# 
# 
#   # read in trip data
#   trips <- read_ods("nts0303.ods", sheet = "NTS0303c_miles", skip = 5) |>
#     left_join(uk_pop, by = "Year") |>
#     mutate(tot_dist_billion_miles = (`Walk [notes 2, 3]`*Population)/10^9)
# 
#   # speed up debugging by saving these key dfs locally
# #   save(casualties,crashes,vehicles, trips, file = "all_years.RData")
# 
#   #load("all_years.RData")
# 
#     # # most of the data is based on the last 5 years, speed up calcs by creating df for this
# #crashes$number_of_casualties <- as.numeric(crashes$number_of_casualties)
#   cra_L5Y <- filter(crashes, collision_year <= yr2calc & collision_year >= yr2calc-4)
#   cas_L5Y <- filter(casualties, collision_year <= yr2calc & collision_year >= yr2calc-4)
#   veh_L5Y <- filter(vehicles, collision_year <= yr2calc & collision_year >= yr2calc-4)
# #
# #
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # pick out data only for 2004
# fat_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_severity == "Fatal" & casualty_class == report_casualty)
# #casualties this year (TY)
# fat_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_severity == "Fatal" & casualty_class == report_casualty)
# 
# if(NROW(fat_cas_2004)>NROW(fat_cas_TY)){
#   ud <- "down"
#   fat_cas_diff <- round((1-NROW(fat_cas_TY)/NROW(fat_cas_2004))*100)
# } else {
#   ud <- "up"
#   fat_cas_diff <- round((1-NROW(fat_cas_2004)/NROW(fat_cas_TY))*100)
# }
# 
# ser_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_class == report_casualty)
# #casualties this year (TY)
# ser_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_class == report_casualty)
# 
# if(NROW(ser_cas_2004)>NROW(ser_cas_TY)){
#   id <- "decreased"
#   ser_cas_diff <-(((sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE)-sum(ser_cas_TY$casualty_adjusted_severity_serious,na.rm = TRUE))/sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE))*100)
# } else {
#   id <- "increased"
#   ser_cas_diff <- round((1-sum(ser_cas_2004$casualties,na.rm = TRUE)/sum(ser_cas_TY$casualties))*100)
# }
# 
# dist_walked_2004 <- filter(trips, Year == "2004")
# dist_walked_TY <- filter(trips, Year == yr2calc)
# 
# if(dist_walked_TY$tot_dist_billion_miles<dist_walked_2004$tot_dist_billion_miles){
#   ped_miles_id <- "decreased"
#   ped_traf <- (dist_walked_2004$tot_dist_billion_miles/dist_walked_TY$tot_dist_billion_miles)-1
# } else {
#   ped_miles_id <- "increased"
#   ped_traf <- (dist_walked_TY$tot_dist_billion_miles/dist_walked_2004$tot_dist_billion_miles)-1
# }
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# crash_cas <- inner_join(cas_L5Y,cra_L5Y) |>
#   filter(casualty_type == report_casualty)
# 
# # create column for weeks
# dths_per_wk_fat <- crash_cas |>
#   filter(casualty_severity == "Fatal" & casualty_class == report_casualty) |>  ## pick out casualty the stats will focus on
#   mutate(wk = isoweek(date),## determine the week number of each date
#          yr = year(date)) |>  ## add year so we can include all weeks over the 5 years
#   group_by(wk,yr) |>
#   summarise(casualties = sum(fatal_count))
# 
# # create column for weeks
# dths_per_wk_ser <- crash_cas %>%
#   mutate(wk = isoweek(date),# determine the week number of each date
#          yr = year(date)) %>% ## add year so all weeks over the 5 years are included
#   filter(casualty_class == report_casualty) |>  ## pick out casualty the stats will focus on
#   group_by(wk,yr) %>%
#   summarise(casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE))
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# cas_summary <- cas_L5Y |>
#   select(collision_index, casualty_class, fatal_count, casualty_adjusted_severity_serious, casualty_adjusted_severity_slight) |>
#   filter(casualty_class == report_casualty) |>
#   group_by(collision_index, casualty_class) |>
#    summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#   tidyr::pivot_wider(names_from = "casualty_class", values_from = c("Fatal","Serious","Slight"))
# 
# if(yr2calc > 2023){
# 
# junction_pc <- cas_summary |>
#   left_join(cra_L5Y) |>
#   group_by(junction_detail) |>
#   summarise(fatal_casualties = sum(Fatal_Pedestrian),
#             serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE),
#             slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |>
#   rowwise() |>
#   mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |>
#   ungroup() |>
#   transmute(Junction = junction_detail,
#             Fatalities = fatal_casualties/sum(fatal_casualties)*100,
#          Serious = serious_casualties/sum(serious_casualties)*100,
#          Slight = slight_casualties/sum(slight_casualties)*100,
#          All = All/sum(All)*100) |>
#   mutate_if(is.numeric, round,1) |>
#   arrange(desc(All))
# } else {
#   junction_pc <- cas_summary |>
#   left_join(cra_L5Y) |>
#   group_by(junction_detail_historic) |>
#   summarise(fatal_casualties = sum(Fatal_Pedestrian),
#             serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE),
#             slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |>
#   rowwise() |>
#   mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |>
#   ungroup() |>
#   transmute(Junction = junction_detail_historic,
#             Fatalities = fatal_casualties/sum(fatal_casualties)*100,
#          Serious = serious_casualties/sum(serious_casualties)*100,
#          Slight = slight_casualties/sum(slight_casualties)*100,
#          All = All/sum(All)*100) |>
#   mutate_if(is.numeric, round,1) |>
#   arrange(desc(All))
# }
# 
# ## stats for within 20m of junctions
# not_within_20 <- junction_pc %>%
#   filter(Junction == "Not at junction or within 20 metres")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# ## create some approximate groups
# vehicle_groups <- data.frame(summary_group = c("pedal cycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle",
#                                                "motorcycle", "car", "other vehicle", "other vehicle", "car", "bus or coach", "car", "light goods vehicle", "heavy goods vehicle", "heavy goods vehicle",
#                                                "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle"),
#                              vehicle_type =  c("Pedal cycle","Motorcycle - unknown cc", "Electric motorcycle" , "Motorcycle 125cc and under",
#                                                  "Motorcycle 50cc and under","Motorcycle over 125cc and up to 500cc", "Motorcycle over 500cc",
#                                                  "Car", "Agricultural vehicle", "Tram","Taxi/Private hire car",
#                                                  "Bus or coach (17 or more pass seats)", "Minibus (8 - 16 passenger seats)", "Van / Goods 3.5 tonnes mgw or under",
#                                                  "Goods over 3.5t. and under 7.5t", "Goods 7.5 tonnes mgw and over", "Other vehicle", "Unknown vehicle type (self rep only)",
#                                                "Goods vehicle - unknown weight", "Mobility scooter", "Ridden horse", "Data missing or out of range"))
# 
# 
# # join the casualty summary table with crashes and vehicles
# veh_cas_sum <- cas_summary |>
#   left_join(veh_L5Y, by = "collision_index") |> # join the vehicles to get data on type of vehicle
#   left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table
#   select(collision_index, vehicle_type, number_of_vehicles, Fatal_Pedestrian, Serious_Pedestrian, Slight_Pedestrian) |>
#   left_join(vehicle_groups, by = "vehicle_type") |>
#   distinct(collision_index, .keep_all = TRUE) # there is a row for each vehicle in a collision, get rid of duplicates
# 
# 
# all_vehicles <- veh_cas_sum |>
#   group_by(number_of_vehicles, summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/sum(Fatal)*100,1)) |>
#   arrange(desc(Fatal))
# 
# single_car <- filter(all_vehicles, number_of_vehicles == 1 & summary_group == "car")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# road_type <- cas_summary |>
#   left_join(cra_L5Y, by = "collision_index") |>
#   #select(collision_severity, casualty_type, datetime, first_road_class, urban_or_rural_area, number_of_casualties,collision_reference) %>%
#   mutate(first_road_class = case_when(first_road_class == "A" ~ "Other",first_road_class == "B" ~ "Other",first_road_class == "C" ~ "Other",
#                          first_road_class == "Unclassified" ~ "Other",first_road_class == "A(M)" ~ "Other",first_road_class == "Motorway" ~ "Motorway")) |>
#   group_by(first_road_class, urban_or_rural_area) %>%
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   filter(!urban_or_rural_area == "Unallocated")
# 
# m_way <- road_type %>%
#   filter(first_road_class == "Motorway") |>
#   select(road_class = first_road_class,
#          Fatal,
#          Serious,
#          Slight)
# 
# rural_urban <- road_type %>%
#   filter(first_road_class == "Other") |>
#   ungroup() |>
#   select(road_class = urban_or_rural_area,
#          Fatal,
#          Serious,
#          Slight)
# 
# 
# road_types <- rbind(m_way, rural_urban)|>
#   group_by(road_class) |>
#   summarise(Fatal = round(sum(Fatal)),
#             Serious = round(sum(Serious,na.rm = TRUE)),
#             Slight = round(sum(Slight,na.rm = TRUE))) |>
#   tidyr::pivot_longer(cols =  c("Fatal", "Serious", "Slight")) |>
#   group_by(road_class, name) %>%
#   summarise(value = sum(value)) |>
#   group_by(name) %>%
#   mutate(pc = value/sum(value)*100)
# 
# road_type_all <- rbind(m_way, rural_urban)|>
#   group_by(road_class) |>
#   summarise(Fatal = round(sum(Fatal)),
#             Serious = round(sum(Serious,na.rm = TRUE)),
#             Slight = round(sum(Slight,na.rm = TRUE))) |>
#   tidyr::pivot_longer(cols =  c("Fatal", "Serious", "Slight")) |>
#   group_by(road_class) %>%
#   summarise(value = sum(value)) %>%
#   mutate(pc = value/sum(value)*100) |>
#   mutate(name = "All casualties")
# 
# rural_all <-  filter(road_type_all, road_class == "Rural")
# 
# fatal_rural <- filter(road_types,road_class == "Rural" & name == "Fatal")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# ## male female
# sex_casualty <- cas_L5Y %>%
#   filter(casualty_type == "Pedestrian") %>%
#   group_by(sex_of_casualty) %>%
#   summarise(fatal_casualties = sum(fatal_count),
#             serious_casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             slight_casualties = sum(casualty_adjusted_severity_slight, na.rm = TRUE))
# 
# serious_fatal_male <- sex_casualty %>%
#   rowwise() |>
#   #filter(sex_of_casualty == "Male") |>
#   mutate(KSI = sum(fatal_casualties, serious_casualties)) |>
#   ungroup() |>
#   mutate(pc_ksi = KSI/sum(KSI))
# 
# pc_fatal_serious_male <- filter(serious_fatal_male, sex_of_casualty == "Male")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# ## only latest year (TY) in report
# fatal_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty, casualty_severity == "Fatal")
# 
# serious_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty)
# 
# slight_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty)
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# trips <- select(trips, Year, Traffic = tot_dist_billion_miles)
# 
# table_1 <- casualties |>
#   filter(casualty_type == report_casualty & collision_year >= 2004 & collision_year <= yr2calc) |> # add in filter for calculating past years when later data is available
#   group_by(collision_year) |>
#   summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#   select(collision_year,Fatal, Serious, Slight) |>
#   #tidyr::pivot_wider(names_from = "casualty_severity", values_from = "casualties") |>
#   rowwise() |>
#   mutate(All = sum(c(Fatal, Serious, Slight))) |>
#   left_join(trips, by = c("collision_year" = "Year"))
# 
# dist_walked_2004 <- filter(trips, Year == 2004)
# dist_walked_TY <- filter(trips, Year == yr2calc)
# 
# if(dist_walked_2004$Traffic > dist_walked_TY$Traffic){
#   dw <- "decreased"
# } else {
#   dw <- "increased"
# }
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# ## change between this year (TY) and last year (LY)
# 
# diff_fatal <- (table_1$Fatal[NROW(table_1)]-table_1$Fatal[NROW(table_1)-1])/table_1$Fatal[NROW(table_1)-1]
# 
# diff_trips <- abs(table_1$Traffic[NROW(table_1)]-table_1$Traffic[NROW(table_1)-1])/table_1$Traffic[NROW(table_1)-1]
# 
# # pedestrian fatalities increased or decreased
# if(table_1$Fatal[NROW(table_1)]>table_1$Fatal[NROW(table_1)-1]){
#   pf <- "increased"
# } else {
#   pf <- "decreased"
# }
# 
# # pedestrian casualties all severities increased or decreased
# if(table_1$All[NROW(table_1)]>table_1$All[NROW(table_1)-1]){
#   pcr <- "increased"
# } else {
#   pcr <- "fallen"
# }
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# bm_vals <- table_1 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight, Traffic)
# 
# rates <- table_1 %>%
#   select(collision_year,Fatal, Serious,Slight,Traffic) |>
#   mutate(Fatal = Fatal/bm_vals$Fatal*100,
#          Serious = Serious/bm_vals$Serious*100,
#          Slight = Slight/bm_vals$Slight*100,
#          Traffic = Traffic/bm_vals$Traffic*100)
# 
# chart_1 <- rates |> melt("collision_year")
# 
# cols <- rev(c("#001a70", "#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# chart_1 %>%
#   ggplot(aes(collision_year, value, color = variable)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top",
#     legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   geom_hline(yintercept=100, linetype='dotted', col = 'black')+
#   ggtitle(paste0("Chart 1: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) +
#   scale_x_continuous(name = NULL,
#     breaks = seq(2004, 2023, by = 2)  # Add more tick marks
#   ) +
#   labs(caption = "Source: Stats19")+
#   theme(panel.border = element_blank())
# 
# 
# # round all the values for the table print
# table_1_out <- table_1  |>
#    mutate(Fatal = round(Fatal),
#           Serious = round(Serious),
#           Slight = round(Slight),
#           All = round(All),
#           Traffic = round(Traffic,2))
# 
# 
# gt(table_1_out,auto_align = TRUE) |>
#   cols_width(collision_year ~px(60)) |>
#   cols_label(collision_year = md("**Year**"),
#              Fatal = md("**Killed**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All**"),
#              Traffic = md("**Traffic**")) |>
#   tab_footnote(md("**Source: DfT STATS19, National Travel Survey and Office for National\nStatistics population data**")) |>
#   tab_header(
#     title = md(paste0("**Table 1: Number of reported pedestrian casualties by severity and traffic\n(pedestrian billion miles walked), GB: 2004 to ", yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "black",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none",) |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(collision_year)),
#       cells_body(columns = c(collision_year))
#     )) |>
#   tab_style(
#     style = cell_fill(color = "white"),
#     locations = cells_body(columns = everything())
#   )
# 
# table_2 <- table_1 |>
#   transmute(collision_year,
#             Fatal = round(Fatal/Traffic),
#          Serious = round(Serious/Traffic),
#          Slight = round(Slight/Traffic),
#          All = round(All/Traffic))
# 
# bm_vals_2 <- table_2 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight,All)
# 
# rates_2 <- table_2 %>%
#   mutate(Fatal = Fatal/bm_vals_2$Fatal*100,
#          Serious = Serious/bm_vals_2$Serious*100,
#          Slight = Slight/bm_vals_2$Slight*100,
#          All = All/bm_vals_2$All*100)
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # compare the last year to 2004 for all stats
# diff_all_2004 <- (1-(rates_2$All[NROW(rates_2)]/rates_2$All[1]))*100
# diff_fat_2004 <- (1-(rates_2$Fatal[NROW(rates_2)]/rates_2$Fatal[1]))*100
# diff_sev_2004 <- (1-(rates_2$Serious[NROW(rates_2)]/rates_2$Serious[1]))*100
# diff_sli_2004 <- (1-(rates_2$Slight[NROW(rates_2)]/rates_2$Slight[1]))*100
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# 
# # pick out traffic
# chart_2 <- rates_2 |>
#   melt("collision_year") |>
#   filter(!variable == "All")
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# 
# 
# chart_2 %>%
#   ggplot(aes(collision_year, value, color = variable)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top",
#     legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   geom_hline(yintercept=100, linetype='dotted', col = 'black')+
#   ggtitle(paste0("Chart 2: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) +
#   scale_x_continuous(name = NULL,
#     breaks = seq(2004, 2023, by = 2)  # Add more tick marks
#   ) +
#   labs(caption = "Source: Stats19")
# 
# 
# 
# gt(table_2,auto_align = FALSE) |>
#   cols_label(collision_year = md("**Year**"),
#              Fatal = md("**Killed**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All**")) |>
#   tab_header(
#     title = md(paste0("**Table 2: Casualty rates of pedestrian casualties by severity per billion miles walked, GB: 2004 to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(collision_year)),
#       cells_body(columns = c(collision_year))
#     ))
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # by sex and age
# # use raw casualty df as there are multiple rows per collision. Bin the ages into custom bins that match the document
# sac_all <- cas_L5Y %>%
#   filter(casualty_type == report_casualty) |>
#   mutate(age_band = cut(as.numeric(age_of_casualty), breaks=c(0,11,15,19,24,29,39,49,59,69,100),labels=c("0-11","12-15","16-19","20-24","25-29","30-39","40-49","50-59","60-69","70+"))) |>
#   group_by(sex_of_casualty, age_band) %>%
#   summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#     filter(!is.na(age_band)) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious))
# 
# mf <- sac_all |> group_by(sex_of_casualty) |> summarise(all = sum(All)) |> ungroup() |> mutate(pc = all/sum(all))
# 
# male_tot <- filter(mf, sex_of_casualty == "Male")
# female_tot <- filter(mf, sex_of_casualty == "Female")
# 
# male_times <- male_tot$all/female_tot$all
# 
# # age band 1
# ab1 <- "30-39"
# 
# # male female casualties for this age band
# sac_ab1 <- sac_all |>  filter(age_band == ab1)
# 
# 
# 
# ab2 <- "0-11"
# 
# # male female casualties for this age band
# sac_ab2 <- sac_all |>  filter(age_band == ab2)
# 
# ab3 <- "70+"
# 
# # male female casualties for this age band
# sac_ab3 <- sac_all |>  filter(age_band == ab3)
# 
# # add pc_ksi for only Male and Female
# sac_all <- sac_all |> ungroup() |> mutate(pc_ksi = (All/sum(All))*100) |> filter(sex_of_casualty %in% c("Male", "Female"))
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # Define colours and theme
# cols <- rev(c("#1de9b6", "#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# dft_theme <- list(cust_theme, scale_fill_manual(values = cols))  # use fill, not color
# 
# ggplot(sac_all, aes(x = age_band, y = pc_ksi, fill = sex_of_casualty)) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
#   geom_text(
#     aes(label = paste0(round(pc_ksi),"%")),  # Round values to 1 decimal place
#     position = position_dodge(width = 0.7),
#     vjust = -0.5,
#     size = 3
#   ) +
#   ggtitle(paste0("Chart 3: Percentage of ", tolower(report_casualty), " KSI casualties, by sex and age, GB: ", yr2calc-4, " to ", yr2calc)) +
#   dft_theme +
#   theme(
#     panel.background = element_blank(),
#     legend.position = "top",
#     legend.title = element_blank()
#   ) +
#   ylab(NULL)+
#   xlab(NULL)+
#   labs(caption = "Source: Stats19")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# # split up different vehicle numbers into individual dfs and join later for the table
# single_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles == 1) |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1)) |>
#   mutate(summary_group = factor(summary_group, levels = c("pedal cycle","motorcycle",  "car","bus or coach","light goods vehicle",  "heavy goods vehicle", "other vehicle"))) |>
#   arrange(summary_group) |>
#   mutate(summary_group = paste(1,summary_group))
# 
# 
# two_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles == 2) |>
#   mutate(summary_group = "2 vehicles involved") |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1))
# 
# GT_two_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles > 2) |>
#   mutate(summary_group = "3 or more other vehicles involved") |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = round(sum(Fatal,Serious,Slight))) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1))
# 
# # sort by percentage fatal for the text
# most_fat <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |>
#   arrange(desc(pc_fat))
# 
# # create a totals row
# totals <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |>
#   summarise(across(where(is.numeric), sum), group = "Total") |>
#    mutate(pc_fat = round(Fatal/All*100,1),
#           summary_group = group) |>
#   select(-group)
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # bind them all together for table 3
# table_3 <- rbind(single_vehicles,two_vehicles,GT_two_vehicles, totals)
# 
# # create table 3
# gt(table_3,auto_align = FALSE) |>
#   cols_label(summary_group = md("**Vehicles**"),
#              Fatal = md("**Fatalities**"),
#              Serious = md("**Serious injuries**"),
#              Slight = md("**Slight injuries**"),
#              All = md("**All casualties**"),
#              pc_fat = md("**% Fatalities**")) |>
#   tab_header(
#     title = md(paste0("**Table 3: Pedestrian casualties in reported road collisions by severity showing other vehicles involved GB: ", yr2calc-4, " to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(summary_group)),
#       cells_body(columns = c(summary_group))
#     ))
# 
# 
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# ## create a table of severity by year
# # used clock as much faster than lubridate
# crash_time <- cas_summary |>
#   left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table
#   select(datetime, Fatal_Pedestrian, Serious_Pedestrian) |>
#   mutate(#collision_hr = lubridate::hour(datetime),
#          dow = clock::date_weekday_factor(datetime, abbreviate = FALSE),
#         collision_hr = get_hour(datetime),
#         KSI = sum(Fatal_Pedestrian, Serious_Pedestrian)) |>
#   mutate(dow = case_when(dow == "Monday" ~  "Monday to Friday",
#          dow == "Tuesday" ~  "Monday to Friday",
#          dow == "Wednesday" ~  "Monday to Friday",
#          dow == "Thursday" ~  "Monday to Friday",
#          dow == "Friday" ~  "Monday to Friday",
#          dow == "Saturday" ~  "Saturday",
#          dow == "Sunday" ~  "Sunday")) |>
#   #mutate(dow = case_when(dow > 1 & dow < 7 ~  "Monday to Friday", dow == 7 ~ "Saturday", dow == 1 ~ "Sunday")) |>
#   group_by(collision_hr, dow) |>
#     summarise(KSI = sum(KSI)) |>
#   mutate(KSI = if_else(dow == "Monday to Friday", KSI/5, KSI))
# 
# MF_peak <- crash_time |> filter(dow == "Monday to Friday") |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2))
# 
# SS_peak <- crash_time |> filter(dow %in% c("Saturday", "Sunday")) |> group_by(collision_hr) |> summarise(KSI = sum(KSI)) |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2))
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# crash_time %>%
#   ggplot(aes(collision_hr, KSI, color = dow)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top", legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   ggtitle(paste0("Chart 4: Reported ", tolower(report_casualty), " KSIs by hour of day and day of week, GB: ", yr2calc-4, " to ", yr2calc)) +
#    ylab(NULL)+
#   labs(x = "Hour starting", caption = "Source: Stats19")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # see section 1 for original table
# 
# road_type_bar <- rbind(road_types, road_type_all) |>
#   filter(!road_class == "Data missing or out of range")
# 
# fatal_urban <- filter(road_type_bar, name == "Fatal" & road_class == "Urban")
# 
# all_cas <- filter(road_type_bar, name == "All casualties" & road_class == "Urban")
# 
# fatal_mway <- filter(road_type_bar, name == "Fatal" & road_class == "Motorway")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# dft_theme <- list(cust_theme, scale_fill_manual(values = cols))  # use fill, not co
# 
# # Grouped bar
# ggplot(road_type_bar, aes(fill=road_class, y=pc, x=name, label = paste0(round(pc),"%"))) +
#   geom_bar(position="dodge", stat="identity") +
#   dft_theme +
#   theme(
#     panel.background = element_blank(),
#     legend.position = "top",
#     legend.title = element_blank()
#   ) +
#   theme(panel.background = element_blank()) +
#   geom_text(position = position_dodge2(width = 0.9, preserve = "single"), angle = 0, vjust=-0.5, hjust=0.5) +
#   xlab(NULL)+
#   ylab(NULL)+
#   ggtitle(paste0("Chart 5: Percentage of pedestrian casualties, by urban or rural classification and severity, GB: ", yr2calc-4, " to ", yr2calc)) +
#   labs(caption = "Source: Stats19")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # see section one
# 
# highest_fat <- junction_pc |> arrange(desc(Fatalities))
# highest_sev <- junction_pc |> arrange(desc(Serious))
# 
# if(yr2calc > 2023){
# 
# junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Crossroads", "More than 4 arms (not roundabout)"))
# 
# } else {
# 
#   junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Other junction", "Crossroads", "More than 4 arms (not roundabout)"))
# 
# roundabouts <- junction_pc |> filter(Junction %in% c("roundabout", "Mini-roundabout"))
# 
# }
# 
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# table_4 <- junction_pc |>
#   arrange(desc(All)) |>
#   mutate_if(is.numeric, round,1) |> # round all values to 1 dp
#   mutate_if(is.numeric, ~ paste0(.x, "%")) # add a % sign after each value
# 
# # create table 3
# gt(table_4,auto_align = FALSE) |>
#   cols_label(Junction = md("**Junction**"),
#              Fatalities = md("**Fatalities**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All casualties**")) |>
#   tab_header(
#     title = md(paste0("**Table 4: Percentage of pedestrian KSI casualties by severity and junction detail where the collision occurred, GB: ", yr2calc-4, " to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(Junction)),
#       cells_body(columns = c(Junction))
#     ))
# 
#
Any scripts or data that you put into this service are public.
stats19 documentation built on March 18, 2026, 5:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
stats19
Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/factsheet.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19 Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/factsheet.R In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19
Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/factsheet.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain