inst/doc/stats19.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "100%",
  eval = curl::has_internet()
)

## ----eval=FALSE---------------------------------------------------------------
#  # from CRAN
#  install.packages("stats19")
#  # you can install the latest development (discoraged) using:
#  remotes::install_github("ITSLeeds/stats19")

## -----------------------------------------------------------------------------
library(stats19)

## ----dl2022-accidents---------------------------------------------------------
dl_stats19(year = 2022, type = "collision", ask = FALSE)

## -----------------------------------------------------------------------------
stats19::file_names$DigitalBreathTestData2013.zip

## ----dl2022-all, eval=FALSE---------------------------------------------------
#  dl_stats19(year = 2022)

## ----dl2022-read--------------------------------------------------------------
crashes_2022_raw = get_stats19(year = 2022, type = "collision", format = FALSE)

## ----echo=FALSE---------------------------------------------------------------
# skip vignettes if resource unavailable
if(object.size(crashes_2022_raw) < 1000) {
  knitr::opts_chunk$set(eval = FALSE)
}

## ----crashes2022-class--------------------------------------------------------
class(crashes_2022_raw)
dim(crashes_2022_raw)

## ----read2022-raw-format------------------------------------------------------
crashes_2022_raw = read_collisions(year = 2022, format = FALSE)
crashes_2022 = format_collisions(crashes_2022_raw)
nrow(crashes_2022_raw)
ncol(crashes_2022_raw)
nrow(crashes_2022)
ncol(crashes_2022)

## ----crashes2022-columns------------------------------------------------------
crashes_2022_raw[c(7, 18, 23, 25)]
crashes_2022[c(7, 18, 23, 25)]

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  # commented out as confusing...
#  key_patt = "severity|speed|light|human"
#  key_vars = grep(key_patt, x = names(crashes_2022_raw), ignore.case = TRUE)
#  random_n = sample(x = nrow(crashes_2022_raw), size = 3)
#  crashes_2022_raw[random_n, key_vars]
#  crashes_2022[random_n, key_vars]

## ----variables-and-schema-----------------------------------------------------
stats19_variables
stats19_schema

## ----format-col-names---------------------------------------------------------
format_column_names(stats19_variables$variable[1:3])

## ----format-main--------------------------------------------------------------
crashes_2022 = format_collisions(crashes_2022_raw)

# vehicle data for 2022
dl_stats19(year = 2022, type = "vehicle", ask = FALSE)
vehicles_2022_raw = read_vehicles(year = 2022)
vehicles_2022 = format_vehicles(vehicles_2022_raw)

# casualties data for 2022
dl_stats19(year = 2022, type = "casualty", ask = FALSE)
casualties_2022 = read_casualties(year = 2022)

## ----summarise-stats19--------------------------------------------------------
summarise_stats19 = function(x) {
  data.frame(row.names = 1:length(x),
    name = substr(names(x), 1, 19),
    class = sapply(x, function(v) class(v)[1]),
    n_unique = sapply(x, function(v) length(unique(v))),
    first_label = sapply(x, function(v) substr(unique(v)[1], 1, 16)),
    most_common_value = sapply(x, function(v) 
      substr(names(sort(table(v), decreasing = TRUE)[1]), 1, 16)[1])
  )
}

## ----summarise-crashes--------------------------------------------------------
knitr::kable(summarise_stats19(crashes_2022), 
             caption = "Summary of formatted crash data.")

## ----summarise-vehicles-------------------------------------------------------
knitr::kable(summarise_stats19(vehicles_2022), 
             caption = "Summary of formatted vehicles data.")

## ----summarise-casualties-----------------------------------------------------
knitr::kable(summarise_stats19(casualties_2022), 
             caption = "Summary of formatted casualty data.")

## ----echo=FALSE, results='asis'-----------------------------------------------
key_patt = "severity|speed|light|human"
key_vars = grep(key_patt, x = names(stats19::accidents_sample_raw), ignore.case = TRUE)
knitr::kable(stats19::accidents_sample_raw[, key_vars])

## ----2022-cas-----------------------------------------------------------------
dl_stats19(year = 2022, type = "casualty", ask = FALSE)
casualties_2022 = read_casualties(year = 2022)
nrow(casualties_2022)
ncol(casualties_2022)

## ----2022-cas-columns---------------------------------------------------------
casualties_2022[c(4, 5, 6, 14)]

## ----2022-cas-columns-all-----------------------------------------------------
names(casualties_2022)

## ----dl2022-vehicles----------------------------------------------------------
dl_stats19(year = 2022, type = "vehicle", ask = FALSE)
vehicles_2022 = read_vehicles(year = 2022)
nrow(vehicles_2022)
ncol(vehicles_2022)

## ----2022-veh-columns---------------------------------------------------------
vehicles_2022[c(3, 14:16)]

## ----2022-veh-columns-all-----------------------------------------------------
names(vehicles_2022)

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  # old code to be up-dated
#  d14 = "Stats19_Data_2005-2014"
#  crashes_2005_2014 = read_collisions(data_dir = d14)
#  crashes_2005_2014_f = format_stats19_2005_2014_ac(crashes_2005_2014)
#  d15 = "RoadSafetyData_2015"
#  crashes_2015 = read_collisions(data_dir = d15, filename = "Accidents_2015.csv")
#  crashes_2015_f = format_stats19_2015_ac(crashes_2015)
#  d16 = "dftRoadSafety_Accidents_2016"
#  crashes_2016 = read_collisions(data_dir = d16, filename = "dftRoadSafety_Accidents_2016.csv")
#  crashes_2016_f = format_stats19_2016_ac(crashes_2016)
#  all_crashes = rbind(crashes_2015_f, crashes_2016_f, crashes_2022_f)
#  table(ac$Accident_Severity)

## ----format-crashes-sf--------------------------------------------------------
crashes_sf = format_sf(crashes_2022)

## ----nfatalities--------------------------------------------------------------
library(sf)
library(dplyr)
crashes_sf %>% 
  filter(accident_severity == "Fatal") %>% 
  select(n_fatalities = accident_index) %>% 
  aggregate(by = police_boundaries, FUN = length) %>% 
  plot()

## ----ukboundaries-------------------------------------------------------------
west_yorkshire =
  police_boundaries[police_boundaries$pfa16nm == "West Yorkshire", ]

## ----crashes-west_yorkshire---------------------------------------------------
crashes_wy = crashes_sf[west_yorkshire, ]
nrow(crashes_sf)
nrow(crashes_wy)

## ----table-join, message = FALSE----------------------------------------------
library(tidyr)
library(dplyr)
sel = casualties_2022$accident_index %in% crashes_wy$accident_index
casualties_wy = casualties_2022[sel, ]
cas_types = casualties_wy %>% 
  select(accident_index, casualty_type) %>% 
  group_by(accident_index) %>% 
  summarise(
    Total = n(),
    walking = sum(casualty_type == "Pedestrian"),
    cycling = sum(casualty_type == "Cyclist"),
    passenger = sum(casualty_type == "Car occupant")
    ) 
cj = left_join(crashes_wy, cas_types)

## ----table-join-examples------------------------------------------------------
base::setdiff(names(cj), names(crashes_wy))

## ----out.width="90%", fig.show='hold'-----------------------------------------
plot(
  cj[cj$cycling > 0, "speed_limit", ],
  cex = cj$Total[cj$cycling > 0] / 3,
  main = "Speed limit (cycling)"
  )
plot(
  cj[cj$passenger > 0, "speed_limit", ],
  cex = cj$Total[cj$passenger > 0] / 3,
  main = "Speed limit (passenger)"
  )

## ----echo=FALSE---------------------------------------------------------------
knitr::include_graphics("wy-overview.jpg")

## ----sfplot, fig.show='hold', out.width="100%", fig.cap="Spatial distribution of serious and fatal crashes in West Yorkshire, for cycling, walking, being a car passenger and other modes of travel. Colour is related to the speed limit where the crash happened (red is faster) and size is proportional to the total number of people hurt in each crash (legend not shown).", fig.width=9, fig.height=7----
library(ggplot2)
crashes_types = cj %>% 
  filter(accident_severity != "Slight") %>% 
  mutate(type = case_when(
    walking > 0 ~ "Walking",
    cycling > 0 ~ "Cycling",
    passenger > 0 ~ "Passenger",
    TRUE ~ "Other"
  ))
crashes_types$speed_limit = as.integer(crashes_types$speed_limit)
table(crashes_types$speed_limit)
ggplot(crashes_types, aes(size = Total, colour = speed_limit)) +
  geom_sf(show.legend = "point", alpha = 0.3) +
  facet_grid(vars(type), vars(accident_severity)) +
  scale_size(
    breaks = c(1:3, 12),
    labels = c(1:2, "3+", 12)
    ) +
  scale_color_gradientn(colours = c("blue", "yellow", "red")) +
  theme(axis.text = element_blank(), axis.ticks = element_blank())

## ----ggplot-ped-severity, fig.height=5, fig.width=6---------------------------
table(cj$light_conditions)
cj$speed_limit = as.integer(cj$speed_limit)
cj %>% 
  filter(walking > 0) %>% 
  mutate(light = case_when(
    light_conditions == "Daylight" ~ "Daylight",
    light_conditions == "Darkness - lights lit" ~ "Lit",
    TRUE ~ "Other/Unlit"
  )) %>% 
  ggplot(aes(colour = speed_limit)) +
  geom_sf() +
  facet_grid(vars(light), vars(accident_severity)) +
  scale_color_continuous(low = "blue", high = "red") +
  theme(axis.text = element_blank(), axis.ticks = element_blank())

## ----crash-date-plot, fig.width=5, fig.height=5-------------------------------
crashes_dates = cj %>% 
  st_set_geometry(NULL) %>% 
  group_by(date) %>% 
  summarise(
    walking = sum(walking),
    cycling = sum(cycling),
    passenger = sum(passenger)
    ) %>% 
  gather(mode, casualties, -date)
ggplot(crashes_dates, aes(date, casualties)) +
  geom_smooth(aes(colour = mode), method = "loess") +
  ylab("Casualties per day")

## ----crash-time-plot, fig.width=5, fig.height=5-------------------------------
library(stringr)

crash_times = cj %>% 
  st_set_geometry(NULL) %>% 
  group_by(hour = as.numeric(str_sub(time, 1, 2))) %>% 
  summarise(
    walking = sum(walking),
    cycling = sum(cycling),
    passenger = sum(passenger)
    ) %>% 
  gather(mode, casualties, -hour)

ggplot(crash_times, aes(hour, casualties)) +
  geom_line(aes(colour = mode))

Try the stats19 package in your browser

Any scripts or data that you put into this service are public.

stats19 documentation built on Sept. 11, 2024, 6:08 p.m.