Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
out.width = "100%",
eval = curl::has_internet()
)
## ----eval=FALSE---------------------------------------------------------------
# # from CRAN
# install.packages("stats19")
# # you can install the latest development (discoraged) using:
# remotes::install_github("ITSLeeds/stats19")
## -----------------------------------------------------------------------------
library(stats19)
## ----dl2022-accidents---------------------------------------------------------
dl_stats19(year = 2022, type = "collision", ask = FALSE)
## -----------------------------------------------------------------------------
stats19::file_names$DigitalBreathTestData2013.zip
## ----dl2022-all, eval=FALSE---------------------------------------------------
# dl_stats19(year = 2022)
## ----dl2022-read--------------------------------------------------------------
crashes_2022_raw = get_stats19(year = 2022, type = "collision", format = FALSE)
## ----echo=FALSE---------------------------------------------------------------
# skip vignettes if resource unavailable
if(object.size(crashes_2022_raw) < 1000) {
knitr::opts_chunk$set(eval = FALSE)
}
## ----crashes2022-class--------------------------------------------------------
class(crashes_2022_raw)
dim(crashes_2022_raw)
## ----read2022-raw-format------------------------------------------------------
crashes_2022_raw = read_collisions(year = 2022, format = FALSE)
crashes_2022 = format_collisions(crashes_2022_raw)
nrow(crashes_2022_raw)
ncol(crashes_2022_raw)
nrow(crashes_2022)
ncol(crashes_2022)
## ----crashes2022-columns------------------------------------------------------
crashes_2022_raw[c(7, 18, 23, 25)]
crashes_2022[c(7, 18, 23, 25)]
## ----echo=FALSE, eval=FALSE---------------------------------------------------
# # commented out as confusing...
# key_patt = "severity|speed|light|human"
# key_vars = grep(key_patt, x = names(crashes_2022_raw), ignore.case = TRUE)
# random_n = sample(x = nrow(crashes_2022_raw), size = 3)
# crashes_2022_raw[random_n, key_vars]
# crashes_2022[random_n, key_vars]
## ----variables-and-schema-----------------------------------------------------
stats19_variables
stats19_schema
## ----format-col-names---------------------------------------------------------
format_column_names(stats19_variables$variable[1:3])
## ----format-main--------------------------------------------------------------
crashes_2022 = format_collisions(crashes_2022_raw)
# vehicle data for 2022
dl_stats19(year = 2022, type = "vehicle", ask = FALSE)
vehicles_2022_raw = read_vehicles(year = 2022)
vehicles_2022 = format_vehicles(vehicles_2022_raw)
# casualties data for 2022
dl_stats19(year = 2022, type = "casualty", ask = FALSE)
casualties_2022 = read_casualties(year = 2022)
## ----summarise-stats19--------------------------------------------------------
summarise_stats19 = function(x) {
data.frame(row.names = 1:length(x),
name = substr(names(x), 1, 19),
class = sapply(x, function(v) class(v)[1]),
n_unique = sapply(x, function(v) length(unique(v))),
first_label = sapply(x, function(v) substr(unique(v)[1], 1, 16)),
most_common_value = sapply(x, function(v)
substr(names(sort(table(v), decreasing = TRUE)[1]), 1, 16)[1])
)
}
## ----summarise-crashes--------------------------------------------------------
knitr::kable(summarise_stats19(crashes_2022),
caption = "Summary of formatted crash data.")
## ----summarise-vehicles-------------------------------------------------------
knitr::kable(summarise_stats19(vehicles_2022),
caption = "Summary of formatted vehicles data.")
## ----summarise-casualties-----------------------------------------------------
knitr::kable(summarise_stats19(casualties_2022),
caption = "Summary of formatted casualty data.")
## ----echo=FALSE, results='asis'-----------------------------------------------
key_patt = "severity|speed|light|human"
key_vars = grep(key_patt, x = names(stats19::accidents_sample_raw), ignore.case = TRUE)
knitr::kable(stats19::accidents_sample_raw[, key_vars])
## ----2022-cas-----------------------------------------------------------------
dl_stats19(year = 2022, type = "casualty", ask = FALSE)
casualties_2022 = read_casualties(year = 2022)
nrow(casualties_2022)
ncol(casualties_2022)
## ----2022-cas-columns---------------------------------------------------------
casualties_2022[c(4, 5, 6, 14)]
## ----2022-cas-columns-all-----------------------------------------------------
names(casualties_2022)
## ----dl2022-vehicles----------------------------------------------------------
dl_stats19(year = 2022, type = "vehicle", ask = FALSE)
vehicles_2022 = read_vehicles(year = 2022)
nrow(vehicles_2022)
ncol(vehicles_2022)
## ----2022-veh-columns---------------------------------------------------------
vehicles_2022[c(3, 14:16)]
## ----2022-veh-columns-all-----------------------------------------------------
names(vehicles_2022)
## ----eval=FALSE, echo=FALSE---------------------------------------------------
# # old code to be up-dated
# d14 = "Stats19_Data_2005-2014"
# crashes_2005_2014 = read_collisions(data_dir = d14)
# crashes_2005_2014_f = format_stats19_2005_2014_ac(crashes_2005_2014)
# d15 = "RoadSafetyData_2015"
# crashes_2015 = read_collisions(data_dir = d15, filename = "Accidents_2015.csv")
# crashes_2015_f = format_stats19_2015_ac(crashes_2015)
# d16 = "dftRoadSafety_Accidents_2016"
# crashes_2016 = read_collisions(data_dir = d16, filename = "dftRoadSafety_Accidents_2016.csv")
# crashes_2016_f = format_stats19_2016_ac(crashes_2016)
# all_crashes = rbind(crashes_2015_f, crashes_2016_f, crashes_2022_f)
# table(ac$Accident_Severity)
## ----format-crashes-sf--------------------------------------------------------
crashes_sf = format_sf(crashes_2022)
## ----nfatalities--------------------------------------------------------------
library(sf)
library(dplyr)
crashes_sf %>%
filter(accident_severity == "Fatal") %>%
select(n_fatalities = accident_index) %>%
aggregate(by = police_boundaries, FUN = length) %>%
plot()
## ----ukboundaries-------------------------------------------------------------
west_yorkshire =
police_boundaries[police_boundaries$pfa16nm == "West Yorkshire", ]
## ----crashes-west_yorkshire---------------------------------------------------
crashes_wy = crashes_sf[west_yorkshire, ]
nrow(crashes_sf)
nrow(crashes_wy)
## ----table-join, message = FALSE----------------------------------------------
library(tidyr)
library(dplyr)
sel = casualties_2022$accident_index %in% crashes_wy$accident_index
casualties_wy = casualties_2022[sel, ]
cas_types = casualties_wy %>%
select(accident_index, casualty_type) %>%
group_by(accident_index) %>%
summarise(
Total = n(),
walking = sum(casualty_type == "Pedestrian"),
cycling = sum(casualty_type == "Cyclist"),
passenger = sum(casualty_type == "Car occupant")
)
cj = left_join(crashes_wy, cas_types)
## ----table-join-examples------------------------------------------------------
base::setdiff(names(cj), names(crashes_wy))
## ----out.width="90%", fig.show='hold'-----------------------------------------
plot(
cj[cj$cycling > 0, "speed_limit", ],
cex = cj$Total[cj$cycling > 0] / 3,
main = "Speed limit (cycling)"
)
plot(
cj[cj$passenger > 0, "speed_limit", ],
cex = cj$Total[cj$passenger > 0] / 3,
main = "Speed limit (passenger)"
)
## ----echo=FALSE---------------------------------------------------------------
knitr::include_graphics("wy-overview.jpg")
## ----sfplot, fig.show='hold', out.width="100%", fig.cap="Spatial distribution of serious and fatal crashes in West Yorkshire, for cycling, walking, being a car passenger and other modes of travel. Colour is related to the speed limit where the crash happened (red is faster) and size is proportional to the total number of people hurt in each crash (legend not shown).", fig.width=9, fig.height=7----
library(ggplot2)
crashes_types = cj %>%
filter(accident_severity != "Slight") %>%
mutate(type = case_when(
walking > 0 ~ "Walking",
cycling > 0 ~ "Cycling",
passenger > 0 ~ "Passenger",
TRUE ~ "Other"
))
crashes_types$speed_limit = as.integer(crashes_types$speed_limit)
table(crashes_types$speed_limit)
ggplot(crashes_types, aes(size = Total, colour = speed_limit)) +
geom_sf(show.legend = "point", alpha = 0.3) +
facet_grid(vars(type), vars(accident_severity)) +
scale_size(
breaks = c(1:3, 12),
labels = c(1:2, "3+", 12)
) +
scale_color_gradientn(colours = c("blue", "yellow", "red")) +
theme(axis.text = element_blank(), axis.ticks = element_blank())
## ----ggplot-ped-severity, fig.height=5, fig.width=6---------------------------
table(cj$light_conditions)
cj$speed_limit = as.integer(cj$speed_limit)
cj %>%
filter(walking > 0) %>%
mutate(light = case_when(
light_conditions == "Daylight" ~ "Daylight",
light_conditions == "Darkness - lights lit" ~ "Lit",
TRUE ~ "Other/Unlit"
)) %>%
ggplot(aes(colour = speed_limit)) +
geom_sf() +
facet_grid(vars(light), vars(accident_severity)) +
scale_color_continuous(low = "blue", high = "red") +
theme(axis.text = element_blank(), axis.ticks = element_blank())
## ----crash-date-plot, fig.width=5, fig.height=5-------------------------------
crashes_dates = cj %>%
st_set_geometry(NULL) %>%
group_by(date) %>%
summarise(
walking = sum(walking),
cycling = sum(cycling),
passenger = sum(passenger)
) %>%
gather(mode, casualties, -date)
ggplot(crashes_dates, aes(date, casualties)) +
geom_smooth(aes(colour = mode), method = "loess") +
ylab("Casualties per day")
## ----crash-time-plot, fig.width=5, fig.height=5-------------------------------
library(stringr)
crash_times = cj %>%
st_set_geometry(NULL) %>%
group_by(hour = as.numeric(str_sub(time, 1, 2))) %>%
summarise(
walking = sum(walking),
cycling = sum(cycling),
passenger = sum(passenger)
) %>%
gather(mode, casualties, -hour)
ggplot(crash_times, aes(hour, casualties)) +
geom_line(aes(colour = mode))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.