stats19-training.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
# library(pct)      # access travel data from DfT-funded PCT project
# library(sf)       # spatial vector data classes
# library(stats19)  # get stats19 data
# library(stplanr)  # transport planning tools
# library(tidyverse)# packages for 'data science'
# library(tmap)     # interactive maps

## ----include = FALSE----------------------------------------------------------

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "50%",
  eval = curl::has_internet()
)

## ----pkgs, warning=FALSE, echo=FALSE------------------------------------------
pkgs = c(
  "sf",          # spatial data package
  "stats19",     # downloads and formats open stats19 crash data
  "dplyr",       # a package data manipulation, part of the tidyverse
  "tmap"         # for making maps
)

## ----cite, echo=FALSE---------------------------------------------------------
knitr::write_bib(x = pkgs, "packages.bib")

## ----eval=FALSE, echo=FALSE---------------------------------------------------
# remotes::install_cran(pkgs)
# # remotes::install_github("ITSLeeds/pct")

## ----rstudioui, echo=FALSE, out.width="70%"-----------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/rstudio-ui.png")

## ----edit, eval=FALSE---------------------------------------------------------
# file.edit("stats19-lesson-1.R")

## ----eval=FALSE---------------------------------------------------------------
# x = 1:5
# y = c(0, 1, 3, 9, 18)
# plot(x, y)

## -----------------------------------------------------------------------------
vehicle_type = c("car", "bus", "tank")
casualty_type = c("pedestrian", "cyclist", "cat")
casualty_age = seq(from = 20, to = 60, by = 20)
set.seed(1)
dark = sample(x = c(TRUE, FALSE), size = 3, replace = TRUE)
small_matrix = matrix(1:24, nrow = 12)
crashes = data.frame(vehicle_type, casualty_type, casualty_age, dark)

## ----summary------------------------------------------------------------------
summary(casualty_age)

## ----summary-answers, echo=FALSE, eval=FALSE----------------------------------
# summary(vehicle_type)
# class(vehicle_type)
# typeof(vehicle_type)
# dim(vehicle_type)
# length(vehicle_type)

## ----tibble1, echo=FALSE, eval=FALSE------------------------------------------
# tibble::tibble(
#   vehicle_type,
#   casualty_type,
#   casualty_age,
#   dark
# )

## ----autocomp, echo=FALSE-----------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/autocomplete.jpg")

## ----help, echo=FALSE---------------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/fucntionhelp.jpg")

## -----------------------------------------------------------------------------
# Create vector objects (a whole line comment)
x = 1:5 # a seqence of consecutive integers (inline comment)
y = c(0, 1, 3, 9, 18.1) 

## ----debug, echo=FALSE, out.width="60%"---------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/master/inst/rstudio-autocomplete.png")

## -----------------------------------------------------------------------------
saveRDS(crashes, "crashes.Rds")

## -----------------------------------------------------------------------------
crashes2 = readRDS("crashes.Rds")
identical(crashes, crashes2)

## ----readr-write, eval=FALSE--------------------------------------------------
# readr::write_csv(crashes, "crashes.csv")
# crashes3 = readr::read_csv("crashes.csv")
# identical(crashes3, crashes)

## ----eval=FALSE---------------------------------------------------------------
# casualty_age[2:3] # second and third casualty_age
# crashes[c(1, 2), ] # first and second row of crashes
# crashes$vehicle_type # returns just one column
# crashes[, c("casualty_type", "casualty_age")] # first and third columns

## ----eval=FALSE, echo=FALSE---------------------------------------------------
# crashes[, c(1, 3)] # first and third column of crashes by positional numbers
# crashes[c(2), c(3)]
# crashes[c(2), c(2, 3)]
# class(crashes[, c(1, 3)])
# class(crashes[c(2), c(3)])

## ----eval=FALSE---------------------------------------------------------------
# x[c(TRUE, FALSE, TRUE, FALSE, TRUE)] # 1st, 3rd, and 5th element in x
# x[x == 5] # only when x == 5 (notice the use of double equals)
# x[x < 3] # less than 3
# x[x < 3] = 0 # assign specific elements
# casualty_age[casualty_age %% 6 == 0] # just the ages that are a multiple of 6
# crashes[crashes$dark == FALSE, ]

## ----eval=FALSE, echo=FALSE---------------------------------------------------
# casualty_age[casualty_age < 50] # the  casualty_age less than 50
# crashes[crashes$vehicle_type == "tank", ] # rows where the name is tank
# crashes$casualty_age[crashes$vehicle_type == "tank"] = 61

## ----eval=FALSE---------------------------------------------------------------
# z = c(4, 5, NA, 7)

## ----eval=FALSE---------------------------------------------------------------
# sum(z) # result is NA

## ----eval=FALSE---------------------------------------------------------------
# sum(z, na.rm = TRUE) # result is equal to 4 + 5 + 7

## ----eval=FALSE---------------------------------------------------------------
# is.na(z)
# z_nona = z[!is.na(z)] # note the use of the not operator !
# sum(z)

## ----echo=FALSE, eval=FALSE---------------------------------------------------
# crashes$vehicle_type = as.character(crashes$vehicle_type)
# as.matrix(crashes)

## -----------------------------------------------------------------------------
z = c(1, 2, -1, 1, 3)
l = c(NA, "a", "b", "c") # labels in ascending order
z_factor = factor(z, labels = l)
z_charcter = as.character(z_factor)
z_charcter

## ----smile, out.width="30%", fig.align="center"-------------------------------
eyes = c(2.3, 4, 3.7, 4)
eyes = matrix(eyes, ncol = 2, byrow = T)
mouth = c(2, 2, 2.5, 1.3, 3, 1, 3.5, 1.3, 4, 2)
mouth = matrix(mouth, ncol = 2, byrow = T)
plot(eyes, type = "p", main = "RRR!", cex = 2, xlim = c(1, 5), ylim = c(0, 5))
lines(mouth, type = "l", col = "red")

## ----eval=FALSE---------------------------------------------------------------
# install.packages("sf")
# # remotes::install_github("r-spatial/sf")

## -----------------------------------------------------------------------------
library(sf)

## ----tibble2, eval=FALSE------------------------------------------------------
# crashes_tibble = tibble::tibble(
#   vehicle_type,
#   casualty_type,
#   casualty_age,
#   dark
# )

## ----message=FALSE, out.width="40%", eval=FALSE-------------------------------
# library(ggplot2)
# ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))

## ----gg-extend, echo=FALSE, message=FALSE, eval=FALSE-------------------------
# library(ggplot2)
# # install.packages("ggthemes")
# g1 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))
# g2 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age)) +
#   ggthemes::theme_economist()
# library(patchwork)
# g3 = g1 + g2
# ggsave(filename = "inst/ggtheme-plot.png", width = 8, height = 2, dpi = 80)

## ----gg2, echo=FALSE, out.width="80%", fig.align="center"---------------------
library(ggplot2)
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/b4c40ad4c134853007493a9eac116b00acd4ec5a/inst/ggtheme-plot.png")

## -----------------------------------------------------------------------------
library(dplyr)
class(crashes)       
crashes %>% class()

## ----eval=FALSE---------------------------------------------------------------
# crashes %>%
#   filter(casualty_age > 50) # filter rows
# crashes %>%
#   select(casualty_type) # select just one column
# crashes %>%
#   group_by(dark) %>%
#   summarise(mean_age = mean(casualty_age))

## ----dplyr, eval=FALSE, echo=FALSE--------------------------------------------
# # answers
# crashes %>%
#   arrange(desc(casualty_age))
# crashes %>% filter(casualty_age > 21)
# crashes %>%
#   mutate(birth_year = 2019 - casualty_age) %>%
#   filter(birth_year > 1969)

## ----message=FALSE------------------------------------------------------------
library(lubridate)

## -----------------------------------------------------------------------------
today()

## ----eval=FALSE---------------------------------------------------------------
# x = today()
# day(x)
# month(x)
# year(x)
# weekdays(x)

## ----eval=FALSE---------------------------------------------------------------
# as.Date("2019-10-17") # works
# as.Date("2019 10 17") # fails
# ymd("2019 10 17") # works
# dmy("17/10/2019") # works

## -----------------------------------------------------------------------------
x = c("2009-01-01", "2009-02-02", "2009-03-03")
x_date = ymd(x)
x_date

## ----echo=FALSE, eval=FALSE---------------------------------------------------
# # 1. Extract the day, the year-day, the month and the weekday (as a non-abbreviated character vector) of each element of `x_date`.
# day(x_date)
# yday(x_date)
# month(x_date)
# weekdays(x_date, abbreviate = FALSE)
# # 1. Modify the previous example to parse the following character string: `"09/09/1993"` and extract its weekday.
# weekdays(dmy("09/09/93"))
# wday(dmy("09/09/93"))

## -----------------------------------------------------------------------------
crashes$casualty_day = x_date

## -----------------------------------------------------------------------------
filter(crashes, day(casualty_day) < 7) # the events that ocurred in the first week of the month
filter(crashes, weekdays(casualty_day) == "Monday") # the events occurred on monday

## -----------------------------------------------------------------------------
x = c("18:23:35", "00:00:01", "12:34:56")
x_hour = hms(x)
x_hour

## -----------------------------------------------------------------------------
hour(x_hour)
minute(x_hour)
second(x_hour)

## -----------------------------------------------------------------------------
x = c("18:23", "00:00", "12:34")
(x_hour = hm(x))

## -----------------------------------------------------------------------------
crashes$casualty_hms = hms(c("18:23:35", "00:00:01", "12:34:56"))
crashes$casualty_hour = hour(crashes$casualty_hms)

## ----eval=FALSE---------------------------------------------------------------
# library(stats19)
# crashes_2022 = stats19::get_stats19(year = 2022, type = "ac")
# crashes_2022

## ----eval=FALSE, echo=FALSE---------------------------------------------------
# # solutions
# crashes %>% filter(casualty_hour >= 12)
# crashes %>% filter(casualty_hour > 15 & casualty_hour < 19)
# 
# crashes_2022 %>%
#   mutate(my_weekdays = weekdays(date)) %>%
#   filter(my_weekdays == "Monday") %>%
#   nrow()
# crashes_2022 %>%
#   mutate(my_weekdays = weekdays(date)) %>%
#   filter(my_weekdays == "Friday") %>%
#   nrow()
# 
# crashes_2022 %>%
#   mutate(my_weekdays = weekdays(date)) %>%
#   group_by(my_weekdays) %>%
#   summarize(n = n()) %>%
#   ggplot() +
#   geom_col(aes(x = my_weekdays, y = n))
# 
# crashes_2022 %>%
#   mutate(my_hours = hour(hm(time))) %>%
#   group_by(my_hours) %>%
#   summarize(n = n()) %>%
#   ggplot() +
#   geom_col(aes(x = my_hours, y = n))
# 
# crashes_2022 %>%
#   mutate(my_weekdays = weekdays(date), my_hours = hour(hm(time))) %>%
#   group_by(my_weekdays, my_hours) %>%
#   summarise(n = n()) %>%
#   ggplot() +
#   geom_line(aes(x = my_hours, y = n, col = my_weekdays), size = 1.05)
# # the legend needs some reordering

## ----crashes-sf, fig.height=2, fig.width=3------------------------------------
library(sf) # load the sf package for working with spatial data
crashes_sf = crashes # create copy of crashes dataset
crashes_sf$longitude = c(-1.3, -1.2, -1.1)
crashes_sf$latitude = c(50.7, 50.7, 50.68)
crashes_sf = st_as_sf(crashes_sf, coords = c("longitude", "latitude"), crs = 4326)
# plot(crashes_sf[1:4]) # basic plot
# mapview::mapview(crashes_sf) # for interactive map

## ----crashes-sf-ex, echo=FALSE, out.width="30%", fig.show='hold'--------------
plot(crashes_sf$geometry)
plot(crashes_sf["casualty_age"])
plot(crashes_sf[2:3, "dark"])
# st_distance(crashes_sf)
# Bembridge

# # updload geographic crash data
# write_sf(crashes_sf, "crashes_sf.geojson")
# piggyback::pb_upload("crashes_sf.geojson")

## ----eval=FALSE---------------------------------------------------------------
# write_sf(zones, "zones.geojson") # save geojson file
# write_sf(zones, "zmapinfo", driver = "MapInfo file")
# read_sf("zmapinfo") # read in mapinfo file

## -----------------------------------------------------------------------------
knitr::opts_chunk$set(eval = FALSE)

## -----------------------------------------------------------------------------
# zones = pct::get_pct_zones("isle-of-wight")[1:9]

## ----echo=FALSE---------------------------------------------------------------
# # class(zones)
# # names(zones)
# zones[1:2, c(1, 5, 6, 7, 8)]

## ----message=FALSE------------------------------------------------------------
# zones_containing_crashes = zones[crashes_sf, ]

## ----sp-ex, echo=FALSE, out.width="33%", fig.show='hold', message=FALSE, warning=FALSE----
# plot(zones$geometry)
# plot(zones_containing_crashes$geometry, col = "red", add = TRUE)
# plot(zones$geometry)
# plot(zones[crashes_sf[2, ], ], col = "blue", add = TRUE)
# plot(zones$geometry)
# plot(zones[zones_containing_crashes, ], col = "yellow", add = TRUE)
# plot(crashes_sf$geometry, pch = 20, add = TRUE)

## ----message=FALSE------------------------------------------------------------
# zones_joined = st_join(zones[1], crashes_sf)

## ----joinf, echo=FALSE, out.width="40%", fig.show='hold', message=FALSE-------
# plot(zones_joined["casualty_age"])
# zjd = st_join(zones[1], crashes_sf["dark"], left = FALSE)
# plot(zjd)

## ----crs1---------------------------------------------------------------------
# crashes_osgb = st_transform(crashes_sf, 27700)

## -----------------------------------------------------------------------------
# # load example dataset if it doesn't already exist
# zones = pct::get_pct_zones("isle-of-wight")
# sel = zones$all > 3000  # create a subsetting object
# zones_large = zones[sel, ] # subset areas with a popualtion over 100,000
# zones_2 = zones[zones$geo_name == "Isle of Wight 002",] # subset based on 'equality' query
# zones_first_and_third_column = zones[c(1, 3)]
# zones_just_all = zones["all"]

## ----echo=FALSE, eval=FALSE---------------------------------------------------
# # 1. Practice subsetting techniques you have learned on the `sf data.frame` object `zones`:
# #      1. Create an object called `zones_small` which contains only regions with less than 3000 people in the `all` column
# # in base R
# zones_small = zones[zones$all < 3000, ]
# # with dplyr
# zones_small = zones %>%
#   filter(all < 3000)
# #      1. Create a selection object called `sel_high_car` which is `TRUE` for regions with above median numbers of people who travel by car and `FALSE` otherwise
# median_car = median(zones$car_driver)
# sel_high_car = zones$car_driver > median_car
# #      1. How many regions have the number '1' in the column 'geo_name'? What percentage of the regions in the Isle of Wight is this?
# sel_region_name_contains_1 = grepl("1", x = zones$geo_name)
# sum(sel_region_name_contains_1) / nrow(zones)
# #      1. Create an object called `zones_foot` which contains only the foot attribute from `zones`
# # using base R
# zones_foot = zones["foot"]
# # dplyr
# zones_foot = zones %>%
#   select(foot)
# #      1. Bonus: plot the result to show where walking is a popular mode of travel to work
# plot(zones_foot)
# #      1. Bonus: bulding on your answers to previous questions, use `filter()` from the `dplyr` package to subset small regions where high car use is high
# zones_small_car_high = zones %>%
#   filter(all < 3000, car_driver > median_car)
# # 1. Bonus: What is the population density of each region (hint: you may need to use the functions `st_area()`, `as.numeric()` and use the 'all' column)?
# zones$area_km2 = as.numeric(st_area(zones)) /1000000
# zones$population_density = zones$all / zones$area_km2
# plot(zones["population_density"])
# # in dplyr
# zones_density = zones %>%
#   mutate(area_km2 = as.numeric(st_area(geometry)) / 1000000) %>%
#   mutate(population_density = all / area_km2)
# plot(zones_density %>% select(population_density))
# # 1. Bonus: Which zone has the highest percentage who cycle?
# zones %>%
#   mutate(pcycle = bicycle / all) %>%
#   top_n(n = 1, wt = pcycle)
# # 1. Bonus: Find the proportion of people who drive to work (`car_driver`) in areas in which more than 500 people walk to work
# zones %>%
#   group_by(foot > 500) %>%
#   summarise(mean_car = sum(car_driver) / sum(all) )

## -----------------------------------------------------------------------------
# library(tmap)
# tmap_mode("plot")

## ----plot3, fig.show='hold', out.width="33%", echo=FALSE----------------------
# plot(zones[c("all", "bicycle")])
# # tm_shape(zones) +
# #   tm_polygons(c("all", "bicycle"))
# # tmap_mode("view")
# # m = tm_shape(zones_joined) +
# #   tm_polygons(c("casualty_type")) +
# #   tm_scale_bar()
# # m
# # knitr::include_graphics("tmap-zones-interactive.png")
# # piggyback::pb_upload("zones_joined.Rds")
# # create bug report:
# # See https://github.com/mtennekes/tmap/issues/551
# # piggyback::pb_download_url("zones_joined.Rds")
# # "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
# # library(tmap)
# # u = "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
# # zones_joined = readRDS(url(u))
# # qtm(zones_joined)

## ----eval=FALSE---------------------------------------------------------------
# vignette(package = "stats19") # view all vignettes available on stats19
# vignette("stats19") # view the introductory vignette

## ----echo=FALSE, results='hide', message=FALSE, eval=FALSE--------------------
# library(stats19)
# library(dplyr)
# library(sf)
# a = get_stats19(2018, "ac")
# asf = format_sf(a)
# a_zones = asf %>%
#   filter(local_authority_district == "Isle of Wight")
# nrow(a_zones)
# zones = pct::get_pct_zones(region = "isle-of-wight")
# zones_osbg = st_transform(zones, 27700)
# a_zones_sf = a_zones[zones_osbg, ]
# nrow(a_zones_sf)
# # mapview::mapview(zones) +
# #   mapview::mapview(a_zones)
# class(a$date)
# class(a$time)
# a_zones$month = lubridate::month(a_zones$date)
# a_zones_may = a_zones %>%
#   filter(month == 5)
# a_agg = aggregate(a_zones_may["speed_limit"], zones_osbg, mean)
# plot(a_agg)
# class(a$date)

## -----------------------------------------------------------------------------
# u = "https://github.com/ropensci/stats19/releases/download/1.1.0/roads_key.Rds"
# roads_wgs = readRDS(url(u))
# roads = roads_wgs %>% st_transform(crs = 27700)

## -----------------------------------------------------------------------------
# u = "https://github.com/ropensci/stats19/releases/download/1.1.0/car_collisions_2022_iow.Rds"
# crashes_iow = readRDS(url(u))

## ----echo=FALSE, out.width="49%", fig.show='hold', message=FALSE--------------
# plot(roads$geometry)
# plot(crashes_iow["accident_severity"], add = TRUE)
# roads_buffer = st_buffer(roads, 200, endCapStyle = "FLAT")
# crashes_outside_roads = crashes_iow[roads_buffer, , op = sf::st_disjoint]
# roads_agg = aggregate(crashes_iow[1], by = roads_buffer, FUN = length)
# # plot(roads_agg, border = NA, main = "")
# names(roads_agg)[1] = "N. Crashes"
# # tmap_mode("plot")
# # tm_shape(roads_agg) + tm_fill("N. Crashes") +
# #   tm_shape(crashes_outside_roads) + tm_dots(col = "blue")

## ----final-plot, echo=FALSE, out.width="100%"---------------------------------
# # knitr::include_graphics("final-figure.png")
Any scripts or data that you put into this service are public.
stats19 documentation built on March 18, 2026, 5:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
stats19
Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/stats19-training.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19 Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/stats19-training.R In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19
Work with Open Road Traffic Casualty Data from Great Britain

inst/doc/stats19-training.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain