inst/doc/stats19-training.R

## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
#  library(pct)      # access travel data from DfT-funded PCT project
#  library(sf)       # spatial vector data classes
#  library(stats19)  # get stats19 data
#  library(stplanr)  # transport planning tools
#  library(tidyverse)# packages for 'data science'
#  library(tmap)     # interactive maps

## ----include = FALSE----------------------------------------------------------

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "50%",
  eval = curl::has_internet()
)

## ----pkgs, warning=FALSE, echo=FALSE------------------------------------------
pkgs = c(
  "sf",          # spatial data package
  "stats19",     # downloads and formats open stats19 crash data
  "dplyr",       # a package data manipulation, part of the tidyverse
  "tmap"         # for making maps
)

## ----cite, echo=FALSE---------------------------------------------------------
knitr::write_bib(x = pkgs, "packages.bib")

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  remotes::install_cran(pkgs)
#  # remotes::install_github("ITSLeeds/pct")

## ----rstudioui, echo=FALSE, out.width="70%"-----------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/rstudio-ui.png")

## ----edit, eval=FALSE---------------------------------------------------------
#  file.edit("stats19-lesson-1.R")

## ----eval=FALSE---------------------------------------------------------------
#  x = 1:5
#  y = c(0, 1, 3, 9, 18)
#  plot(x, y)

## -----------------------------------------------------------------------------
vehicle_type = c("car", "bus", "tank")
casualty_type = c("pedestrian", "cyclist", "cat")
casualty_age = seq(from = 20, to = 60, by = 20)
set.seed(1)
dark = sample(x = c(TRUE, FALSE), size = 3, replace = TRUE)
small_matrix = matrix(1:24, nrow = 12)
crashes = data.frame(vehicle_type, casualty_type, casualty_age, dark)

## ----summary------------------------------------------------------------------
summary(casualty_age)

## ----summary-answers, echo=FALSE, eval=FALSE----------------------------------
#  summary(vehicle_type)
#  class(vehicle_type)
#  typeof(vehicle_type)
#  dim(vehicle_type)
#  length(vehicle_type)

## ----tibble1, echo=FALSE, eval=FALSE------------------------------------------
#  tibble::tibble(
#    vehicle_type,
#    casualty_type,
#    casualty_age,
#    dark
#  )

## ----autocomp, echo=FALSE-----------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/autocomplete.jpg")

## ----help, echo=FALSE---------------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/fucntionhelp.jpg")

## -----------------------------------------------------------------------------
# Create vector objects (a whole line comment)
x = 1:5 # a seqence of consecutive integers (inline comment)
y = c(0, 1, 3, 9, 18.1) 

## ----debug, echo=FALSE, out.width="60%"---------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/master/inst/rstudio-autocomplete.png")

## -----------------------------------------------------------------------------
saveRDS(crashes, "crashes.Rds")

## -----------------------------------------------------------------------------
crashes2 = readRDS("crashes.Rds")
identical(crashes, crashes2)

## ----readr-write, eval=FALSE--------------------------------------------------
#  readr::write_csv(crashes, "crashes.csv")
#  crashes3 = readr::read_csv("crashes.csv")
#  identical(crashes3, crashes)

## ----eval=FALSE---------------------------------------------------------------
#  casualty_age[2:3] # second and third casualty_age
#  crashes[c(1, 2), ] # first and second row of crashes
#  crashes$vehicle_type # returns just one column
#  crashes[, c("casualty_type", "casualty_age")] # first and third columns

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  crashes[, c(1, 3)] # first and third column of crashes by positional numbers
#  crashes[c(2), c(3)]
#  crashes[c(2), c(2, 3)]
#  class(crashes[, c(1, 3)])
#  class(crashes[c(2), c(3)])

## ----eval=FALSE---------------------------------------------------------------
#  x[c(TRUE, FALSE, TRUE, FALSE, TRUE)] # 1st, 3rd, and 5th element in x
#  x[x == 5] # only when x == 5 (notice the use of double equals)
#  x[x < 3] # less than 3
#  x[x < 3] = 0 # assign specific elements
#  casualty_age[casualty_age %% 6 == 0] # just the ages that are a multiple of 6
#  crashes[crashes$dark == FALSE, ]

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  casualty_age[casualty_age < 50] # the  casualty_age less than 50
#  crashes[crashes$vehicle_type == "tank", ] # rows where the name is tank
#  crashes$casualty_age[crashes$vehicle_type == "tank"] = 61

## ----eval=FALSE---------------------------------------------------------------
#  z = c(4, 5, NA, 7)

## ----eval=FALSE---------------------------------------------------------------
#  sum(z) # result is NA

## ----eval=FALSE---------------------------------------------------------------
#  sum(z, na.rm = TRUE) # result is equal to 4 + 5 + 7

## ----eval=FALSE---------------------------------------------------------------
#  is.na(z)
#  z_nona = z[!is.na(z)] # note the use of the not operator !
#  sum(z)

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  crashes$vehicle_type = as.character(crashes$vehicle_type)
#  as.matrix(crashes)

## -----------------------------------------------------------------------------
z = c(1, 2, -1, 1, 3)
l = c(NA, "a", "b", "c") # labels in ascending order
z_factor = factor(z, labels = l)
z_charcter = as.character(z_factor)
z_charcter

## ----smile, out.width="30%", fig.align="center"-------------------------------
eyes = c(2.3, 4, 3.7, 4)
eyes = matrix(eyes, ncol = 2, byrow = T)
mouth = c(2, 2, 2.5, 1.3, 3, 1, 3.5, 1.3, 4, 2)
mouth = matrix(mouth, ncol = 2, byrow = T)
plot(eyes, type = "p", main = "RRR!", cex = 2, xlim = c(1, 5), ylim = c(0, 5))
lines(mouth, type = "l", col = "red")

## ----eval=FALSE---------------------------------------------------------------
#  install.packages("sf")
#  # remotes::install_github("r-spatial/sf")

## -----------------------------------------------------------------------------
library(sf)

## ----tibble2, eval=FALSE------------------------------------------------------
#  crashes_tibble = tibble::tibble(
#    vehicle_type,
#    casualty_type,
#    casualty_age,
#    dark
#  )

## ----message=FALSE, out.width="40%", eval=FALSE-------------------------------
#  library(ggplot2)
#  ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))

## ----gg-extend, echo=FALSE, message=FALSE, eval=FALSE-------------------------
#  library(ggplot2)
#  # install.packages("ggthemes")
#  g1 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))
#  g2 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age)) +
#    ggthemes::theme_economist()
#  g3 = cowplot::plot_grid(g1, g2)
#  ggsave(filename = "inst/ggtheme-plot.png", width = 8, height = 2, dpi = 80)

## ----gg2, echo=FALSE, out.width="80%", fig.align="center"---------------------
library(ggplot2)
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/b4c40ad4c134853007493a9eac116b00acd4ec5a/inst/ggtheme-plot.png")

## -----------------------------------------------------------------------------
library(dplyr)
class(crashes)       
crashes %>% class()

## ----eval=FALSE---------------------------------------------------------------
#  crashes %>%
#    filter(casualty_age > 50) # filter rows
#  crashes %>%
#    select(casualty_type) # select just one column
#  crashes %>%
#    group_by(dark) %>%
#    summarise(mean_age = mean(casualty_age))

## ----dplyr, eval=FALSE, echo=FALSE--------------------------------------------
#  # answers
#  crashes %>%
#    arrange(desc(casualty_age))
#  crashes %>% filter(casualty_age > 21)
#  crashes %>%
#    mutate(birth_year = 2019 - casualty_age) %>%
#    filter(birth_year > 1969)

## ----message=FALSE------------------------------------------------------------
library(lubridate)

## -----------------------------------------------------------------------------
today()

## ----eval=FALSE---------------------------------------------------------------
#  x = today()
#  day(x)
#  month(x)
#  year(x)
#  weekdays(x)

## ----eval=FALSE---------------------------------------------------------------
#  as.Date("2019-10-17") # works
#  as.Date("2019 10 17") # fails
#  ymd("2019 10 17") # works
#  dmy("17/10/2019") # works

## -----------------------------------------------------------------------------
x = c("2009-01-01", "2009-02-02", "2009-03-03")
x_date = ymd(x)
x_date

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  # 1. Extract the day, the year-day, the month and the weekday (as a non-abbreviated character vector) of each element of `x_date`.
#  day(x_date)
#  yday(x_date)
#  month(x_date)
#  weekdays(x_date, abbreviate = FALSE)
#  # 1. Modify the previous example to parse the following character string: `"09/09/1993"` and extract its weekday.
#  weekdays(dmy("09/09/93"))
#  wday(dmy("09/09/93"))

## -----------------------------------------------------------------------------
crashes$casualty_day = x_date

## -----------------------------------------------------------------------------
filter(crashes, day(casualty_day) < 7) # the events that ocurred in the first week of the month
filter(crashes, weekdays(casualty_day) == "Monday") # the events occurred on monday

## -----------------------------------------------------------------------------
x = c("18:23:35", "00:00:01", "12:34:56")
x_hour = hms(x)
x_hour

## -----------------------------------------------------------------------------
hour(x_hour)
minute(x_hour)
second(x_hour)

## -----------------------------------------------------------------------------
x = c("18:23", "00:00", "12:34")
(x_hour = hm(x))

## -----------------------------------------------------------------------------
crashes$casualty_hms = hms(c("18:23:35", "00:00:01", "12:34:56"))
crashes$casualty_hour = hour(crashes$casualty_hms)

## ----eval=FALSE---------------------------------------------------------------
#  library(stats19)
#  crashes_2022 = stats19::get_stats19(year = 2022, type = "ac")
#  crashes_2022

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  # solutions
#  crashes %>% filter(casualty_hour >= 12)
#  crashes %>% filter(casualty_hour > 15 & casualty_hour < 19)
#  
#  crashes_2022 %>%
#    mutate(my_weekdays = weekdays(date)) %>%
#    filter(my_weekdays == "Monday") %>%
#    nrow()
#  crashes_2022 %>%
#    mutate(my_weekdays = weekdays(date)) %>%
#    filter(my_weekdays == "Friday") %>%
#    nrow()
#  
#  crashes_2022 %>%
#    mutate(my_weekdays = weekdays(date)) %>%
#    group_by(my_weekdays) %>%
#    summarize(n = n()) %>%
#    ggplot() +
#    geom_col(aes(x = my_weekdays, y = n))
#  
#  crashes_2022 %>%
#    mutate(my_hours = hour(hm(time))) %>%
#    group_by(my_hours) %>%
#    summarize(n = n()) %>%
#    ggplot() +
#    geom_col(aes(x = my_hours, y = n))
#  
#  crashes_2022 %>%
#    mutate(my_weekdays = weekdays(date), my_hours = hour(hm(time))) %>%
#    group_by(my_weekdays, my_hours) %>%
#    summarise(n = n()) %>%
#    ggplot() +
#    geom_line(aes(x = my_hours, y = n, col = my_weekdays), size = 1.05)
#  # the legend needs some reordering

## ----crashes-sf, fig.height=2, fig.width=3------------------------------------
library(sf) # load the sf package for working with spatial data
crashes_sf = crashes # create copy of crashes dataset
crashes_sf$longitude = c(-1.3, -1.2, -1.1)
crashes_sf$latitude = c(50.7, 50.7, 50.68)
crashes_sf = st_as_sf(crashes_sf, coords = c("longitude", "latitude"), crs = 4326)
# plot(crashes_sf[1:4]) # basic plot
# mapview::mapview(crashes_sf) # for interactive map

## ----crashes-sf-ex, echo=FALSE, out.width="30%", fig.show='hold'--------------
plot(crashes_sf$geometry)
plot(crashes_sf["casualty_age"])
plot(crashes_sf[2:3, "dark"])
# st_distance(crashes_sf)
# Bembridge

# # updload geographic crash data
# write_sf(crashes_sf, "crashes_sf.geojson")
# piggyback::pb_upload("crashes_sf.geojson")

## ----eval=FALSE---------------------------------------------------------------
#  write_sf(zones, "zones.geojson") # save geojson file
#  write_sf(zones, "zmapinfo", driver = "MapInfo file")
#  read_sf("zmapinfo") # read in mapinfo file

## -----------------------------------------------------------------------------
knitr::opts_chunk$set(eval = FALSE)

## -----------------------------------------------------------------------------
#  zones = pct::get_pct_zones("isle-of-wight")[1:9]

## ----echo=FALSE---------------------------------------------------------------
#  # class(zones)
#  # names(zones)
#  zones[1:2, c(1, 5, 6, 7, 8)]

## ----message=FALSE------------------------------------------------------------
#  zones_containing_crashes = zones[crashes_sf, ]

## ----sp-ex, echo=FALSE, out.width="33%", fig.show='hold', message=FALSE, warning=FALSE----
#  plot(zones$geometry)
#  plot(zones_containing_crashes$geometry, col = "red", add = TRUE)
#  plot(zones$geometry)
#  plot(zones[crashes_sf[2, ], ], col = "blue", add = TRUE)
#  plot(zones$geometry)
#  plot(zones[zones_containing_crashes, ], col = "yellow", add = TRUE)
#  plot(crashes_sf$geometry, pch = 20, add = TRUE)

## ----message=FALSE------------------------------------------------------------
#  zones_joined = st_join(zones[1], crashes_sf)

## ----joinf, echo=FALSE, out.width="40%", fig.show='hold', message=FALSE-------
#  plot(zones_joined["casualty_age"])
#  zjd = st_join(zones[1], crashes_sf["dark"], left = FALSE)
#  plot(zjd)

## ----crs1---------------------------------------------------------------------
#  crashes_osgb = st_transform(crashes_sf, 27700)

## -----------------------------------------------------------------------------
#  # load example dataset if it doesn't already exist
#  zones = pct::get_pct_zones("isle-of-wight")
#  sel = zones$all > 3000  # create a subsetting object
#  zones_large = zones[sel, ] # subset areas with a popualtion over 100,000
#  zones_2 = zones[zones$geo_name == "Isle of Wight 002",] # subset based on 'equality' query
#  zones_first_and_third_column = zones[c(1, 3)]
#  zones_just_all = zones["all"]

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  # 1. Practice subsetting techniques you have learned on the `sf data.frame` object `zones`:
#  #      1. Create an object called `zones_small` which contains only regions with less than 3000 people in the `all` column
#  # in base R
#  zones_small = zones[zones$all < 3000, ]
#  # with dplyr
#  zones_small = zones %>%
#    filter(all < 3000)
#  #      1. Create a selection object called `sel_high_car` which is `TRUE` for regions with above median numbers of people who travel by car and `FALSE` otherwise
#  median_car = median(zones$car_driver)
#  sel_high_car = zones$car_driver > median_car
#  #      1. How many regions have the number '1' in the column 'geo_name'? What percentage of the regions in the Isle of Wight is this?
#  sel_region_name_contains_1 = grepl("1", x = zones$geo_name)
#  sum(sel_region_name_contains_1) / nrow(zones)
#  #      1. Create an object called `zones_foot` which contains only the foot attribute from `zones`
#  # using base R
#  zones_foot = zones["foot"]
#  # dplyr
#  zones_foot = zones %>%
#    select(foot)
#  #      1. Bonus: plot the result to show where walking is a popular mode of travel to work
#  plot(zones_foot)
#  #      1. Bonus: bulding on your answers to previous questions, use `filter()` from the `dplyr` package to subset small regions where high car use is high
#  zones_small_car_high = zones %>%
#    filter(all < 3000, car_driver > median_car)
#  # 1. Bonus: What is the population density of each region (hint: you may need to use the functions `st_area()`, `as.numeric()` and use the 'all' column)?
#  zones$area_km2 = as.numeric(st_area(zones)) /1000000
#  zones$population_density = zones$all / zones$area_km2
#  plot(zones["population_density"])
#  # in dplyr
#  zones_density = zones %>%
#    mutate(area_km2 = as.numeric(st_area(geometry)) / 1000000) %>%
#    mutate(population_density = all / area_km2)
#  plot(zones_density %>% select(population_density))
#  # 1. Bonus: Which zone has the highest percentage who cycle?
#  zones %>%
#    mutate(pcycle = bicycle / all) %>%
#    top_n(n = 1, wt = pcycle)
#  # 1. Bonus: Find the proportion of people who drive to work (`car_driver`) in areas in which more than 500 people walk to work
#  zones %>%
#    group_by(foot > 500) %>%
#    summarise(mean_car = sum(car_driver) / sum(all) )

## -----------------------------------------------------------------------------
#  library(tmap)
#  tmap_mode("plot")

## ----plot3, fig.show='hold', out.width="33%", echo=FALSE----------------------
#  plot(zones[c("all", "bicycle")])
#  # tm_shape(zones) +
#  #   tm_polygons(c("all", "bicycle"))
#  # tmap_mode("view")
#  # m = tm_shape(zones_joined) +
#  #   tm_polygons(c("casualty_type")) +
#  #   tm_scale_bar()
#  # m
#  # knitr::include_graphics("tmap-zones-interactive.png")
#  # piggyback::pb_upload("zones_joined.Rds")
#  # create bug report:
#  # See https://github.com/mtennekes/tmap/issues/551
#  # piggyback::pb_download_url("zones_joined.Rds")
#  # "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
#  # library(tmap)
#  # u = "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
#  # zones_joined = readRDS(url(u))
#  # qtm(zones_joined)

## ----eval=FALSE---------------------------------------------------------------
#  vignette(package = "stats19") # view all vignettes available on stats19
#  vignette("stats19") # view the introductory vignette

## ----echo=FALSE, results='hide', message=FALSE, eval=FALSE--------------------
#  library(stats19)
#  library(dplyr)
#  library(sf)
#  a = get_stats19(2018, "ac")
#  asf = format_sf(a)
#  a_zones = asf %>%
#    filter(local_authority_district == "Isle of Wight")
#  nrow(a_zones)
#  zones = pct::get_pct_zones(region = "isle-of-wight")
#  zones_osbg = st_transform(zones, 27700)
#  a_zones_sf = a_zones[zones_osbg, ]
#  nrow(a_zones_sf)
#  # mapview::mapview(zones) +
#  #   mapview::mapview(a_zones)
#  class(a$date)
#  class(a$time)
#  a_zones$month = lubridate::month(a_zones$date)
#  a_zones_may = a_zones %>%
#    filter(month == 5)
#  a_agg = aggregate(a_zones_may["speed_limit"], zones_osbg, mean)
#  plot(a_agg)
#  class(a$date)

## -----------------------------------------------------------------------------
#  u = "https://github.com/ropensci/stats19/releases/download/1.1.0/roads_key.Rds"
#  roads_wgs = readRDS(url(u))
#  roads = roads_wgs %>% st_transform(crs = 27700)

## -----------------------------------------------------------------------------
#  u = "https://github.com/ropensci/stats19/releases/download/1.1.0/car_collisions_2022_iow.Rds"
#  crashes_iow = readRDS(url(u))

## ----echo=FALSE, out.width="49%", fig.show='hold', message=FALSE--------------
#  plot(roads$geometry)
#  plot(crashes_iow["accident_severity"], add = TRUE)
#  roads_buffer = st_buffer(roads, 200, endCapStyle = "FLAT")
#  crashes_outside_roads = crashes_iow[roads_buffer, , op = sf::st_disjoint]
#  roads_agg = aggregate(crashes_iow[1], by = roads_buffer, FUN = length)
#  # plot(roads_agg, border = NA, main = "")
#  names(roads_agg)[1] = "N. Crashes"
#  # tmap_mode("plot")
#  # tm_shape(roads_agg) + tm_fill("N. Crashes") +
#  #   tm_shape(crashes_outside_roads) + tm_dots(col = "blue")

## ----final-plot, echo=FALSE, out.width="100%"---------------------------------
#  # knitr::include_graphics("final-figure.png")

Try the stats19 package in your browser

Any scripts or data that you put into this service are public.

stats19 documentation built on Nov. 5, 2023, 1:09 a.m.