Nothing
## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
# library(pct) # access travel data from DfT-funded PCT project
# library(sf) # spatial vector data classes
# library(stats19) # get stats19 data
# library(stplanr) # transport planning tools
# library(tidyverse)# packages for 'data science'
# library(tmap) # interactive maps
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
out.width = "50%",
eval = curl::has_internet()
)
## ----pkgs, warning=FALSE, echo=FALSE------------------------------------------
pkgs = c(
"sf", # spatial data package
"stats19", # downloads and formats open stats19 crash data
"dplyr", # a package data manipulation, part of the tidyverse
"tmap" # for making maps
)
## ----cite, echo=FALSE---------------------------------------------------------
knitr::write_bib(x = pkgs, "packages.bib")
## ----eval=FALSE, echo=FALSE---------------------------------------------------
# remotes::install_cran(pkgs)
# # remotes::install_github("ITSLeeds/pct")
## ----rstudioui, echo=FALSE, out.width="70%"-----------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/rstudio-ui.png")
## ----edit, eval=FALSE---------------------------------------------------------
# file.edit("stats19-lesson-1.R")
## ----eval=FALSE---------------------------------------------------------------
# x = 1:5
# y = c(0, 1, 3, 9, 18)
# plot(x, y)
## -----------------------------------------------------------------------------
vehicle_type = c("car", "bus", "tank")
casualty_type = c("pedestrian", "cyclist", "cat")
casualty_age = seq(from = 20, to = 60, by = 20)
set.seed(1)
dark = sample(x = c(TRUE, FALSE), size = 3, replace = TRUE)
small_matrix = matrix(1:24, nrow = 12)
crashes = data.frame(vehicle_type, casualty_type, casualty_age, dark)
## ----summary------------------------------------------------------------------
summary(casualty_age)
## ----summary-answers, echo=FALSE, eval=FALSE----------------------------------
# summary(vehicle_type)
# class(vehicle_type)
# typeof(vehicle_type)
# dim(vehicle_type)
# length(vehicle_type)
## ----tibble1, echo=FALSE, eval=FALSE------------------------------------------
# tibble::tibble(
# vehicle_type,
# casualty_type,
# casualty_age,
# dark
# )
## ----autocomp, echo=FALSE-----------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/autocomplete.jpg")
## ----help, echo=FALSE---------------------------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/2day/images/fucntionhelp.jpg")
## -----------------------------------------------------------------------------
# Create vector objects (a whole line comment)
x = 1:5 # a seqence of consecutive integers (inline comment)
y = c(0, 1, 3, 9, 18.1)
## ----debug, echo=FALSE, out.width="60%"---------------------------------------
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/master/inst/rstudio-autocomplete.png")
## -----------------------------------------------------------------------------
saveRDS(crashes, "crashes.Rds")
## -----------------------------------------------------------------------------
crashes2 = readRDS("crashes.Rds")
identical(crashes, crashes2)
## ----readr-write, eval=FALSE--------------------------------------------------
# readr::write_csv(crashes, "crashes.csv")
# crashes3 = readr::read_csv("crashes.csv")
# identical(crashes3, crashes)
## ----eval=FALSE---------------------------------------------------------------
# casualty_age[2:3] # second and third casualty_age
# crashes[c(1, 2), ] # first and second row of crashes
# crashes$vehicle_type # returns just one column
# crashes[, c("casualty_type", "casualty_age")] # first and third columns
## ----eval=FALSE, echo=FALSE---------------------------------------------------
# crashes[, c(1, 3)] # first and third column of crashes by positional numbers
# crashes[c(2), c(3)]
# crashes[c(2), c(2, 3)]
# class(crashes[, c(1, 3)])
# class(crashes[c(2), c(3)])
## ----eval=FALSE---------------------------------------------------------------
# x[c(TRUE, FALSE, TRUE, FALSE, TRUE)] # 1st, 3rd, and 5th element in x
# x[x == 5] # only when x == 5 (notice the use of double equals)
# x[x < 3] # less than 3
# x[x < 3] = 0 # assign specific elements
# casualty_age[casualty_age %% 6 == 0] # just the ages that are a multiple of 6
# crashes[crashes$dark == FALSE, ]
## ----eval=FALSE, echo=FALSE---------------------------------------------------
# casualty_age[casualty_age < 50] # the casualty_age less than 50
# crashes[crashes$vehicle_type == "tank", ] # rows where the name is tank
# crashes$casualty_age[crashes$vehicle_type == "tank"] = 61
## ----eval=FALSE---------------------------------------------------------------
# z = c(4, 5, NA, 7)
## ----eval=FALSE---------------------------------------------------------------
# sum(z) # result is NA
## ----eval=FALSE---------------------------------------------------------------
# sum(z, na.rm = TRUE) # result is equal to 4 + 5 + 7
## ----eval=FALSE---------------------------------------------------------------
# is.na(z)
# z_nona = z[!is.na(z)] # note the use of the not operator !
# sum(z)
## ----echo=FALSE, eval=FALSE---------------------------------------------------
# crashes$vehicle_type = as.character(crashes$vehicle_type)
# as.matrix(crashes)
## -----------------------------------------------------------------------------
z = c(1, 2, -1, 1, 3)
l = c(NA, "a", "b", "c") # labels in ascending order
z_factor = factor(z, labels = l)
z_charcter = as.character(z_factor)
z_charcter
## ----smile, out.width="30%", fig.align="center"-------------------------------
eyes = c(2.3, 4, 3.7, 4)
eyes = matrix(eyes, ncol = 2, byrow = T)
mouth = c(2, 2, 2.5, 1.3, 3, 1, 3.5, 1.3, 4, 2)
mouth = matrix(mouth, ncol = 2, byrow = T)
plot(eyes, type = "p", main = "RRR!", cex = 2, xlim = c(1, 5), ylim = c(0, 5))
lines(mouth, type = "l", col = "red")
## ----eval=FALSE---------------------------------------------------------------
# install.packages("sf")
# # remotes::install_github("r-spatial/sf")
## -----------------------------------------------------------------------------
library(sf)
## ----tibble2, eval=FALSE------------------------------------------------------
# crashes_tibble = tibble::tibble(
# vehicle_type,
# casualty_type,
# casualty_age,
# dark
# )
## ----message=FALSE, out.width="40%", eval=FALSE-------------------------------
# library(ggplot2)
# ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))
## ----gg-extend, echo=FALSE, message=FALSE, eval=FALSE-------------------------
# library(ggplot2)
# # install.packages("ggthemes")
# g1 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age))
# g2 = ggplot(crashes) + geom_point(aes(x = casualty_type, y = casualty_age)) +
# ggthemes::theme_economist()
# g3 = cowplot::plot_grid(g1, g2)
# ggsave(filename = "inst/ggtheme-plot.png", width = 8, height = 2, dpi = 80)
## ----gg2, echo=FALSE, out.width="80%", fig.align="center"---------------------
library(ggplot2)
knitr::include_graphics("https://raw.githubusercontent.com/ropensci/stats19/b4c40ad4c134853007493a9eac116b00acd4ec5a/inst/ggtheme-plot.png")
## -----------------------------------------------------------------------------
library(dplyr)
class(crashes)
crashes %>% class()
## ----eval=FALSE---------------------------------------------------------------
# crashes %>%
# filter(casualty_age > 50) # filter rows
# crashes %>%
# select(casualty_type) # select just one column
# crashes %>%
# group_by(dark) %>%
# summarise(mean_age = mean(casualty_age))
## ----dplyr, eval=FALSE, echo=FALSE--------------------------------------------
# # answers
# crashes %>%
# arrange(desc(casualty_age))
# crashes %>% filter(casualty_age > 21)
# crashes %>%
# mutate(birth_year = 2019 - casualty_age) %>%
# filter(birth_year > 1969)
## ----message=FALSE------------------------------------------------------------
library(lubridate)
## -----------------------------------------------------------------------------
today()
## ----eval=FALSE---------------------------------------------------------------
# x = today()
# day(x)
# month(x)
# year(x)
# weekdays(x)
## ----eval=FALSE---------------------------------------------------------------
# as.Date("2019-10-17") # works
# as.Date("2019 10 17") # fails
# ymd("2019 10 17") # works
# dmy("17/10/2019") # works
## -----------------------------------------------------------------------------
x = c("2009-01-01", "2009-02-02", "2009-03-03")
x_date = ymd(x)
x_date
## ----echo=FALSE, eval=FALSE---------------------------------------------------
# # 1. Extract the day, the year-day, the month and the weekday (as a non-abbreviated character vector) of each element of `x_date`.
# day(x_date)
# yday(x_date)
# month(x_date)
# weekdays(x_date, abbreviate = FALSE)
# # 1. Modify the previous example to parse the following character string: `"09/09/1993"` and extract its weekday.
# weekdays(dmy("09/09/93"))
# wday(dmy("09/09/93"))
## -----------------------------------------------------------------------------
crashes$casualty_day = x_date
## -----------------------------------------------------------------------------
filter(crashes, day(casualty_day) < 7) # the events that ocurred in the first week of the month
filter(crashes, weekdays(casualty_day) == "Monday") # the events occurred on monday
## -----------------------------------------------------------------------------
x = c("18:23:35", "00:00:01", "12:34:56")
x_hour = hms(x)
x_hour
## -----------------------------------------------------------------------------
hour(x_hour)
minute(x_hour)
second(x_hour)
## -----------------------------------------------------------------------------
x = c("18:23", "00:00", "12:34")
(x_hour = hm(x))
## -----------------------------------------------------------------------------
crashes$casualty_hms = hms(c("18:23:35", "00:00:01", "12:34:56"))
crashes$casualty_hour = hour(crashes$casualty_hms)
## ----eval=FALSE---------------------------------------------------------------
# library(stats19)
# crashes_2022 = stats19::get_stats19(year = 2022, type = "ac")
# crashes_2022
## ----eval=FALSE, echo=FALSE---------------------------------------------------
# # solutions
# crashes %>% filter(casualty_hour >= 12)
# crashes %>% filter(casualty_hour > 15 & casualty_hour < 19)
#
# crashes_2022 %>%
# mutate(my_weekdays = weekdays(date)) %>%
# filter(my_weekdays == "Monday") %>%
# nrow()
# crashes_2022 %>%
# mutate(my_weekdays = weekdays(date)) %>%
# filter(my_weekdays == "Friday") %>%
# nrow()
#
# crashes_2022 %>%
# mutate(my_weekdays = weekdays(date)) %>%
# group_by(my_weekdays) %>%
# summarize(n = n()) %>%
# ggplot() +
# geom_col(aes(x = my_weekdays, y = n))
#
# crashes_2022 %>%
# mutate(my_hours = hour(hm(time))) %>%
# group_by(my_hours) %>%
# summarize(n = n()) %>%
# ggplot() +
# geom_col(aes(x = my_hours, y = n))
#
# crashes_2022 %>%
# mutate(my_weekdays = weekdays(date), my_hours = hour(hm(time))) %>%
# group_by(my_weekdays, my_hours) %>%
# summarise(n = n()) %>%
# ggplot() +
# geom_line(aes(x = my_hours, y = n, col = my_weekdays), size = 1.05)
# # the legend needs some reordering
## ----crashes-sf, fig.height=2, fig.width=3------------------------------------
library(sf) # load the sf package for working with spatial data
crashes_sf = crashes # create copy of crashes dataset
crashes_sf$longitude = c(-1.3, -1.2, -1.1)
crashes_sf$latitude = c(50.7, 50.7, 50.68)
crashes_sf = st_as_sf(crashes_sf, coords = c("longitude", "latitude"), crs = 4326)
# plot(crashes_sf[1:4]) # basic plot
# mapview::mapview(crashes_sf) # for interactive map
## ----crashes-sf-ex, echo=FALSE, out.width="30%", fig.show='hold'--------------
plot(crashes_sf$geometry)
plot(crashes_sf["casualty_age"])
plot(crashes_sf[2:3, "dark"])
# st_distance(crashes_sf)
# Bembridge
# # updload geographic crash data
# write_sf(crashes_sf, "crashes_sf.geojson")
# piggyback::pb_upload("crashes_sf.geojson")
## ----eval=FALSE---------------------------------------------------------------
# write_sf(zones, "zones.geojson") # save geojson file
# write_sf(zones, "zmapinfo", driver = "MapInfo file")
# read_sf("zmapinfo") # read in mapinfo file
## -----------------------------------------------------------------------------
knitr::opts_chunk$set(eval = FALSE)
## -----------------------------------------------------------------------------
# zones = pct::get_pct_zones("isle-of-wight")[1:9]
## ----echo=FALSE---------------------------------------------------------------
# # class(zones)
# # names(zones)
# zones[1:2, c(1, 5, 6, 7, 8)]
## ----message=FALSE------------------------------------------------------------
# zones_containing_crashes = zones[crashes_sf, ]
## ----sp-ex, echo=FALSE, out.width="33%", fig.show='hold', message=FALSE, warning=FALSE----
# plot(zones$geometry)
# plot(zones_containing_crashes$geometry, col = "red", add = TRUE)
# plot(zones$geometry)
# plot(zones[crashes_sf[2, ], ], col = "blue", add = TRUE)
# plot(zones$geometry)
# plot(zones[zones_containing_crashes, ], col = "yellow", add = TRUE)
# plot(crashes_sf$geometry, pch = 20, add = TRUE)
## ----message=FALSE------------------------------------------------------------
# zones_joined = st_join(zones[1], crashes_sf)
## ----joinf, echo=FALSE, out.width="40%", fig.show='hold', message=FALSE-------
# plot(zones_joined["casualty_age"])
# zjd = st_join(zones[1], crashes_sf["dark"], left = FALSE)
# plot(zjd)
## ----crs1---------------------------------------------------------------------
# crashes_osgb = st_transform(crashes_sf, 27700)
## -----------------------------------------------------------------------------
# # load example dataset if it doesn't already exist
# zones = pct::get_pct_zones("isle-of-wight")
# sel = zones$all > 3000 # create a subsetting object
# zones_large = zones[sel, ] # subset areas with a popualtion over 100,000
# zones_2 = zones[zones$geo_name == "Isle of Wight 002",] # subset based on 'equality' query
# zones_first_and_third_column = zones[c(1, 3)]
# zones_just_all = zones["all"]
## ----echo=FALSE, eval=FALSE---------------------------------------------------
# # 1. Practice subsetting techniques you have learned on the `sf data.frame` object `zones`:
# # 1. Create an object called `zones_small` which contains only regions with less than 3000 people in the `all` column
# # in base R
# zones_small = zones[zones$all < 3000, ]
# # with dplyr
# zones_small = zones %>%
# filter(all < 3000)
# # 1. Create a selection object called `sel_high_car` which is `TRUE` for regions with above median numbers of people who travel by car and `FALSE` otherwise
# median_car = median(zones$car_driver)
# sel_high_car = zones$car_driver > median_car
# # 1. How many regions have the number '1' in the column 'geo_name'? What percentage of the regions in the Isle of Wight is this?
# sel_region_name_contains_1 = grepl("1", x = zones$geo_name)
# sum(sel_region_name_contains_1) / nrow(zones)
# # 1. Create an object called `zones_foot` which contains only the foot attribute from `zones`
# # using base R
# zones_foot = zones["foot"]
# # dplyr
# zones_foot = zones %>%
# select(foot)
# # 1. Bonus: plot the result to show where walking is a popular mode of travel to work
# plot(zones_foot)
# # 1. Bonus: bulding on your answers to previous questions, use `filter()` from the `dplyr` package to subset small regions where high car use is high
# zones_small_car_high = zones %>%
# filter(all < 3000, car_driver > median_car)
# # 1. Bonus: What is the population density of each region (hint: you may need to use the functions `st_area()`, `as.numeric()` and use the 'all' column)?
# zones$area_km2 = as.numeric(st_area(zones)) /1000000
# zones$population_density = zones$all / zones$area_km2
# plot(zones["population_density"])
# # in dplyr
# zones_density = zones %>%
# mutate(area_km2 = as.numeric(st_area(geometry)) / 1000000) %>%
# mutate(population_density = all / area_km2)
# plot(zones_density %>% select(population_density))
# # 1. Bonus: Which zone has the highest percentage who cycle?
# zones %>%
# mutate(pcycle = bicycle / all) %>%
# top_n(n = 1, wt = pcycle)
# # 1. Bonus: Find the proportion of people who drive to work (`car_driver`) in areas in which more than 500 people walk to work
# zones %>%
# group_by(foot > 500) %>%
# summarise(mean_car = sum(car_driver) / sum(all) )
## -----------------------------------------------------------------------------
# library(tmap)
# tmap_mode("plot")
## ----plot3, fig.show='hold', out.width="33%", echo=FALSE----------------------
# plot(zones[c("all", "bicycle")])
# # tm_shape(zones) +
# # tm_polygons(c("all", "bicycle"))
# # tmap_mode("view")
# # m = tm_shape(zones_joined) +
# # tm_polygons(c("casualty_type")) +
# # tm_scale_bar()
# # m
# # knitr::include_graphics("tmap-zones-interactive.png")
# # piggyback::pb_upload("zones_joined.Rds")
# # create bug report:
# # See https://github.com/mtennekes/tmap/issues/551
# # piggyback::pb_download_url("zones_joined.Rds")
# # "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
# # library(tmap)
# # u = "https://github.com/ropensci/stats19/releases/download/1.3.0/zones_joined.Rds"
# # zones_joined = readRDS(url(u))
# # qtm(zones_joined)
## ----eval=FALSE---------------------------------------------------------------
# vignette(package = "stats19") # view all vignettes available on stats19
# vignette("stats19") # view the introductory vignette
## ----echo=FALSE, results='hide', message=FALSE, eval=FALSE--------------------
# library(stats19)
# library(dplyr)
# library(sf)
# a = get_stats19(2018, "ac")
# asf = format_sf(a)
# a_zones = asf %>%
# filter(local_authority_district == "Isle of Wight")
# nrow(a_zones)
# zones = pct::get_pct_zones(region = "isle-of-wight")
# zones_osbg = st_transform(zones, 27700)
# a_zones_sf = a_zones[zones_osbg, ]
# nrow(a_zones_sf)
# # mapview::mapview(zones) +
# # mapview::mapview(a_zones)
# class(a$date)
# class(a$time)
# a_zones$month = lubridate::month(a_zones$date)
# a_zones_may = a_zones %>%
# filter(month == 5)
# a_agg = aggregate(a_zones_may["speed_limit"], zones_osbg, mean)
# plot(a_agg)
# class(a$date)
## -----------------------------------------------------------------------------
# u = "https://github.com/ropensci/stats19/releases/download/1.1.0/roads_key.Rds"
# roads_wgs = readRDS(url(u))
# roads = roads_wgs %>% st_transform(crs = 27700)
## -----------------------------------------------------------------------------
# u = "https://github.com/ropensci/stats19/releases/download/1.1.0/car_collisions_2022_iow.Rds"
# crashes_iow = readRDS(url(u))
## ----echo=FALSE, out.width="49%", fig.show='hold', message=FALSE--------------
# plot(roads$geometry)
# plot(crashes_iow["accident_severity"], add = TRUE)
# roads_buffer = st_buffer(roads, 200, endCapStyle = "FLAT")
# crashes_outside_roads = crashes_iow[roads_buffer, , op = sf::st_disjoint]
# roads_agg = aggregate(crashes_iow[1], by = roads_buffer, FUN = length)
# # plot(roads_agg, border = NA, main = "")
# names(roads_agg)[1] = "N. Crashes"
# # tmap_mode("plot")
# # tm_shape(roads_agg) + tm_fill("N. Crashes") +
# # tm_shape(crashes_outside_roads) + tm_dots(col = "blue")
## ----final-plot, echo=FALSE, out.width="100%"---------------------------------
# # knitr::include_graphics("final-figure.png")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.