inst/doc/stats19-vehicles.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup, message=FALSE-----------------------------------------------------
#  library(stats19)
#  library(dplyr)

## -----------------------------------------------------------------------------
#  v = get_stats19(year = 2018, type = "vehicle")
#  names(v)
#  v

## -----------------------------------------------------------------------------
#  v = v %>% mutate(vehicle_type2 = case_when(
#    grepl(pattern = "motorcycle", vehicle_type, ignore.case = TRUE) ~ "Motorbike",
#    grepl(pattern = "Car", vehicle_type, ignore.case = TRUE) ~ "Car",
#    grepl(pattern = "Bus", vehicle_type, ignore.case = TRUE) ~ "Bus",
#    grepl(pattern = "cycle", vehicle_type, ignore.case = TRUE) ~ "Cycle",
#    # grepl(pattern = "Van", vehicle_type, ignore.case = TRUE) ~ "Van",
#    grepl(pattern = "Goods", vehicle_type, ignore.case = TRUE) ~ "Goods",
#  
#    TRUE ~ "Other"
#  ))
#  # barplot(table(v$vehicle_type2))

## -----------------------------------------------------------------------------
#  table(v$vehicle_type2)
#  summary(v$age_of_driver)
#  summary(v$engine_capacity_cc)
#  table(v$propulsion_code)
#  summary(v$age_of_vehicle)

## -----------------------------------------------------------------------------
#  a = get_stats19(year = 2018, type = collision)
#  va = dplyr::inner_join(v, a)

## -----------------------------------------------------------------------------
#  dim(v)
#  dim(va)
#  names(va)

## ----out.width="100%"---------------------------------------------------------
#  xtabs(~vehicle_type2 + accident_severity, data = va) %>% prop.table()
#  xtabs(~vehicle_type2 + accident_severity, data = va) %>% prop.table() %>% plot()

## -----------------------------------------------------------------------------
#  vac = va %>% filter(vehicle_type2 == "Car")

## -----------------------------------------------------------------------------
#  summary(vac$engine_capacity_cc)

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  library(tidyverse)
#  
#  max_engine_size = 5000
#  min_engine_size = 300
#  
#  sel_too_big = vac$engine_capacity_cc > max_engine_size
#  sel_too_small = vac$engine_capacity_cc < min_engine_size
#  sum(sel_too_big) / nrow(vac)
#  sum(sel_too_small) / nrow(vac)
#  vac$engine_capacity_cc[sel_too_big | sel_too_small] = NA

## ----eval=FALSE, echo=FALSE---------------------------------------------------
#  
#  vac = vac %>% filter(val_size)
#  vac %>%
#    mutate(age = formatC(age_band_of_driver, digits = 2, flag = "0")) %>%
#    ggplot() +
#    geom_violin(aes(age, engine_capacity_cc))
#  
#  vac$sev_factor = factor(vac$accident_severity, labels = 3:1)
#  vac$sev_numeric = vac$sev_factor %>% as.character() %>%  as.numeric()
#  summary(vac$sev_factor)
#  summary(vac$sev_numeric)
#  
#  m = lm(sev_numeric ~ engine_capacity_cc + age_of_driver + speed_limit, data = vac)
#  summary(m)

## ----echo=FALSE, eval=FALSE---------------------------------------------------
#  table_vehicle_type = xtabs(cbind(accident_severity, vehicle_type) ~ accident_severity, data = va)
#  group_totals = va %>%
#    group_by(accident_severity) %>%
#    summarise(n = n())
#  
#  # fails
#  fit = glm(data = va, factor(accident_severity) ~
#              engine_capacity_cc +
#              age_of_driver +
#              engine_capacity_cc +
#              factor(propulsion_code) +
#              age_of_vehicle
#            )
#  # works but result does not have probabilities
#  ?nnet::multinom
#  mod = nnet::multinom(formula = accident_severity ~
#              engine_capacity_cc +
#              age_of_driver +
#              engine_capacity_cc +
#              propulsion_code +
#              age_of_vehicle, data = va)
#  mod
#  summary(mod)
#  mod$nunits
#  class(mod$fitted.values)
#  dim(mod$fitted.values)
#  colnames(mod$fitted.values)
#  summary(mod$fitted.values) # result!
#  
#  probs = as.data.frame(mod$fitted.values)
#  head(probs)
#  head(rowSums(probs))
#  colSums(probs) / group_totals$n
#  nrow(probs)
#  nrow(va)
#  
#  # install.packages("mlogit")
#  install.packages("AER")
#  vignette(package = "mlogit")
#  vignette("c2.formula.data")
#  library(mlogit)
#  data("TravelMode", package = "AER")
#  head(TravelMode)
#  ?TravelMode
#  summary(TravelMode$choice)
#  summary(TravelMode$mode)
#  TM = mlogit.data(TravelMode, choice = "choice", shape = "long",
#                   alt.levels = c("air", "train", "bus", "car"))
#  
#  vamld = mlogit.data(va, choice = "accident_severity", alt.levels = c("Slight", "Serious", "Fatal"), shape = "wide")
#  
#  mlogit(accident_severity ~ speed_limit | 0, vamld[1:999, ])
#  
#  vamld = mlogit::mlogit.data(va, choice = "accident_severity", shape = "wide")
#  head(vamld)
#  
#  
#  # fails
#  # vm = mlogit(accident_severity ~ engine_capacity_cc + speed_limit | 0, vamld[1:999, ])
#  # apply(fitted(vm, outcome = FALSE), 2, mean)
#  # vm

Try the stats19 package in your browser

Any scripts or data that you put into this service are public.

stats19 documentation built on Nov. 5, 2023, 1:09 a.m.