knitr::opts_chunk$set(echo = FALSE, comment = NA, warning = FALSE, error = FALSE, message = FALSE, tidy = TRUE, fig.dim = c(10, 6), global.par = TRUE)
# install.packages("tidyverse", repo = 'https://cran.R-project.org')  try this to install packages in base R, not RStudio #
# If you get errors installing a package, go to cran website and download tar.gz. If that doesn't work, submit an IT ticket to unblock the website.
library(tidyverse)      # includes tidyr, tibble, ggplots, dplyr
# library(ggtext)         # HTML for charts
# library(data.table)     # db files are loaded as a data table 
library(wisdotcrashdatabase) # read PDF at https://github-p.dot.state.wi.us/DSP/wisdotcrashdatabase (wisdotcrashdatabase_1.2.xx.pdf)
# library(janitor)        # make tabyl, adorn_totals
# library(kableExtra)     # styling for HTML
# source("C:/temp_code/chart_formats.R") # Contains functions to make bar/line charts and pretty tables
# Always loaded: "CRSHDATE", "CNTYCODE","CRSHSVR", "UNITNMBR", "UNITPRSN", "UNITTYPE", "DRVRFLAG", "ROLE", "VEHTYPE", "WISINJ"
db_dir = "C:/data/crashes_duck.duckdb" # location of crash database files to load
years = get_list_of_years("19", "23") # select years to import, must be in this format
crash <-
  import_db_data(
    db_dir,
    db_type = "crash",
    years = years,
    columns = c("INJSVR", "DOCTNMBR", "CRSHTYPE", "ANMLTY", "CRSHMTH", "CYCLFLAG", "TOTINJ", "TOTFATL", "CONSZONE", "HWYREGN")
    # filter_by = "county:Brown"
  )
person <-
  import_db_data(
    db_dir,
    db_type = "person",
    years = years,
    columns = c(
      # "HLMTUSE",
      # "EYEPROT",
      # "SFTYEQP",
      "DRVRSTA",
      "SEX",
      "AGE",
      "DISTACT",
      "DRVRDS",
      "STABBR",
      # "DISTSRC",
      "DRVRPC",
      "STATNM",
      "OLICTYPE",
      "ALCSUSP",
      "DRUGSUSP"
    )#, filter_by = "county:Milwaukee"
  )
vehicle <-
  import_db_data(
    db_dir,
    db_type = "vehicle",
    years = years,
    columns = c("VEHPC")
  )
narrative = import_narrative(db_dir, years = years)
m = get_motorcycle_persons(person, vehicle)

# Summary table
crash[deer_flag == "Y"] |> aggregate_crashes_by_crshsvr_tot_inj()

person[CRSHDATE >= "2023-03-01" & CRSHDATE <= "2023-03-31", .N, by = WISINJ ]
crash[CRSHDATE >= "2023-03-01" & CRSHDATE <= "2023-03-31", .N, by = INJSVR ]
# janitor::tabyl(crash, var1 = year, var2 = CRSHSVR)
# CGPfunctions::PlotXTabs(crash, year, CRSHSVR)

# data.table (recommend using dplyr if you're new)
# crash[ , .N, by = .(CRSHSVR, year)][order(year, N)] # count by CRSHSVR and year
# %chin% %like%  %between%  fcase is like case_when but for booleon
# do more than 1 operation do dt[, `:=` (x = "x", y = "y")]
# mydt[, ..mycols] # to refer to char list
# crash[ , lapply(.SD, sum), by = countyname, .SDcols = "TOTFATL"]
# d[is.na(d)] <- 0
# driver[, ac_12 := rowSums(do.call(cbind, lapply(.SD, `==`, "Y"))) > 0, .SDcols = cols ]
# rowSums(do.call(cbind, lapply(driver[, ..cols], `%in%`, val)))

# dplyr
# ac |> top_n(n = 1, wt = number_of_casualties)
# df |> mutate_at( 2:5, replace_na, replace = 0) OR df |> mutate_if(is.numeric, tidyr::replace_na, replace = 0)
# x |> mutate(rank = min_rank(desc(n))) |> filter(rank <0)
# mtcars |>
#   group_by(cyl) |>
#   summarise(across(starts_with("d"), list(mean = mean, sd = sd)))
# mutate(rank = min_rank(desc(n))) |> filter(rank < 7)

# remove duplicates
# crash |> distinct(.keep_all = TRUE) # dplyr way
# crash[!duplicated('CRSHNMBR')] # data.table way

# DRVRPC - put data in long format to count frequency
# person |> select("CRSHNMBR", starts_with("DRVRPC")) |> pivot_longer(starts_with("DRVRPC")) |> filter(value !='') 


jacciz/wisdotcrashdatabase documentation built on June 3, 2023, 2:26 a.m.