library(tidyverse)      # includes tidyr, tibble, ggplots, dbplyr
library(lubridate)      # for dates
# library(ggtext)         # HTML for charts
# library(data.table)     # db files are loaded as a data table
library(wisdotcrashdatabase)
db_dir = "C:/data/crashes_duck.duckdb" # location of crash database files to load
years = c("18") 

person <-
  wisdotcrashdatabase::import_db_data(
    db_dir,
    db_type = "person",
    years = years,
    columns = c("INJSVR",
                "AGE",
                "DRVRPC",
                "DOCTNMBR")
  )

person <-
    person |> dplyr::filter(.data[["DRVRFLAG"]] == 'Y')
  ag = person |> get_driver_flags("aggressive") |> filter(aggressiveflag == "Y")
ag |> count(CRSHNMBR, UNITNMBR) |> arrange(-n)
ag |> count(CRSHNMBR, UNITNMBR) |> arrange(-n)
ag |> distinct(CRSHNMBR) |> count() #4904
person |> get_aggressive_driver() |> filter(aggressiveflag == "Y") |> distinct(CRSHNMBR) |> count() #5087
c <-
  wisdotcrashdatabase::import_db_data(
    # db_dir,
    db_type = "crash",
    years = "19",
    columns = c("CRSHTIME", "ANMLTY", "CRSHTYPE", "RDCOND", "RLTNTRWY", "CRSHLOC", "INTTYPE", "INTDIS")
  )
c  |> get_crash_flags("location")# |> dplyr::count(crash_location)
c |> get_crash_flags("winterroad")
# 2018 : 18633 crashes
# select doctnmbr,crshdate,
# 
#                                 sum(case when tier1 >= 1 or tier2 >= 2 or (tier2 + tier3 >= 3) then 1 end) aggrflag_temp
# 
#                 from (
# 
#                                 select doctnmbr,crshdate,unitnmbr,
# 
#                                                 case when drvrpc01 in (117) then 1 else 0 end
# 
#                                 tier1,
# 
#                                 case when drvrpc01 in (101,102,104,113,114,118) then 1 else 0 end
# 
#                                 tier2,
# 
#                                 case when drvrpc01 in (103,107,109,111,112) then 1 else 0 end
# 
#                                 tier3
# MISSING 170104221 / J9L0V55JN3 ??
get_aggressive_driver <- function(person_df) {

  N <- value <- NULL # make local variable, not a global variable. Can't seem to bind (.data[["x"]]) variable inside data.table

  tier1 <- "Operated Motor Vehicle In Aggressive/Reckless Manner"
  tier2 <-
    c(
      "Exceed Speed Limit",
      "Speed Too Fast/Cond",
      "Following Too Close",
      "Improper Overtaking / Passing Right",
      "Improper Overtaking / Passing Left",
      "Operated Motor Vehicle In Inattentive, Careless, Negligent, or Erratic Manner"
    )
  tier3 <-
    c(
      "Failed To Yield Right-Of-Way",
      "Failure To Control",
      "Disregarded Red Light",
      "Disregarded Stop Sign",
      "Disregarded Other Traffic Control",
      "Disregarded Other Road Markings"
    )

  # Grab the fields
  drvrpc = person_df[ , grepl("DRVRPC|CRSHNMBR|UNITNMBR", names(person_df)), with = FALSE]

  # Filter do make this df shorter
  drvrpc_long = drvrpc |> tidyr::pivot_longer(cols = dplyr::starts_with("DRVRPC")) |> dplyr::filter(value %in% tier2 | value %in% tier3) |> data.table::setDT()

  # tier1
  t1_c = dplyr::filter_all(drvrpc, dplyr::any_vars(grepl(tier1, .))) |> dplyr::select("CRSHNMBR", "UNITNMBR")

  # if 2 in tier2
  t2 = drvrpc_long[value %in% tier2]
  t2_c = t2[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 2][, N := NULL]

  # if 3 in tier3
  t3 = drvrpc_long[value %in% tier3]
  t3_1 = t3[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 3][, N := NULL]

  # if 1 in tier2 and 2 in tier3.
  t3_2 = dplyr::semi_join(t2, t3[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 2], by = c("CRSHNMBR", "UNITNMBR"))[, c("CRSHNMBR", "UNITNMBR")]

  # Combine all to get aggressive flag - t3 has 2 parts
  agg_flags = Reduce(function(x, y)
    merge(x, y, all = TRUE, by = c("CRSHNMBR", "UNITNMBR")),
    list(t1_c, t2_c, t3_1, t3_2)) |> dplyr::distinct(.data[["CRSHNMBR"]], .data[["UNITNMBR"]]) |> dplyr::mutate(aggressiveflag = "Y") 

  dplyr::left_join(person_df, agg_flags, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(aggressiveflag = tidyr::replace_na(.data[["aggressiveflag"]], "N"))
}


jacciz/wisdotcrashdatabase documentation built on June 3, 2023, 2:26 a.m.