data-raw/1_data_find_crash_flags.R

library(data.table)
library(dplyr)
library(lubridate)
# library(memisc)
# library(gdata)
library(wisdotcrashdatabase)

# Must to each year separately
year = "17"
# This script return a dataframe with a list of crshnmbr with flags for each year. The script finds crashes with a certain flag (i.e. older driver, speed),
# adds a column for each flag type with "Y" denotes crshnmb has that flag. Then it combines all flags into one dataframe to save to SQLite db.

# setwd("W:/HSSA/Keep/Jaclyn Ziebert/R/Data Prep for R Shiny")  # set WD
# file = "W:/HSSA/Keep/Jaclyn Ziebert/R/Data Prep for R Shiny/" # where CSVs are saved
db_dir = "C:/data/crashes_duck.duckdb"                    # where CSVs are saved

# Functions to import databases from a CSV, grabs FLAG fields and fields to determine if a type of FLAG

all_crashes <-
  import_db_data(
    db_dir,
    db_type = "crash",
    years = year,
    columns = c(
      "CRSHTYPE",
      "ANMLTY",
      "MNRCOLL",
      "TOTUNIT",
      # "ALCFLAG",
      # "DRUGFLAG",
      "BIKEFLAG",
      "CYCLFLAG",
      "PEDFLAG",
      "CMVFLAG",
      "CRSHLOC",
      "RLTNTRWY",
      "INTTYPE",
      "INTDIS",
      "MUNICODE"
    )
  )
# Relabel so in consistent format
# all_crashes <- all_crashes |> mutate(
#   ALCFLAG = case_when(ALCFLAG == "Yes" ~ "Y",
#                       ALCFLAG == "No" ~ "N"),
#   DRUGFLAG = case_when(DRUGFLAG == "Yes" ~ "Y",
#                        DRUGFLAG == "No" ~ "N"),
#   CRSHDATE = as.character(CRSHDATE)
# )

all_persons <-
    import_db_data(
      db_dir,
      db_type = "person",
      years = year,
      columns = c(
        "DRVRPC",
        "STATNM",
        "DRVRDS",
        "SFTYEQP",
        "AGE",
        "HLMTUSE",
        "DISTACT",
        "ALCSUSP",
        "DRUGSUSP"
      )
    )

all_persons_drivers <- get_driver_flags(all_persons, flags = c("distracted","speed","teen","older", "impaired"))
# Functions to find flags, returns flag column and crshnmbr. By list I mean a long df.

# person_flags = all_persons_drivers[, list(CRSHNMBR, distracted_flag, speed_flag, teendriver_flag, olderdriver_flag, impaired_flag)]
list_person_flags = wisdotcrashdatabase::persons_flags_to_crash(all_persons_drivers, c("speed_flag", "distracted_flag", "teendriver_flag", "olderdriver_flag", "impaired_flag"))


# get_list_speedflags <- function(persons_df) {
#   speedflags <-
#     persons_df |>  dplyr::filter(speed_flag == "Y") |> dplyr::select(CRSHNMBR, speed_flag)
#   speedflags <- unique(speedflags)
#   return (speedflags)
# }
# 
# get_list_distractedflags <- function(persons_df) {
#   distractflags <-
#     persons_df |> dplyr::filter(distracted_flag == "Y") |> dplyr::select(CRSHNMBR, distracted_flag)
#   distractflags <- unique(distractflags)
#   return (distractflags |>  filter(distracted_flag == "Y"))
# }
# 
# get_list_teendrvrflags  <- function(persons_df) {
#   teendrvrflags <- persons_df |> dplyr::filter(teendriver_flag == "Y") |> dplyr::select(CRSHNMBR, teendriver_flag)
#   teendrvrflags <- unique(teendrvrflags)
#   return (teendrvrflags)
# }
# get_list_olderdrvrflags <- function(persons_df) {
#   olderdrvrflags <- persons_df |> dplyr::filter(olderdriver_flag == "Y") |> dplyr::select(CRSHNMBR, olderdriver_flag)
#   olderdrvrflags <- unique(olderdrvrflags)
#   return (olderdrvrflags)
# }
# 
# get_list_impaireddrvrflags <- function(persons_df) {
#   impaired <- persons_df |> dplyr::filter(impaired_flag == "Y") |> dplyr::select(CRSHNMBR, impaired_flag)
#   impaired <- unique(impaired)
#   return (impaired)
# }

get_list_singlevehflags <- function(crashes_df) {
  singlevehflags <- crashes_df |> dplyr::select(CRSHNMBR, TOTUNIT) |> filter(TOTUNIT == 1) |> mutate(singlevehflag = "Y")
  singlevehflags <- singlevehflags |> dplyr::select(CRSHNMBR, singlevehflag)
  return (singlevehflags)
}

get_list_deerflags      <- function(crashes_df) {
  deerflags <- crashes_df |>
    get_deerflag_crashes() |> filter(deer_flag == "Y")
  deerflags <- deerflags |> dplyr::select(CRSHNMBR, deer_flag) #dplyr::filter(!is.na(CRSHNMBR))# |> group_by(CRSHNMBR)
  return (deerflags)}

get_list_lanedepflags   <- function(crashes_df){
  lanedep_crshtypes = c( "Motor Veh Tran Other Rdwy", "Other Object - Not Fixed", "Traffic Sign Post",
                         "Traffic Signal", "Utility Pole", "Lum Light Support", "Other Post, Pole or Support",
                         "Tree", "Mailbox", "Guardrail Face", "Guardrail End", "Bridge Parapet End",
                         "Bridge/Pier/Abut", "Impact Attenuator/Crash Cushion", "Overhead Sign Post",
                         "Bridge Rail", "Culvert", "Ditch", "Curb", "Embankment", "Fence",
                         "Other Fixed Object", "Overturn/Rollover", "Jackknife", "Cable Barrier",
                         "Concrete Traffic Barrier", "Other Traffic Barrier", "Fire Hydrant", "Unknown")
  lanedep_mnrcoll = c("Front To Front", "Sideswipe/Same Direction", "Sideswipe/Opposite Direction")
  lanedepflags <- crashes_df |> dplyr::select(CRSHNMBR, CRSHTYPE, MNRCOLL) |> filter(CRSHTYPE %in% lanedep_crshtypes | MNRCOLL %in% lanedep_mnrcoll) |> mutate(lanedepflag = "Y")
  lanedepflags <- lanedepflags |> dplyr::select(CRSHNMBR, lanedepflag) #dplyr::filter(!is.na(CRSHNMBR))# |> group_by(CRSHNMBR)
  return (lanedepflags)
}

get_list_intersection_flags   <- function(crashes_df){
  crashes_df |> dplyr::mutate(crash_location = dplyr::case_when(
    RLTNTRWY == "Non Trafficway - Parking Lot" ~ "parking lot",
    CRSHLOC %in% c("Private Property","Tribal Land") ~ "private property",
    INTTYPE == "Not At Intersection" ~ "non-intersection",
    INTTYPE != "" ~ "intersection",
    INTDIS > 0 ~ "non-intersection",
    TRUE ~ "intersection"
  ), intersection_flag = ifelse(crash_location == "intersection", "Y","N")) |>
    filter(intersection_flag == "Y") |> 
    dplyr::select(intersection_flag, CRSHNMBR)
}

# This finds crashes with a crash flag already in the crash db (i.e. BIKEFLAG)
get_list_crashflags     <- function(crashes_df){
  crash_flags <- crashes_df |> dplyr::select(CRSHNMBR, BIKEFLAG, CYCLFLAG, PEDFLAG, CMVFLAG)
  crash_flags <- crash_flags |> dplyr::filter_all(dplyr::any_vars(stringr::str_detect(., "Y"))) # returns any row where there is at least 1 flag
}

# Run the functions, these all return a list of crshnmbers and a column of the respected flag(s)
# speedflag_crshes    <- get_list_speedflags(all_persons_drivers)
# distractflag_crshes <- get_list_distractedflags(all_persons_drivers)
# teenflag_crshes     <- get_list_teendrvrflags(all_persons_drivers)
# olderflag_crshes    <- get_list_olderdrvrflags(all_persons_drivers)
# impairedflag_crshes <- get_list_impaireddrvrflags(all_persons_drivers)
singleveh_crshes    <- get_list_singlevehflags(all_crashes)
deer_crshes         <- get_list_deerflags(all_crashes)
lanedep_crshes      <- get_list_lanedepflags(all_crashes)
allcrashflag_crshes <- get_list_crashflags(all_crashes)
intersection_crshes <- get_list_intersection_flags(all_crashes)

# Combine dataframes - make sure all df are here
all_flags <- Reduce(function(x, y) merge(x, y, all=TRUE, by = "CRSHNMBR"),
                    list(#speedflag_crshes, distractflag_crshes, teenflag_crshes, olderflag_crshes, impairedflag_crshes,
                      list_person_flags,
                         singleveh_crshes, lanedep_crshes, deer_crshes, allcrashflag_crshes, intersection_crshes)) # combine to one df
all_flags_com = all_crashes |> dplyr::select(CRSHNMBR, CRSHDATE, countyname, CRSHSVR, MUNICODE, year) |> left_join(all_flags, ., by = "CRSHNMBR") |>
  mutate_at(vars(ends_with(c("FLAG", "flag"))), tidyr::replace_na, replace = "N")

#### SAVE TO SQLITE
fname = paste0("crsh_flags", "20",year)
pool <- pool::dbPool(RSQLite::SQLite(), dbname = "inst/app/www/crash_db.db")
DBI::dbWriteTable(pool, fname, all_flags_com, overwrite = TRUE)
# DBI::dbDisconnect(pool)
# pool::poolClose(pool)
# make CRSHNMBR primary key?
jacciz/shiny_wisdot_crash_dashboard documentation built on May 4, 2023, 11:36 a.m.