R/get_crashflags_person.R

Defines functions get_aggressive_driver get_impaired_driver get_older_driver get_teen_driver get_speed_flag get_distracted_driver_flag get_driver_flags

Documented in get_driver_flags

#' Get driver flags - speed, impaired, distracted, aggressive, teen, older
#'
#' This adds a column(s) of certain crash flags. Driver flags are: distracted,
#' speed, teen, and older. Speed can be for both old and new db. Rest are for
#' new db only. Must have \strong{DRVRPC} and \strong{STATNM} for speed.
#' \strong{DISTACT} and \strong{DRVRDS} for distracted. \strong{AGE} for teen
#' and older. \strong{ALCSUSP}, \strong{DRUGSUSP}, \strong{STATNM} for impaired.
#' \strong{DRVRPC} for aggressive. Note: old db has bike/peds for DRVRFLAG, new
#' db has only drivers of motor vehicles. c("STATNM", "DISTACT", "DRVRDS",
#' "AGE", "ALCSUSP", "DRUGSUSP")
#' @param person_df person df
#' @param flags select either/all c("distracted","speed","teen","older",
#'   "impaired", "aggressive")
#'
#' @importFrom data.table `:=` `.N`
#'
#' @return Returns only drivers. Adds a column of selected flag(s) with
#'   \emph{speed_flag}, \emph{distracted_flag}, \emph{teendriver_flag},
#'   \emph{olderdriver_flag}, \emph{impaired_flag}, \emph{aggressiveflag}. ("Y"
#'   or "N")
#' @export
#'
#' @examples
#' \dontrun{get_driver_flags(person_df, flags = c("teen", "distracted"))}
#'
get_driver_flags <- function(person_df,
                             flags) {

  person_df <-
    person_df |> dplyr::filter(.data[["DRVRFLAG"]] == 'Y')

  if ("distracted" %in% flags) {
    person_df <- get_distracted_driver_flag(person_df)
  }
  if ("speed" %in% flags) {
    person_df <- get_speed_flag(person_df)
  }
  if ("teen" %in% flags) {
    person_df <- get_teen_driver(person_df)
  }
  if ("older" %in% flags) {
    person_df <- get_older_driver(person_df)
  }
  if ("impaired" %in% flags) {
    person_df <- get_impaired_driver(person_df)
  }
  if ("aggressive" %in% flags) {
    person_df <- get_aggressive_driver(person_df)
  }
  person_df
}

get_distracted_driver_flag <- function(person_df) {
  distracted <- person_df |>
    dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]],
                  dplyr::any_of(dplyr::starts_with(c(
                    "DISTACT", "DRVRDS"
                  )))) |>
    # dplyr::filter(.data[["DRVRFLAG"]] == 'Y') |>
    dplyr::filter_all(dplyr::any_vars(
      grepl(
        "Talking|Manually Operating|Other Action|Manually Operating|Electronic Device|Passenger|Eating|Outside|Vehicle|Looked|Moving Object|Adjusting Audio|Outside Person|Smoking|Other Cellular|Inattention|Careless|Details Unknown$|Daydreaming|Other Distraction|Distraction/Inattention",
        .
      )
    ))
  # Find where 'Not distracted' is listed even though a distraction may have been listed
  not_distracted <- person_df |>
    dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]],
                  dplyr::any_of(dplyr::starts_with(c(
                    "DISTACT", "DRVRDS"
                  )))) |>
    # dplyr::filter(.data[["DRVRFLAG"]] == 'Y') |>
    dplyr::filter_all(dplyr::any_vars(!grepl(
      "Not Distracted|Unknown If Distracted", .
    ) == FALSE))
  combine <-
    # Remove all 'Not distracted' and add a column of distracted_flag
    dplyr::anti_join(distracted, not_distracted, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::select("CRSHNMBR", "UNITNMBR") |> dplyr::mutate(distracted_flag = "Y")
  return(
    dplyr::left_join(person_df, combine, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(distracted_flag = tidyr::replace_na(.data[["distracted_flag"]], "N"))
  )
}

get_speed_flag <- function(person_df) {
  speed <- person_df |>
    dplyr::select(
      .data[["CRSHNMBR"]], .data[["UNITNMBR"]],
      dplyr::starts_with("DRVRPC"),
      dplyr::starts_with("STATNM")
    ) |>
    dplyr::filter_all(dplyr::any_vars(
                      grepl(
                        # To account for new and old databases, use upper and lower case
                        "^346.55|^346.56|^346.57|^346.58|^346.59(1)|^346.59(2)|Exceed Speed Limit|Speed Too Fast/Cond|^346.55 5|^346.59 1|^346.59 2|SPEEDING IN EXCESS OF FIXED LIMITS|SPEED TOO FAST/COND|EXCEED SPEED LIMIT|DRIVING TOO FAST|EXCEEDING ZONES AND POSTED LIMITS|ANY VIOLATION OF SPEED RESTRICTIONS|UNREASONABLE AND IMPRUDENT SPEED",
                        .
                      )
                    )) |> dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]]) |> dplyr::mutate(speed_flag = "Y")
  return(
    dplyr::left_join(person_df, speed, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(speed_flag = tidyr::replace_na(.data[["speed_flag"]], "N"))
  )
}

get_teen_driver <- function(person_df) {
  teen <-
    person_df |> dplyr::filter(.data[["AGE"]] %in% c(16, 17, 18, 19)) |> dplyr::select("CRSHNMBR", "UNITNMBR") |> dplyr::mutate(teendriver_flag = "Y")
  return(
    dplyr::left_join(person_df, teen, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(teendriver_flag = tidyr::replace_na(.data[["teendriver_flag"]], "N"))
  )
}

get_older_driver <- function(person_df) {
  older <-
    person_df |> dplyr::filter(.data[["AGE"]] >= 65) |> dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]]) |> dplyr::mutate(olderdriver_flag = "Y")
  return(
    dplyr::left_join(person_df, older, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(olderdriver_flag = tidyr::replace_na(.data[["olderdriver_flag"]], "N"))
  )
}

# Finds if person was suspected of drug/alcohol or if cited for OWI
get_impaired_driver <- function(person_df) {
  alc_drg_statutes = "^346.63\\(" # Only the new db uses "(" after statute number
  suspected_yes = c("^101$|^Yes$|^Y$")

  imp <- person_df |>
    # for str_detect, add "Y" so we get flags under alcohol_flag
    dplyr::select(.data[["CRSHNMBR"]],
                  .data[["UNITNMBR"]],
                  .data[["ALCSUSP"]],
                  .data[["DRUGSUSP"]],
                  dplyr::starts_with("STATNM")) |>
    dplyr::filter_all(dplyr::any_vars(stringr::str_detect(
      ., paste0(alc_drg_statutes, "|", suspected_yes)
    ))) |> dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]]) |>
    dplyr::mutate(impaired_flag = "Y")
  return(dplyr::left_join(person_df, imp, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(impaired_flag = tidyr::replace_na(.data[["impaired_flag"]], "N")))
}

get_aggressive_driver <- function(person_df) {

  N <- value <- NULL # make local variable, not a global variable. Can't seem to bind (.data[["x"]]) variable inside data.table

  tier1 <- "Operated Motor Vehicle In Aggressive/Reckless Manner"
  tier2 <-
    c(
      "Exceed Speed Limit",
      "Speed Too Fast/Cond",
      "Following Too Close",
      "Improper Overtaking / Passing Right",
      "Improper Overtaking / Passing Left",
      "Operated Motor Vehicle In Inattentive, Careless, Negligent, or Erratic Manner"
    )
  tier3 <-
    c(
      "Failed To Yield Right-Of-Way",
      "Failure To Control",
      "Disregarded Stop Sign",
      "Disregarded Red Light",
      "Disregarded Other Traffic Control",
      "Disregarded Other Road Markings"
    )

  # Grab the fields
  drvrpc = person_df[ , grepl("DRVRPC|CRSHNMBR|UNITNMBR", names(person_df)), with = FALSE]

  # Filter to make this df shorter
  drvrpc_long = drvrpc |> tidyr::pivot_longer(cols = dplyr::starts_with("DRVRPC")) |> dplyr::filter(value %in% tier2 | value %in% tier3) |> data.table::setDT()

  # tier1
  t1_c = dplyr::filter_all(drvrpc, dplyr::any_vars(grepl(tier1, .))) |> dplyr::select("CRSHNMBR", "UNITNMBR")

  # if 2 in tier2
  t2 = drvrpc_long[value %in% tier2]
  t2_c = t2[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 2][, N := NULL]

  # if 3 in tier3
  t3 = drvrpc_long[value %in% tier3]
  t3_1 = t3[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 3][, N := NULL]

  # if 1 in tier2 and 2 in tier3.
  t3_2 = dplyr::semi_join(t2, t3[, .N, by = c("CRSHNMBR", "UNITNMBR")][N >= 2], by = c("CRSHNMBR", "UNITNMBR"))[, c("CRSHNMBR", "UNITNMBR")]

  # Combine all to get aggressive flag - t3 has 2 parts
  agg_flags = Reduce(function(x, y)
    merge(x, y, all = TRUE, by = c("CRSHNMBR", "UNITNMBR")),
    list(t1_c, t2_c, t3_1, t3_2)) |> dplyr::distinct(.data[["CRSHNMBR"]], .data[["UNITNMBR"]]) |> dplyr::mutate(aggressiveflag = "Y")

  dplyr::left_join(person_df, agg_flags, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(aggressiveflag = tidyr::replace_na(.data[["aggressiveflag"]], "N"))
}

#' Get flag for a suspected alcohol or drug person (old and new db). Deprecated.
#'
#' This looks to see if a person was suspected of alcohol or drug use.
#' @inheritParams get_driver_flags
#' @param driver_only Select for driver flag as driver only ("Y" or"N")
#' @param include_alc Select to include suspected alcohol ("Y" or"N")
#' @param include_drug Select to include suspected drug ("Y" or"N")
#'
#' @return A new column called \emph{drug_flag} or \emph{alcohol_flag}. Values
#'   are "Y","N", and "U" for unknowm. If \emph{driver_only} = "Y", then only
#'   drivers will return.
#' @export
#'
#' @examples
#' \dontrun{get_alc_drug_impaired_person(person17, include_alc = "N")}
get_alc_drug_impaired_person <- function(person_df,
                                         driver_only = "N",
                                         include_alc = "Y",
                                         include_drug = "Y") {
  if (driver_only == "Y") {
    person_df <-
      person_df |> dplyr::filter(.data[["DRVRFLAG"]] == 'Y')
  }
  if (include_alc == "Y") {
    person_df <- person_df |>
      dplyr::left_join(lookup_susp_alcohol, by = "ALCSUSP") |>
      dplyr::mutate(alcohol_flag = ifelse(is.na(.data[["alcohol_flag"]]), "U", .data[["alcohol_flag"]]))
  }
  if (include_drug == "Y") {
    person_df <- person_df |>
      dplyr::left_join(lookup_susp_drug, by = "DRUGSUSP") |>
      dplyr::mutate(drug_flag = ifelse(is.na(.data[["drug_flag"]]), "U", .data[["drug_flag"]]))
  }
  if (include_alc == "Y" & include_drug == "Y") {
  person_df <-
    person_df |> dplyr::mutate(
      drug_or_alc_flag = dplyr::case_when(
        .data[["drug_flag"]] == "Y" | .data[["alcohol_flag"]] == "Y" ~ "Y",
        .data[["drug_flag"]] == "N" | .data[["alcohol_flag"]] == "N" ~ "N",
        TRUE ~ "U"
      )
    )
  }
  return(person_df)
  # return(dplyr::left_join(alc_df, drug_df, by = c("CRSHNMBR", "CUSTNMBR")))
}

# # get_seat_belt <- # may need character, not attribute code
# #   function(person_df) {
# #     person_df |> filter(SFTYEQP == 105 |
# #                            (-(EYEPROT %in% c(101, 102, 103)) & HLMTUSE == 104))
# #   }
# get_seatbelt_flag_by_role <- function(person_df) {
#   sb <-
#     person_df |> dplyr::filter((
#       SFTYEQP %in% c("None Used - Vehicle Occupant",
#                      "NONE USED-VEHICLE OCCUPANT") |
#         (-(
#           EYEPROT %in% c("Yes: Worn", "Yes: Windshield", "Yes: Worn and Windshield")
#         ) & HLMTUSE %in% c("No"))
#     )) |> dplyr::select(CRSHNMBR, UNITNMBR, ROLE) |> dplyr::mutate(seatbelt_flag_role = "Y")
#   return(dplyr::left_join(person_df, sb, by = c("CRSHNMBR", "UNITNMBR", "ROLE")) |> dplyr::mutate(seatbelt_flag_role = tidyr::replace_na(seatbelt_flag_role, "N")))
# }

#' Get seatbelt flag (old and new db)
#'
#' Finds if a person in a unit was not wearing a seatbelt inside a motor
#' vehicle. For example, a passenger not wearing a seatbelt, every person in
#' that unit would get a seat belt flag. This includes the driver and other
#' passengers, if any. Need \strong{SFTYEQP}.
#' @inheritParams get_driver_flags
#'
#' @return A new column \emph{seatbelt_flag} ("Y" or "N")
#'
#' @examples
#' \dontrun{get_seatbelt_flag_by_unit(person17)}
get_seatbelt_flag_by_unit <- function(person_df) {
  sb <- person_df |>
    # for str_detect, add "Y" so we get flags under alcohol_flag
    dplyr::select(.data[["CRSHNMBR"]], .data[["UNITNMBR"]], .data[["SFTYEQP"]]) |>
    dplyr::filter(.data[["SFTYEQP"]] %in% c("None Used - Vehicle Occupant", "NONE USED-VEHICLE OCCUPANT")) |>
    dplyr::mutate(seatbelt_flag = "Y") |>
    dplyr::select("CRSHNMBR", "UNITNMBR", "seatbelt_flag")

  return(dplyr::left_join(person_df, sb, by = c("CRSHNMBR", "UNITNMBR")) |> dplyr::mutate(seatbelt_flag = tidyr::replace_na(.data[["seatbelt_flag"]], "N")))
}

#' Relabels WISINJ and ROLE in person
#'
#' This bins certain variables by recategorizing and making a new column. This
#' is useful when working with data from an old and new database or when wanting
#' to have fewer categories. "wisinj" bins \emph{WISINJ} into "No Injury",
#' "Injured", and "Killed".
#' @inheritParams get_driver_flags
#' @param relabel_by either by "wisinj"
#'
#' @return A new column of either/all \emph{inj}
#' @export
#'
#' @examples
#' \dontrun{system.file("extdata", "17person.fst", package = "fst") |>
#'   relabel_person_date(relabel_by = "wisinj")}
relabel_person_variables <- function(person_df,
                                relabel_by = "wisinj"){
  if (relabel_by %in% "wisinj"){
    person_df <- person_df |> dplyr::left_join(bin_wisinj_levels, by = "WISINJ")
  }
  # if (relabel_by %in% "bikeped"){
  #   person_df <- person_df |> dplyr::left_join(lookup_role_bike_ped, by = "ROLE")
  # }
  person_df
}

#' Select pedestrians or cyclists (old and new db)
#'
#' @inheritParams get_driver_flags
#' @param ped whether to select pedestrians
#' @param bike whether to select bicyclists
#'
#' @return Person df of selected bike/peds
#' @export
#'
#' @examples
#' \dontrun{select_bike_ped_persons(person17)}
select_bike_ped_persons <-
  function(person_df,
           ped = "Y",
           bike = "Y") {
    if (ped == "Y") {
      ped_select = c("Pedestrian", "PEDESTRIAN")
    } else {
      ped_select = c()
    }
    if (bike == "Y") {
      bike_select =  c("Bicycle", "BICYCLE")
    } else {
      bike_select = c()
    }
    selection = dplyr::filter(person_df, .data[["UNITTYPE"]] %in% c(ped_select, bike_select))
    # selection = person_df[.data[["UNITTYPE"]] %in% c(ped_select, bike_select)]
    selection |> dplyr::mutate(
      UNITTYPE = ifelse(.data[["UNITTYPE"]] == "BICYCLE", "Bicycle", .data[["UNITTYPE"]]),
      UNITTYPE = ifelse(.data[["UNITTYPE"]] == "PEDESTRIAN", "Pedestrian", .data[["UNITTYPE"]])
    )
  }
jacciz/wisdotcrashdatabase documentation built on June 3, 2023, 2:26 a.m.