# R/factorize_variables.R
#
# Defines functions: factor_crash_variables
#
# Documented in: factor_crash_variables

#' Factor variables
#'
#' Converts each of the columns CRSHSVR, INJSVR, WISINJ, DAYNMBR, CRSHMTH and
#' inj that is present in `any_df` into a factor with a fixed level order.
#' Columns that are absent are skipped and every other column is left
#' untouched. For DAYNMBR and CRSHMTH the upper-case three-letter codes
#' ("SUN", "JAN", ...) are expanded to full names before factoring. Any value
#' that is not one of a column's levels becomes `NA`.
#'
#' @param any_df any crash dataframe
#'
#' @return `any_df` with the recognised columns replaced by factors.
#' @export
#'
#' @examples \dontrun{factor_crash_variables(crash)}
factor_crash_variables <- function(any_df) {
  # INJSVR and WISINJ share the same injury-severity scale.
  injury_levels <- c(
    "Fatal Injury",
    "Suspected Serious Injury",
    "Suspected Minor Injury",
    "Possible Injury",
    "No Apparent Injury"
  )
  day_levels <- c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )

  # One entry per supported column. `codes`, when given, is parallel to
  # `levels`: codes[i] is the abbreviation that expands to levels[i].
  specs <- list(
    CRSHSVR = list(levels = c("Fatal", "Injury", "Property Damage")),
    inj = list(levels = c("Killed", "Injured", "No Injury")),
    INJSVR = list(levels = injury_levels),
    WISINJ = list(levels = injury_levels),
    DAYNMBR = list(
      levels = day_levels,
      codes = c("SUN", "MON", "TUE", "WED", "THU", "FRI", "SAT")
    ),
    CRSHMTH = list(
      levels = month.name,
      codes = toupper(month.abb) # "JAN", "FEB", ..., "DEC"
    )
  )

  for (column in intersect(names(specs), colnames(any_df))) {
    spec <- specs[[column]]
    values <- any_df[[column]]
    codes <- spec[["codes"]]

    if (!is.null(codes)) {
      # as.character() so a factor column (e.g. on a second call) is handled
      # exactly like a character column.
      values <- as.character(values)
      hit <- match(values, codes)
      is_code <- !is.na(hit)
      # Only abbreviations are rewritten; values that are already full names
      # pass through unchanged.
      values[is_code] <- spec[["levels"]][hit[is_code]]
    }

    any_df[[column]] <- factor(values, levels = spec[["levels"]])
  }

  any_df
}
# jacciz/wisdotcrashdatabase documentation built on June 3, 2023, 2:26 a.m.