R/format.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Documented in format_casualties format_collisions format_column_names format_ppp format_sf format_vehicles

#' Format STATS19 'collisions' data
#'
#' @section Details:
#' This is a helper function to format raw STATS19 data
#'
#' @param x Data frame created with `read_collisions()`
#' @export
#' @examples
#' \donttest{
#'   if(curl::has_internet()) {
#'     dl_stats19(year = 2022, type = "collision")
#'   }
#' }
#' @export
format_collisions = function(x) {
  format_stats19(x, type = "Accident")
}
#' Format STATS19 casualties
#'
#' @section Details:
#' This function formats raw STATS19 data
#'
#' @param x Data frame created with `read_casualties()`
#' @export
#' @examples
#' \donttest{
#' if(curl::has_internet()) {
#' dl_stats19(year = 2022, type = "casualty")
#' x = read_casualties(year = 2022)
#' casualties = format_casualties(x)
#' }
#' }
#' @export
format_casualties = function(x) {
  format_stats19(x, type = "Casualty")
}
#' Format STATS19 vehicles data
#'
#' @section Details:
#' This function formats raw STATS19 data
#'
#' @param x Data frame created with `read_vehicles()`
#' @export
#' @examples
#' \donttest{
#' if(curl::has_internet()) {
#' dl_stats19(year = 2022, type = "vehicle", ask = FALSE)
#' x = read_vehicles(year = 2022, format = FALSE)
#' vehicles = format_vehicles(x)
#' }
#' }
#' @export
format_vehicles = function(x) {
  format_stats19(x, type = "Vehicle")
}

format_stats19 = function(x, type) {
  # Rename columns
  old_names = names(x)
  new_names = format_column_names(old_names)
  # waldo::compare(old_names, new_names) They are the same for 2023 date
  # TODO: remove format_column_names() and use stats19::stats19_schema$variable
  names(x) = new_names

  # create lookup table
  lkp = stats19::stats19_variables[stats19::stats19_variables$table == tolower(type),]

  vkeep = new_names %in% stats19::stats19_schema$variable
  vars_to_change = which(vkeep)

  # # for testing
  # browser()
  # i = 1
  # x_old = x
  for(i in vars_to_change) {
    lkp_name = unique(lkp$variable[lkp$variable %in% new_names[i]])
    lookup = stats19::stats19_schema[
      stats19::stats19_schema$variable %in% lkp_name,
      c("code", "label")
      ]
    original_class = class(x[[i]])
    # Use lookup to replace codes with labels, but keep original values for non-matches
    # See https://github.com/ropensci/stats19/issues/235#issuecomment-2254257770
    matched_labels = lookup$label[match(x[[i]], lookup$code)]
    x[[i]] = ifelse(is.na(matched_labels), x[[i]], matched_labels)
    x[[i]] = methods::as(x[[i]], original_class)
  }
  # waldo::compare(x_old, x)

  date_in_names = "date" %in% names(x)
  if(date_in_names) {
    date_char = x$date
    x$date = as.Date(date_char, format = "%d/%m/%Y")
  }
  if(date_in_names && "time" %in% names(x)) {
    # Add formated datetime column, tell people about this new feature
    # (message could be removed in future versions)
    message("date and time columns present, creating formatted datetime column")
    # names(x)
    # class(x$time) # it's a character string
    # head(x$time) # just the time (not date)

    x$datetime = as.POSIXct(paste(date_char, x$time), tz = 'Europe/London', format = "%d/%m/%Y %H:%M")
    # summary(x$datetime)
  }
  cregex = "easting|northing|latitude|longitude"
  names_coordinates = names(x)[grepl(pattern = cregex, x = names(x), ignore.case = TRUE)]
  # convert them to numeric if not already:
  for(i in names_coordinates) {
    if(!is.numeric(x[[i]])) {
      x[[i]] = as.numeric(x[[i]])
    }
  }
  x
}


#' Format column names of raw STATS19 data
#'
#' This function takes messy column names and returns clean ones that work well with
#' R by default. Names that are all lower case with no R-unfriendly characters
#' such as spaces and `-` are returned.
#' @param column_names Column names to be cleaned
#' @return Column names cleaned.
#' @export
#' @examples
#' \donttest{
#' if(curl::has_internet()) {
#' crashes_raw = read_collisions(year = 2022)
#' column_names = names(crashes_raw)
#' column_names
#' format_column_names(column_names = column_names)
#' }
#' }
format_column_names = function(column_names) {
  x = tolower(column_names)
  x = gsub(pattern = " ", replacement = "_", x = x)
  x = gsub(pattern = "\\(|\\)", replacement = "", x = x)
  x = gsub(pattern = "1st", replacement = "first", x = x)
  x = gsub(pattern = "2nd", replacement = "second", x = x)
  x = gsub(pattern = "-", replacement = "_", x = x)
  x = gsub(pattern = "\\?", replacement = "", x)
  x
}

#' Format convert STATS19 data into spatial (sf) object
#'
#' @param x Data frame created with `read_collisions()`
#' @param lonlat Should the results be returned in longitude/latitude?
#' By default `FALSE`, meaning the British National Grid (EPSG code: 27700)
#' is used.
#'
#' @export
#' @examples
#' x_sf = format_sf(accidents_sample)
#' sf:::plot.sf(x_sf)

format_sf = function(x, lonlat = FALSE) {
  n = names(x)
  coords = n[grep(pattern = "easting|northing",
                  x = n,
                  ignore.case = TRUE)]
  coord_null = is.na(x[[coords[1]]]) | is.na(x[[coords[2]]])
  message(sum(coord_null), " rows removed with no coordinates")
  x = x[!coord_null, ]
  x_sf = sf::st_as_sf(x, coords = coords, crs = 27700)
  if(lonlat) {
    x_sf = sf::st_transform(x_sf, crs = 4326)
  }
  x_sf
}

#' Convert STATS19 data into ppp (spatstat) format.
#'
#' This function is a wrapper around the [spatstat.geom::ppp()] function and
#' it is used to transform STATS19 data into a ppp format.
#'
#' @param data A STATS19 dataframe to be converted into ppp format.
#' @param window A windows of observation, an object of class `owin()`. If
#'   `window = NULL` (i.e. the default) then the function creates an approximate
#'   bounding box covering the whole UK. It can also be used to filter only the
#'   events occurring in a specific region of UK (see the examples of
#'   \code{\link{get_stats19}}).
#' @param ... Additional parameters that should be passed to
#'   [spatstat.geom::ppp()] function. Read the help page of that function
#'   for a detailed description of the available parameters.
#'
#' @return A ppp object.
#' @seealso \code{\link{format_sf}} for an analogous function used to convert
#'   data into sf format and [spatstat.geom::ppp()] for the original function.
#' @export
#'
#' @examples
#' if (requireNamespace("spatstat.geom", quietly = TRUE)) {
#'   x_ppp = format_ppp(accidents_sample)
#'   x_ppp
#' }
#'

format_ppp = function(data, window = NULL,  ...) {
  # check that spatstat.geom is installed
  if (!requireNamespace("spatstat.geom", quietly = TRUE)) {
    stop("package spatstat.geom required, please install it first")
  }

  # look for column names of coordinates
  names_data = names(data)
  coords = names_data[grepl(
    pattern = "easting|northing",
    x = names_data,
    ignore.case = TRUE
  )]

  # exclude car crashes with NA in the coordinates
  coords_null = is.na(data[[coords[1]]] | data[[coords[2]]])
  if (sum(coords_null) > 0) {
    message(sum(coords_null), " rows removed with no coordinates")
    data = data[!coords_null, ]
  }

  # owin object for ppp. Default values represent an approximate bbox of UK
  if (is.null(window)) {
    window = spatstat.geom::owin(
      xrange = c(64950, 655391),
      yrange = c(10235, 1209512)
    )
  }

  data_ppp = spatstat.geom::ppp(
    x = data[[coords[[1]]]],
    y = data[[coords[[2]]]],
    window = window,
    marks = data[setdiff(names_data, coords)],
    ...
  )
  data_ppp
}

Any scripts or data that you put into this service are public.

stats19 documentation built on April 4, 2025, 1:41 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

stats19
Work with Open Road Traffic Casualty Data from Great Britain

R/format.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Defines functions format_ppp format_sf format_column_names format_stats19 format_vehicles format_casualties format_collisions

Documented in format_casualties format_collisions format_column_names format_ppp format_sf format_vehicles

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19 Work with Open Road Traffic Casualty Data from Great Britain

R/format.R In stats19: Work with Open Road Traffic Casualty Data from Great Britain

Defines functions format_ppp format_sf format_column_names format_stats19 format_vehicles format_casualties format_collisions

Documented in format_casualties format_collisions format_column_names format_ppp format_sf format_vehicles

Try the stats19 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

stats19
Work with Open Road Traffic Casualty Data from Great Britain

R/format.R
In stats19: Work with Open Road Traffic Casualty Data from Great Britain