R/rename_and_filter.R

Defines functions rename_and_filter

Documented in rename_and_filter

#' @title Rename and filter
#' @description Rename and filter data after having conducted initial merge
#' @import data.table
#' @import fst
#' @param import_folder Folder where data is located
#' @param export_folder Folder to export dataset
#' @return FST file of VBZ passenger data with renaming and initial filtering
#' @export

rename_and_filter <- function(import_folder, export_folder) {

  # Clean folder names
  import_folder <- clean_folder(import_folder)

  if (is.null(export_folder)) {
    export_folder <- import_folder
  } else {
    export_folder <- clean_folder(export_folder)
  }

  dt <- paste0(export_folder, "zvv_stage1.fst")
  dt <- fst::read_fst(dt)
  data.table::setDT(dt)

  columns <- data.table::data.table()

  columns$old <- c(
    "Zaehl_Fahrt_Id",
    "Haltestellen_Id",
    "Ist_Fahrzeit",
    "Ist_Wartezeit",
    "Einsteiger",
    "Aussteiger",
    "Besetzung",
    "IST_AN",
    "IST_AB",
    "Datum",
    "Richtung",
    "Ist_Abfahrtszeit",
    "Ist_Ankunftszeit",
    "Fahrzeug_Id",
    "FZG",
    "Fahrzeugtypname",
    "Sitzplaetze",
    "Kap_2m2",
    "Kap_4m2",
    "Haltestellen_Id_naechste",
    "Haltestellenlangname_naechste",
    "Linienname_Fahrgastauskunft",
    "Haltestellenlangname",
    "Besetzung_Kategorie",
    "Belastungs_Faktor",
    "n_tueren"
  )

  columns$new <- c(
    "trip_id",
    "stop_id",
    "travel_duration",
    "stop_duration",
    "onboardings",
    "offboardings",
    "occupancy",
    "real_arrival_time",
    "real_departure_time",
    "date",
    "direction",
    "trip_start_time",
    "trip_end_time",
    "vehicle_id",
    "vehicle_abbreviation",
    "vehicle",
    "seating_capacity",
    "capacity_2m2",
    "capacity_4m2",
    "next_stop_id",
    "next_stop_name",
    "line_name",
    "stop_name",
    "occupancy_level",
    "load_factor",
    "n_doors"
  )

  data.table::setnames(dt, columns$old, columns$new, skip_absent = TRUE)

  # Conduct basic checks
  data.table::setorder(dt, real_departure_time)
  # Everyone who comes on must come off
  dt[, check := sum(onboardings) == sum(offboardings), by = trip_id]
  dt <- dt[check == TRUE]
  # Initial occupancy equal to inital onboardings
  dt[, check := data.table::first(onboardings) == data.table::first(occupancy), by = trip_id]
  dt <- dt[check == TRUE]
  # No one comes off at first stop
  dt[, check := data.table::first(offboardings) == 0, by = trip_id]
  dt <- dt[check == TRUE]
  # Bus is empty after trip
  dt[, check := data.table::last(occupancy) == 0, by = trip_id]
  dt <- dt[check == TRUE]
  # No one comes on at terminal stop
  dt[, check := data.table::last(onboardings) == 0, by = trip_id]
  dt <- dt[check == TRUE]
  # All passengers on last segemnt come off at terminal stop
  dt[, check := data.table::last(offboardings) == data.table::last(shift(occupancy, 1)), by = trip_id]
  dt <- dt[check == TRUE]
  # People boarding at second to last stop less or equal to offboardings at terminal
  dt[, check := data.table::last(offboardings) >= data.table::last(shift(onboardings, 1)), by = trip_id]
  dt <- dt[check == TRUE]
  # People offboarding at second stop less or equal than boardings at first stop
  dt[, check := data.table::first(onboardings) >= data.table::first(shift(offboardings, -1)), by = trip_id]
  dt <- dt[check == TRUE]
  dt[, check := NULL]

  # Basic time checks
  dt <- dt[trip_start_time <= trip_end_time]
  dt <- dt[trip_start_time <= real_departure_time]
  dt <- dt[trip_end_time >= real_arrival_time]
  dt <- dt[real_departure_time >= real_arrival_time]

  fst::write_fst(dt, paste0(export_folder, "zvv_stage2.fst"))
}
lucasjamar/VBZtools documentation built on May 20, 2020, 3:44 a.m.