#' @title Rename and filter
#' @description Rename and filter data after having conducted initial merge
#' @import data.table
#' @import fst
#' @param import_folder Folder where data is located
#' @param export_folder Folder to export dataset
#' @return FST file of VBZ passenger data with renaming and initial filtering
#' @export
rename_and_filter <- function(import_folder, export_folder) {
# Clean folder names
import_folder <- clean_folder(import_folder)
if (is.null(export_folder)) {
export_folder <- import_folder
} else {
export_folder <- clean_folder(export_folder)
}
dt <- paste0(export_folder, "zvv_stage1.fst")
dt <- fst::read_fst(dt)
data.table::setDT(dt)
columns <- data.table::data.table()
columns$old <- c(
"Zaehl_Fahrt_Id",
"Haltestellen_Id",
"Ist_Fahrzeit",
"Ist_Wartezeit",
"Einsteiger",
"Aussteiger",
"Besetzung",
"IST_AN",
"IST_AB",
"Datum",
"Richtung",
"Ist_Abfahrtszeit",
"Ist_Ankunftszeit",
"Fahrzeug_Id",
"FZG",
"Fahrzeugtypname",
"Sitzplaetze",
"Kap_2m2",
"Kap_4m2",
"Haltestellen_Id_naechste",
"Haltestellenlangname_naechste",
"Linienname_Fahrgastauskunft",
"Haltestellenlangname",
"Besetzung_Kategorie",
"Belastungs_Faktor",
"n_tueren"
)
columns$new <- c(
"trip_id",
"stop_id",
"travel_duration",
"stop_duration",
"onboardings",
"offboardings",
"occupancy",
"real_arrival_time",
"real_departure_time",
"date",
"direction",
"trip_start_time",
"trip_end_time",
"vehicle_id",
"vehicle_abbreviation",
"vehicle",
"seating_capacity",
"capacity_2m2",
"capacity_4m2",
"next_stop_id",
"next_stop_name",
"line_name",
"stop_name",
"occupancy_level",
"load_factor",
"n_doors"
)
data.table::setnames(dt, columns$old, columns$new, skip_absent = TRUE)
# Conduct basic checks
data.table::setorder(dt, real_departure_time)
# Everyone who comes on must come off
dt[, check := sum(onboardings) == sum(offboardings), by = trip_id]
dt <- dt[check == TRUE]
# Initial occupancy equal to inital onboardings
dt[, check := data.table::first(onboardings) == data.table::first(occupancy), by = trip_id]
dt <- dt[check == TRUE]
# No one comes off at first stop
dt[, check := data.table::first(offboardings) == 0, by = trip_id]
dt <- dt[check == TRUE]
# Bus is empty after trip
dt[, check := data.table::last(occupancy) == 0, by = trip_id]
dt <- dt[check == TRUE]
# No one comes on at terminal stop
dt[, check := data.table::last(onboardings) == 0, by = trip_id]
dt <- dt[check == TRUE]
# All passengers on last segemnt come off at terminal stop
dt[, check := data.table::last(offboardings) == data.table::last(shift(occupancy, 1)), by = trip_id]
dt <- dt[check == TRUE]
# People boarding at second to last stop less or equal to offboardings at terminal
dt[, check := data.table::last(offboardings) >= data.table::last(shift(onboardings, 1)), by = trip_id]
dt <- dt[check == TRUE]
# People offboarding at second stop less or equal than boardings at first stop
dt[, check := data.table::first(onboardings) >= data.table::first(shift(offboardings, -1)), by = trip_id]
dt <- dt[check == TRUE]
dt[, check := NULL]
# Basic time checks
dt <- dt[trip_start_time <= trip_end_time]
dt <- dt[trip_start_time <= real_departure_time]
dt <- dt[trip_end_time >= real_arrival_time]
dt <- dt[real_departure_time >= real_arrival_time]
fst::write_fst(dt, paste0(export_folder, "zvv_stage2.fst"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.