#' GTFS cleaning functions
#'
#' Functions to remove items from a GTFS if they are not used in either the
#' trips (for routes, shapes, and `service_id`s) or stop_times (for stops)
#' file.
#'
#' @param gtfs A GTFS file stored as a list
#' @param retain_stops Whether to retain the unused stops (in `gtfs$stops_unused`)
#' @param retain_shapes Whether to retain the unused shapes (in `gtfs$shapes_unused`)
#' @param retain_routes Whether to retain the unused routes (in `gtfs$routes_unused`)
#' @param retain_service Whether to retain the unused `service_id`s (in `gtfs$calendar_unused`)
#'
#' @return A GTFS file stored as a list. If `gtfs_remove_all()` is called, stops,
#' shapes, routes, and `service_id`s that are not
#' used by the trips.txt or stop_times.txt files will be removed.
#' Other functions pertain to specific files.
#' @export
#' @examples \dontrun{
#' gtfs_remove_all(gtfs)
#' }
#' @name gtfs_remove_unused
#' @export
remove_unused_stops <- function(gtfs, retain_stops = FALSE) {
# summarize number of trips by stop
used_stop_counts <-
gtfs$stop_times %>%
dplyr::group_by(.data$stop_id) %>%
dplyr::summarise(trip_stops = dplyr::n())
# get list of stops
distinct_stops <-
gtfs$stops %>%
dplyr::select(.data$stop_id) %>%
dplyr::distinct()
undefined_stops <-
used_stop_counts %>%
dplyr::filter(!(.data$stop_id %in% distinct_stops$stop_id)) %>%
magrittr::use_series(stop_id)
if(length(undefined_stops) > 0) {
stop(paste0("There are stops in stop_times.txt that lack definition in stops.txt.\n The following stops lack definition: ",
paste(undefined_stops, collapse = " ")))
}
# join number of trips by stop to list of stops
stop_summary <-
dplyr::left_join(distinct_stops, used_stop_counts, by = "stop_id")
# list of unused stops
unused_stops <-
stop_summary %>%
dplyr::filter(is.na(.data$trip_stops)) %>%
magrittr::use_series(stop_id)
# if retaining stops, write the unused stops to stops_unused
if(retain_stops) {
gtfs$stops_unused <- gtfs$stops %>% dplyr::filter(.data$stop_id %in% unused_stops)
}
# overwrite the stops file
gtfs$stops <-
gtfs$stops %>%
dplyr::filter(!(.data$stop_id %in% unused_stops))
# return the new gtfs
gtfs
}
#' @rdname gtfs_remove_unused
#' @export
remove_unused_shapes <- function(gtfs, retain_shapes = FALSE) {
# number of trips by shape
used_shape_counts <-
gtfs$trips %>%
dplyr::group_by(.data$shape_id) %>%
dplyr::summarize(trip_count = dplyr::n())
# distinct shapes
distinct_shapes <-
gtfs$shapes %>%
dplyr::select(.data$shape_id) %>%
dplyr::distinct()
undefined_shapes <-
used_shape_counts %>%
dplyr::filter(!(.data$shape_id %in% distinct_shapes$shape_id)) %>%
magrittr::use_series(shape_id)
if(length(undefined_shapes) > 0) {
stop(paste0("There are shapes in trips.txt that lack definition in shapes.txt.\n The following shapes lack definition: ",
paste(undefined_shapes, collapse = " ")))
}
# join
shape_summary <-
dplyr::left_join(distinct_shapes, used_shape_counts,
by = "shape_id")
# list of unused shapes
unused_shapes <-
shape_summary %>%
dplyr::filter(is.na(.data$trip_count)) %>%
magrittr::use_series(shape_id)
# if retaining shapes, write unused shapes to shapes_unused
if (retain_shapes) {
gtfs$shapes_unused <- gtfs$shapes %>% dplyr::filter(.data$shape_id %in% unused_shapes)
}
# overwrite the shapes file
gtfs$shapes <-
gtfs$shapes %>%
dplyr::filter(!(.data$shape_id %in% unused_shapes))
# return the new gtfs
gtfs
}
#' @rdname gtfs_remove_unused
#' @export
#' @importFrom rlang .data
remove_unused_routes <- function(gtfs, retain_routes = FALSE) {
# list of route_ids in trips with trip count
trips_by_route <-
gtfs$trips %>%
dplyr::group_by(.data$route_id) %>%
dplyr::summarize(trips = dplyr::n())
# list of route_ids in routes
distinct_routes <-
gtfs$routes %>%
dplyr::transmute(.data$route_id, route_in_routes = TRUE)
# join route_ids from both
route_comparison <-
dplyr::full_join(distinct_routes, trips_by_route, by = "route_id") %>%
dplyr::mutate(route_in_routes = tidyr::replace_na(.data$route_in_routes, FALSE))
# throw error if there are route_ids in trips that have no info in routes
stopifnot(route_comparison %>% dplyr::filter(!.data$route_in_routes) %>% magrittr::use_series(route_id) %>% length() == 0)
# create list of unused routes
unused_routes <-
route_comparison %>%
dplyr::filter(is.na(.data$trips)) %>%
magrittr::use_series(route_id)
# if retaining unused routes, put them in routes_unused
if(retain_routes) {
gtfs$routes_unused <-
gtfs$routes %>%
dplyr::filter(.data$route_id %in% unused_routes)
}
# remove unused routes from routes table
gtfs$routes <-
gtfs$routes %>%
dplyr::filter(!(.data$route_id %in% unused_routes))
# output gtfs
gtfs
}
#' @rdname gtfs_remove_unused
#' @export
remove_unused_service <- function(gtfs, retain_service = FALSE) {
# get count of trips by service_id
trips_by_service_id <-
gtfs$trips %>%
dplyr::group_by(.data$service_id) %>%
dplyr::summarize(trips = dplyr::n())
# get service_ids from calendar
calendar_service_ids <-
gtfs$calendar %>%
dplyr::transmute(.data$service_id, defined_in_calendar = TRUE)
# compare lists
service_id_comparison <-
dplyr::full_join(calendar_service_ids, trips_by_service_id, by = "service_id") %>%
dplyr::mutate(defined_in_calendar = tidyr::replace_na(.data$defined_in_calendar, FALSE))
# throw error for undefined service ids
stopifnot(service_id_comparison %>%
dplyr::filter(!.data$defined_in_calendar) %>%
magrittr::use_series(service_id) %>%
length() == 0)
# list of unused service ids
unused_service_ids <-
service_id_comparison %>%
dplyr::filter(is.na(.data$trips)) %>%
magrittr::use_series(service_id)
# if retaining service ids, add to calendar_unused
if(retain_service) {
gtfs$calendar_unused <-
gtfs$calendar %>%
dplyr::filter(.data$service_id %in% unused_service_ids)
}
# filter calendar file
gtfs$calendar <-
gtfs$calendar %>%
dplyr::filter(!(.data$service_id %in% unused_service_ids))
# filter calendar attributes file if exists
if(!rlang::is_null(gtfs$calendar_attributes)) {
gtfs$calendar_attributes <-
gtfs$calendar_attributes %>%
dplyr::filter(!(.data$service_id %in% unused_service_ids))
}
# return gtfs
gtfs
}
#' @rdname gtfs_remove_unused
#' @family gtfs cleaning functions
#' @param retain_all Whether to retain removed components
#' @export
gtfs_remove_all <- function(gtfs,
retain_all = FALSE,
retain_stops = FALSE,
retain_shapes = FALSE,
retain_routes = FALSE,
retain_service = FALSE) {
if (retain_all) {
retain_shapes <- TRUE
retain_stops <- TRUE
retain_routes <- TRUE
retain_service <- TRUE
}
gtfs <- remove_unused_stops(gtfs, retain_stops = retain_stops)
gtfs <- remove_unused_shapes(gtfs, retain_shapes = retain_shapes)
gtfs <- remove_unused_routes(gtfs, retain_routes = retain_routes)
gtfs <- remove_unused_service(gtfs, retain_service = retain_service)
gtfs
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.