#' Export ATOC stations as GTFS stops.txt
#'
#' @details
#' Export ATOC stations as GTFS stops.txt
#'
#' @param station station SF data frame from the importMSN function
#' @param TI TI object
#' @noRd
#'
station2stops <- function(station, TI) {
# Discard Unneded Columns
TI <- TI[, c("TIPLOC code", "NALCO", "TPS Description", "CRS Code")]
station <- station[, c(
"Station Name", "CATE Interchange status", "TIPLOC Code",
"CRS Code", "geometry"
)]
jnd <- dplyr::left_join(TI, station, by = c("TIPLOC code" = "TIPLOC Code"))
station.extra <- station[!station$`TIPLOC Code` %in% jnd$`TIPLOC code`, ]
station.extra$`TIPLOC code` <- station.extra$`TIPLOC Code`
station.extra$NALCO <- NA
station.extra$`CRS Code.y` <- station.extra$`CRS Code`
station.extra$`TPS Description` <- NA
station.extra$`CRS Code.x` <- NA
station.extra <- station.extra[, names(jnd)]
jnd <- suppressWarnings(dplyr::bind_rows(jnd, station.extra))
jnd$geometry <- sf::st_sfc(jnd$geometry)
jnd <- sf::st_sf(jnd)
sf::st_crs(jnd) <- 4326
jnd$CRS <- ifelse(is.na(jnd$`CRS Code.y`), jnd$`CRS Code.x`, jnd$`CRS Code.y`)
jnd$name <- ifelse(is.na(jnd$`TPS Description`), jnd$`Station Name`, jnd$`TPS Description`)
stops <- jnd[, c("CRS", "TIPLOC code", "name")]
stops <- stops[!sf::st_is_empty(stops), ]
stops.final <- stops
stops.final <- as.data.frame(stops.final)
stops.final$geometry <- sf::st_sfc(stops.final$geometry)
stops.final <- sf::st_sf(stops.final)
sf::st_crs(stops.final) <- 4326
stops.final <- stops.final[, c("TIPLOC code", "CRS", "name", "geometry")]
# recorder the match the GTFS stops.txt
names(stops.final) <- c("stop_id", "stop_code", "stop_name", "geometry")
coords <- sf::st_coordinates(stops.final)
stops.final$stop_lat <- coords[, 2]
stops.final$stop_lon <- coords[, 1]
stops.final$stop_lat <- round(stops.final$stop_lat, 5) # sub metre precison is sufficent
stops.final$stop_lon <- round(stops.final$stop_lon, 5)
stops.final <- as.data.frame(stops.final)
stops.final$geometry <- NULL
# Built tiploc to CRS lookup
lookup <- as.data.frame(jnd)
lookup <- lookup[, c("TIPLOC code", "CRS")]
lookup$match <- ifelse(is.na(lookup$CRS), lookup$`TIPLOC code`, lookup$CRS)
lookup <- lookup[, c("TIPLOC code", "match")]
names(lookup) <- c("TIPLOC", "match")
results <- list(stops.final, lookup)
names(results) <- c("stops", "lookup")
return(results)
}
#' Export ATOC stations and FLF file as transfers.txt
#'
#' @details
#' Export ATOC FLF file as transfers.txt
#'
#' @param station station SF data frame from the importMSN function
#' @param flf imported flf file from importFLF
#' @param path_out Path to save file to
#' @noRd
#'
station2transfers <- function(station, flf, path_out) {
### SECTION 4: ###############################################################################
# make make the transfers.txt
# transfer betwwen stations are in the FLF file
transfers1 <- flf[, c("from", "to", "time")]
transfers1$time <- transfers1$time * 60
transfers1$transfer_type <- 2
# transfer within sations are in the stations file
transfers2 <- station[, c("TIPLOC Code", "CRS Code", "Minimum Change Time")]
transfers2 <- as.data.frame(transfers2)
transfers2$geometry <- NULL
transfers3 <- transfers2[, c("TIPLOC Code", "CRS Code")]
names(transfers3) <- c("from_stop_id", "CRS Code")
transfers1 <- dplyr::left_join(transfers1, transfers3, by = c("from" = "CRS Code"))
names(transfers3) <- c("to_stop_id", "CRS Code")
transfers1 <- dplyr::left_join(transfers1, transfers3, by = c("to" = "CRS Code"))
transfers1 <- transfers1[, c("from_stop_id", "to_stop_id", "transfer_type", "time")]
names(transfers1) <- c("from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time")
transfers2$min_transfer_time <- as.integer(transfers2$`Minimum Change Time`) * 60
transfers2$to_stop_id <- transfers2$`TIPLOC Code`
transfers2$transfer_type <- 2
names(transfers2) <- c("from_stop_id", "CRS Code", "Minimum Change Time", "min_transfer_time", "to_stop_id", "transfer_type")
transfers2 <- transfers2[, c("from_stop_id", "to_stop_id", "transfer_type", "min_transfer_time")]
transfers <- rbind(transfers1, transfers2)
return(transfers)
}
#' split overlapping start and end dates#
#'
#' @param cal cal object
#' @details split overlapping start and end dates
#' @noRd
splitDates <- function(cal) {
# get all the dates that
dates <- c(cal$start_date, cal$end_date)
dates <- dates[order(dates)]
# create all unique pairs
dates.df <- data.frame(
start_date = dates[seq(1, length(dates) - 1)],
end_date = dates[seq(2, length(dates))]
)
cal.new <- dplyr::left_join(dates.df, cal, by = c("start_date" = "start_date", "end_date" = "end_date"))
if ("P" %in% cal$STP) {
match <- "P"
} else {
match <- cal$STP[cal$STP != "C"]
match <- match[1]
}
# fill in the original missing schdule
for (j in seq(1, nrow(cal.new))) {
if (is.na(cal.new$UID[j])) {
st_tmp <- cal.new$start_date[j]
ed_tmp <- cal.new$end_date[j]
new.UID <- cal$UID[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.Days <- cal$Days[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.roWID <- cal$rowID[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.ATOC <- cal$`ATOC Code`[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.Retail <- cal$`Retail Train ID`[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.head <- cal$Headcode[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
new.Status <- cal$`Train Status`[cal$STP == match & cal$start_date <= st_tmp & cal$end_date >= ed_tmp ]
if (length(new.UID) == 1) {
cal.new$UID[j] <- new.UID
cal.new$Days[j] <- new.Days
cal.new$rowID[j] <- new.roWID
cal.new$`ATOC Code`[j] <- new.ATOC
cal.new$`Retail Train ID`[j] <- new.Retail
cal.new$`Train Status`[j] <- new.Status
cal.new$Headcode[j] <- new.head
cal.new$STP[j] <- match
} else if (length(new.UID) > 1) {
message("Going From")
print(cal)
message("To")
print(cal.new)
stop()
# readline(prompt="Press [enter] to continue")print()
}
}
}
# remove any gaps
cal.new <- cal.new[!is.na(cal.new$UID), ]
# remove duplicated rows
cal.new <- cal.new[!duplicated(cal.new), ]
# modify end and start dates
for (j in seq(1, nrow(cal.new))) {
if (cal.new$STP[j] == "P") {
# check if end date need changing
if (j < nrow(cal.new)) {
if (cal.new$end_date[j] == cal.new$start_date[j + 1]) {
cal.new$end_date[j] <- (cal.new$end_date[j] - 1)
}
}
# check if start date needs changing
if (j > 1) {
if (cal.new$start_date[j] == cal.new$end_date[j - 1]) {
cal.new$start_date[j] <- (cal.new$start_date[j] + 1)
}
}
}
}
# remove cancled trips
cal.new <- cal.new[cal.new$STP != "C", ]
# fix duration
cal.new$duration <- cal.new$end_date - cal.new$start_date + 1
# remove any zero or negative day schduels
cal.new <- cal.new[cal.new$duration > 0, ]
# Append UID to note the changes
if (nrow(cal.new) > 0) {
if (nrow(cal.new) < 27) {
cal.new$UID <- paste0(cal.new$UID, " ", letters[1:nrow(cal.new)])
} else {
# Cases where we need extra letters, gives upto 676 ids
lett <- paste0(rep(letters, each = 26), rep(letters, times = 26))
cal.new$UID <- paste0(cal.new$UID, " ", lett[1:nrow(cal.new)])
}
} else {
cal.new <- NA
}
return(cal.new)
}
#' internal function for matching stop_times to the basic schdule
#'
#' @details
#' Takes in a row of the schdedule and then gets the next row (schedule must be sorted by rowID)
#'
#' @param schedule.rowID rowID field from schedule object
#' @param stop_times.rowID rowID field from stop_times object
#' @param ncores number of processes for parallel processing (default = 1)
#' @noRd
#'
matchRoutes <- function(schedule.rowID, stop_times.rowID, ncores = 1) {
schedule_tmp <- matrix(c(schedule.rowID, schedule.rowID[2:length(schedule.rowID)], max(schedule.rowID) + 99999), ncol = 2)
if (ncores == 1) {
matches <- lapply(1:nrow(schedule_tmp), function(x) {
stop_times.rowID[ dplyr::between(
stop_times.rowID,
schedule_tmp[x, 1],
schedule_tmp[x, 2]
) ]
})
} else {
CL <- parallel::makeCluster(ncores) # make clusert and set number of core
parallel::clusterExport(cl = CL, varlist = c("stop_times.rowID", "schedule_tmp"), envir = environment())
parallel::clusterEvalQ(cl = CL, {
library(dplyr)
})
matches <- parallel::parLapply(cl = CL, 1:nrow(schedule_tmp), function(x) {
stop_times.rowID[ dplyr::between(
stop_times.rowID,
schedule_tmp[x, 1],
schedule_tmp[x, 2]
) ]
})
parallel::stopCluster(CL)
}
# names(matches) = schedule_tmp[1:10]
result <- data.frame(
stop_times.rowID = unlist(matches),
schedule.rowID = rep(schedule.rowID, times = lengths(matches))
)
return(result)
}
# TODO: Does not work within functions, rejig to work in package.
#
#' internal function for cleaning calendar
#'
#' @details
#' check for schdules that don overlay with the day they rund i.e. Mon - Sat schduel for a sunday only service
#' return a logcal vector of if the calendar is valid
#'
#' @param tmp 1 row dataframe
#' @noRd
#'
checkrows <- function(tmp) {
# tmp = res.calendar[i,]
# message(paste0("done ",i))
if (tmp$duration < 7) {
days.valid <- weekdays(seq.POSIXt(from = as.POSIXct.Date(tmp$start_date), to = as.POSIXct.Date(tmp$end_date), by = "DSTday"))
days.valid <- tolower(days.valid)
days.match <- tmp[, c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")]
days.match <- sapply(days.match, function(x) {
x == 1
})
days.match <- days.match[days.match]
days.match <- names(days.match)
if (any(days.valid %in% days.match)) {
return(TRUE)
} else {
return(FALSE)
}
} else {
return(TRUE)
}
}
#' internal function for contructing longnames of routes
#'
#' @details
#' check for schdules that don overlay with the day they rund i.e. Mon - Sat schduel for a sunday only service
#' return a logcal vector of if the calendar is valid
#'
#' @param routes routes data.frame
#' @param stop_times stop_times data.frame
#' @noRd
#'
longnames <- function(routes, stop_times) {
stop_times_sub <- dplyr::group_by(stop_times, trip_id)
stop_times_sub <- dplyr::summarise(stop_times_sub,
schedule = unique(schedule),
stop_a = stop_id[stop_sequence == 1],
# seq = min(stop_sequence),
stop_b = stop_id[stop_sequence == max(stop_sequence)]
)
stop_times_sub$route_long_name <- paste0("Train from ", stop_times_sub$stop_a, " to ", stop_times_sub$stop_b)
stop_times_sub <- stop_times_sub[!duplicated(stop_times_sub$schedule), ]
stop_times_sub <- stop_times_sub[, c("schedule", "route_long_name")]
routes <- dplyr::left_join(routes, stop_times_sub, by = c("rowID" = "schedule"))
return(routes)
}
#' make calendar
#'
#' @details
#' split overlapping start and end dates
#'
#' @param schedule scheduel data.frame
#' @param ncores number of processes for parallel processing (default = 1)
#' @noRd
#'
makeCalendar <- function(schedule, ncores = 1) {
# prep the inputs
calendar <- schedule[, c("Train UID", "Date Runs From", "Date Runs To", "Days Run", "STP indicator", "rowID", "Headcode", "ATOC Code", "Retail Train ID", "Train Status")]
calendar$`STP indicator` <- as.character(calendar$`STP indicator`)
# calendar = calendar[order(-calendar$`STP indicator`),]
names(calendar) <- c("UID", "start_date", "end_date", "Days", "STP", "rowID", "Headcode", "ATOC Code", "Retail Train ID", "Train Status")
calendar$duration <- calendar$end_date - calendar$start_date + 1
# UIDs = unique(calendar$UID)
# length_todo = length(UIDs)
message(paste0(Sys.time(), " Constructing calendar and calendar_dates"))
calendar_split <- split(calendar, calendar$UID)
if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
# parallel::clusterExport(
# cl = cl,
# varlist = c("calendar", "UIDs"),
# envir = environment()
# )
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
pbapply::pboptions(use_lb = TRUE)
res <- pbapply::pblapply(calendar_split,
# 1:length_todo,
makeCalendar.inner,
# UIDs = UIDs,
# calendar = calendar,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
} else {
res <- pbapply::pblapply(
calendar_split,
# 1:length_todo,
makeCalendar.inner # ,
# UIDs = UIDs,
# calendar = calendar
)
}
# if(ncores == 1){
# res = lapply(1:length_todo, makeCalendar.inner)
# }else{
# CL <- parallel::makeCluster(ncores) #make clusert and set number of core
# parallel::clusterExport(cl = CL, varlist=c("calendar", "UIDs"), envir = environment())
# parallel::clusterExport(cl = CL, c('splitDates'), envir = environment() )
# parallel::clusterEvalQ(cl = CL, {library(dplyr)})
# res = parallel::parLapply(cl = CL,1:length_todo,makeCalendar.inner)
# parallel::stopCluster(CL)
# }
res.calendar <- lapply(res, `[[`, 1)
res.calendar <- dplyr::bind_rows(res.calendar)
res.calendar_dates <- lapply(res, `[[`, 2)
res.calendar_dates <- res.calendar_dates[!is.na(res.calendar_dates)]
res.calendar_dates <- dplyr::bind_rows(res.calendar_dates)
days <- lapply(res.calendar$Days, function(x) {
as.integer(substring(x, 1:7, 1:7))
})
days <- matrix(unlist(days), ncol = 7, byrow = T)
days <- as.data.frame(days)
names(days) <- c("monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday")
res.calendar <- cbind(res.calendar, days)
res.calendar$Days <- NULL
message(paste0(Sys.time(), " Removing trips that only occur on days of the week that are non-operational"))
res.calendar.split <- split(res.calendar, seq(1, nrow(res.calendar)))
if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterEvalQ(cl, {
loadNamespace("UK2GTFS")
})
keep <- pbapply::pbsapply(res.calendar.split,
checkrows,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
} else {
keep <- pbapply::pbsapply(res.calendar.split, checkrows)
}
# if(ncores == 1){
# keep = sapply(seq(1,nrow(res.calendar)),checkrows)
# }else{
# CL <- parallel::makeCluster(ncores) #make clusert and set number of core
# parallel::clusterExport(cl = CL, varlist=c("res.calendar"), envir = environment())
# parallel::clusterExport(cl = CL, c('checkrows'), envir = environment() )
# parallel::clusterEvalQ(cl = CL, {library(dplyr)})
# keep = parallel::parSapply(cl = CL,X = seq(1,nrow(res.calendar)), FUN = checkrows)
# parallel::stopCluster(CL)
# }
res.calendar <- res.calendar[keep, ]
return(list(res.calendar, res.calendar_dates))
}
#' make calendar hleper function
#' @param i row number to do
#' @noRd
#'
makeCalendar.inner <- function(calendar.sub) { # i, UIDs, calendar){
# UIDs.sub = UIDs[i]
# calendar.sub = calendar[calendar$UID == UIDs.sub,]
# calendar.sub = schedule[schedule$`Train UID` == UIDs.sub,]
if (nrow(calendar.sub) == 1) {
# make into an single entry
return(list(calendar.sub, NA))
} else {
# check duration and types
dur <- as.numeric(calendar.sub$duration[calendar.sub$STP != "P"])
typ <- calendar.sub$STP[calendar.sub$STP != "P"]
typ.all <- calendar.sub$STP
if (all(dur == 1) & all(typ == "C") & length(typ) > 0 & length(typ.all) == 2) {
# One Day cancelationss
# Modify in the calendar_dates.txt
return(list(
calendar.sub[calendar.sub$STP == "P", ],
calendar.sub[calendar.sub$STP != "P", ]
))
} else {
# check for identical day pattern
if (length(unique(calendar.sub$Days)) == 1 & sum(typ.all == "P") == 1) {
calendar.new <- UK2GTFS:::splitDates(calendar.sub)
return(list(calendar.new, NA))
} else {
# split by day pattern
splits <- list()
daypatterns <- unique(calendar.sub$Days)
for (k in seq(1, length(daypatterns))) {
# slect for each patter but include cancellations with a different day pattern
calendar.sub.day <- calendar.sub[calendar.sub$Days == daypatterns[k] | calendar.sub$STP == "C", ]
if (all(calendar.sub.day$STP == "C")) {
# ignore cases of only cancleds
splits[[k]] <- NULL
} else {
calendar.new.day <- UK2GTFS:::splitDates(calendar.sub.day)
# rejects nas
if (class(calendar.new.day) == "data.frame") {
calendar.new.day$UID <- paste0(calendar.new.day$UID, k)
splits[[k]] <- calendar.new.day
}
}
}
splits <- dplyr::bind_rows(splits)
return(list(splits, NA))
}
}
}
}
#' Duplicate stop_times
#'
#' @details
#' Function that duplicates top times for trips that have been split into multiple trips
#'
#' @param calendar calendar data.frame
#' @param stop_times stop_times data.frame
#' @param ncores number of processes for parallel processing (default = 1)
#' @noRd
#'
duplicate.stop_times <- function(calendar, stop_times, ncores = 1) {
# calendar.nodup = calendar[!duplicated(calendar$rowID),]
# calendar.dup = calendar[duplicated(calendar$rowID),]
# rowID.unique = as.data.frame(table(calendar.dup$rowID))
# rowID.unique$Var1 = as.integer(as.character(rowID.unique$Var1))
#
# duplicate.stop_times.int = function(i){
# stop_times.tmp = stop_times[stop_times$schedule.rowID == rowID.unique$Var1[i],]
# reps = rowID.unique$Freq[i]
# index =rep(seq(1,reps),nrow(stop_times.tmp))
# index = index[order(index)]
# stop_times.tmp = stop_times.tmp[rep(seq(1,nrow(stop_times.tmp)), reps),]
# stop_times.tmp$index = index
# return(stop_times.tmp)
# }
#
# if(ncores == 1){
# stop_times.dup = lapply(1:length(rowID.unique$Var1),duplicate.stop_times.int)
# }else{
# CL <- parallel::makeCluster(ncores) #make clusert and set number of core
# parallel::clusterExport(cl = CL, varlist=c("rowID.unique", "calendar.dup","stop_times"), envir = environment())
# #parallel::clusterEvalQ(cl = CL, {library(dplyr)})
# stop_times.dup = parallel::parLapply(cl = CL,1:length(rowID.unique$Var1),duplicate.stop_times.int)
# parallel::stopCluster(CL)
# }
#
# stop_times.dup = dplyr::bind_rows(stop_times.dup)
#
# #Join on the nonduplicated trip_ids
# trip.ids.nodup = calendar.nodup[,c("rowID","trip_id")]
# stop_times = dplyr::left_join(stop_times,trip.ids.nodup, by = c("schedule.rowID" = "rowID"))
# stop_times = stop_times[!is.na(stop_times$trip_id),] #when routes are cancled their stop times are left without valid trip_ids
#
# #join on the duplicated trip_ids
# calendar2 = dplyr::group_by(calendar, rowID)
# calendar2 = dplyr::mutate(calendar2,Index=1:n())
#
# stop_times.dup$index2 = as.integer(stop_times.dup$index + 1)
# trip.ids.dup = calendar2[,c("rowID","trip_id","Index")]
# trip.ids.dup = as.data.frame(trip.ids.dup)
# stop_times.dup = dplyr::left_join(stop_times.dup,trip.ids.dup, by = c("schedule.rowID" = "rowID", "index2" = "Index"))
# stop_times.dup = stop_times.dup[,c("departure_time", "stop_id","rowID","arrival_time","schedule.rowID","trip_id")]
#
# #stop_times.dup = stop_times.dup[order(stop_times.dup$rowID),]
#
# stop_times.comb = rbind(stop_times, stop_times.dup)
#
# return(stop_times.comb)
}
#' Duplicate stop_times
#'
#' @details
#' Function that duplicates top times for trips that have been split into multiple trips
#'
#' @param calendar calendar data.frame
#' @param stop_times stop_times data.frame
#' @param ncores number of processes for parallel processing (default = 1)
#' @noRd
#'
duplicate.stop_times_alt <- function(calendar, stop_times, ncores = 1) {
calendar.nodup <- calendar[!duplicated(calendar$rowID), ]
calendar.dup <- calendar[duplicated(calendar$rowID), ]
rowID.unique <- as.data.frame(table(calendar.dup$rowID))
rowID.unique$Var1 <- as.integer(as.character(rowID.unique$Var1))
stop_times <- dplyr::left_join(stop_times, rowID.unique, by = c("schedule" = "Var1"))
stop_times_split <- split(stop_times, stop_times$schedule)
# TODO: The could handle cases of non duplicated stoptimes within duplicate.stop_times.int
# rather than splitting and rejoining, would bring code tidyness and speed improvements
duplicate.stop_times.int <- function(stop_times.tmp) {
# message(i)
# stop_times.tmp = stop_times[stop_times$schedule == rowID.unique$Var1[i],]
# reps = rowID.unique$Freq[i]
reps <- stop_times.tmp$Freq[1]
if (is.na(reps)) {
return(NULL)
} else {
index <- rep(seq(1, reps), nrow(stop_times.tmp))
index <- index[order(index)]
stop_times.tmp <- stop_times.tmp[rep(seq(1, nrow(stop_times.tmp)), reps), ]
stop_times.tmp$index <- index
return(stop_times.tmp)
}
}
if (ncores == 1) {
stop_times.dup <- pbapply::pblapply(stop_times_split, duplicate.stop_times.int)
} else {
cl <- parallel::makeCluster(ncores)
stop_times.dup <- pbapply::pblapply(stop_times_split,
duplicate.stop_times.int,
cl = cl
)
parallel::stopCluster(cl)
rm(cl)
}
stop_times.dup <- dplyr::bind_rows(stop_times.dup)
# stop_times.dup$index <- NULL
# Join on the nonduplicated trip_ids
trip.ids.nodup <- calendar.nodup[, c("rowID", "trip_id")]
stop_times <- dplyr::left_join(stop_times, trip.ids.nodup, by = c("schedule" = "rowID"))
stop_times <- stop_times[!is.na(stop_times$trip_id), ] # when routes are cancled their stop times are left without valid trip_ids
# join on the duplicated trip_ids
calendar2 <- dplyr::group_by(calendar, rowID)
calendar2 <- dplyr::mutate(calendar2, Index = seq(1, dplyr::n()))
stop_times.dup$index2 <- as.integer(stop_times.dup$index + 1)
trip.ids.dup <- calendar2[, c("rowID", "trip_id", "Index")]
trip.ids.dup <- as.data.frame(trip.ids.dup)
stop_times.dup <- dplyr::left_join(stop_times.dup, trip.ids.dup, by = c("schedule" = "rowID", "index2" = "Index"))
stop_times.dup <- stop_times.dup[, c(
"arrival_time", "departure_time", "stop_id", "stop_sequence",
"pickup_type", "drop_off_type", "rowID", "schedule", "trip_id"
)]
stop_times <- stop_times[, c(
"arrival_time", "departure_time", "stop_id", "stop_sequence",
"pickup_type", "drop_off_type", "rowID", "schedule", "trip_id"
)]
# stop_times.dup = stop_times.dup[order(stop_times.dup$rowID),]
stop_times.comb <- rbind(stop_times, stop_times.dup)
return(stop_times.comb)
}
#' fix times fro jounrye that run past midnight
#'
#' @details
#' When train rund over midnight GTFS requries the stop times to be in 24h+ e.g. 26:30:00
#'
#' @param stop_times stop_times data.frame
#' @param safe logical (default = TRUE) should the check for trains running more than 24h be perfomed?
#'
#' @details
#' Not running the 24 check is faster, in the check is run a warning is returned, but the error is not fixed
#' As the longest train jounrey in the UK is 13 hours (Aberdeen to Penzance) this is unlikley to be a problem
#' @noRd
#'
afterMidnight <- function(stop_times, safe = TRUE) {
stop_times2 <- stop_times
# stop_times2$arv = as.integer(paste0(substr(stop_times2$arrival_time,1,2),substr(stop_times2$arrival_time,4,5)))
# stop_times2$dept = as.integer(paste0(substr(stop_times2$departure_time,1,2),substr(stop_times2$departure_time,4,5)))
stop_times2$arv <- as.integer(stop_times2$arrival_time)
stop_times2$dept <- as.integer(stop_times2$departure_time)
stop_times.summary <- dplyr::group_by(stop_times2, trip_id)
stop_times.summary <- dplyr::summarise(stop_times.summary,
dept_first = dept[stop_sequence == 1]
)
stop_times2 <- dplyr::left_join(stop_times2, stop_times.summary, by = "trip_id")
stop_times2$arvfinal <- ifelse(stop_times2$arv < stop_times2$dept_first, stop_times2$arv + 2400, stop_times2$arv)
stop_times2$depfinal <- ifelse(stop_times2$dept < stop_times2$dept_first, stop_times2$dept + 2400, stop_times2$dept)
if (safe) {
# check if any train more than 24 hours
stop_times.summary2 <- dplyr::group_by(stop_times2, trip_id)
stop_times.summary2 <- dplyr::summarise(stop_times.summary2,
arv_last = arvfinal[stop_sequence == max(stop_sequence)],
arv_max = max(arvfinal, na.rm = T)
)
check <- stop_times.summary2$arv_last < stop_times.summary2$arv_max
if (any(check)) {
warning("24 hour clock correction will return false results for any trip where total travel time exceeds 24 hours")
}
}
numb2time <- function(numb) {
numb <- as.character(numb)
cnt <- nchar(numb)
if (cnt == 4) {
numb <- paste0(substr(numb, 1, 2), ":", substr(numb, 3, 4), ":00")
} else if (cnt == 3) {
numb <- paste0("0", substr(numb, 1, 1), ":", substr(numb, 2, 3), ":00")
} else if (cnt == 2) {
numb <- paste0("00:", numb, ":00")
} else if (cnt == 1) {
numb <- paste0("00:0", numb, ":00")
} else {
error("Unknown Time Format")
stop()
}
return(numb)
}
stop_times2$arrival_time <- pbapply::pbsapply(stop_times2$arvfinal, numb2time)
stop_times2$departure_time <- pbapply::pbsapply(stop_times2$depfinal, numb2time)
stop_times2 <- stop_times2[, c("trip_id", "arrival_time", "departure_time", "stop_id", "stop_sequence", "pickup_type", "drop_off_type")]
return(stop_times2)
}
#' Clean Stops
#'
#' @details
#' Some TIPLOCS have the same physical location, and some are unused this function cleans that up
#'
#' @param stop_times stop_times data.frame
#' @param stops stops data.frame
#' @noRd
#'
cleanstops <- function(stop_times, stops) {
}
#' Clean Activities
#' @param x character activities
#' @details
#' Change Activities code to pickup and drop_off
#' https://wiki.openraildata.com//index.php?title=Activity_codes
#'
#' @noRd
#'
clean_activities <- function(x) {
if ("T" %in% x) {
return(c(0, 0))
} else if ("U" %in% x) {
return(c(0, 1))
} else if ("TF" %in% x) {
return(c(1, 0))
} else if ("D" %in% x) {
return(c(1, 0))
} else if ("TFN" %in% x) {
return(c(1, 0))
} else if ("TFT" %in% x) {
return(c(1, 0))
} else if ("TFRM" %in% x) {
return(c(1, 0))
} else if ("TFD" %in% x) {
return(c(1, 0))
} else if ("TF-D" %in% x) {
return(c(1, 0))
} else if ("TFTW" %in% x) {
return(c(1, 0))
} else if ("TFX" %in% x) {
return(c(1, 0))
} else if ("TF-U" %in% x) {
return(c(1, 0))
} else if ("TFS" %in% x) {
return(c(1, 1))
} else if ("TFR" %in% x) {
return(c(1, 0))
} else if ("TFU" %in% x) {
return(c(0, 0))
} else {
message(paste0("Unknown ", paste(x, " ")))
}
}
#' Clean Activities
#' @param x character activities
#' @details
#' Change Activities code to pickup and drop_off
#' https://wiki.openraildata.com//index.php?title=Activity_codes
#'
#' @noRd
#'
clean_activities2 <- function(x) {
# Load Data
# data("activity_codes")
x <- data.frame(activity = x, stringsAsFactors = FALSE)
x <- dplyr::left_join(x, activity_codes, by = c("activity"))
if (anyNA(x$pickup_type)) {
message("Unknown Activity codes ", paste(unique(x$activity), collapse = " "), " please report these codes as a GitHub Issue")
x$pickup_type[is.na(x$pickup_type)] <- 0
x$drop_off_type[is.na(x$drop_off_type)] <- 0
}
x <- x[, c("pickup_type", "drop_off_type")]
return(x)
}
#' Check for valid day of the week
#' @param from date
#' @param to date
#' Returns the days of the week that are between two dates
#'
#' Check for valid day of the week
#' @param from date
#' @param to date
#' Returns the days of the week that are between two dates
#'
# valid_days <- function(from, to, duration, monday,tuesday, wednesday,
# thursday, friday, saturday, sunday){
# if(duration >= 7){
# message("skipped")
# return(TRUE)
# }else{
# days.valid <- tolower(weekdays(seq.POSIXt(from = from,
# to = to,
# by = "DSTday")))
# message(paste0("did "), length(days.valid), "of ", class(days.valid))
# days.valid <- unique(days)
#
# days.opp <- c("monday","tuesday", "wednesday","thursday", "friday", "saturday", "sunday")
# days.opp <- days.opp[c(monday,tuesday, wednesday,thursday, friday, saturday, sunday)]
#
# if(any(days.valid %in% days.opp)){
# return(TRUE)
# }else{
# return(FALSE)
# }
# }
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.