inst/doc/transfers.R

## ----DTthread, echo = FALSE---------------------------------------------------
# Necessary for CRAN to avoid CPU / elapsed time ratios being too high
nthr <- data.table::setDTthreads (1)

## ----berlin_gtfs--------------------------------------------------------------
library (gtfsrouter)
f <- berlin_gtfs_to_zip ()
gtfs <- extract_gtfs (f, quiet = TRUE)

## ----transfers-struct---------------------------------------------------------
gtfs$transfers

## ----route1-------------------------------------------------------------------
gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday"
)

## ----no-transfers-------------------------------------------------------------
gtfs$transfers <- NULL
gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday"
)

## ----transfer-table-200-------------------------------------------------------
gtfs <- gtfs_transfer_table (gtfs, d_limit = 200)
gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday"
)

## ----transfer-table-size------------------------------------------------------
gtfs <- extract_gtfs (f, quiet = TRUE) # 'f' is the location generated above
nrow (gtfs$transfers)
length (which (!duplicated (gtfs$transfers [, c ("from_stop_id", "to_stop_id")])))

## ----transfer-table-size-data, echo = FALSE-----------------------------------
n0 <- nrow (gtfs$transfers)
n1 <- length (which (!duplicated (gtfs$transfers [, c ("from_stop_id", "to_stop_id")])))

## ----transfer-table-regen-----------------------------------------------------
gtfs$transfers <- NULL
gtfs <- gtfs_transfer_table (gtfs)
nrow (gtfs$transfers)

## ----transfer-table-regen2----------------------------------------------------
gtfs <- extract_gtfs (f, quiet = TRUE)
gtfs <- gtfs_transfer_table (gtfs)
nrow (gtfs$transfers)

## ----transfer-table-extend----------------------------------------------------
gtfs <- extract_gtfs (f, quiet = TRUE)
vapply (0:10, function (i) {
    gtfs <- gtfs_transfer_table (gtfs, d_limit = i * 100)
    return (nrow (gtfs$transfers))
}, integer (1))

## ----transfers-reduce-d_lim---------------------------------------------------
gtfs <- gtfs_transfer_table (gtfs, d_limit = 100)
nrow (gtfs$transfers)

## ----transfers-reduce-d_lim2--------------------------------------------------
gtfs$transfers <- NULL
gtfs <- gtfs_transfer_table (gtfs, d_limit = 100)
nrow (gtfs$transfers)

## ----route2-------------------------------------------------------------------
r <- gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday",
    include_ids = TRUE
)
stns <- r$stop_id [3:4] # the transfer station IDs
gtfs$stops [match (stns, gtfs$stops$stop_id), ]

## ----dist---------------------------------------------------------------------
s <- gtfs$stops [match (stns, gtfs$stops$stop_id), ]
as.numeric (geodist::geodist (s [1, ], s [2, ]))

## ----SU_routes----------------------------------------------------------------
S_routes <- gtfs$routes$route_id [grep ("^S", gtfs$routes$route_short_name)]
U_routes <- gtfs$routes$route_id [grep ("^U", gtfs$routes$route_short_name)]
S_trips <- gtfs$trips$trip_id [which (gtfs$trips$route_id %in% S_routes)]
U_trips <- gtfs$trips$trip_id [which (gtfs$trips$route_id %in% U_routes)]

## ----SU_stops-----------------------------------------------------------------
S_stops <- gtfs$stop_times$stop_id [which (gtfs$stop_times$trip_id %in% S_trips)]
S_stops <- unique (S_stops)
U_stops <- gtfs$stop_times$stop_id [which (gtfs$stop_times$trip_id %in% U_trips)]
U_stops <- unique (U_stops)

## ----SU_stops2----------------------------------------------------------------
S_stops <- S_stops [which (!S_stops %in% U_stops)]
U_stops <- U_stops [which (!U_stops %in% S_stops)]

## ----penalty------------------------------------------------------------------
index <- which ((gtfs$transfers$from_stop_id %in% S_stops &
    gtfs$transfers$to_stop_id %in% U_stops) |
    (gtfs$transfers$from_stop_id %in% U_stops &
        gtfs$transfers$to_stop_id %in% S_stops))
gtfs$transfers$min_transfer_time [index] <-
    gtfs$transfers$min_transfer_time [index] + 120

## ----route3-------------------------------------------------------------------
gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday"
)

## ----transfer_penalties_fn----------------------------------------------------
gtfs$transfers <- NULL
gtfs <- gtfs_transfer_table (gtfs, d_limit = 500)

transfer_penalties <- function (gtfs, penalty = 120) {

    S_routes <- gtfs$routes$route_id [grep ("^S", gtfs$routes$route_short_name)]
    U_routes <- gtfs$routes$route_id [grep ("^U", gtfs$routes$route_short_name)]
    S_trips <- gtfs$trips$trip_id [which (gtfs$trips$route_id %in% S_routes)]
    U_trips <- gtfs$trips$trip_id [which (gtfs$trips$route_id %in% U_routes)]

    S_stops <- gtfs$stop_times$stop_id [which (gtfs$stop_times$trip_id %in% S_trips)]
    S_stops <- unique (S_stops)
    U_stops <- gtfs$stop_times$stop_id [which (gtfs$stop_times$trip_id %in% U_trips)]
    U_stops <- unique (U_stops)

    S_stops <- S_stops [which (!S_stops %in% U_stops)]
    U_stops <- U_stops [which (!U_stops %in% S_stops)]

    index <- which ((gtfs$transfers$from_stop_id %in% S_stops &
        gtfs$transfers$to_stop_id %in% U_stops) |
        (gtfs$transfers$from_stop_id %in% U_stops &
            gtfs$transfers$to_stop_id %in% S_stops))
    gtfs$transfers$min_transfer_time [index] <-
        gtfs$transfers$min_transfer_time [index] + 120

    return (gtfs)
}
gtfs <- transfer_penalties (gtfs)

## ----route4-------------------------------------------------------------------
gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday"
)

## ----route4b------------------------------------------------------------------
r <- gtfs_route (
    gtfs,
    from = "Friedrichstr.",
    to = "Rosenthaler Platz",
    start_time = 12 * 3600,
    day = "Monday",
    include_ids = TRUE
)
s <- gtfs$stops [match (r$stop_id [2:3], gtfs$stops$stop_id), ]
as.numeric (geodist::geodist (s [1, ], s [2, ]))

## ----d_limit1-----------------------------------------------------------------
d_limit <- 1:20 * 100
n <- vapply (d_limit, function (i) {
    gtfs$transfer <- NULL
    gtfs <- gtfs_transfer_table (gtfs, d_limit = i)
    nrow (gtfs$transfers)
}, integer (1))
d_limit <- d_limit / 1000 # in km
n <- n / 1000 # in thousands
plot (
    d_limit, n,
    type = "l", col = "red", lwd = 2,
    xlab = "Maximal transfer distance",
    ylab = "Number of transfers (1000s)"
)

## ----travel_times_d_limit-fn, message = FALSE---------------------------------
library (dplyr)
library (hms) # 'parse_hms' function

travel_times_d_limit <- function (gtfs, from, d_limit = 200) {

    gtfs$transfers <- NULL
    gtfs <- gtfs_transfer_table (gtfs, d_limit = d_limit)
    gtfs <- transfer_penalties (gtfs)

    start_time_limits <- 12:13 * 3600

    get_one_times <- function (gtfs, from, start_time_limits) {

        x <- gtfs_traveltimes (
            gtfs,
            from = from,
            start_time_limits = start_time_limits,
            quiet = TRUE
        )
        ret <- NULL
        if (nrow (x) > 0) {
            # Convert duration to minutes:
            dur <- as.integer (parse_hms (x$duration)) / 60
            ret <- data.frame (
                from = from,
                to = x$stop_id,
                duration = dur
            )
        }

        return (ret)
    }

    res <- lapply (from, function (i) {
        get_one_times (gtfs, i, start_time_limits)
    })
    res <- do.call (rbind, res) %>%
        group_by (from, to) %>%
        summarise (duration = mean (duration), .groups = "drop")
    res$d_limit <- d_limit

    return (res)
}

## ----plot1, warning = FALSE, message = FALSE----------------------------------
set.seed (10L)
from <- sample (gtfs$stops$stop_name, size = 10)
d_limit <- 1:10 * 100
x <- lapply (d_limit, function (i) {
    travel_times_d_limit (gtfs, from = from, d_limit = i)
})
xall <- do.call (rbind, x) [, c ("from", "to")]
xall <- xall [which (!duplicated (xall)), ]

for (i in seq_along (x)) {

    y <- left_join (xall, x [[i]], by = c ("from", "to"))
    xall [paste0 ("d_", x [[i]]$d_limit [1])] <- y$duration
}

index <- which (xall$d_1000 != xall$d_100)
# record number of stations for which travel times did not change:
nsame <- nrow (xall) - length (index)

xall <- xall [index, ]
# travel times only:
times <- xall %>% select (!c ("from", "to"))
# differences with each increase in d_limit:
times <- t (apply (times, 1, function (i) {
    diff (i)
}))
times <- data.frame (t = as.vector (times))
library (ggplot2)
ggplot (times, aes (x = t)) +
    stat_bin (bins = 30, col = "red", fill = "orange") +
    scale_y_log10 () +
    xlab ("time difference (minutes)")

## ----transfer_difference_fn---------------------------------------------------
transfer_difference <- function (gtfs,
                                 nsamples = 10,
                                 d_limit = 200,
                                 day = "Monday",
                                 start_time_limits = 12:13 * 3600) {

    g1 <- data.table::copy (gtfs)
    g1 <- gtfs_timetable (g1, day = day)

    g2 <- data.table::copy (gtfs)
    g2 <- gtfs_transfer_table (g2, d_limit = d_limit, quiet = TRUE)
    g2 <- gtfs_timetable (g2, day = day)

    get1 <- function (gtfs, gtfs2, start_time_limits) {

        from <- sample (gtfs$stops$stop_name, size = 1)
        x1 <- gtfs_traveltimes (
            gtfs,
            from = from,
            start_time_limits = start_time_limits,
            quiet = TRUE
        )

        x2 <- gtfs_traveltimes (
            gtfs2,
            from = from,
            start_time_limits = start_time_limits,
            quiet = TRUE
        )

        if (nrow (x1) == 0L | nrow (x2) == 0L) {
            return (rep (NA, 2))
        }

        x2 <- data.frame (
            stop_id = x2$stop_id,
            duration2 = x2$duration
        )
        x2 <- dplyr::left_join (x1, x2, by = "stop_id")

        dat <- data.frame (
            x = as.integer (parse_hms (x2$duration)) / 60,
            y = as.integer (parse_hms (x2$duration2)) / 60
        )
        mod <- lm (y ~ x + 0, data = dat)
        return (c (
            prop = length (which (dat$x != dat$y)) / nrow (dat),
            change = as.numeric (mod$coefficients)
        ))
    }

    vapply (seq (nsamples), function (i) {
        get1 (g1, g2, start_time_limits)
    },
    numeric (2),
    USE.NAMES = FALSE
    )
}

## ----transfer_d_limit_results, warning = FALSE--------------------------------
d_limit <- 1:10 * 100
set.seed (1L)
d <- lapply (d_limit, function (i) {
    transfer_difference (
        gtfs,
        nsamples = 20,
        d_limit = i,
        day = "Monday",
        start_time_limits = 12:13 * 3600
    )
})
d <- lapply (seq_along (d_limit), function (i) {
    data.frame (
        d_limit = d_limit [i],
        prop = d [[i]] [1, ],
        change = d [[i]] [2, ]
    )
})
d <- do.call (rbind, d)

ggplot (d, aes (x = d_limit, y = change)) +
    geom_point (pch = 19, col = "orange") +
    geom_smooth (method = "loess", formula = "y ~ x")

## ----stuttgart-results, echo = FALSE, warning = FALSE-------------------------
d <- list ()

# 1
d [[1]] <- rbind (
    c (
        0.04787234, 0.3024283, 0.2137809, 0.4226190, 0.4782609, 0,
        0.1996497, 0.0324826, 0.3122142, 0.1051136, 0.1330166,
        0.1755486, 0.3659229, NA, 0.2471751, 0.03344482,
        0.3617021, 0.3400000, NA, 0.05964215
    ),
    c (
        0.97835370, 0.9045062, 0.9552470, 0.8157467, 0.8343242, 1,
        0.8956478, 0.9857173, 0.9197187, 0.9813956, 0.9488823,
        0.9505516, 0.8601199, NA, 0.9367492, 0.99126114,
        0.8190608, 0.9064265, NA, 0.97887272
    )
)

# 2
d [[2]] <- rbind (
    c (
        0.2675343, 0.3167203, 0.4016824, 0.3119881, 0.3680556,
        0.09902913, 0.1257367, 0.1349578, 0.1366906, 0.4723442,
        0.2439614, 0.2401575, 0.2959184, 0.1376147, 0.4624113,
        0.1189189, 0.2706450, 0.1621622, 0.01632653, 0.3847584
    ),
    c (
        0.9333812, 0.8716062, 0.8790074, 0.9153534, 0.8889449,
        0.97546764, 0.9747218, 0.9620882, 0.9235575, 0.8427352,
        0.8973988, 0.8534331, 0.8745992, 0.9666719, 0.8746626,
        0.9395889, 0.9442811, 0.9733787, 0.99658716, 0.8350441
    )
)

# 3
d [[3]] <- rbind (
    c (
        0.4449339, 0.3257576, 0.08494208, 0.07758621, 0.2345679,
        0.009615385, 0.1959877, 0.06282723, 0.05785124, 0.4670588,
        0.0562500, 0.1203704, 0.1000000, 0.1720779, 0.4771167,
        0.4372331, 0.2102273, 0.1291449, 0.4567901, 0
    ),
    c (
        0.8480413, 0.8881877, 0.97683839, 0.95861325, 0.8839587,
        0.990835401, 0.9353503, 0.97107607, 0.97832732, 0.8946960,
        0.9568263, 0.9783830, 0.9800815, 0.9538397, 0.8978858,
        0.8865221, 0.9256468, 0.9507978, 0.8657352, 1
    )
)

# 4
d [[4]] <- rbind (
    c (
        0.7241379, 0.2611807, 0.2901961, 0.02531646, 0.5215054,
        0.2336735, 0.5795779, 0.1518987, NA, 0.0625000, NA, 0,
        0.6513274, 0.4292683, 0.3418573, 0.1452703, 0.2619543,
        0.4628450, 0.2417375, 0.3950216
    ),
    c (
        0.7022668, 0.9451780, 0.8701041, 0.98733569, 0.8588725,
        0.9451916, 0.8372441, 0.9076009, NA, 0.9842143, NA, 1,
        0.7893403, 0.9412012, 0.9203506, 0.9627622, 0.9228610,
        0.8665221, 0.9418134, 0.9021016
    )
)

# 5
d [[5]] <- rbind (
    c (
        0.7239508, 0.8513952, 0.1402439, 0.1448468, 0.4007092,
        0.09883721, 0.08634538, 0.2936747, 0.7217235, 0.5807783,
        0.7090415, 0.1440678, 0.4746581, 0.2164179, 0.05633803,
        0.3898587, NA, 0.1363636, 0.6066116, 0.2217195
    ),
    c (
        0.8041968, 0.7696561, 0.9385228, 0.9466528, 0.8734857,
        0.97151782, 0.97918464, 0.9076568, 0.7783634, 0.8431704,
        0.7343155, 0.9412296, 0.8566643, 0.9716889, 0.99403550,
        0.9124251, NA, 0.9785596, 0.8332798, 0.9380071
    )
)

# 6
d [[6]] <- rbind (
    c (
        0.4354430, 0.6561666, 0.2995392, NA, 0.3553922, 0.3404826,
        0.2711864, 0.2535433, 0.4971098, 0.6451231, 0.117378, 0,
        0.6751592, 0.4408353, 0.6392199, 0.5826598, NA, 0.1823770,
        0.2105263, 0.1016043
    ),
    c (
        0.7925198, 0.8567067, 0.9134449, NA, 0.8825668, 0.9020107,
        0.9247888, 0.9654961, 0.8274002, 0.7975106, 0.979065, 1,
        0.7964435, 0.8632960, 0.8738289, 0.7831721, NA, 0.9436972,
        0.9274683, 0.9744991
    )
)

# 7
d [[7]] <- rbind (
    c (
        0.5275288, 0.8809990, 0.2042254, 0.2562101, 0, 0.3214286,
        0.3479730, 0.2478032, 0.3501229, 0.5036298, 0.0560000,
        0.7117819, 0.4114833, 0.2748092, 0.1865342, 0.2213115,
        0.1674208, 0.3247863, 0.01388889, 0.2974013
    ),
    c (
        0.8974057, 0.7428764, 0.9503437, 0.9217992, 1, 0.9228010,
        0.9036412, 0.9161946, 0.9596850, 0.8494607, 0.9632148,
        0.7661997, 0.8901800, 0.9080806, 0.9574235, 0.9429245,
        0.9445154, 0.9383823, 0.99440270, 0.9226305
    )
)

# 8
d [[8]] <- rbind (
    c (
        0.4239291, 0.6470588, 0.4225146, 0.2475728, 0.4358491,
        0.4682081, 0.8519306, 0.5047619, 0.03007519, 0.4723404,
        0.1925676, 0.2028470, 0.5584677, 0.2478386, 0.06122449, 0,
        0.1219512, 0.6993865, 0.3742455, NA
    ),
    c (
        0.9030931, 0.8842219, 0.8946512, 0.9000109, 0.8482130,
        0.9018473, 0.7631321, 0.6506485, 0.98723649, 0.8749593,
        0.9249907, 0.9497207, 0.8826570, 0.9193413, 0.97003127, 1,
        0.8942574, 0.8027865, 0.8630442, NA
    )
)

# 9
d [[9]] <- rbind (
    c (
        0.1160991, 0.4269663, 0.2256858, 0.3259604, 0.7434613,
        0.3766376, 0.3615819, 0.5671642, 0.1721311, 0.7014749,
        0.1000000, 0.07798165, 0.1622276, 0.4407583, 0.3865188,
        0.2470309, 0.3603175, 0.1516588, 0.3102310, 0.4602133
    ),
    c (
        0.9666432, 0.7528710, 0.9193014, 0.9227199, 0.7834500,
        0.8515732, 0.8863982, 0.8002182, 0.9434899, 0.7971016,
        0.9594046, 0.98316892, 0.9457987, 0.8288464, 0.9053315,
        0.8986293, 0.8698507, 0.9590871, 0.8651403, 0.8851207
    )
)

# 10
d [[10]] <- rbind (
    c (
        0.3747811, NA, 0.4325744, NA, 0.7301301, 0.06694561,
        0.2693267, 0.2651163, 0.5702840, 0.5865947, 0.6845638,
        0.1111111, 0.5206897, NA, 0.2322275, 0.2739726,
        0.03968254, 0.1763367, 0.9158513, 0.3980100
    ),
    c (
        0.8318046, NA, 0.8894870, NA, 0.8242255, 0.97770335,
        0.8996235, 0.8986981, 0.8131951, 0.8357680, 0.8225032,
        0.8502140, 0.8353131, NA, 0.9366721, 0.8817567,
        0.99278373, 0.9559705, 0.6732793, 0.8348557
    )
)

d <- lapply (seq_along (d_limit), function (i) {
    data.frame (
        d_limit = d_limit [i],
        prop = d [[i]] [1, ],
        change = d [[i]] [2, ]
    )
})
d <- do.call (rbind, d)

ggplot (d, aes (x = d_limit, y = change)) +
    geom_point (pch = 19, col = "orange") +
    geom_smooth (method = "loess", formula = "y ~ x")

## ----stuttgart-plot2, warning = FALSE-----------------------------------------
ggplot (d, aes (x = d_limit, y = prop)) +
    geom_point (pch = 19, col = "orange") +
    geom_smooth (method = "loess", formula = "y ~ x")

## ----DTthread-reset, echo = FALSE---------------------------------------------
data.table::setDTthreads (nthr)

Try the gtfsrouter package in your browser

Any scripts or data that you put into this service are public.

gtfsrouter documentation built on Sept. 24, 2023, 1:08 a.m.