#' Simple destination choice model for local (internal) truck trips
#'
#' @param truck_origins Data frame containing discrete daily trips for each
#' tour, usually produced by a trip or tour generation function
#' @param skim_matrices Data frame containing skim distance and travel time for
#' each origin-destination pair, usually only containing records for zone
#' pairs that fall within user-defined threshold (i.e., excludes what would be
#' only a long-distance trip)
#' @param utility_parameters Data frame containing weights for distance and
#' attractors for each truck type defined in the simulation
#' @param save_to File name for saving the trip records with destinations
#' appended in comma-separated value format (optional)
#'
#' @details This is a simple destination choice model that mimics a singly-
#' constrained gravity model. It samples eligible destinations based upon the
#' product of interzonal distance and attractions, however defined. The
#' distances are typically limited to a certain distance (120-180 miles),
#' beyond which are considered the realm of long-distance trips. Different
#' weights for distance (alpha1) and attractors (alpha2) ar defined for each
#' truck type (and can be all set to unity to eliminate such scaling),
#' otherwise known as utility parameters. In a typical gravity model
#' formulation we divide the product of attractions and interzonal impedance
#' by the sum of them for all zones. Sampling rather than calculating
#' deterministic proportions obviates the need for this step. The output from
#' this function is the same as the input truck origins, with the destination
#' appended. The user can optionally also save this data frame in a comma-
#' separated value file.
#'
#' @export
#' @examples
#' sample_local_truck_destinations(truck_origins, skim_matrices,
#' utility_parameters, "daily-truck-odflows.csv")
sample_local_truck_destinations <- function(truck_origins, skim_matrices,
trip_length_targets, utility_parameters, save_to = NULL) {
simulation_start <- proc.time()
# The trip length targets are defined by truck type, and provided in wide
# format. Convert to tall format to make subsetting easier.
max_target_distance <- max(trip_length_targets$distance)
targets <- trip_length_targets %>%
tidyr::gather(truck_type, probability, -distance) %>%
dplyr::filter(probability > 0.0) %>%
dplyr::rename(distanceI = distance) # To match with integer distance
# It is possible that a zone might be coded further from its nearest neighbor
# such that its intrazonal travel time is longer than the maximum target
# value. That is likely to be error in skim processing, for intrazonal times
# are small even in rural areas. Thus, we will recode intrazonal distances to
# fall within our specified range, so that at least intrazonal is an option.
skim_matrices <- dplyr::mutate(skim_matrices,
distance = ifelse(origin == destination & distance > max_target_distance,
max_target_distance-1, distance))
# Now we can eliminate all of the skim zone pairs where the distance is beyond
# threshold (maximum target distance), which will hopefully make processing
# the remainders faster
skims <- skim_matrices %>%
dplyr::filter(distance <= max_target_distance) %>%
dplyr::mutate(distanceI = round(distance, 0))
# Create a list of eligible destination zones from the skim matrix (if it is
# not defined then it is outside of our threshold, or is zone that has no
# corresponding centroid connector in the network file)
dzones <- sort(as.integer(unique(skims$destination)))
print(paste(length(dzones), "destination zones found in skim matrices"),
quote = FALSE)
# Determine which truck types are defined in the trip origins
truck_types <- unique(truck_origins$truck_type)
# Check to make sure that we don't have trips originating from alpha zones
# that are not in the skim matrix
CTO <- sort(unique(truck_origins$origin))
problem_children <- list()
for (c in CTO) {
if (!c %in% dzones) problem_children <- c(problem_children, c)
}
if (length(problem_children>0)) {
error_message <- "Alpha zones in trip list with no corresponding skim"
print(paste(error_message, ':', sep = ''), quote = FALSE)
print(unlist(problem_children))
stop(error_message)
}
# Sum the attractors by alpha zone. We will need to have data defined for each
# of the zones defined in the skim matrix, even if some have zero truck trips
# associated with it. Thus, we'll substitute any missing values with zeros.
attractors <- truck_origins %>%
dplyr::group_by(origin, truck_type) %>%
dplyr::summarise(attractors = n()) %>%
dplyr::rename(destination = origin)
# Finally, run the model. The ideal distributions and alpha parameters differ
# by truck type, so we will handle each one differently. At the end of
# handling each truck type we will add those results to a final data table
# that will have OD flows.
origin_zones <- sort(unique(truck_origins$origin))
truck_origins$destination <- NA # We'll fill this in as we go along
for (t in truck_types) {
print(paste("Sampling destinations for",
nrow(dplyr::filter(truck_origins, truck_type==t)), t, "origins"), quote=FALSE)
# Grab the target trip distances for this particular truck type only
these_targets <- dplyr::filter(targets, truck_type == t)
# Grab the relevant attractors and scale them
these_attractors <- attractors %>%
dplyr::filter(truck_type == t) %>%
dplyr::mutate(attractors = attractors*
utility_parameters$alpha2[utility_parameters$truck_type == t])
# Process each origin in turn
for (this_origin in origin_zones) {
# Determine how many trips we have
these_origins <- dplyr::filter(truck_origins, origin == this_origin,
truck_type == t)
N <- nrow(these_origins)
if (N==0) next
# We first need to assign probability to each integer distance in the skim
# matrix, which we can easily do by merging it with the targets. Drop
# cases where the probability is undefined (i.e., outside our max target
# distance)
these_skims <- skims %>%
dplyr::filter(origin == this_origin) %>%
dplyr::left_join(these_targets, by = "distanceI") %>%
dplyr::filter(!is.na(probability)) %>%
dplyr::mutate(probability = probability *
utility_parameters$alpha1[utility_parameters$truck_type == t])
# But othewise calculate the total attractiveness (zed) of each possible
# destination, and then sample from that the required number of times
combined <- these_skims %>%
dplyr::left_join(these_attractors, by = "destination") %>%
dplyr::mutate(zed = probability*attractors) %>%
dplyr::filter(!is.na(zed))
# Check that we have at least two destination choices. If not, show us the
# 10 closest skims and attractions associated with them. Then create a new
# data frame that allows us to choose intrazonal trip.
if (nrow(combined)<2) {
# Tattle on the closest neighboring zone
ex_skims <- dplyr::filter(skim_matrices, origin == this_origin,
destination != this_origin)
nearest_neighbor <- round(min(ex_skims$distance, na.rm = TRUE), 1)
print(paste("No neighbor within range: zone=", this_origin,
", nearest=", nearest_neighbor, ", ", N, " ", t,
" trips set to intrazonal", sep = ''), quote = FALSE)
# Then spin up the intrazonal alternative
combined <- dplyr::data_frame(destination = this_origin, zed = 0.5,
seq = 1:2)
}
# And finally, choose the destination(s) for trip originating in the
# currently processed origin
sampled_destinations <- sample(combined$destination, N, replace = TRUE,
prob = combined$zed)
truck_origins$destination[truck_origins$origin==this_origin &
truck_origins$truck_type == t] <- sampled_destinations
}
}
# There should be none, but check that we have no cases where the destination
# is a missing value. If we find that pathological case then stop the
# simulation.
problems <- dplyr::filter(truck_origins, is.na(destination))
if (nrow(problems)>0) {
error_message <- paste("Destinations not processed for", nrow(problems),
"cases")
readr::write_csv(problems, "destinations_not_assigned.csv")
stop(error_message)
}
# Append the travel time and distance for each distance to the trip records
appended <- truck_origins %>%
dplyr::left_join(skim_matrices, by = c("origin", "destination"))
# Report summary statistics for this truck type
for (t in truck_types) {
these_trucks <- dplyr::filter(appended, truck_type == t)
S <- paste("Sampled ", t, " destinations: n=", nrow(these_trucks), " avg distance=",
round(mean(these_trucks$distance, na.rm = TRUE),1), " avg travel time=",
round(mean(these_trucks$travel_time, na.rm = TRUE),1), sep = '')
print(S, quote=FALSE)
}
# If the user has asked to save intermediate results now would be great time
# to do so
if (!is.null(save_to)) readr::write_csv(appended, save_to)
# Tell us the outcome and exit stage right
simulation_stop <- proc.time()
elapsed_seconds <- round((simulation_stop-simulation_start)[["elapsed"]], 1)
print(paste("Simulation time=", elapsed_seconds, "seconds"), quote=FALSE)
appended
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.