#' Generate local truck trips (tour segments) from pseudo-firms
#'
#' @param synthetic_firms Data frame containing pseudo-firms and their
#' typical attributes
#' @param generation_probabilities Data frame containing trip generation rates
#' expressed as probabilities, for each trip generation category used in the
#' model
#' @param random_seed Integer value of random number seed set at outset of
#' sampling of trips generated by pseudo-firms (defaults to 1)
#' @param max_resampling_attempts Integer value that controls the number of
#' times the generation sampling will repeat if it does not finish with same
#' number of trips calculated in aggregate for the entire study area. If this
#' number is exceeded then the model will error. Alternatively if `1` then
#' resampling will be turned off.
#' @param max_resampling_threshold Floating point number defining how close
#' the simulation must match aggregate number of trips for the entire
#' modeled area (default is 0.5 percent)
#' @param save_to File name for saving the trip records with destinations
#' appended in comma-separated value format (optional)
#'
#' @details The trip generation function samples the number of discrete daily
#' trips generated by each pseudo-firm, and appends that number to each
#' synthetic firm. The mode of transport used for each trip is also sampled
#' and added to the trip records.
#'
#' @export
#' @examples
#' daily_trips <- local_truck_generation(synthetic_firms,
#' generation_probabilities)
#' daily_trips <- local_truck_generation(synthetic_firms,
#' generation_probabilities, random_seed = 1, max_resampling_attempts = 30,
#' max_resampling_threshold = 0.5, "local-truck-origins.csv")
local_truck_generation <- function(synthetic_firms, generation_probabilities,
random_seed = 1, max_resampling_attempts = 30, max_resampling_threshold = 0.5,
save_to = NULL) {
ct_msg(header = "Local truck trip generation")
set.seed(as.integer(random_seed))
# Reformat the generation probabilities into tall records that we can merge
# with our firms, dropping cases where probabilities are zero
probabilities <- generation_probabilities %>%
tidyr::gather("truck_type", "p_gen", -category) %>%
dplyr::filter(p_gen>0.0)
# Start by showing the aggregate trip (AT) generation using these values,
# which we'll compare the sum of microsimulated values to in calibration.
AT <- synthetic_firms %>%
dplyr::group_by(category) %>%
dplyr::summarise(employees = sum(employees))
truck_types <- sort(unique(probabilities$truck_type))
aggregate_results <- dplyr::data_frame()
for (t in truck_types) {
p <- probabilities %>%
dplyr::filter(truck_type==t) %>%
dplyr::inner_join(AT, by = "category") %>%
dplyr::mutate(target = round(employees*p_gen, 1)) %>%
dplyr::select(-p_gen, -employees)
aggregate_results <- dplyr::bind_rows(aggregate_results, p)
}
x <- addmargins(xtabs(target~category+truck_type, data = aggregate_results))
ct_msg(x)
# Finally, let's do the generation. We'll step through this process for each
# truck type defined in the trip generation probabilities.
results <- dplyr::data_frame() # Container to hold the generated trips
for (t in truck_types) {
# Create a subset of the probabilities for this truck type, which we'll
# merge with the alpha zone data. We use an inner join to ensure that we
# are only including firms that we have non-zero rates for.
these_probabilities <- dplyr::filter(probabilities, truck_type==t)
these_firms <- dplyr::inner_join(synthetic_firms, these_probabilities,
by = "category")
# Calculate the total daily trips generated by each firm and sum them for
# a target total. We will require the sum of the microsimulated trips to
# not deviate more than a user-specified difference from this initial
# So we will loop until we fall within that window.
success <- FALSE
stage <- ""
trials <- 1
# If we have a small enough number of total trucks of this type it might be
# hard to get under the maximum desirable error, even though we want it that
# low for the more frequently occurring types. So we'll keep track of the
# "best solution so far" so that we can retain that if we do not converge
# before we hit the maximum number of resampling attempts.
best_solution <- NULL
best_difference <- 999.9 # An easy bar to pass beneath
while (!success) {
# Check to make sure that we haven't exceeded the maximum number of
# attempts at replanning. If we have something pathological has happened
# and we need to stop dead in our tracks.
if (trials>as.integer(max_resampling_attempts)) {
these_firms$daily_trips <- best_solution
warning_message <- paste("Max resampling exceeded while working on", t,
"generation, set to best solution")
warning(warning_message)
break
}
# Calculate our target
these_firms$daily_trips <- these_firms$employees*these_firms$p_gen
initial_total <- round(sum(these_firms$daily_trips), 1)
# Generate discrete number of trucks for each firm. Use random draws to
# decide what to do with the fractional part of the trips.
pw <- trunc(these_firms$daily_trips) # Retain the whole part
fp <- these_firms$daily_trips-pw # Retain the fractional part
rn <- runif(length(fp), 0, 1) # Random draws to compare to
these_firms$daily_trips <- ifelse(rn<=fp, pw+1, pw)
adjusted_total <- sum(these_firms$daily_trips)
pct_difference<-((adjusted_total-initial_total)/initial_total)*100.0
# Store the best solution so far, in case we don't converge before we run
# out of resampling attempts
if (pct_difference < best_difference) {
best_difference <- pct_difference
best_solution <- these_firms$daily_trips
}
# Show us how we did
ct_msg(paste(stage, t, " origins: combined=", initial_total,
" simulated=", adjusted_total, " difference=",
round(pct_difference,2), "%", sep=''))
if(max_resampling_attempts > 0){ # only resample if asked
# Evaluate success
success <- abs(pct_difference) <= as.numeric(max_resampling_threshold)
stage <- "Resampling "
trials <- trials+1
} else {
break
}
}
# For now we'll combine the results for this truck type with all of the
# others processed. At this point we're appended the number of daily
# truck trips by truck type to each record of the firm data.
p <- dplyr::filter(these_firms, daily_trips>0)
results <- dplyr::bind_rows(results, p)
}
# Create a list that repeats the firmID by the number of daily trips.
# Because each firm can generate more than one type of truck we have one or
# more trips from the same firm. We'll need to use a merge key that handles
# both firmID and truck type.
origins <- rep(results$firmID, results$daily_trips)
trucks <- rep(results$truck_type, results$daily_trips)
trip_records <- dplyr::data_frame(firmID = as.integer(origins),
truck_type = as.character(trucks))
# Now we simply merge this with the input data table, ensuring that all of
# the origins get matched with the firm's information. We'll drop the info
# that we don't need in subsequent models.
truck_origins <- trip_records %>%
dplyr::left_join(results, by=c("firmID", "truck_type")) %>%
dplyr::select(-p_gen, -daily_trips) %>%
dplyr::mutate(taz = as.integer(taz), employees = as.integer(employees),
origin = taz)
# If the user has specified they want to store these intermediate results in
# a text file then do so
if (!is.null(save_to)) readr::write_csv(truck_origins, save_to)
# Compare the initial aggregate results with the microsimulated ones, summed
# by trip generation categories. The results are for information only, for if
# we got past the resampling threshold earlier we're good.
combined <- truck_origins %>%
dplyr::group_by(category, truck_type) %>%
dplyr::summarise(simulated = n()) %>%
dplyr::right_join(aggregate_results, by = c("category", "truck_type"))
outliers <- combined %>%
dplyr::mutate(pct_difference = ((simulated-target)/target)*100) %>%
dplyr::filter(pct_difference > max_resampling_threshold)
if (nrow(outliers)>0) {
ct_msg("Cases where total simulated trips exceed target thresholds:")
ct_msg(outliers)
}
# Return the list of daily truck origins
ct_msg(paste(nrow(truck_origins), "daily local truck origins generated"))
truck_origins
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.