#' Transform FAF flow database distributed by FHWA into format for target year
#'
#' @param fhwa_db Data frame containing the FAF interregional flow database in
#' format distributed by FHWA, or string containing path and filename
#' containing the same
#' @param target_year The four-digit simulation year as an integer value
#' @param interpolate A boolean variable denoting whether values for the
#' `target_year` should be interpolated between the two closest years if it is
#' not one of the years included in the FAF database (defaults to FALSE,
#' currently ignored)
#' @param internal_regions A set of one or more FAF regions internal to the
#' study area (optional, but must be specified if `outer_regions` is defined)
#' @param outer_regions A data frame containing FAF region pairs whose flows
#' pass through the modeled area (optional, but if specified then
#' `internal_regions` must also be specified)
#'
#' @details This function converts the FHWA Freight Analysis Framework (FAF)
#' commodity flows, measured in annual tons, dollars, and ton-miles
#' between domestic FAF regions and external regions into annual flows
#' for the specified year or closest to it in the FAF database. The `fhwa_db`
#' can either be a data frame previously created with the contents of the
#' FAF flow database or a fully-qualified pathname to the data in comma-
#' separated value (CSV) format. The latter can be in a compressed format that
#' readr::read_csv() can handle.
#'
#' It is important to note that the tons, value, and ton-miles are converted
#' from implied thousands and millions of units in the source data. Zero
#' values are retained, as they reportedly represent flows whose values are
#' less than 1 unit (e.g., zero tons represent values between zero and one
#' thousand tons since in the FAF tons are coded in implied thousands of
#' tons).
#'
#' The `target_year` must be one of the years in the FAF database, or fall
#' between two of the years included within it. Only data for the target or
#' closest year is processed by this function. An optional `interpolate`
#' parameter will eventually allow for the tons, value, and ton-miles to be
#' interpolated between the two closest years to the target year. The
#' parameter is included so as to not break the interface, but the
#' functionality is not currently implemented.
#'
#' The `internal_regions` should be specified as a list of the FAF regions
#' inside the modeled area. Flows to, from, and between these internal
#' regions will be allocated to synthetic firms or traffic analysis zones
#' within the model. Flows from the remainder of the USA will be retained if
#' they have a trip end in one of the internal regions (e.g., internal,
#' inbound, or outbound flows). The internal regions are optional, but if
#' omitted data from the entire USA will be considered internal to the model.
#'
#' Flows likely to pass through one or more internal regions can be included
#' as `outer_regions`. Such flows must be defined as FAF region pairs (e.g.,
#' truck flows between California and Washington on I-5 if Oregon comprises
#' the internal regions). The flows are assumed to be bidirectional to reduce
#' the amount of coding required. Note that any additional fields in
#' `outer_regions` will be added to the FAF flows records. The user should
#' remove any unwanted fields before or after passing the `outer_region`
#' definitions to this function.
#'
#' @export
#' @examples
#' annual_flows <- preprocess_faf4_database(fhwa_database, 2018, FALSE,
#' c(411, 419, 532)) # Several internal regions and no outer regions defined
#' annual_flows <- preprocess_faf4_database("./faf4.4.1.zip", 2016, FALSE,
#' 160, idaho_outer_regions) # A single internal and multiple outer regions
preprocess_faf4_database <- function(fhwa_db, target_year, interpolate = FALSE,
internal_regions = NULL, outer_regions = NULL) {
# Determine whether fhwa_db is a data frame or filename, and complain if not
contents <- as.character(class(fhwa_db)[1])
if (contents %in% c("tbl_df", "data.frame", "data.table", "spec_tbl_df")) {
# Assume that the fhwa_db points to a valid data frame
print(paste("Processing FAF flow data from", contents, "contents in",
deparse(substitute(fhwa_db))), quote = FALSE)
} else if (contents == "character") {
# The contents can be a valid filename, but check to make sure
if (!file.exists(fhwa_db)) {
stop(paste0("The fhwa_db parameter ", fhwa_db,
" appears to be a string but does not specify a valid filename"))
} else {
print(paste("Build FAF flows data frame from", fhwa_db), quote = FALSE)
fhwa_db <- readr::read_csv(fhwa_db, guess_max = Inf)
}
} else {
# It isn't a data frame or string that resolves to valid filename
stop(paste0("The fhwa_db parameter ", fhwa_db, " is invalid"))
}
# Extract the years that are included in the database. We will grab the years
# coded in the tons fields in the file header. There is probably some super
# efficient way of doing all of this in one line of obscure code, but few
# enough fields to chop through to make the least efficient way fast enough.
years_found <- c()
for (this_field in names(fhwa_db)) {
if (substr(this_field, 1 , 5) == "tons_") {
this_year <- as.integer(substr(this_field, 6, 9))
years_found <- c(years_found, this_year)
}
}
# Finding the closest year in the dataset should now be easy, as we can find
# the year closest to our target year
offsets <- abs(years_found - target_year)
faf_year <- years_found[which.min(offsets)]
print(paste("FAF data from", faf_year, "is closest to target year",
target_year), quote = FALSE)
# Append the tonnage, value, and ton-miles to each record from the FAF year
# closest to the target year and scale them on the fly
fhwa_db$year <- faf_year
fhwa_db$exp_tons <- fhwa_db[[paste0("tons_", faf_year)]] * 1e3
fhwa_db$exp_value <- fhwa_db[[paste0("value_", faf_year)]] * 1e6
fhwa_db$exp_tmiles <- fhwa_db[[paste0("tmiles_", faf_year)]] * 1e6
fhwa_db$year <- target_year # Reset to the user-specified year afterwards
# Now that we have tonnage, value, and ton-miles for the target year drop the
# original fields from the data
for (this_year in years_found) {
fhwa_db[[paste0("tons_", this_year)]] <- NULL
fhwa_db[[paste0("value_", this_year)]] <- NULL
fhwa_db[[paste0("tmiles_", this_year)]] <- NULL
}
# Before we do anything else we need to recast several variables as numeric if
# they were incorrectly read as character variables
recast <- dplyr::mutate(fhwa_db, fr_orig = as.integer(fr_orig),
dms_orig = as.integer(dms_orig), fr_dest = as.integer(fr_dest),
dms_dest = as.integer(dms_dest), sctg2 = as.integer(sctg2),
dms_mode = as.integer(dms_mode), trade_type = as.integer(trade_type))
# Next tag the regions that will be included within the modeled area. This
# list is optional, as some users might want all domestic origins and desti-
# nations, but otherwise set the direction code for those that are included.
recast$direction <- "drop" # Set the default value for this variable
if (is.null(internal_regions)) {
# Everything is internal if the user hasn't specified which regions are
recast$direction <- "internal"
# But they cannot specify outer regions in that case, as they don't make
# sense within this context
if (!is.null(outer_regions)) stop(paste("No internal regions were",
"specified, but outer regions were"))
} else {
# Define the direction codes for those regions specified as internal
recast$direction <- ifelse(recast$dms_orig %in% internal_regions &
recast$dms_dest %in% internal_regions, "internal", recast$direction)
recast$direction <- ifelse(recast$dms_orig %in% internal_regions &
!recast$dms_dest %in% internal_regions, "outbound", recast$direction)
recast$direction <- ifelse(!recast$dms_orig %in% internal_regions &
recast$dms_dest %in% internal_regions, "inbound", recast$direction)
# Then process the outer regions if they are defined. Note that the user can
# define internal regions but leave outer regions undefined. We will do this
# by merging the outer region data with the data frame. We will tag those
# regions as through movements.
if (!is.null(outer_regions)) {
# The outer region flows are usually coded as one way, but of course the
# flows move in opposite direction as well. Thus, we'll first need to add
# the other direction to the flows.
opposite_direction <- outer_regions %>%
dplyr::mutate(temp = dms_orig, dms_orig = dms_dest, dms_dest = temp) %>%
dplyr::mutate(temp = entry, entry = exit, exit = temp, temp = NULL)
outer_regions <- dplyr::bind_rows(outer_regions, opposite_direction)
# Now we can merge the flow data with these outer region definitions,
# which will allow us to carry forward any data included in the latter
recast <- dplyr::left_join(recast, outer_regions, by = c("dms_orig",
"dms_dest"))
# And finally, tag the outer regions as through flows. We will process the
# list of all through flows as we cannot tag based upon unique or non-
# missing contents outer_regions because we don't know for sure what
# fields, if any, will be included other than domestic O and D.
n_outer_regions <- nrow(outer_regions)
for (i in (1:n_outer_regions)) {
recast$direction <- ifelse(recast$dms_orig == outer_regions$dms_orig[i] &
recast$dms_dest == outer_regions$dms_dest[i], "through",
recast$direction)
}
}
}
# The trade type is coded as numeric variable, but it will make downstream use
# less error-prone to recast as descriptive string. In this case we only need
# to differentiate between domestic and cross-border flows.
redo_trade_type <- dplyr::mutate(recast, trade_type = ifelse(trade_type == 1,
"domestic", ifelse(trade_type %in% c(2, 3), "x-border", NA)))
# We should also recode the mode of transport while we're at it
redo_transport_mode <- dplyr::mutate(redo_trade_type,
domestic_mode = ifelse(dms_mode == 1, "Truck", ifelse(dms_mode == 2, "Rail",
ifelse(dms_mode == 3, "Water", ifelse(dms_mode == 4, "Air",
ifelse(dms_mode == 5, "Multiple", ifelse(dms_mode == 6, "Pipeline",
ifelse(dms_mode == 7, "Other", ifelse(dms_mode == 8, "None",
NA)))))))))
# Drop the records whose direction are not internal, inbound, outbound, or
# through
keep <- dplyr::filter(redo_transport_mode, direction != "drop")
print(paste(nrow(keep), "records retained:"), quote = FALSE)
print(table(keep$direction, useNA = "ifany"))
# How many records have zero transactions?
n_zeros <- nrow(dplyr::filter(keep, exp_value <= 0.0, exp_tons <= 0.0))
pct_zeros <- swimctr::percent(n_zeros, nrow(keep))
print(paste0(n_zeros, " of ", nrow(keep), " records (", pct_zeros,
"%) have zero tons and value coded"), quote = FALSE)
# Show us the total flows by mode and direction
print("Annual tonnage by direction and mode for modeled area:", quote = FALSE)
print(addmargins(xtabs(exp_tons ~ domestic_mode + direction, data = keep)))
# Return the results
return(keep)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.