#' @name BY_area_source_projections
#'
#' @title
#' Calculate, Forecast, and Backcast Emissions for Area Sources
#'
#' @description
#' `BY_area_source_projections()` combines throughputs, emission factors, control
#' factors, and growth profiles, multiplying them together. This results in a
#' set of "grown and controlled" emissions for the given `years`.
#'
#' @return
#' Tabular data:
#' - keyed by year, county, pollutant, and category;
#' - having columns `tput_qty`, `ef_qty`, `cf_qty`, `gf_qty`, and their product, `ems_qty`.
#'
#' @param base_year `BY` object, like `BY(2011)`
#' @param years `CY` object, like `CY(1990:2050)`; the range of years you want results for
#' @param tput_data see Details, below
#' @param ef_data see Details, below
#' @param cf_data see Details, below
#' @param gpf_data see Details, below
#' @param na.rm passed to subroutines; refer to "See Also", below
#' @param verbose display informative messages
#'
#' @details
#'
#' **BASIC USAGE**
#'
#' If you don't supply any or all of:
#'
#' - `tput_data`
#' - `ef_data`
#' - `cf_data`
#' - `gpf_data`
#'
#' ... then, helpfully, `BY_area_source_projections()` will fill them in for you.
#' In that case, so that it knows where to look, you will have to supply a base
#' year, like so:
#'
#' ```
#' BY(2011) %>% BY_area_source_projections()
#' ```
#'
#' In this case, it's helpful to pass `verbose = TRUE`, so that you'll know
#' what's happening and where the throughput, emission factor, control factor,
#' and growth profile data are being pulled from.
#'
#' **ADVANCED USAGE**
#'
#' You can, instead, BYOD ("bring your own data"). Then you can use this function to
#' explore combinations of alternate throughputs, growth factors, emission
#' factors, and control factors.
#'
#' You can simply read in a CSV file, or an XLSX worksheet, containing the
#' `tput_data`, `ef_data`, `cf_data`, or `gpf_data` that you would like to use.
#' (You will have to use `read_csv()` or `read_excel()` to import the data
#' first.)
#'
#' The data that you do supply should look like the defaults:
#'
#' - `tput_data` defaults to `DB_area_source_throughputs(base_year)`.
#' - `ef_data` defaults to `annualize_emission_factors(DB_area_source_emission_factors(base_year))`.
#' - `cf_data` defaults to `annualize_control_factors(DB_area_source_control_factors(base_year))`.
#' - `gpf_data` defaults to `DB_growth_profiles(base_year))`.
#'
#' Try out those defaults, or look at the help pages. (Click the links under "See Also", below.)
#' As long as you supply similar-looking data, you are good to go.
#'
#' @note If you don't supply `tput_data`, then data will be fetched, and results
#' will be calculated, for every single category in the given base year. If
#' you do supply `tput_data`, the result will be scoped to just the categories
#' present in `tput_data$cat_id`. (This will be noticeably faster if the number of categories is small.)
#'
#' @seealso
#' - [DB_area_source_throughputs()]
#' - [DB_area_source_emission_factors()] and [annualize_emission_factors()]
#' - [DB_area_source_control_factors()] and [annualize_control_factors()]
#' - [DB_growth_profiles()]
#'
NULL
#' @name BY_area_source_projections
#' @usage NULL
#'
BY_area_source_projections_ <- function (
base_year = NULL,
years = CY(1990:2040),
tput_data = NULL,
ef_data = NULL,
cf_data = NULL,
gf_data = NULL,
na.rm = TRUE,
verbose = getOption("verbose")
) {
msg <- function (...) if(isTRUE(verbose)) message("[BY_area_source_projections] ", ...)
#
# Throughout this function, we work with years as "naive" years; that is,
# 4-digit years without prefixes (i.e., without timelines like RY or CY).
# At the end, we pretend that everything happened on the CY timeline.
#
msg("eliding `years`")
years <- elide_year(years, verbose = verbose)
#
# If `tput_data` is NULL, then attempt to fetch it from DataBank.
#
if (is.null(tput_data)) {
msg("tput_data <- DB_area_source_throughputs()")
tput_data <-
DB_area_source_throughputs(
base_year = base_year,
na.rm = na.rm,
verbose = verbose)
}
msg("eliding `year` in tput_data")
tput_data <-
elide_year(
tput_data,
verbose = verbose)
#
# Construct `projected_tput_data`.
#
# If `gf_data` is NULL, and `base_year` is also NULL,
# then this will just be identical to `tput_data`,
# but with a new column `gf_qty` set equal to 1.
#
if (is.null(gf_data) && is.null(base_year)) {
msg("projecting `tput_data` as though `gf_qty` is 1.000 everywhere")
projected_tput_data <-
mutate(
tput_data,
gf_qty = 1.000)
} else {
#
# We have a `base_year`, but `gf_data` is NULL.
# So, try to fetch growth profile data from DataBank.
#
if (is.null(gf_data)) {
msg("`gf_data` <- `DB_growth_profiles(\"", base_year, "\")`")
gf_data <-
DB_growth_profiles(
base_year = base_year,
na.rm = na.rm,
verbose = verbose)
}
msg("eliding `year` in `gf_data`")
gf_data <-
elide_year(
gf_data,
verbose = verbose)
# Never bother with any extraneous GFs
msg("dropping any categories from `gf_data` that don't appear in `tput_data`")
gf_data <-
semi_join(
filter_years(gf_data, years),
tput_data,
by = "cat_id")
if ("cnty_abbr" %in% names(tput_data)) {
# In the `tput_data`, when cnty_abbr is present, there should be no cases where it is NA.
if (any(is.na(tput_data[["cnty_abbr"]]))) {
err_msg <- "you have NAs in `cnty_abbr` (in the data you are trying to project)."
stop(err_msg)
}
# In the `gf_data`, however, some (or all) values of `cnty_abbr` might be NA.
# The remainder should be specific counties ("ALA", "CC", ... "SON").
#
# If the column isn't even present in `gf_data`, we'll have to treat
# it as though it were all NA. We're here because `cnty_abbr` is present in
# `tput_data`, so we'll need something to join against.
#
msg("expanding `cnty_abbr` in `gf_data`")
gf_data <-
expand_counties(
gf_data,
verbose = verbose)
} else {
if ("cnty_abbr" %in% names(gf_data)) {
err_msg <- "`cnty_abbr` is in `gf_data`, but not in `tput_data`"
stop(err_msg)
}
}
projected_tput_data <-
project_annual_throughputs_by(
tput_data,
cat_id,
cnty_abbr,
using = gf_data,
verbose = verbose)
msg("eliding `year` in `projected_tput_data`")
projected_tput_data <-
elide_year(
projected_tput_data,
verbose = verbose) %>%
filter_years(
years)
}
if (is.null(cf_data)) {
if (is.null(base_year)) {
msg("letting `cf_data` be an empty tibble")
cf_data <-
tibble(
year = character(0),
cat_id = integer(0),
pol_abbr = character(0),
pol_id = integer(0),
cf_qty = numeric(0))
} else {
msg("`legacy_format_cf_data` <- `DB_control_factors(\"", base_year, "\")`")
legacy_format_cf_data <-
DB_control_factors(
base_year = base_year,
na.rm = na.rm,
verbose = verbose)
msg("dropping any categories from `legacy_format_cf_data` that don't appear in `tput_data`")
legacy_format_cf_data <-
semi_join(
legacy_format_cf_data,
tput_data,
by = "cat_id")
msg("`cf_data` <- `annualize_DB_control_factors(legacy_format_cf_data, ...)`")
cf_data <-
annualize_DB_control_factors(
legacy_format_cf_data,
years = years,
verbose = verbose)
}
}
if (is.data.frame(cf_data) && (nrow(cf_data) > 0)) {
msg("eliding `year` in `cf_data`")
cf_data <-
elide_year(
cf_data,
verbose = verbose) %>%
filter_years(
years)
# don't bother with any extraneous CFs
msg("dropping any categories from `cf_data` that don't appear in `tput_data`")
cf_data <-
semi_join(
cf_data,
tput_data,
by = "cat_id")
}
if (is.null(ef_data)) {
msg("`legacy_format_ef_data` <- `DB_area_source_emission_factors(\"", base_year, "\", ...)`")
legacy_format_ef_data <-
DB_area_source_emission_factors(
base_year,
na.rm = na.rm,
verbose = verbose)
msg("dropping any categories from `legacy_format_ef_data` that don't appear in `tput_data`")
legacy_format_ef_data <-
semi_join(
legacy_format_ef_data,
tput_data,
by = "cat_id")
msg("`ef_data` <- `annualize_DB_emission_factors(legacy_format_ef_data, ...)`")
ef_data <-
annualize_DB_emission_factors(
legacy_format_ef_data,
years = years,
verbose = verbose) %>%
elide_years(
verbose = verbose)
}
msg("eliding `year` in `ef_data`")
ef_data <-
elide_year(
ef_data,
verbose = verbose) %>%
filter_years(
years)
# join projected throughput data with emission factors
joined_data <-
inner_join(
projected_tput_data,
ef_data,
by = c("year", "cat_id"))
if (is.data.frame(cf_data) && (nrow(cf_data) > 0)) {
# join again, this time with control factors
pol_vars <-
intersect(
tidyselect::vars_select(names(joined_data), starts_with("pol_")),
tidyselect::vars_select(names(cf_data), starts_with("pol_")))
if (any(is.na(pol_vars))) {
err_msg("you have `NA` in ", str_or(pol_vars), "; this is unsupported (yet)")
stop(err_msg)
}
msg("joining with `cf_data` by year, category, and pollutant")
joined_data <-
joined_data %>%
left_join(
cf_data,
by = c("year", "cat_id", pol_vars))
} else {
msg("no control factors; setting `cf_qty` to `NA`")
joined_data <-
mutate(
joined_data,
cf_qty = NA_real_)
}
msg("setting `cf_qty` to 1.000 (unabated) wherever is it `NA` (missing)")
imputed_data <-
replace_na(
joined_data,
list(cf_qty = 1.000))
msg("calculating: ems_qty = tput_qty x ef_qty x cf_qty")
multiplied_data <-
mutate(
imputed_data,
ems_qty = tput_qty * ef_qty * cf_qty)
msg("converting from lb/yr to ton/yr")
converted_data <-
multiplied_data %>%
set_emission_units(
"lb/yr") %>% # because in DataBank, `ef_qty` is "lbs/tput"
convert_emission_units(
to = "ton/yr")
tidied_data <-
converted_data %>%
# mutate(
# pol_abbr = decode(pol_id, DB_PROJECTED_POLLUTANT_CODES)) %>%
dplyr::select(
one_of(names(tput_data)),
starts_with("pol"),
one_of(names(ef_data)),
one_of(names(cf_data)),
ems_qty,
ems_unit) %>%
mutate_at(
vars(ends_with("_id")),
~ as.integer(.))
if (isTRUE(na.rm)) {
msg("`na.rm` is `TRUE`; so, dropping rows where `ems_qty` is zero")
tidied_data <-
filter(
tidied_data,
ems_qty > 0)
}
msg("casting all elided `year`s to CY")
tidied_data <-
tidied_data %>%
elide_year() %>%
mutate(
year = CY(year)) %>%
as_tibble()
return(tidied_data)
}
#' @name BY_area_source_projections
#'
#' @export
BY_area_source_projections <-
memoise::memoise(
BY_area_source_projections_,
cache = memoise::cache_memory())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.