# Functions used for downscaling reported data from model-reported regions to countries
#' Load the data submitted by the individual models
#'
#' Load model-submitted data into a list of data frames where each element is a single model's output (i.e., all
#' scenarios in one data frame per model).
#'
#' @param model_data_folder folder where the pre-processed model output data is located
#' @param model_names names of the models (case sensitive)
#' @details Requires that the model output data are generally cleaned and appropriately formatted, and placed into a
#' common folder with sub-folders for each model. Individual data files are typically the model name plus the data
#' submissions version number, and the extension ".csv". The function will pull the data with the highest version
#' number.
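#' @examples
#' \dontrun{
#' # A minimal usage sketch; the folder and model names are hypothetical, and
#' # assume a layout such as models/MODEL_A/MODEL_A_v2.csv
#' model_data <- load_preprocessed_data("models", c("MODEL_A", "MODEL_B"))
#' names(model_data)  # "MODEL_A" "MODEL_B"
#' }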
#' @importFrom tidyr gather
#' @importFrom dplyr matches mutate mutate_at vars
#' @importFrom magrittr "%>%"
#' @export
load_preprocessed_data <- function(model_data_folder, model_names){
if (!file.exists(model_data_folder)) {
print(paste('unrecognized path:', model_data_folder))
return()
}
folder_paths <- paste(model_data_folder, model_names, sep = "/")
filepaths <- character(0)
for(i in seq_along(folder_paths)){
if (!file.exists(folder_paths[i])) {
print(paste('unrecognized path:', folder_paths[i]))
return()
}
csv_files <- list.files(folder_paths[i])
csv_files <- csv_files[endsWith(csv_files, "csv")]
# In case there are multiple submissions, take the last one (sorted alphabetically)
filename <- csv_files[length(csv_files)]
filepaths[i] <- paste(folder_paths[i], filename, sep = "/")
}
# Put all of the data in a list, where each model is an element of the list
data <- list()
for(fnum in seq_along(filepaths)){
cat("Loading", filepaths[fnum], "...\n")
# Read in the data tables to the list
data[[fnum]] <- read.csv(filepaths[[fnum]], comment.char = COMMENT_CHAR, stringsAsFactors=FALSE,
na.strings = c( "", "NA"), check.names = FALSE) %>%
gather(year, value, matches(YEAR_PATTERN)) %>%
# year needs to be numeric for subsequent interpolation to work
mutate(year=as.integer(year)) %>%
# All columns except for year and value should be characters
mutate_at(dplyr::vars(-year, -value),
as.character)
}
names(data) <- model_names
return(data)
}
#' Apply corrections to pre-processed data
#'
#' Addresses errors and omissions in submitted model data loaded by \code{\link{load_preprocessed_data}} prior to
#' performing any data processing. Where necessary, corrections are performed by calling model-specific functions. This
#' function also performs a number of processing steps that are optional but default to TRUE. These include: deriving
#' weighted quantity flows from reported indicator variables, subsetting only quantity flow variables (the methods
#' assume that the components of all indicator variables are reported as quantity flows, and can be re-computed
#' post-hoc from various levels of regional aggregation), removing "All" categories when the constituent components
#' are available (\code{\link{remove_redundant_alls}}), interpolating years
#' where necessary for harmonized reporting years, dropping missing values (the alternative is setting them to zero),
#' and aggregating redundant categories (as category re-assignment may cause multiple values with the same ID info).
#'
#' @param model_data_list list of data frames with model-specific data
#' @param derive_weighted_indicators logical (default = TRUE) indicating whether to derive weighted quantity flows from
#'   reported indicator variables (see \code{\link{derive_variables}}) prior to subsetting quantity flows
#' @param subset_quantity_flows logical (default = TRUE) indicating whether to subset only quantity flow variables
#' @param remove_redundant_alls logical (default = TRUE) indicating whether to remove observations with "All" that could
#' be calculated instead by adding provided components. These "All" values should not be downscaled.
#' @param interpolate_years logical (default = TRUE) indicating whether to interpolate years. If FALSE, no interpolation
#' will be performed. If TRUE, iTEM analysis years that fall within the model's range of reported years will be
#' linearly interpolated, as necessary.
#' @param drop_na_values logical (default = TRUE) indicating whether to drop observations with no value in model-
#' submitted data. If FALSE, missing values are set to zero.
#' @param aggregate_redundant_categories logical (default = TRUE) indicating whether to add up categories that are
#' redundant (i.e., all ID information the same), which generally result from re-setting variables from model-
#' reported data to more aggregate categories.
#' @details To apply model-specific functions, this function searches for a correction function containing each model
#' name in the list (i.e., "correct_" + model name). This step is applied prior to all others. The interpolation
#' function does not extrapolate, so the data from any model does not necessarily include all iTEM analysis years. In
#' its present form, this function does not call additional functions to compute quantity flows from indicator
#' variables, though such capacity may be added for an expanded variable set. For example, to process fuel prices,
#' fuel expenditures would be calculated as reported prices times consumption.
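#' @examples
#' \dontrun{
#' # A sketch of typical usage following load_preprocessed_data(); the folder
#' # and model names are hypothetical, and the arguments shown are the
#' # defaults, spelled out for clarity
#' model_data <- load_preprocessed_data("models", c("MODEL_A", "MODEL_B"))
#' prepared <- prepare_preprocessed_data(model_data,
#'                                       subset_quantity_flows = TRUE,
#'                                       interpolate_years = TRUE,
#'                                       drop_na_values = TRUE)
#' }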
#' @importFrom assertthat assert_that
#' @importFrom dplyr bind_rows filter group_by_at if_else mutate pull summarise ungroup
#' @importFrom tidyr complete
#' @importFrom magrittr "%>%"
#' @export
prepare_preprocessed_data <- function(model_data_list,
derive_weighted_indicators = TRUE,
subset_quantity_flows = TRUE,
remove_redundant_alls = TRUE,
interpolate_years = TRUE,
drop_na_values = TRUE,
aggregate_redundant_categories = TRUE, ...){
assert_that(is.list(model_data_list))
assert_that(is.logical(derive_weighted_indicators))
assert_that(is.logical(subset_quantity_flows))
assert_that(is.logical(remove_redundant_alls))
assert_that(is.logical(interpolate_years))
assert_that(is.logical(drop_na_values))
assert_that(is.logical(aggregate_redundant_categories))
if(derive_weighted_indicators){
print("Note: Deriving weighted quantity flows from provided indicator variables")
weighted_indicator_mapping <- get_variable_mapping("downscale/weighted_indicators.csv")
}
if(subset_quantity_flows){
print("Note: Only retaining quantity flow variables from submitted model data")
# load the variable type mapping, and derive a vector of quantity flow variables to be retained
flow_variables <- get_variable_mapping(...) %>%
filter(variable_type == "quantity") %>%
pull(Variable)
}
for(model in names(model_data_list)){
# Standardize on a region name for the global/world region. Setting to "All" allows removal of redundant reporting
model_data_list[[model]] <- mutate(model_data_list[[model]],
Region = if_else(Region %in% c("World", "Wor", "Global"), "All", Region))
# Apply any model-specific correction functions, as necessary
if( exists(paste0("correct_", model), mode = "function")){
print( paste0( "Applying model-specific corrections to data from model: ", model ))
correction_function <- get( paste0("correct_", model))
model_data_list[[model]] <- correction_function(model_data_list[[model]])
}
# If indicator variables are provided without the necessary weighting factors (e.g., new vehicle
# efficiency but not vkm and energy consumption by new vehicles), this step computes weighted
# variables that can be used
if(derive_weighted_indicators){
model_data_list[[model]] <- derive_variables(model_data_list[[model]],
mapping = weighted_indicator_mapping,
bind_derived_data = TRUE)
}
# Subset quantity flows, if indicated
if(subset_quantity_flows){
model_data_list[[model]] <- subset(model_data_list[[model]],
Variable %in% flow_variables)
}
# Remove redundant "All" values of selected ID variables, if indicated
if(remove_redundant_alls){
model_data_list[[model]] <- remove_redundant_alls(model_data_list[[model]])
}
# Interpolate the years provided, if indicated
if(interpolate_years){
# Only interpolate up to the final model year (no need to have a bunch of missing values everywhere)
final_model_year <- max( model_data_list[[model]]$year)
initial_model_year <- min( model_data_list[[model]]$year)
model_item_years <- ITEM_YEARS[ ITEM_YEARS <= final_model_year & ITEM_YEARS >= initial_model_year]
years_to_interpolate <- model_item_years[!model_item_years %in% unique(model_data_list[[model]]$year) ]
# Don't do this if no data would be added
if(length(years_to_interpolate) > 0){
print( paste0( "Interpolating years ", paste(years_to_interpolate, collapse = ', '), " from model: ", model ))
model_data_list[[model]] <- model_data_list[[model]] %>%
group_by_at(ITEM_ID_COLUMNS[ ITEM_ID_COLUMNS != "year"]) %>%
complete(year = model_item_years) %>%
mutate(value = approx_fun(year, value)) %>%
ungroup()
}
# Filter to only the iTEM analysis years
model_data_list[[model]] <- filter(model_data_list[[model]], year %in% ITEM_YEARS)
}
# Either drop NA values or set them to zero
if(drop_na_values){
model_data_list[[model]] <- model_data_list[[model]] %>%
filter(!is.na(value))
} else {
model_data_list[[model]] <- model_data_list[[model]] %>%
mutate(value = if_else(is.na(value), 0, value))
}
# Aggregate redundant quantities, if indicated
if( aggregate_redundant_categories){
# For any model-reported categories that were re-assigned to more aggregate categories (e.g., from different sizes
# of trucks (LHDT, MHDT, HHDT) to trucks (HDT)), these should be aggregated. If no such re-assignments took place,
# this step won't do anything. First, make sure that only quantity variables are subsetted, as we don't want to be
# adding indicator variables
assert_that(subset_quantity_flows)
model_data_list[[model]] <- model_data_list[[model]] %>%
group_by_at(ITEM_ID_COLUMNS) %>%
summarise( value = sum(value)) %>%
ungroup()
}
} # end for(model in names(model_data_list))
return(model_data_list)
} # end function
#' Downscale annual flow variables from model regions to countries
#'
#' Performs a merge of (1) pre-processed and corrected model output (by native model region) from
#' \code{\link{prepare_preprocessed_data}}, (2) country-within-model-region shares of downscaling proxies from
#' \code{\link{compute_country_shares}}, and (3) assignment of a nation-level socioeconomic realization for each model
#' and scenario from \code{\link{assign_socioeconomic_scenario}}. Returns a list of model-specific dataframes with all
#' quantity (i.e., annual flow) variables reported at the country level.
#'
#' @param model_data_list list of model-specific dataframes with cleaned and pre-processed output
#' @param model_socio_list list of model-specific dataframes assigning each submitted scenario to a country-level
#' socioeconomic scenario
#' @param country_share_list list of model-specific lists of downscaling proxy dataframes indicating country-
#' within-region shares
#' @param collapse_list logical (default = TRUE) indicating whether to return output as a single data frame (TRUE) or a
#' list of model-specific data frames (FALSE).
#' @details Takes in three lists; the named elements of each of these lists should be the model names, and they must
#' be standardized. All models and scenarios in \code{model_data_list} must be available in \code{model_socio_list},
#' though there is no problem if the latter contains additional models and/or scenarios. Similarly, all models and
#' socioeconomic realizations from joining \code{model_data_list} and \code{model_socio_list} must be available in
#' \code{country_share_list}, though the latter may contain additional nation-level socioeconomic realizations.
#' Finally, all reported quantity variables must be assigned to a downscaling proxy in \code{variable_ds_proxy_fn}
#' that is also available in \code{country_share_list}.
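#' @examples
#' \dontrun{
#' # A sketch chaining the three upstream functions; "models" and the model
#' # names are hypothetical, and "prepared" comes from prepare_preprocessed_data()
#' maps <- load_country_region_mappings("models", c("MODEL_A", "MODEL_B"))
#' shares <- compute_country_shares(prepared, maps)
#' socio <- assign_socioeconomic_scenario(prepared, maps)
#' downscaled <- downscale_flow_variables(prepared, socio, shares)
#' }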
#' @importFrom assertthat assert_that
#' @importFrom dplyr right_join left_join select mutate filter arrange across all_of bind_rows group_by summarise ungroup
#' @importFrom magrittr "%>%"
#' @export
downscale_flow_variables <- function(model_data_list,
model_socio_list,
country_share_list,
collapse_list = TRUE, ...){
assert_that(is.list(model_data_list))
assert_that(is.list(model_socio_list))
assert_that(is.list(country_share_list))
assert_that(is.logical(collapse_list))
# load the variable-to-downscaling-proxy mapping file
variable_ds_proxy <- get_variable_mapping(...) %>%
filter(variable_type == "quantity") %>%
select(-variable_type)
# generate the new list with country-level model output, and indicate the column names
downscaled_data_list <- list()
for( model_name in names(model_data_list)){
# Assign an empty list to each model
downscaled_data_list[[model_name]] <- list()
for(proxy_name in names(country_share_list[[model_name]])){
print(paste0("Downscaling quantity flows from model ", model_name,
" assigned to downscaling proxy ", proxy_name))
# country share data: keep only relevant socioeconomic realization(s),
# repeat as necessary by model scenario
country_share_list[[model_name]][[proxy_name]] <-
country_share_list[[model_name]][[proxy_name]] %>%
right_join(model_socio_list[[model_name]], by="socio")
# Determine the "by" argument for joining country shares with model output
join_fields <- names(country_share_list[[model_name]][[proxy_name]])[
!names(country_share_list[[model_name]][[proxy_name]]) %in% c("socio", "iso", "value") ]
# Perform the merge, replacing native model regions with countries
downscaled_data_list[[model_name]][[proxy_name]] <-
model_data_list[[model_name]] %>%
left_join(variable_ds_proxy, by = "Variable" ) %>%
filter(ds_proxy == proxy_name) %>%
left_join(country_share_list[[model_name]][[proxy_name]],
by = join_fields, suffix = c("_reg_total", "_country_share")) %>%
filter(!is.na(iso)) %>%
mutate(value = value_reg_total * value_country_share) %>%
select(all_of(DS_DATA_COLUMNS))
} #end for(proxy_name in names(country_share_list[[model_name]]))
# Bind the different proxies within each model
downscaled_data_list[[model_name]] <-
do.call(bind_rows, downscaled_data_list[[model_name]]) %>%
arrange(across(all_of(DS_DATA_COLUMNS)))
} # end for( model_name in names(model_data_list))
if(collapse_list){
downscaled_data <- do.call(bind_rows, downscaled_data_list)
return(downscaled_data)
} else {
return(downscaled_data_list)
}
} # end function
#' Compute country shares for model output by native model region
#'
#' Compute country-within-model-region shares for models, downscaling proxies, nation-level socioeconomic realizations,
#' and years. Takes in list of model data from \code{\link{prepare_preprocessed_data}}, each model's country-to-region
#' mapping from \code{\link{load_country_region_mappings}}, and country-level downscaling proxy data from
#' \code{\link{generate_ds_proxy_data}}. Returns a list with all combinations of model, downscaling proxy, country-level
#' socioeconomic realization, and year.
#'
#' @param model_data_list list of model output data that has been cleaned, pre-processed, and prepared (i.e., corrected)
#' @param country_region_map_list list of model-specific dataframes with country-to-region mapping
#' @param ds_proxy_data list of downscaling proxy dataframes
#' @param save_output logical indicating whether to save the output of this function
#' @param create_dir logical indicating whether to create a new directory for the output (if one with the expected name
#' doesn't already exist)
#' @param output_dir directory in which to save the output
#' @details Returns a list of models, each element of which is a list of downscaling proxy dataframes, each of which
#' indicates country-within-model-region shares, to be used by \code{\link{downscale_flow_variables}} in order to
#' downscale region-level model output to the country level. Returned data are filtered to the years reported by each
#' model, and include all possible nation-level socioeconomic realizations.
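#' @examples
#' \dontrun{
#' # Sketch: compute the shares without writing intermediate CSVs; "prepared"
#' # is assumed to come from prepare_preprocessed_data(), and the folder and
#' # model names are hypothetical
#' maps <- load_country_region_mappings("models", c("MODEL_A"))
#' shares <- compute_country_shares(prepared, maps, save_output = FALSE)
#' str(shares$MODEL_A, max.level = 1)  # one data frame per downscaling proxy
#' }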
#' @importFrom assertthat assert_that
#' @importFrom dplyr filter group_by_at if_else left_join mutate rename select summarise ungroup
#' @importFrom magrittr "%>%"
#' @importFrom readr write_csv
#' @export
compute_country_shares <- function(model_data_list,
country_region_map_list,
ds_proxy_data = generate_ds_proxy_data(),
save_output = TRUE, create_dir = TRUE,
output_dir = DS_OUTPUT_DIR, ...){
assert_that(is.list(model_data_list))
assert_that(is.list(country_region_map_list))
assert_that(is.list(ds_proxy_data))
assert_that(is.logical(save_output))
assert_that(is.logical(create_dir))
assert_that(is.character(output_dir))
# The output is a list of models, each of which is a list of downscaling proxies.
output_list <- list()
# Process each model separately in a for loop
for(model_name in names(model_data_list)){
# get the country-to-region mapping for the given model
region_map <- country_region_map_list[[model_name]]
# figure out which years this will be written out for
model_years <- sort(unique(model_data_list[[model_name]]$year))
# each model in the list will also be a list of downscaling proxies
output_list[[model_name]] <- list()
for(ds_proxy in names(ds_proxy_data)){
ds_proxy_name <- unique(ds_proxy_data[[ds_proxy]]$ds_proxy)
print(paste0("Computing country-within-region shares for model: ",
model_name, ", and proxy: ", ds_proxy_name))
ds_proxy_region <- ds_proxy_data[[ds_proxy]] %>%
filter(year %in% model_years) %>%
left_join(region_map, by = "iso") %>%
na.omit()
# The group_by columns may include mode; need to keep this flexible to column names
group_columns <- names(ds_proxy_region)[!names(ds_proxy_region) %in% c("value", "iso")]
# Aggregate the proxy data to regional totals within the grouping columns
ds_proxy_region <- ds_proxy_region %>%
group_by_at(group_columns) %>%
summarise(reg_total = sum(value)) %>%
ungroup()
output_list[[model_name]][[ds_proxy_name]] <- ds_proxy_data[[ds_proxy]] %>%
filter(year %in% model_years) %>%
left_join(region_map, by="iso") %>%
left_join(ds_proxy_region, by = group_columns) %>%
mutate(value = value / reg_total) %>%
# Get rid of any missing values from dividing by zero (assuming these are all too small to matter)
mutate(value = if_else(is.na(value), 0, value)) %>%
select(-reg_total) %>%
# The socioeconomic scenario is named "socio"
rename(socio = Scenario)
# write the data out, if requested
if(save_output){
# Create the directory if called for
if(create_dir) dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
write_csv(output_list[[model_name]][[ds_proxy_name]],
file = paste0(output_dir, "/country_shares_", model_name, "_", ds_proxy_name, ".csv"))
} # end if(save_output)
} # end for(ds_proxy in names(ds_proxy_data))
} # end for(model_name in model_names)
return(output_list)
} #end function
#' Assign country-level socioeconomic realization to each model+scenario
#'
#' Takes in list of model output data from \code{\link{prepare_preprocessed_data}} and returns a list of model-
#' specific dataframes mapping model scenarios to named nation-level socioeconomic realizations. Default behavior
#' is to compute these assignments by minimizing sum of squared errors in future GDP ratios by model region and
#' time period.
#'
#' @param model_data_list list of data tables of model-specific pre-processed, prepared data
#' @param country_region_map_list list of data tables of model-specific country-to-region mappings
#' @param socio_assign_method method used for assigning scenarios. "SSE" (default): minimize the sum of squared errors
#'   in future growth ratios of the indicated variable, by year and model region. "exogenous": use exogenously provided
#'   assignments between model+scenario and nation-level socioeconomic realization. If this method is selected and no
#'   model data folder is provided, a single default realization is used for all models and scenarios.
#' @param SSE_variable variable to use in determining the socioeconomic assignment using the SSE method; can be
#' Population or PPP-GDP
#' @param socio_assignment_folder character string indicating the folder where the model data are located, in which to
#'   find exogenously specified assignments from model+scenario to socioeconomic realization. These are stored as
#'   socio_assignment_folder/model/metadata/scenarios.yaml
#' @details Returns a list of dataframes where each model is an element. Each data frame assigns the model's scenarios
#' to a named country-level socioeconomic realization.
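#' @examples
#' \dontrun{
#' # Sketch: assign each scenario by minimizing the SSE of GDP growth ratios
#' # (the default method); "prepared" and "maps" are assumed to come from
#' # prepare_preprocessed_data() and load_country_region_mappings()
#' socio <- assign_socioeconomic_scenario(prepared, maps,
#'                                        socio_assign_method = "SSE",
#'                                        SSE_variable = "PPP-GDP")
#' socio$MODEL_A  # columns: Scenario, socio
#' }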
#' @importFrom assertthat assert_that
#' @importFrom yaml yaml.load_file
#' @importFrom dplyr full_join group_by summarise select mutate rename ungroup filter left_join slice
#' @importFrom magrittr "%>%"
#' @importFrom data.table rbindlist
#' @export
assign_socioeconomic_scenario <- function( model_data_list,
country_region_map_list,
socio_assign_method = "SSE",
SSE_variable = "PPP-GDP",
socio_assignment_folder = NA, ...){
assert_that(is.list(model_data_list))
assert_that(is.list(country_region_map_list))
assert_that(socio_assign_method %in% c("exogenous", "SSE"))
assert_that(SSE_variable %in% c("PPP-GDP", "Population"))
output_list <- list()
if(SSE_variable == "PPP-GDP") country_data <- generate_ds_proxy_data()$ds_proxy_gdp
if(SSE_variable == "Population") country_data <- generate_ds_proxy_data()$ds_proxy_population
if(socio_assign_method == "exogenous" & !is.na(socio_assignment_folder)){
print("Using exogenous assignments from model/scenario to country-level socio scenario")
}
if(socio_assign_method == "exogenous" & is.na(socio_assignment_folder)){
print("Using a single country-level socio scenario for all models and scenarios")
}
for(model_name in names(model_data_list)){
if(socio_assign_method == "exogenous"){
if(is.na(socio_assignment_folder)){
scenario_assignment <- data.frame(Scenario = unique(model_data_list[[model_name]]$Scenario),
socio = DS_DEFAULT_SCENARIO,
stringsAsFactors = FALSE)
} # end if(is.na(socio_assignment_folder))
if(!is.na(socio_assignment_folder)){
assert_that(is.character(socio_assignment_folder))
fqfn <- paste0(socio_assignment_folder, "/", model_name, "/metadata/scenarios.yaml")
assert_that(file.exists(fqfn))
scenario_assignment <- yaml.load_file(fqfn) %>%
rbindlist(idcol = "Scenario") %>%
select(Scenario, socio)
} # end if(!is.na(socio_assignment_folder))
} # end if(socio_assign_method == "exogenous")
if(socio_assign_method == "SSE"){
print(paste0("Determining country-level socioeconomic scenarios for model: ", model_name))
# Get the country-to-region assignments for the given model
region_assignment <- country_region_map_list[[model_name]]
# Need to figure out if the model provided socioeconomic information. If not, the data frame can be written here
if(!SSE_variable %in% unique( model_data_list[[model_name]]$Variable)){
scenario_assignment <- data.frame(Scenario = unique(model_data_list[[model_name]]$Scenario),
socio = DS_DEFAULT_SCENARIO,
stringsAsFactors = FALSE)
} else {
model_data <- subset( model_data_list[[model_name]], Variable == SSE_variable) %>%
# Filter out any years with missing values
filter(!is.na(value)) %>%
# only do the assessment on 2010 and later, as this is the base year for the socioeconomics
filter(year >= 2010) %>%
filter(Region %in% unique(region_assignment$Region)) %>%
select(Model, Scenario, Region, year, value)
# Aggregate the country-level socio data by the model's regions
socio_data <- country_data %>%
left_join(region_assignment, by = "iso") %>%
filter(!is.na(Region)) %>% #not all countries are assigned to a region
filter(year %in% model_data$year) %>%
select(-ds_proxy) %>%
group_by(Scenario, Region, year) %>%
summarise(value = sum(value)) %>%
ungroup()
# Merge the aggregated country-level socio data with the model's output
merged_data <- socio_data %>%
full_join(model_data, by = c("Region", "year"), suffix = c("_socio","_model")) %>%
# some models may have mapped regions that aren't actually in their output. These are missing values here.
na.omit()
# Subset the base-year data to merge back in, in order to compute growth ratios from the base year
# 2010 is the base year for the SSPs (may be a future year for some models)
base_data <- subset(merged_data, year == min(year)) %>%
rename(base_value_socio = value_socio) %>%
rename(base_value_model = value_model) %>%
select(Model, Scenario_socio, Scenario_model, Region, base_value_socio, base_value_model)
# Compute the ratios
merged_data <- left_join( merged_data, base_data,
by = c("Model", "Scenario_socio", "Scenario_model", "Region")) %>%
mutate( value_socio = value_socio / base_value_socio ) %>%
mutate( value_model = value_model / base_value_model) %>%
# If a model reported a region but had zero values, this returns a missing value. Just drop the obs.
na.omit() %>%
filter( year != min(year)) %>%
# Compute the squared errors for each observation, and aggregate to compute the sum of squared errors
mutate(SSE = (value_socio - value_model)^2) %>%
select( Model, Region, year, Scenario_socio, Scenario_model, value_socio, value_model, SSE)
SSE_data <- merged_data %>%
group_by(Scenario_socio, Scenario_model) %>%
summarise(SSE = sum(SSE)) %>%
ungroup() %>%
group_by(Scenario_model) %>% slice(which.min(SSE)) %>%
ungroup()
scenario_assignment <- data.frame(Scenario = SSE_data$Scenario_model,
socio = SSE_data$Scenario_socio,
stringsAsFactors = FALSE)
} #end else; if(!SSE_variable %in% unique( model_data_list[[model_name]]$Variable))
} # end if(socio_assign_method == "SSE")
output_list[[model_name]] <- scenario_assignment
} # end for(model_name in names(model_data_list))
return(output_list)
} # end function
#' Generate country-level data for each socioeconomic realization and downscaling proxy
#'
#' Takes in file paths to country-level data that is used to construct downscaling proxy datasets for any number of
#' country-level socioeconomic realizations, in all time periods. Input socioeconomic data must include all future
#' analysis years; these data are used to expand the other proxy data from a given base year to all iTEM analysis years.
#' Returns a list of downscaling proxy dataframes.
#'
#' @param pop_data_fn file name of data with population by scenario, country, year (including future iTEM years)
#' @param gdp_data_fn file name of data with GDP by scenario, country, year (including future iTEM years)
#' @param transportenergy_data_t0_fn file name of data with transportation energy consumption data by country, for a
#' base year. May include mode-level detail.
#' @param use_modal_detail logical indicating whether to determine mode-specific proxy values. If false, a single proxy
#' will be calculated for the whole transportation sector.
#' @param apply_modal_elasticities logical indicating whether to differentiate income elasticities by mode. If true,
#' exogenous assumptions are used. If false, all modes are assigned an income elasticity of 1.
#' @param save_output logical indicating whether to save the output of this function
#' @param create_dir logical indicating whether to create a new directory if the specified one does not already exist
#' @param output_dir file path where output of this function is saved
#' @details The downscaling proxy data generated by this function are assumed to apply to all models; i.e., elasticities
#' linking future GDP growth to future changes in the base-year proxy data do not differ by model.
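#' @examples
#' \dontrun{
#' # Sketch using the package-default input files, without saving output; this
#' # function is not exported and is normally called by compute_country_shares()
#' proxies <- generate_ds_proxy_data(save_output = FALSE)
#' names(proxies)  # ds_proxy_population, ds_proxy_gdp, ds_proxy_transportenergy
#' }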
#' @importFrom assertthat assert_that
#' @importFrom tidyr gather
#' @importFrom dplyr bind_rows mutate group_by filter select matches rename full_join left_join if_else
#' @importFrom magrittr "%>%"
#' @importFrom readr write_csv
generate_ds_proxy_data <- function(pop_data_fn = "downscale/SSP_Population.csv",
gdp_data_fn = "downscale/SSP_GDP_PPP.csv",
transportenergy_data_t0_fn = "downscale/IEA_trn_ctry.csv",
use_modal_detail = TRUE,
apply_modal_elasticities = TRUE,
save_output = TRUE, create_dir = TRUE,
output_dir = DS_OUTPUT_DIR, ...){
assert_that(is.character(pop_data_fn))
assert_that(is.character(gdp_data_fn))
assert_that(is.character(transportenergy_data_t0_fn))
assert_that(is.logical(use_modal_detail))
assert_that(is.logical(apply_modal_elasticities))
assert_that(is.logical(save_output))
assert_that(is.logical(create_dir))
assert_that(is.character(output_dir))
# For population and GDP data, we just need to re-format the tables and extract the SSP name
ds_proxy_population <- load_data_file(pop_data_fn, quiet = TRUE) %>%
gather(year, value, matches(YEAR_PATTERN)) %>%
mutate(Scenario = substr(SCENARIO, 1, 4)) %>%
mutate(year = as.numeric(year)) %>%
filter(year %in% ITEM_YEARS) %>%
mutate(iso = toupper(REGION)) %>%
mutate(ds_proxy = "POPULATION") %>%
select(ds_proxy, Scenario, iso, year, value)
ds_proxy_gdp <- load_data_file(gdp_data_fn, quiet = TRUE) %>%
gather(year, value, matches(YEAR_PATTERN)) %>%
mutate(Scenario = substr(SCENARIO, 1, 4)) %>%
mutate(year = as.numeric(year)) %>%
filter(year %in% ITEM_YEARS) %>%
mutate(iso = toupper(REGION)) %>%
mutate(ds_proxy = "GDP") %>%
select(ds_proxy, Scenario, iso, year, value)
# For transportenergy, use population growth ratios multiplied by per-capita GDP growth ratios to expand
# available (base year) data to all iTEM years. This is the same as using total GDP, but is partitioned in order
# to allow the per-capita GDP growth to be assigned mode-specific elasticities.
# Calculate the population growth ratios from the base year
pop_baseyear_data <- subset( ds_proxy_population, year == BASEYEAR) %>%
select(-year) %>%
rename(base_pop = value)
pop_growth_ratios <- ds_proxy_population %>%
rename(pop = value) %>%
left_join(pop_baseyear_data, by = c("ds_proxy", "Scenario", "iso")) %>%
mutate(pop_growth_ratio = pop/base_pop) %>%
select(Scenario, iso, year, pop_growth_ratio)
# Calculate the per-capita GDP growth ratios from the base year
pcgdp_data <- left_join(ds_proxy_gdp, ds_proxy_population,
by = c("Scenario", "iso", "year"),
suffix = c("_gdp", "_pop")) %>%
mutate(pcgdp = value_gdp / value_pop) %>%
select(Scenario, iso, year, pcgdp)
pcgdp_baseyear_data <- subset( pcgdp_data, year == BASEYEAR) %>%
select(-year) %>%
rename(base_pcgdp = pcgdp)
pcgdp_growth_ratios <- pcgdp_data %>%
left_join(pcgdp_baseyear_data, by = c("Scenario", "iso")) %>%
mutate(pcgdp_growth_ratio = pcgdp/base_pcgdp) %>%
select(Scenario, iso, year, pcgdp_growth_ratio)
# Calculate the transportation proxy data
transportenergy_data_t0 <- load_data_file(transportenergy_data_t0_fn, quiet = TRUE) %>%
prepare_transportenergy_t0_data(use_modal_detail = use_modal_detail) %>%
filter( iso %in% pop_growth_ratios$iso)
ds_proxy_transportenergy <- pop_growth_ratios %>%
left_join(pcgdp_growth_ratios, by = c("Scenario", "iso", "year")) %>%
filter( iso %in% transportenergy_data_t0$iso) %>%
full_join(transportenergy_data_t0, by = "iso") %>%
mutate(base_value = if_else(is.na(base_value), 0, base_value)) %>%
mutate(ds_proxy = "TRANSPORTENERGY") %>%
mutate(value = base_value * pop_growth_ratio * pcgdp_growth_ratio) # default assumption is elasticity of 1
if( apply_modal_elasticities){
mode_GDP_elasticity <- load_data_file( "downscale/mode_GDP_elasticity.csv", quiet = TRUE)
ds_proxy_transportenergy <- ds_proxy_transportenergy %>%
left_join( mode_GDP_elasticity, by = "Mode") %>%
mutate( value = base_value * pop_growth_ratio * pcgdp_growth_ratio ^ elasticity )
} else {
ds_proxy_transportenergy <- mutate(ds_proxy_transportenergy,
Service = "All (if no detail)",
Vehicle_type = "All (if no detail)")
}
ds_proxy_transportenergy <- ds_proxy_transportenergy %>%
select(ds_proxy, Scenario, iso, Mode, Service, Vehicle_type, year, value)
proxy_list <- list( ds_proxy_population = ds_proxy_population,
ds_proxy_gdp = ds_proxy_gdp,
ds_proxy_transportenergy = ds_proxy_transportenergy)
# If indicated, write the data out to an intermediate output directory
if(save_output){
# Create the output directory, if indicated
if(create_dir) dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)
for(proxy_table in names(proxy_list)){
write_csv(proxy_list[[proxy_table]],
file = paste0(output_dir, "/", proxy_table, ".csv"))
}
}
return(proxy_list)
}
#' Get the country-to-region mappings for the project and optionally for a set of models
#'
#' Returns a list of data frames, each named according to the data source/model, each of which has two columns:
#' iso and Region.
#'
#' @param model_data_folder folder where the pre-processed model output data is located. If NA, this will return a
#' list with a single internal (included with the package) country-to-region mapping table for iTEM project regions.
#' @param model_names names of the models whose mappings should be loaded. If NA, the model names are taken from the
#'   sub-folder names of \code{model_data_folder}.
#' @param item_map_name the name of the dataframe with the iTEM project country-to-region mapping
#' @details This function is designed to return country-to-region mapping assignments as required by other functions in
#' the package. If a model data folder is provided, the function requires that the mapping lists be within this folder
#' as model/regions.yaml. If no such folder is provided, then the returned list will only have the internal
#' (i.e., this project's) country-to-region mapping.
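#' @examples
#' \dontrun{
#' # Sketch: each regions.yaml is expected to nest ISO codes under a
#' # "countries" key per region, e.g. (region and ISO values hypothetical):
#' #   NAM:
#' #     countries: [USA, CAN]
#' # which is flattened to a data frame with columns Region and iso
#' maps <- load_country_region_mappings("models", c("MODEL_A"))
#' head(maps$item)     # iTEM project regions
#' head(maps$MODEL_A)  # the model's native regions
#' }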
#' @importFrom assertthat assert_that
#' @importFrom yaml yaml.load_file
#' @importFrom dplyr rename
#' @importFrom magrittr "%>%"
#' @importFrom data.table rbindlist
#' @export
load_country_region_mappings <- function(model_data_folder = NA_character_, model_names = NA_character_, item_map_name = "item", ...){
assert_that(is.character(model_data_folder))
assert_that(is.character(model_names))
assert_that(is.character(item_map_name))
country_region_maps <- list()
item_region_assignment <- load_data_file("downscale/regions.yaml", quiet = TRUE) %>%
# Strip any other meta-info and transform the yaml list into a dataframe of regions and countries
lapply(function(x){x <- x['countries']}) %>%
rbindlist(idcol = "Region") %>%
rename(iso = countries)
country_region_maps[[item_map_name]] <- item_region_assignment
# If a folder with model-specific mappings is provided, add each model to the list
# Otherwise, just return the iTEM country-to-region assignments
if(!is.na(model_data_folder)){
assert_that(is.character(model_data_folder))
# If model_names were passed from another function, use that. Otherwise, retrieve them from the folder names
# By default, this will take all folders, excluding several (item, template) that may exist but are not models
if(all(is.na(model_names))){
model_names <- list.dirs(model_data_folder, full.names = FALSE, recursive = FALSE)
model_names <- model_names[!model_names %in% c("item", "template")]
}
for(model_name in model_names){
fqfn <- paste0(model_data_folder, "/", model_name, "/metadata/regions.yaml")
model_region_assignment <- yaml.load_file(fqfn) %>%
lapply(function(x){x <- x['countries']}) %>%
rbindlist(idcol = "Region") %>%
rename(iso = countries)
country_region_maps[[model_name]] <- model_region_assignment
} #end for(model_name in model_names)
} # end if(!is.na(model_data_folder))
return(country_region_maps)
} # end function
#' Generate base-year transportation dataset from which to construct the transport energy downscaling proxy
#'
#' Perform some pre-processing of the base-year transportation data that will be expanded to all years and used as a
#' downscaling proxy in \code{\link{generate_ds_proxy_data}}.
#'
#' @param datafile data table with transportation energy consumption
#' @param mode_mapping_file mapping from mode in the transportation energy data to the iTEM modes
#' @details The transportation proxy data has modes which may or may not be used, and if they are used, they are
#' somewhat different from the iTEM modes. This function performs the modal pre-processing to construct a useful
#' base-year dataset which can be expanded to all iTEM years for each socioeconomic realization.
#' @importFrom assertthat assert_that
#' @importFrom tidyr complete
#' @importFrom dplyr bind_rows full_join group_by if_else left_join mutate rename select summarise ungroup
#' @importFrom magrittr "%>%"
prepare_transportenergy_t0_data <- function( country_data,
use_modal_detail = TRUE,
mode_mapping_fn = "downscale/map_mode_IEA.csv", ...){
assert_that(is.data.frame(country_data))
assert_that(is.logical(use_modal_detail))
assert_that(is.character(mode_mapping_fn))
# load and pre-process the country-level data
country_data <- country_data %>%
mutate(iso = toupper(iso)) %>%
rename(base_value = value)
# Aggregate all modes, for downscaling data from models only reporting mode "All"
country_data_allmodes <- country_data %>%
group_by(iso) %>%
summarise(base_value = sum(base_value)) %>%
ungroup() %>%
mutate(Mode = "All (if no detail)") %>%
select(iso, Mode, base_value)
#If no modal detail is requested, this is the final output of this function
if(!use_modal_detail){
return(country_data_allmodes)
} else {
# load the mode mapping file
mode_mapping <- load_data_file(mode_mapping_fn, quiet = TRUE)
country_data <- country_data %>%
# combine international and domestic air which are not separated in iTEM
mutate(FLOW = if_else(FLOW %in% c("AVBUNK", "DOMESAIR"), "AIR", FLOW),
FLOW = if_else(FLOW == "DOMESNAV", "MARBUNK", FLOW)) %>%
select(iso, FLOW, base_value) %>%
group_by(iso, FLOW) %>%
summarise(base_value = sum(base_value)) %>%
ungroup() %>%
left_join(mode_mapping, by = "FLOW") %>%
select(iso, Mode, base_value) %>%
# Fill this out to all combinations of country x mode
complete(iso, Mode = unique(Mode), fill = list(base_value = 0)) %>%
bind_rows(country_data_allmodes)
return(country_data)
}
}
#' Aggregate across selected categories of quantity flow data, looping through all permutations
#'
#' Inputs are a data frame with model data, and a character vector of ID variables to loop through. All possible
#' permutations of the variables provided are returned in the output, with no redundant categories written out.
#' Returns the initial data frame with permutations appended.
#'
#' @param input_data data table with quantity (flow) variables
#' @param collapse_vars character string indicating the variables (columns) whose values will be collapsed (aggregated)
#' to "All"
#' @details The method implemented here assumes that all itemized elements of each variable add up to the total. Any
#' excluded elements will not be part of the reported total, and redundant categories (e.g., gasoline and total
#' liquid fuels) will be double counted.
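#' @examples
#' \dontrun{
#' # Sketch: append "All" aggregates to a quantity data frame; "region_data"
#' # is assumed to hold prepared iTEM-format quantity data
#' with_alls <- aggregate_all_permutations(region_data)
#' # Collapsing only the Region column adds global totals, as done internally
#' # by aggregate_item_regions()
#' with_global <- aggregate_all_permutations(region_data, collapse_vars = "Region")
#' }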
#' @importFrom assertthat assert_that
#' @importFrom dplyr anti_join bind_rows distinct filter group_by_at inner_join select summarise ungroup
#' @importFrom magrittr "%>%"
#' @export
aggregate_all_permutations <- function(input_data,
collapse_vars = ITEM_IDVARS_WITH_ALLS,
mode_service_fn = "downscale/aggregations_serv_mode_veh.csv",
tech_fuel_fn = "downscale/aggregations_tech_fuel_liq.csv", ...){
assert_that(is.data.frame(input_data))
assert_that(is.character(collapse_vars))
assert_that(is.character(mode_service_fn))
assert_that(is.character(tech_fuel_fn))
output_data <- input_data
output_data[output_data == "All (if no detail)"] <- "All"
group_columns <- names(output_data)[ names(output_data) != "value"]
# If exogenous aggregation mappings are read in, load them and revise the list of variables to be collapsed individually
if(!is.na(mode_service_fn)){
mode_service_aggregations <- load_data_file(mode_service_fn, quiet = TRUE)
serv_mode_joinvars <- names(output_data[names(output_data) %in% names(mode_service_aggregations)])
collapse_vars <- collapse_vars[!collapse_vars %in% names(mode_service_aggregations)]
output_aggregated_serv <- output_data %>%
inner_join(mode_service_aggregations, by = serv_mode_joinvars) %>%
select(-serv_mode_joinvars)
names(output_aggregated_serv) <- gsub("rev_", "", names(output_aggregated_serv))
output_aggregated_serv <- group_by_at(output_aggregated_serv,
group_columns) %>%
summarise(value = sum(value)) %>%
ungroup()
output_data <- bind_rows(output_data, output_aggregated_serv)
}
if(!is.na(tech_fuel_fn)){
tech_fuel_aggregations <- load_data_file(tech_fuel_fn, quiet = TRUE)
tech_fuel_joinvars <- names(output_data[names(output_data) %in% names(tech_fuel_aggregations)])
collapse_vars <- collapse_vars[!collapse_vars %in% names(tech_fuel_aggregations)]
output_aggregated_tech <- output_data %>%
inner_join(tech_fuel_aggregations, by = tech_fuel_joinvars) %>%
select(-tech_fuel_joinvars)
names(output_aggregated_tech) <- gsub("rev_", "", names(output_aggregated_tech))
output_aggregated_tech <- group_by_at(output_aggregated_tech,
group_columns) %>%
summarise(value = sum(value)) %>%
ungroup()
# Some of the model-provided categories may be dropped at this stage
tech_fuel_to_drop <- filter(tech_fuel_aggregations, Drop_orig) %>%
select(tech_fuel_joinvars) %>%
distinct()
output_data <- anti_join(output_data, tech_fuel_to_drop, by = tech_fuel_joinvars) %>%
bind_rows(output_aggregated_tech)
}
# Perform the individually aggregated (collapsed) variables
if(length(collapse_vars) > 0){
for(var in collapse_vars){
print(paste0("Collapsing ", var, " to All and appending"))
# Where this variable is already "All", exclude from the aggregation (would result in double counting)
assert_that(!is.null(output_data[[var]]))
output_thisvar <- output_data[output_data[[var]] != "All" & !is.na(output_data[[var]]),]
# Exclude missing values
if(nrow(output_thisvar) > 0){
output_thisvar[[var]] <- "All"
output_thisvar <- output_thisvar %>%
group_by_at(group_columns) %>%
summarise(value = sum(value)) %>%
ungroup()
output_data <- bind_rows(output_data, output_thisvar)
} # end if(nrow(output_thisvar) > 0)
# the output of each loop is passed to the input of the next loop
} # end for(var in collapse_vars)
} # end if(length(collapse_vars) > 0)
# Where models reported incomplete cuts through a database, rows may be generated with duplicate ID information and
# potentially different data values. Differences may be due to aggregated rounding errors, or data that is simply
# unreported for selected variables at a given level of aggregation. This step drops redundant rows, and assumes that
# where all ID information is identical, the higher value is the correct one to report.
output_data <- group_by_at(output_data, group_columns) %>%
summarise(value = max(value)) %>%
ungroup()
return(output_data)
}
#' Get internal variable mapping assignments (with option to pull an external file instead)
#'
#' @param mapping_fn file path and name of variable mapping
#' @details Takes in a csv table with variables and associated categories. Defaults to the one provided with the item
#' package, but can be set to a different one.
#' @importFrom assertthat assert_that
get_variable_mapping <- function(mapping_fn = "downscale/variable_ds_proxy.csv", ...){
assert_that(is.character(mapping_fn))
variable_mapping <- load_data_file(mapping_fn, quiet = TRUE)
return(variable_mapping)
}
#' Remove redundant "All" values from selected variables (columns)
#'
#' An "All" value for an ID column may be the only data available, or it may be derived from the sum of available
#' components. Except for diagnostic purposes, "All" and its sub-components should never be downscaled independently due
#' to potentially different proxies. Instead, where component-level data are available, "All" should be computed
#' post-hoc as the sum of the components. This function drops redundant "All" values (i.e., values that could be
#' computed from provided components).
#'
#' Note that the method assumes that if any components are provided for a given variable that has "All" reported, then
#' all necessary components to re-calculate the reported "All" value are provided. However, this condition is not
#' checked numerically, and the method will result in a loss of data if a model provided a category called "All" and
#' some, but not all, of its sub-components.
#'
#' @param data data frame with model-reported quantity variables
#' @param variables character vector naming the ID columns from which redundant "All" values should be removed
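#' @examples
#' \dontrun{
#' # Sketch: drop "All" rows wherever the constituent categories are also
#' # reported; "model_df" is assumed to be a prepared model data frame
#' deduplicated <- remove_redundant_alls(model_df)
#' }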
#' @importFrom assertthat assert_that
#' @importFrom magrittr "%>%"
#' @importFrom dplyr anti_join bind_rows
remove_redundant_alls <- function(data, variables = ITEM_IDVARS_WITH_ALLS, ...){
assert_that(is.data.frame(data))
assert_that(is.character(variables))
for(idvar in variables){
other_idvars <- ITEM_ID_COLUMNS[which(ITEM_ID_COLUMNS != idvar)] #"by" variables for anti-join
data_na <- data[is.na(data[[idvar]]),] # Keep all observations with NA for this variable
data_no_alls <- data[data[[idvar]] != "All" & data[[idvar]] != "All (if no detail)" & !is.na(data[[idvar]]),]
data_alls <- data[ data[[idvar]] %in% c("All", "All (if no detail)") & !is.na(data[[idvar]]), ] %>%
anti_join(data_no_alls, by = other_idvars) # Only keep observations with "All" where the sub-categories are not available
data <- bind_rows(data_na, data_no_alls, data_alls)
}
return(data)
}
#' Extract global data from model-specific data list to a data frame.
#'
#' Simple wrapper function to extract global data (region == "All") from list of model output data. Returns global-only
#' data if executed after (\code{\link{remove_redundant_alls}}), as called from
#' (\code{\link{prepare_preprocessed_data}})
#'
#' @param model_data list of model-specific data frames from which to extract the global data
#' @param input_region_col name of the region column in the input data
#' @param output_region_col name of the region column in the output (return) data
#' @details Modeling teams may provide some data at the global level only, i.e., with no regional detail available. Such
#' data is neither downscaled to countries/regions nor dropped. This function generates a data frame with global-only
#' data from all models that can be bound to other data frames with regional detail.
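#' @examples
#' \dontrun{
#' # Sketch: pull global-only rows, relabeling the region column "iso" so the
#' # result can be bound to downscaled country-level data; "prepared" is
#' # assumed to come from prepare_preprocessed_data()
#' global_rows <- extract_global_data_to_df(prepared, output_region_col = "iso")
#' }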
#' @importFrom assertthat assert_that
#' @importFrom dplyr bind_rows
#' @export
extract_global_data_to_df <- function(model_data, input_region_col = "Region",
output_region_col = "Region", ...){
assert_that(is.list(model_data))
assert_that(is.character(input_region_col))
assert_that(is.character(output_region_col))
global_data <- do.call(
bind_rows,
lapply(model_data, function(x){
x <- x[x[[input_region_col]] == "All", ]
})
)
names(global_data)[ names(global_data) == input_region_col] <- output_region_col
return(global_data)
}
#' Aggregate country-level quantity flow data to iTEM analysis regions
#'
#' This function takes in a data frame with quantity flow data by ISO code and a mapping from ISO code to iTEM region.
#' It joins in the appropriate iTEM region for each country ISO code, aggregates by these regions, and returns a
#' dataframe.
#'
#' @param downscaled_data data frame with downscaled (i.e. country-level) data. Can also include global totals, assigned
#' to iso code "All".
#' @param item_region_map data table mapping from 3-digit ISO code to iTEM region, called from
#' (\code{\link{load_country_region_mappings}})
#' @param compute_global_totals logical indicating whether to compute the global totals. The default is FALSE, as it is
#' assumed that this step has been conducted under (\code{\link{aggregate_all_permutations}}).
#' @details The mapping from ISO code to iTEM region is user-specified with a provided default.
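#' @examples
#' \dontrun{
#' # Sketch: roll country-level data up to iTEM regions with the package's
#' # internal mapping; "downscaled" is assumed to come from
#' # downscale_flow_variables()
#' item_regions <- aggregate_item_regions(downscaled)
#' }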
#' @importFrom assertthat assert_that
#' @importFrom magrittr "%>%"
#' @importFrom dplyr bind_rows filter group_by_at left_join rename summarise ungroup
#' @export
aggregate_item_regions <- function(downscaled_data,
item_region_map = load_country_region_mappings()[['item']],
compute_global_totals = FALSE, ...){
assert_that(is.data.frame(downscaled_data))
assert_that(is.data.frame(item_region_map))
assert_that(is.logical(compute_global_totals))
global_data <- downscaled_data %>%
filter(iso == "All") %>%
rename(Region = iso)
region_data <- downscaled_data %>%
filter(iso != "All") %>%
left_join(item_region_map, by = "iso") %>%
group_by_at(ITEM_ID_COLUMNS) %>%
summarise(value = sum(value)) %>%
ungroup() %>%
bind_rows(global_data)
if(compute_global_totals){
region_data <- aggregate_all_permutations(region_data, collapse_vars = "Region")
}
return(region_data)
}
#' Derive new variables from existing variables in a dataset
#'
#' This function takes in a dataframe with model data, and a variable derivation CSV table with
#' instructions detailing what the new variables will be named, how they will be calculated, and what
#' their units will be.
#'
#' @param model_data_df data frame with pre-processed/corrected iTEM data in the appropriate format
#' @param mapping dataframe specifying the variables to be created, along with instructions
#' for creating them and the units of the new variable
#' @param drop_weighted_indicators logical (default = FALSE) indicating whether to drop "weighted indicator" variables
#' whose purpose is to allow computation of indicator variables, generally after some aggregations or re-assignments
#' have been performed. Weighted indicators are generally excluded from final template reporting.
#' @param bind_derived_data logical (default = TRUE) indicating whether to return a data frame with the original model
#' data bound to the output of this function (TRUE), or to only return the variables calculated by this function
#' (FALSE)
#' @details This function derives new variables from existing variables in the data frame. Each new variable is derived
#' from two and only two existing variables. The operations allowed are +, *, and /. Unit conversions are applied
#' multiplicatively. If the operation is addition and a unit conversion is provided, the conversion is applied to the
#' sum of the two variables.
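#' @examples
#' \dontrun{
#' # Sketch: derive an intensity variable from two reported quantities. The
#' # mapping row is hypothetical but uses the columns the function expects
#' # (newvar, var1, var2, operation, unit, unit_conversion, join_byvars)
#' mapping <- data.frame(newvar = "Energy intensity", var1 = "Energy",
#'                       var2 = "Distance", operation = "/", unit = "MJ/km",
#'                       unit_conversion = 1,
#'                       join_byvars = "Model,Scenario,Region,year",
#'                       stringsAsFactors = FALSE)
#' derived <- derive_variables(model_df, mapping = mapping)
#' }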
#' @importFrom assertthat assert_that
#' @importFrom magrittr %>%
#' @importFrom dplyr bind_rows filter inner_join mutate pull select
#' @export
derive_variables <- function(model_data_df,
mapping = get_variable_mapping("downscale/indicators.csv"),
drop_weighted_indicators = FALSE,
bind_derived_data = TRUE, ...){
assert_that(is.data.frame(model_data_df))
assert_that(is.data.frame(mapping))
assert_that(is.logical(drop_weighted_indicators))
assert_that(is.logical(bind_derived_data))
output_data_cols <- names(model_data_df)
derived_data <- list()
for(i in seq_len(nrow(mapping))){
# Select only ID columns from the "var2" data frame that are relevant for the join
join_byvars <- unlist(strsplit(mapping$join_byvars[i], ","))
model_data_var1 <- subset( model_data_df, Variable == mapping$var1[i])
model_data_var2 <- subset(model_data_df, Variable == mapping$var2[i]) %>%
select(c(join_byvars, "value"))
# The operations allowed are +, *, and /
assert_that(mapping$operation[i] %in% c("+", "*", "/"))
# Join the var1 and var2 data. The rest of the operations are all conditional
derived_data[[i]] <- inner_join(model_data_var1, model_data_var2,
by = join_byvars,
suffix = c("_var1", "_var2")) %>%
mutate(Variable = mapping$newvar[i],
Unit = mapping$unit[i])
# If the necessary data to derive the new variable are not available, generate an empty value column
if (nrow(derived_data[[i]]) == 0) {
derived_data[[i]]$value <- as.numeric(character(0))
} else {
print(paste0("Deriving new variable: ", mapping$newvar[i]))
derived_data[[i]] <-
subset(derived_data[[i]],!is.na(value_var1) & !is.na(value_var2))
if (mapping$operation[i] == "+") {
if (is.na(mapping$unit_conversion[i])) {
derived_data[[i]] <-
derived_data[[i]] %>% mutate(value = value_var1 + value_var2)
} else {
derived_data[[i]] <-
derived_data[[i]] %>% mutate(value = (value_var1 + value_var2) * mapping$unit_conversion[i])
}
} else if (mapping$operation[i] == "*") {
derived_data[[i]] <-
derived_data[[i]] %>% mutate(value = value_var1 * value_var2 * mapping$unit_conversion[i])
} else if (mapping$operation[i] == "/") {
derived_data[[i]] <-
derived_data[[i]] %>% mutate(value = value_var1 / value_var2 * mapping$unit_conversion[i])
}
} # end if (nrow(derived_data[[i]]) == 0) else
derived_data[[i]] <- derived_data[[i]][output_data_cols]
} # end for i in 1:nrow(mapping)
output_data <- do.call(bind_rows, derived_data)
if(bind_derived_data){
output_data <- bind_rows(model_data_df, output_data)
if(drop_weighted_indicators){
print("Note: Dropping weighted indicator variables that are not reported in the final product")
weighted_indicator_variables <- get_variable_mapping("downscale/weighted_indicators.csv") %>%
pull(newvar)
output_data <- filter(output_data, !Variable %in% weighted_indicator_variables)
}
}
return(output_data)
}
#' Perform iTEM data processing, producing region- and/or country-level database(s) from submitted model output
#'
#' This is a wrapper that performs all steps for inter-model harmonization in iTEM: data loading, pre-processing,
#' determination of nation-level socioeconomic scenarios for downscaling, downscaling, computation of "All" subtotals,
#' re-aggregation (if requested), deriving new variables from the existing set, and saving the output.
#'
#' @param model_data_folder folder where the pre-processed model output data is located
#' @param model_names names of the models (case sensitive)
#' @param output_folder folder where the harmonized output dataset will be saved
#' @param non_global_models names of any models that do not report complete global coverage, whose "Global" totals are
#'   therefore excluded from the region-level output
#' @param write_item_region_data logical (default = TRUE) indicating whether to write out a dataset aggregated to item
#' reporting regions. If FALSE, no iTEM region level data will be returned.
#' @param write_item_country_data logical (default = FALSE) indicating whether to write out a dataset with all variables
#' reported at the country level. Warning: the dataset may be quite large.
#' @param return_output logical (default = FALSE) indicating whether to return an object in the R environment
#' @param spread_by_years logical (default = TRUE) indicating whether to "spread" the output so that the reporting years
#' are columns and values are listed within the year columns
#' @details This function applies nearly all other item downscaling functions. While it only requires a few arguments,
#' any additional arguments provided will be passed to other functions. The list of functions called is provided here:
#' \code{\link{load_preprocessed_data}}, \code{\link{prepare_preprocessed_data}},
#' \code{\link{load_country_region_mappings}}, \code{\link{compute_country_shares}},
#' \code{\link{assign_socioeconomic_scenario}}, \code{\link{downscale_flow_variables}},
#' \code{\link{extract_global_data_to_df}}, \code{\link{aggregate_all_permutations}},
#' \code{\link{aggregate_item_regions}}, \code{\link{derive_variables}}, \code{\link{save_output}}
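#' @examples
#' \dontrun{
#' # Sketch of the full pipeline; the folder and model names are hypothetical
#' results <- perform_item_data_processing(model_data_folder = "models",
#'                                         model_names = c("MODEL_A", "MODEL_B"),
#'                                         output_folder = "output",
#'                                         return_output = TRUE)
#' }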
#' @importFrom assertthat assert_that
#' @importFrom magrittr %>%
#' @importFrom dplyr bind_rows filter if_else mutate rename
#' @importFrom tidyr spread
#' @export
perform_item_data_processing <- function(model_data_folder,
model_names,
output_folder,
non_global_models = NA_character_,
write_item_region_data = TRUE,
write_item_country_data = FALSE,
return_output = FALSE,
spread_by_years = TRUE, ...){
assert_that(is.character(model_data_folder))
assert_that(is.character(model_names))
assert_that(is.character(output_folder))
assert_that(is.character(non_global_models))
assert_that(is.logical(write_item_region_data))
assert_that(is.logical(write_item_country_data))
assert_that(is.logical(return_output))
assert_that(is.logical(spread_by_years))
model_data <- load_preprocessed_data(model_data_folder, model_names) %>%
prepare_preprocessed_data(...)
country_region_maps <- load_country_region_mappings(model_data_folder, model_names, ...)
country_share <- compute_country_shares(model_data, country_region_maps, ...)
model_socio <- assign_socioeconomic_scenario(model_data, country_region_maps, ...)
downscaled_data <- downscale_flow_variables(model_data, model_socio, country_share, ...) %>%
bind_rows(extract_global_data_to_df(model_data, output_region_col = "iso", ...))
output_list <- list()
if(write_item_region_data){
item_region_data <- aggregate_item_regions(downscaled_data) %>%
aggregate_all_permutations(...) %>%
derive_variables(drop_weighted_indicators = TRUE, ...) %>%
mutate(Model = tolower(Model),
Region = if_else(Region == "All", "Global", Region),
value = signif(value, SIGNIFICANT_FIGURES)) %>%
filter(!(Model %in% non_global_models & Region == "Global"))
if(spread_by_years) item_region_data <- spread(item_region_data, key = year, value = value)
print("Generating database at the level of iTEM regions")
if(return_output) output_list[['item_region_data']] <- item_region_data
save_output(item_region_data, output_folder, ...)
}
if(write_item_country_data){
downscaled_data <- rename(downscaled_data, Region = iso) %>%
aggregate_all_permutations(...)
item_country_data <- derive_variables(downscaled_data, drop_weighted_indicators = TRUE, ...) %>%
mutate(Region = if_else(Region == "All", "Global", Region),
value = signif(value, SIGNIFICANT_FIGURES))
print(paste0("Generating database at the level of ",
length(unique(item_country_data$Region)), " countries"))
if(spread_by_years) item_country_data <- spread(item_country_data, key = year, value = value)
if(return_output) output_list[['item_country_data']] <- item_country_data
save_output(item_country_data, output_folder, ...)
}
if(return_output) return(output_list)
}