# GCAMFigsBase() ----------------------------------------------------------
#' Constructs figure generating objects
#'
#' An rgcam project file is restructured so that queries are the top layer of a list,
#' with each entry holding the query data for all scenarios.
#'
#' Any queries that have a "ghg" column are separated by GHG. See breakoutGHGs() for details.
#'
#' The processed project data is assigned a class according to the queries it holds. See
#' system.file("classes.csv", package = "ValidationFigures") for the queries that compose
#' each class. Currently throws an error if the project data is assigned more than one
#' query-class because this would disrupt method dispatch.
#'
#' The final step is to calculate whatever additional queries are possible given those
#' already contained in project data. For example, in the land class, if queries
#' "Land Allocation" and "Ag Production by Crop Type" are present, "Average Yield" will be
#' calculated automatically. See calculateQueries() in script calculations.R for
#' the queries that can be calculated for each class.
#'
#' The set of queries is taken from the first scenario in `proj`.
#'
#' @param proj rgcam project, a list of scenarios
#' @param transf list of functions, each named for query it restructures
#' @return list of dataframes, one per query, with the matched query-class (if any)
#'   prepended to its class attribute
#' @export
GCAMFigs <- function(proj, transf) {
  # check for missing input (stop() is the base R error signal; error() does not exist)
  if (missing(proj)) {
    stop("Need to provide rgcam project file", call. = FALSE)
  }
  if (missing(transf)) {
    stop("Need to provide list of transformation functions", call. = FALSE)
  }
  # proj[[1]] below would fail with a cryptic subscript error on an empty project
  if (length(proj) == 0) {
    stop("rgcam project file holds no scenarios", call. = FALSE)
  }

  # grab scenarios and queries from project file
  scenarios <- names(proj)
  queries <- names(proj[[1]]) # TODO: check we have queries for all scenarios

  # pull queries to top-level of list. transformation functions applied to queries
  list.queries <- dataRestructure(proj, transf, scenarios, queries)

  # make groups of GHG's into own queries
  list.queries <- breakoutGHGs(list.queries)

  # assign classes according to queries in project data
  # each column of the lookup holds the queries for an individual class
  lookup.path <- system.file("classes.csv", package = "ValidationFigures")
  # system.file() returns "" when the file is not found, and read.csv("") would
  # then try to read from stdin instead of failing
  if (!nzchar(lookup.path)) {
    stop("Could not locate classes.csv in package ValidationFigures", call. = FALSE)
  }
  lookup <- read.csv(lookup.path)

  # prepend the class of each column in lookup whose queries appear in project data
  for (qclass in names(lookup)) {
    if (any(queries %in% lookup[[qclass]])) {
      class(list.queries) <- c(qclass, class(list.queries))
    }
  }

  # class(list.queries) contains "list" and all query-classes identified by lookup
  # throw error if more than one query-class identified by lookup
  # TODO: allow multiple query-classes ->
  #   execute all relevant methods of calculateQueries()
  #   execute all relevant methods of barchart()
  if (length(class(list.queries)) > 2) {
    stop("Queries in project file belong to multiple classes! Check system.file('classes.csv')")
  }

  # add calculated queries to list
  print("Calculating queries...")
  list.queries <- calculateQueries(list.queries)
  print("Done!")

  # return final figure generation object
  list.queries
}
# dataRestructure() -------------------------------------------------------
#' Pulls queries to top level of project data
#'
#' Used in GCAMFigs() to initialize the figure generation object. Native rgcam project data
#' is structured as a list of scenarios, where each scenario is a list of queries. Project
#' data needs to be structured as a list of queries, where each query is a single dataframe.
#' Query data for all scenarios needs to be saved as a single data.frame in order to produce
#' plots that compare scenarios.
#'
#' User-provided transformation functions can aggregate over unnecessary columns, reformat
#' columns (eg: split on "_"), or map the keys in a column to a different set of keys with a
#' mapping file.
#'
#' A scenario that does not hold a given query is logged and skipped. An error is raised if
#' a query that is present in a scenario has no matching function in `transf`.
#'
#' @param proj rgcam project, a list of scenarios
#' @param transf list of functions, each named for query it restructures
#' @param scenarios character vector, scenarios in project data
#' @param queries character vector, queries in project data
#' @return list of dataframes, named by query
dataRestructure <- function(proj, transf, scenarios, queries) {
  print("Pulling queries...")
  list.queries <- lapply(queries, function(query) {
    list.scenarios <- lapply(scenarios, function(scenario) {
      # print log if scenario doesn't hold query
      if (!query %in% names(proj[[scenario]])) {
        print(paste0("...", query, " not found in ", scenario))
        return(NULL)
      }
      # fail with the query name rather than "attempt to apply non-function"
      transform <- transf[[query]]
      if (!is.function(transform)) {
        stop("No transformation function provided for query '", query, "'",
             call. = FALSE)
      }
      # return transformed query data for scenario
      print(paste0("...", query, ", ", scenario))
      transform(proj[[scenario]][[query]])
    })
    # bind all scenarios for same query (NULL entries contribute no rows)
    bind_rows(list.scenarios)
  })
  print("Queries pulled!")

  # named list of queries
  names(list.queries) <- queries
  list.queries
}
# breakoutGHGs() ----------------------------------------------------------
#' Reformats emissions queries to be grouped by GHG
#'
#' Some pollutant species appear as three versions ("CH4", "CH4_AGR", "CH4_AWB"). This data
#' would originally appear under a query such as "GHG emissions by region". This function
#' reformats list.queries into separate emissions queries for each GHG. "GHG emissions by
#' region (CH4)" would be its own entry in list.queries, while still distinguishing between
#' the three species under the ghg column of that query's dataframe.
#'
#' SO2 appears to have 4 variants: SO2_1, SO2_1_AWB, SO2_2, ..., SO2_4_AWB. These 8 species
#' are all contained in the "GHG emissions by region (SO2)" query after this function is
#' applied to list.queries.
#'
#' The 'base' GHG of a species is the text before its first "_". Rows are assigned to a base
#' by exact match on that text, so "CO" and "CO2" end up in separate queries. Rows with a
#' missing ghg value are not assigned to any base. A query whose ghg column holds no
#' non-missing values is left unchanged.
#'
#' @param list.queries list of queries, each query a single data.frame
#' @return list of dataframes; each query with a "ghg" column is replaced by one query per
#'   base GHG, appended to the end of the list
breakoutGHGs <- function(list.queries) {
  # grab list of queries up front; the list itself is modified inside the loop
  queries <- names(list.queries)

  for (query in queries) {
    df <- list.queries[[query]]

    # only queries with a "ghg" column are regrouped
    if (!"ghg" %in% names(df)) {
      next
    }

    # base ghg of every row (CH4 is the base for CH4, CH4_AWB, and CH4_AGR)
    ghgs <- as.character(df[["ghg"]])
    row.base <- sub("_.*$", "", ghgs)
    ghgs.base <- unique(row.base[!is.na(row.base)])

    # nothing to split on (no rows, or all ghg values missing): keep query as is
    if (length(ghgs.base) == 0) {
      next
    }

    # replace single emissions query with multiple emissions queries, separated by GHG
    list.queries[[query]] <- NULL

    for (ghg.base in ghgs.base) {
      # exact match on the base; a substring/regex match would put CO2 rows under CO
      in.base <- !is.na(row.base) & row.base == ghg.base

      # log message showing all ghgs associated w/ ghg.base
      print(paste0(ghg.base, ": ", paste0(unique(ghgs[in.base]), collapse = ", ")))

      # grab rows that pertain to ghg.base, with row names renumbered from 1
      df.filt <- df[in.base, , drop = FALSE]
      rownames(df.filt) <- NULL

      # drop filtered emissions query into new emissions query title
      new_query <- paste0(query, " (", ghg.base, ")")
      list.queries[[new_query]] <- df.filt
    }

    # log message
    print(paste0("Regrouped GHG's in ", query))
  }

  # return project data
  list.queries
}
# End ---------------------------------------------------------------------
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.