R/calculations.R

# calculateQueries() ------------------------------------------------------


#' Calculate additional queries
#'
#' This is the generic that dispatches different methods (below) according to class of list.q
#'
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries <- function(list.q) {
  UseMethod("calculateQueries")
}

# calculateQueries.electricity() ------------------------------------------
#' Additional queries calculated for class=electricity
#'
#' None
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.electricity <- function(list.q) {
  print("No electricity queries to calculate!")
  list.q
}

# calculateQueries.socioeconomics() ------------------------------------------
#' Additional queries calculated for class=socioeconomics
#'
#' None
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.socioeconomics <- function(list.q) {
  print("No socioeconomics queries to calculate!")
  list.q
}

# calculateQueries.water() ------------------------------------------------
#' Additional queries calculated for class=water
#'
#' Calculated Query ~ Input Query/Queries
#' Electricity Cooling Technology Shares ~ Withdrawals by sector: Electricity Total
#'
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.water <- function(list.q) {

  # calculate shares of total water used to cool electricity generating technologies
  if ("Withdrawals by sector: Electricity Total" %in% names(list.q)) {
    print("...Electricity Cooling Technology Shares")

    # grab query data
    df <- list.q[["Withdrawals by sector: Electricity Total"]]

    # calculate total water withdrawn
    tot <- df %>%
      group_by(Units, scenario, forcing, branch, year, region) %>%
      summarise(total = sum(value)) %>%
      ungroup()

    # share = water withdrawn for 1 technology / total water withdrawn
    shr <- df %>%
      left_join(tot,
                by = c("Units", "scenario", "forcing", "branch", "year", "region")) %>%
      mutate(share = value / total) %>%
      select(-value,-total) %>% # drop original value and calculated total
      dplyr::rename(value = share) %>% # use calculated share as this query's reported value
      mutate(Units = "share")

    list.q[["Electricity Cooling Technology Shares"]] <- shr
  } else {
    print("No water queries to calculate!")
  }
  list.q
}

# calculateQueries.land() -------------------------------------------------

#' Additional queries calculated for class=land
#'
#' Calculated Query ~ Input Query/Queries
#'
#' Water Management Shares ~ Land Allocation
#' Fertilizer Tech Shares ~ Land Allocation
#' Average Yield ~ Land Allocation, Ag Production by Crop Type
#'
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.land <- function(list.q) {

  # fertilizer & water tech shares of total land area
  if ("Land Allocation" %in% names(list.q)) {

    # grab query data
    land <- list.q[["Land Allocation"]]

    # water management type has its own column, but contains multiple keys (RFD/IRR)
    # we use calcShare(key, column, df) to calculate the share of total land area
    # that employs each water management type (identified by key)
    print("...Water Management Shares")
    keys <- unique(land$water)[unique(land$water) != ""]
    list.q[["Water Management Shares"]] <- keys %>% # we want to loop over each non-empty key value
      lapply(calcShare, water, land) %>% # water identifies the column that contains our set of keys
      bind_rows()

    # fertilizer tech has its own column, but contains multiple keys (hi/lo)
    # we use calcShare(key, column, df) to calculate the share of total land area
    # that utilizes each fertilizer tech (identified by key)
    print("...Fertilizer Tech Shares")
    keys <- unique(land$fertilizer)[unique(land$fertilizer) != ""]

    # drop new query into project data
    list.q[["Fertilizer Tech Shares"]] <- keys %>% # we want to loop over each non-empty key value
      lapply(calcShare, fertilizer, land) %>% # land identifies the column that contains our set of keys
      bind_rows() # unlist and rbind output of lapply()
  }

  # average crop production per unit land area
  if ("Land Allocation" %in% names(list.q) & "Ag Production by Crop Type" %in% names(list.q)) {
    print("...Average Yield")

    # grab query data
    land <- list.q[["Land Allocation"]]
    output <- list.q[["Ag Production by Crop Type"]]

    # area by landtype
    land <- land %>%
      group_by(Units, scenario, forcing, branch, year, region, land_type) %>%
      summarise(area = sum(value)) %>%
      ungroup()
    # crop production by landtype
    output <- output %>%
      group_by(Units, scenario, forcing, branch, year, region, land_type) %>%
      summarise(prod = sum(value)) %>%
      ungroup()
    # crop production / land area
    avgYield <- output %>%
      left_join(land, by=c( "scenario", "forcing", "branch", "year", "region", "land_type")) %>%
      mutate(value = prod/area,
             Units = paste0(Units.x, "/", Units.y)) %>%
      select(-prod, -area, -Units.y, -Units.x)

    # drop new query into project data
    list.q[["Average Yield"]] <- avgYield

  } else {
    print("No land queries to calculate!")
  }
  list.q
}

# calculateQueries.refining() ---------------------------------------------

#' Additional queries calculated for class=refining
#'
#' Calculated Query ~ Input Query/Queries
#'
#' Refining Tech Shares ~ Regional oil production by fuel
#' Refined liquids production by technology ~ Refining Tech Shares, Refined liquids production by region
#' Refined liquids use by technology and aggregate end-use ~ Refining Tech Shares, Refined liquids use by aggregate end-use
#'
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.refining <- function(list.q) {

  # refining technology shares of total oil production
  if("Regional oil production by fuel" %in% names(list.q)) {
    print("...Oil Type Shares")

    # grab query data
    refining <- list.q[["Regional oil production by fuel"]]

    # refining technology has its own column, but contains multiple keys (crude vs unconventional oil)
    # we use calcShare(key, column, df) to calculate the share of total land area
    # that employs each water management type (identified by key)
    keys <- unique(refining$technology)[unique(refining$technology) != ""]

    # drop new query into project data
    list.q[["Oil Type Shares"]] <- keys %>% # we want to loop over each non-empty key value
      lapply(calcShare, technology, refining) %>% # technology identifies the column that contains our set of keys
      bind_rows() # unlist and rbind output of lapply()
  }

  # factorize oil production by refining technology using refining technology shares (calculated above)
  if ("Oil Type Shares" %in% names(list.q) & "Refined liquids production by region" %in% names(list.q)) {
    print("...Refined liquids production by fuel")

    # grab query data
    shares <- list.q[["Oil Type Shares"]] %>%
      rename(share = value) %>%
      select(-Units) # when production data is multiplied by shares, the units stay the same
    production <- list.q[["Refined liquids production by region"]]

    # shares has the base set of columns used in the merge below, in addition to technology, which
    # contains the keys that shares are calculated for.
    # when joined with shares, each row is duplicated for each technology in shares (2).
    # the value is duplicated as well, but then proportioned by the share. This way, the duplicated
    # rows sum to the original row's value.
    production.factorized <- production %>%
      left_join(shares, by=c("scenario", "forcing", "branch", "year", "region")) %>%
      mutate(value = value * share) %>%
      select(-share)

    # drop new query into project data
    list.q[["Refined liquids production by fuel"]] <- production.factorized

  }

  # factorize oil use by end-sector using refining technology shares (calculated above)
  if ("Oil Type Shares" %in% names(list.q) & "Refined liquids use by aggregate end-use" %in% names(list.q)) {
    print("...Refined liquids use by fuel and aggregate end-use")

    # grab query data
    shares <- list.q[["Oil Type Shares"]] %>%
      rename(share=value) %>%
      select(-Units) # when enduse data is multiplied by shares, the units stay the same
    enduse <- list.q[["Refined liquids use by aggregate end-use"]]

    # shares has the base set of columns used in the merge below, in addition to technology, which
    # contains the keys that shares are calculated for.
    # when joined with shares, each row is duplicated for each technology in shares (2).
    # the value is duplicated as well, but then proportioned by the share. This way, the duplicated
    # rows sum to the original row's value.
    enduse.factorized <- enduse %>%
      left_join(shares, by=c("scenario", "forcing", "branch", "year", "region")) %>%
      mutate(value = value * share) %>%
      select(-share)

    # drop new query into project data
    list.q[["Refined liquids use by fuel and aggregate end-use"]] <- enduse.factorized
  }
  list.q
}

# calculateQueries.emissions() --------------------------------------------
#' Additional queries calculated for class=emissions
#'
#' None
#' @param list.q list of queries, each query a single data.frame
#' @return list of dataframes
calculateQueries.emissions <- function(list.q) {
  print("No emissions queries to calculate!")
  list.q
}


# calcShare() -------------------------------------------------------------

#' Share Calculation for Single Key Found in Column
#'
#' This function is used repeatedly in the query calcultions above. It calculates shares of a total value,
#' where the total value is calculated using an aggregation by Units, scenario, forcing, branch, year,
#' region.
#'
#' The output contains shares of the total for only one key. To get the shares calculated for all keys,
#' we would grab the set of unique keys, lapply over them, and unlist:
#'
#' sharesForAllKeys <- lapply(keys, calcshare, column, df) %>%
#'   bind_rows()
#'
#' @param key string, single key found in unique(df$column)
#' @param column unquoted column name, name of column that contains set of all possible keys
#' @param df data.frame, contains "Land Allocation" data from Comparison Figures object
calcShare <- function(key, column, df) {

  # take input column, convert to quosure (in dplyr functions, must be unquoted using !!)
  column <- enquo(column)

  # get total value, aggregated to columns in group_by()
  total <- df %>%
    filter(!!column != "") %>% # unquote quosure. total is calculated for all non-empty key values.
    group_by(Units, scenario, forcing, branch, year, region) %>%
    summarise(total=sum(value)) %>%
    ungroup()

  # get part value, aggregated to columns in group_by()
  part <- df %>%
    filter(!!column == key) %>%
    group_by(Units, scenario, forcing, branch, year, region) %>%
    summarise(part=sum(value)) %>%
    ungroup()

  # share = part/total
  # we store the value of the key-value (our input param) under the column it was originally found in (another
  # input param) so that share values for different key-values can be stacked ontop of each other.
  share <- part %>%
    left_join(total, by=c("Units", "scenario", "forcing", "branch", "year", "region")) %>%
    mutate(value = part/total,
           Units = "share") %>%
    select(-part, -total) %>%
    mutate(!!quo_name(column) := key) # unquote quosure to be restored as column name, fill the column with our key-value

  return(share)
}

# End ---------------------------------------------------------------------
xavier-gutierrez/validation_figures documentation built on May 24, 2019, 9:58 p.m.