TEST RAN FOR COMMIT :

git rev-parse HEAD
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Set to FALSE if modifications in the code are expected to alter the model results 
# compared to the results obtained with the master branch
# If set to FALSE to merge a PR, the merge should then be followed by a 
# reactivation PR setting this value back to TRUE.
USE_AS_TEST <- FALSE 


# Default input folder is the test data
# ST_INPUTS_FOLDER <- fs::path("..","tests","testthat","test_data","ST_INPUTS_DEV")
ST_INPUTS_FOLDER <- file.path("..","tests","testthat", "test_data", "ST_INPUTS_DEV")

Get ST results from current branch

library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(purrr)
if (!"devtools" %in% installed.packages()){
  install.packages("devtools", repos = "http://cran.us.r-project.org")
}
if ("r2dii.climate.stress.test" %in% installed.packages()){
  if (requireNamespace("package_name", quietly = TRUE)) {
    detach("package:r2dii.climate.stress.test", unload=TRUE)
  }
  remove.packages("r2dii.climate.stress.test")
}
devtools::install(upgrade="never")
library(r2dii.climate.stress.test)
st_results_branch <- r2dii.climate.stress.test::run_trisk(input_path = ST_INPUTS_FOLDER, output_path = tempdir(), return_results = TRUE)

Uninstall current branch and install ST from master branch

detach("package:r2dii.climate.stress.test", unload=TRUE)
remove.packages("r2dii.climate.stress.test")
devtools::install_github("2DegreesInvesting/r2dii.climate.stress.test", upgrade = "never")

Get ST results from master branch

library(r2dii.climate.stress.test)
st_results_master<- r2dii.climate.stress.test::run_trisk(input_path = ST_INPUTS_FOLDER, output_path = tempdir(), return_results = TRUE)

Compare results

new_crispy <- st_results_branch$crispy_output %>%
  select(
    company_id,
    company_name,
    ald_sector,
    ald_business_unit,
    term,
    net_present_value_baseline,
    net_present_value_shock,
    pd_baseline,
    pd_shock
  )

old_crispy <- st_results_master$crispy_output %>%
  select(
    company_id,
    company_name,
    ald_sector,
    ald_business_unit,
    term,
    net_present_value_baseline,
    net_present_value_shock,
    pd_baseline,
    pd_shock
  )


crispy_comparison <- dplyr::inner_join(old_crispy, new_crispy)

if (USE_AS_TEST) {
  crispy_comparison %>%
    assertr::verify(nrow(.) == nrow(old_crispy))
}

Investigate crispy differences

mix <- dplyr::inner_join(old_crispy, new_crispy, by=c("company_id", "company_name", "ald_sector", "ald_business_unit", "term"))

# List of root names based on your data frame
roots <- c("net_present_value_baseline", "net_present_value_shock", "pd_baseline", "pd_shock")

# Iterate over each root name
for(root in roots) {
  x_col <- paste0(root, ".x")
  y_col <- paste0(root, ".y")

  # Subtract and create new column, remove old columns
  mix <- mix %>%
    mutate(!!paste0(root, "_diff") := !!sym(x_col) - !!sym(y_col)) %>%
    select(-!!sym(x_col), -!!sym(y_col))
}
# Assuming your dataframe is named df
df <- mix

# Replace zeros with NAs in _diff columns, except those completely filled with zeros
diff_cols <- grep("_diff$", names(df), value = TRUE)
for(col in diff_cols) {
  if(all(df[[col]] == 0)) next
  df[[col]][df[[col]] == 0] <- NA
}

# Reshape the dataframe for plotting
long_df <- df %>%
  select(contains("_diff")) %>%
  pivot_longer(cols = everything(), names_to = "variable", values_to = "value")



# Plot distribution plots for each _diff column with scientific notation on x-axis
ggplot(long_df, aes(x = value, fill = variable)) +
  geom_histogram(bins = 30, position = "identity", alpha = 0.6) +
  facet_wrap(~ variable, scales = "free") +
  theme_minimal() +
  scale_x_continuous(labels = scientific_format(),
                     breaks = scales::pretty_breaks()) +
  labs(title = "Distribution of '_diff' Columns", x = "Value (Scientific Notation)", y = "Count") +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1))

Investigate trajectories differences

new_trajectories <- st_results_branch$company_trajectories %>%
  select(
    company_id,
    company_name,
    ald_sector,
    ald_business_unit,
    year,
    production_plan_company_technology,
    production_baseline_scenario,
    production_target_scenario,
    production_shock_scenario
  )

old_trajectories <- st_results_master$company_trajectories %>%
  select(
    company_id,
    company_name,
    ald_sector,
    ald_business_unit,
    year,
    production_plan_company_technology,
    production_baseline_scenario,
    production_target_scenario,
    production_shock_scenario
    )


mix <- dplyr::inner_join(old_trajectories, new_trajectories, by=dplyr::join_by(    company_id,
    company_name,
    ald_sector,
    ald_business_unit,
    year))

Define functions

# Assuming your dataframe is named mix

# Function to compute RMSE
compute_rmse <- function(x, y) {
  sqrt(mean((x - y)^2, na.rm = TRUE))
}

# Function to process and plot data for each root
process_and_plot <- function(root) {
  x_col <- paste0(root, ".x")
  y_col <- paste0(root, ".y")

  # Compute RMSE
  rmse <- mix %>%
    select(company_id, company_name, ald_sector, ald_business_unit, year, x_col, y_col) %>%
    group_by(company_id, company_name, ald_sector, ald_business_unit) %>%
    summarise(RMSE = compute_rmse(!!sym(x_col), !!sym(y_col)), .groups = "drop") %>%
    mutate(root = root)

  # Select the top 10 RMSEs
  top_rmse <- rmse %>%
    arrange(desc(RMSE), company_id) %>%
    slice_head(n = 6)

  # Join with original data to get values for plotting
  plot_data <- mix %>%
      pivot_longer(cols = -c(company_id, company_name, ald_sector, ald_business_unit, year),
                 names_to = c("root", "set"), names_pattern = "(.*)\\.(.*)") %>%
    filter(.data$root == root) %>%
    inner_join(top_rmse, by = c("company_id", "company_name", "ald_sector", "ald_business_unit", "root"))

  # Plotting
  p <- ggplot(plot_data, aes(x = year, y = value, group = set, color = set)) +
    geom_line() +
    facet_wrap(~ company_id + company_name + ald_sector + ald_business_unit + root, scales = "free_y") +
    labs(title = paste("Top 10 RMSE for", root, "- by Group"), x = "Year", y = "Value") +
    theme_minimal()

  return(p)
}

plot most different trajectories

process_and_plot("production_plan_company_technology")
process_and_plot("production_baseline_scenario")
process_and_plot("production_target_scenario")
process_and_plot("production_shock_scenario")


2DegreesInvesting/r2dii.climate.stress.test documentation built on June 6, 2024, 8:23 a.m.