# R/summarize_uncertainty.R
#
# Defines function: summarize_uncertainty
#
# Documented in summarize_uncertainty

#' Get Monte Carlo confidence intervals

# DESCRIPTION ##################################################################
#' @description
#' This function obtains a summary of uncertainty
#' (based on central, lower and upper estimates of at least one input variable)
#' using a Monte Carlo simulation.
#' @description
#' Input variables that will be processed are:
#' \itemize{
#'   \item{relative_risk (\code{rr_...})}
#'   \item{exposure (\code{exp_...})}
#'   \item{cutoff (\code{cutoff_...})}
#'   \item{baseline health data (\code{bhd_...})}
#'   \item{disability weight (\code{dw_...})}
#'   \item{duration (\code{duration_...})}
#'   }

# ARGUMENTS ####################################################################
#' @param output_attribute \code{variable} in which the output of a \code{healthiar::attribute_...()} function call is stored.
#' @param n_sim \code{numeric value} indicating the number of simulations to be performed.
#' @param seed \code{numeric value} for fixing the randomization. Based on it, each variable (or variable-geography combination) is assigned a different, independent random number stream. If \code{NULL} (default), no seed is set and results are not reproducible across calls. The function preserves and restores the user's original random seed (if set prior to calling the function) upon function completion.

# DETAILS ######################################################################
#' @details
#'
#' \strong{Function arguments}
#' \code{seed}
#' If the \code{seed} argument is specified then the \code{parallel} package
#' is used to generate independent L’Ecuyer random number streams.
#' One stream is allocated per variable (or per variable–geography combination, as needed),
#' ensuring reproducible and independent random draws across variables and scenarios.
#'
#'
#' \strong{Methodology}
#'
#' This function summarizes the uncertainty of the attributable health impacts
#' (i.e. a single confidence interval instead of many combinations).
#' For this purpose, it employs
#' a Monte Carlo simulation methodology \insertCite{Robert2004_book}{healthiar}
#' and framework application \insertCite{Rubinstein2016_book}{healthiar}.
#'
#' Detailed information about the methodology (including equations)
#' is available in the package vignette.
#' More specifically, see chapters:
#' \itemize{
#'  \item \href{https://swisstph.github.io/healthiar/articles/intro_to_healthiar.html#monte-carlo-simulation}{Monte Carlo simulation}}
#'
#'
# VALUE ########################################################################
#' @returns
#' This function returns a \code{list} containing:
#'
#' 1) \code{uncertainty_main} (\code{tibble}) containing the \code{numeric}
#' summary uncertainty central estimate and corresponding lower and upper confidence intervals
#' for the attributable health impacts obtained through Monte Carlo simulation;
#'
#' 2) \code{uncertainty_detailed} (\code{list}) containing detailed (and interim) results.
#' \itemize{
#'  \item \code{impact_by_sim} (\code{tibble}) containing the results for each simulation
#'  \item \code{uncertainty_by_geo_id_micro} (\code{tibble}) containing results for each geographic unit under analysis (specified in \code{geo_id_micro} argument in the preceding \code{attribute_health} call)
#'  }
#'  The two results elements are added to the existing output.

# EXAMPLES #####################################################################
#' @examples
#' # Goal: obtain summary uncertainty for an existing attribute_health() output
#' # First create an assessment
#' attribute_health_output <- attribute_health(
#'   erf_shape = "log_linear",
#'   rr_central = 1.369,
#'   rr_lower = 1.124,
#'   rr_upper = 1.664,
#'   rr_increment = 10,
#'   exp_central = 8.85,
#'   exp_lower = 8,
#'   exp_upper = 10,
#'   cutoff_central = 5,
#'   bhd_central = 30747,
#'   bhd_lower = 28000,
#'   bhd_upper = 32000
#' )
#' # Then run Monte Carlo simulation
#' results <- summarize_uncertainty(
#'   output_attribute = attribute_health_output,
#'   n_sim = 100
#' )
#' results$uncertainty_main$impact # Central, lower and upper estimates
#'
#'
#' @seealso
#' \itemize{
#'   \item Upstream:
#'     \code{\link{attribute_health}}, \code{\link{attribute_lifetable}},
#'     \code{\link{compare}}
#' }
#'
#'
#' @references
#'
#' \insertAllCited{}
#'
#'
#' @author Alberto Castro & Axel Luyten
#'
#' @export



summarize_uncertainty <- function(
    output_attribute,
    n_sim,
    seed = NULL) {

  # PREPARATION ################

  ## Decimals ######

  ## Set options (restored on exit so the user's session is untouched)
  user_options <- base::options()
  base::on.exit(base::options(user_options), add = TRUE)
  # Make sure that no rounding occurs
  base::options(digits = 15)


  ## Relevant variables ##########
  # Input variables that may carry uncertainty (central/lower/upper estimates)
  var_names <- c("rr", "exp", "cutoff", "bhd", "dw", "duration")
  # Variables whose simulated values must be identical across geo units
  var_geo_identical <- c("rr", "cutoff", "dw", "duration")

  input_args <- output_attribute$health_detailed$input_args
  input_table <- output_attribute$health_detailed$input_table

  # Two-case (comparison) assessments store scenario-specific sub-lists
  is_two_cases <- base::any(c("input_table_scen_1", "input_table_scen_2") %in% base::names(input_table))
  is_one_case <- !is_two_cases

  if (is_one_case) {
    input_args_to_check <- output_attribute$health_detailed$input_args
    input_table_to_check <- output_attribute$health_detailed$input_table
  } else {
    input_args_to_check <- output_attribute$health_detailed$input_args$input_args_scen_1
    input_table_to_check <- output_attribute$health_detailed$input_table$input_table_scen_1
    # Same as input_args_scen_2 (data validation of compare())
  }

  # Names of the arguments that the user actually entered
  input_arg_names_passed <- input_args_to_check$is_entered_by_user |>
    purrr::keep(~ .x == TRUE) |>
    base::names()

  is_lifetable <- base::unique(input_table_to_check$is_lifetable)
  exp_type <- base::unique(input_table_to_check$exp_type)

  # Determine number of geographic units (n_geo)
  if (is_one_case) {
    # Use unique() on input_table (not input_args) because in some cases
    # the user does not enter a geo_id; compile_input() then provides one
    # and it is shown in results_raw.
    n_geo <- base::length(base::unique(input_table$geo_id_micro))
  } else {
    # Same for scen_1 and scen_2, so just take one of them
    n_geo <- base::length(base::unique(input_table$input_table_scen_1$geo_id_micro))
  }


  ## Seeds ########

  # RNG setup for reproducible, independent streams
  # Save user's global random number generator (RNG) state if it exists
  old_seed_exists <- base::exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  if (old_seed_exists) {
    old_seed <- base::get(".Random.seed", envir = .GlobalEnv)
  } else {
    old_seed <- NULL
  }

  # Save the current RNG kind so we can restore it later
  old_RNGkind <- base::RNGkind()

  # Prepare L'Ecuyer stream seeds if user asked for reproducibility.
  # If users request reproducibility (seed not NULL), switch to L'Ecuyer-CMRG,
  # call set.seed(seed) once, and pre-generate full independent L'Ecuyer
  # streams (.Random.seed vectors) for each vectorized simulate() call using
  # parallel::nextRNGStream(). Those full .Random.seed vectors are assigned
  # before each vectorized simulate() call to guarantee independence and
  # avoid integer-seed collisions.
  if (!base::is.null(seed)) {
    # Switch to L'Ecuyer-CMRG and initialise it: a single seed from which
    # reproducible streams can be derived (parallel-safe advancing).
    use_streams <- TRUE
    base::RNGkind(kind = "L'Ecuyer-CMRG")
    base::set.seed(seed)
  } else {
    use_streams <- FALSE
  }

  # Ensure RNG state and kind are restored on exit
  base::on.exit({
    base::do.call(base::RNGkind, base::as.list(old_RNGkind))
    if (!base::is.null(old_seed)) {
      base::assign(".Random.seed", old_seed, envir = .GlobalEnv)
    } else if (base::exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) {
      base::rm(".Random.seed", envir = .GlobalEnv)
    }
  }, add = TRUE)


  # DATA VALIDATION ##################

  ## Error if uncertainty in erf_eq_... ####
  # Uncertainty in erf_eq is currently not supported
  # (it would require more complex modelling).
  # FIX: the original checked erf_eq_lower twice and never erf_eq_upper;
  # also use scalar || in the if() condition.
  if (!base::is.null(input_args_to_check$value$erf_eq_lower) ||
      !base::is.null(input_args_to_check$value$erf_eq_upper)) {
    base::stop("Sorry, the summary of uncertainty for erf_eq_... is not currently supported.",
               call. = FALSE)
  }

  ## Error if exposure distribution and uncertainty in exp_... ####
  # FIX: check input_args_to_check (works for both one- and two-case
  # assessments), consistent with the erf_eq check above.
  if (# If exposure distribution
    exp_type == "exposure_distribution" &&
    # If uncertainty in exposure
    (!base::is.null(input_args_to_check$value$exp_lower) ||
     !base::is.null(input_args_to_check$value$exp_upper))) {
    base::stop("Sorry, the summary of uncertainty for exp_... in exposure distributions is not currently supported.",
               call. = FALSE)
  }

  ## Error if no argument with uncertainty ####
  if (# No argument used has _lower or _upper
    !base::any(base::grepl("_upper|_lower", input_arg_names_passed))) {
    base::stop("Please enter an assessment with uncertainty (..._lower and ..._upper) in any argument.",
               call. = FALSE)
  }


  # FUNCTION TO SUMMARIZE UNCERTAINTY ######
  # Create sub-functions to be used inside summarize_uncertainty_based_on_input
  # (see below) and in compare() outside that function.

  # Get uncertainty summary (median and 95% interval of simulated impacts)
  get_summary <- function(attribute) {

    summary <-
      attribute |>
      dplyr::summarise(
        .by = dplyr::any_of(c("geo_id_macro", "geo_id_micro")),
        central_estimate = stats::quantile(x = impact, probs = c(0.5), na.rm = TRUE, names = FALSE),
        lower_estimate = stats::quantile(x = impact, probs = c(0.025), na.rm = TRUE, names = FALSE),
        upper_estimate = stats::quantile(x = impact, probs = c(0.975), na.rm = TRUE, names = FALSE)) |>
      # Change to same format as other output from healthiar
      tidyr::pivot_longer(
        cols = !dplyr::contains("geo_id"),
        names_to = "impact_ci", values_to = "impact"
      ) |>
      dplyr::mutate(
        impact_rounded = base::round(impact, digits = 0)
      )

    return(summary)
  }


  ## Prepare streams ########
  # Important: outside of summarize_uncertainty_based_on_input() (see below).
  # Otherwise each scenario in a two-case assessment would get a different
  # seed and results would never be reproducible.
  if (use_streams) {
    # Current seed vector (after set.seed(seed)) is a valid L'Ecuyer stream
    current_stream <- base::get(".Random.seed", envir = .GlobalEnv)

    # Prepare structure to hold full .Random.seed vectors per var and geo
    stream_map <- stats::setNames(base::vector("list", base::length(var_names)),
                                  var_names)

    # Iterate vars and allocate one stream per needed item:
    for (v in var_names) {
      if (v %in% var_geo_identical) {
        # only one stream per variable
        stream_map[[v]] <- base::list(current_stream)
        current_stream <- parallel::nextRNGStream(current_stream)
      } else {
        # one stream per geo_id for this variable
        stream_map[[v]] <- base::vector("list", n_geo)
        for (g in base::seq_len(n_geo)) {
          stream_map[[v]][[g]] <- current_stream
          current_stream <- parallel::nextRNGStream(current_stream)
        }
      }
    }
  } else {
    stream_map <- NULL
  }


  # Create the function that does the whole job.
  # It is important because for two cases (comparison) the function has to be
  # run more than once (avoids copy-pasted code).
  summarize_uncertainty_based_on_input <- function(input_args, input_table, stream_map) {

    ## Boolean variables ####

    # Is there a confidence interval, i.e. lower AND upper estimate?
    ci_in <- base::list()

    for (v in var_names) {
      ci_in[[v]] <-
        !base::is.null(input_args$value[[base::paste0(v, "_lower")]]) &&
        !base::is.null(input_args$value[[base::paste0(v, "_upper")]])
    }


    # Beta and gamma functions ############################

    # betaExpert()
    # Copied from source code of prevalence::betaExpert() here:
    # https://github.com/cran/prevalence/blob/master/R/betaExpert.R
    betaExpert <-
      function(best, lower, upper, p = 0.95, method = "mode") {

        ## functions to optimize ~ mode
        f_mode <-
          function(x, mode, p, target) {
            return(
              base::sum(
                (stats::qbeta(p = p,
                              shape1 = x,
                              shape2 = (x * (1 - mode) + 2 * mode - 1) / mode) -
                   target) ^ 2
              ))
          }

        f_mode_zero <-
          function(x, p, target) {
            return((stats::qbeta(p = p, shape1 = 1, shape2 = x) - target) ^ 2)
          }

        f_mode_one <-
          function(x, p, target) {
            return((stats::qbeta(p = p, shape1 = x, shape2 = 1) - target) ^ 2)
          }

        ## functions to optimize ~ mean
        f_mean <-
          function(x, mean, p, target) {
            return(
              base::sum(
                (stats::qbeta(p = p,
                              shape1 = x,
                              shape2 = (x * (1 - mean)) / mean) -
                   target) ^ 2
              ))
          }

        ## define 'target' and 'p'
        if (!base::missing(lower) & base::missing(upper)) {
          target <- lower
          p <- 1 - p
        } else if (!base::missing(upper) & base::missing(lower)) {
          target <- upper
        } else if (!base::missing(upper) & !base::missing(lower)) {
          target <- c(lower, upper)
          p <- c(0, p) + (1 - p) / 2
        }

        ## derive a and b (= shape1 and shape2)
        if (method == "mode") {
          if (best == 0) {
            a <- 1
            b <- stats::optimize(f_mode_zero, c(0, 1000), p = p, target = target)$minimum
          } else if (best == 1) {
            a <- stats::optimize(f_mode_one, c(0, 1000), p = p, target = target)$minimum
            b <- 1
          } else {
            a <- stats::optimize(f_mode, c(0, 1000),
                                 mode = best, p = p, target = target)$minimum
            b <- (a * (1 - best) + 2 * best - 1) / best
          }
        } else if (method == "mean") {
          a <- stats::optimize(f_mean, c(0, 1000),
                               mean = best, p = p, target = target)$minimum
          b <- (a * (1 - best)) / best
        }

        ## create 'out' list
        out <- base::list(alpha = a, beta = b)
        base::class(out) <- "betaExpert"

        ## return 'out'
        return(out)
      }

    # Define helper functions for fitting a gamma distribution with
    # optimization for the relative risk.
    # NOTE: the functions were adapted from those provided by Sciensano

    # Set gamma distribution specs
    probs <- c(0.025, 0.975)
    # shape parameter of the gamma distribution
    par <- 2

    ## Fit gamma distribution: sum of squared deviations of the fitted
    ## quantiles from the given lower/upper estimates
    f_gamma <-
      function(par, probs, lower_estimate, central_estimate, upper_estimate) {
        qfit <- stats::qgamma(p = probs,
                              shape = par,
                              rate = par / central_estimate)
        return(base::sum((qfit - c(lower_estimate, upper_estimate))^2))
      }

    ## Optimize gamma distribution
    optim_gamma <-
      function(central_estimate, lower_estimate, upper_estimate) {
        f_optimized <-
          stats::optimize(f = f_gamma,
                          interval = c(0, 1e9),
                          probs = probs,
                          lower_estimate = lower_estimate,
                          central_estimate = central_estimate,
                          upper_estimate = upper_estimate)

        return(c(shape = f_optimized$minimum,
                 rate = f_optimized$minimum / central_estimate))
      }

    ## Simulate values based on optimized gamma distribution
    sim_gamma <-
      function(n_sim, central_estimate, lower_estimate, upper_estimate) {
        fit <- optim_gamma(central_estimate, lower_estimate, upper_estimate)
        gamma <- stats::rgamma(n = n_sim, shape = fit["shape"], rate = fit["rate"])
        return(gamma)
      }


    # Simulate function #####################
    # Draw n values from the requested distribution fitted to the
    # central/lower/upper estimates.
    # The seed argument is kept for backward compatibility, unit tests and
    # standalone calls, but should be NULL when reproducibility is driven by
    # assigning full L'Ecuyer .Random.seed streams externally.
    simulate <- function(central, lower, upper, distribution, n, seed = NULL) {
      if (!base::is.null(seed)) {
        base::set.seed(seed)
      }

      if (distribution == "gamma") {
        # abs() because negative values have to be avoided
        simulation <-
          base::abs(
            sim_gamma(
              n_sim = n,
              central_estimate = central,
              lower_estimate = lower,
              upper_estimate = upper))

      } else if (distribution == "normal") {
        # abs() because negative values have to be avoided
        simulation <-
          base::abs(
            stats::rnorm(
              n = n,
              mean = central,
              sd = (upper - lower) / (2 * stats::qnorm(0.975))))

      } else if (distribution == "beta") {
        simulation_betaExpert <-
          betaExpert(
            best = central,
            lower = lower,
            upper = upper,
            method = "mean")

        simulation <-
          stats::rbeta(
            n = n,
            shape1 = base::as.numeric(base::unname(simulation_betaExpert["alpha"])),
            shape2 = base::as.numeric(base::unname(simulation_betaExpert["beta"])))
      }

      return(simulation)
    }

    ## Identify relevant var_names
    # Variable names with confidence interval #####
    var_names_with_ci <- base::names(ci_in)[base::unlist(ci_in)]
    var_names_with_ci_in_name <- base::gsub("rr", "erf", var_names_with_ci) |> base::paste0("_ci")
    # var_names_with_ci whose simulated values are identical in all geo units
    var_names_with_ci_geo_identical <- base::intersect(var_names_with_ci, var_geo_identical)
    # var_names_with_ci whose simulated values differ across geo units
    var_names_with_ci_geo_different <- base::setdiff(var_names_with_ci, var_geo_identical)


    ## Template and simulations #####
    sim_template <- input_table |>
      dplyr::select(geo_id_micro) |>
      base::unique() |>
      dplyr::mutate(geo_id_number = base::seq_len(n_geo)) |>
      dplyr::mutate(sim_id = base::list(base::seq_len(n_sim)))


    # Define the mapping between variable names and their distributions
    sim_config <- base::list(
      rr = "gamma",
      exp = "normal",
      cutoff = "normal",
      bhd = "normal",
      dw = "beta",
      duration = "normal"
    )

    # Prepare the list to store the data in the for loop
    sim <- base::list()

    # Apply simulation for variables that have a confidence interval
    for (var in var_names_with_ci) {

      # Store distribution
      dist <- sim_config[[var]]

      # Store central, lower and upper estimate for the simulation below
      central <- base::as.numeric(input_args$value[[base::paste0(var, "_central")]])
      lower   <- base::as.numeric(input_args$value[[base::paste0(var, "_lower")]])
      upper   <- base::as.numeric(input_args$value[[base::paste0(var, "_upper")]])

      # Run simulate across all rows

      # First those variables that differ across geo units (e.g. exp and bhd):
      # simulations must be DIFFERENT in all geo units
      if (var %in% var_names_with_ci_geo_different) {

        sim[[var]] <- purrr::pmap(
          base::list(sim_template$geo_id_number),
          function(geo_id_number) {

            # assign full .Random.seed stream if available for this var & geo
            if (!base::is.null(stream_map)) {
              base::assign(".Random.seed", stream_map[[var]][[geo_id_number]], envir = .GlobalEnv)
            }

            simulate(
              central = central,
              lower = lower,
              upper = upper,
              distribution = dist,
              n = n_sim,
              seed = NULL)
          }
        )

        # Second, those variables that are common for all geo units
        # (rr, cutoff, dw and duration): the simulated values must be
        # IDENTICAL in all geo units (but differ across sim_id)
      } else if (var %in% var_names_with_ci_geo_identical) {

        # If reproducible streams were requested, stream_map[[var]] contains
        # one stream vector: assign it once, then simulate a single vector of
        # length n_sim.
        if (!base::is.null(stream_map)) {
          base::assign(".Random.seed", stream_map[[var]][[1]], envir = .GlobalEnv)
        }

        sim[[var]] <-
          base::list(
            simulate(
              central = central,
              lower = lower,
              upper = upper,
              distribution = dist,
              n = n_sim,
              seed = NULL))
      }

    }

    # Identify the variables that have to be deleted in input_args
    # (to be used below)
    args_to_be_replaced_by_sim <-
      base::paste0(
        base::rep(var_names_with_ci, each = 3),
        c("_central", "_lower", "_upper"))
    # variables that were not passed by the user
    args_not_passed <-
      base::names(input_args$is_entered_by_user)[!base::unlist(input_args$is_entered_by_user)]
    # All args to be removed
    args_to_be_removed_in_input_args <-
      base::unique(c(args_to_be_replaced_by_sim, args_not_passed))

    # Prepare input_args to accommodate the new simulated values
    # (to be used below)
    input_args_prepared_for_replacement <-
      # Remove the arguments that will be replaced with the simulation,
      # including upper and lower
      input_args$value[!base::names(input_args$value) %in% args_to_be_removed_in_input_args]


    template_with_sim <-
      # Bind the template with the simulated values
      dplyr::bind_cols(sim_template, tibble::as_tibble(sim[var_names_with_ci])) |>
      # Unnest to have table layout
      tidyr::unnest(dplyr::any_of(c("sim_id", var_names_with_ci)))

    input_table_with_sim <- input_table |>
      # Remove lower and upper rows (keep only central);
      # in the summary of uncertainties no lower or upper is used
      dplyr::filter(dplyr::if_all(.cols = dplyr::all_of(var_names_with_ci_in_name),
                                  .fns = ~ .x == "central")) |>
      # Remove the variables with uncertainty because the new simulated values
      # are introduced in the step below
      dplyr::select(-dplyr::all_of(var_names_with_ci)) |>
      # Add the simulated values
      dplyr::inner_join(template_with_sim,
                        by = "geo_id_micro",
                        relationship = "many-to-many") |>
      # Change the name of geo_id_micro adding the sim_id.
      # Important: this is a trick to be able to get the output with impacts
      # by simulation; reverted below when impacts by geo_id_micro are needed.
      dplyr::mutate(geo_id_micro = base::paste0(geo_id_micro, "_sim_", sim_id))

    # Call get_impact taking advantage of the vectorized structure:
    # impact by row no matter what is entered (e.g. multiple rr by geo_id_micro)
    impact_sim <-
      get_impact(input_table = input_table_with_sim,
                 pop_fraction_type = "paf")

    # Get output
    output_sim <-
      get_output(results_raw = impact_sim$results_raw)[["health_detailed"]][["results_by_geo_id_micro"]]

    impact_by_sim <- output_sim |>
      # Bring back the original geo_id_micro after the sim_id trick;
      # we need the original names to be able to sum impacts below
      dplyr::mutate(geo_id_micro = base::gsub("_sim_.*", "", geo_id_micro)) |>
      # Put column sim_id next to impact
      # (get_output reorders columns but ignores sim_id)
      dplyr::relocate(sim_id, .before = impact)

    # Get summary (uncertainty) for each geo_id_micro
    summary_by_geo_id_micro <-
      get_summary(attribute = impact_by_sim)

    summary <- summary_by_geo_id_micro

    if ("geo_id_macro" %in% base::names(output_attribute$health_main)) {

      summary_by_geo_id_macro <- summary_by_geo_id_micro |>
        # Sum impacts
        dplyr::summarise(impact = base::sum(impact),
                         .by = c("geo_id_macro", "impact_ci")) |>
        # Round
        dplyr::mutate(impact_rounded = base::round(impact))

      summary <- summary_by_geo_id_macro
    }

    # Store the results in a list keeping consistency in the structure with
    # other healthiar functions
    uncertainty <-
      base::list(
        uncertainty_main = summary,
        uncertainty_detailed =
          base::list(impact_by_sim = impact_by_sim,
                     uncertainty_by_geo_id_micro = summary_by_geo_id_micro))

    return(uncertainty)
  }

  # GENERATE RESULTS USING THE FUNCTION ##################

  ## One case (no comparison) ####

  # Identify if this is a one-case or two-case (comparison) assessment

  if (is_one_case) {
    # Use summarize_uncertainty_based_on_input() only once
    uncertainty <-
      c(output_attribute,
        summarize_uncertainty_based_on_input(
          input_args = input_args,
          input_table = input_table,
          stream_map = stream_map))

  ## Two cases (comparison) ####
  } else if (is_two_cases) {

    # Once for scenario 1
    attribute_scen_1 <-
      summarize_uncertainty_based_on_input(
        input_args = input_args[["input_args_scen_1"]],
        input_table = input_table[["input_table_scen_1"]],
        stream_map = stream_map)

    # Once for scenario 2
    attribute_scen_2 <-
      summarize_uncertainty_based_on_input(
        input_args = input_args[["input_args_scen_2"]],
        input_table = input_table[["input_table_scen_2"]],
        stream_map = stream_map)

    # Extract simulated impacts for scenarios 1 and 2
    output_scen_1 <-
      attribute_scen_1[["uncertainty_detailed"]][["impact_by_sim"]]

    output_scen_2 <-
      attribute_scen_2[["uncertainty_detailed"]][["impact_by_sim"]]

    scenario_specific_arguments <-
      c("exp_central", "exp_lower", "exp_upper",
        "bhd_central", "bhd_lower", "bhd_upper",
        "population",
        "prop_pop_exp",
        "pop_exp",
        "year_of_analysis",
        "info",
        "impact", "pop_fraction")

    # Identify the id columns of the outputs (to be used below).
    # We use output 1 but could use output 2
    # (same structure because same type of assessment).
    scen_joining_cols <-
      find_joining_columns(
        df_1 = output_scen_1,
        df_2 = output_scen_2,
        except = scenario_specific_arguments)

    # Put together ids and sim cols
    output_both_scen <-
      dplyr::left_join(output_scen_1,
                       output_scen_2,
                       by = scen_joining_cols,
                       suffix = c("_scen_1", "_scen_2"))

    if (input_args$approach_comparison == "delta") {

      impact_by_sim <- output_both_scen |>
        dplyr::mutate(
          impact = impact_scen_1 - impact_scen_2,
          impact_rounded = base::round(impact),
          .after = sim_id)

    } else if (input_args$approach_comparison == "pif") {

      impact_sim_both_scen <-
        get_impact(
          input_table = output_both_scen,
          pop_fraction_type = "pif")

      # FIX: the original referenced an undefined object
      # (output_sim_after_impact); use impact_sim_both_scen.
      output_sim_both_scen <-
        get_output(results_raw = impact_sim_both_scen$results_raw)[["health_detailed"]][["impact_by_sim"]]

      impact_by_sim <- output_sim_both_scen |>
        dplyr::relocate(impact, impact_rounded,
                        .after = sim_id)
    }

    # Get summary (uncertainty) for each geo_id_micro
    summary_by_geo_id_micro <-
      get_summary(attribute = impact_by_sim)

    summary <- summary_by_geo_id_micro

    if ("geo_id_macro" %in% base::names(output_attribute$health_main)) {

      summary_by_geo_id_macro <- summary_by_geo_id_micro |>
        # Sum impacts
        dplyr::summarise(impact = base::sum(impact),
                         .by = c("geo_id_macro", "impact_ci")) |>
        # Round
        dplyr::mutate(impact_rounded = base::round(impact))

      summary <- summary_by_geo_id_macro
    }

    # Store the results in a list keeping consistency in the structure with
    # other healthiar functions
    uncertainty <-
      c(output_attribute,
        base::list(
          uncertainty_main = summary,
          uncertainty_detailed =
            base::list(impact_by_sim = impact_by_sim,
                       uncertainty_by_geo_id_micro = summary_by_geo_id_micro)))
  }


  # RETURN ####################################################################

  return(uncertainty)

}

# Try the healthiar package in your browser
#
# Any scripts or data that you put into this service are public.
#
# healthiar documentation built on March 12, 2026, 5:07 p.m.