R/add_hep_population.R

Defines functions hep_pop_calc add_hep_populations

Documented in add_hep_populations

#' Add Population Figures for HEP Billion
#'
#' `add_hep_populations()` adds relevant populations to each HEP Billion indicator
#' and country, so these can be used to calculate indicator-level aggregations
#' of the HEP Billion. The column specified by `population` will be generated and
#' filled with relevant populations for that country and indicator. If the column
#' already exists, only missing values will be replaced by the function.
#'
#' As HEP indicators where population is relevant are all generated by billionaiRe,
#' the `transform_value_col` is required.
#'
#' @inherit transform_hpop_data return details params
#' @param pop_year Year used to pull in HEP populations, defaults to 2025.
#' @param population Column name of column to create with population figures.
#' @inheritParams calculate_hpop_contributions
#' @inheritParams calculate_hpop_change_vector
#'
#' @family populations
#' @family hep
#'
#' @export
add_hep_populations <- function(df,
                                population = "population",
                                pop_year = 2025,
                                scenario_col = NULL,
                                transform_value_col = "transform_value",
                                ind_ids = billion_ind_codes("hep", include_calculated = TRUE)) {
  df <- dplyr::group_by(df, dplyr::across(c("iso3", !!scenario_col)))

  assert_columns(df, "iso3", "ind", transform_value_col)
  assert_string(population, 1)
  assert_ind_ids(ind_ids, "hep")
  assert_numeric(pop_year)

  df <- billionaiRe_add_columns(df, population, NA_real_)

  args <- list(
    name = ind_ids[c(
      "meningitis", "meningitis_campaign", "meningitis_routine",
      "yellow_fever", "yellow_fever_campaign", "yellow_fever_routine",
      "cholera", "cholera_campaign",
      "polio", "polio_routine",
      "measles", "measles_campaign", "measles_routine",
      "covid","covid_campaign",
      "ebola", "ebola_campaign",
      "prevent"
    )],
    numerator = list(
      # meningitis
      ind_ids[c("meningitis_campaign_num", "meningitis_routine_num")],
      ind_ids[c("meningitis_campaign_num")],
      ind_ids[c("meningitis_routine_num")],
      # yellow_fever
      ind_ids[c("yellow_fever_campaign_num", "yellow_fever_routine_num")],
      ind_ids[c("yellow_fever_campaign_num")],
      ind_ids[c("yellow_fever_routine_num")],
      # cholera
      ind_ids[c("cholera_campaign_num")],
      ind_ids[c("cholera_campaign_num")],
      # polio
      ind_ids[c("polio_routine_num")],
      ind_ids[c("polio_routine_num")],
      # measles
      ind_ids[c("measles_routine_num", "measles_campaign_num")],
      ind_ids[c("measles_campaign_num")],
      ind_ids[c("measles_routine_num")],
      # covid
      ind_ids[c("covid_campaign_num")],
      ind_ids[c("covid_campaign_num")],
      #ebola
      ind_ids[c("ebola_campaign_num")],
      ind_ids[c("ebola_campaign_num")],
      #prevent
      ind_ids[c(
        "meningitis_campaign_num", "meningitis_routine_num", "yellow_fever_campaign_num", "yellow_fever_routine_num",
        "cholera_campaign_num", "polio_routine_num", "measles_routine_num", "measles_campaign_num", "covid_campaign_num",
        "ebola_campaign_num"
      )]
    ),
    denominator = list(
      # meningitis
      ind_ids[c("meningitis_campaign_denom", "surviving_infants")],
      ind_ids[c("meningitis_campaign_denom")],
      ind_ids[c("surviving_infants")],
      # yellow_fever
      ind_ids[c("yellow_fever_campaign_denom", "surviving_infants")],
      ind_ids[c("yellow_fever_campaign_denom")],
      ind_ids[c("surviving_infants")],
      # cholera
      ind_ids[c("cholera_campaign_denom")],
      ind_ids[c("cholera_campaign_denom")],
      # polio
      ind_ids[c("surviving_infants")],
      ind_ids[c("surviving_infants")],
      # measles
      ind_ids[c("surviving_infants", "measles_campaign_denom")],
      ind_ids[c("measles_campaign_denom")],
      ind_ids[c("surviving_infants")],
      # covid
      ind_ids[c("covid_campaign_denom")],
      ind_ids[c("covid_campaign_denom")],
      #ebola
      ind_ids[c("ebola_campaign_denom")],
      ind_ids[c("ebola_campaign_denom")],
      ind_ids[c(
        "meningitis_campaign_denom", "yellow_fever_campaign_denom", "cholera_campaign_denom",
        "measles_campaign_denom", "ebola_campaign_denom", "covid_campaign_denom", "surviving_infants"
      )]
    ),
    multiply_surviving_infs = c(rep(FALSE, 17),
                                TRUE)
  )

  surviving_infants_df <- df %>%
    dplyr::ungroup() %>%
    dplyr::select("iso3") %>%
    dplyr::distinct() %>%
    dplyr::mutate("surviving_infants" := wppdistro::get_population(.data[["iso3"]], pop_year, age_range = "under_1"))

  pop_df <- furrr::future_pmap_dfr(args,
                                   hep_pop_calc,
                                   df = df,
                                   transform_value_col = transform_value_col,
                                   ind_ids = ind_ids,
                                   pop_year = pop_year,
                                   scenario_col = scenario_col,
                                   surviving_infants_df = surviving_infants_df
  )

  other_inds <- ind_ids[stringr::str_detect(ind_ids, paste0(unique(pop_df[["ind"]]), collapse = "|"), negate = TRUE)]

  df %>%
    dplyr::left_join(pop_df, by = c("iso3", "ind", scenario_col)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      "{population}" := dplyr::case_when(
        .data[["ind"]] %in% other_inds & is.na(.data[["_temp_pop"]]) ~ wppdistro::get_population(.data[["iso3"]], pop_year),
        TRUE ~ .data[["_temp_pop"]]
      )
    ) %>%
    dplyr::select(-"_temp_pop")
}

#' @keywords internal
#'
#' @noRd
#'
hep_pop_calc <- function(df,
                         name,
                         denominator,
                         numerator,
                         transform_value_col,
                         ind_ids,
                         multiply_surviving_infs,
                         pop_year,
                         scenario_col = "scenario",
                         surviving_infants_df){


  routine_num <- numerator[stringr::str_detect(numerator, "routine_num$")]
  routine_denom <- denominator[stringr::str_detect(denominator, "routine_denom$|^surviving_infants$")]
  campaign_num <- numerator[stringr::str_detect(numerator, "campaign_num$")]
  campaign_denom <- denominator[stringr::str_detect(denominator, "campaign_denom$")]

  df_get_surviving_infants <- df %>%
    dplyr::group_by(dplyr::across(c("iso3", !!scenario_col))) %>%
    dplyr::filter(.data[["ind"]] %in% routine_num) %>%
    dplyr::select(dplyr::all_of(c("iso3", !!scenario_col))) %>%
    dplyr::distinct()

  if(nrow(df_get_surviving_infants) > 0){

    full_routine_df <- df %>%
      dplyr::semi_join(df_get_surviving_infants, by = c("iso3", scenario_col)) %>%
      dplyr::filter(.data[["ind"]] %in% routine_denom) %>%
      dplyr::select(dplyr::all_of(c("iso3", "ind", !!scenario_col))) %>%
      dplyr::distinct()

    df_get_surviving_infants <- df %>%
      dplyr::semi_join(df_get_surviving_infants, by = c("iso3", scenario_col)) %>%
      dplyr::filter(.data[["ind"]] %in% routine_denom) %>%
      dplyr::filter(.data[["year"]] == pop_year) %>%
      dplyr::full_join(full_routine_df, by = c("iso3", "ind", scenario_col)) %>%
      dplyr::left_join(surviving_infants_df, by = "iso3") %>%
      dplyr::mutate("ind" := "surviving_infants",
                    dplyr::across(
                      dplyr::all_of(transform_value_col),
                      ~ dplyr::case_when(
                        !is.na(.x) ~ .x,
                        TRUE ~ .data[["surviving_infants"]]
                      )
                    )) %>%
      dplyr::select(dplyr::all_of(c("iso3", "ind", !!scenario_col, !!transform_value_col))) %>%
      dplyr::distinct()
  }

  campaign_df <- df %>%
    dplyr::group_by(dplyr::across(c("iso3", !!scenario_col))) %>%
    dplyr::filter(.data[["ind"]] %in% campaign_denom) %>%
    dplyr::filter(.data[["year"]] <= pop_year)


  if(nrow(campaign_df) > 0){
    campaign_df <- campaign_df %>%
      dplyr::group_by(dplyr::across(c("iso3", "ind", !!scenario_col))) %>%
      dplyr::filter(.data[["year"]] == suppressWarnings(max(.data[["year"]]))) %>%
      dplyr::select(dplyr::all_of(c("ind", "iso3", !!transform_value_col, !!scenario_col))) %>%
      dplyr::distinct()
  }

  pop_df <- dplyr::bind_rows(df_get_surviving_infants, campaign_df)

  if (multiply_surviving_infs) {

    nb_multi <- df %>%
      dplyr::group_by(dplyr::across(c("iso3", !!scenario_col))) %>%
      dplyr::filter(.data[["ind"]] %in% routine_num) %>%
      dplyr::filter(.data[["year"]] == max(.data[["year"]])) %>%
      dplyr::summarise(n = dplyr::n())

    pop_df <- pop_df %>%
      dplyr::left_join(nb_multi, by = c("iso3", scenario_col)) %>%
      dplyr::group_by(dplyr::across(c("iso3", !!scenario_col))) %>%
      dplyr::mutate(
        dplyr::across(
          !!transform_value_col,
          ~ dplyr::case_when(
            .data[["ind"]] == ind_ids["surviving_infants"] ~ .x * .data[["n"]],
            TRUE ~ .x
          )
        )
      ) %>%
      dplyr::select(-"n")
  }

  pop_df %>%
    dplyr::group_by(dplyr::across(c("iso3", !!scenario_col))) %>%
    dplyr::summarize(
      "_temp_pop" := sum(.data[[transform_value_col]][.data[["ind"]] %in% ind_ids[denominator]], na.rm = TRUE),
      .groups = "drop",
      "ind" := !!name
    )
}
gpw13/billionaiRe documentation built on Sept. 27, 2024, 10:05 p.m.