test_list: InnovaLab Toolkit

Documented in vulnerability_index

#' Vulnerability Index
#'
#' @description It allows the observations to be ordered by different methods (default
#'   percentile rank), and in consideration of domains grouping indicators.
#'
#' @param data A dataframe containing the variables that will be analyzed.
#'
#' @param direct A vector indicating the name of the variables that contribute
#'    directly to the vulnerability index.
#'
#' @param inverse A vector indicating the name of the variables that contribute
#'    inversely to the vulnerability index.
#'
#' @param table A dataframe indicating the variables "codename", "Direccion",
#'    "Dominio" and "Escala_Geografica" as a minimum. If this argument is specified,
#'    it is no longer necessary to specify the direct and inverse arguments.
#'
#' @param weighted A logical value indicating whether the "Dominio" information is
#'    used for the index calculation.
#'
#' @param domains A vector indicating the names of the domains grouping the variables
#'    of interest. If the argument "table" containing the variable "Dominio" is used,
#'    only "default" is indicated in this argument.
#'
#' @param level A vector indicating the level of analysis. It currently supports 3
#'    values:"departamental", "provincial", and "distrital".
#'
#' @param ordered A logical value indicating whether the cases analyzed will be
#'    ordered from highest to lowest.
#'
#' @param method A vector indicating the analysis method used for the index.
#'    Currently only "percent_rank" is supported.
#'
#' @param na.rm A logical value indicating whether cases where missing values are
#'    eliminated.
#'
#' @return A [tibble::tibble()] containing the variables analyzed by the function.
#' @seealso [dplyr::percent_rank()].
#'
#' @export
#'
#' @examples
vulnerability_index <- function(data, direct = NULL, inverse = NULL, table = NULL,
                                weighted = TRUE, domains = 'default', level = NULL,
                                ordered = TRUE, method = "percent_rank", na.rm = FALSE) {
  # Checking
  if (is.null(table) & (is.null(direct) | is.null(inverse))) {
    stop(paste("Es necesario que direct e inverse contengan informacion o",
               "que se especifique el argumento table."))
  }

  if (!is.null(table) & (!is.null(direct) | !is.null(inverse))) {
    stop(paste("Solo es necesario especificar direct/inverse o table. No",
               "los 3 argumentos al mismo tiempo."))
  }

  if(is.null(level)) {
    stop(paste("Ahora es necesario indicar el nivel de analisis que se quiere",
               "realizar. Se admiten 3 valores: distrital, provincial, departamental."))
  } else if (!level %in% c("distrital", "provincial", "departamental")) {
    stop(paste("Solo se admiten 3 valores: distrital, privincial, departamental."))
  }


  # Get data about variable's direction
  # Temp only table argument is support

  if(level == "distrital") {
    table <- table %>%
      dplyr::filter(Escala_geografica == "Distrital")
  } else if (level == "provincial") {
    table <- table %>%
      dplyr::filter(Escala_geografica == "Distrital")
  }

  if(!is.null(table)) {
    direct <- table %>%
      dplyr::filter(Direccion == "Directo") %>%
      magrittr::use_series(Codename)

    inverse <- table %>%
      dplyr::filter(Direccion == "Inverso") %>%
      magrittr::use_series(Codename)
  }

  if (weighted & domains == 'default') {
    n_domains <- length(table(table$Dominio))
    names_domains <- stringr::str_sub(stringr::str_to_lower(sort(unique(table$Dominio))), 1, 6)
    names_domains <- paste0("rank_", names_domains)
    vars_domains <- table %>%
      dplyr::group_nest(Dominio) %>%
      magrittr::use_series(data) %>%
      purrr::map(~ .x$Codename)
    vars_vulnerability <- table$Codename
  }

  data <- data %>%
    dplyr::select(dplyr::starts_with("dep"),
                  dplyr::starts_with("prov"),
                  dplyr::starts_with("distr"),
                  {{ vars_vulnerability }})

  # Function body

  if (method == "percent_rank") {
    if (weighted) {
      df_rank <- data %>%
        dplyr::mutate(
          dplyr::across({{ direct }}, dplyr::percent_rank),
          dplyr::across({{ inverse }}, ~ dplyr::percent_rank(desc(.)))
        ) %>%
        dplyr::rowwise()

      for (i in 1:n_domains) {
        name_domain_C <- names_domains[i]
        var_domain <- vars_domains[[i]]

        df_rank <- df_rank %>%
          dplyr::mutate(
            {{ name_domain_C }} := sum(dplyr::c_across({{ var_domain }}), na.rm = na.rm)
          )
      }

      df_rank <- df_rank %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
          dplyr::across( {{names_domains}}, dplyr::percent_rank)
        ) %>%
        dplyr::rowwise() %>%
        dplyr::mutate(
          sumr = sum(dplyr::c_across({{ names_domains }}), na.rm = na.rm)
        ) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
          Rank_T = dplyr::percent_rank(sumr) # Ranking percentil global
        ) %>%
        dplyr::select(-c({{ vars_vulnerability }}, sumr))

    } else {
      df_rank <- data %>%
        dplyr::mutate(
          dplyr::across({{ direct }}, dplyr::percent_rank),
          dplyr::across({{ inverse }}, dplyr::percent_rank)
        ) %>%
        dplyr::rowwise() %>%
        dplyr::mutate(
          sumr = sum(dplyr::c_across(c({{ direct }}, {{ inverse }})), na.rm = na.rm)
        ) %>%
        dplyr::ungroup() %>% # desagrupar
        dplyr::mutate(
          Rank_T = dplyr::percent_rank(sumr) # Ranking percentil global
        ) %>%
        dplyr::select(-c(sumr))
      # select(dep:distr, {{ direct }}, {{ inverse }}, Rank_T)
    }
  }

  if (ordered) {
    df_rank <- df_rank %>%
      dplyr::arrange(dplyr::desc(Rank_T))
  }

  return(df_rank)

}