#' Vulnerability Index
#'
#' @description It allows the observations to be ordered by different methods (default
#' percentile rank), and in consideration of domains grouping indicators.
#'
#' @param data A dataframe containing the variables that will be analyzed.
#'
#' @param direct A vector indicating the name of the variables that contribute
#' directly to the vulnerability index.
#'
#' @param inverse A vector indicating the name of the variables that contribute
#' inversely to the vulnerability index.
#'
#' @param table A dataframe indicating the variables "Codename", "Direction",
#' "Domain" and "Geographic_scale" as a minimum. If this argument is specified,
#' it is no longer necessary to specify the direct and inverse arguments.
#'
#' @param weighted A logical value indicating whether the "Domain" information is
#' used for the index calculation.
#'
#' @param domains A vector indicating the names of the domains grouping the variables
#' of interest. If the argument "table" containing the variable "Domain" is used,
#' only "default" is indicated in this argument.
#'
#' @param level A vector indicating the level of analysis. It currently supports 3
#' values:"departamental", "provincial", and "distrital".
#'
#' @param ordered A logical value indicating whether the cases analyzed will be
#' ordered from highest to lowest.
#'
#' @param method A vector indicating the analysis method used for the index.
#' Currently only "percent_rank" is supported.
#'
#' @param complete If true, the results table will show all the variables that
#' the dataframe has in addition to the computations performed.
#'
#' @param na.rm A logical value indicating whether cases where missing values are
#' eliminated.
#'
#' @return A \code{\link[tibble]{tibble}} containing the variables analyzed by the function.
#' @seealso \code{\link[dplyr]{percent_rank}}, \code{\link[psych]{principal}}
#'
#' @import dplyr
#' @importFrom purrr map map2 reduce
#' @importFrom magrittr use_series
#' @importFrom stringr str_sub str_to_lower str_replace
#' @importFrom rlang .data
#' @importFrom tidyr nest unnest drop_na separate
#' @importFrom psych principal
#' @importFrom methods new
#' @importFrom stats cor
#' @export
#'
#' @examples
#' \dontrun{
#' Sigma_Dom1 <- matrix(rep(c(1, runif(4, 0.65, 0.9)), 4),
#' 4, 4, byrow = TRUE)
#'
#' Sigma_Dom2 <- matrix(rep(c(1, runif(4, 0.65, 0.9)), 4),
#' 4, 4, byrow = TRUE)
#'
#' Dom1 <- as.data.frame(MASS::mvrnorm(100, rep(0, 4), Sigma_Dom1))
#' Dom2 <- as.data.frame(MASS::mvrnorm(100, rep(0, 4), Sigma_Dom2))
#' colnames(Dom2) <- paste0("V", 5:8)
#'
#' df_example <- data.frame(distr = paste0("distr", 1:100))
#' df_example <- cbind(df_example, Dom1, Dom2)
#'
#' table_var <- data.frame(
#' Codename = paste0("V", 1:8),
#' Direction = "Direct",
#' Domain = c(rep("Dom1", 4), rep("Dom2", 4)),
#' Geographic_scale = "Distrital"
#' )
#'
#' vulnerability_index(df_example, table = table_var,
#' level = "distrital", na.rm = TRUE,
#' method = "pca")
#'
#'
#' vulnerability_index(df_example, table = table_var,
#' level = "distrital", na.rm = TRUE,
#' method = "percent_rank")
#' }
vulnerability_index <- function(data, direct = NULL, inverse = NULL, table = NULL,
weighted = TRUE, domains = 'default', level = NULL,
ordered = TRUE, method = "percent_rank", complete = TRUE,
na.rm = FALSE) {
# Checking
if (is.null(table) & (is.null(direct) | is.null(inverse))) {
stop(paste("It is necessary that direct and inverse contain information",
"or that the table argument is specified."))
}
if (!is.null(table) & (!is.null(direct) | !is.null(inverse))) {
stop(paste("It is only necessary to specify direct/inverse or table.",
"Not all 3 arguments at the same time."))
}
if(is.null(level)) {
stop(paste("Now it is necessary to indicate the level of analysis to",
"be performed. Three values are allowed: distrital, provincial",
"departamental"))
} else if (!level %in% c("distrital", "provincial", "departamental")) {
stop(paste("Only 3 values are allowed: distrital, privincial, departamental."))
}
## Options
indexoptions <- list()
indexoptions$direct <- direct
indexoptions$inverse <- inverse
indexoptions$table <- table
indexoptions$weighted <- weighted
indexoptions$domains <- domains
indexoptions$level <- level
indexoptions$ordered <- ordered
indexoptions$method <- method
indexoptions$complete <- complete
indexoptions$na.rm <- na.rm
# Get data about variable's direction
# Temp only table argument is support
if(level == "distrital") {
table <- table %>%
filter(.data$Geographic_scale == "Distrital")
} else if (level == "provincial") {
table <- table %>%
filter(.data$Geographic_scale == "Distrital")
}
if(!is.null(table)) {
direct <- table %>%
filter(.data$Direction == "Direct") %>%
use_series(Codename)
inverse <- table %>%
filter(.data$Direction == "Inverse") %>%
use_series(Codename)
}
if (weighted & domains == 'default') {
n_domains <- length(table(table$Domain))
names_domains <- str_sub(str_to_lower(sort(unique(table$Domain))), 1, 6)
# names_domains <- paste0("rank_", names_domains)
# if (method == "percent_rank") {
# names_domains <- paste0("rank_", names_domains)
# } else if (method == "pca") {
# names_domains <- paste0("pca_", names_domains)
# }
vars_domains <- table %>%
group_nest(.data$Domain) %>%
use_series(data) %>%
map(~ .x$Codename)
vars_vulnerability <- table$Codename
}
data <- data %>%
select(starts_with("dep"),
starts_with("prov"),
starts_with("distr"),
{{ vars_vulnerability }})
# Function body
if (method == "percent_rank") {
names_domains <- paste0("rank_", names_domains)
if (weighted) {
df_rank <- data %>%
mutate(
across({{ direct }}, percent_rank, .names = "rank_{.col}"),
across({{ inverse }}, ~ percent_rank(desc(.)), .names = "rank_{.col}")
) %>%
rowwise()
for (i in 1:n_domains) {
name_domain_C <- names_domains[i]
var_domain <- vars_domains[[i]]
var_domain <- paste0("rank_", var_domain)
df_rank <- df_rank %>%
mutate(
{{ name_domain_C }} := sum(c_across({{ var_domain }}), na.rm = na.rm)
)
}
rank_vars_vulnerability <- paste0("rank_", vars_vulnerability)
df_rank <- df_rank %>%
ungroup() %>%
mutate(
across( {{names_domains}}, percent_rank)
) %>%
rowwise() %>%
mutate(
sumr = sum(c_across({{ names_domains }}), na.rm = na.rm)
) %>%
ungroup() %>%
mutate(
Rank_T = percent_rank(.data$sumr) # Ranking percentil global
) %>%
select(-c(
#{{ vars_vulnerability }},
{{ rank_vars_vulnerability }},
.data$sumr
)) %>%
relocate(
starts_with("dep"),
starts_with("prov"),
starts_with("distr"),
{{ names_domains }},
.data$Rank_T
)
} else {
df_rank <- data %>%
mutate(
across({{ direct }}, percent_rank),
across({{ inverse }}, percent_rank)
) %>%
rowwise() %>%
mutate(
sumr = sum(c_across(c({{ direct }}, {{ inverse }})), na.rm = na.rm)
) %>%
ungroup() %>% # desagrupar
mutate(
Rank_T = percent_rank(.data$sumr) # Ranking percentil global
) %>%
select(-c(.data$sumr))
# select(dep:distr, {{ direct }}, {{ inverse }}, Rank_T)
}
} else if (method == "pca") {
names_domains_data <- paste0("data_", names_domains)
names_domains_pca <- paste0("pca_", names_domains)
names_domains_aug <- paste0("fitted_", names_domains)
# names_domains_loadings <- paste0("tidy_", names_domains)
# names_domains_explained <- paste0("explained_", names_domains)
# names_domains_fit <- paste0("fit_", names_domains)
df_rank <- data %>%
nest(data = everything())
df_rank_list <- list()
df_rank_list_g <- list()
for (i in 1:n_domains) {
name_domain_data <- names_domains_data[i]
name_domain_C_pca <- names_domains_pca[i]
name_domain_aug <- names_domains_aug[i]
# name_domains_loadings <- names_domains_loadings[i]
# name_domains_explained <- names_domains_explained[i]
# name_domains_fit <- names_domains_fit[i]
var_domain <- vars_domains[[i]]
df_rank_list_g[[i]] <- df_rank %>%
mutate(
{{ name_domain_data }} := map(data,
~ .x %>%
select(starts_with("dep"),
starts_with("prov"),
starts_with("distr"),
{{ var_domain }}) %>%
drop_na()),
{{ name_domain_C_pca }} := map(eval(parse(text = name_domain_data)),
~ .x %>%
select({{ var_domain }}) %>%
mutate(
across(
.cols = c({{ var_domain }}),
.fns = ~ scale(.)[,1]
)
) %>%
principal(r = .,
nfactors = 1)),
{{ name_domain_aug }} := map2(eval(parse(text = name_domain_C_pca)),
eval(parse(text = name_domain_data)),
~ .y %>%
bind_cols(
.x$scores %>%
as_tibble()
) %>%
rename({{ name_domain_C_pca }} := .data$PC1)),
loadings = map(eval(parse(text = name_domain_C_pca)),
~ .x$loadings %>%
unclass() %>%
as_tibble(rownames = "Indicators") %>%
mutate(
h2 = .x$communality,
u2 = .x$uniquenesses,
com = .x$complexity
)),
explained = map(eval(parse(text = name_domain_C_pca)),
~ .x$Vaccounted %>%
as_tibble(rownames = "Variance")),
fit = map2(eval(parse(text = name_domain_C_pca)),
eval(parse(text = name_domain_data)),
~ .y %>%
select({{ var_domain }}) %>%
fit_measures(.x, .))
)
df_rank_list[[i]] <- df_rank_list_g[[i]] %>%
select({{ name_domain_aug }}) %>%
unnest(cols = {{ name_domain_aug }})
}
df_rank <- df_rank_list %>%
reduce(full_join) %>%
drop_na()
df_rank_g <- df_rank %>%
nest(data = everything()) %>%
mutate(
pca = map(data,
~ .x %>%
select({{ names_domains_pca }}) %>%
principal(r = .,
nfactors = 1)),
fitted = map2(.data$pca, .data$data,
~ .y %>%
bind_cols(
.x$scores %>%
as_tibble()
) %>%
rename(fit_g := .data$PC1)),
loadings = map(.data$pca,
~ .x$loadings %>%
unclass() %>%
as_tibble(rownames = "Indicators") %>%
mutate(
h2 = .x$communality,
u2 = .x$uniquenesses,
com = .x$complexity
)),
explained = map(.data$pca,
~ .x$Vaccounted %>%
as_tibble(rownames = "Variance")),
fit = map2(.data$pca, .data$data,
~ .y %>%
select({{ names_domains_pca }}) %>%
fit_measures(.x, .))
)
df_rank <- df_rank_g %>%
select(.data$fitted) %>%
unnest(cols = .data$fitted)
# Detect direction of domains
direction_loadings <- df_rank_g$loadings[[1]] %>%
mutate(
Direction = ifelse(.data$PC1 >= 0,
"Direct",
"Inverse")
)
direct_domains_loading <- direction_loadings %>%
filter(.data$Direction == "Direct") %>%
pull(.data$Indicators)
names_direct_domains_loading <- str_replace(direct_domains_loading,
"pca_", "rank_")
inverse_domains_loading <- direction_loadings %>%
filter(.data$Direction == "Inverse") %>%
pull(.data$Indicators)
names_inverse_domains_loading <- str_replace(inverse_domains_loading,
"pca_", "rank_")
#######################################################
names_domains_rank <- paste0("rank_", names_domains)
names_domains_pca_rank <- data.frame(string = c(names_domains_pca,
names_domains_rank)) %>%
separate(.data$string, c("type", "variable"),
remove = FALSE) %>%
arrange(.data$variable) %>%
pull(string)
df_rank <- df_rank %>%
mutate(
Rank_T = percent_rank(.data$fit_g),
across({{ direct_domains_loading }}, percent_rank,
.names = "{names_direct_domains_loading}"),
across({{ inverse_domains_loading }}, ~ percent_rank(desc(.)),
.names = "{names_inverse_domains_loading}"),
) %>%
relocate(starts_with("dep"),
starts_with("prov"),
starts_with("distr"),
{{ names_domains_pca_rank }},
.data$fit_g, .data$Rank_T)
}
if (ordered) {
df_rank <- df_rank %>%
arrange(desc(.data$Rank_T))
}
if (complete == FALSE) {
df_rank <- df_rank %>%
select(-c({{ vars_vulnerability }}))
}
if (method == "percent_rank") {
indexcalc <- new("indexcalc",
Options = indexoptions, # list
Data = df_rank # S4 class
)
} else if (method == "pca") {
Specificfit <- list()
for (i in 1:n_domains) {
names_domain <- names_domains[[i]]
Specificfit[[names_domain]] <- new("indexFit",
Fits = df_rank_list_g[[i]]$fit[[1]],
Explained = df_rank_list_g[[i]]$explained[[1]],
Loadings = df_rank_list_g[[i]]$loadings[[1]]
)
}
indexfits <- new("indexFits",
Specific = Specificfit,
Fits = df_rank_g$fit[[1]],
Explained = df_rank_g$explained[[1]],
Loadings = df_rank_g$loadings[[1]]
)
indexcalc <- new("indexcalc",
Options = indexoptions, # list
Data = df_rank, # S4 class
Fit = indexfits # S4 class
)
}
return(indexcalc)
}
fit_measures <- function(pca, data) {
bartlett <- data %>%
cor() %>%
psych::cortest.bartlett(., nrow(data))
bartlett <- tibble(
Chisq = bartlett$chisq,
df = bartlett$df,
p_value = bartlett$p.value
)
KMO <- data %>%
cor() %>%
psych::KMO()
KMO <- tibble(
Indicators = "Overall",
KMO = KMO$MSA
) %>%
bind_rows(
KMO$MSAi %>%
as_tibble(rownames = "Indicators") %>%
rename(KMO = .data$value)
)
Chi <- tibble(
Chisq = pca$chi,
p_value = pca$EPVAL
)
Fit <- list(
bartlett = bartlett,
KMO = KMO,
Chi = Chi,
RMSR = pca$rms,
Fit_off = pca$fit.off
)
return(Fit)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.