Nothing
#' Indicator Normalization
#'
#' Perform normalization based on indicators' polarity.
#'
#' @usage
#' normalize(inds, method = c("min-max", "goalpost"), ind.pol,
#' gp.range = c(70, 130), time = NULL, ref.time = NULL,
#' ref.value = NULL)
#'
#' @param inds a numeric vector, matrix, or data frame which provides indicators
#' to be normalized.
#' @param method normalization method to be used. See \sQuote{Details}.
#' @param ind.pol a character vector whose elements can be `"pos"` (positive) or
#' `"neg"` (negative), indicating the polarity of indicators. An indicator's
#' polarity is the sign of the relation between the indicator and the
#' phenomenon to be measured.
#' @param gp.range a vector of the form `c(a,b)` giving the normalization range
#' for method `"goalpost"`. The default value is `c(70,130)`.
#' @param time a vector of temporal factors for input indicators. The length of
#' `time` must equal the number of rows in `inds`. If `NULL`, the input data
#' will be treated as cross-sectional.
#' @param ref.time a value denoting the reference time for normalization. See
#' \sQuote{Details}.
#' @param ref.value a vector containing reference values for indicators to
#' facilitate the interpretation of results, required by method `"goalpost"`.
#' When normalizing each indicator, their reference values will be mapped to
#' the midpoint of `gp.range`. See \sQuote{Details}.
#'
#' @details
#' By default, each indicator \eqn{x} is normalized by method `"min-max"` with
#' the formulas
#' \deqn{\displaystyle\tilde{x}^{+}_i = \frac{x_i -
#' \mathrm{inf}_x}{\mathrm{sup}_x - \mathrm{inf}_x},}
#' or
#' \deqn{\displaystyle \tilde{x}^{-}_i = 1 - \tilde{x}^{+}_i,}
#' where \eqn{\mathrm{sup}_x} and \eqn{\mathrm{inf}_x} are respectively the
#' superior and inferior values of the indicator. The former formula is applied
#' to indicators with positive polarity while the latter one is used for those
#' with negative polarity.
#'
#' If either `time` or `ref.time` is `NULL`, the superior and inferior values
#' are respectively the maximum and minimum values of \eqn{x}. If both `time`
#' and `ref.time` are not `NULL`, the superior and inferior values are
#' respectively the maximum and minimum values of \eqn{x} observed at the
#' reference time. In other words, if `time` is not provided or provided without
#' specifying a value for `ref.time`, the input data will be treated as
#' cross-sectional.
#'
#' For method `"goalpost"`, a vector of reference values for indicators is
#' required. If not specified by users (`ref.value = NULL`), these values are
#' automatically set #' to the indicator means for cross-sectional data or to
#' the indicator means at the reference time for longitudinal data.
#'
#' Method `"goalpost"` computes two goalposts for normalization as
#' \eqn{\mathrm{gp\_min}_x = \mathrm{ref}_x - \Delta} and
#' \eqn{\mathrm{gp\_max}_x = \mathrm{ref}_x + \Delta}, where
#' \eqn{\mathrm{ref}_x} is the reference value of \eqn{x} and
#' \eqn{\Delta = (\mathrm{sup}_x - \mathrm{inf}_x)/2}. Indicators with
#' positive polarity are rescaled using the formula
#' \deqn{\displaystyle \tilde{x}^{+}_i =
#' \frac{x_i - \mathrm{gp\_min}_x}{\mathrm{gp\_max}_x -
#' \mathrm{gp\_min}_x} (b - a) + a,}
#' while indicators with negative polarity are rescaled using the formula
#' \deqn{\displaystyle \tilde{x}^{-}_i = a + b - \tilde{x}^{+}_i.}
#' If an indicator follows a symmetric probability distribution and its
#' reference value is set to the mean, the normalized values will theoretically
#' remain in the range \eqn{[a,b]}. In other cases, the normalized values may
#' extend beyond `gp.range`.
#'
#' @returns An object of class `"data.frame"` containing normalized indicators.
#'
#' @author
#' Viet Duong Nguyen, Chiara Gigliarano, Mariateresa Ciommi
#'
#' @references
#' Mazziotta, M. and Pareto, A. (2016). On a Generalized Non-compensatory
#' Composite Index for Measuring Socio-economic Phenomena. \emph{Social
#' Indicators Research}, \bold{127}, 983--1003.
#'
#' @seealso
#' [`giniCI`].
#'
#' @example ./inst/examples/normalize_eg.R
#'
#' @export
normalize <- function(inds, method = c("min-max", "goalpost"), ind.pol,
gp.range = c(70, 130), time = NULL, ref.time = NULL,
ref.value = NULL) {
name <- deparse(substitute(inds))
inds <- as.data.frame(inds)
n <- ncol(inds)
method <- match.arg(method)
if (n == 1) colnames(inds) <- name
if (all(sapply(inds, is.numeric)) == FALSE) {
stop("All indicators must be numeric.")
}
if (length(ind.pol) != n) {
stop("Length of polarity vector must equal the number of indicators.")
}
if (all(unique(ind.pol) %in% c("pos", "neg")) == FALSE) {
stop("Values of polarity vector must be either 'pos' or 'neg'.")
}
if (gp.range[1] >= gp.range[2]) {
stop("Invalid range, the first value must be smaller than the second value.")
}
if (any(gp.range < 0)) {
stop("Range must contain only non-negative values.")
}
if (!is.null(time) && length(time) != nrow(inds)) {
stop("Invalid length of temporal factor vector!")
}
if (!is.null(ref.time) && is.null(time)) {
stop("Vector of temporal factors is missing.")
}
if (!is.null(ref.time)) {
if (!ref.time %in% unique(time)) stop("Invalid reference time!")
}
if (!is.null(time) && is.null(ref.time)) {
warning("Reference time is missing. Input dataset is treated as cross-sectional.",
call. = FALSE
)
}
inds.norm <- list()
if (is.null(time) || is.null(ref.time)) {
inf <- apply(inds, 2, function(x) min(x, na.rm = TRUE))
sup <- apply(inds, 2, function(x) max(x, na.rm = TRUE))
} else {
inf <- apply(inds[time == ref.time, ], 2, function(x) min(x, na.rm = TRUE))
sup <- apply(inds[time == ref.time, ], 2, function(x) max(x, na.rm = TRUE))
}
for (i in 1:n) {
if (sup[i] == inf[i]) {
stop(paste("Column", i, "is constant (or constant at the reference time). Cannot perform normalization!"))
}
}
if (method == "min-max") {
for (i in 1:n) {
ind <- (inds[, i] - inf[i]) / (sup[i] - inf[i])
if (ind.pol[i] == "neg") ind <- 1 - ind
inds.norm[[i]] <- ind
}
} else {
if (is.null(ref.value)) {
if (is.null(ref.time)) {
ref.value <- colMeans(inds, na.rm = TRUE)
} else {
ref.value <- colMeans(inds[time == ref.time, ], na.rm = TRUE)
}
} else {
if (length(ref.value) != n) {
stop("Length of reference value vector must equal the number of indicators.")
}
}
delta <- (sup - inf) / 2
gp.min <- ref.value - delta
gp.max <- ref.value + delta
for (i in 1:n) {
ind <- (inds[, i] - gp.min[i]) / (gp.max[i] - gp.min[i]) *
(gp.range[2] - gp.range[1]) + gp.range[1]
if (ind.pol[i] == "neg") ind <- gp.range[1] + gp.range[2] - ind
inds.norm[[i]] <- ind
}
}
inds.norm <- do.call(cbind.data.frame, inds.norm)
colnames(inds.norm) <- colnames(inds)
rownames(inds.norm) <- rownames(inds)
if (!all(complete.cases(inds.norm))) {
n.missing <- sum(is.na(inds.norm))
warning(paste(n.missing, "NA value(s) generated."),
call. = FALSE
)
}
if (sum(inds.norm < 0, na.rm = TRUE) > 0) {
n.cell.neg <- sum(inds.norm < 0)
col.neg <- apply(inds.norm, 2, function(x) all(x >= 0, na.rm = TRUE))
col.neg <- which(col.neg == FALSE)
warning(
paste(
n.cell.neg, "negative value(s) generated in columns(s):",
noquote(paste(col.neg, collapse = ", "))
),
call. = FALSE
)
}
return(inds.norm)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.