Nothing
#' Proxy Indicator Diagnostic Tool
#'
#' @description
#' `senser()` is a statistical diagnostic function designed to evaluate
#' whether one or more proxy indicators are suitable representations of an
#' underlying construct that cannot be directly observed or measured.
#'
#' The function assesses each proxy on seven statistical dimensions:
#' monotonicity, information content, elasticity (responsiveness),
#' variability, stagnation, ceiling effects, and coefficient stability.
#'
#' The output is returned as a structured data.frame containing quantitative
#' scores and qualitative classifications suitable for applied research and
#' policy diagnostics.
#'
#' @param data
#' A data.frame containing the target construct and proxy variables.
#'
#' @param proxy
#' Character vector specifying one or more proxy variable names contained
#' in `data`.
#'
#' @param target
#' Character string (length one) specifying the numeric target construct
#' variable name contained in `data`.
#'
#' @param lang
#' Language for status labels and interpretation text.
#' Must be either `"english"` or `"indonesia"`.
#'
#' @param stagnation_cut
#' Threshold used to detect stagnation (very small average absolute change
#' between consecutive observations, in row order).
#' Default is `0.01`.
#'
#' @param cv_cut
#' Threshold for the coefficient of variation (CV).
#' Default is `0.02`.
#'
#' @param ceiling_cut
#' Threshold used to detect ceiling effects (mean-to-maximum ratio).
#' Default is `0.95`.
#'
#' @details
#' Rows in which either the proxy or the target is missing are removed
#' before a proxy is evaluated. The diagnostic score for each proxy is
#' computed from seven components, each lying between 0 and 1:
#'
#' \itemize{
#' \item \strong{monotonicity}: absolute Spearman rank correlation.
#' \item \strong{information_content}: squared Pearson correlation, i.e. the
#' R-squared of the simple linear regression of the target on the proxy.
#' \item \strong{elasticity_score}: 0.3 when the standardized regression
#' slope is below 0.1, otherwise 1.
#' \item \strong{variability_score}: 0.3 when the coefficient of variation
#' is below `cv_cut`, otherwise 1.
#' \item \strong{stagnation_score}: 0.2 when the average absolute change is
#' below `stagnation_cut`, otherwise 1.
#' \item \strong{ceiling_score}: 0.3 when the mean-to-maximum ratio exceeds
#' `ceiling_cut`, otherwise 1.
#' \item \strong{stability_score}: one minus the standard deviation of the
#' regression slopes (full sample, first half, second half) relative to the
#' absolute full-sample slope, clamped to lie between 0 and 1. It is 0 when
#' a half-sample slope cannot be estimated.
#' }
#'
#' The final proxy score is computed as the median of all components.
#'
#' Classification categories:
#'
#' \itemize{
#' \item Score >= 0.70: Suitable proxy
#' \item 0.40 <= Score < 0.70: Conditionally suitable
#' \item Score < 0.40: Not suitable proxy
#' }
#'
#' A proxy is skipped with a warning (and produces no row) when it is not
#' numeric, has fewer than 10 complete observations, has zero or undefined
#' variance (or the target does on the same rows), or when its regression
#' slope cannot be estimated.
#'
#' @return
#' A data.frame with one row per evaluated proxy variable, or `NULL` when no
#' proxy could be evaluated. Columns:
#'
#' \describe{
#' \item{target}{Target construct name.}
#' \item{proxy}{Proxy variable name.}
#' \item{monotonicity}{Spearman correlation (absolute).}
#' \item{information_content}{R-squared value.}
#' \item{elasticity}{Standardized regression slope (absolute).}
#' \item{cv}{Coefficient of variation, relative to the absolute mean.}
#' \item{avg_change}{Average absolute change.}
#' \item{ceiling_ratio}{Mean-to-maximum ratio.}
#' \item{stability_score}{Coefficient stability index.}
#' \item{final_score}{Median diagnostic score.}
#' \item{classification}{Qualitative proxy category.}
#' \item{interpretation}{Plain-language interpretation.}
#' }
#'
#' @examples
#' set.seed(123)
#' df <- data.frame(
#'   gdp = rnorm(100, 10, 2),
#'   ntl = rnorm(100, 50, 10),
#'   road_density = rnorm(100, 3, 0.5)
#' )
#'
#' senser(
#'   data = df,
#'   proxy = c("ntl", "road_density"),
#'   target = "gdp",
#'   lang = "english"
#' )
#'
#' @references
#' Spearman, C. (1904). The proof and measurement of association between two things.
#' \emph{American Journal of Psychology}, 15(1), 72–101.
#'
#' Chow, G. C. (1960). Tests of equality between sets of coefficients
#' in two linear regressions. \emph{Econometrica}.
#'
#' OECD (2008). Handbook on Constructing Composite Indicators:
#' Methodology and User Guide.
#'
#' @author
#' Joko Nursiyono
#'
#' @seealso
#' \code{\link{lm}}, \code{\link{cor}}, \code{\link{median}}
#'
#' @importFrom stats cor coef lm sd median complete.cases
#' @export
senser <- function(data,
                   proxy,
                   target,
                   lang = c("english", "indonesia"),
                   stagnation_cut = 0.01,
                   cv_cut = 0.02,
                   ceiling_cut = 0.95) {
  lang <- match.arg(lang)

  if (!is.data.frame(data)) {
    stop("data must be a data.frame")
  }
  if (!all(proxy %in% names(data))) {
    stop("proxy not found in data")
  }
  # A vector-valued target would otherwise fail below with an opaque
  # "the condition has length > 1" error.
  if (length(target) != 1L) {
    stop("target must be a single variable name")
  }
  if (!target %in% names(data)) {
    stop("target not found in data")
  }
  Y <- data[[target]]
  if (!is.numeric(Y)) {
    stop("target must be numeric")
  }

  # Labels indexed by tier: 1 = suitable, 2 = conditional, 3 = not suitable.
  labels <- switch(
    lang,
    english = list(
      classification = c(
        "Suitable proxy",
        "Conditionally suitable",
        "Not suitable proxy"
      ),
      interpretation = c(
        "Proxy demonstrates adequate variability and statistical stability.",
        "Proxy partially represents the construct with limitations.",
        "Proxy lacks sufficient statistical reliability."
      )
    ),
    indonesia = list(
      classification = c(
        "Proxy layak",
        "Layak bersyarat",
        "Proxy tidak layak"
      ),
      interpretation = c(
        "Proxy memiliki variabilitas dan stabilitas yang memadai.",
        "Proxy menangkap sebagian konstruk dengan keterbatasan.",
        "Proxy tidak cukup andal secara statistik."
      )
    )
  )

  # Report why a proxy produced no row instead of dropping it silently.
  skip_proxy <- function(name, reason) {
    warning("proxy '", name, "' skipped: ", reason, call. = FALSE)
    NULL
  }

  # Slope of y ~ x. unname() keeps the coefficient name ("x") from leaking
  # into the result as a data.frame row name.
  slope_of <- function(y, x) {
    unname(coef(lm(y ~ x))[2])
  }

  results <- lapply(proxy, function(p) {
    X <- data[[p]]
    if (!is.numeric(X)) {
      return(skip_proxy(p, "not numeric"))
    }

    keep <- complete.cases(X, Y)
    X <- X[keep]
    Yc <- Y[keep]
    n <- length(X)
    if (n < 10) {
      return(skip_proxy(p, "fewer than 10 complete observations"))
    }

    # Constant (or non-finite) series make the correlations and the slope
    # NA, which would otherwise abort the whole call at the classification
    # step.
    sd_x <- sd(X)
    sd_y <- sd(Yc)
    if (!is.finite(sd_x) || !is.finite(sd_y) || sd_x == 0 || sd_y == 0) {
      return(skip_proxy(p, "proxy or target has zero or undefined variance"))
    }

    # Full-sample slope, fitted once and reused for elasticity and stability.
    b_full <- slope_of(Yc, X)
    if (!is.finite(b_full)) {
      return(skip_proxy(p, "regression slope could not be estimated"))
    }

    mono <- abs(cor(X, Yc, method = "spearman"))
    info <- cor(X, Yc)^2

    elast <- abs(b_full) * sd_x / sd_y
    elast_score <- if (elast < 0.1) 0.3 else 1

    # Use the absolute mean so a negative-mean proxy is not reported with a
    # negative CV (which would always fall below cv_cut).
    cv <- sd_x / abs(mean(X))
    cv_score <- if (cv < cv_cut) 0.3 else 1

    avg_change <- mean(abs(diff(X)), na.rm = TRUE)
    stagnation_score <- if (avg_change < stagnation_cut) 0.2 else 1

    ceiling_ratio <- mean(X) / max(X)
    ceiling_score <- if (ceiling_ratio > ceiling_cut) 0.3 else 1

    # Slope stability across the full sample and its two halves.
    first <- seq_len(floor(n / 2))
    slopes <- c(
      b_full,
      slope_of(Yc[first], X[first]),
      slope_of(Yc[-first], X[-first])
    )
    sens <- if (anyNA(slopes)) {
      # A half-sample in which the proxy is constant has no estimable slope;
      # treat that as no evidence of stability rather than failing.
      0
    } else {
      1 - sd(slopes) / max(abs(b_full), 1e-8)
    }
    sens <- max(min(sens, 1), 0)

    score <- median(c(
      mono, info, elast_score,
      cv_score, stagnation_score,
      ceiling_score, sens
    ))

    tier <- if (score >= 0.7) {
      1L
    } else if (score >= 0.4) {
      2L
    } else {
      3L
    }

    data.frame(
      target = target,
      proxy = p,
      monotonicity = mono,
      information_content = info,
      elasticity = elast,
      cv = cv,
      avg_change = avg_change,
      ceiling_ratio = ceiling_ratio,
      stability_score = sens,
      final_score = score,
      classification = labels$classification[tier],
      interpretation = labels$interpretation[tier],
      row.names = NULL,
      stringsAsFactors = FALSE
    )
  })

  # rbind() ignores the NULL entries left by skipped proxies; the result is
  # NULL when every proxy was skipped.
  out <- do.call(rbind, results)
  if (!is.null(out)) {
    rownames(out) <- NULL
  }
  out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.