Nothing
#' Linearization of at-risk-of-poverty rate
#'
#' @description Estimates the at-risk-of-poverty rate (defined as the proportion of persons with equalized disposable income below at-risk-of-poverty threshold) and computes linearized variable for variance estimation.
#'
#' @param Y Study variable (for example equalized disposable income). One dimensional object convertible to one-column \code{data.table} or variable name as character, column number).
#' @param id Optional variable for unit ID codes. One dimensional object convertible to one-column \code{data.table} or variable name as character, column number or logical vector).
#' @param weight Optional weight variable. One dimensional object convertible to one-column \code{data.table} or variable name as character, column number or logical vector).
#' @param Y_thres Variable (for example equalized disposable income) used for computation and linearization of poverty threshold. One dimensional object convertible to one-column \code{data.table} or variable name as character, column number. Variable specified for \code{inc} is used as \code{income_thres} if \code{income_thres} is not defined.
#' @param wght_thres Weight variable used for computation and linearization of poverty threshold. One dimensional object convertible to one-column \code{data.table} or variable name as character, column number or logical vector. Variable specified for \code{weight} is used as \code{wght_thres} if \code{wght_thres} is not defined.
#' @param sort Optional variable to be used as tie-breaker for sorting. One dimensional object convertible to one-column \code{data.table} or variable name as character, column number.
#' @param Dom Optional variables used to define population domains. If supplied, linearization of at-risk-of-poverty threshold is done for each domain. An object convertible to \code{data.table} or variable names as character vector, column numbers as numeric vector.
#' @param period Optional variable for survey period. If supplied, linearization of at-risk-of-poverty threshold is done for each survey period. Object convertible to \code{data.table} or variable names as character, column numbers as numeric vector.
#' @param dataset Optional survey data object convertible to \code{data.table}.
#' @param percentage A numeric value in range \eqn{\left[ 0,100 \right]}{[0,100]} for \eqn{p} in the formula for at-risk-of-poverty threshold computation:
#' \deqn{\frac{p}{100} \cdot Z_{\frac{\alpha}{100}}.}{p/100 * Z(\alpha/100).}
#' For example, to compute at-risk-of-poverty threshold equal to 60\% of some income quantile, \eqn{p} #'should be set equal to 60.
#' @param order_quant A numeric value in range \eqn{\left[ 0,100 \right]}{[0,100]} for \eqn{\alpha} in the formula #'for at-risk-of-poverty threshold computation:
#' \deqn{\frac{p}{100} \cdot Z_{\frac{\alpha}{100}}.}{p/100 * Z(\alpha/100).}
#' For example, to compute at-risk-of-poverty threshold equal to some percentage of median income, \eqn{\alpha} should be set equal to 50.
#' @param var_name A character specifying the name of the linearized variable.
#' @param checking Optional variable if this variable is TRUE, then function checks data preparation errors, otherwise not checked. This variable by default is TRUE.
#'
#' @details The implementation strictly follows the Eurostat definition.
#'
#' @return A list with four objects are returned:
#' \itemize{
#' \item \code{quantile} - a \code{data.table} containing the estimated value of the quantile used for at-risk-of-poverty threshold estimation.
#' \item \code{threshold} - a \code{data.table} containing the estimated at-risk-of-poverty threshold.
#' \item \code{value} - a \code{data.table} containing the estimated at-risk-of-poverty rate (in percentage).
#' \item \code{lin} - a \code{data.table} containing the linearized variables of the at-risk-of-poverty rate (in percentage).
#' }
#'
#' @references
#' Working group on Statistics on Income and Living Conditions (2004) Common cross-sectional EU indicators based on EU-SILC; the gender pay gap. \emph{EU-SILC 131-rev/04}, Eurostat. \cr
#' Guillaume Osier (2009). Variance estimation for complex indicators of poverty and inequality. \emph{Journal of the European Survey Research Association}, Vol.3, No.3, pp. 167-195, ISSN 1864-3361, URL \url{https://ojs.ub.uni-konstanz.de/srm/article/view/369}. \cr
#' Jean-Claude Deville (1999). Variance estimation for complex statistics and estimators: linearization and residual techniques. Survey Methodology, 25, 193-203, URL \url{https://www150.statcan.gc.ca/n1/pub/12-001-x/1999002/article/4882-eng.pdf}. \cr
#'
#'
#' @seealso \code{\link{linarpt}},
#' \code{\link{varpoord}},
#' \code{\link{vardcrospoor}},
#' \code{\link{vardchangespoor}}
#'
#' @keywords Linearization
#'
#' @examples
#' library("data.table")
#' library("laeken")
#' data("eusilc")
#' dataset1 <- data.table(IDd = paste0("V", 1 : nrow(eusilc)), eusilc)
#'
#' # Full population
#' d <- linarpr(Y = "eqIncome", id = "IDd",
#' weight = "rb050", Dom = NULL,
#' dataset = dataset1, percentage = 60,
#' order_quant = 50L)
#' d$value
#'
#' \dontrun{
#' # By domains
#' dd <- linarpr(Y = "eqIncome", id = "IDd",
#' weight = "rb050", Dom = "db040",
#' dataset = dataset1, percentage = 60,
#' order_quant = 50L)
#' dd}
#'
#' @import data.table
#' @import laeken
#'
#' @export linarpr
linarpr <- function(Y, id = NULL, weight = NULL, Y_thres = NULL,
wght_thres = NULL, sort = NULL, Dom = NULL,
period = NULL, dataset = NULL, percentage = 60,
order_quant = 50, var_name = "lin_arpr",
checking = TRUE) {
## initializations
if (min(dim(data.table(var_name)) == 1) != 1) {
stop("'var_name' must have defined one name of the linearized variable")}
if (checking) {
percentage <- check_var(vars = percentage, varn = "percentage",
varntype = "numeric0100")
order_quant <- check_var(vars = order_quant, varn = "order_quant",
varntype = "numeric0100")
Y <- check_var(vars = Y, varn = "Y", dataset = dataset,
ncols = 1, isnumeric = TRUE,
isvector = TRUE, grepls = "__")
Ynrow <- length(Y)
Y_thres <- check_var(vars = Y_thres, varn = "Y_thres",
dataset = dataset, ncols = 1,
Ynrow = Ynrow, mustbedefined = FALSE,
isnumeric = TRUE, isvector = TRUE)
weight <- check_var(vars = weight, varn = "weight",
dataset = dataset, ncols = 1,
Ynrow = Ynrow, isnumeric = TRUE,
isvector = TRUE)
wght_thres <- check_var(vars = wght_thres, varn = "wght_thres",
dataset = dataset, ncols = 1,
Ynrow = Ynrow, mustbedefined = FALSE,
isnumeric = TRUE, isvector = TRUE)
sort <- check_var(vars = sort, varn = "sort",
dataset = dataset, ncols = 1,
Ynrow = Ynrow, mustbedefined = FALSE,
isnumeric = TRUE, isvector = TRUE)
period <- check_var(vars = period, varn = "period",
dataset = dataset, Ynrow = Ynrow,
ischaracter = TRUE, mustbedefined = FALSE,
duplicatednames = TRUE)
Dom <- check_var(vars = Dom, varn = "Dom", dataset = dataset,
Ynrow = Ynrow, ischaracter = TRUE,
mustbedefined = FALSE, duplicatednames = TRUE,
grepls = "__")
id <- check_var(vars = id, varn = "id", dataset = dataset,
ncols = 1, Ynrow = Ynrow, ischaracter = TRUE,
periods = period)
}
dataset <- NULL
if (is.null(Y_thres)) Y_thres <- Y
if (is.null(wght_thres)) wght_thres <- weight
## computations
ind0 <- rep.int(1, length(Y))
period_agg <- period1 <- NULL
if (!is.null(period)) { period1 <- copy(period)
period_agg <- data.table(unique(period))
} else period1 <- data.table(ind = ind0)
period1_agg <- data.table(unique(period1))
# ARPR by domain (if requested)
quantile <- incPercentile(Y = Y_thres,
weights = wght_thres,
sort = sort, Dom = NULL,
period = period,
k = order_quant,
dataset = NULL,
checking = FALSE)
setnames(quantile, names(quantile)[ncol(quantile)], "quantile")
if (ncol(quantile) > 1) setkeyv(quantile, head(names(quantile), -1))
threshold <- copy(quantile)
threshold[, threshold := percentage / 100 * quantile]
threshold[, quantile := NULL]
arpr_id <- id
if (!is.null(period)) arpr_id <- data.table(arpr_id, period)
if (!is.null(Dom)) {
Dom_agg <- data.table(unique(Dom))
setkeyv(Dom_agg, names(Dom_agg))
arpr_v <- c()
arpr_m <- copy(arpr_id)
for(i in 1 : nrow(Dom_agg)) {
g <- c(var_name, paste(names(Dom), as.matrix(Dom_agg[i,]), sep = "."))
var_nams <- do.call(paste, as.list(c(g, sep = "__")))
ind <- as.integer(rowSums(Dom == Dom_agg[i,][ind0,]) == ncol(Dom))
arprl <- lapply(1 : nrow(period1_agg), function(j) {
if (!is.null(period)) {
rown <- cbind(period_agg[j], Dom_agg[i])
setkeyv(rown, names(rown))
rown2 <- copy(rown)
rown <- merge(rown, quantile, all.x = TRUE)
} else {rown <- quantile
rown2 <- Dom_agg[i] }
indj <- (rowSums(period1 == period1_agg[j,][ind0,]) == ncol(period1))
arpr_l <- arprlinCalc(Y1 = Y[indj],
ids = arpr_id[indj],
wght1 = weight[indj],
indicator = ind[indj],
Y_thresh = Y_thres[indj],
wght_thresh = wght_thres[indj],
percent = percentage,
order_quants = order_quant,
quant_val = rown[["quantile"]])
list(arpr = data.table(rown2, arpr = arpr_l$rate_val_pr), lin = arpr_l$lin)
})
arprs <- rbindlist(lapply(arprl, function(x) x[[1]]))
arprlin <- rbindlist(lapply(arprl, function(x) x[[2]]))
setnames(arprlin, names(arprlin), c(names(arpr_id), var_nams))
arpr_m <- merge(arpr_m, arprlin, all.x = TRUE, by = names(arpr_id))
arpr_v <- rbind(arpr_v, arprs)
}
} else { arprl <- lapply(1:nrow(period1_agg), function(j) {
if (!is.null(period)) {
rown <- period_agg[j]
rown <- merge(rown, quantile, all.x = TRUE,
by = names(rown))
} else rown <- quantile
ind2 <- (rowSums(period1 == period1_agg[j,][ind0,]) == ncol(period1))
arpr_l <- arprlinCalc(Y1 = Y[ind2],
ids = arpr_id[ind2],
wght1 = weight[ind2],
indicator = ind0[ind2],
Y_thresh = Y_thres[ind2],
wght_thresh = wght_thres[ind2],
percent = percentage,
order_quants = order_quant,
quant_val = rown[["quantile"]])
if (!is.null(period)) {
arprs <- data.table(period_agg[j], arpr = arpr_l$rate_val_pr)
} else arprs <- data.table(arpr = arpr_l$rate_val_pr)
list(arpr = arprs, lin = arpr_l$lin)
})
arpr_v <- rbindlist(lapply(arprl, function(x) x[[1]]))
arpr_m <- rbindlist(lapply(arprl, function(x) x[[2]]))
setnames(arpr_m, names(arpr_m), c(names(arpr_id), var_name))
}
arpr_m[is.na(arpr_m)] <- 0
setkeyv(arpr_m, names(arpr_id))
return(list(quantile = quantile, threshold = threshold, value = arpr_v, lin = arpr_m))
}
## workhorse
arprlinCalc <- function(Y1, ids, wght1, indicator,
Y_thresh, wght_thresh, percent,
order_quants = NULL, quant_val) {
N <- dat <- eqIncome1 <- NULL
#---- 1. Linearization of the poverty threshold ----
arpt_calcs <- arptlinCalc(inco = Y_thresh, ids = ids,
wght = wght_thresh,
indicator = rep(1, length(ids)),
order_quants = order_quants,
quant_val = quant_val,
percentag = percent)
lin_thres <- arpt_calcs[[names(arpt_calcs)[2]]]
thres_val <- percent / 100 * quant_val
wt <- indicator * wght1
N <- sum(wt) # Estimated (sub)population size
poor <- (Y1 <= thres_val)
rate_val <- sum(wt * poor) / N # Estimated poverty rate */
rate_val_pr <- 100 * rate_val
#---- 2. Linearization of the poverty rate -----
h <- bandwith_plug(y = Y1, w = wt)
f_quant2 <- gaussian_kern(inco = Y1, wt = wt,
quant_val = quant_val, hh = h)
#****************************************************************************************
# LINEARIZED VARIABLE OF THE POVERTY RATE (IN %) *
#****************************************************************************************
lin <- 100 * ((1 / N) * indicator * ((Y1 <= thres_val) - rate_val) + f_quant2 * lin_thres)
lin_id <- data.table(ids, lin)
return(list(rate_val = rate_val, rate_val_pr = rate_val_pr, lin = lin_id))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.