#' Watts poverty index decomposition
#'
#' Estimate the Watts (1968) poverty measure and its components
#'
#' @param formula a formula specifying the income variable
#' @param design a design object of class \code{survey.design} or class \code{svyrep.design} from the \code{survey} library.
#' @param type_thresh type of poverty threshold. If "abs" the threshold is fixed and given the value
#' of abs_thresh; if "relq" it is given by percent times the quantile; if "relm" it is percent times the mean.
#' @param abs_thresh poverty threshold value if type_thresh is "abs"
#' @param percent the multiple of the the quantile or mean used in the poverty threshold definition
#' @param quantiles the quantile used used in the poverty threshold definition
#' @param thresh return the poverty threshold value
#' @param na.rm Should cases with missing values be dropped?
#' @param return.replicates Return the replicate estimates?
#' @param ... additional arguments. Currently not used.
#'
#'
#' @details you must run the \code{convey_prep} function on your survey design object immediately after creating it with the \code{svydesign} or \code{svrepdesign} function.
#'
#' For the \code{svywatts} and \code{svywattsdec} functions, zeroes and negative numbers in the analysis domain cause an error because of the logarithm function in the definition of this poverty measure. However, zeroes and negative values in the full survey design that are outside of the domain of analysis are valid to calculate the poverty threshold because zeroes and negatives are not a problem for computing quantiles (used when \code{type_thresh = "relq"}) or means (used when \code{type_thresh = "relm"}) . Missing values are treated differently. \code{NA} values anywhere in the full survey design (not only the subset, or the domain of analysis) will cause these quantiles and means to return \code{NA} results. To ignore \code{NA} values throughout, set \code{na.rm = TRUE}.
#'
#' @return Object of class "\code{cvydstat}", with estimates for the Watts index, FGT(0), Watts Poverty Gap Ratio, and Theil(poor incomes) with a "\code{var}" attribute giving the variance-covariance matrix.
#' A "\code{statistic}" attribute giving the name of the statistic.
#'
#' @author Guilherme Jacob, Djalma Pessoa, and Anthony Damico
#'
#' @seealso \code{\link{svywatts},\link{svyfgt},\link{svyfgt}}
#'
#' @references McKinley L. Blackburn (1989). Poverty measurement: an index related to a Theil measure of inequality.
#' \emph{Journal of Business & Economic Statistics}, Vol.7, No.4, pp. 475-481,
#' DOI \doi{10.1080/07350015.1989.10509760}.
#'
#' Satya R. Chakravarty, Joseph Deutsch and Jacques Silber (2008).
#' On the Watts multidimensional poverty index and its decomposition.
#' \emph{World Development}, Vol.36, No.6, pp.1067-1077.
#'
#' Harold W. Watts (1968). An economic definition of poverty.
#' \emph{Institute For Research on Poverty Discussion Papers}, n.5.
#' University of Wisconsin. URL \url{https://www.irp.wisc.edu/publications/dps/pdfs/dp568.pdf}.
#'
#' Guillaume Osier (2009). Variance estimation for complex indicators
#' of poverty and inequality. \emph{Journal of the European Survey Research
#' Association}, Vol.3, No.3, pp. 167-195,
#' ISSN 1864-3361, URL \url{https://ojs.ub.uni-konstanz.de/srm/article/view/369}.
#'
#' Jean-Claude Deville (1999). Variance estimation for complex statistics and estimators:
#' linearization and residual techniques. Survey Methodology, 25, 193-203,
#' URL \url{https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X19990024882}.
#'
#' @keywords survey
#'
#' @examples
#' library(survey)
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#'
#' # linearized design
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 , weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep( des_eusilc )
#'
#' # replicate-weighted design
#' des_eusilc_rep <- as.svrepdesign( des_eusilc , type = "bootstrap" )
#' des_eusilc_rep <- convey_prep( des_eusilc_rep )
#'
#' # filter positive incomes
#' des_eusilc <- subset( des_eusilc , eqincome > 0 )
#' des_eusilc_rep <- subset( des_eusilc_rep , eqincome > 0 )
#'
#' # absolute poverty threshold
#' svywattsdec(~eqincome, des_eusilc, abs_thresh=10000)
#' # poverty threshold equal to arpt
#' svywattsdec(~eqincome, des_eusilc, type_thresh= "relq" , thresh = TRUE)
#' # poverty threshold equal to 0.6 times the mean
#' svywattsdec(~eqincome, des_eusilc, type_thresh= "relm" , thresh = TRUE)
#'
#' # using svrep.design:
#' # absolute poverty threshold
#' svywattsdec(~eqincome, des_eusilc_rep, abs_thresh=10000)
#' # poverty threshold equal to arpt
#' svywattsdec(~eqincome, des_eusilc_rep, type_thresh= "relq" , thresh = TRUE)
#' # poverty threshold equal to 0.6 times the mean
#' svywattsdec(~eqincome, des_eusilc_rep, type_thresh= "relm" , thresh = TRUE)
#'
#' \dontrun{
#'
#' # database-backed design
#' library(RSQLite)
#' library(DBI)
#' dbfile <- tempfile()
#' conn <- dbConnect( RSQLite::SQLite() , dbfile )
#' dbWriteTable( conn , 'eusilc' , eusilc )
#'
#' dbd_eusilc <-
#' svydesign(
#' ids = ~rb030 ,
#' strata = ~db040 ,
#' weights = ~rb050 ,
#' data="eusilc",
#' dbname=dbfile,
#' dbtype="SQLite"
#' )
#'
#' dbd_eusilc <- convey_prep( dbd_eusilc )
#' dbd_eusilc <- subset( dbd_eusilc , eqincome > 0 )
#'
#' # absolute poverty threshold
#' svywattsdec(~eqincome, dbd_eusilc, abs_thresh=10000)
#' # poverty threshold equal to arpt
#' svywattsdec(~eqincome, dbd_eusilc, type_thresh= "relq" , thresh = TRUE)
#' # poverty threshold equal to 0.6 times the mean
#' svywattsdec(~eqincome, dbd_eusilc, type_thresh= "relm" , thresh = TRUE)
#'
#' dbRemoveTable( conn , 'eusilc' )
#'
#' dbDisconnect( conn , shutdown = TRUE )
#'
#' }
#'
#' @export
svywattsdec <-
function(formula, design, ...) {
if ('type_thresh' %in% names(list(...)) &&
!(list(...)[["type_thresh"]] %in% c('relq' , 'abs' , 'relm')))
stop('type_thresh= must be "relq" "relm" or "abs". see ?svywattsdec for more detail.')
if (length(attr(terms.formula(formula) , "term.labels")) > 1)
stop(
"convey package functions currently only support one variable in the `formula=` argument"
)
UseMethod("svywattsdec", design)
}
#' @rdname svywattsdec
#' @export
svywattsdec.survey.design <-
function(formula,
design,
type_thresh = "abs",
abs_thresh = NULL,
percent = .60,
quantiles = .50,
na.rm = FALSE,
thresh = FALSE ,
...) {
# check for convey_prep
if (is.null(attr(design, "full_design")))
stop(
"you must run the ?convey_prep function on your linearized survey design object immediately after creating it with the svydesign() function."
)
# check for threshold type
if (type_thresh == "abs" &
is.null(abs_thresh))
stop("abs_thresh= must be specified when type_thresh='abs'")
# if the class of the full_design attribute is just a TRUE, then the design is
# already the full design. otherwise, pull the full_design from that attribute.
if ("logical" %in% class(attr(design, "full_design")))
full_design <-
design
else
full_design <- attr(design, "full_design")
# domain
if (is.null(names(design$prob)))
ind <-
as.character(seq(length(design$prob)))
else
ind <- names(design$prob)
# if the class of the full_design attribute is just a TRUE, then the design is
# already the full design. otherwise, pull the full_design from that attribute.
if ("logical" %in% class(attr(design, "full_design")))
full_design <-
design
else
full_design <- attr(design, "full_design")
# compute poverty measures
watts <-
svywatts(
formula = formula ,
design = design ,
type_thresh = type_thresh ,
percent = percent ,
quantiles = quantiles ,
abs_thresh = abs_thresh ,
na.rm = na.rm ,
thresh = thresh ,
linearized = TRUE
)
fgt0 <-
svyfgt(
formula = formula ,
design = design ,
g = 0 ,
type_thresh = type_thresh ,
percent = percent ,
quantiles = quantiles ,
abs_thresh = abs_thresh ,
na.rm = na.rm ,
thresh = thresh ,
linearized = TRUE
)
fgt1 <-
svyfgt(
formula = formula ,
design = design ,
g = 1 ,
type_thresh = type_thresh ,
percent = percent ,
quantiles = quantiles ,
abs_thresh = abs_thresh ,
na.rm = na.rm ,
thresh = thresh ,
linearized = TRUE
)
# Watts Poverty Gap Ratio
fgt0 <-
list(value = coef(fgt0)[[1]] ,
lin = attr(fgt0 , "lin"))
fgt1 <-
list(value = coef(fgt1)[[1]] ,
lin = attr(fgt1 , "lin"))
W_pgr <-
convey::contrastinf(quote(log(fgt0 / (fgt0 - fgt1))) , list(fgt0 = fgt0 , fgt1 = fgt1))
# Theil inequality index of incomes among the poor
# by residual
watts <-
list(value = coef(watts)[[1]] ,
lin = attr(watts , "lin"))
L_poor <-
convey::contrastinf(quote(watts / fgt0 - W_pgr) ,
list(
watts = watts ,
fgt0 = fgt0 ,
W_pgr = W_pgr
))
L_poor$lin <- as.numeric(L_poor$lin)
names(L_poor$lin) <- rownames(fgt0$lin)
# create linearization matrix
lin.matrix <-
do.call(cbind , lapply(list(watts , fgt0 , W_pgr , L_poor) , `[[` , "lin"))
rownames(lin.matrix) <- rownames(fgt0$lin)
colnames(lin.matrix) <-
c("watts", "fgt0", "watts pov. gap ratio" , "theil(poor)")
# compile
estimates <-
c(watts$value, fgt0$value, W_pgr$value , L_poor$value)
names(estimates) <-
c("watts", "fgt0", "watts pov. gap ratio" , "theil(poor)")
# collect full weights
wf <- 1 / full_design$prob
# # ensure length
# if (nrow(lin.matrix) != length(full_design$prob)) {
# tmplin <-
# matrix(0 ,
# nrow = nrow(full_design$variables) ,
# ncol = ncol(lin.matrix))
# tmplin[wf > 0 ,] <- lin.matrix
# lin.matrix <- tmplin
# rm(tmplin)
# colnames(lin.matrix) <-
# c("watts", "fgt0", "watts pov. gap ratio" , "theil(poor)")
# }
# compute variance
variance <-
survey::svyrecvar(
sweep(lin.matrix , 1 , wf , "*") ,
full_design$cluster,
full_design$strata,
full_design$fpc,
postStrata = full_design$postStrata
)
# fix nan
estimates[is.nan(estimates)] <- NA
variance[which(is.nan(variance))] <- NA
# build result object
rval <- c(estimates)
attr(rval, "var") <- variance
attr(rval, "statistic") <- "watts decomposition"
class(rval) <- c("cvystat" , "svystat")
# attr( rval , "index" ) <- as.numeric( rownames( lin.matrix ) )[ wf > 0 ]
rval
}
#' @rdname svywattsdec
#' @export
svywattsdec.svyrep.design <-
function(formula,
design,
type_thresh = "abs",
abs_thresh = NULL,
percent = .60,
quantiles = .50,
na.rm = FALSE,
thresh = FALSE,
return.replicates = FALSE ,
...) {
# check for convey_prep
if (is.null(attr(design, "full_design")))
stop(
"you must run the ?convey_prep function on your linearized survey design object immediately after creating it with the svydesign() function."
)
# check for threshold type
if (type_thresh == "abs" &
is.null(abs_thresh))
stop("abs_thresh= must be specified when type_thresh='abs'")
# svyrep design ComputeIndex functions
ComputeWatts <-
function(y , w , thresh) {
y <- y[w > 0]
w <- w[w > 0]
N <- sum(w)
h <-
function(y , thresh)
ifelse(y != 0 , ifelse(y <= thresh , log(thresh / y) , 0) , 0)
sum(w * h(y , thresh)) / N
}
ComputeFGT <-
function(y , w , g , thresh) {
y <- y[w > 0]
w <- w[w > 0]
N <- sum(w)
h <-
function(y, thresh, g)
(((thresh - y) / thresh) ^ g) * (y <= thresh)
sum(w * h(y , thresh , g)) / N
}
# if the class of the full_design attribute is just a TRUE, then the design is
# already the full design. otherwise, pull the full_design from that attribute.
if ("logical" %in% class(attr(design, "full_design")))
full_design <-
design
else
full_design <- attr(design, "full_design")
# if the class of the full_design attribute is just a TRUE, then the design is
# already the full design. otherwise, pull the full_design from that attribute.
if ("logical" %in% class(attr(design, "full_design")))
full_design <-
design
else
full_design <- attr(design, "full_design")
### threshold calculation
# collect income from full sample
incvec <-
model.frame(formula, full_design$variables, na.action = na.pass)[[1]]
# treat missing values
if (na.rm) {
nas <- is.na( incvec )
full_design <- full_design[!nas,]
incvec <-
model.frame(formula, full_design$variables, na.action = na.pass)[[1]]
}
# collect full sample weights
wsf <- weights(full_design , "sampling")
# poverty threshold
if (type_thresh == 'relq')
th <- percent * computeQuantiles(incvec, wsf, p = quantiles)
if (type_thresh == 'relm')
th <- percent * sum(incvec * wsf) / sum(wsf)
if (type_thresh == 'abs')
th <- abs_thresh
### domain poverty estimate
# collect domain data
incvar <-
model.frame(formula, design$variables, na.action = na.pass)[[1]]
# treat missing values
if (na.rm) {
nas <- is.na(incvar)
design <- design[!nas,]
incvar <-
model.frame(formula, design$variables, na.action = na.pass)[[1]]
}
# collect domain sampling weights
ws <- weights(design, "sampling")
names(ws) <- rownames(design$variables)
# check for strictly positive incomes
if ( any( incvar[ ws > 0 ] <= 0 , na.rm = TRUE))
stop(
"The Watts measure is defined for strictly positive variables only.\nNegative and zero values not allowed."
)
# point estimates
watts <- ComputeWatts(incvar, ws, thresh = th)
fgt0 <- ComputeFGT(incvar, ws, g = 0 , thresh = th)
fgt1 <- ComputeFGT(incvar, ws, g = 1 , thresh = th)
w_pgr <- log(fgt0 / (fgt0 - fgt1))
L_poor <-
CalcGEI(incvar , ifelse(incvar <= th , ws , 0) , epsilon = 0)
### variance calculation
# collect full sample analysis weights
wwf <- weights(full_design , "analysis")
# get replicates
qq <- apply( wwf , 2 , function( wi ) {
# compute threshold
if (type_thresh == 'relq')
thr <-
percent * computeQuantiles(incvec , wi , p = quantiles)
if (type_thresh == 'relm')
thr <- percent * sum(incvec * wi) / sum(wi)
if (type_thresh == 'abs')
thr <- abs_thresh
# mark domains
wsi <-
ifelse( rownames( full_design$variables ) %in% rownames( design$variables ) ,
wi , 0 )
# compute estimates
watts.rep <- ComputeWatts( incvec , wsi , thresh = thr )
fgt0.rep <-
ComputeFGT( incvec , wsi , g = 0 , thresh = thr )
fgt1.rep <-
ComputeFGT( incvec , wsi , g = 1 , thresh = thr )
w_pgr.rep <- log( fgt0.rep / ( fgt0.rep - fgt1.rep ) )
L_poor.rep <-
CalcGEI( incvec , ifelse( incvec <= thr & !is.na( incvec ) , wsi , 0 ) , epsilon = 0 )
# combine esitmates
c(watts.rep, fgt0.rep , w_pgr.rep , L_poor.rep)
} )
qq <- t(qq)
colnames(qq) <-
c("watts" , "fgt0" , "watts pov. gap ratio" , "theil(poor)")
qq[is.nan(qq)] <- NA
# combine estimates
estimates <- c(watts, fgt0, w_pgr , L_poor)
estimates[is.nan(estimates)] <- NA
# compute variance
if (anyNA(qq)) {
variance <- diag(estimates)
variance[,] <- NA
} else {
variance <-
survey::svrVar(
qq ,
full_design$scale,
full_design$rscales,
mse = full_design$mse,
coef = estimates
)
}
# build result object
rval <- estimates
names(rval) <-
c("watts" , "fgt0" , "watts pov. gap ratio" , "theil(poor)")
attr(rval, "var") <- variance
attr(rval, "statistic") <- "watts decomposition"
# keep replicates
if (return.replicates) {
attr(qq , "scale") <- full_design$scale
attr(qq , "rscales") <- full_design$rscales
attr(qq , "mse") <- full_design$mse
rval <- list(mean = rval , replicates = qq)
}
class(rval) <- c("cvystat" , "svrepstat" , "svystat")
rval
}
#' @rdname svywattsdec
#' @export
svywattsdec.DBIsvydesign <-
function (formula, design, ...) {
if (!("logical" %in% class(attr(design, "full_design")))) {
full_design <- attr(design , "full_design")
full_design$variables <-
getvars(
formula,
attr(design , "full_design")$db$connection,
attr(design , "full_design")$db$tablename,
updates = attr(design , "full_design")$updates,
subset = attr(design , "full_design")$subset
)
attr(design , "full_design") <- full_design
rm(full_design)
}
design$variables <-
getvars(
formula,
design$db$connection,
design$db$tablename,
updates = design$updates,
subset = design$subset
)
NextMethod("svywattsdec", design)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.