Nothing
#'Computes the bandwidth needed to compute the derivative of the cdf function
#'
#'Using the whole sample, computes the bandwith used to get the linearized variable
#'
#' @param incvar income variable used in the estimation of the indicators
#' @param w vector of design weights
#' @return value of the bandwidth
#' @author Djalma Pessoa and Anthony Damico
#' @keywords survey
#' @export
h_fun <- function(incvar, w) {
N <- sum(w)
sd_inc <- sqrt((sum(w * incvar * incvar) - sum(w * incvar) * sum(w * incvar)/N)/N)
h <- sd_inc/exp(0.2 * log(sum(w)))
h
}
#'Estimate the derivative of the cdf function using kernel estimator
#'
#'computes the derivative of a function in a point using kernel estimation
#'
#' @param formula a formula specifying the income variable
#' @param design a design object of class \code{survey.design} from the \code{survey} library.
#' @param x the point where the derivative is calculated
#' @param h value of the bandwidth based on the whole sample
#' @param FUN if \code{F} estimates the derivative of the cdf function; if \code{big_s} estimates the derivative of total in the tails of the distribution
#' @param na.rm Should cases with missing values be dropped?
#' @param ... future expansion
#'
#' @return the value of the derivative at \code{x}
#'
#' @author Djalma Pessoa and Anthony Damico
#'
#' @keywords survey
#' @examples
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#' library(survey)
#' des_eusilc <- svydesign(ids = ~rb030, strata =~db040, weights = ~rb050, data = eusilc)
#' des_eusilc <- convey_prep( des_eusilc )
#' densfun (~eqincome, design=des_eusilc, 10000, FUN="F" )
#' # linearized design using a variable with missings
#' densfun ( ~ py010n , design = des_eusilc, 10000, FUN="F" )
#' densfun ( ~ py010n , design = des_eusilc , 10000,FUN="F", na.rm = TRUE )
#'
#' @export
densfun <- function(formula, design, x, h = NULL, FUN = "F" , na.rm=FALSE, ...) {
if( !( FUN %in% c( "F" , "big_s" ) ) ) stop( "valid choices for `FUN=` are 'F' and 'big_s'" )
incvar <- model.frame(formula, design$variables, na.action = na.pass)[[1]]
if(na.rm){
nas<-is.na(incvar)
design<-design[!nas,]
if (length(nas) > length(design$prob))
incvar <- incvar[!nas]
else incvar[nas] <- 0
}
w <- 1/design$prob
N <- sum(w)
if(is.null(h)) h <- h_fun(incvar,w)
u <- (x - incvar)/h
vectf <- exp(-(u^2)/2)/sqrt(2 * pi)
if (FUN == "F")
res <- sum(vectf * w)/(N * h) else {
v <- w * incvar
res <- sum(vectf * v)/h
}
res
}
#' Linearization of the cumulative distribution function (cdf) of a variable
#'
#' Computes the linearized variable of the cdf function in a point.
#'
#' @param formula a formula specifying the income variable
#' @param design a design object of class \code{survey.design} or class \code{svyrep.design} from the \code{survey} library.
#' @param x the point where the cdf is calculated
#' @param na.rm Should cases with missing values be dropped?
#' @param ... future expansion
#'
#' @return Object of class "\code{cvystat}", which are vectors with a "\code{var}" attribute giving the variance and a "\code{statistic}" attribute giving the name of the statistic.
#'
#' @author Djalma Pessoa and Anthony Damico
#'
#' @seealso \code{\link{svyarpr}}
#'
#' @references Guillaume Osier (2009). Variance estimation for complex indicators
#'of poverty and inequality. \emph{Journal of the European Survey Research
#' Association}, Vol.3, No.3, pp. 167-195,
#' ISSN 1864-3361, URL \url{https://ojs.ub.uni-konstanz.de/srm/article/view/369}.
#'Jean-Claude Deville (1999). Variance estimation for complex statistics and estimators:
#' linearization and residual techniques. Survey Methodology, 25, 193-203,
#' URL \url{https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X19990024882}.
#'
#' @keywords survey
#' @examples
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#' library(survey)
#' des_eusilc <- svydesign(ids = ~rb030, strata =~db040, weights = ~rb050, data = eusilc)
#' des_eusilc <- convey_prep( des_eusilc )
#' icdf(~eqincome, design=des_eusilc, 10000 )
#' # linearized design using a variable with missings
#' icdf( ~ py010n , design = des_eusilc, 10000 )
#' icdf( ~ py010n , design = des_eusilc , 10000, na.rm = TRUE )
#' @export
icdf <- function(formula, design, x, na.rm = FALSE, ...) {
incvar <- model.frame(formula, design$variables, na.action = na.pass)[[1]]
ncom<- names(design$prob)
if(na.rm){
nas<-is.na(incvar)
design<-design[!nas,]
if (length(nas) > length(design$prob))
incvar <- incvar[!nas]
else incvar[nas] <- 0
}
w <- 1/design$prob
#ind<- names(w)
N <- sum(w)
poor <- (incvar <= x) * 1
value<- sum(poor*w)/N
lin<-((incvar<=x)-value)/N
rval <- value
variance <- survey::svyrecvar(lin/design$prob, design$cluster,
design$strata, design$fpc, postStrata = design$postStrata)
class(rval) <- c( "cvystat" , "svystat" )
attr(rval, "lin") <- lin
attr(rval, "var") <- variance
attr(rval, "statistic") <- "cdf"
rval
}
# Functions U and big_t from Jenkins & Biewen:
U_fn <-
function( x, weights, gamma ) {
x <- x[weights != 0]
weights <- weights[weights != 0]
sum( weights * x^gamma )
}
T_fn <-
function( x, weights, gamma ) {
x <- x[weights != 0]
weights <- weights[weights != 0]
sum( weights * x^gamma * log( x ) )
}
# cvystat print method
#' @method print cvystat
#' @export
print.cvystat <- function(x, ...) {
vv <- attr(x, "var")
if ( attr( x, "statistic" ) %in% c( "alkire-foster", "bourguignon-chakravarty", "bourguignon" ) ) {
statistic <- attr( x, "statistic" )
m <- matrix( data = c( x[1] , sqrt(vv) ) , ncol = 2, dimnames = list( NULL, c( statistic, "SE" ) ) )
return( printCoefmat(m) )
}
if (is.matrix(vv)) {
m <- cbind(x, sqrt(diag(vv)))
} else {
m <- cbind(x, sqrt(vv))
}
nattr <- length(names(attributes(x)))
if (nattr>5) {
for(i in 6:nattr)
{m <- cbind(m, attr(x, names(attributes(x)[i])))}
colnames(m) <- c(attr(x, "statistic"), "SE", names(attributes(x))[6:nattr])
}
else {
colnames(m) <- c(attr(x, "statistic"), "SE")
}
printCoefmat(m)
}
# cvystat vcov method
#' @export
vcov.cvystat <- function (object, ...)
{
as.matrix(attr(object, "var"))
}
# cvystat coef method
#' @export
coef.cvystat <- function(object, ...) {
attr(object, "statistic") <- NULL
attr(object, "deff") <- NULL
attr(object, "var") <- NULL
attr(object, "lin") <- NULL
attr(object, "quantile") <- NULL
attr(object, "epsilon") <- NULL
attr(object, "dimensions") <- NULL
attr(object, "parameters") <- NULL
attr(object, "extra") <- NULL
attr(object, "components") <- NULL
unclass(object)
}
# cvydstat print method
#' @method print cvydstat
#' @export
print.cvydstat <- function(x, ...) {
vv <- attr(x, "var")
m <- matrix( x[[1]], nrow = 1 )
m <- rbind( m , matrix( sqrt( diag(vv) ), nrow = 1 ) )
if ( grepl( "watts index decomposition|fgt.* decomposition", attr( x , "statistic" ) ) ) {
dimnames(m) <- list( c( "coef", "SE" ), names(coef(x)) )
} else {
dimnames(m) <- list( c( "coef", "SE" ), c( "total", "within", "between" ) )
}
printCoefmat(m, digits = 5)
}
# cvydstat vcov method
#' @method vcov cvydstat
#' @export
vcov.cvydstat <- function (object, ...)
{
as.matrix(attr(object, "var"))
}
# cvydstat coef method
#' @method coef cvydstat
#' @export
coef.cvydstat <- function(object, ...) {
object[[1]]
}
# cvydstat SE method
#' @importFrom survey SE
#' @export
SE.cvydstat <- function (object, ...) {
vv <- as.matrix(attr(object, "var"))
if (!is.null(dim(object)) && length(object) == length(vv))
sqrt(vv)
else sqrt(diag(vv))
}
#' prepare svydesign and svyrep.design objects for the convey package
#'
#' stores the full survey design (needed for convey functions that use a global poverty threshold) within the design. this function must be run immediately after the full design object creation with \code{svydesign} or \code{svrepdesign}
#'
#' @param design a survey design object of the library survey.
#'
#' @return the same survey object with a \code{full_design} attribute as the storage space for the unsubsetted survey design
#'
#' @author Djalma Pessoa and Anthony Damico
#'
#' @details functions in the convey package that use a global poverty threshold require the complete (pre-subsetted) design in order to calculate variances correctly. this function stores the full design object as a separate attribute so that functions from the \code{survey} package such as \code{subset} and \code{svyby} do not disrupt the calculation of error terms.
#'
#' @keywords survey
#'
#' @examples
#'
#' library(survey)
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#'
#' # linearized design: convey_prep must be run as soon as the linearized design has been created
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 , weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep( des_eusilc )
#' # now this linearized design object is ready for analysis!
#'
#' # # # CORRECT usage example # # #
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 , weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep( des_eusilc )
#' sub_eusilc <- subset( des_eusilc , age > 20 )
#' # since convey_prep() was run immediately after creating the design
#' # this will calculate the variance accurately
#' SE( svyarpt( ~ eqincome , sub_eusilc ) )
#' # # # end of CORRECT usage example # # #
#'
#' # # # INCORRECT usage example # # #
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 , weights = ~rb050 , data = eusilc )
#' sub_eusilc <- subset( des_eusilc , age > 20 )
#' sub_eusilc <- convey_prep( sub_eusilc )
#' # since convey_prep() was not run immediately after creating the design
#' # this will make the variance wrong
#' SE( svyarpt( ~ eqincome , sub_eusilc ) )
#' # # # end of INCORRECT usage example # # #
#'
#' @export
convey_prep <- function(design) {
if (!is.null(attr(design, "full_design")))stop("convey_prep has already been run on this design")
if( as.character( design$call )[1] == 'subset' ) warning("this function must be run on the full survey design object immediately after the svydesign() or svrepdesign() call.")
# store the full design within one of the attributes of the design
attr(design, "full_design") <- design
# store the full_design's full_design attribute as TRUE
attr(attr(design, "full_design"), "full_design") <- TRUE
class( design ) <- c( "convey.design" , class( design ) )
design
}
#' @importFrom survey svyby
#' @export
svyby.convey.design <-
function (formula, by, design, ...){
if ( ( "DBIsvydesign" %in% class(design) ) & !( "logical" %in% class(attr(design, "full_design"))) ){
full_design <- attr( design , "full_design" )
if( 'sex' %in% names( list( ... ) ) ){
full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["sex"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)
design$variables <-
cbind(
getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset),
getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ,
getvars(list( ... )[["sex"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset)
)
} else if( 'age' %in% names( list( ... ) ) ){
full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["age"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)
design$variables <-
cbind(
getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset),
getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ,
getvars(list( ... )[["age"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset)
)
} else if( 'subgroup' %in% names( list( ... ) ) ){
full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["subgroup"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)
design$variables <-
cbind(
getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset),
getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ,
getvars(list( ... )[["subgroup"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset)
)
} else {
full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)
design$variables <-
cbind(
getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset),
getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset)
)
}
attr( design , "full_design" ) <- full_design
rm( full_design )
}
# remove the "convey.design" and "DBIsvydesign" classes from the current object
class(design) <- setdiff(class(design), "convey.design")
class(design) <- setdiff(class(design), "DBIsvydesign")
survey::svyby(formula,by,design,...)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.