# R/all_funs.R In convey: Income Concentration Analysis with Complex Survey Samples

#### Documented in convey_prepdensfunh_funicdf

#'Computes the bandwidth needed to compute the derivative of the cdf function
#'
#'Using the whole sample, computes the bandwith used to get the linearized variable
#'
#' @param incvar income variable used in the estimation of the indicators
#' @param w vector of design weights
#' @return value of the bandwidth
#' @author Djalma Pessoa and Anthony Damico
#' @keywords survey
#' @export
h_fun <- function(incvar, w) {
N <- sum(w)
sd_inc <- sqrt((sum(w * incvar * incvar) - sum(w * incvar) * sum(w * incvar)/N)/N)
h <- sd_inc/exp(0.2 * log(sum(w)))
h
}

#'Estimate the derivative of the cdf function using kernel estimator
#'
#'computes the derivative of a function in a point using kernel estimation
#'
#' @param formula a formula specifying the income variable
#' @param design a design object of class \code{survey.design} from the \code{survey} library.
#' @param x the point where the derivative is calculated
#' @param h value of the bandwidth based on the whole sample
#' @param FUN if \code{F} estimates the derivative of the cdf function; if \code{big_s} estimates the derivative of total in the tails of the distribution
#' @param na.rm Should cases with missing values be dropped?
#' @param ... future expansion
#'
#' @return the value of the derivative at \code{x}
#'
#' @author Djalma Pessoa and Anthony Damico
#'
#' @keywords survey
#' @examples
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#' library(survey)
#' des_eusilc <- svydesign(ids = ~rb030, strata =~db040,  weights = ~rb050, data = eusilc)
#' des_eusilc <- convey_prep( des_eusilc )
#' densfun (~eqincome, design=des_eusilc, 10000, FUN="F" )
#' # linearized design using a variable with missings
#' densfun ( ~ py010n , design = des_eusilc, 10000, FUN="F" )
#' densfun ( ~ py010n , design = des_eusilc , 10000,FUN="F", na.rm = TRUE )
#'
#' @export
densfun <- function(formula, design, x, h = NULL, FUN = "F" , na.rm=FALSE, ...) {

if( !( FUN %in% c( "F" , "big_s" ) ) ) stop( "valid choices for FUN= are 'F' and 'big_s'" )

incvar <- model.frame(formula, design$variables, na.action = na.pass)[[1]] if(na.rm){ nas<-is.na(incvar) design<-design[!nas,] if (length(nas) > length(design$prob))
incvar <- incvar[!nas]
else incvar[nas] <- 0
}
w <- 1/design$prob N <- sum(w) if(is.null(h)) h <- h_fun(incvar,w) u <- (x - incvar)/h vectf <- exp(-(u^2)/2)/sqrt(2 * pi) if (FUN == "F") res <- sum(vectf * w)/(N * h) else { v <- w * incvar res <- sum(vectf * v)/h } res } #' Linearization of the cumulative distribution function (cdf) of a variable #' #' Computes the linearized variable of the cdf function in a point. #' #' @param formula a formula specifying the income variable #' @param design a design object of class \code{survey.design} or class \code{svyrep.design} from the \code{survey} library. #' @param x the point where the cdf is calculated #' @param na.rm Should cases with missing values be dropped? #' @param ... future expansion #' #' @return Object of class "\code{cvystat}", which are vectors with a "\code{var}" attribute giving the variance and a "\code{statistic}" attribute giving the name of the statistic. #' #' @author Djalma Pessoa and Anthony Damico #' #' @seealso \code{\link{svyarpr}} #' #' @references Guillaume Osier (2009). Variance estimation for complex indicators #'of poverty and inequality. \emph{Journal of the European Survey Research #' Association}, Vol.3, No.3, pp. 167-195, #' ISSN 1864-3361, URL \url{https://ojs.ub.uni-konstanz.de/srm/article/view/369}. #'Jean-Claude Deville (1999). Variance estimation for complex statistics and estimators: #' linearization and residual techniques. Survey Methodology, 25, 193-203, #' URL \url{https://www150.statcan.gc.ca/n1/en/catalogue/12-001-X19990024882}. #' #' @keywords survey #' @examples #' library(laeken) #' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) ) #' library(survey) #' des_eusilc <- svydesign(ids = ~rb030, strata =~db040, weights = ~rb050, data = eusilc) #' des_eusilc <- convey_prep( des_eusilc ) #' icdf(~eqincome, design=des_eusilc, 10000 ) #' # linearized design using a variable with missings #' icdf( ~ py010n , design = des_eusilc, 10000 ) #' icdf( ~ py010n , design = des_eusilc , 10000, na.rm = TRUE ) #' @export icdf <- function(formula, design, x, na.rm = FALSE, ...) { incvar <- model.frame(formula, design$variables, na.action = na.pass)[[1]]
ncom<- names(design$prob) if(na.rm){ nas<-is.na(incvar) design<-design[!nas,] if (length(nas) > length(design$prob))
incvar <- incvar[!nas]
else incvar[nas] <- 0
}
w <- 1/design$prob #ind<- names(w) N <- sum(w) poor <- (incvar <= x) * 1 value<- sum(poor*w)/N lin<-((incvar<=x)-value)/N rval <- value variance <- survey::svyrecvar(lin/design$prob, design$cluster, design$strata, design$fpc, postStrata = design$postStrata)
class(rval) <- c( "cvystat" , "svystat" )
attr(rval, "lin") <- lin
attr(rval, "var") <- variance
attr(rval, "statistic") <- "cdf"
rval
}

# Functions U and big_t from Jenkins & Biewen:
U_fn <-
function( x, weights, gamma ) {
x <- x[weights != 0]

weights <- weights[weights != 0]

sum( weights * x^gamma )
}

T_fn <-
function( x, weights, gamma ) {
x <- x[weights != 0]

weights <- weights[weights != 0]

sum( weights * x^gamma * log( x ) )
}

# cvystat print method
#' @method print cvystat
#' @export
print.cvystat <- function(x, ...) {

vv <- attr(x, "var")

if ( attr( x, "statistic" ) %in% c( "alkire-foster", "bourguignon-chakravarty", "bourguignon" ) ) {

statistic <- attr( x, "statistic" )
m <- matrix( data = c( x[1] , sqrt(vv) ) , ncol = 2, dimnames = list( NULL, c( statistic, "SE" ) ) )

return( printCoefmat(m) )
}

if (is.matrix(vv)) {
m <- cbind(x, sqrt(diag(vv)))
} else {
m <- cbind(x, sqrt(vv))
}

nattr <- length(names(attributes(x)))
if (nattr>5) {
for(i in 6:nattr)
{m <- cbind(m, attr(x, names(attributes(x)[i])))}
colnames(m) <- c(attr(x, "statistic"), "SE", names(attributes(x))[6:nattr])
}
else {
colnames(m) <- c(attr(x, "statistic"), "SE")
}

printCoefmat(m)

}

# cvystat vcov method
#' @export
vcov.cvystat <- function (object, ...)
{
as.matrix(attr(object, "var"))
}

# cvystat coef method
#' @export
coef.cvystat <- function(object, ...) {
attr(object, "statistic") <- NULL
attr(object, "deff") <- NULL
attr(object, "var") <- NULL
attr(object, "lin") <- NULL
attr(object, "quantile") <- NULL
attr(object, "epsilon") <- NULL
attr(object, "dimensions") <- NULL
attr(object, "parameters") <- NULL
attr(object, "extra") <- NULL
attr(object, "components") <- NULL
unclass(object)
}

# cvydstat print method
#' @method print cvydstat
#' @export
print.cvydstat <- function(x, ...) {

vv <- attr(x, "var")

m <- matrix( x[[1]], nrow = 1 )
m <- rbind( m , matrix( sqrt( diag(vv) ), nrow = 1 ) )

if ( grepl( "watts index decomposition|fgt.* decomposition", attr( x , "statistic" ) ) ) {
dimnames(m) <- list( c( "coef", "SE" ), names(coef(x)) )
} else {
dimnames(m) <- list( c( "coef", "SE" ), c( "total", "within", "between" ) )
}

printCoefmat(m, digits = 5)

}

# cvydstat vcov method
#' @method vcov cvydstat
#' @export
vcov.cvydstat <- function (object, ...)
{
as.matrix(attr(object, "var"))
}

# cvydstat coef method
#' @method coef cvydstat
#' @export
coef.cvydstat <- function(object, ...) {

object[[1]]

}

# cvydstat SE method
#' @importFrom survey SE
#' @export
SE.cvydstat <- function (object, ...) {
vv <- as.matrix(attr(object, "var"))
if (!is.null(dim(object)) && length(object) == length(vv))
sqrt(vv)
else sqrt(diag(vv))
}

#' prepare svydesign and svyrep.design objects for the convey package
#'
#' stores the full survey design (needed for convey functions that use a global poverty threshold) within the design.  this function must be run immediately after the full design object creation with \code{svydesign} or \code{svrepdesign}
#'
#' @param design a survey design object of the library survey.
#'
#' @return the same survey object with a \code{full_design} attribute as the storage space for the unsubsetted survey design
#'
#' @author Djalma Pessoa and Anthony Damico
#'
#' @details  functions in the convey package that use a global poverty threshold require the complete (pre-subsetted) design in order to calculate variances correctly.  this function stores the full design object as a separate attribute so that functions from the \code{survey} package such as \code{subset} and \code{svyby} do not disrupt the calculation of error terms.
#'
#' @keywords survey
#'
#' @examples
#'
#' library(survey)
#' library(laeken)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#'
#' # linearized design: convey_prep must be run as soon as the linearized design has been created
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 ,  weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep( des_eusilc )
#' # now this linearized design object is ready for analysis!
#'
#' # # # CORRECT usage example # # #
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 ,  weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep( des_eusilc )
#' sub_eusilc <- subset( des_eusilc , age > 20 )
#' # since convey_prep() was run immediately after creating the design
#' # this will calculate the variance accurately
#' SE( svyarpt( ~ eqincome , sub_eusilc ) )
#' # # # end of CORRECT usage example # # #
#'
#' # # # INCORRECT usage example # # #
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 ,  weights = ~rb050 , data = eusilc )
#' sub_eusilc <- subset( des_eusilc , age > 20 )
#' sub_eusilc <- convey_prep( sub_eusilc )
#' # since convey_prep() was not run immediately after creating the design
#' # this will make the variance wrong
#' SE( svyarpt( ~ eqincome , sub_eusilc ) )
#' # # # end of INCORRECT usage example # # #
#'
#' @export
convey_prep <- function(design) {

if (!is.null(attr(design, "full_design")))stop("convey_prep has already been run on this design")

if( as.character( design$call )[1] == 'subset' ) warning("this function must be run on the full survey design object immediately after the svydesign() or svrepdesign() call.") # store the full design within one of the attributes of the design attr(design, "full_design") <- design # store the full_design's full_design attribute as TRUE attr(attr(design, "full_design"), "full_design") <- TRUE class( design ) <- c( "convey.design" , class( design ) ) design } #' @importFrom survey svyby #' @export svyby.convey.design <- function (formula, by, design, ...){ if ( ( "DBIsvydesign" %in% class(design) ) & !( "logical" %in% class(attr(design, "full_design"))) ){ full_design <- attr( design , "full_design" ) if( 'sex' %in% names( list( ... ) ) ){ full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["sex"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)

design$variables <- cbind( getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset), getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) , getvars(list( ... )[["sex"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ) } else if( 'age' %in% names( list( ... ) ) ){ full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["age"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)

design$variables <- cbind( getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset), getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) , getvars(list( ... )[["age"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ) } else if( 'subgroup' %in% names( list( ... ) ) ){ full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset) ,
getvars(list( ... )[["subgroup"]], full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)

design$variables <- cbind( getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset), getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) , getvars(list( ... )[["subgroup"]], design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset) ) } else { full_design$variables <-
cbind(
getvars(formula, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset),
getvars(by, full_design$db$connection, full_design$db$tablename, updates = full_design$updates, subset = full_design$subset)
)

design$variables <- cbind( getvars(formula, design$db$connection, design$db$tablename, updates = design$updates, subset = design$subset), getvars(by, design$db$connection, design$db$tablename, updates = design$updates, subset = design\$subset)
)

}

attr( design , "full_design" ) <- full_design

rm( full_design )

}

# remove the "convey.design" and "DBIsvydesign" classes from the current object
class(design) <- setdiff(class(design), "convey.design")
class(design) <- setdiff(class(design), "DBIsvydesign")

survey::svyby(formula,by,design,...)
}


## Try the convey package in your browser

Any scripts or data that you put into this service are public.

convey documentation built on April 28, 2022, 1:06 a.m.