svycdec: Anastasiade-Tillé decomposition by calibration using complex surveys

Documented in svyatdens svyatdens.survey.design svyatdens.svyrep.design

#' Density functions using Anastasiade-Tillé Decomposition
#'
#' Plots the original and the counterfactual density functions
#'
#' @param formula a formula specifying the income variable and the variables to calibrate upon.
#' @param design a design object of class \code{survey.design} or class \code{svyrep.design} from the \code{survey} library.
#' @param sex a formula indicating the sex variable, a factor of two levels (male and female).
#' @param grake.args arguments to be passed to the \code{grake} function of the \code{survey} library.
#' @param ... future expansion
#'
#' @details This function
#'
#' @return Object of class "\code{svystat}", which are vectors with a "\code{var}" attribute giving the variance-covariance matrix and a "\code{statistic}" attribute giving the name of the decomposed variable.
#'
#' @author Guilherme Jacob
#'
#' @seealso \code{\link{svyatdens}}
#'
#' @references Anastasiade, M.-C., and Tillé, Y. (2017). Decomposition of gender wage inequalities through calibration: Application to the Swiss structure of earnings survey.
#' \emph{Survey Methodology}, Statistics Canada, Catalogue No. 12-001-X, Vol. 43, No. 2. URL \url{http://www.statcan.gc.ca/pub/12-001-x/2017002/article/54887-eng.htm}.
#'
#' DiNardo, J., Fortin, N., and Lemieux, T. (1996). Labor Market Institutions and the Distribution of Wages, 1973-1992: A Semiparametric Approach. \emph{Econometrica},
#' vol. 64, no. 5, pp. 1001–1044, URL \url{http://www.jstor.org/stable/2171954}.
#'
#' Oaxaca, R. (1973). Male-Female Wage Differentials in Urban Labor Markets. \emph{International Economic Review}, vol. 14, no. 3, pp. 693–709.
#' URL \url{www.jstor.org/stable/2525981}.
#'
#' Blinder, A. (1973). Wage Discrimination: Reduced Form and Structural Estimates. \emph{The Journal of Human Resources}, vol. 8, no. 4, 1973, pp. 436–455. JSTOR, www.jstor.org/stable/144855
#'
#' @keywords survey
#'
#' @examples
#' library(survey)
#' library(vardpoor)
#' data(eusilc) ; names( eusilc ) <- tolower( names( eusilc ) )
#'
#' # linearized design
#' des_eusilc <- svydesign( ids = ~rb030 , strata = ~db040 ,  weights = ~rb050 , data = eusilc )
#' des_eusilc <- convey_prep(des_eusilc)
#'
#' svyatd( ~eqincome , design = des_eusilc )
#'
#' # replicate-weighted design
#' des_eusilc_rep <- as.svrepdesign( des_eusilc , type = "bootstrap" )
#' des_eusilc_rep <- convey_prep(des_eusilc_rep)
#'
#' svyatd( ~eqincome , design = des_eusilc_rep )
#'
#' \dontrun{
#'
#' # linearized design using a variable with missings
#' svyatd( ~ py010n , design = des_eusilc )
#' svyatd( ~ py010n , design = des_eusilc , na.rm = TRUE )
#' # replicate-weighted design using a variable with missings
#' svyatd( ~ py010n , design = des_eusilc_rep )
#' svyatd( ~ py010n , design = des_eusilc_rep , na.rm = TRUE )
#'
#' # database-backed design
#' library(MonetDBLite)
#' library(DBI)
#' dbfolder <- tempdir()
#' conn <- dbConnect( MonetDBLite::MonetDBLite() , dbfolder )
#' dbWriteTable( conn , 'eusilc' , eusilc )
#'
#' dbd_eusilc <-
#' 	svydesign(
#' 		ids = ~rb030 ,
#' 		strata = ~db040 ,
#' 		weights = ~rb050 ,
#' 		data="eusilc",
#' 		dbname=dbfolder,
#' 		dbtype="MonetDBLite"
#' 	)
#'
#' dbd_eusilc <- convey_prep( dbd_eusilc )
#'
#' svyatd( ~ eqincome , design = dbd_eusilc )
#'
#' dbRemoveTable( conn , 'eusilc' )
#'
#' dbDisconnect( conn , shutdown = TRUE )
#'
#' }
#'
#' @export
svyatdens <-
  function(formula, design, ...) {

    # if( length( attr( terms.formula( formula ) , "term.labels" ) ) != 2 ) stop( "convey package functions currently only support one variable in the `formula=` argument" )

    UseMethod("svyatdens", design )

  }


#' @rdname svyatdens
#' @export
svyatdens.survey.design <-
  function( formula , design , sex , ngrid = 401 , bandwidth = NULL , grake.args = list( calfun = survey::cal.raking , bounds = list( lower = -Inf , upper = Inf ) , epsilon = 1e-7 , verbose = FALSE , maxit = 1000 , variance=NULL) ) {

    # collect weights
    ww <- weights( design )

    # collect data
    sexvar <- model.frame( formula = sex , data = design$variables[ ww > 0 , , drop = FALSE ] , na.action = na.pass )
    xx <- model.frame( formula = formula , data = design$variables[ ww > 0 , , drop = FALSE ] , na.action = na.pass , drop.unused.levels = TRUE )

    # test consistency
    if ( length( levels( sexvar[,1] ) ) != 2 ) stop( "sex variable must be a factor with two levels only.")

    # replace infinite for missing
    numcols <- as.logical( sapply( xx, is.numeric , USE.NAMES = FALSE , simplify = TRUE ) )
    xx[ apply( xx[ , numcols ] , 2 , is.infinite ) ] <- NA

    # test values
    nas <- rowSums( is.na( cbind( sexvar , xx ) ) )

    # filter out observations
    design <- design[ nas == 0 , ]

    # collect new weights
    ww <- weights( design )

    # collect data
    sexvar <- model.frame( formula = sex , data = design$variables[ ww > 0 , , drop = FALSE ] , na.action = na.pass )
    xx <- model.frame( formula = formula , data = design$variables[ ww > 0 , , drop = FALSE ] , na.action = na.pass , drop.unused.levels = TRUE )

    # filter weights
    ww <- ww[ ww > 0 ]

    # add contrasts
    sexvar <- model.matrix( update( sex , ~. + 0 ) , sexvar )[,]
    yy <- as.matrix( xx[,1,drop=FALSE] )
    xx <- model.matrix( ~. , xx[,-1,drop=FALSE] )[ , ]

    # calculate adjusted totals for calibration
    population <- sum( ww * sexvar[ , 2 ] ) * colSums( ww * sexvar[ , 1 ] * xx ) / sum( ww * sexvar[ , 1 ] )

    # calibrate group 2 weights on group 1 totals
    g <- tryCatch( do.call( grake , c( list( mm = xx[ as.logical(sexvar[,2]) , ] , ww = ww[ as.logical(sexvar[,2]) ] , eta = rep( 0, NCOL(xx) ) ) , population = list(population) , grake.args ) ) )
    g <- (function(a,b) { a[names(a) %in% names(b)] <- b ; a[!(names(a) %in% names(b))] <- 1 ; a })(ww,g)

    # calculate densities
    if ( length(ngrid != 3) ) ngrid <- rep( ngrid[1] , 3 )
    if ( is.null(bandwidth) ) bandwidth <- NULL else if ( length(bandwidth != 3) ) bandwidth <- rep( bandwidth[1] , 3 )
    densities <- list( custom.locpoly( yy , ww * sexvar[ , 1] , ngrid = ngrid[1] , bandwidth = bandwidth[1] ) ,
                       custom.locpoly( yy , ww * sexvar[ , 2] , ngrid = ngrid[2] , bandwidth = bandwidth[2] ) ,
                       custom.locpoly( yy , ww * g * sexvar[ , 2] , ngrid = ngrid[3] , bandwidth = bandwidth[3] ) )
    names(densities) <- c( colnames( sexvar ) , paste0( "counterfactual(" , colnames( sexvar )[2] , ")" ) )

    # result object
    densities

  }

#' @rdname svyatdens
#' @export
svyatdens.svyrep.design <-
  function( formula , design , sex , grake.args = list( calfun = survey::cal.raking , bounds = list( lower = .5 , upper = 2 ) , epsilon = 1e-7 , eta = NULL , maxit = 1000 , verbose = FALSE , variance=NULL) , ... ) {

    # collect data
    sexvar <- model.frame( formula = sex , data = design$variables , na.action = na.pass )
    xx <- model.frame( formula = formula , design$variables , na.action = na.pass , drop.unused.levels = TRUE )

    # test consistency
    if ( length( levels( sexvar[,1] ) ) != 2 ) stop( "sex variable must be a factor with two levels only.")

    # replace infinite for missing
    numcols <- as.logical( sapply( xx, is.numeric , USE.NAMES = FALSE , simplify = TRUE ) )
    xx[ apply( xx[ , numcols ] , 2 , is.infinite ) ] <- NA

    # test values
    nas <- 1*( is.na( cbind( sexvar , xx ) ) )
    nas <- rowSums( nas ) > 0

    # filter out observations
    design <- design[ !nas , ]

    # collect data
    sexvar <- model.frame( formula = sex , data = design$variables , na.action = na.pass )
    xx <- model.frame( formula = formula , design$variables , na.action = na.pass , drop.unused.levels = TRUE )

    # add contrasts
    sexvar <- model.matrix( update( sex , ~. + 0 ) , sexvar )[,]
    yy <- as.matrix( xx[ , 1, drop = FALSE ] )
    xx <- model.matrix( ~. , xx[ , -1 , drop = FALSE ] )[ , ]

    # collect sampling weights
    ww <- weights( design , "sampling" )

    # calculate adjusted totals for calibration
    population <- sum( ww * sexvar[ , 2 ] ) * colSums( ww * sexvar[ , 1 ] * xx ) / sum( ww * sexvar[ , 1 ] )

    # calibrate group 2 weights on group 1 totals
    g <- do.call( grake , c( list( mm = xx[ as.logical(sexvar[,2]) , ] , ww = ww[ as.logical(sexvar[,2]) ] , eta = rep( 0, NCOL(xx) ) ) , population = list(population) , grake.args ) )
    g <- (function(a,b) { a[names(a) %in% names(b)] <- b ; a[!(names(a) %in% names(b))] <- 1 ; a })(ww,g)

    # calculate densities
    if ( length(ngrid != 3) ) ngrid <- rep( ngrid[1] , 3 )
    if ( is.null(bandwidth) ) bandwidth <- NULL else if ( length(bandwidth != 3) ) bandwidth <- rep( bandwidth[1] , 3 )
    densities <- list( custom.locpoly( yy , ww * sexvar[ , 1] , ngrid = ngrid[1] , bandwidth = bandwidth[1] ) ,
                       custom.locpoly( yy , ww * sexvar[ , 2] , ngrid = ngrid[2] , bandwidth = bandwidth[2] ) ,
                       custom.locpoly( yy , ww * g * sexvar[ , 2] , ngrid = ngrid[3] , bandwidth = bandwidth[3] ) )
    names(densities) <- c( colnames( sexvar ) , paste0( "counterfactual(" , colnames( sexvar )[2] , ")" ) )

    # result object
    densities

  }


#' @rdname svyatdens
#' @export
svyatdens.DBIsvydesign <-
  function (formula, design, sex , ...){

    design$variables <-
      survey:::getvars(
        unique( c( all.vars(formula) , all.vars(sex) ) ),
        design$db$connection,
        design$db$tablename,
        updates = design$updates,
        subset = design$subset ,
      )

    NextMethod("svyatdens", design)
  }

custom.locpoly <- function ( x, w, ngrid = 401, xlim = NULL, ylim = NULL,
                             bandwidth = NULL, ...)
{

  if (is.null(xlim)) { xlim <- apply(x, 2, range) }
  if (!is.matrix(xlim)) xlim <- matrix(xlim, nrow = 2)
  if (is.null(bandwidth)) { bandwidth <- KernSmooth::dpik( x , gridsize = ngrid ) } else { bandwidth <- rep(bandwidth, length = ncol(x)) }
  ll <- vector("list", 1)
  gx <- seq(min(xlim[, 1]), max(xlim[, 1]), length = ngrid)
  nx <- rowsum(c(rep(0, ngrid), w), c(1:ngrid, findInterval(x[,1], gx)))
  ll[[1]] <- KernSmooth::locpoly(rep(1, ngrid), nx *
                                   ngrid/(diff(xlim[, 1]) * sum(w)), binned = TRUE,
                                 bandwidth = bandwidth, range.x = xlim[, 1])
  names(ll) <- colnames(x)
  attr(ll, "call") <- sys.call(-1)
  attr(ll, "density") <- TRUE
  attr(ll, "ylab") <- "Density"
  class(ll) <- "svysmooth"
  ll
}

guilhermejacob/svycdec documentation built on Nov. 4, 2019, 1:24 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

guilhermejacob/svycdec
Anastasiade-Tillé decomposition by calibration using complex surveys

R/svyatdens.R
In guilhermejacob/svycdec: Anastasiade-Tillé decomposition by calibration using complex surveys

Defines functions svyatdens svyatdens.survey.design svyatdens.svyrep.design

Documented in svyatdens svyatdens.survey.design svyatdens.svyrep.design

R Package Documentation

Browse R Packages

We want your feedback!

guilhermejacob/svycdec Anastasiade-Tillé decomposition by calibration using complex surveys

R/svyatdens.R In guilhermejacob/svycdec: Anastasiade-Tillé decomposition by calibration using complex surveys

Defines functions svyatdens svyatdens.survey.design svyatdens.svyrep.design

Documented in svyatdens svyatdens.survey.design svyatdens.svyrep.design

R Package Documentation

Browse R Packages

We want your feedback!

guilhermejacob/svycdec
Anastasiade-Tillé decomposition by calibration using complex surveys

R/svyatdens.R
In guilhermejacob/svycdec: Anastasiade-Tillé decomposition by calibration using complex surveys