test_ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

context("ipf")
library(surveysd)
library(data.table)
eusilc <- demo.eusilc(n = 1, prettyNames = TRUE)

# treat households as a factor variable
eusilc[, hid := as.factor(hid)]

## example for base weights assuming a simple random sample of households
## stratified per region
eusilc[, regSamp := .N, by = region]
eusilc[, regPop := sum(pWeight), by = region]
eusilc[, baseWeight := regPop / regSamp]
eusilc[, pIncome := eqIncome / .N, by = hid]

## constraints on person level
# age
conP1 <- xtabs(pWeight ~ age, data = eusilc)
# gender by region
conP2 <- xtabs(pWeight ~ gender + region, data = eusilc)
# personal net income by gender
conP3 <- xtabs(pWeight * pIncome ~ gender, data = eusilc)

## constraints on household level
conH1 <- xtabs(pWeight ~ hsize + region, data = eusilc,
               subset = !duplicated(hid))
## constraints on household level netIncome
conH2 <- xtabs(pWeight * eqIncome ~ region, data = eusilc,
               subset = !duplicated(hid))
# array of convergence limits for conH1
epsH1 <- conH1
epsH1[1:4, ] <- 0.005
epsH1[5, ] <- 0.2

test_that("ipf with a numerical variable works as expected - computeLinear", {
  # without array epsP1
  calibweights1 <- ipf(
    eusilc, hid = "hid",
    conP = list(conP1, conP2, pIncome = conP3),
    conH = list(conH1),
    epsP = list(1e-06, 1e-06, 1e-03),
    epsH = 0.01,
    bound = NULL, verbose = FALSE,  maxIter = 200,
    numericalWeighting = computeLinear)
  conP3_adj <- xtabs(calibWeight * pIncome ~ gender, data = calibweights1)
  expect_true(abs(sum(conP3_adj) - sum(conP3)) / sum(conP3) < .01)
  expect_true(all(abs(conP3_adj - conP3) / conP3 < .01))

  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights1) -
              conP1) / conP1),
    max(abs(xtabs(calibWeight ~ gender + region, data = calibweights1) -
              conP2) / conP2),
    max(abs(xtabs(calibWeight ~ hsize + region, data = calibweights1,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
})

test_that("ipf with a numerical variable works as expected - computeLinearG1", {
  # without array epsP1
  calibweights1 <- ipf(
    eusilc, hid = "hid",
    conP = list(conP1, conP2, pIncome = conP3),
    conH = list(conH1),
    epsP = list(1e-06, 1e-06, 1e-03),
    epsH = 0.01,
    bound = NULL, verbose = FALSE, maxIter = 200,
    numericalWeighting = computeLinearG1)
  conP3_adj <- xtabs(calibWeight * pIncome ~ gender, data = calibweights1)
  expect_true(abs(sum(conP3_adj) - sum(conP3)) / sum(conP3) < .01)
  expect_true(all(abs(conP3_adj - conP3) / conP3 < .01))

  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights1) -
              conP1) / conP1),
    max(abs(xtabs(calibWeight ~ gender + region, data = calibweights1) -
              conP2) / conP2),
    max(abs(xtabs(calibWeight ~ hsize + region, data = calibweights1,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
})


test_that("ipf with a numerical variable in households  as expected", {
  # without array epsP1
  calibweights1 <- ipf(
    eusilc, hid = "hid",
    conP = list(conP1, conP2),
    conH = list(conH1, eqIncome = conH2),
    epsP = list(1e-06, 1e-06),
    epsH = list(0.01, 0.01),
    bound = NULL, verbose = FALSE,  maxIter = 50,
    numericalWeighting = computeFrac)
  conP3_adj <- xtabs(calibWeight * pIncome ~ gender, data = calibweights1)
  expect_true(abs(sum(conP3_adj) - sum(conP3)) / sum(conP3) < .01)
  expect_true(all(abs(conP3_adj - conP3) / conP3 < .01))

  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights1) -
              conP1) / conP1),
    max(abs(xtabs(calibWeight ~ gender + region, data = calibweights1) -
              conP2) / conP2),
    max(abs(xtabs(calibWeight ~ hsize + region, data = calibweights1,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
})

test_that("ipf works as expected", {
  # with array epsP1, base weights and bound
  calibweights2 <- ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1),
    epsP = 1e-06, epsH = list(epsH1), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200)
  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights2) - conP1) /
          conP1),
    max(
      abs(xtabs(calibWeight ~ gender + region, data = calibweights2) - conP2) /
        conP2),
    max(abs(xtabs(calibWeight ~ hsize + region, data = calibweights2,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
})

test_that("ipf works as expected calibWeight renamed", {
  # with array epsP1, base weights and bound
  calibweights2 <- ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1),
    epsP = 1e-06, epsH = list(epsH1), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200, nameCalibWeight = "calibWeightNew")
  err <- max(c(
    max(abs(xtabs(calibWeightNew ~ age, data = calibweights2) - conP1) /
          conP1),
    max(abs(
      xtabs(calibWeightNew ~ gender + region, data = calibweights2) - conP2) /
          conP2),
    max(abs(xtabs(calibWeightNew ~ hsize + region, data = calibweights2,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
})

test_that("ipf stops  as expected when dimensionality of eps does not fit", {
  # with array epsP1, base weights and bound
  expect_error(ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1,conH1,conH1),
    epsP = c(1e-06), epsH = list(epsH1,1e-06), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200),
    regexp = "Provided household eps argument does not fit household constraints.")

  expect_error(ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1),
    epsP = c(1e-06), epsH = list(epsH1[,1:8]), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200),
    regexp = "Provided household eps argument 1 does not fit in dimension to  household constraints 1 .")

    expect_error(ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1,conH1,conH1),
    epsP = c(1e-06,22), epsH = list(epsH1,1e-06,1e-6), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200),
    regexp = "Individual eps arguments for each constraints must be defined as list.")

})


test_that("ipf works as expected with minMaxTrim only P", {
  # with array epsP1, base weights and bound
  calibweights2 <- ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2),
    epsP = 1e-06, w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200, minMaxTrim = c(450,700))
  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights2) - conP1) /
          conP1),
    max(
      abs(xtabs(calibWeight ~ gender + region, data = calibweights2) - conP2) /
        conP2)))
  expect_true(err < .01)
  expect_true(all(calibweights2$calibWeight<=700)&&all(calibweights2$calibWeight>=450))
})

test_that("ipf works as expected with minMaxTrim P and HH", {
  # with array epsP1, base weights and bound
  calibweights2 <- ipf(
    eusilc, hid = "hid", conP = list(conP1, conP2), conH = list(conH1),
    epsP = 1e-06, epsH = list(epsH1), w = "baseWeight", bound = 4,
    verbose = FALSE, maxIter = 200, minMaxTrim = c(340,870))
  err <- max(c(
    max(abs(xtabs(calibWeight ~ age, data = calibweights2) - conP1) /
          conP1),
    max(
      abs(xtabs(calibWeight ~ gender + region, data = calibweights2) - conP2) /
        conP2),
    max(abs(xtabs(calibWeight ~ hsize + region, data = calibweights2,
                  subset = !duplicated(hid)) - conH1) / conH1)))
  expect_true(err < .01)
  expect_true(all(calibweights2$calibWeight<=870)&&all(calibweights2$calibWeight>=340))
})

Any scripts or data that you put into this service are public.

surveysd documentation built on Dec. 28, 2022, 2:15 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

tests/testthat/test_ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

tests/testthat/test_ipf.R In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

tests/testthat/test_ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs