tests/testthat/test_calibration.R

# copyright (C) 2014-2016 A.Rebecq
library(testthat)

context("Test calibration functions on big dataset")

test_that("Calibration functions check out with Calmar", {
  
  data("population_test")
  
  sample_data <- dataPop[dataPop$weight > 0,]

  wCalLin <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                         , method="linear", description=FALSE)
  
  ## Just checking that calibration works with linear method
  ## when all variables are categorical
  wCalLin_categ <- calibration(data=sample_data, marginMatrix=table_margins_1[10:11,], colWeights="weight"
                               , method="linear", description=FALSE)
  
  expect_length(wCalLin_categ, 1000)
  
  wCalRaking <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                         , method="raking", description=FALSE, pct=FALSE)
  
  wCalLogit <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                            , method="logit", bounds=c(0.2,1.3), description=FALSE)
  
  
  expect_equal(wCalLin, poptest_calmar$weight_cal_lin, tolerance=1e-6)
  expect_equal(wCalRaking, poptest_calmar$weight_cal_raking, tolerance=1e-6)
  expect_equal(wCalLogit, poptest_calmar$weight_cal_logit, tolerance=1e-6)
  
  popTotal <- 50000
  
  wCalLin2 <- calibration(data=sample_data, marginMatrix=table_margins_2, colWeights="weight"
                         , method="linear", description=FALSE, popTotal=popTotal, pct=TRUE)
  
  wCalRaking2 <- calibration(data=sample_data, marginMatrix=table_margins_2, colWeights="weight"
                            , method="raking", description=FALSE, popTotal=popTotal, pct=TRUE)
  
  wCalLogit2 <- calibration(data=sample_data, marginMatrix=table_margins_2, colWeights="weight"
                           , method="logit", bounds=c(0.2,1.3), description=FALSE, popTotal=popTotal, pct=TRUE)
  
  
  expect_equal(wCalLin2, poptest_calmar$weight_cal_lin_2, tolerance=1e-6)
  expect_equal(wCalRaking2, poptest_calmar$weight_cal_raking_2, tolerance=1e-6)
  expect_equal(wCalLogit2, poptest_calmar$weight_cal_logit_2, tolerance=1e-6)
  
  ## Test that calibrated estimators are equal to population totals
  totalsCalVar <- sapply(table_margins_1[,1], function(x) { return(sum(dataPop[,x])) })
  expect_equal( sapply(table_margins_1[,1], function(x) { return(HTtotal(sample_data[,x], wCalLin)) }),
                totalsCalVar,
                tolerance=1e-6)
  expect_equal( sapply(table_margins_1[,1], function(x) { return(HTtotal(sample_data[,x], wCalLogit)) }),
                totalsCalVar,
                tolerance=1e-6)
  expect_equal( sapply(table_margins_1[,1], function(x) { return(HTtotal(sample_data[,x], wCalRaking2)) }),
                totalsCalVar,
                tolerance=1e-6)
  
  ########### Tests with non-response and scale factor

  sampleNR <- sample_data[sample_data$responding==1,]
  
  wCalLinNR <- calibration(data=sampleNR, marginMatrix=table_margins_1, colWeights="weight"
                         , method="linear", description=FALSE, scale=TRUE)
  wCalRakingNR <- calibration(data=sampleNR, marginMatrix=table_margins_1, colWeights="weight"
                           , method="raking", description=FALSE, scale=TRUE)
  wCalLogitNR <- calibration(data=sampleNR, marginMatrix=table_margins_1, colWeights="weight"
                           , method="logit", bounds=c(0.1,2.0), description=FALSE, scale=TRUE, popTot=50000)
  
  expect_equal(wCalLinNR, poptest_calmar_nr$weight_cal_lin, tolerance=1e-6)
  expect_equal(wCalRakingNR, poptest_calmar_nr$weight_cal_raking, tolerance=1e-6)
  expect_equal(wCalLogitNR, poptest_calmar_nr$weight_cal_logit, tolerance=1e-6)
  
#   testDistrib <- poptest_calmar_nr$weight_cal_logit/sampleNR$weight * sum(sampleNR$weight) / 50000 
#   print(summary(testDistrib))
  
  wCalLinNR2 <- calibration(data=sampleNR, marginMatrix=table_margins_2, popTot = 50000, pct=T, colWeights="weight"
                           , method="linear", description=FALSE, scale=TRUE)
  wCalRakingNR2 <- calibration(data=sampleNR, marginMatrix=table_margins_2, popTot = 50000, pct=T, colWeights="weight"
                              , method="raking", description=FALSE, scale=TRUE)
  wCalLogitNR2 <- calibration(data=sampleNR, marginMatrix=table_margins_2, popTot = 50000, pct=T, colWeights="weight"
                             , method="logit", bounds=c(0.1,2.0), description=FALSE, scale=TRUE)
  
  expect_equal(wCalLinNR2, poptest_calmar_nr$weight_cal_lin_2, tolerance=1e-6)
  expect_equal(wCalRakingNR2, poptest_calmar_nr$weight_cal_raking_2, tolerance=1e-6)
  expect_equal(wCalLogitNR2, poptest_calmar_nr$weight_cal_logit_2, tolerance=1e-6)
  
  ## Checks for qk vectors
  wCalLin_q1 <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                         , q=rep(1,nrow(sample_data)), method="linear", description=FALSE)
  
  wCalLinNR_q1 <- calibration(data=sampleNR, marginMatrix=table_margins_1, colWeights="weight"
                           , q=rep(1,nrow(sampleNR)), method="linear", description=FALSE, scale=TRUE)
  
  expect_equal(wCalLin_q1, poptest_calmar$weight_cal_lin, tolerance=1e-6)
  expect_equal(wCalLinNR_q1, poptest_calmar_nr$weight_cal_lin, tolerance=1e-6)
  
  wCalLin_qTest <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                            , q=sample_data$qTest, method="linear", description=FALSE)
  
  wCalRaking_qTest <- calibration(data=sampleNR, marginMatrix=table_margins_2, colWeights="weight",
                               q=sampleNR$qTest, method="raking", description=FALSE, 
                               scale = TRUE, pct=TRUE, popTotal = 50000)
  
  expect_equal(wCalLin_qTest, poptest_calmar$weight_cal_lin_qtest, tolerance=1e-2)
  expect_equal(wCalRaking_qTest, poptest_calmar_nr$weight_cal_raking_2_qtest, tolerance=1e-2)
  
  ## Check that warning is correctly thrown when
  ## user selects an incorrect method
  expect_warning(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                                     , method="truncated", description=FALSE, popTotal = 50000),
    "not implemented", ignore.case=T)
  
  expect_warning(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , method="randomstuff", description=FALSE, popTotal = 50000),
    "not implemented", ignore.case=T)
  
  expect_warning(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , method=NULL, description=FALSE, popTotal = 50000),
    "not specified", ignore.case=T)
  
  ## Check that errors are correctly thrown when impossible
  ## calibration margins are entered
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , method="linear", description=FALSE, popTotal = 51000, maxIter=100),
    "no convergence", ignore.case=T)
  
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , method="raking", description=FALSE, popTotal = 51000, maxIter=100),
    "no convergence", ignore.case=T)
  
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , method="logit", bounds=c(0.5,1.5), description=FALSE, 
                popTotal = 51000, maxIter=100),
    "no convergence", ignore.case=T)
  
  ## Check that bounds are correctly required for logit
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
              , method="logit", description=FALSE),
    "must enter LO and UP bounds", ignore.case=T)
  
  ## Check errors when q is not valid
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , q=rep(1,13), method="linear", description=FALSE),
    "Vector q must have same length as data", ignore.case=T)
  
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , q=sample_data$qTest, method="min", description=FALSE),
    "not supported", ignore.case=T)
  
  expect_error(
    calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                , q=sample_data$qTest, costs = rep(1,nrow(sample_data)), description=FALSE),
    "not supported", ignore.case=T)
  
  ## Test when margins of categorical variables are
  ## entered in percentages whose sum is 100 instead of 1
  table_margins_3 <- table_margins_2
  table_margins_3[10,3:7] <- c(20,20,20,20,20)
  table_margins_3[11,3:5] <- c(10,60,30)
  
  wCalLin3 <- calibration(data=sample_data, marginMatrix=table_margins_3, colWeights="weight"
                          , method="linear", description=FALSE, popTotal=popTotal, pct=TRUE)
  
  wCalRaking3 <- calibration(data=sample_data, marginMatrix=table_margins_3, colWeights="weight"
                             , method="raking", description=FALSE, popTotal=popTotal, pct=TRUE)
  
  wCalLogit3 <- calibration(data=sample_data, marginMatrix=table_margins_3, colWeights="weight"
                            , method="logit", bounds=c(0.2,1.3), description=FALSE, popTotal=popTotal, pct=TRUE)
  
  expect_equal(wCalLin2, wCalLin3, tolerance=1e-6)
  expect_equal(wCalRaking2, wCalRaking3, tolerance=1e-6)
  expect_equal(wCalLogit2, wCalLogit3, tolerance=1e-6)
  
})

test_that("Test margin stats", {
  
  data("population_test")
  
  sample_data <- dataPop[dataPop$weight > 0,]
  
  testStats1 <- marginStats(sample_data, table_margins_1, colWeights = "weight")
  testStats2 <- marginStats(sample_data, table_margins_2, colWeights = "weight", pct=T, popTotal = 50000)
  
  expect_equal(testStats1[13,1], 18.94)
  expect_equal(testStats1[14,2], 20.02)
  expect_equal(testStats1[15,3], 1.44)
  
  expect_equal(testStats2[17,3], 0.27)
  expect_equal(testStats2[18,1], 30.21)
  expect_equal(testStats2[16,2], 10)
  
  ## TODO : test marginStats with calibration weights
  ## (on penalized calibration for instance)
  sample_data$wCal <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                      , method="linear", description=FALSE, popTotal = 50000)
  
  testCosts <- rep(Inf, length(table_margins_1[,1]))

  expect_warning(
    sample_data$wCal_penal <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
                             , method="linear", description=FALSE, costs=testCosts, popTotal = 50000),
    "all costs are infinite", ignore.case=T)
  
  testStats3 <- marginStats(sample_data, table_margins_1, colWeights = "weight", colCalibratedWeights = "wCal", popTotal = 50000)
  expect_equal(testStats3[,4], rep(0, length(testStats3[,1])))
  
  testStats4 <- marginStats(sample_data, table_margins_1, colWeights = "weight", colCalibratedWeights = "wCal_penal", popTotal = 50000)
  expect_equal(testStats4[,4], rep(0, length(testStats4[,1])))
  
  #### TODO : improve testing for penalized calib
#   testCosts2 <- rep(Inf, length(table_margins_1[,1]))
#   testCosts2[1] <- 1
#   testCosts2[2] <- 1
#   testCosts2[3] <- 100
#   testCosts2[4] <- 1
#   testCosts2[5] <- 1
#   testCosts2[2] <- 100
#   testCosts2[3] <- 1
#   
#   print(testCosts2)
  
  # TODO : set gap
#   sample_data$wCal_penal2 <- calibration(data=sample_data, marginMatrix=table_margins_1[c(1:3),], colWeights="weight"
#                                    , method="linear", description=TRUE, costs=testCosts2[c(1:3)], popTotal=50000, gap=0.1, uCostPenalized = 1e-3)
# 
#   sample_data$wCal_penal2 <- calibration(data=sample_data, marginMatrix=table_margins_1, colWeights="weight"
#                                     , method="linear", description=TRUE, costs=testCosts2, popTotal=50000, gap=0.2, uCostPenalized = 1e-4)
#   
  
  # testStats5 <- marginStats(sample_data, table_margins_1[c(1:3),], colWeights = "weight", colCalibratedWeights = "wCal_penal2", popTotal = 50000)
  # testStats5 <- marginStats(sample_data, table_margins_1, colWeights = "weight", colCalibratedWeights = "wCal_penal2", popTotal = 50000)

  # print(testStats5)
  
})
haroine/gaston documentation built on June 4, 2023, 8:24 p.m.