R/summary.ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

#' @title Internal function: Generate summary output for an IPF calibration
#'
#' @description
#' This function is used internally by \code{summary.ipf} to generate detailed
#' tables for calibration diagnostics.
#'
#' @param ipf_result An object of class \code{ipf} containing the calibration results.
#' @param av A list of auxiliary information used in calibration, including
#' constraints (\code{conP}, \code{conH}) and tolerance values (\code{epsP}, \code{epsH}).
#'
#' @return A named list of data.tables containing calibration diagnostics for
#' each constraint.
#'
#' @keywords internal
#' @noRd

ipf_summary_calibres <- function(ipf_result, av) {
  
  # 1. INITIAL SETUP
  . <- calibWeightName <- wg <- N <- CalibMargin <- PopMargin <- epsP <- epsH <- maxFac <- NULL
  
  if (any(names(av) == "conP")) {
    formula_name <- as.character(av$formP[[1]])[2]
    if (formula_name %in% names(ipf_result)) calibWeightName <- formula_name
  }
  
  if (is.null(calibWeightName) && any(names(av) == "conH")) {
    formula_name <- as.character(av$formH[[1]])[2]
    if (formula_name %in% names(ipf_result)) calibWeightName <- formula_name
  }
  
  if (is.null(calibWeightName)) {
    if ("gew2" %in% names(ipf_result)) {
      calibWeightName <- "gew2"
    } else if ("gew1" %in% names(ipf_result)) {
      calibWeightName <- "gew1"
    } else {
      stop("Could not determine the name of the calibrated weight column.")
    }
  }
  
  has_eps_values <- !is.null(av$epsP) && !is.null(av$epsH)
  if (!has_eps_values) cat("Warning: Epsilon values not found. Skipping maxFac calculations.\n")
  
  results_list <- list()
  
  # 2. PROCESSING PERSON CONSTRAINTS (conP)
  conP_list <- av$conP
  
  for (i in seq_along(conP_list)) {
    
    constraint_name <- names(conP_list)[i]
    if (is.null(constraint_name) || nchar(constraint_name) == 0) constraint_name <- paste0("conP", i)
    
    group_vars <- names(attr(conP_list[[i]], "dimnames"))
    
    tmp_calib_data <- copy(ipf_result)
    tmp_calib_data[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
    
    calib_results <- tmp_calib_data[
      , .(
        N = .N,
        CalibMargin = sum(get(calibWeightName))
      ),
      by = group_vars
    ]
    
    original_dt <- as.data.table(conP_list[[i]])
    setnames(original_dt, names(original_dt), c(group_vars, "PopMargin"))
    original_dt[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
    
    merged_results <- merge(calib_results, original_dt, by = group_vars, all.x = TRUE)
    merged_results[is.na(N), N := 0]
    merged_results[is.na(CalibMargin), CalibMargin := 0]
    
    # --- GEÄNDERT: EPSP Handling ---
    if (!is.null(av$epsP)) {
      epsP_obj <- if (is.list(av$epsP)) av$epsP[[i]] else av$epsP
      if (length(epsP_obj) == 1) {
        merged_results[, epsP := as.numeric(epsP_obj)]
      } else {
        # EPSP-Matrix wird "aufgelöst" in long-Format
        epsP_dt_long <- as.data.table(as.table(as.array(epsP_obj)))
        # Merge nur nach tatsächlich vorhandenen Variablen → verhindert NA
        merge_vars <- intersect(names(epsP_dt_long), group_vars)
        setnames(epsP_dt_long, c(merge_vars, "epsP"))
        epsP_dt_long[, (merge_vars) := lapply(.SD, as.character), .SDcols = merge_vars]
        merged_results <- merge(merged_results, epsP_dt_long, by = merge_vars, all.x = TRUE)
      }
    } else {
      merged_results[, epsP := NA_real_]
    }
    
    merged_results[, maxFac := abs(1 - CalibMargin / PopMargin)]
    merged_results <- merged_results[order(is.na(PopMargin), N)]
    results_list[[constraint_name]] <- merged_results[N > 0]
    
  }
  
  # 3. PROCESSING HOUSEHOLD CONSTRAINTS (conH)
  conH_list <- av$conH
  
  for (i in seq_along(conH_list)) {
    
    constraint_name <- names(conH_list)[i]
    if (is.null(constraint_name) || nchar(constraint_name) == 0) constraint_name <- paste0("conH", i)
    
    group_vars <- names(attr(conH_list[[i]], "dimnames"))
    
    tmp_calib_data <- copy(ipf_result)
    tmp_calib_data[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
    
    hid <- attr(ipf_result, "hid") 
    if (!is.null(hid) && hid %in% names(tmp_calib_data)) {
      tmp_calib_data[, `:=`(wg, .N), by = hid]
    } else {
      tmp_calib_data[, `:=`(wg, 1L)]
    }
    
    calib_results <- tmp_calib_data[
      , .(
        N = .N,
        CalibMargin = sum(get(calibWeightName) / as.numeric(wg))
      ),
      by = group_vars
    ]
    
    original_dt <- as.data.table(conH_list[[i]])
    setnames(original_dt, names(original_dt), c(group_vars, "PopMargin"))
    original_dt[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
    
    merged_results <- merge(calib_results, original_dt, by = group_vars, all.x = TRUE)
    merged_results[is.na(N), N := 0]
    merged_results[is.na(CalibMargin), CalibMargin := 0]
    
    # --- GEÄNDERT: EPSH Handling ---
    # Ziel: auch "teilweise aufgelöste" EPSH-Matrizen korrekt zuordnen
    if (!is.null(av$epsH)) {
      epsH_obj <- if (is.list(av$epsH)) av$epsH[[i]] else av$epsH
      if (length(epsH_obj) == 1) {
        merged_results[, epsH := as.numeric(epsH_obj)]
      } else {
        epsH_dt_long <- as.data.table(as.table(as.array(epsH_obj)))
        merge_vars <- intersect(names(epsH_dt_long), group_vars)
        setnames(epsH_dt_long, c(merge_vars, "epsH"))
        epsH_dt_long[, (merge_vars) := lapply(.SD, as.character), .SDcols = merge_vars]
        merged_results <- merge(merged_results, epsH_dt_long, by = merge_vars, all.x = TRUE)
      }
    } else {
      merged_results[, epsH := NA_real_]
    }
    
    merged_results[, maxFac := abs(1 - CalibMargin / PopMargin)]
    merged_results <- merged_results[order(is.na(PopMargin), N)]
    results_list[[constraint_name]] <- merged_results[N > 0]
  }
  
  return(results_list)
}




#' @title Generate Summary Output for IPF Calibration
#'
#' @description Generates a detailed summary of an Iterative Proportional Fitting (IPF) calibration, providing a complete tool for evaluating the calibration's success and the validity of the resulting weights.
#'
#' The output is a list of data.tables for a comprehensive evaluation, including:
#'
#' **Calibration Results:**
#'   - `calib_results_conP_*` and `calib_results_conH_*`: Key diagnostic tables that compare calibrated margins to population targets and assess the goodness of fit via metrics like `maxFac`.
#'
#' **Data and Diagnostics:**
#'   - `weighted data`: An excerpt of the final dataset with the calculated calibration weights.
#'   - `distribution of the weights`: A statistical overview of the weight distribution (min, max, CV).
#'
#' **Detailed Margin Comparisons:**
#'   - `conP_*`, `conH_*`, `*_adjusted`, `*_original`, `*_rel_diff_*`: Tables that compare original sample margins, calibrated margins, and population targets, along with their relative differences.
#'
#' @param object object of class ipf
#' @param ... additional arguments
#'
#' @return a list of the following outputs
#' @export
#'
#' @examples
#'
#' \dontrun{
#' # load data
#' eusilc <- demo.eusilc(n = 1, prettyNames = TRUE)
#'
#' # personal constraints
#' conP1 <- xtabs(pWeight ~ age, data = eusilc)
#' conP2 <- xtabs(pWeight ~ gender + region, data = eusilc)
#' conP3 <- xtabs(pWeight*eqIncome ~ gender, data = eusilc)
#'
#' # household constraints
#' conH1 <- xtabs(pWeight ~ hsize + region, data = eusilc)
#'
#' # simple usage ------------------------------------------
#'
#' calibweights1 <- ipf(
#'  eusilc,
#'  conP = list(conP1, conP2, eqIncome = conP3),
#'  bound = NULL,
#'  verbose = TRUE
#' )
#' output <- summary(calibweights1)
#' # the output can easily be exported to an Excel file, e.g. with
#' # library(openxlsx)
#' # write.xlsx(output, "SummaryIPF.xlsx")
#' }

summary.ipf <- function(object, ...){
  terms <- variable <- NULL
  dots <- list(...)
  extraExport <- dots$extraExport
  av <- attributes(object)
  w <- av$baseweight
  hid <- av$hid
  
  
  ## ---- FormP/FormH - Variables etc. ---- ##
  vars <- c()
  formulas <- c()
  if(any(names(av) == "conP")){
    calibWeightName <- as.character(av$formP[1][[1]])[2]
    varsP_help <- c()
    for(i in seq_along(av$formP)){
      varsP_help <- c(varsP_help,labels(terms(av$formP[[i]])))
    }
    vars <- c(vars,unique(varsP_help))
    
    #
    formPBase <- lapply(av$formP,function(x){
      x <- as.character(x)
      as.formula(paste(w,"~",x[3]))
    })
    #
    formulas <- c(formulas,as.character(av$formP))
    
  }
  
  if(any(names(av) == "conH")){
    calibWeightName <- as.character(av$formH[1][[1]])[2]
    varsH_help <- c()
    for(i in seq_along(av$formH)){
      varsH_help <- c(varsH_help,labels(terms(av$formH[[i]])))
    }
    vars <- c(vars,unique(varsH_help))
    
    #
    formHBase <- lapply(av$formH,function(x){
      x <- as.character(x)
      as.formula(paste(w,"~",x[3]))
    })
    #
    formulas <- c(formulas,as.character(av$formH))
    
  }
  vars <- as.list(unique(vars))
  
  dist_gew <- list()
  if(!is.null(w)){
    for(i in vars){
      part_i <- object[,list(cv=sd(get(w))/mean(get(w)),
                             min=min(get(w)),
                             max=max(get(w)),
                             quotient=min(get(w))/max(get(w))),keyby=c(i)]
      setnames(part_i,i,paste0("value",1:length(i)))
      part_i[,variable:=paste(i,collapse=" x ")]
      setcolorder(part_i,c("variable",paste0("value",1:length(i)),"cv","min","max","quotient"))
      dist_gew <- c(dist_gew,list(part_i))
    }
    dist_gew <- rbindlist(dist_gew,use.names=TRUE,fill=TRUE)
    setcolorder(dist_gew,c("variable",paste0("value",1:length(i)),"cv","min","max","quotient"))
  }
  
  
  cols <- c(which(colnames(object) %in% c(hid,vars,w,calibWeightName,extraExport)))
  
  # EXTENSION: The existing output is created here.
  original_output <- vector(mode = "list", length = 3)
  names(original_output)[1] <- "weighted data"
  original_output[[1]] <- subset(object,select=cols)
  
  formulaOutput <- data.table(Verteilungen=rep(NA,length(formulas)))
  formulaOutput$Verteilungen[seq_along(formulas)] <- formulas
  
  names(original_output)[2] <- "margin tables"
  original_output[[2]] <- formulaOutput
  
  names(original_output)[3] <- "distribution of the weights"
  original_output[[3]] <- dist_gew
  
  #### ---- ####
  if (any(names(av) == "conP")) {
    for (i in seq_along(av$conP)) {
      output4 <- list(
        "conP_%i" = as.data.table(av$conP[[i]]),
        "conP_%i_adjusted" = as.data.table(av$conP_adj[[i]]),
        "conP_%i_original" = as.data.table(xtabs(formPBase[[i]], data = object)),
        "conP_%i_rel_diff_original" = as.data.table(round(
          100*(av$conP[[i]] - xtabs(formPBase[[i]], data = object)) / av$conP[[i]], 2)),
        "conP_%i_rel_diff_calib" = as.data.table(round(
          100*(av$conP[[i]] - av$conP_adj[[i]]) / av$conP[[i]], 2))
      )
      names(output4) <- sprintf(names(output4), i)
      original_output <- append(original_output, output4)
    }
  }
  
  
  if(any(names(av) == "conH")){
    output5 <- vector(mode = "list", length = length(av$conH)*5)
    for(i in seq_along(av$conH)){
      j <- ((i-1)*5)+1
      names(output5)[(j):(j+4)] <- c(paste0("conH_",i),paste0("conH_",i,"_adjusted"),paste0("conH_",i,"_original"),
                                     paste0("conH_",i,"_rel_diff_original"),paste0("conH_",i,"_rel_diff_calib"))
      output5[[j]] <- as.data.table(av$conH[[i]]) #conH_i
      output5[[j+1]] <- as.data.table(av$conH_adj[[i]]) #conH_i_adjusted
      output5[[j+2]] <- as.data.table(xtabs(formHBase[[i]],data=object)) #conH_i_original
      output5[[j+3]] <- as.data.table(round(100*(av$conH[[i]]-xtabs(formHBase[[i]],data=object))/av$conH[[i]],2)) #conH_i_rel_diff_original
      output5[[j+4]] <- as.data.table(round(100*(av$conH[[i]]-av$conH_adj[[i]])/av$conH[[i]],2)) #conH_i__rel_diff_calib
      
    }
    original_output <- append(original_output,output5)
  }
  
  # --- Start of new table integration ---
  # Calls the generate_ipf_summary function to create the initial tables.
  custom_summary <- ipf_summary_calibres(object, av)
  
  # Renames the list appropriately to distinguish it from the other tables.
  names(custom_summary) <- paste0("calib_results_", names(custom_summary))
  
  # Initializes the final output with the new tables.
  output <- custom_summary
  # --- End of integration ---
  
  # Appends the original tables to the custom tables.
  output <- append(output, original_output)
  
  # ---- NEW: Append all formulas as the last element ----
  output[["all_formulas"]] <- formulas
  
  class(output) <- "summary.ipf"
  
  return(output)
}

Any scripts or data that you put into this service are public.

surveysd documentation built on Aug. 26, 2025, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/summary.ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Defines functions ipf_summary_calibres

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/summary.ipf.R In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

Defines functions ipf_summary_calibres

Try the surveysd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surveysd
Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs

R/summary.ipf.R
In surveysd: Survey Standard Error Estimation for Cumulated Estimates and their Differences in Complex Panel Designs