Nothing
#' @title Internal function: Generate summary output for an IPF calibration
#'
#' @description
#' This function is used internally by \code{summary.ipf} to generate detailed
#' tables for calibration diagnostics.
#'
#' @param ipf_result An object of class \code{ipf} containing the calibration results.
#' @param av A list of auxiliary information used in calibration, including
#' constraints (\code{conP}, \code{conH}) and tolerance values (\code{epsP}, \code{epsH}).
#'
#' @return A named list of data.tables containing calibration diagnostics for
#' each constraint.
#'
#' @keywords internal
#' @noRd
ipf_summary_calibres <- function(ipf_result, av) {
# 1. INITIAL SETUP
. <- calibWeightName <- wg <- N <- CalibMargin <- PopMargin <- epsP <- epsH <- maxFac <- NULL
if (any(names(av) == "conP")) {
formula_name <- as.character(av$formP[[1]])[2]
if (formula_name %in% names(ipf_result)) calibWeightName <- formula_name
}
if (is.null(calibWeightName) && any(names(av) == "conH")) {
formula_name <- as.character(av$formH[[1]])[2]
if (formula_name %in% names(ipf_result)) calibWeightName <- formula_name
}
if (is.null(calibWeightName)) {
if ("gew2" %in% names(ipf_result)) {
calibWeightName <- "gew2"
} else if ("gew1" %in% names(ipf_result)) {
calibWeightName <- "gew1"
} else {
stop("Could not determine the name of the calibrated weight column.")
}
}
has_eps_values <- !is.null(av$epsP) && !is.null(av$epsH)
if (!has_eps_values) cat("Warning: Epsilon values not found. Skipping maxFac calculations.\n")
results_list <- list()
# 2. PROCESSING PERSON CONSTRAINTS (conP)
conP_list <- av$conP
for (i in seq_along(conP_list)) {
constraint_name <- names(conP_list)[i]
if (is.null(constraint_name) || nchar(constraint_name) == 0) constraint_name <- paste0("conP", i)
group_vars <- names(attr(conP_list[[i]], "dimnames"))
tmp_calib_data <- copy(ipf_result)
tmp_calib_data[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
calib_results <- tmp_calib_data[
, .(
N = .N,
CalibMargin = sum(get(calibWeightName))
),
by = group_vars
]
original_dt <- as.data.table(conP_list[[i]])
setnames(original_dt, names(original_dt), c(group_vars, "PopMargin"))
original_dt[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
merged_results <- merge(calib_results, original_dt, by = group_vars, all.x = TRUE)
merged_results[is.na(N), N := 0]
merged_results[is.na(CalibMargin), CalibMargin := 0]
# --- GEÄNDERT: EPSP Handling ---
if (!is.null(av$epsP)) {
epsP_obj <- if (is.list(av$epsP)) av$epsP[[i]] else av$epsP
if (length(epsP_obj) == 1) {
merged_results[, epsP := as.numeric(epsP_obj)]
} else {
# EPSP-Matrix wird "aufgelöst" in long-Format
epsP_dt_long <- as.data.table(as.table(as.array(epsP_obj)))
# Merge nur nach tatsächlich vorhandenen Variablen → verhindert NA
merge_vars <- intersect(names(epsP_dt_long), group_vars)
setnames(epsP_dt_long, c(merge_vars, "epsP"))
epsP_dt_long[, (merge_vars) := lapply(.SD, as.character), .SDcols = merge_vars]
merged_results <- merge(merged_results, epsP_dt_long, by = merge_vars, all.x = TRUE)
}
} else {
merged_results[, epsP := NA_real_]
}
merged_results[, maxFac := abs(1 - CalibMargin / PopMargin)]
merged_results <- merged_results[order(is.na(PopMargin), N)]
results_list[[constraint_name]] <- merged_results[N > 0]
}
# 3. PROCESSING HOUSEHOLD CONSTRAINTS (conH)
conH_list <- av$conH
for (i in seq_along(conH_list)) {
constraint_name <- names(conH_list)[i]
if (is.null(constraint_name) || nchar(constraint_name) == 0) constraint_name <- paste0("conH", i)
group_vars <- names(attr(conH_list[[i]], "dimnames"))
tmp_calib_data <- copy(ipf_result)
tmp_calib_data[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
hid <- attr(ipf_result, "hid")
if (!is.null(hid) && hid %in% names(tmp_calib_data)) {
tmp_calib_data[, `:=`(wg, .N), by = hid]
} else {
tmp_calib_data[, `:=`(wg, 1L)]
}
calib_results <- tmp_calib_data[
, .(
N = .N,
CalibMargin = sum(get(calibWeightName) / as.numeric(wg))
),
by = group_vars
]
original_dt <- as.data.table(conH_list[[i]])
setnames(original_dt, names(original_dt), c(group_vars, "PopMargin"))
original_dt[, (group_vars) := lapply(.SD, as.character), .SDcols = group_vars]
merged_results <- merge(calib_results, original_dt, by = group_vars, all.x = TRUE)
merged_results[is.na(N), N := 0]
merged_results[is.na(CalibMargin), CalibMargin := 0]
# --- GEÄNDERT: EPSH Handling ---
# Ziel: auch "teilweise aufgelöste" EPSH-Matrizen korrekt zuordnen
if (!is.null(av$epsH)) {
epsH_obj <- if (is.list(av$epsH)) av$epsH[[i]] else av$epsH
if (length(epsH_obj) == 1) {
merged_results[, epsH := as.numeric(epsH_obj)]
} else {
epsH_dt_long <- as.data.table(as.table(as.array(epsH_obj)))
merge_vars <- intersect(names(epsH_dt_long), group_vars)
setnames(epsH_dt_long, c(merge_vars, "epsH"))
epsH_dt_long[, (merge_vars) := lapply(.SD, as.character), .SDcols = merge_vars]
merged_results <- merge(merged_results, epsH_dt_long, by = merge_vars, all.x = TRUE)
}
} else {
merged_results[, epsH := NA_real_]
}
merged_results[, maxFac := abs(1 - CalibMargin / PopMargin)]
merged_results <- merged_results[order(is.na(PopMargin), N)]
results_list[[constraint_name]] <- merged_results[N > 0]
}
return(results_list)
}
#' @title Generate Summary Output for IPF Calibration
#'
#' @description Generates a detailed summary of an Iterative Proportional Fitting (IPF) calibration, providing a complete tool for evaluating the calibration's success and the validity of the resulting weights.
#'
#' The output is a list of data.tables for a comprehensive evaluation, including:
#'
#' **Calibration Results:**
#' - `calib_results_conP_*` and `calib_results_conH_*`: Key diagnostic tables that compare calibrated margins to population targets and assess the goodness of fit via metrics like `maxFac`.
#'
#' **Data and Diagnostics:**
#' - `weighted data`: An excerpt of the final dataset with the calculated calibration weights.
#' - `distribution of the weights`: A statistical overview of the weight distribution (min, max, CV).
#'
#' **Detailed Margin Comparisons:**
#' - `conP_*`, `conH_*`, `*_adjusted`, `*_original`, `*_rel_diff_*`: Tables that compare original sample margins, calibrated margins, and population targets, along with their relative differences.
#'
#' @param object object of class ipf
#' @param ... additional arguments
#'
#' @return a list of the following outputs
#' @export
#'
#' @examples
#'
#' \dontrun{
#' # load data
#' eusilc <- demo.eusilc(n = 1, prettyNames = TRUE)
#'
#' # personal constraints
#' conP1 <- xtabs(pWeight ~ age, data = eusilc)
#' conP2 <- xtabs(pWeight ~ gender + region, data = eusilc)
#' conP3 <- xtabs(pWeight*eqIncome ~ gender, data = eusilc)
#'
#' # household constraints
#' conH1 <- xtabs(pWeight ~ hsize + region, data = eusilc)
#'
#' # simple usage ------------------------------------------
#'
#' calibweights1 <- ipf(
#' eusilc,
#' conP = list(conP1, conP2, eqIncome = conP3),
#' bound = NULL,
#' verbose = TRUE
#' )
#' output <- summary(calibweights1)
#' # the output can easily be exported to an Excel file, e.g. with
#' # library(openxlsx)
#' # write.xlsx(output, "SummaryIPF.xlsx")
#' }
summary.ipf <- function(object, ...){
terms <- variable <- NULL
dots <- list(...)
extraExport <- dots$extraExport
av <- attributes(object)
w <- av$baseweight
hid <- av$hid
## ---- FormP/FormH - Variables etc. ---- ##
vars <- c()
formulas <- c()
if(any(names(av) == "conP")){
calibWeightName <- as.character(av$formP[1][[1]])[2]
varsP_help <- c()
for(i in seq_along(av$formP)){
varsP_help <- c(varsP_help,labels(terms(av$formP[[i]])))
}
vars <- c(vars,unique(varsP_help))
#
formPBase <- lapply(av$formP,function(x){
x <- as.character(x)
as.formula(paste(w,"~",x[3]))
})
#
formulas <- c(formulas,as.character(av$formP))
}
if(any(names(av) == "conH")){
calibWeightName <- as.character(av$formH[1][[1]])[2]
varsH_help <- c()
for(i in seq_along(av$formH)){
varsH_help <- c(varsH_help,labels(terms(av$formH[[i]])))
}
vars <- c(vars,unique(varsH_help))
#
formHBase <- lapply(av$formH,function(x){
x <- as.character(x)
as.formula(paste(w,"~",x[3]))
})
#
formulas <- c(formulas,as.character(av$formH))
}
vars <- as.list(unique(vars))
dist_gew <- list()
if(!is.null(w)){
for(i in vars){
part_i <- object[,list(cv=sd(get(w))/mean(get(w)),
min=min(get(w)),
max=max(get(w)),
quotient=min(get(w))/max(get(w))),keyby=c(i)]
setnames(part_i,i,paste0("value",1:length(i)))
part_i[,variable:=paste(i,collapse=" x ")]
setcolorder(part_i,c("variable",paste0("value",1:length(i)),"cv","min","max","quotient"))
dist_gew <- c(dist_gew,list(part_i))
}
dist_gew <- rbindlist(dist_gew,use.names=TRUE,fill=TRUE)
setcolorder(dist_gew,c("variable",paste0("value",1:length(i)),"cv","min","max","quotient"))
}
cols <- c(which(colnames(object) %in% c(hid,vars,w,calibWeightName,extraExport)))
# EXTENSION: The existing output is created here.
original_output <- vector(mode = "list", length = 3)
names(original_output)[1] <- "weighted data"
original_output[[1]] <- subset(object,select=cols)
formulaOutput <- data.table(Verteilungen=rep(NA,length(formulas)))
formulaOutput$Verteilungen[seq_along(formulas)] <- formulas
names(original_output)[2] <- "margin tables"
original_output[[2]] <- formulaOutput
names(original_output)[3] <- "distribution of the weights"
original_output[[3]] <- dist_gew
#### ---- ####
if (any(names(av) == "conP")) {
for (i in seq_along(av$conP)) {
output4 <- list(
"conP_%i" = as.data.table(av$conP[[i]]),
"conP_%i_adjusted" = as.data.table(av$conP_adj[[i]]),
"conP_%i_original" = as.data.table(xtabs(formPBase[[i]], data = object)),
"conP_%i_rel_diff_original" = as.data.table(round(
100*(av$conP[[i]] - xtabs(formPBase[[i]], data = object)) / av$conP[[i]], 2)),
"conP_%i_rel_diff_calib" = as.data.table(round(
100*(av$conP[[i]] - av$conP_adj[[i]]) / av$conP[[i]], 2))
)
names(output4) <- sprintf(names(output4), i)
original_output <- append(original_output, output4)
}
}
if(any(names(av) == "conH")){
output5 <- vector(mode = "list", length = length(av$conH)*5)
for(i in seq_along(av$conH)){
j <- ((i-1)*5)+1
names(output5)[(j):(j+4)] <- c(paste0("conH_",i),paste0("conH_",i,"_adjusted"),paste0("conH_",i,"_original"),
paste0("conH_",i,"_rel_diff_original"),paste0("conH_",i,"_rel_diff_calib"))
output5[[j]] <- as.data.table(av$conH[[i]]) #conH_i
output5[[j+1]] <- as.data.table(av$conH_adj[[i]]) #conH_i_adjusted
output5[[j+2]] <- as.data.table(xtabs(formHBase[[i]],data=object)) #conH_i_original
output5[[j+3]] <- as.data.table(round(100*(av$conH[[i]]-xtabs(formHBase[[i]],data=object))/av$conH[[i]],2)) #conH_i_rel_diff_original
output5[[j+4]] <- as.data.table(round(100*(av$conH[[i]]-av$conH_adj[[i]])/av$conH[[i]],2)) #conH_i__rel_diff_calib
}
original_output <- append(original_output,output5)
}
# --- Start of new table integration ---
# Calls the generate_ipf_summary function to create the initial tables.
custom_summary <- ipf_summary_calibres(object, av)
# Renames the list appropriately to distinguish it from the other tables.
names(custom_summary) <- paste0("calib_results_", names(custom_summary))
# Initializes the final output with the new tables.
output <- custom_summary
# --- End of integration ---
# Appends the original tables to the custom tables.
output <- append(output, original_output)
# ---- NEW: Append all formulas as the last element ----
output[["all_formulas"]] <- formulas
class(output) <- "summary.ipf"
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.