#' Small count rounding with post-processing to expected frequencies
#'
#' The counts rounded by \code{\link{PLSrounding}}
#' Thereafter, based on the publishable rounded data, expected inner cell frequencies are generated by iterative proportional fitting using \code{\link[SSBtools]{Mipf}}.
#' To ensure that empty cells missing in input data are included in the fitting process, the data is first extended using \code{\link[SSBtools]{Extend0}}.
#'
#' The nine first parameters is documented in more detail in \code{\link{PLSrounding}}.
#' If iterative proportional fitting succeeds, the maximum difference between rounded counts and `ipFit` is less than input parameter `eps`.
#'
#' @param data data frame (inner cells)
#' @param freqVar Variable holding counts
#' @param roundBase Rounding base
#' @param formula Model formula
#' @param hierarchies List of hierarchies
#' @param dimVar Dimensional variables
#' @param preAggregate Aggregation
#' @param printInc Printing iteration information
#' @param xReturn Dummy matrix in output when `TRUE`. To return crossTable as well, use `xReturn = 2`.
#' @param extend0 `PLSrounding` parameter. See below.
#' @param extend0Fits When `extend0Fits` is set to `TRUE` (default), the data is automatically extended.
#' Additionally, `extend0Fits` can be specified as a list, or set to `"all"` (see \code{\link{PLSrounding}}).
#' Previously, this functionality was controlled by a parameter called `extend0`,
#' but now `extend0` is specific to the \code{\link{PLSrounding}} function.
#' When both `extend0` and `extend0Fits` are used simultaneously,
#' `extend0Fits` adds an additional extension on top of the one provided by `extend0`
#' (see example).
#' @param limit `LSfitNonNeg` parameter
#' @param viaQR `LSfitNonNeg` parameter
#' @param iter `Mipf` parameter
#' @param eps `Mipf` parameter
#' @param tol `Mipf` parameter
#' @param reduceBy0 `Mipf` parameter
#' @param reduceByColSums `Mipf` parameter
#' @param reduceByLeverage `Mipf` parameter
#' @param ... Further parameters to \code{\link{PLSrounding}}.
#'
#' @return Output from \code{\link{PLSrounding}} (class attribute "PLSrounded") with modified versions of `inner` and `publish`:
#' \item{inner}{Extended with more input data variables and with expected frequencies (`ipFit`).}
#' \item{publish}{Extended with aggregated expected frequencies (`ipFit`).}
#' @importFrom SSBtools Mipf Extend0 Reduce0exact
#' @importFrom Matrix crossprod rowSums colSums
#' @export
#'
#' @examples
#' z <- data.frame(geo = c("Iceland", "Portugal", "Spain"),
#' eu = c("nonEU", "EU", "EU"),
#' year = rep(c("2018","2019"), each = 3),
#' freq = c(2,3,7,1,5,6), stringsAsFactors = FALSE)
#' z4 <- z[-c(1:2), ]
#'
#' PLSroundingFits(z4, "freq", formula = ~eu * year + geo, extend0 = FALSE)[c("inner", "publish")]
#' PLSroundingFits(z4, "freq", formula = ~eu * year + geo)[c("inner", "publish")]
#'
#' my_km2 <- SSBtools::SSBtoolsData("my_km2")
#'
#' # Default automatic extension (extend0Fits = TRUE)
#' PLSroundingFits(my_km2, "freq",
#' formula = ~(Sex + Age) * Municipality * Square1000m + Square250m)[c("inner", "publish")]
#'
#' # Manual specification to avoid Nittedal combined with another_km
#' PLSroundingFits(my_km2, "freq", formula = ~(Sex + Age) * Municipality * Square1000m + Square250m,
#' extend0Fits = list(c("Sex", "Age"),
#' c("Municipality", "Square1000m", "Square250m")))[c("inner", "publish")]
#'
#' # Example with both extend0 (specified) and extend0Fits (default is TRUE)
#' PLSroundingFits(my_km2, "freq", formula = ~(Sex + Age) * Municipality * Square1000m + Square250m,
#' printInc = TRUE, zeroCandidates = TRUE, roundBase = 5, extend0 = list(c("Sex", "Age"),
#' c("Municipality", "Square1000m", "Square250m")))[c("inner", "publish")]
PLSroundingFits <- function(data, freqVar = NULL, roundBase = 3, hierarchies = NULL, formula = NULL, dimVar = NULL,
preAggregate = is.null(freqVar),
printInc = nrow(data)>1000,
xReturn = FALSE,
extend0 = FALSE,
extend0Fits = TRUE,
limit = 1e-10, viaQR = FALSE,
iter = 1000, eps = 0.01,
tol = 1e-13, reduceBy0 = TRUE, reduceByColSums = TRUE, reduceByLeverage = FALSE, ...) {
force(preAggregate)
if(!is.null(freqVar)){
freqVar <- names(data[1, freqVar, drop = FALSE])
}
if ("freq" %in% names(data)) {
newfreq <- "f_Re_qVa_r"
} else {
newfreq <- "freq"
}
if (identical(extend0, extend0Fits)) {
extend0Fits <- FALSE
}
isExtend0 <- IsExtend0(extend0)
isExtend0Fits <- IsExtend0(extend0Fits)
if (is.null(hierarchies) & is.null(formula) & is.null(dimVar)) {
dimVar <- names(data)
dimVar <- dimVar[!(dimVar %in% c(freqVar))]
}
if (preAggregate | isExtend0 | isExtend0Fits) {
# 1
if (preAggregate | isExtend0) {
data <- PLSrounding(data = data, freqVar = freqVar, roundBase = roundBase,
hierarchies = hierarchies, formula = formula, dimVar = dimVar,
preAggregate = preAggregate, output = "input",
printInc = printInc,
extend0 = extend0, ...)
ma <- match(c(freqVar, "f_Re_qVa_r"), names(data))
ma <- ma[!is.na(ma)]
freqVar <- c(names(data)[ma], newfreq)[1]
}
nrowOrig <- nrow(data)
# 2
if (isExtend0Fits) {
if (printInc) {
cat("[extend0Fits ", dim(data)[1], "*", dim(data)[2], "->", sep = "")
flush.console()
}
data <- Extend0fromModelMatrixInput(data = data,
freqName = freqVar,
hierarchies = hierarchies,
formula = formula,
dimVar = dimVar,
extend0 = extend0Fits,
...)
if (printInc) {
cat(dim(data)[1], "*", dim(data)[2], "]\n", sep = "")
flush.console()
}
}
} else {
nrowOrig <- nrow(data)
}
# 3
mm <- ModelMatrix(data = data, hierarchies = hierarchies, formula = formula, crossTable = TRUE, dimVar = dimVar, ...)
if (is.null(freqVar)) {
data[newfreq] <- 1L
freqVar <- newfreq
}
# 4
if (nrowOrig < nrow(data)) {
obj <- PLSrounding(data = data[seq_len(nrowOrig), ], freqVar = freqVar, roundBase = roundBase, preAggregate = FALSE,
printInc = printInc,
x = mm$modelMatrix[seq_len(nrowOrig), , drop = FALSE], crossTable = mm$crossTable, ...)
} else {
obj <- PLSrounding(data = data, freqVar = freqVar, roundBase = roundBase, preAggregate = FALSE,
printInc = printInc,
x = mm$modelMatrix, crossTable = mm$crossTable, ...)
}
# 5
lsFit = Matrix(obj$publish$rounded, ncol = 1)
# 6
if (min(rowSums(mm$modelMatrix[, colSums(mm$modelMatrix) == 1, drop = FALSE])) > 0) {
cat("- Mipf not needed -")
r0e <- Reduce0exact(mm$modelMatrix, z = lsFit, reduceByColSums = TRUE)
if (any(!r0e$yKnown)) {
stop("Something is wrong")
}
ipFit <- r0e$y
} else {
ipFit <- Mipf(mm$modelMatrix, z = lsFit, iter = iter, eps = eps, tol = tol, reduceBy0 = reduceBy0,
reduceByColSums = reduceByColSums, reduceByLeverage = reduceByLeverage)
}
data$original <- data[[freqVar]]
data$rounded <- c(obj$inner$rounded, rep_len(0L, nrow(data) - nrowOrig))
data$difference <- data$rounded - data$original
if(max(abs(data$difference[seq_len(nrowOrig)] - obj$inner$difference)) != 0){
stop("Something is wrong,")
}
data$ipFit = as.vector(ipFit)
obj$inner <- data
obj$publish$ipFit = as.vector(crossprod(mm$modelMatrix, ipFit))
cat("\n")
if (xReturn) {
names(mm)[1] <- "x"
if (xReturn != 2) {
mm <- mm[1]
}
return(c(obj, mm))
}
obj
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.