R/PLSroundingFits.R

Defines functions PLSroundingFits

Documented in PLSroundingFits

#' Small count rounding with post-processing to expected frequencies
#' 
#' The counts rounded by \code{\link{PLSrounding}}  
#' Thereafter, based on the publishable rounded data, expected inner cell frequencies are generated by iterative proportional fitting using \code{\link[SSBtools]{Mipf}}.
#' To ensure that empty cells missing in input data are included in the fitting process, the data is first extended using \code{\link[SSBtools]{Extend0}}.
#' 
#' The seven first parameters is documented in more detail in \code{\link{PLSrounding}}. 
#' If iterative proportional fitting succeeds, the maximum difference between rounded counts and `ipFit` is less than input parameter `eps`. 
#' 
#' @param data data frame (inner cells)
#' @param freqVar Variable holding counts 
#' @param roundBase Rounding base 
#' @param formula Model formula  
#' @param hierarchies List of hierarchies
#' @param dimVar Dimensional variables
#' @param preAggregate Aggregation
#' @param xReturn	Dummy matrix in output when `TRUE`. To return crossTable as well, use `xReturn = 2`.
#' @param extend0  Data is automatically extended by `Extend0` when `TRUE`. Can also be specified as a list meaning parameter `varGroups` to `Extend0`.
#' @param limit `LSfitNonNeg` parameter
#' @param viaQR `LSfitNonNeg` parameter 
#' @param iter `Mipf` parameter
#' @param eps  `Mipf` parameter
#' @param tol  `Mipf` parameter
#' @param reduceBy0 `Mipf` parameter
#' @param reduceByColSums `Mipf` parameter
#' @param reduceByLeverage `Mipf` parameter
#' @param ... Further parameters to \code{\link{PLSrounding}}. 
#' 
#' @return Output from \code{\link{PLSrounding}} (class attribute "PLSrounded") with modified versions of `inner` and `publish`: 
#'    \item{inner}{Extended with more input data variables and with expected frequencies (`ipFit`).}
#'    \item{publish}{Extended with aggregated expected frequencies (`ipFit`).}
#' @importFrom SSBtools Mipf Extend0 Reduce0exact
#' @importFrom Matrix crossprod rowSums colSums
#' @export
#'
#' @examples
#' z <- data.frame(geo  = c("Iceland", "Portugal", "Spain"), 
#'                 eu = c("nonEU", "EU", "EU"),
#'                 year = rep(c("2018","2019"), each = 3),
#'                 freq = c(2,3,7,1,5,6), stringsAsFactors = FALSE)
#' z4 <- z[-c(1:2), ]
#' 
#' PLSroundingFits(z4, "freq", formula = ~eu * year + geo, extend0 = FALSE)[c("inner", "publish")]
#' PLSroundingFits(z4, "freq", formula = ~eu * year + geo)[c("inner", "publish")]
#' 
#' my_km2 <- SSBtools::SSBtoolsData("my_km2")
#' 
#' # Default automatic extension (extend0 = TRUE)
#' PLSroundingFits(my_km2, "freq", 
#'        formula = ~(Sex + Age) * Municipality * Square1000m + Square250m)[c("inner", "publish")]
#' 
#' # Manual specification to avoid Nittedal combined with another_km
#' PLSroundingFits(my_km2, "freq", formula = ~(Sex + Age) * Municipality * Square1000m + Square250m, 
#'        extend0 = list(c("Sex", "Age"), 
#'        c("Municipality", "Square1000m", "Square250m")))[c("inner", "publish")]
PLSroundingFits <- function(data, freqVar = NULL, roundBase = 3, hierarchies = NULL, formula = NULL, dimVar = NULL, 
                        preAggregate = is.null(freqVar), xReturn = FALSE, 
                        extend0 = TRUE, 
                        limit = 1e-10, viaQR = FALSE,
                        iter = 1000, eps = 0.01,
                        tol = 1e-13, reduceBy0 = TRUE, reduceByColSums = TRUE, reduceByLeverage = FALSE, ...) {
  
  force(preAggregate)
  
  if(!is.null(freqVar)){
    freqVar <- names(data[1, freqVar, drop = FALSE])
  }
  
  if ("freq" %in% names(data)) {
    newfreq <- "f_Re_qVa_r"
  } else {
    newfreq <- "freq"
  }
  
  if (is.list(extend0)) {
    varGroups <- extend0
    extend0 <- TRUE
  } else {
    varGroups <- NULL
  }
  
  if (preAggregate | extend0) {
    
    # 1
    data <- PLSrounding(data = data, freqVar = freqVar, roundBase = roundBase, 
                    hierarchies = hierarchies, formula = formula, dimVar = dimVar, 
                    preAggregate = preAggregate, output = "input", ...)
    
    ma <- match(c(freqVar, "f_Re_qVa_r"), names(data))
    ma <- ma[!is.na(ma)]
    freqVar <- c(names(data)[ma], newfreq)[1]
    
    nrowOrig <- nrow(data)
    
    # 2
    if (extend0) {
      data <- Extend0(data, freqVar,  varGroups = varGroups)
    }
  } else {
    nrowOrig <- nrow(data)
  }
  
  
  # 3
  if (is.null(hierarchies) & is.null(formula) & is.null(dimVar)) {
    dimVar <- names(data)
    dimVar <- dimVar[!(dimVar %in% c(freqVar))]
  }
  mm <- ModelMatrix(data = data, hierarchies = hierarchies, formula = formula, crossTable = TRUE, dimVar = dimVar, ...)
  
  
  if (is.null(freqVar)) {
    data[newfreq] <- 1L
    freqVar <- newfreq
  }
  
  # 4
  if (nrowOrig < nrow(data)) {
    obj  <- PLSrounding(data = data[seq_len(nrowOrig), ], freqVar = freqVar, roundBase = roundBase, preAggregate = FALSE, 
                 x = mm$modelMatrix[seq_len(nrowOrig), , drop = FALSE], crossTable = mm$crossTable, ...)
  } else {
    obj  <- PLSrounding(data = data, freqVar = freqVar, roundBase = roundBase, preAggregate = FALSE, 
                 x = mm$modelMatrix, crossTable = mm$crossTable, ...)
  }

  # 5
  
  lsFit = Matrix(obj$publish$rounded, ncol = 1)
  
  # 6
  if (min(rowSums(mm$modelMatrix[, colSums(mm$modelMatrix) == 1, drop = FALSE])) > 0) {
    cat("- Mipf not needed -")
    r0e <- Reduce0exact(mm$modelMatrix, z = lsFit, reduceByColSums = TRUE)
    if (any(!r0e$yKnown)) {
      stop("Something is wrong")
    }
    ipFit <- r0e$y
  } else {
    ipFit <- Mipf(mm$modelMatrix, z = lsFit, iter = iter, eps = eps, tol = tol, reduceBy0 = reduceBy0, 
                  reduceByColSums = reduceByColSums, reduceByLeverage = reduceByLeverage)
  }
  
  data$original <- data[[freqVar]]
  data$rounded <- c(obj$inner$rounded, rep_len(0L, nrow(data) - nrowOrig))
  data$difference <- data$rounded - data$original
  if(max(abs(data$difference[seq_len(nrowOrig)] - obj$inner$difference)) != 0){
    stop("Something is wrong,")
  }
  
  data$ipFit = as.vector(ipFit)
  
  obj$inner <- data
  obj$publish$ipFit = as.vector(crossprod(mm$modelMatrix, ipFit))
  
  
  cat("\n")
  
  if (xReturn) {
    names(mm)[1] <- "x"
    if (xReturn != 2) {
      mm <- mm[1]
    }
    return(c(obj, mm))
  }
  
  obj
}                           
 

Try the SmallCountRounding package in your browser

Any scripts or data that you put into this service are public.

SmallCountRounding documentation built on Sept. 11, 2024, 5:46 p.m.