Nothing
#' Suppressed tabular data: Inner cell frequencies as decimal numbers
#'
#' Assume that frequencies to be published, \code{z}, can be computed from inner
#' frequencies, \code{y}, via \code{ z = t(x) \%*\% y},
#' where \code{x} is a dummy matrix.
#' Assuming correct suppression, this function will generate safe inner cell frequencies as decimal numbers.
#'
#' This function makes use of \code{\link{ReduceX}} and \code{\link{RegSDCipso}}.
#' It is not required that \code{y} consists of cell frequencies. A multivariate \code{y} or \code{z} is also possible.
#' Then several values are possible as \code{digits}, \code{resScale} and \code{rmse} input.
#'
#'
#' @param x Dummy matrix where the dimensions matches z and/or y input. Sparse matrix (Matrix package) is possible.
#' @param z Frequencies to be published. All, only the safe ones or with suppressed as NA.
#' @param y Inner cell frequencies (see details).
#' @param suppressed Logical vector defining the suppressed elements of z.
#' @param digits Output close to whole numbers will be rounded using \code{digits} as input to \code{\link{RoundWhole}}.
#' @param nRep Integer, when >1, several y's will be generated. Extra columns in output.
#' @param yDeduct Values to be subtracted from y and added back after the calculations.
#' Can be used to perform the modulo method described in the paper (see examples).
#' @param resScale Residuals will be scaled by resScale
#' @param rmse Desired root mean square error (residual standard error). Will be used when resScale is NULL or cannot be used.
#' @param sparseLimit Limit for the number of rows of a reduced x-matrix within the algorithm. When exceeded, a sparse algorithm is used
#' (see \code{\link{IpsoExtra}}).
#'
#' @note Capital letters, X, Y and Z, are used in the paper.
#'
#' @return The inner cell frequencies as decimal numbers
#' @importFrom SSBtools RoundWhole
#' @export
#' @author Øyvind Langsrud
#'
#' @examples
#' # Same data as in the paper
#' z <- RegSDCdata("sec7z")
#' x <- RegSDCdata("sec7x")
#' y <- RegSDCdata("sec7y") # Now z is t(x) %*% y
#' zAll <- RegSDCdata("sec7zAll")
#' zAllSupp <- RegSDCdata("sec7zAllSupp")
#' xAll <- RegSDCdata("sec7xAll")
#'
#' # When no suppression, output is identical to y
#' SuppressDec(xAll, zAll, y)
#' SuppressDec(xAll, zAll) # y can be seen in z
#'
#' # Similar to Y* in paper (but other random values)
#' SuppressDec(x, z, y)
#'
#' # Residual standard error forced to be 1
#' SuppressDec(x, z, y, rmse = 1)
#'
#' # Seven ways of obtaining the same output
#' SuppressDec(x, z, rmse = 1) # slower, y must be estimated
#' SuppressDec(x, y = y, rmse = 1)
#' SuppressDec(xAll, zAllSupp, y, rmse = 1)
#' SuppressDec(xAll, zAllSupp, rmse = 1) # slower, y must be estimated
#' SuppressDec(xAll, zAll, y, is.na(zAllSupp), rmse = 1)
#' SuppressDec(xAll, zAll, suppressed = is.na(zAllSupp), rmse = 1) # y seen in z
#' SuppressDec(xAll, y = y, suppressed = is.na(zAllSupp), rmse = 1)
#'
#' # YhatMod4 and YhatMod10 in Table 2 in paper
#' SuppressDec(xAll, zAllSupp, y, yDeduct = 4 * (y%/%4), resScale = 0)
#' SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 0)
#'
#' # As data in Table 3 in paper (but other random values)
#' SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), resScale = 0.1)
#'
#' # rmse instead of resScale and 5 draws
#' SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 1, nRep = 5)
SuppressDec <- function(x, z = NULL, y = NULL, suppressed = NULL, digits = 9, nRep = 1, yDeduct = NULL, resScale = NULL, rmse = NULL, sparseLimit = 500) {
origY <- !is.null(y)
if (!is.null(z))
z <- EnsureMatrix(z, NCOL(x))
if (!is.null(y))
y <- EnsureMatrix(y, NROW(x))
if (!is.null(suppressed)) {
suppr <- suppressed
} else {
if (is.null(z)) {
suppr <- rep(FALSE, NCOL(x))
} else {
suppr <- is.na(z[, 1])
}
}
if (is.null(y))
if (!is.null(suppressed))
if (!any(is.na(z)))
if (any(suppr)) {
a <- ReduceX(x = x, z = z, digits = digits)
y <- a$y
origY <- !any(!a$yKnown)
}
nonSuppr <- which(!suppr)
if (is.null(y)) {
if (is.null(z)) {
stop("z needed in input when y is NULL")
}
} else {
z <- NULL
}
a <- ReduceX(x = x[, nonSuppr, drop = FALSE], z = z[nonSuppr, , drop = FALSE], y = y, digits = digits)
if (is.null(y))
origY <- !any(!a$yKnown)
if (!origY) {
if (is.null(rmse)) {
stop("Without original y values, rmse must be supplied.")
} else {
if (!is.null(resScale)) {
warning("Without original y values, rmse is used instead of resScale.")
}
}
}
if (any(!a$yKnown)){
if(is.null(yDeduct)){
rw <- RoundWhole(IpsoExtra(y = a$y[which(!a$yKnown), , drop = FALSE], x = a$x, nRep = nRep,
ensureIntercept = FALSE, rmse = rmse, resScale = resScale, sparseLimit = sparseLimit), digits = digits)
} else {
yDeduct <- EnsureMatrix(yDeduct)[which(!a$yKnown), , drop = FALSE]
rw <- RoundWhole(IpsoExtra(y = a$y[which(!a$yKnown), , drop = FALSE] - yDeduct, x = a$x, nRep = nRep,
ensureIntercept = FALSE, rmse = rmse, resScale = resScale, sparseLimit = sparseLimit), digits = digits)
if (nRep != 1)
yDeduct <- ColRepMatrix(yDeduct, nRep)
rw <- rw + yDeduct
}
}
if (nRep != 1)
a$y <- ColRepMatrix(a$y, nRep)
if (any(!a$yKnown))
a$y[which(!a$yKnown), ] <- rw
a$y
}
ColRepMatrix <- function(x, nRep) {
if (nRep == 1)
return(x)
if (nRep == 0)
return(x[, integer(0), drop = FALSE])
cn <- colnames(x)
rn <- rownames(x)
x <- matrix(x, nrow(x), ncol(x) * nRep)
if (!is.null(cn))
colnames(x) <- rep_len(cn, ncol(x))
if (!is.null(rn))
rownames(x) <- rn
x
}
#' Suppressed tabular data: Yhat from X and Z
#'
#' Implementation of equation 21 in the paper.
#'
#' Generalized inverse is computed by \code{\link{ginv}}.
#' In practise, the computations can be speeded up using reduced versions of X and Z. See \code{\link{ReduceX}}.
#'
#' @param z Z as a matrix
#' @param x X as a matrix
#' @param digits When non-NULL, output values close to whole numbers will be rounded using
#' \code{digits} as input to \code{\link{RoundWhole}}.
#'
#' @return Yhat as a matrix
#'
#' @seealso \code{\link{IpsoExtra}}
#'
#' @importFrom MASS ginv
#' @export
#' @author Øyvind Langsrud
#'
#' @examples
#' # Same data as in the paper
#' z <- RegSDCdata("sec7z")
#' x <- RegSDCdata("sec7x")
#' Z2Yhat(z, x)
#'
#' # With y known, yHat can be computed in other ways
#' y <- RegSDCdata("sec7y") # Now z is t(x) %*% y
#' fitted(lm(y ~ x - 1))
#' IpsoExtra(y, x, FALSE, resScale = 0)
Z2Yhat <- function(z, x, digits = 9) {
yHat <- crossprod(ginv(as.matrix(x)), z)
if(!is.null(rownames(x)))
rownames(yHat) <- rownames(x)
# if (!is.null(digits)) # Taken care of in RoundWhole
# if (!is.na(digits[1])) # Taken care of in RoundWhole
# yHat <- RoundWhole(yHat, digits = digits)
# yHat
RoundWhole(yHat, digits = digits)
}
#' Suppressed tabular data: Reduce dummy matrix, X (and estimate Y)
#'
#' In section 7 in the paper \code{ Z = t(X) \%*\% Y} where \code{X} is a dummy matrix.
#' Some elements of Y can be found directly as elements in Z. Corresponding rows of X will be removed.
#' After removing rows, some columns will only have zeros and these will also be removed.
#'
#' To estimate Y, this function finds some values directly from Z and other values by running \code{\link{Z2Yhat}} on reduced versions of X and Z.
#'
#'
#' @param z Z as a matrix
#' @param x X as a matrix
#' @param y Y as a matrix
#' @param digits When non-NULL and when NULL y input, output y estimates close to whole numbers will be rounded using
#' \code{digits} as input to \code{\link{RoundWhole}}.
#'
#' @return A list of four elements:
#' \item{\code{x}}{Reduced \code{x}}
#' \item{\code{z}}{Corresponding reduced \code{z} or NULL when no \code{z} in input}
#' \item{\code{yKnown}}{Logical vector specifying elements of y that can be found directly as elements in z}
#' \item{\code{y}}{As \code{y} in input (not reduced) or estimated \code{y} when NULL y in input}
#'
#' @keywords internal
#' @importFrom SSBtools As_TsparseMatrix
#' @importFrom Matrix which
#' @export
#' @author Øyvind Langsrud
#'
#' @examples
#' # Same data as in the paper
#' z <- RegSDCdata("sec7z")
#' x <- RegSDCdata("sec7x")
#' y <- RegSDCdata("sec7y") # Now z is t(x) %*% y
#'
#' a <- ReduceX(x, z, y)
#' b <- ReduceX(x, z)
#' d <- ReduceX(x, z = NULL, y) # No z in output
#'
#' # Identical output for x and z
#' identical(a$x, b$x)
#' identical(a$x, d$x)
#' identical(a$z, b$z)
#'
#' # Same y in output as input
#' identical(a$y, y)
#' identical(d$y, y)
#'
#' # Estimate of y (yHat) when NULL y input
#' b$y
#'
#' # These elements of y can be found directly in in z
#' y[a$yKnown, , drop = FALSE]
#' # They can be found by searching for unit colSums
#' colSums(x)[colSums(x) == 1]
#'
#' # These trivial data rows can be omitted when processing data
#' x[!a$yKnown, ]
#' # Now several columns can be omitted since zero colSums
#' colSums0 <- colSums(x[!a$yKnown, ]) == 0
#' # The resulting matrix is output from the function
#' identical(x[!a$yKnown, !colSums0], a$x)
#'
#' # Output z can be computed from this output x
#' identical(t(a$x) %*% y[!a$yKnown, , drop = FALSE], a$z)
ReduceX <- function(x, z = NULL, y = NULL, digits = 9) {
yNULL <- is.null(y)
zNULL <- is.null(z)
if(yNULL & zNULL)
stop("z or y must be supplied")
colSums_1 <- which(colSums(x) == 1)
x1 <- x[, colSums_1, drop = FALSE]
x1dgT <-As_TsparseMatrix(x1) # x1dgT <- as(x1, "dgTMatrix")
nonDub <- x1dgT@j[x1dgT@x != 0][!duplicated(x1dgT@i[x1dgT@x != 0])] + 1L
x1 <- x1[, nonDub, drop = FALSE]
if (!zNULL)
zA <- z[colSums_1[nonDub], , drop = FALSE]
zA1 <- matrix(1, NCOL(x1), 1)
yKnown1 <- round(x1 %*% zA1)
yKnown1_0 <- which(yKnown1 == 0)
if (yNULL) {
yHat <- x1 %*% zA
} else {
yHat <- y
yHat[yKnown1_0, ] <- 0
}
if (!zNULL)
z <- z - crossprod(x, yHat)
x <- x[yKnown1_0, , drop = FALSE]
colSums_ok <- which(colSums(x) != 0)
if (!zNULL)
z <- z[colSums_ok, , drop = FALSE]
x <- x[, colSums_ok, drop = FALSE]
if (yNULL) {
if (length(yKnown1_0))
yHat[yKnown1_0, ] <- Z2Yhat(z, x, digits = NA)
#if (!is.null(digits)) Taken care of in RoundWhole
# if (!is.na(digits[1])) ----- // -----
yHat <- RoundWhole(yHat, digits = digits)
} else {
yHat <- y
}
list(x = x, z = z, yKnown = yKnown1 != 0, y = yHat)
}
#' Extended variant of RegSDCipso
#'
#' Possible to generate several y's and to re-scale residuals.
#' Regression fitting by a sparse matrix algorithm is also possible (see reference).
#'
#' @references
#' Douglas Bates and R Development Core Team (2022),
#' Comparing Least Squares Calculations,
#' R Vignette,
#' \code{vignette("Comparisons", package="Matrix")}.
#'
#' @param y Matrix of confidential variables
#' @param x Matrix of non-confidential variables
#' @param ensureIntercept Whether to ensure/include a constant term. Non-NULL x is subjected to \code{\link{EnsureIntercept}}
#' @param returnParts Alternative output two matrices: yHat (fitted) and yRes (generated residuals).
#' @param nRep Integer, when >1, several y's will be generated. Extra columns in output.
#' @param resScale Residuals will be scaled by resScale
#' @param digits Digits used to detect perfect fit (caused by fitted values as input).
#' This checking will be done only when rmse is in input. When perfect fit, rmse will be used instead of resScale.
#' @param rmse Desired root mean square error (residual standard error). Will be used when resScale is
#' NULL or cannot be used (see parameter digits). This parameter forces the rmse value for one y variable (the first).
#' @param sparseLimit Limit for the number of rows of a reduced x-matrix within the algorithm. When exceeded, a sparse algorithm is used (see reference).
#'
#' @return Generated version of y
#' @importFrom SSBtools SeqInc DummyDuplicated GaussIndependent
#' @importFrom Matrix solve crossprod
#' @importFrom utils flush.console
#' @export
#' @author Øyvind Langsrud
#' @keywords internal
#'
#' @examples
#' x <- matrix(1:5, 5, 1)
#' y <- matrix(10 * (sample(7:39, 15) + 4 * (1:15)), 5, 3)
#' colnames(y) <- paste("y", 1:3, sep = "")
#' y1 <- y[, 1, drop = FALSE]
#'
#' IpsoExtra(y, x) # Same as RegSDCipso(y, x)
#'
#' IpsoExtra(y, x, resScale = 0) # Fitted values (whole numbers in this case)
#' IpsoExtra(y, x, nRep = 2, resScale = 1e-05) # Downscaled residuals
#'
#' ySynth <- IpsoExtra(y1, x, nRep = 2, rmse = 0.25) # Downscaled residuals
#' summary(lm(ySynth ~ x)) # Identical regression results with Residual standard error: 0.25
#'
#' IpsoExtra(fitted(lm(y1 ~ x)), x, nRep = 2, resScale = 0.1) # resScale no effect since perfect fit
#' IpsoExtra(fitted(lm(y1 ~ x)), x, nRep = 2, resScale = 0.1, rmse = 2) # with warning
#'
#' # Using data in the paper
#' IpsoExtra(RegSDCdata("sec7y"), RegSDCdata("sec7x")) # Similar to Y*
#' IpsoExtra(RegSDCdata("sec7y"), RegSDCdata("sec7x"), rmse = 1)
IpsoExtra <- function(y, x = NULL, ensureIntercept = TRUE, returnParts = FALSE, nRep = 1, resScale = NULL, digits = 9, rmse = NULL, sparseLimit = 500, printInc = TRUE) {
y <- EnsureMatrix(y)
sparse <- (nrow(y) > sparseLimit) & (!is.matrix(x))
if(sparse){
if(nrow(y) != nrow(x))
stop("nrow(y) != nrow(x)")
if (ensureIntercept)
stop("ensureIntercept not implemented when sparse")
dd <- DummyDuplicated(x, rnd = TRUE)
if (any(dd)) {
if (printInc) {
cat("-")
flush.console()
}
x <- x[ , !dd, drop = FALSE]
}
x <- x[ , GaussIndependent(x, printInc = printInc)[[2]], drop = FALSE]
if (printInc) {
cat("\n")
}
NROW_xQ <- nrow(x)
NCOL_xQ <- ncol(x)
crossprod_x <- crossprod(x)
} else {
x <- EnsureMatrix(x, nrow(y))
if (ensureIntercept)
x <- EnsureIntercept(x)
xQ <- GenQR(x, findR = FALSE)
NROW_xQ <- NROW(xQ)
NCOL_xQ <- NCOL(xQ)
}
if (NROW_xQ == NCOL_xQ) {
if (!is.null(resScale) | !is.null(rmse))
warning("resScale/rmse ignored when Q from X is square.")
if (nRep != 1)
y <- ColRepMatrix(y, nRep)
if (returnParts)
return(list(yHat = y, yRes = 0 * y)) else return(y)
}
if(sparse){
yHat <- as.matrix(x %*% solve(crossprod_x, crossprod(x, y)))
} else {
yHat <- xQ %*% (t(xQ) %*% y)
}
n <- NROW(y)
ncoly <- NCOL(y)
eQRR <- NULL
if (!is.null(digits)) { # if (!is.null(digits) & !is.null(resScale)) {
if (!is.null(rmse))
if (max(abs(round(y - yHat, digits = digits))) == 0) {
if (!is.null(resScale)){
if (ncoly > 1){
warning("rmse with identical residual vectors used instead of resScal since perfect fit.")
} else {
warning("rmse used instead of resScal since perfect fit.")
}
resScale <- NULL
}
eQRR <- matrix(1, 1, ncoly) # Changed below
m <- 1L
}
}
if (is.null(eQRR)) {
eQRR <- GenQR(y - yHat, makeunique = TRUE)$R
m <- NROW(eQRR)
}
if (!is.null(rmse))
if (ncoly > 1){
rmseVar <- match(TRUE,!is.na(rmse))
rmse <- rmse[rmseVar]
resScale <- rmse * sqrt((n - NCOL_xQ)/sum(eQRR[, rmseVar]^2))
}
if (!is.null(resScale)) {
eQRR <- resScale * eQRR
} else {
if (!is.null(rmse))
eQRR[] <- sqrt(n - NCOL_xQ) * rmse
}
if (nRep != 1) {
yHat <- ColRepMatrix(yHat, nRep)
yRes <- 0 * yHat
}
for (i in seq_len(nRep)) {
yNew <- matrix(rnorm(n * m), n, m)
if(sparse){
eSim <- yNew - as.matrix(x %*% solve(crossprod_x, crossprod(x, yNew)))
} else {
eSim <- yNew - xQ %*% (t(xQ) %*% yNew)
}
eSimQ <- GenQR(eSim, findR = FALSE, makeunique = TRUE)
if (nRep == 1)
yRes <- eSimQ %*% eQRR
else {
yRes[, SeqInc(1 + ncoly * (i - 1), ncoly * i)] <- eSimQ %*% eQRR
}
}
if(!is.null(rownames(y))){
rownames(yHat) <- rownames(y)
rownames(yRes) <- rownames(y)
}
if (returnParts)
return(list(yHat = yHat, yRes = yRes))
yHat + yRes
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.