Nothing
#' Easy interface to sdcTable: Table suppression according to a frequency rule.
#'
#' \code{\link{GaussSuppression}}, \code{\link{protectTable}} or \code{\link{protect_linked_tables}}
#' is run with a data set as the only required input. One (stacked) or several (unstacked) input variables can hold cell counts.
#' `ProtectTableData` is a tidy wrapper function, which returns a single data frame instead of a list (`info` omitted).
#'
#' @encoding UTF8
#'
#' @param data data frame
#' @param dimVar The main dimensional variables and additional aggregating variables (name or number).
#' @param freqVar Variable(s) holding counts or NULL in the case of micro data (name or number).
#' @param protectZeros When TRUE empty cells (count=0) are considered sensitive (i.e. same as allowZeros in \code{\link{primarySuppression}}).
#' @param maxN All cells having counts <= maxN are set as primary suppressed.
#' @param method Parameter `method` in \code{\link{protectTable}}, \code{\link{protect_linked_tables}}
#' or wrapper methods via \code{\link{PTwrap}}.
#' `Gauss` (default) is implemented independently of `sdcTable`. There is also a similar variant implemented in sdcTable as `GAUSS`.
#' But this implementation is not as optimal and `Gauss` is recommended instead.
#' * **`"SIMPLEHEURISTIC"`:** This method is default in protectTable.
#' * **`"SIMPLEHEURISTIC_OLD"`:** As `"SIMPLEHEURISTIC"` in sdcTable versions prior to 0.32.
#' * **`"OPT"`, `"HITAS"`, `"HYPERCUBE"`, `"GAUSS"`:** Other methods in protectTable. `"HYPERCUBE"` is not possible in cases with two linked tables.
#' * **`"SimpleSingle"`:** `"SIMPLEHEURISTIC_OLD"` with `detectSingletons=TRUE` when `protectZeros=FALSE` and
#' `"SIMPLEHEURISTIC_OLD"` with `threshold=1` (can be overridden by input) when `protectZeros=TRUE`.
#' * **`"SIMPLEHEURISTICSingle"`:** As `"SimpleSingle"` with `"SIMPLEHEURISTIC"` instead of `"SIMPLEHEURISTIC_OLD"`.
#' * **`"Simple"`:** `"SIMPLEHEURISTIC_OLD"` with `detectSingletons=FALSE`.
#' * **`"Gauss"`:** \code{\link{GaussSuppression}} is called with parameters `x`, `candidates`, `primary` and `singleton` automatically generated.
#' Other parameters (`singletonMethod`, `printInc`) can be specified.
#'
#' Alternatively this parameter can be a named list specifying parameters for running tau-argus (see details).
#' See \code{\link{PTwrap}} for other (experimental) wrapper methods (see details).
#' @param findLinked When TRUE, the function may find two linked tables and run protect_linked_tables.
#' @param total String used to name totals.
#' @param addName When TRUE the variable name is added to the level names, except for variables with most levels.
#' @param sep A character string to separate when addName apply and when creating variable names.
#' @param removeZeros When TRUE, rows with zero count will be removed from the data within the algorithm.
#' @param dimList By default, hierarchies will be automatically found from data (see \code{\link{FindDimLists}}).
#' With non-NULL dimList, these will be changed.
#' In practice this is done by the function \code{\link{ReplaceDimList}}.
#' @param groupVarInd Possible manual specification of list defining the hierarchical
#' variable groups. When NULL (default) this information will be found automatically
#' by \code{\link{FindTableGroup}}.
#' @param ind1 Coding of table 1 as indices referring to elements of groupVarInd. This information
#' will be found automatically
#' by \code{\link{FindTableGroup}} when groupVarInd=NULL.
#' @param ind2 Coding of table 2 as indices referring to elements of groupVarInd (as ind1 above).
#' @param rowData Input to \code{\link{Stack}} used to generate extra dimVar variables when stacking in cases with several
#' freqvar variables. When NULL rowData will be created automatically by \code{\link{AutoSplit}} using varNames (see below)
#' and the freqvar names as input.
#' @param varNames The names of the extra dimVar variable(s) made when stacking in cases with several
#' freqvar variables. When length(varNames)>1 several variables may be found by \code{\link{AutoSplit}}.
#' @param split Parameter to \code{\link{AutoSplit}} - see varNames and rowData above.
#' When NULL (default), automatic splitting without needing a split string.
#' @param border Parameter to \code{\link{AutoSplit}} - see varNames and rowData above.
#' @param revBorder Parameter to \code{\link{AutoSplit}} - see varNames and rowData above.
#' @param freqName Input to \code{\link{Stack}}. The name of the new freqvar variable obtained when stacking in cases with several
#' input freqvar variables.
#' @param totalFirst Parameter controlling how output is sorted.
#' @param numericOrder Parameter controlling how output is sorted.
#' Output is character but sorting can be based on the numeric input variables.
#' @param namesAsInput When TRUE those output variables (created by unstacking) that correspond to input will be named as input.
#' @param orderAsInput When TRUE output corresponding to input will be ordered as input and kept together as one block.
#' @param sortByReversedColumns When TRUE output will be sorted by variables in opposite order.
#' @param doUnstack When FALSE output will not be unstacked (in cases with several input freqvar variables)
#' @param removeTotal When TRUE the total string (see total above) will be removed from the names of output variables
#' created by unstacking (in cases with several input freqvar variables).
#' @param singleOutput When TRUE output will be in a single data set. Default is FALSE for unstacked data
#' (in cases with several input freqvar variables) and otherwise TRUE.
#' @param suppression Value used for suppressed elements in suppressed output data. Default is NA.
#' @param outFreq String used to name output variable(s)
#' @param outSdcStatus String used to name output variable(s)
#' @param outSuppressed String used to name output variable(s)
#' @param infoAsFrame When TRUE output element info is a data frame (useful in Shiny).
#' @param IncProgress A function to report progress (incProgress in Shiny). Set equal to NULL to turn it off.
#' @param verbose Parameter sent to \code{\link{protectTable}}, \code{\link{protect_linked_tables}} or \code{\link{runArgusBatchFile}}.
#' @param ... Further parameters sent to \code{\link{protectTable}} (possibly via \code{\link{protect_linked_tables}})
#' such as timeLimit.
#' Parameters to \code{\link{GaussSuppression}}, \code{\link{createArgusInput}} and \code{\link{PTwrap}} are also possible (see details).
#'
#' @details One or two tables are identified automatically and subjected to cell suppression
#' by \code{\link{protectTable}} (single table) or \code{\link{protect_linked_tables}} (two linked tables).
#' The tables can alternatively be specified manually by groupVarInd, ind1 and ind2.
#' The output will be on a form similar to input depending on whether freqVar is a single variable or not.
#' The status of the cells are
#' coded as "u" (primary suppressed), "x" (secondary suppression), and "s" (can be published).
#' This is taken directly from the output from sdcTable. In cases with two linked tables "u" or "x"
#' for common cells are based on output from the first table.
#'
#' * **To run tau-argus** specify `method` as a named list containing the
#' parameter `exe` for \code{\link{runArgusBatchFile}} and other parameters for
#' \code{\link{createArgusInput}}.
#' * One may specify:
#' \code{method = list(exe="C:/Tau/TauArgus.exe", typ="tabular", path= getwd(),}
#' \code{solver= "FREE", method= "OPT")}
#' However these values of "exe", "path" and "solver" and "method" are set by default so in this case
#' using "\code{method = list(typ="tabular", method= "OPT")}" is equivalent.
#' * If \code{typ="microdata"} is specified. Necessary transformation to microdata will be made.
#'
#' * **Wrapper methods (partly experimental):**
#' In the function \code{\link{PTwrap}} several additional methods are defined.
#' If input to ProtectTable() is one of these methods ProtectTable() will
#' be run via PTwrap(). So making explicit call to PTwrap() is not needed.
#'
#' * **Singleton and zeros:** The parameter detectSingletons was included in protectTable to handle the so-called
#' singleton problem that appears when `protectZeros=FALSE`.
#' Not all problems were solved and the parameter threshold has been introduced later. The value of threshold
#' needed depends on the number of singletons in one group. It seems that `threshold=3` is equivalent to `detectSingletons=TRUE`.
#' When `protectZeros=TRUE` the related “zero problem” occurs. This problem is solved by `threshold=1`.
#'
#' * **NOTE:** The use of numVarInd, weightInd and sampWeightInd in sdcTable is not implemented. This also limits possible
#' input to tau-argus.
#'
#' @return When singleOutput=TRUE output is a list of two elements.
#'
#' * **`info`:** Information as a single column character matrix. This is information about the extra
#' dimVar variables created when stacking, information about the identified (linked)
#' table(s) and summary output from sdcTable.
#' With `method="Gauss"`, a sdcTable function is run with `maxN=0` to create a template for the real output.
#' Some of the summary info is therefore misleading in this case.
#' * **`data`:** A data frame where variables are named according to outFreq,
#' outSdcStatus and outSuppressed.
#' When singleOutput=FALSE output element data is replaced by three elements and these are named
#' according to outFreq, outSdcStatus and outSuppressed.
#'
#' @export
#' @importFrom sdcTable summary getInfo
#' @importFrom SSBtools AutoSplit Stack SortRows Unstack GaussSuppression Extend0
#' @importFrom utils capture.output flush.console
#' @importFrom methods hasArg
#' @importFrom Matrix colSums
#' @importFrom stats aggregate
#'
#' @note ProtectTable makes a call to the function \code{\link{ProtectTable1}}.
#'
#' @seealso See also the vignettes.
#'
#'
#' @examples
#' \dontrun{
#'
#' # ==== Example 1 , 8 regions ====
#' z1 <- EasyData("z1")
#' ProtectTable(z1,1:2, 3)
#' ProtectTableData(z1,1:2, 3)
#' ProtectTable(z1, c("region","hovedint"), "ant") # Input by name
#' # --- Unstacked input data ---
#' z1w = EasyData("z1w")
#' ProtectTable(z1w, 1, 2:5)
#' ProtectTableData(z1w, 1, 2:5)
#' ProtectTable(z1w, 1, 2:5, varName="hovedint")
#' ProtectTable(z1w, 1, 2:5, method="HITAS")
#' ProtectTable(z1w, 1, 2:5, totalFirst = TRUE, method ="Simple")
#'
#' # ==== Example 2 , 11 regions ====
#' z2 <- EasyData("z2")
#' ProtectTable(z2,c(1,3,4), 5) # With region-variable kostragr
#' # --- Unstacked input data ---
#' z2w <- EasyData("z2w")
#' ProtectTable(z2w, 1:2, 4:7) # With region-variable fylke
#' ProtectTable(z2w, 1:3, 4:7) # Two linked tables
#'
#' # ==== Example 3 , 36 regions ====
#' z3 <- EasyData("z3")
#' ProtectTable(z3, c(1,4,5), 7) # Three dimensions. No subtotals
#' ProtectTable(z3, 1:6, 7) # Two linked tables
#' # --- Unstacked input data with coded column names
#' z3w <- EasyData("z3w")
#' ProtectTable(z3w,1:3,4:15, varName="g12") # coding not used when single varName
#' ProtectTable(z3w,1:3,4:15, varName=c("hovedint","mnd")) # Two variables found automatically
#' ProtectTable(z3w,1:3,4:15, varName=c("hovedint","mnd"),
#' removeTotal=FALSE) # Keep "Total" in variable names
#' # --- Unstacked input data with three level column name coding
#' z3wb <- EasyData("z3wb")
#' ProtectTable(z3wb,1:3,4:15,varName=c("hovedint","mnd","mnd2")) # Two variables found automatically
#' ProtectTable(z3wb,1:3,4:15,varName=c("hovedint","mnd","mnd2"),
#' split="_") # Three variables when splitting
#' ProtectTable(z3wb,1:3,4:15,varName=c("hovedint","mnd","mnd2"),
#' split="_",namesAsInput=FALSE,orderAsInput=FALSE) # Alternative output format
#'
#' # ==== Examples Tau-Argus ====
#' exeArgus <- "C:/TauArgus4.1.4/TauArgus.exe" # Change to TauArgus.exe-path in your computer
#' pathArgus <- "C:/Users/nnn/Documents" # Change to an existing folder
#' z1 = EasyData("z1")
#' ProtectTable(z1,1:2,3,method=list(exe=exeArgus, path=pathArgus, typ="tabular", method="OPT"))
#' ProtectTable(z1,1:2,3,method=list(exe=exeArgus, path=pathArgus, typ="tabular", method="MOD"))
#' ProtectTable(z1,1:2,3,method=list(exe=exeArgus, path=pathArgus, typ="tabular", method="GH"))
#' ProtectTable(z1,1:2,3,maxN=-1,
#' method=list(path=pathArgus, exe=exeArgus, method="OPT",
#' primSuppRules= list(list(type="freq", n=4, rg=20))))
#' z3 <- EasyData("z3")
#' ProtectTable(z3,c(1:2,4,5),7,maxN=-1,
#' method=list(path=pathArgus, exe=exeArgus, method="OPT",
#' primSuppRules=list(list(type="freq", n=4, rg=20))))
#'
#'
#' # ==== Examples with parameter dimList ====
#' z2 <- EasyData("z2")
#' dList <- FindDimLists(z2[-5])
#' ProtectTable(z2[, c(1,4,5)], 1:2, 3, dimList = dList[c(1,3)])
#' ProtectTable(z2[, c(1,4,5)], 1:2, 3, dimList = dList[2])
#' ProtectTable(z2[, c(1,4,5)], 1:2, 3, dimList = DimList2Hrc(dList[c(2,3)]))
#' }
#'
ProtectTable <- function(data,
dimVar=1:NCOL(data),
freqVar=NULL,
protectZeros=TRUE,
maxN=3,
method="Gauss",
findLinked=TRUE,
total="Total",
addName=FALSE,
sep="_",
removeZeros=FALSE,
dimList = NULL,
groupVarInd=NULL,
ind1=NULL,
ind2=NULL,
rowData=NULL,
varNames=paste("var",1:100,sep=""),
split=NULL,
border=sep,
revBorder=FALSE,
freqName="values",
totalFirst=FALSE,
numericOrder=TRUE,
namesAsInput=TRUE,
orderAsInput=TRUE,
sortByReversedColumns=FALSE,
doUnstack=TRUE,
removeTotal=TRUE,
singleOutput=NULL, # or TRUE/FALSE
suppression=NA,
outFreq="freq",
outSdcStatus="sdcStatus",
outSuppressed="suppressed",
infoAsFrame = FALSE,
IncProgress = IncDefault,
verbose = FALSE,
...) {
# Overview of the steps performed below:
#   1. Reject renamed/misspelled arguments that would be swallowed by `...`.
#   2. Re-dispatch wrapper methods to PTwrap().
#   3. Stack the data when several freqVar columns are supplied.
#   4. Run ProtectTable1() (as a maxN = 0 template when a "Gauss" method is used).
#   5. Build the `info` output element.
#   6. Extract final data, merge two linked tables if present, and sort.
#   7. For "Gauss" methods: compute primary/secondary suppression via GaussSuppression().
#   8. Unstack (when stacking was done) and shape the returned list.

# ---- 1. Argument guards -------------------------------------------------
# These names are not formal parameters, so they would otherwise pass
# silently through `...`; stop with an explicit hint instead.
if (hasArg("allowZeros"))
stop('Use "protectZeros" instead of "allowZeros"')
if (hasArg("protectzeros"))
stop('Misspelled parameter "protectzeros" found. Use "protectZeros".')
if (hasArg("dimvar"))
stop('Misspelled parameter "dimvar" found. Use "dimVar".')
if (hasArg("freqvar"))
stop('Misspelled parameter "freqvar" found. Use "freqVar".')
if (hasArg("maxn"))
stop('Misspelled parameter "maxn" found. Use "maxN".')
# Inspired by
# https://stackoverflow.com/questions/30528652/r-catch-message-return-result-efficiently
# Sms(expr): evaluate `expr` inside try() while the message stream is diverted
# to a text connection that writes into the local variable `foo`.
# Returns the result followed by the captured message lines.
Sms <- function(expr) {
foo <- "foo"
zz <- textConnection("foo", "w", local = TRUE)
sink(zz, type = "message")
res <- try(eval(expr))
sink(type = "message")
close(zz)
c(res,foo)
}
# A NULL progress callback is replaced by a no-op so that IncProgress()
# can be called unconditionally; the original NULL-ness is remembered
# because it also controls the cat("\n") further down.
is_null_IncProgress <- is.null(IncProgress)
if (is_null_IncProgress){
IncProgress <- function(){NULL}
}
IncProgress()
# ---- 2. Method dispatch -------------------------------------------------
# A list-valued `method` carries tau-argus parameters.
tauArgus <- is.list(method)
# Wrapper methods: re-issue the very same call with PTwrap as the function,
# evaluated in the caller's frame, and return its result.
if(!tauArgus)
if(method %in% c("Simple","SimpleSingle", "SIMPLEHEURISTICSingle", "TauArgus","TauArgusOPT","TauArgusMOD","TauArgusGH")){
sysCall <- sys.call()
sysCall[[1]] <- as.name("PTwrap")
parentFrame = parent.frame()
return(eval(sysCall, envir=parentFrame))
}
# Any non-list method containing "Gauss" (case-sensitive, so "GAUSS" is not
# matched) selects the GaussSuppression code path.
if(!tauArgus)
doGaussSuppression <- grepl("Gauss",method)
else
doGaussSuppression <- FALSE
if(doGaussSuppression){
if(!(method %in% c("Gauss","GaussBasic", "GaussNoSingleton")))
stop(paste(method, "is not a valid method"))
}
# Translate variable names to column indices (indices are used as given).
if (is.character(dimVar))
dimVarInd <- match(dimVar, names(data)) else dimVarInd <- dimVar
if (is.character(freqVar))
freqVarInd <- match(freqVar, names(data)) else freqVarInd <- freqVar
# AutoSplit is only asked to split column names when more than one
# varName is available.
tryAutoSplit <- (length(varNames) > 1)
# ---- 3. Stacking of several count columns -------------------------------
# `stacked` flags that the (wide) input was stacked here and therefore may
# have to be unstacked again before returning.
stacked <- FALSE
if (length(freqVarInd) > 1) {
stacked <- TRUE
if (orderAsInput & !namesAsInput)
stop("orderAsInput=TRUE combined with namesAsInput=FALSE is not implemented")
if (orderAsInput & !doUnstack)
warning("orderAsInput=TRUE ignored when doUnstack=FALSE")
stackVar <- freqVarInd
dataOrig <- data
# rowData describes the extra dimensional variable(s) created by stacking:
# generated by AutoSplit() from the count column names, or user supplied
# (in which case its row names are set to the count column names).
if (is.null(rowData))
rowData <- AutoSplit(colnames(data)[freqVarInd], split = split, border = border,
revBorder = revBorder, noSplit = !tryAutoSplit, varNames = varNames) else rownames(rowData) <- colnames(data)[freqVarInd]
varNames <- colnames(rowData)
data <- Stack(dataOrig, stackVar = freqVarInd, blockVar = dimVarInd, rowData = rowData,
valueName = freqName, indName = NULL)
colnamesOrig <- colnames(dataOrig)
dimVarNamesOrig <- colnamesOrig[dimVarInd]
dimVarNames <- c(dimVarNamesOrig, varNames)
dimVarInd <- match(dimVarNames, colnames(data)) ######### New dimVarInd refer to stacked data
freqVarInd <- match(freqName, colnames(data)) ######### New freqVarInd refer to stacked data
}
IncProgress()
# ---- 4. Run ProtectTable1 -----------------------------------------------
if (doGaussSuppression) {
# Local wrapper that drops `threshold` and `detectSingletons` from the
# arguments before they reach ProtectTable1().
ProtectTable1parameterRemove <- function(threshold = NULL, detectSingletons = NULL, ...) {
ProtectTable1(...)
}
# With a Gauss method ProtectTable1() is only run as a template:
# protectZeros = FALSE, maxN = 0 and method = "SIMPLEHEURISTIC" are fixed here;
# the real suppression is computed in step 7.
pt <- ProtectTable1parameterRemove(data = data, dimVarInd = dimVarInd, freqVarInd = freqVarInd, protectZeros = FALSE, maxN = 0, method = "SIMPLEHEURISTIC",
findLinked = findLinked, total = total, addName = addName, sep = sep, removeZeros = removeZeros, dimList = dimList, groupVarInd = groupVarInd,
ind1 = ind1, ind2 = ind2, dimDataReturn = TRUE, IncProgress = IncProgress, verbose = verbose, ...)
# Same arguments sent to ProtectTable1dimList(); the result is later passed
# to CrossTable2ModelMatrix() (presumably the dimList hierarchies - confirm
# against ProtectTable1dimList).
dimLists <- ProtectTable1dimList(data = data, dimVarInd = dimVarInd, freqVarInd = freqVarInd, protectZeros = FALSE, maxN = 0, method = "SIMPLEHEURISTIC",
findLinked = findLinked, total = total, addName = addName, sep = sep, removeZeros = removeZeros, dimList = dimList, groupVarInd = groupVarInd,
ind1 = ind1, ind2 = ind2, dimDataReturn = FALSE, IncProgress = IncProgress, verbose = verbose, ...)
} else {
pt <- ProtectTable1(data = data, dimVarInd = dimVarInd, freqVarInd = freqVarInd, protectZeros = protectZeros, maxN = maxN, method = method,
findLinked = findLinked, total = total, addName = addName, sep = sep, removeZeros = removeZeros, dimList = dimList, groupVarInd = groupVarInd,
ind1 = ind1, ind2 = ind2, dimDataReturn = TRUE, IncProgress = IncProgress, verbose = verbose, ...)
}
# ---- 5. Build the `info` element ----------------------------------------
# Pieces: i00 = chosen parameters (data frame variant only), i0 = rowData from
# stacking, i1 = pt$common$info, i2/i3 = summary of table 1 / table 2
# (or the printed tau-argus method list).
if(infoAsFrame){
i00 <- as.data.frame(rbind(
## c("method",method),
c("protectZeros",protectZeros),
c("maxN",maxN)
),stringsAsFactors=FALSE)
# The column names themselves carry the method ("method", <method name>).
if(!tauArgus) names(i00) <- c("method",method) #c("Parameter","Choice")
else names(i00) <- c("method","TauArgus")
if (stacked)
i0 <- data.frame(InputName=rownames(rowData),as.data.frame(as.matrix(rowData),stringsAsFactors=FALSE),stringsAsFactors=FALSE) else i0 <- NULL
i1 <- as.data.frame(as.matrix(pt$common$info),stringsAsFactors=FALSE)
if(!tauArgus){
i2 <- as.data.frame(Sms(capture.output(sdcTable::summary(pt$table1[[1]]))),stringsAsFactors=FALSE)
names(i2) = "Summary1sdcTable"
} else {
i2 <- as.data.frame(capture.output(print(method)),stringsAsFactors=FALSE)
names(i2) = "TauArgus"
} # i2 = NULL
if (!is.null(pt$table2[[1]])) {
i3 <- as.data.frame(Sms(capture.output(sdcTable::summary(pt$table2[[1]]))),stringsAsFactors=FALSE)
names(i3) = "Summary2sdcTable"
} else i3 <- NULL
info <- RbindAllwithNames(i00,i0,i1,i2,i3,toRight=TRUE,extra="= = =")
colnames(info)[1] <- "Info"
} else {
if (stacked)
i0 <- capture.output(print(rowData)) else i0 <- NULL
i1 <- capture.output(print(pt$common$info))
if(!tauArgus){
i2 <- Sms(capture.output(sdcTable::summary(pt$table1[[1]]))) ## Wrong in html Vignette without "sdcTable::"
} else
i2 <- capture.output(print(method)) ## tau-argus: the printed method list replaces the sdcTable summary
#i2 = NULL
if (!is.null(pt$table2[[1]]))
i3 <- Sms(capture.output(sdcTable::summary(pt$table2[[1]]))) else i3 <- NULL
info <- c(i0, "==========", i1, "==========", i2, "==========", i3)
info <- as.matrix(info, ncol = 1) # One element pr row when printed
}
# ---- 6. Final data: extract, merge linked tables, sort ------------------
# One entry per dimension name; each holds the combined variable indices
# with 0 as the code for the total (reversed when totalFirst is TRUE).
gVC <- GroupVarCombined(pt$common$groupVarInd, totalFirst)
nDim <- length(gVC)
# For every dimension, the desired ordering of its codes in the output.
try( { # Include in try as extra safety. Sorting can be omitted"
sortedLists <- vector("list", nDim)
names(sortedLists) <- names(gVC)
for (i in seq_len(nDim)) {
if(is.null(dimList)){
# Order taken from data; with numericOrder the original (possibly numeric)
# input columns decide the order of the character codes.
if (numericOrder)
sortedLists[[i]] <- SortedFromData(pt$common$dimData, ind = gVC[[i]],
total = total, xNumeric = data[, dimVarInd, drop = FALSE])
else
sortedLists[[i]] <- SortedFromData(pt$common$dimData, ind = gVC[[i]], total = total)
} else{
# User-supplied dimList: order taken from the dimList(s) of the table(s).
sortedLists[[i]] <- SortedFromDimList(pt$table1$dimList[names(gVC)[i]][[1]],pt$table2$dimList[names(gVC)[i]][[1]])
}
}
}, silent = TRUE)
# pt[[1]] / pt[[2]] are accessed as pt$table1 / pt$table2 above
# (presumably the same elements - confirm against ProtectTable1).
if (is.null(pt[[2]][[1]])) {
# Single table.
if(!tauArgus){
finalData <- as.data.frame(getInfo(pt[[1]][[1]], type = "finalData"))
} else{
finalData <- as.data.frame(pt[[1]][[1]]) ## Start treating tauArgus
# Harmonise tau-argus column names with the sdcTable ones used below
# ("Freq" and "sdcStatus").
names(finalData)[names(finalData)=="freq"] <- "Freq"
if(!is.null(finalData$cellvalue)){
if(is.null(finalData$Freq)) finalData$Freq <- finalData$cellvalue
finalData$cellvalue <- NULL
}
if(!is.null(finalData$sdcStatus_argus)){
finalData$sdcStatus <- finalData$sdcStatus_argus
finalData$sdcStatus_argus <- NULL
}
} ## End treating tauArgus
} else {
# Two linked tables: merge their final data on all but the last two columns
# (the last two are presumably Freq and sdcStatus; table 2 gets suffix ".y").
t1 <- as.data.frame(getInfo(pt[[1]][[1]], type = "finalData"))
t2 <- as.data.frame(getInfo(pt[[2]][[1]], type = "finalData"))
if (dim(t1)[2] != dim(t2)[2])
stop("Output from linked tables: Something is wrong!")
b <- merge(t1, t2, all = TRUE, by = seq_len(dim(t1)[2] - 2), suffixes = c("", ".y"))
# Common cells must have the same frequency in both tables (relative tolerance).
if (max(abs(b$Freq - b$Freq.y), na.rm = TRUE) > (.Machine$double.eps)^(5/8)*max(abs(b$Freq + b$Freq.y), na.rm = TRUE))
stop("Output from protect_linked_tables: Something is wrong!")
# If the tables disagree on which common cells are publishable ("s"),
# cells that are "s" in table 2 are set to "s" and a warning is issued.
if (sum(abs(as.integer(b$sdcStatus == "s") - as.integer(b$sdcStatus.y ==
"s")), na.rm = TRUE) > 0) {
b$sdcStatus[!is.na(b$sdcStatus) & b$sdcStatus.y == "s"] <- "s"
warning("Non-unique suppression-output form protect_linked_tables")
}
if (sum(!(is.na(b$Freq) == is.na(b$sdcStatus))) > 0)
stop("Output from protect_linked_tables: Something is wrong!")
# Cells missing from table 1 are filled in from table 2.
nat1 <- is.na(b$Freq)
b$sdcStatus[nat1] <- b$sdcStatus.y[nat1]
b$Freq[nat1] <- b$Freq.y[nat1]
finalData <- b[, !(colnames(b) %in% c("Freq.y", "sdcStatus.y")), drop = FALSE]
}
# Sort rows: each dimension column is converted to its rank within
# sortedLists and SortRows() gives the row permutation. Any failure only
# results in a warning and unsorted output.
okSortTry = FALSE
try( { # Include in try as extra safety. Sorting can be omitted"
if (sortByReversedColumns)
fd <- finalData[, rev(seq_len(nDim)), drop = FALSE] else fd <- finalData[, seq_len(nDim), drop = FALSE]
for (i in seq_len(nDim)) {
fd[, names(sortedLists)[i]] <- as.integer(factor(fd[, names(sortedLists)[i]],
levels = sortedLists[[i]]))
}
if (sum(is.na(fd)))
stop("Something went wrong when sorting output")
finalData <- finalData[SortRows(fd, index.return = TRUE), , drop = FALSE]
okSortTry = TRUE
}, silent = TRUE)
if(!okSortTry)
warning("Something went wrong when sorting output. Output is not sorted.")
rownames(finalData) <- NULL
# Suppressed counts: every cell whose status is not "s" (and, when
# protectZeros is FALSE, not "z" either) gets the `suppression` value.
# The odd column name "supp6547524" is a temporary name, renamed at the end.
suppressed <- finalData$Freq
if(protectZeros)
suppressed[!finalData$sdcStatus == "s"] <- suppression
else
suppressed[!(finalData$sdcStatus == "s" | finalData$sdcStatus == "z") ] <- suppression
finalData$supp6547524 <- suppressed
IncProgress()
if (!is_null_IncProgress)
cat("\n")
attributes(finalData)$index <- NULL # avoid attribute
# ---- 7. Gauss methods: real suppression by GaussSuppression() -----------
if (doGaussSuppression) {
# Code copied from PTxyz
# ptA: the dimensional (code) columns of the output cells.
ptA <- finalData[, !(names(finalData) %in% c("Freq", "sdcStatus", "supp6547524")), drop = FALSE]
if (is.null(freqVarInd)) {
# Micro data (no count column): pre-aggregate to counts per combination.
# `printInc` (default TRUE) is picked from `...` to control progress printing.
GetPrintInc <- function(printInc = TRUE, ...) {
printInc
}
printInc <- GetPrintInc(...)
if (printInc) {
cat("[preAggregate ", dim(data)[1], "*", dim(data)[2], "->", sep = "")
flush.console()
}
# These four lines is about aggregate. Other lines is about printing.
# NOTE(review): `freqVar_` is assigned but not used again in this function.
dVar <- names(dimLists)
freqVar_ <- "f_Re_qVa_r"
data <- aggregate(list(f_Re_qVa_r = data[[dVar[1]]]), data[, dVar, drop = FALSE], length)
freqVarInd <- ncol(data)
if (printInc) {
cat(dim(data)[1], "*", dim(data)[2], "]\n", sep = "")
flush.console()
}
}
#xxx <- CrossTable2ModelMatrix(data[, c(freqVarInd, dimVarInd), drop = FALSE], ptA, dimLists)
#xxx <- CrossTable2ModelMatrix(data[, dimVarInd, drop = FALSE], ptA, dimLists)
# xxx: one row per input row, one column per output cell (row/column names
# are set accordingly below). yyy: input counts. zzz = t(xxx) %*% yyy, i.e.
# one value per output cell (presumably the cell counts - confirm against
# CrossTable2ModelMatrix).
xxx <- CrossTable2ModelMatrix(data, ptA, dimLists)
rownames(xxx) <- apply(data[, names(data) %in% names(ptA), drop = FALSE], 1, paste, collapse = "_")
colnames(xxx) <- apply(ptA, 1, paste, collapse = ":")
yyy <- as.matrix(data[, freqVarInd, drop = FALSE])
zzz <- as.matrix(Matrix::crossprod(xxx, yyy))
# Optional user-supplied `candidates` picked from `...`.
GetCandidates <- function(candidates = NULL, ...) {
candidates
}
candidates <- GetCandidates(...)
# Primary suppression: cells with value <= maxN; zeros are exempt when
# protectZeros is FALSE.
primary <- (zzz <= maxN)[, 1, drop = TRUE]
if (!protectZeros)
primary[zzz == 0] <- FALSE
# Avoid warning in GaussSuppression: warning("Suppressed cells with empty input will not be protected. Extend input data with zeros?")
# If some primary cell has no contributing input row, rebuild xxx from the
# input extended by Extend0() and pad yyy with zeros for the added rows.
if(any(primary))
if(min(colSums(xxx[, primary, drop = FALSE])) == 0){
colnames_xxx <- colnames(xxx)
xxx <- CrossTable2ModelMatrix(Extend0(data[, (names(data) %in% names(dimLists))], hierarchical = FALSE), ptA, dimLists)
colnames(xxx) <- colnames_xxx
rownames(xxx) <- NULL
yyy <- rbind(yyy, matrix(0, nrow(xxx) - nrow(yyy), ncol = 1))
}
if(is.null(candidates)){
# Default candidate order: decreasing cell value, with a tie-breaking term
# in [0, 0.99] added. When zeros are not protected, zero cells are lifted
# above all other values so that they come first.
tie <- as.matrix(Matrix::crossprod(xxx, xxx %*% zzz))
tie <- tie/max(tie)
zzzOrd <- (zzz + 0.99 * tie)[, 1, drop = TRUE]
if (!protectZeros) {
zzzOrd[zzz == 0] <- 0.01 + max(zzzOrd) + zzzOrd[zzz == 0]
}
candidates <- order(zzzOrd, decreasing = TRUE)
candidatesManually <- FALSE
} else {
warning('candidates specified manually. You may try: hidden="output" ')
candidatesManually <- TRUE
}
# Singleton marker on the input rows: zero counts when zeros are
# protected, otherwise counts equal to one.
if (protectZeros) {
singleton <- (yyy == 0)[, 1, drop = TRUE]
} else {
singleton <- (yyy == 1)[, 1, drop = TRUE]
}
# Optional `hidden` and `forced` picked from `...`.
GetHidden <- function(hidden = NULL, ...) {
hidden
}
hidden <- GetHidden(...)
GetForced <- function(forced = NULL, ...) {
forced
}
forced <- GetForced(...)
# A character `hidden` is a request to return the input and/or output data
# instead of running the suppression (early return).
if (is.character(hidden)) {
input <- data[, c(dimVarInd, freqVarInd), drop = FALSE]
if (hidden == "input")
return(input)
output <- finalData[, !(names(finalData) %in% c("sdcStatus", "supp6547524")), drop = FALSE]
if (hidden == "output")
return(output)
if (hidden == "inputoutput")
return(list(input = input, output = output))
stop(paste(hidden, "is not a valid as hidden"))
}
# With manually specified candidates these are already part of `...`, so
# `candidates` is not passed explicitly. "GaussNoSingleton" omits `singleton`.
if(candidatesManually){
if (method == "Gauss")
secondary <- GaussSuppression(x = xxx, primary = primary, singleton = singleton, ...)
if (method == "GaussNoSingleton")
secondary <- GaussSuppression(x = xxx, primary = primary, ...)
} else {
if (method == "Gauss")
secondary <- GaussSuppression(x = xxx, candidates = candidates, primary = primary, singleton = singleton, ...)
if (method == "GaussNoSingleton")
secondary <- GaussSuppression(x = xxx, candidates = candidates, primary = primary, ...)
}
# "GaussBasic": only x and `...` are sent to GaussSuppression(); `primary`
# used for the status coding below is whatever was supplied in `...`
# (default integer(0)).
if (method == "GaussBasic") {
GetPrimary <- function(primary = integer(0), ...) {
primary
}
primary <- GetPrimary(...)
secondary <- GaussSuppression(x = xxx, ...)
}
# Status coding. Order matters: later assignments overwrite earlier ones
# ("h" hidden, then "u" primary, "x" secondary, "z" forced).
if (length(hidden)) {
finalData$sdcStatus[hidden] <- "h"
finalData$supp6547524[hidden] <- suppression
}
finalData$sdcStatus[primary] <- "u"
finalData$sdcStatus[secondary] <- "x"
finalData$sdcStatus[forced] <- "z"
# NOTE(review): repeats the assignment on the line above.
if (length(forced)) {
finalData$sdcStatus[forced] <- "z"
}
finalData$supp6547524[primary] <- suppression
finalData$supp6547524[secondary] <- suppression
}
# ---- 8. Shape the output ------------------------------------------------
# If a variable named doReturnExtraFinalData is found (get0 lookup) and is
# TRUE, the inner input data and the stacked final data are appended to the output.
if(get0("doReturnExtraFinalData",ifnotfound = FALSE))
extraFinalData <- list(inputData=data[,c(freqVarInd, dimVarInd),drop=FALSE],finalData=finalData)
if (stacked & doUnstack) {
# Unstack counts, status and suppressed values separately, using the same
# stack/block variables so that the three results have parallel layout.
if (is.null(singleOutput))
singleOutput <- FALSE
mainVar1 <- match("Freq", names(finalData))
mainVar2 <- match("sdcStatus", names(finalData))
mainVar3 <- match("supp6547524", names(finalData))
stackVar <- match(varNames, names(finalData))
stackVar <- stackVar[!is.na(stackVar)]
stackVarNames <- names(finalData)[stackVar]
blockVar <- match(dimVarNamesOrig, names(finalData))
blockVar <- blockVar[!is.na(blockVar)]
x1 <- Unstack(finalData, mainVar = mainVar1, stackVar = stackVar, blockVar = blockVar,
sep = sep)
x2 <- Unstack(finalData, mainVar = mainVar2, stackVar = stackVar, blockVar = blockVar,
sep = sep)
x3 <- Unstack(finalData, mainVar = mainVar3, stackVar = stackVar, blockVar = blockVar,
sep = sep)
if (namesAsInput) {
# Find the column names Unstack() generates for the rowData combinations
# (by unstacking a dummy data set built from rowData) and map them back
# to the input column names.
w <- rbind(cbind(a12345645 = 1, rowData), cbind(a12345645 = 1, rowData))
ww <- Unstack(w, stackVar = match(stackVarNames, names(w)), sep = sep)$rowData
w2 <- cbind(outputNames = rownames(ww), ww, stringsAsFactors = FALSE)
w1 <- cbind(inputNames = rownames(rowData), rowData, stringsAsFactors = FALSE)
namesFrame <- merge(w1, w2)
indNames <- match(namesFrame$outputNames, colnames(x1$data))
indNames2 <- match(namesFrame$outputNames, colnames(x2$data))
indNames3 <- match(namesFrame$outputNames, colnames(x3$data))
if (sum(as.integer(indNames != indNames2)))
stop("Problems with namesAsInput")
if (sum(as.integer(indNames != indNames3)))
stop("Problems with namesAsInput")
colnames(x1$data)[indNames] <- namesFrame$inputNames
colnames(x2$data)[indNames] <- namesFrame$inputNames
colnames(x3$data)[indNames] <- namesFrame$inputNames
}
if (orderAsInput) {
# Rows (rI) and columns (cI) that correspond to the input are kept together
# in input order; the remaining ones (rO, cO) are placed after them, or
# before them when totalFirst is TRUE.
namesOrig <- dimVarNamesOrig[dimVarNamesOrig %in% names(gVC)]
rInput <- unique(apply(CharacterDataFrame(dataOrig[, namesOrig, drop = FALSE]),
1, paste, collapse = "_"))
rOutput <- apply(x1$data[, namesOrig, drop = FALSE], 1, paste, collapse = "_")
rI <- match(rInput, rOutput)
rI <- rI[!is.na(rI)]
rAll <- seq_len(length(rOutput))
rO <- rAll[!(rAll %in% rI)]
if (totalFirst)
rr <- c(rO, rI) else rr <- c(rI, rO)
cI <- match(colnames(dataOrig), colnames(x1$data))
cI <- cI[!is.na(cI)]
cAll <- seq_len(length(colnames(x1$data)))
cO <- cAll[!(cAll %in% cI)]
if (totalFirst)
cc <- c(cO, cI) else cc <- c(cI, cO)
x1$data <- x1$data[rr, cc, drop = FALSE]
x2$data <- x2$data[rr, cc, drop = FALSE]
x3$data <- x3$data[rr, cc, drop = FALSE]
rownames(x1$data) <- NULL
rownames(x2$data) <- NULL
rownames(x3$data) <- NULL
}
if (removeTotal) {
colnames(x1$data) <- RemoveTotal(colnames(x1$data), total = total, sep = sep)
colnames(x2$data) <- RemoveTotal(colnames(x2$data), total = total, sep = sep)
colnames(x3$data) <- RemoveTotal(colnames(x3$data), total = total, sep = sep)
}
if (!singleOutput) {
# Three separate data frames named by outFreq, outSdcStatus, outSuppressed.
output <- list(info = info, x1 = x1$data, x2 = x2$data, x3 = x3$data)
names(output) <- c("info", outFreq, outSdcStatus, outSuppressed)
} else {
# One data frame: the first length(blockVar) columns are taken once,
# followed by the three sets of value columns with prefixed names.
bv <- seq_len(NCOL(x1$data)) %in% seq_len(length(blockVar))
block <- x1$data[, bv, drop = FALSE]
x1 <- x1$data[, !bv, drop = FALSE]
x2 <- x2$data[, !bv, drop = FALSE]
x3 <- x3$data[, !bv, drop = FALSE]
names(x1) <- paste(outFreq, names(x1), sep = sep)
names(x2) <- paste(outSdcStatus, names(x2), sep = sep)
names(x3) <- paste(outSuppressed, names(x3), sep = sep)
output <- list(info = info, data = cbind(block, x1, x2, x3))
}
if(get0("doReturnExtraFinalData",ifnotfound = FALSE))
output <- c(output,extraFinalData)
return(output)
}
# No unstacking: output in stacked (long) form.
if (is.null(singleOutput))
singleOutput <- TRUE
if (!stacked & orderAsInput) {
# Does not work to have this before unstack
# Rows whose code combination occurs in the input come first in input
# order (last when totalFirst is TRUE); the remaining rows follow.
rInput <- unique(apply(pt$common$dimData[, names(gVC), drop = FALSE], 1,
paste, collapse = "_"))
rOutput <- apply(finalData[, names(gVC), drop = FALSE], 1, paste, collapse = "_")
rI <- match(rInput, rOutput)
rI <- rI[!is.na(rI)]
rAll <- seq_len(length(rOutput))
rO <- rAll[!(rAll %in% rI)]
if (totalFirst)
finalData <- finalData[c(rO, rI), , drop = FALSE] else finalData <- finalData[c(rI, rO), , drop = FALSE]
rownames(finalData) <- NULL
}
# Rename the three value columns to the user-chosen output names.
names(finalData)[match(c("Freq", "sdcStatus", "supp6547524"), names(finalData))] <- c(outFreq,
outSdcStatus, outSuppressed)
if (singleOutput)
output <- list(info = info, data = finalData) else {
# Three data frames, each holding the dimensional columns and one of the
# value columns.
output <- list(info = info, x1 = finalData[, !(names(finalData) %in% c(outSdcStatus,
outSuppressed))], x2 = finalData[, !(names(finalData) %in% c(outFreq,
outSuppressed))], x3 = finalData[, !(names(finalData) %in% c(outFreq,
outSdcStatus))])
names(output) <- c("info", outFreq, outSdcStatus, outSuppressed)
}
if(get0("doReturnExtraFinalData",ifnotfound = FALSE))
output <- c(output,extraFinalData)
return(output)
}
#' @rdname ProtectTable
#' @export
ProtectTableData <- function(data, ...) {
  # Tidy wrapper: always request a single data set from ProtectTable()
  # and hand back only its `data` element (the `info` element is dropped).
  protected <- ProtectTable(data, ..., singleOutput = TRUE)
  protected$data
}
# Combine the index vectors of list elements that share a name.
#
# x:       (Named) list of index vectors. Elements with the same name are
#          merged into one vector.
# reverse: When TRUE each combined vector is reversed (so the total code 0
#          comes first).
#
# Returns a list with one element per unique name in `x`. Each element holds
# the unique non-NA indices, interleaved position by position across the
# vectors sharing that name, followed by 0 as the code for the total.
# An empty `x` gives an empty list.
GroupVarCombined <- function(x, reverse = FALSE) {
  # Empty input: nothing to combine (max() below would otherwise fail).
  if (length(x) == 0) {
    return(list())
  }
  n_elements <- vapply(x, length, integer(1))
  max_len <- max(n_elements)
  na_pad <- rep(NA, max_len)
  # Pad every vector with NA to a common length so that vectors sharing a
  # name can be put side by side as columns.
  for (i in seq_along(x)) {
    x[[i]] <- c(x[[i]], na_pad)[seq_len(max_len)]
  }
  unique_names <- unique(names(x))
  combined <- vector("list", length(unique_names))
  names(combined) <- unique_names
  for (i in seq_along(unique_names)) {
    same_name <- x[names(x) == unique_names[i]]
    # Transposing before as.vector() reads the values position by position
    # across the vectors instead of vector by vector.
    values <- unique(as.vector(t(as.matrix(data.frame(same_name)))))
    values <- c(values[!is.na(values)], 0) # 0 as total code
    if (reverse) {
      values <- rev(values)
    }
    combined[[i]] <- values
  }
  combined
}
# Never executed (guarded by if (FALSE)): kept as a record of how the
# example data sets z1, z2, z3, z1w, z2w, z3w, z3wb and z1micro were made.
if (FALSE) {
# Generation of test data
z <- EasyData("sosialFiktiv")
z8 <- z[z$fylke <= 10 & z$kostragr == 300, ] # 8 regions
z11 <- z[z$fylke <= 10 & (z$kostragr == 300 | z$kostragr == 400), ] # 11 regions
# Recode kostragr in the 8-region data to two groups, "A" and "B".
z8$kostragr <- "A"
z8$kostragr[z8$region %in% c(43200, 51400, 62000, 83400)] <- "B"
z36 <- z[z$fylke >= 11 & z$fylke <= 14 & z$kostragr <= 500, ] # 36 regions
# Wide (unstacked) versions with one column per hovedint value.
z1 <- Unstack(z8, mainVar = match("ant", names(z8)), stackVar = match(c("hovedint"),
names(z8)), blockVar = match(c("region"), names(z8)))
# NOTE(review): column positions are looked up in names(z8) although z11 is
# unstacked; this relies on z8 and z11 having the same column order (both
# are row subsets of z, and z8 only has an existing column overwritten).
z2 <- Unstack(z11, mainVar = match("ant", names(z8)), stackVar = match(c("hovedint"),
names(z8)), blockVar = match(c("region", "fylke", "kostragr"), names(z8)))
# x1, x2: wide data summed per region (and fylke/kostragr).
x1 <- aggregate(z1$data[, 2:5], list(region = z1$data$region), sum)
x2 <- aggregate(z2$data[, 4:7], list(region = z2$data$region, fylke = z2$data$fylke,
kostragr = z2$data$kostragr), sum)
# y1, y2: long data with column 7 summed per combination; the sum column
# is renamed to "ant" below.
y1 <- aggregate((z8[, 7]), list(region = z8$region, hovedint = z8$hovedint),
sum)
y2 <- aggregate((z11[, 7]), list(region = z11$region, fylke = z11$fylke, kostragr = z11$kostragr,
hovedint = z11$hovedint), sum)
names(y1)[3] <- "ant"
names(y2)[5] <- "ant"
y3 <- z36
# Recode mnd: "_" becomes "m", selected digits get a leading zero and the
# whole code gets an "m" prefix.
mnd <- gsub("_", "m", y3$mnd)
mnd <- gsub("6", "06", mnd)
mnd <- gsub("9", "09", mnd)
mnd <- gsub("1m", "01m", mnd)
mnd <- gsub("5", "05", mnd)
mnd <- paste("m", mnd, sep = "")
y3$mnd <- mnd
# Same kind of recoding for mnd2, with "-" becoming "M" and an "M" prefix.
mnd <- gsub("-", "M", y3$mnd2)
mnd <- gsub("6", "06", mnd)
mnd <- gsub("1M", "01M", mnd)
mnd <- gsub("5", "05", mnd)
mnd <- paste("M", mnd, sep = "")
y3$mnd2 <- mnd
# Wide version of the 36-region data: columns for hovedint x mnd, with mnd2
# carried along as an extra variable.
y3Unstack <- Unstack(y3, mainVar = match("ant", names(y3)), stackVar = match(c("hovedint",
"mnd"), names(y3)), extraVar = match(c("mnd2"), names(y3)), blockVar = match(c("region",
"fylke", "kostragr"), names(y3)))
x3 <- y3Unstack$data
# x3b: as x3, but with the third rowData column appended to the names of
# columns 4:15.
x3b <- x3
names(x3b)[4:15] <- paste(names(x3)[4:15], y3Unstack$rowData[, 3], sep = "_")
# Micro data: repeat each row of y1 `ant` times.
ind <- NULL
for (i in 1:NROW(y1)) ind <- c(ind, rep(i, y1$ant[i]))
y1micro <- y1[ind, ]
# rename and save
# Hard-coded local paths; also disabled by its own if (FALSE).
if (FALSE) {
z1 <- y1
z1micro <- y1micro
z2 <- y2
z3 <- y3
z1w <- x1
z2w <- x2
z3w <- x3
z3wb <- x3b
save(z1, file = "C:/R/easysdctable/data/z1.RData")
save(z1micro, file = "C:/R/easysdctable/data/z1micro.RData")
save(z2, file = "C:/R/easysdctable/data/z2.RData")
save(z3, file = "C:/R/easysdctable/data/z3.RData")
save(z1w, file = "C:/R/easysdctable/data/z1w.RData")
save(z2w, file = "C:/R/easysdctable/data/z2w.RData")
save(z3w, file = "C:/R/easysdctable/data/z3w.RData")
save(z3wb, file = "C:/R/easysdctable/data/z3wb.RData")
}
}
# Sorted codes taken from one or two dimList data frames.
#
# dimList1, dimList2: data frames with the columns `levels` and `codes`
#                     (the second column is what gets returned).
#
# Rows are ordered by decreasing level rank (rank of `levels` as a factor)
# and then by `codes`. With two dimLists the rows are stacked first, and the
# level ranks of the first are weighted by 5 and those of the second by 4.
# Duplicated rows are dropped and the second column is returned.
SortedFromDimList <- function(dimList1, dimList2 = NULL) {
  level_rank <- function(d) as.integer(factor(d$levels))
  if (is.null(dimList2)) {
    row_order <- order(c(-5 * level_rank(dimList1)), c(dimList1$codes))
    sorted_rows <- dimList1[row_order, ]
  } else {
    both <- rbind(dimList1, dimList2)
    level_key <- c(-5 * level_rank(dimList1), -4 * level_rank(dimList2))
    code_key <- c(dimList1$codes, dimList2$codes)
    sorted_rows <- both[order(level_key, code_key), ]
  }
  unique(sorted_rows)[, 2]
}
# Positions of the first occurrence of each distinct value in `x`.
#
# x:       A vector.
# ordered: When FALSE the positions are returned in order of appearance;
#          when TRUE they are arranged so that the values they point to
#          are sorted.
uniqueIndex <- function(x, ordered = FALSE) {
  positions <- seq_len(length(x))
  first_seen <- positions[!duplicated(x)]
  if (ordered) {
    first_seen <- first_seen[order(x[first_seen])]
  }
  first_seen
}
# Sorted unique codes for the columns listed in `ind`.
#
# xCharacter: Data whose columns hold the codes that are returned.
# ind:        Column indices; the value 0 stands for the total.
# total:      Value returned for index 0.
# xNumeric:   Optional data with the same column layout as xCharacter. When
#             given, its columns decide the sort order instead of the
#             xCharacter columns.
#
# Returns the pieces for all indices concatenated in the order of `ind`
# (NULL when `ind` is empty).
SortedFromData <- function(xCharacter, ind, total, xNumeric = NULL) {
  codes_for <- function(i) {
    if (i == 0) {
      return(total)
    }
    sort_source <- if (!is.null(xNumeric)) xNumeric else xCharacter
    first_sorted <- uniqueIndex(sort_source[, i], ordered = TRUE)
    xCharacter[first_sorted, i]
  }
  # Fold with c() starting from NULL, one piece per index.
  Reduce(c, lapply(ind, codes_for), NULL)
}
# Convert every column of `x` to character, one column at a time, and
# return the modified object.
CharacterDataFrame <- function(x) {
  for (col in seq_len(NCOL(x))) {
    as_text <- as.character(x[, col])
    x[, col] <- as_text
  }
  x
}
# Remove the total string, together with its adjoining separator, from names.
#
# x:     Character vector (typically column names created by unstacking).
# total: String used to name totals.
# sep:   Separator used when the names were created.
#
# Both "<total><sep>" and "<sep><total>" are removed wherever they occur.
# Matching is literal (fixed = TRUE): `total` and `sep` are user-supplied
# strings, and treating them as regular expressions makes separators such
# as "." match any character and separators such as "+" fail with an
# invalid-regex error.
RemoveTotal <- function(x, total = "Total", sep = "_") {
  x <- gsub(paste0(total, sep), "", x, fixed = TRUE)
  gsub(paste0(sep, total), "", x, fixed = TRUE)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.