Nothing
#' Statistical Disclosure Control (SDC) for the generation of protected
#' microdata for researchers and for public use.
#'
#' This package includes all methods of the popular software mu-Argus plus
#' several new methods. In comparison with mu-Argus the advantages of this
#' package are that the results are fully reproducible even with the included
#' GUI, that the package can be used in batch-mode from other software, that
#' the functions can be used in a very flexible way, that everybody can look
#' at the source code and that no time-consuming meta-data management
#' is necessary. However, the user should have detailed knowledge of SDC
#' when applying the methods on data.
#'
#' The package is programmed using S4-classes and it comes with a well-defined
#' class structure.
#'
#' The implemented graphical user interface (GUI) for microdata protection
#' serves as an easy-to-handle tool for users who want to use the sdcMicro
#' package for statistical disclosure control but are not used to the native R
#' command line interface. In addition to that, interactions between objects
#' which result from the anonymization process are provided within the GUI.
#' This allows automated recalculation and display of the
#' frequency counts, individual risk, information loss and data utility after
#' each anonymization step. In addition to that, the code for every
#' anonymization step carried out within the GUI is saved in a script which can
#' then be easily modified and reloaded.
#'
#' \tabular{ll}{ Package: \tab sdcMicro\cr Type: \tab Package\cr Version: \tab
#' 2.5.9\cr Date: \tab 2009-07-22\cr License: \tab GPL 2.0 \cr }
#'
#' @name sdcMicro-package
#' @aliases sdcMicro-package sdcMicro
#' @docType package
#' @author Matthias Templ, Alexander Kowarik, Bernhard Meindl
#'
#' Maintainer: Matthias Templ <templ@@statistik.tuwien.ac.at>
#' @references
#' Templ, M. Statistical Disclosure Control for Microdata: Methods and Applications in R.
#' \emph{Springer International Publishing}, 287 pages, 2017. ISBN 978-3-319-50272-4.
#' \doi{10.1007/978-3-319-50272-4}
#'
#' Templ, M. and Kowarik, A. and Meindl, B.
#' Statistical Disclosure Control for Micro-Data Using the R Package sdcMicro.
#' \emph{Journal of Statistical Software}, \strong{67} (4), 1--36, 2015. \doi{10.18637/jss.v067.i04}
#'
#' Templ, M. and Meindl, B. \emph{Practical Applications in
#' Statistical Disclosure Control Using R}, Privacy and Anonymity in
#' Information Management Systems, book chapter, Springer London, pp. 31-62,
#' 2010. \doi{10.1007/978-1-84996-238-4_3}
#'
#' Kowarik, A. and Templ, M. and Meindl, B. and Fonteneau, F. and Prantner, B.:
#' \emph{Testing of IHSN Cpp Code and Inclusion of New Methods into sdcMicro},
#' in: Lecture Notes in Computer Science, J. Domingo-Ferrer, I. Tinnirello
#' (editors); Springer, Berlin, 2012, ISBN: 978-3-642-33626-3, pp. 63-77.
#' \doi{10.1007/978-3-642-33627-0_6}
#'
#' Templ, M. \emph{Statistical Disclosure Control for Microdata Using the
#' R-Package sdcMicro}, Transactions on Data Privacy, vol. 1, number 2, pp.
#' 67-85, 2008. \url{http://www.tdp.cat/issues/abs.a004a08.php}
#'
#' Templ, M. \emph{New Developments in Statistical Disclosure Control and
#' Imputation: Robust Statistics Applied to Official Statistics},
#' Suedwestdeutscher Verlag fuer Hochschulschriften, 2009, ISBN: 3838108280,
#' 264 pages.
#' @keywords package
#' @examples
#' \donttest{
#' ## example from Capobianchi, Polettini and Lucarelli:
#' data(francdat)
#' f <- freqCalc(francdat, keyVars=c(2, 4:6), w = 8)
#' f
#' f$fk
#' f$Fk
#'
#' ## dealing with missing values:
#' x <- francdat
#' x[3,5] <- NA
#' x[4,2] <- x[4,4] <- NA
#' x[5,6] <- NA
#' x[6,2] <- NA
#' f2 <- freqCalc(x, keyVars = c(2, 4:6), w = 8)
#' f2$fk
#' f2$Fk
#'
#' ## individual risk calculation:
#' indivf <- indivRisk(f)
#' indivf$rk
#'
#' ## Local Suppression
#' localS <- localSupp(f, keyVar = 2, threshold = 0.25)
#' f2 <- freqCalc(localS$freqCalc, keyVars=c(2, 4:6), w = 8)
#' indivf2 <- indivRisk(f2)
#' indivf2$rk
#'
#' ## select another keyVar and run localSupp() once again,
#' ## if you think the table is not fully protected
#' data(free1)
#' free1 <- as.data.frame(free1)
#' f <- freqCalc(x = free1, keyVars = 1:3, w = 30)
#' ind <- indivRisk(f)
#' ## and now you can use the interactive plot for individual risk objects:
#' ## plot(ind)
#'
#' ## example from Capobianchi, Polettini and Lucarelli:
#' data(francdat)
#' l1 <- localSuppression(
#' obj = francdat,
#' keyVars=c(2, 4:6),
#' importance = c(1, 3, 2, 4)
#' )
#' l1
#' l1$x
#' l2 <- localSuppression(obj = francdat, keyVars=c(2, 4:6), k = 2)
#' l3 <- localSuppression(obj = francdat, keyVars=c(2, 4:6), k = 4)
#'
#' ## Global recoding:
#' data(free1)
#' free1 <- as.data.frame(free1)
#' free1[, "AGE"] <- globalRecode(
#' obj = free1[, "AGE"],
#' breaks = c(1,9,19,29,39,49,59,69,100),
#' labels = 1:8
#' )
#'
#' ## Top coding:
#' topBotCoding(
#' obj = free1[, "DEBTS"],
#' value = 9000,
#' replacement = 9100,
#' kind = "top"
#' )
#'
#' ## Numerical Rank Swapping:
#' data(Tarragona)
#' Tarragona1 <- rankSwap(Tarragona, P = 10, K0 = NULL, R0 = NULL)
#'
#' ## Microaggregation:
#' m1 <- microaggregation(Tarragona, method = "onedims", aggr = 3)
#' m2 <- microaggregation(Tarragona, method = "pca", aggr = 3)
#'
#' ## using a subset because of computation time
#' valTable(Tarragona[1:50, ], method = c("simple", "onedims", "pca"))
#'
#' data(microData)
#' microData <- as.data.frame(microData)
#' m_micro <- microaggregation(microData, method = "mdav")
#' summary(m_micro)
#' plotMicro(m_micro, 1, which.plot = 1) # not enough observations...
#' data(free1)
#' free1 <- as.data.frame(free1)
#' plotMicro(
#' x = microaggregation(free1[,31:34], method = "onedims"),
#' p = 1,
#' which.plot = 1
#' )
#'
#' ## disclosure risk (interval) and data utility:
#' m1 <- microaggregation(Tarragona, method = "onedims", aggr = 3)
#' dRisk(obj = Tarragona, xm = m1$mx)
#' dRisk(obj = Tarragona, xm = m2$mx)
#' dUtility(obj = Tarragona, xm = m1$mx)
#' dUtility(obj = Tarragona, xm = m2$mx)
#'
#' ## Fast generation of synthetic data with approximately
#' ## the same covariance matrix as the original one.
#' data(mtcars)
#' cov(mtcars[, 4:6])
#' df_gen <- dataGen(obj = mtcars[, 4:6], n = 200)
#' cov(df_gen)
#' pairs(mtcars[, 4:6])
#' pairs(df_gen)
#'
#' ## Post-Randomization (PRAM)
#' x <- factor(sample(1:4, 250, replace = TRUE))
#' pr1 <- pram(x)
#' length(which(pr1$x_pram == x))
#' summary(pr1)
#' x2 <- factor(sample(1:4, 250, replace=TRUE))
#' length(which(pram(x2)$x_pram == x2))
#'
#' data(free1)
#' marstat <- as.factor(free1[,"MARSTAT"])
#' marstatPramed <- pram(marstat)
#' summary(marstatPramed)
#'
#' ## The same functionality can be also applied to `sdcMicroObj`-objects
#' data(testdata)
#'
#' ## undo-functionality is by default restricted to data sets
#' ## with <= `1e5` rows; to modify, env-var `sdcMicro_maxsize_undo`
#' ## can be changed before creating a problem instance
#' Sys.setenv("sdcMicro_maxsize_undo" = 1e6)
#'
#' ## create an object
#' testdata$water <- factor(testdata$water)
#' sdc <- createSdcObj(
#' dat = testdata,
#' keyVars = c("urbrur", "roof", "walls", "electcon", "water", "relat", "sex"),
#' numVars = c("expend", "income", "savings"),
#' w = "sampling_weight"
#' )
#' head(sdc@@manipNumVars)
#'
#' ## Display risk-measures
#' sdc@@risk$global
#' sdc <- dRisk(sdc)
#' sdc@@risk$numeric
#'
#' ## Generation of synthetic data
#' synthdat <- dataGen(sdc)
#'
#' ## use addNoise with default parameters (not suggested)
#' sdc <- addNoise(sdc, variables = c("expend", "income"))
#' head(sdc@@manipNumVars)
#' sdc@@risk$numeric
#'
#' ## undolast step (remove adding noise)
#' sdc <- undolast(sdc)
#' head(sdc@@manipNumVars)
#' sdc@@risk$numeric
#'
#' ## apply addNoise() with custom parameters
#' sdc <- addNoise(sdc, noise = 0.2)
#' head(sdc@@manipNumVars)
#' sdc@@risk$numeric
#'
#' ## LocalSuppression
#' sdc <- undolast(sdc)
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#' sdc <- localSuppression(sdc)
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#'
#' ## microaggregation
#' sdc <- undolast(sdc)
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#' sdc <- microaggregation(sdc)
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#'
#' ## Post-Randomization
#' sdc <- undolast(sdc)
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#' sdc <- pram(sdc, variables = "water")
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#'
#' ## rankSwap
#' sdc <- undolast(sdc)
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#' sdc <- rankSwap(sdc)
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#' head(sdc@@risk$individual)
#' sdc@@risk$global
#'
#'
#' ## topBotCoding
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#' sdc@@risk$numeric
#' sdc <- topBotCoding(
#' obj = sdc,
#' value = 60000000,
#' replacement = 62000000,
#' column = "income"
#' )
#' head(get.sdcMicroObj(sdc, type = "manipNumVars"))
#' sdc@@risk$numeric
#'
#' ## LocalRecProg
#' data(testdata2)
#' keyVars <- c("urbrur", "roof", "walls", "water", "sex")
#' w <- "sampling_weight"
#' sdc <- createSdcObj(testdata2,
#' keyVars = keyVars,
#' weightVar = w
#' )
#' sdc@@risk$global
#' sdc <- LocalRecProg(sdc)
#' sdc@@risk$global
#'
#' ## Model-based risks using a formula
#' form <- as.formula(paste("~", paste(keyVars, collapse = "+")))
#' sdc <- modRisk(sdc, method = "default", formulaM = form)
#' get.sdcMicroObj(sdc, "risk")$model
#'
#' sdc <- modRisk(sdc, method = "CE", formulaM = form)
#' get.sdcMicroObj(sdc, "risk")$model
#'
#' sdc <- modRisk(sdc, method = "PML", formulaM = form)
#' get.sdcMicroObj(sdc, "risk")$model
#'
#' sdc <- modRisk(sdc, method = "weightedLLM", formulaM = form)
#' get.sdcMicroObj(sdc, "risk")$model
#'
#' sdc <- modRisk(sdc, method = "IPF", formulaM = form)
#' get.sdcMicroObj(sdc, "risk")$model
#' }
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.