sdcMicroObj-class: Class '"sdcMicroObj"'

Description Usage Arguments Value Objects from the Class Author(s) Examples

Description

Class to save all information about the SDC process

undo last changes to sdcMicroObj-objects if possible note that this will only work if the user makes use of the prev slot or uses the sdcMicroObj functions

strataVar<- allows to modify the variable which is used if anonymization limitation techniques are applied independent for each characteristic of the defined strata.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
createSdcObj(dat, keyVars, numVars = NULL, pramVars = NULL,
  ghostVars = NULL, weightVar = NULL, hhId = NULL, strataVar = NULL,
  sensibleVar = NULL, excludeVars = NULL, options = NULL, seed = NULL,
  randomizeRecords = FALSE, alpha = 1)

undolast(object)

strataVar(object) <- value

## S4 replacement method for signature 'sdcMicroObj,characterOrNULL'
strataVar(object) <- value

Arguments

dat

The microdata set. A numeric matrix or data frame containing the data.

keyVars

Indices or names of categorical key variables. They must, of course, match with the columns of ‘dat’.

numVars

Index or names of continuous key variables.

pramVars

Indices or names of categorical variables considered to be pramed.

ghostVars

if specified a list which each element being a list of exactly two elements. The first element must be a character vector specifying exactly one variable name that was also specified as a categorical key variable (keyVars), while the second element is a character vector of valid variable names (that must not be listed as keyVars). If localSuppression or kAnon was applied, the resulting suppression pattern for each key-variable is transferred to the depending variables.

weightVar

Indices or name determining the vector of sampling weights.

hhId

Index or name of the cluster ID (if available).

strataVar

Indices or names of stratification variables.

sensibleVar

Indices or names of sensible variables (for l-diversity)

excludeVars

which variables of dat should not be included in result-object? Users may specify a vector of variable-names available in dat that were not specified in either keyVars, numVars, pramVars, ghostVars, hhId, strataVar or sensibleVar.

options

additional options (if specified, a list must be used as input)

seed

(numeric) number specifiying the seed which will be set to allow for reproducablity. The number will be rounded and saved as element seed in slot options.

randomizeRecords

(logical) if TRUE, the order of observations in the input microdata set will be randomized.

alpha

numeric between 0 and 1 specifying the fraction on how much keys containing NAs should contribute to the frequency calculation which is also crucial for risk-estimation.

object

a sdcMicroObj-class object

value

NULL or a character vector of length 1 specifying a valid variable name

Value

a sdcMicroObj-class object

an object of class sdcMicroObj with modified slot @strataVar

Objects from the Class

Objects can be created by calls of the form new("sdcMicroObj", ...).

Author(s)

Bernhard Meindl, Alexander Kowarik, Matthias Templ, Elias Rut

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
showClass("sdcMicroObj")
## Not run: 
data(testdata)
sdc <- createSdcObj(testdata,
  keyVars=c('urbrur','roof','walls','water','electcon','relat','sex'),
  numVars=c('expend','income','savings'), w='sampling_weight')
head(sdc@manipNumVars)
### Display Risks
sdc@risk$global
sdc <- dRisk(sdc)
sdc@risk$numeric
### use addNoise without Parameters
sdc <- addNoise(sdc,variables=c("expend","income"))
head(sdc@manipNumVars)
sdc@risk$numeric
### undolast
sdc <- undolast(sdc)
head(sdc@manipNumVars)
sdc@risk$numeric
### redo addNoise with Parameter
sdc <- addNoise(sdc, noise=0.2)
head(sdc@manipNumVars)
sdc@risk$numeric
### dataGen
#sdc <- undolast(sdc)
#head([email protected]$individual)
#[email protected]$global
#sdc <- dataGen(sdc)
#head([email protected]$individual)
#[email protected]$global
### LocalSuppression
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
sdc <- localSuppression(sdc)
head(sdc@risk$individual)
sdc@risk$global
### microaggregation
sdc <- undolast(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc <- microaggregation(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
### pram
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
sdc <- pram(sdc,keyVar="water")
head(sdc@risk$individual)
sdc@risk$global
### rankSwap
sdc <- undolast(sdc)
head(sdc@risk$individual)
sdc@risk$global
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc <- rankSwap(sdc)
head(get.sdcMicroObj(sdc, type="manipNumVars"))
head(sdc@risk$individual)
sdc@risk$global
### suda2
sdc <- suda2(sdc)
sdc@risk$suda2
### topBotCoding
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc@risk$numeric
sdc <- topBotCoding(sdc, value=60000000, replacement=62000000, column="income")
head(get.sdcMicroObj(sdc, type="manipNumVars"))
sdc@risk$numeric
### LocalRecProg
data(testdata2)
sdc <- createSdcObj(testdata2,
  keyVars=c("urbrur", "roof", "walls", "water", "sex", "relat"))
sdc@risk$global
sdc <- LocalRecProg(sdc)
sdc@risk$global
### LLmodGlobalRisk
sdc <- undolast(sdc)
sdc <- LLmodGlobalRisk(sdc, inclProb=0.001)
sdc@risk$model

## End(Not run)

## we can also specify ghost (linked) variables
## these variables are linked to some categorical key variables
## and have the sampe suppression pattern as the variable that they
## are linked to after \code{\link{localSuppression}} has been applied
data(testdata)
testdata$electcon2 <- testdata$electcon
testdata$electcon3 <- testdata$electcon
testdata$water2 <- testdata$water

keyVars <- c("urbrur","roof","walls","water","electcon","relat","sex")
numVars <- c("expend","income","savings")
w <- "sampling_weight"

## we want to make sure that some variables not used as key-variables
## have the same suppression pattern as variables that have been
## selected as key variables. Thus, we are using 'ghost'-variables.
ghostVars <- list()

## we want variables 'electcon2' and 'electcon3' to be linked
## to key-variable 'electcon'
ghostVars[[1]] <- list()
ghostVars[[1]][[1]] <- "electcon"
ghostVars[[1]][[2]] <- c("electcon2","electcon3")

## we want variable 'water2' to be linked to key-variable 'water'
ghostVars[[2]] <- list()
ghostVars[[2]][[1]] <- "water"
ghostVars[[2]][[2]] <- "water2"

## create the sdcMicroObj
obj <- createSdcObj(testdata, keyVars=keyVars,
  numVars=numVars, w=w, ghostVars=ghostVars)

## apply 3-anonymity to selected key variables
obj <- kAnon(obj, k=3); obj

## check, if the suppression patterns are identical
manipGhostVars <- get.sdcMicroObj(obj, "manipGhostVars")
manipKeyVars <- get.sdcMicroObj(obj, "manipKeyVars")
all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon2))
all(is.na(manipKeyVars$electcon) == is.na(manipGhostVars$electcon3))
all(is.na(manipKeyVars$water) == is.na(manipGhostVars$water2))

## exclude some variables
obj <- createSdcObj(testdata, keyVars=c("urbrur","roof","walls"), numVars="savings",
   weightVar=w, excludeVars=c("relat","electcon","hhcivil","ori_hid","expend"))
colnames(get.sdcMicroObj(obj, "origData"))

sdcMicro documentation built on Nov. 17, 2017, 8:08 a.m.