#' Stability mechanism
#'
#' @import methods
#' @export mechanismStability
#' @exportClass mechanismStability
#'
#' @include mechanism.R
mechanismStability <- setRefClass(
Class = 'mechanismStability',
contains = 'mechanism'
)
mechanismStability$methods(
#' Stability Mechanism
#'
#' Differentially private evaluation of input function `fun` with sensitivity `sens` on input data `x` using
#' the stability mechanism. In general, the stability mechanism is one which takes advantage of `stable`
#' functions, i.e. ones where the function output is constant in some neighborhood around the input database.
#' In this library, the stability mechanism is implemented to be used specifically for the histogram statistic,
#' and should not be used for any other function except by users who are confident in their understanding of
#' the mechanism. See references for more information on the general stability mechanism.
#'
#' For a histogram generated by the Stability mechanism, empty buckets will be removed and any
#' buckets with a count below an accuracy threshold will also be removed. Removing these buckets based on
#' low counts is what creates the added guarantee of privacy.
#'
#' The accuracy threshold for removing bins with low counts is calculated by: \eqn{1 + (2 * ln(2 / \delta) / \epsilon)}.
#'
#' @name Stability Mechanism
#' @references
#' S. Vadhan The Complexity of Differential Privacy, Section 3.3 Releasing Stable Values p.23-24. March 2017.
#'
#' Also discussed in:
#'
#' A. Smith, A. Thakurta Differentially Private Model Selection via Stability Arguments and the Robustness of the Lasso. 2013.
#'
#' C. Dwork, J. Lei Differential Privacy and Robust Statistics. November 2008.
#'
#'
#' @param fun function of input x to add Laplace noise to.
#' @param x input that function fun will be evaluated on.
#' @param sens sensitivity of fun. Sensitivity is defined in above citation.
#' @param ... any additional (optional) parameters
#'
#' @return result of post-processing on input function "fun" evaluated on database "x", assuming sensitivity of fun is "sens".
#'
#' @examples
#' # the function in `statistic-histogram.R` that creates a histogram from input data
#' histogram_function <- fun.hist
#'
#' # the data frame that holds the data we want to analyze, in this case the data is called "PUMS5extract10000"
#' data <- data(PUMS5extract10000)
#'
#' # the variable for which we want a histogram
#' variable <- "age"
#'
#' # the sensitivity for the histogram, the default sensitivity for histograms is 2
#' sens <- 2
#'
#' # the post-processing function to use to format the histogram release correctly
#' post_processing_function <- dpHistogram$postProcess
#'
#' stability_histogram <- mechanismStability$evaluate(histogram_function, data[, variable], sens, post_processing_function)
#'
evaluate = function(fun, x, sens, ...) {
# before calculating the histogram statistic, confirm that delta is less than (1/n^2)
# if delta is greater than or equal to (1/n^2), return an error message to the user
if (.self$delta >= (1 / (.self$n)^2)) stop("A delta value on the order of 1/n^2 is a privacy risk, as it allows for additional data to leak beyond the privacy parameter epsilon. Choose a smaller value for delta to maintain your privacy guarantee.")
# if the variable is numeric or integer and the stability mechanism is being used,
# then the stability mechanism needs to determine the bins to maintain privacy.
# Get the range of the data, then get the number of bins from the input number of
# bins or the input granularity
dataRange <- NULL
numHistogramBins <- NULL
imputationRange <- NULL
histogramBins <- NULL
if (.self$varType %in% c('numeric', 'integer')) {
dataRange <- range(x)
numHistogramBins <- ifelse(is.null(.self$nBins), .self$n / .self$granularity, .self$nBins)
histogramBins <- seq(dataRange[1], dataRange[2], length.out=(numHistogramBins + 1))
# set the imputation range to the detected data range to maintain privacy
imputationRange <- dataRange
}
x <- censorData(x, .self$varType, dataRange, histogramBins, rngFormat)
x <- fillMissing(x, .self$varType, imputeRng=imputationRange, categories=levels(x)) # levels(x) will be NULL for numeric variables, a vector of bins for character variables
fun.args <- getFuncArgs(fun, inputList=list(bins=histogramBins), inputObject=.self)
inputVals <- c(list(x=x), fun.args)
trueVal <- do.call(fun, inputVals) # Concern: are we confident that the environment this is happening in is getting erased.
# remove empty bins before noise is added (per definition of stability mechanism)
trueVal <- trueVal[trueVal > 0]
scale <- sens / .self$epsilon
release <- trueVal + dpNoise(n=length(trueVal), scale=scale, dist='laplace')
# calculate the accuracy threshold, below which histogram buckets should be removed
accuracyThreshold <- 1+2*log(2/.self$delta)/.self$epsilon
# remove buckets below the threshold
release <- release[release > accuracyThreshold]
out <- list('release' = release)
return(out)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.