Nothing
#'
#' @title returns the minimum and the maximum of the input numeric vector
#' @description this function returns the minimum and maximum of the input numeric vector which
#' depends on the argument \code{method.indicator}. If the method.indicator is set to 1 (i.e. the
#' 'smallCellsRule' is used) the computed minimum and maximum values are multiplied by a very small
#' random number. If the method.indicator is set to 2 (i.e. the 'deterministic' method is used) the
#' function returns the minimum and maximum values of the vector with the scaled centroids. If the
#' method.indicator is set to 3 (i.e. the 'probabilistic' method is used) the function returns the
#' minimum and maximum values of the generated 'noisy' vector.
#' @param xvect the numeric vector for which the histogram is desired.
#' @param method.indicator a number equal to either 1, 2 or 3 indicating the method of disclosure
#' control that is used for the generation of the histogram. If the value is equal to 1 then the
#' 'smallCellsRule' is used. If the value is equal to 2 then the 'deterministic' method is used.
#' If the value is set to 3 then the 'probabilistic' method is used.
#' @param k the number of the nearest neighbours for which their centroid is calculated if the
#' \code{method.indicator} is equal to 2 (i.e. deterministic method).
#' @param noise the percentage of the initial variance that is used as the variance of the embedded
#' noise if the \code{method.indicator} is equal to 3 (i.e. probabilistic method).
#' @return a numeric vector which contains the minimum and the maximum values of the vector
#' @author Amadou Gaye, Demetris Avraam for DataSHIELD Development Team
#' @export
#'
histogramDS1 <- function(xvect, method.indicator, k, noise){
##################################################################
# MODULE 1: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
nfilter.tab <- as.numeric(thr$nfilter.tab) #
#nfilter.glm <- as.numeric(thr$nfilter.glm) #
#nfilter.subset <- as.numeric(thr$nfilter.subset) #
#nfilter.string <- as.numeric(thr$nfilter.string) #
#nfilter.stringShort <- as.numeric(thr$nfilter.stringShort) #
nfilter.kNN <- as.numeric(thr$nfilter.kNN) #
nfilter.noise <- as.numeric(thr$nfilter.noise) #
nfilter.levels.density <- as.numeric(thr$nfilter.levels.density) #
nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) #
##################################################################
# back-up current .Random.seed and revert on.exit
old_seed <- .Random.seed
on.exit(.Random.seed <- old_seed, add = TRUE)
# print an error message if the input vector is not a numeric
if(!(is.numeric(xvect))){
output <- "The input vector is not a numeric!"
}else{
if (method.indicator==1){
# the study-specific seed for random number generation
seed <- getOption("datashield.seed")
if (is.null(seed))
stop("histogramDS1 requires 'datashield.seed' R option to operate", call.=FALSE)
set.seed(seed)
rr <- c(min(xvect, na.rm=TRUE), max(xvect, na.rm=TRUE))
if(rr[1] < 0){ min <- rr[1] * stats::runif(1, 1.01, 1.05) }else{ min <- rr[1] * stats::runif(1, 0.95, 0.99) }
if(rr[2] < 0){ max <- rr[2] * stats::runif(1, 0.95, 0.99) }else{ max <- rr[2] * stats::runif(1, 1.01, 1.05) }
output <- c(min, max)
}
if(method.indicator==2){
# Remove any missing values
x <- stats::na.omit(xvect)
# Standardise the variable
x.standardised <- (x-mean(x))/stats::sd(x)
# Calculate the length of the variable after ommitting any NAs
N.data <- length(x)
# Check if k is integer and has a value greater than or equal to the pre-specified threshold
# and less than or equal to the length of rows of data.complete minus the pre-specified threshold
if(k < nfilter.kNN | k > (N.data - nfilter.kNN)){
stop(paste0("k must be greater than or equal to ", nfilter.kNN, " and less than or equal to ", (N.data-nfilter.kNN), "."), call.=FALSE)
}else{
neighbours = k
}
# Find the k-1 nearest neighbours of each data point
nearest <- RANN::nn2(x.standardised, k = neighbours)
# Calculate the centroid of each n nearest data points
x.centroid <- matrix()
for (i in 1:N.data){
x.centroid[i] <- mean(x.standardised[nearest$nn.idx[i,1:neighbours]])
}
# Calculate the scaling factor
x.scalingFactor <- stats::sd(x.standardised)/stats::sd(x.centroid)
# Apply the scaling factor to the centroids
x.masked <- x.centroid * x.scalingFactor
# Shift the centroids back to the actual position and scale of the original data
x.new <- (x.masked * stats::sd(x)) + mean(x)
# find the minimum and the maximum of the distribution
min <- min(x.new)
max <- max(x.new)
output <- c(min, max)
}
if(method.indicator==3){
# Remove any missing values
x <- stats::na.omit(xvect)
# Calculate the length of the variable after ommitting any NAs
N.data <- length(x)
# Check if the percentage of the variance that is specified in the argument 'noise'
# and is used as the variance of the embedded noise is a greater
# than the minimum threshold specified in the filter 'nfilter.noise'
if(noise < nfilter.noise){
stop(paste0("'noise' must be greater than or equal to ", nfilter.noise), call.=FALSE)
}else{
percentage <- noise
}
# the study-specific seed for random number generation
seed <- getOption("datashield.seed")
if (is.null(seed))
stop("histogramDS requires 'datashield.seed' R option to operate", call.=FALSE)
set.seed(seed)
# generate the noise-augmented vector
x.new <- x + stats::rnorm(N.data, mean=0, sd=sqrt(percentage*stats::var(x)))
# find the minimum and the maximum of the distribution
min <- min(x.new)
max <- max(x.new)
output <- c(min, max)
}
}
return (output)
}
# AGGREGATE FUNCTION
# histogramDS1
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.