Nothing
#'
#' @title Calculates the coordinates of the centroid of each n nearest neighbours
#' @description This function calculates the coordinates of the centroids for each n nearest neighbours.
#' @details The function finds the n-1 nearest neighbours of each data point in a 2-dimensional space.
#' The nearest neighbours are the data points with the minimum Euclidean distances from the point of
#' interest. Each point of interest and its n-1 nearest neighbours are then used for the calculation
#' of the coordinates of the centroid of those n points. Centroid here is referred to the centre of mass,
#' i.e. the x-coordinate of the centroid is the average value of the x-coordinates of the n nearest
#' neighbours and the y-coordinate of the centroid is the average of the y-coordinates of the n nearest
#' neighbours. The coordinates of the centroids return to the client side function and can be used for the
#' plot of non-disclosive graphs (e.g. scatter plots, heatmap plots, contour plots, etc).
#' @param x the name of a numeric vector, the x-variable.
#' @param y the name of a numeric vector, the y-variable.
#' @param k the number of the nearest neighbours for which their centroid is calculated if the
#' \code{method.indicator} is equal to 1 (i.e. deterministic method).
#' @param noise the percentage of the initial variance that is used as the variance of the embedded
#' noise if the \code{method.indicator} is equal to 2 (i.e. probabilistic method).
#' @param method.indicator a number equal to either 1 or 2. If the value is equal to 1 then the
#' 'deterministic' method is used. If the value is set to 2 the 'probabilistic' method is used.
#' @return a list with the x and y coordinates of the centroids if the deterministic method is used
#' or the x and y coordinated of the noisy data if the probabilistic method is used.
#' @author Demetris Avraam for DataSHIELD Development Team
#' @export
#'
heatmapPlotDS <- function(x, y, k, noise, method.indicator){
###################################################################
# MODULE 1: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
#nfilter.tab <- as.numeric(thr$nfilter.tab) #
#nfilter.glm <- as.numeric(thr$nfilter.glm) #
#nfilter.subset <- as.numeric(thr$nfilter.subset) #
#nfilter.string <- as.numeric(thr$nfilter.string) #
#nfilter.stringShort <- as.numeric(thr$nfilter.stringShort) #
nfilter.kNN <- as.numeric(thr$nfilter.kNN) #
nfilter.noise <- as.numeric(thr$nfilter.noise) #
#nfilter.levels.density <- as.numeric(thr$nfilter.levels.density) #
#nfilter.levels.max <- as.numeric(thr$nfilter.levels.max) #
###################################################################
# back-up current .Random.seed and revert on.exit
old_seed <- .Random.seed
on.exit(.Random.seed <- old_seed, add = TRUE)
# Cbind the columns of the two variables and remove any rows that include NAs
data.table <- cbind.data.frame(x, y)
data.complete <- stats::na.omit(data.table)
x <- as.vector(data.complete[,1])
y <- as.vector(data.complete[,2])
if(method.indicator==1){
# standardise the variables
x.standardised <- (x-mean(x))/stats::sd(x)
y.standardised <- (y-mean(y))/stats::sd(y)
# Create a data.frame for the variables
data <- data.frame(x.standardised, y.standardised)
# Calculate the length of the data.frame after ommitting any rows with NAs
N.data <- dim(data)[1]
# Check if k is integer and has a value greater than or equal to the pre-specified threshold
# and less than or equal to the length of rows of data.complete minus the pre-specified threshold
if(k < nfilter.kNN | k > (N.data - nfilter.kNN)){
stop(paste0("k must be greater than or equal to ", nfilter.kNN, "and less than or equal to ", (N.data-nfilter.kNN), "."), call.=FALSE)
}else{
neighbours = k
}
# Find the k-1 nearest neighbours of each data point
nearest <- RANN::nn2(data, k = neighbours)
# Calculate the centroid of each n nearest data points
x.centroid <- matrix()
y.centroid <- matrix()
for (i in 1:N.data){
x.centroid[i] <- mean(x.standardised[nearest$nn.idx[i,1:neighbours]])
y.centroid[i] <- mean(y.standardised[nearest$nn.idx[i,1:neighbours]])
}
# Calculate the scaling factor
x.scalingFactor <- stats::sd(x.standardised)/stats::sd(x.centroid)
y.scalingFactor <- stats::sd(y.standardised)/stats::sd(y.centroid)
# Apply the scaling factor to the centroids
x.masked <- x.centroid * x.scalingFactor
y.masked <- y.centroid * y.scalingFactor
# Shift the centroids back to the actual position and scale of the original data
x.new <- (x.masked * stats::sd(x)) + mean(x)
y.new <- (y.masked * stats::sd(y)) + mean(y)
}
if (method.indicator==2){
# Create a data.frame for the variables
data <- data.frame(x, y)
# Calculate the length of the data.frame after ommitting any rows with NAs
N.data <- dim(data)[1]
# Check if the percentage of the variance that is specified in the argument 'noise'
# and is used as the variance of the embedded noise is a greater
# than the minimum threshold specified in the filter 'nfilter.noise'
if(noise < nfilter.noise){
stop(paste0("'noise' must be greater than or equal to ", nfilter.noise), call.=FALSE)
}else{
percentage <- noise
}
# the study-specific seed for random number generation
seed <- getOption("datashield.seed")
if (is.null(seed))
stop("heatmapPlotDS requires 'datashield.seed' R option to operate", call.=FALSE)
set.seed(seed)
x.new <- x + stats::rnorm(N.data, mean=0, sd=sqrt(percentage*stats::var(x)))
y.new <- y + stats::rnorm(N.data, mean=0, sd=sqrt(percentage*stats::var(y)))
}
# Return a list with the x and y coordinates of the centroids
return(list(x.new, y.new))
}
# AGGREGATE FUNCTION
# heatmapPlotDS
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.