Nothing
#' @title Secure ranking of "V2BR" (vector to be ranked) across all sources
#' @description The second key serverside function that prepares the global
#' ranks of the the real data only generated in the first stage of the
#' ranking procedure and encrypts them in preparation for generating global
#' ranks that correspond 1 to 1 with only the real data in V2BR.
#' @details Severside assign function called by ds.ranksSecure. It takes the
#' global ranks currently held in sR5.df which reflect the global ranks based on
#' the "combined real+pseudo data vector" as encrypted by blackBoxDS
#' but with all pseudo-data stripped out. It then uses these global ranks (of
#' the real data) as if they were a new variable to be ranked. This is then
#' equivalent to blackBoxDS with the primary difference that no
#' pseudo-data are needed. This is because the global ranks are fundamentally
#' non-disclosive and so can be transferred to the clientside with no risk of
#' disclosure. However, in order to ensure that the client cannot compare
#' the list of global.ranks in sR4.df (after initial global ranking based on
#' ranking of real and pseudo-data combined) with the global.ranks to be
#' generated by blackBoxRanksDS (based solely on the real data they are
#' processed through seven more rounds of encryption as before in blackBoxDS.
#' In consequence the client remains unable to determine which of the original
#' global ranks corresponded to real data and which to pseudo-data. In
#' addition, blackBoxRanksDS does not need to determine the number of decimal
#' places in the data because it is only applied to ranks which are assumed to
#' be integers. For more details about the cluster of functions that
#' collectively enable secure global ranking and estimation of global quantiles
#' see the associated document entitled "secure.global.ranking.docx". Also
#' see the header file for ds.ranksSecure and the header file for blackBoxDS
#' @param input.var.name a character string specifying the name of the
#' vector holding the global ranks. This argument is set automatically by
#' the clientside function ds.ranksSecure
#' @param shared.seedval a pseudorandom number seed that ensures that the
#' processes generating the order and parameterisation of the encryption
#' algorithms are the same in each study. This argument is set by the argument
#' <shared.seed.value> in the clientside function ds.ranksSecure. The
#' seed value shared by all studies in setting up the encryption procedures
#' in blackBoxRanksDS is arbitrarily changed from that used to set up the
#' encryption procedures in blackBoxDS, so the the set of 7 encryption
#' algorithms is deliberately different. For more
#' details, including future plans to share this starting seed in a more secure
#' way, please see the associated document entitled "secure.global.ranking.docx"
#' and the header file for ds.ranksSecure.
#' @return writes a data frame object entitled blackbox.ranks.df to the
#' serverside. In each study this contains the encrypted global ranks
#' and a range of other key components from the second stage (ranking of global
#' ranks for real observations only) of the ranking procedure. For more details
#' see the associated document entitled "secure.global.ranking.docx"
#' @author Paul Burton 9th November, 2021
#' @export
#'
blackBoxRanksDS <- function(input.var.name=NULL, shared.seedval){ #START FUNC
#######################################################
#MODULE 1: CAPTURE THE nfilter SETTINGS
#thr<-dsBase::listDisclosureSettingsDS()
#nfilter.tab <- as.numeric(thr$nfilter.tab)
#nfilter.glm <- as.numeric(thr$nfilter.glm)
#nfilter.subset <- as.numeric(thr$nfilter.subset)
#nfilter.string <- as.numeric(thr$nfilter.string)
#nfilter.stringShort <- as.numeric(thr$nfilter.stringShort)
#nfilter.kNN <- as.numeric(thr$nfilter.kNN)
#nfilter.noise <- as.numeric(thr$nfilter.noise)
#nfilter.levels <- as.numeric(thr$nfilter.levels)
########################################################
# back-up current .Random.seed and revert on.exit
old_seed <- .Random.seed
on.exit(.Random.seed <- old_seed, add = TRUE)
input.var <- eval(parse(text=input.var.name), envir = parent.frame())
input.global.ranks<-input.var
#ESTIMATED OVERALL MEAN AND SD FROM meanQuantileDS
#SAVED IN input.mean.sd.df BY ds.dmtC2S
max.sd.input.ranks<-input.ranks.sd.df$max.sd.input.ranks
mean.input.ranks<-input.ranks.sd.df$mean.input.ranks
numsubs.real<-length(input.global.ranks)
numsubs<-numsubs.real
#Create indicators for original original sequence order and real/synthetic
ID.seq.real.orig<-1:numsubs
#Allow individual implementations of blackBoxDS for groups of projects to have a
#unique but shared starting seed (given specified starting seed) but only
#the person setting up the function can know how that seed is perturbed relative
#to the specified seed
restart.seed.transformation.control.n<-78615
restart.seed.other.seed.actions<-43
#Now set up for repeated transformations
shared.seedval<-(shared.seedval+19)*7
set.seed(shared.seedval)
#Create long fixed sequence of calls to seed to keep resetting seed at start
null.vector<-stats::runif(restart.seed.transformation.control.n,0,1)
#Create transformation control vectors/values first so all studies
#have the same transformation controls given input random seed
#even if the lengths of later vectors (eg synthetic concealing data) are
#different between studies so would lead to inconsistent control
#vectors/values
control.vector<-sample(c(1,1,2,2,3,3),replace=FALSE)
#control.vector<-c(1,control.vector)
control.vector
control.value<-stats::runif(6,0.0001,1)
control.value
#Reset seed for other purposes
set.seed(shared.seedval+restart.seed.other.seed.actions)
#Initialise input.var for analysis. Start by converting to values 0 to 1
#in same order as original input.var. Going to use probit function (stats::pnorm) for this.
#This can take any values -inf to +inf but to avoid extreme value rounding errors
#scale input.var to normal 0 1
#Unlike blackBoxDS no need to check that no attempt has been made to enter a
#fake value for max.sd.input.var. That is because in blackBoxRanksDS no
#synthetic data are generated so the only impact of changing the value of
#max.sd.input.var is to change the first probit transformation. This will either
#cause the whole analysis to fail if the value of max.sd.input.var is too small
#or large or will have no effect at all. So there is no enhanced disclosure risk
#and no need for the equivalent disclosure trap.
input.var.probit.temp<-((input.global.ranks-mean.input.ranks)/max.sd.input.ranks)
input.var.probit<-stats::pnorm(input.var.probit.temp)
if(min(input.var.probit)<=0 | max(input.var.probit)>=1){
error.message<-
paste0("FAILED: initialised values should strictly be >0 and <1 this rule has been violated
there is possiblyly an NA, inf or other error in the input.global.ranks")
stop(error.message, call. = FALSE)
}
if(min(rank(input.global.ranks)-rank(input.var.probit))<0 | max(rank(input.global.ranks)-rank(input.var.probit))>0) {
error.message<-
paste0("FAILED: probit initialised values are not in an identical order to the original input variable please check")
stop(error.message, call. = FALSE)
}
intermediate.value.matrix<-matrix(NA,ncol=8,nrow=numsubs)
intermediate.value.matrix[,1]<-input.global.ranks
intermediate.value.matrix[,2]<-input.var.probit
for(cv in 3:(length(control.vector)+2)){
if(control.vector[cv-2]==1){
intermediate.value.matrix[,cv]<-intermediate.value.matrix[,(cv-1)]^(control.value[cv-2])
}
if(control.vector[cv-2]==2){
intermediate.value.matrix[,cv]<-intermediate.value.matrix[,(cv-1)]+(control.value[cv-2])
}
if(control.vector[cv-2]==3){
intermediate.value.matrix[,cv]<-intermediate.value.matrix[,(cv-1)]*(control.value[cv-2])
}
}
intermediate.value.matrix<-cbind(intermediate.value.matrix,ID.seq.real.orig)
colnames(intermediate.value.matrix)<-c("input.global.ranks.orig","input.var.probit",
1:6,"ID.seq.real.orig")
dim(intermediate.value.matrix)
utils::head(intermediate.value.matrix)
utils::tail(intermediate.value.matrix)
rank.intermediate.value.matrix<-matrix(NA,ncol=8,nrow=numsubs)
for(k in 1:8)
{
rank.intermediate.value.matrix[,k]<-rank(intermediate.value.matrix[,k])
}
rank.intermediate.value.matrix<-cbind(rank.intermediate.value.matrix,ID.seq.real.orig)
colnames(rank.intermediate.value.matrix)<-c("input.global.ranks.orig","input.var.probit",
1:6,"ID.seq.real.orig")
message("\nRANKS IN ALL COLUMNS ABOVE SHOULD BE THE SAME\n")
control.vector
control.value
#CREATE blackBox OUTPUT DF
intermediate.value.df<-data.frame(intermediate.value.matrix)
names(intermediate.value.df)
encrypted.global.ranks<-intermediate.value.df$"X6"
ranks.input.global.ranks.orig<-rank(intermediate.value.df$input.global.ranks.orig)
ranks.encrypted.global.ranks<-rank(intermediate.value.df$"X6")
output.df<-data.frame(cbind(input.global.ranks,encrypted.global.ranks,ranks.input.global.ranks.orig,ranks.encrypted.global.ranks,ID.seq.real.orig))
utils::head(output.df)
utils::tail(output.df)
#Sort df by magnitude of key variable
ord.by.val<-order(input.global.ranks)
output.temp.sort.by.val<-output.df[ord.by.val,]
utils::head(output.temp.sort.by.val)
ID.by.val<-1:nrow(output.temp.sort.by.val)
output.df.sort.by.val<-data.frame(cbind(output.temp.sort.by.val,ID.by.val))
utils::head(output.df.sort.by.val)
utils::tail(output.df.sort.by.val)
dim(output.df.sort.by.val)
blackbox.ranks.df <- cbind(sR5.df[,c(5,1,2,3,9,6,10)],output.df.sort.by.val[,1:2])
colnames(blackbox.ranks.df)[3]<-"encrypted.input.var"
colnames(blackbox.ranks.df)[5]<-"global.ranks.input.from.sR5"
colnames(blackbox.ranks.df)[6]<-"is.real"
colnames(blackbox.ranks.df)[8]<-"global.ranks.after.blackbox"
colnames(blackbox.ranks.df)[9]<-"encrypted.global.ranks"
utils::head(blackbox.ranks.df)
if(sum(round(rank(blackbox.ranks.df[,5])-rank(blackbox.ranks.df[,8]),2)==0)!=numsubs)
{
error.message<-
paste0("FAILED: inconsistent ranking across different transformations in black box,
try a different seed. Altenatively this could reflect modification of the
clientside code which is not recommended. Finally, it can also occur
if the R session on one or more of the opal data servers runs out
of memory")
stop(error.message, call. = FALSE)
}else{
message("\nPROCESSING SUCCESSFUL, ALL RANKS AGREE FOR ALL TRANSFORMATIONS\n\n")
}
return(blackbox.ranks.df)
} #END FUNCTION
#ASSIGN
# blackBoxRanksDS
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.