Nothing
#'
#' @title recodeValuesDS an assign function called by ds.recodeValues
#' @description This function recodes specified values of elements in a vector into
#' a matched set of alternative specified values.
#' @details For all details see the help header for ds.recodeValues
#' @param var.name.text a character string providing the name for the vector representing the
#' variable to be recoded. <var.name.text> argument generated and passed directly to
#' recodeValuesDS by ds.recodeValues
#' @param values2replace.text a character string specifying the values in the
#' vector specified by the argument <var.name.text> that are to be replaced by new
#' values as specified in the new.values.vector. The <values2replace.text> argument
#' is generated and passed directly to recodeValuesDS by ds.recodeValues. In effect, the
#' <values2replace.vector> argument of the ds.recodeValues function is converted
#' to a character string format that is acceptable to the DataSHIELD R parser in the data repository
#' and so can be accepted by recodeValuesDS
#' @param new.values.text a character string specifying the new values to which
#' the specified values in the vector <var.name> are to be converted.
#' The <new.values.text> argument is generated and passed directly to recodeValuesDS
#' by ds.recodeValues. In effect, the <new.values.vector> argument of the
#' ds.recodeValues function is converted to a character string format that is
#' acceptable to the DataSHIELD R parser in the data repository
#' and so can be used in the call to recodeValuesDS.
#' @param missing if supplied, any missing values in the variable referred to by var.name.text
#' will be replaced by this value.
#' @return the object specified by the <newobj> argument (or default name '<var.name>_recoded')
#' initially specified in calling ds.recodeValues. The output object (the required
#' recoded variable called <newobj> is written to the serverside.
#' @author Paul Burton, Demetris Avraam for DataSHIELD Development Team
#' @export
#'
recodeValuesDS <- function(var.name.text=NULL, values2replace.text=NULL, new.values.text=NULL, missing=NULL){
# Check Permissive Privacy Control Level.
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana'))
#############################################################
#MODULE 1: CAPTURE THE used nfilter SETTINGS
thr <- dsBase::listDisclosureSettingsDS()
nfilter.subset <- as.numeric(thr$nfilter.subset)
nfilter.stringShort <- as.numeric(thr$nfilter.stringShort)
#############################################################
# DISCLOSURE TRAPS
var.name.text.chars <- strsplit(var.name.text, split="")
if(length(var.name.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: var.name.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
values2replace.text.chars <- strsplit(values2replace.text, split="")
if(length(values2replace.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: values2replace.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
new.values.text.chars <- strsplit(new.values.text, split="")
if(length(new.values.text.chars[[1]]) > nfilter.stringShort){
studysideMessage <- "Error: new.values.text argument too long (see nfilter.stringShort)"
stop(studysideMessage, call. = FALSE)
}
var2recode <- eval(parse(text=var.name.text), envir = parent.frame())
values2replace <- unlist(strsplit(values2replace.text, split=","))
new.values <- unlist(strsplit(new.values.text, split=","))
if(!is.null(missing)){missing <- as.numeric(missing)}
# get the class of the input variable
var.class <- class(var2recode)
# if the class of the input variable is not factor, numeric, character or integer then
# stop and return an error message
if (!(var.class %in% c('factor', 'character', 'numeric', 'integer'))){
studysideMessage <- "Error: The variable to recode must be either a factor, a character, a numeric or an integer"
stop(studysideMessage, call. = FALSE)
}
# recode using the recode function from the dplyr package
if (var.class == 'factor'){
expr <- as.list(new.values)
names(expr) <- values2replace
var.recoded <- dplyr::recode_factor(var2recode, !!!(expr))
if (!is.null(missing)){
var.recoded.tmp <- var.recoded
var.recoded <- addNA(var.recoded.tmp)
levels(var.recoded) <- c(levels(var.recoded.tmp), missing)
}
}
if (var.class == 'character'){
expr <- as.list(new.values)
names(expr) <- values2replace
var.recoded <- dplyr::recode(var2recode, !!!(expr), .missing=if(is.null(missing)){NULL}else{paste0("'", missing, "'")})
}
if (var.class == 'numeric'){
expr <- as.list(as.numeric(new.values))
names(expr) <- values2replace
var.recoded <- dplyr::recode(var2recode, !!!(expr), .missing=missing)
}
if (var.class == 'integer'){
expr <- as.list(as.numeric(new.values))
names(expr) <- values2replace
var2recode.n <- as.numeric(var2recode)
var.recoded <- dplyr::recode(var2recode.n, !!!(expr), .missing=missing)
var.recoded <- as.integer(var.recoded)
}
# DISCLOSURE TRAP ON LENGTH OF NA AND non-NA ELEMENTS OF ORIGINAL AND RECODED VECTORS
mark.original <- stats::complete.cases(var2recode)
non.NA.original.vector <- var2recode[mark.original]
non.NA.length.original <- length(non.NA.original.vector)
mark.recoded <- stats::complete.cases(var.recoded)
non.NA.recoded.vector <- var.recoded[mark.recoded]
non.NA.length.recoded <- length(non.NA.recoded.vector)
difference.non.NA.lengths <- abs(non.NA.length.recoded-non.NA.length.original)
# Non-NA SUBSET OF RECODED VARIABLE SMALLER THAN MINIMUM SUBSET SIZE - BLOCK CREATION OF RECODED VECTOR
# AND RETURN MESSAGE
if(non.NA.length.recoded < nfilter.subset){
studysideMessage <- "Error: number of non-NA elements of recoded vector < minimum subset size"
stop(studysideMessage, call. = FALSE)
}
########################################################################
##########MODULE WARNING OF POTENTIAL DIFFERENCE ATTACK ################
########################################################################
if((difference.non.NA.lengths < nfilter.subset) && (difference.non.NA.lengths > 0)){
studysideWarning1 <- "Warning: DataSHIELD monitors every session for potentially disclosive analytic requests. "
studysideWarning2 <- "The analysis you just submitted has generated a recoded variable in which the number of non-missing "
studysideWarning3 <- "elements differs - but only very slightly - from the original variable. This is most likely to be "
studysideWarning4 <- "an innocent consequence of your recoding needs. However, it could in theory be one step "
studysideWarning5 <- "in a difference-based attack aimed at identifying individuals. This analytic request has "
studysideWarning6 <- "therefore been highlighted in the session log file. Please be reassured, if you do not try "
studysideWarning7 <- "to identify individuals this will cause you no difficulty. However, if you do plan a "
studysideWarning8 <- "malicious attempt to identify individuals by differencing, this will become obvious in the "
studysideWarning9 <- "session log and you will be sanctioned. Possible consequences include loss of future access "
studysideWarning10 <- "to DataSHIELD and/or legal penalties."
return.message <- list(studysideWarning1, studysideWarning2, studysideWarning3, studysideWarning4,
studysideWarning5, studysideWarning6, studysideWarning7, studysideWarning8,
studysideWarning9, studysideWarning10)
warning(return.message, call. = FALSE)
}
# Convert characters "NA" to values NA if any
var.recoded[which(var.recoded == "NA")] <- NA
return(var.recoded)
}
# ASSIGN FUNCTION
# recodeValuesDS
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.