Nothing
###############################################################################|###################| 80 and 100
#' @title dataFrameSubsetDS2 an assign function called by ds.dataFrameSubset
#' @description Second serverside function for subsetting a data frame by row or by column.
#' @details A data frame is a list of variables all with the same number of rows,
#' which is of class 'data.frame'. For all details see the help header for ds.dataFrameSubset
#' @param df.name a character string providing the name for the data.frame
#' to be sorted. <df.name> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param V1.name A character string specifying the name of a subsetting vector
#' to which a Boolean operator will be applied to define the subset to be created.
#' <V1.name> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param V2.name A character string specifying the name of the vector
#' or scalar to which the values in the vector specified by the argument <V1.name>
#' is to be compared.
#' <V2.name> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param Boolean.operator.n A character string specifying one of six possible Boolean operators:
#' '==', '!=', '>', '>=', '<', '<='
#' <Boolean.operator.n> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param keep.cols a numeric vector specifying the numbers of the columns to be kept in the
#' final subset when subsetting by column. For example: keep.cols=c(2:5,7,12) will keep
#' columns 2,3,4,5,7 and 12.
#' <keep.cols> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param rm.cols a numeric vector specifying the numbers of the columns to be removed before
#' creating the final subset when subsetting by column. For example: rm.cols=c(2:5,7,12)
#' will remove columns 2,3,4,5,7 and 12.
#' <rm.cols> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @param keep.NAs logical, if TRUE any NAs in the vector holding the final Boolean vector
#' indicating whether a given row should be included in the subset will be converted into
#' 1s and so they will be included in the subset. Such NAs could be caused by NAs in
#' either <V1.name> or <V2.name>. If FALSE or NULL NAs in the final Boolean vector will
#' be converted to 0s and the corresponding row will therefore be excluded from the subset.
#' <keep.NAs> argument generated and passed directly to
#' dataFrameSubsetDS2 by ds.dataFrameSubset
#' @return the object specified by the <newobj> argument (or default name '<df.name>_subset')
#' initially specified in calling ds.dataFrameSubset. The output object (the required
#' subsetted data.frame called <newobj> is written to the serverside. In addition,
#' two validity messages are returned via ds.dataFrameSubset
#' indicating whether <newobj> has been created in each data source and if so whether
#' it is in a valid form. If its form is not valid in at least one study - e.g. because
#' a disclosure trap was tripped and creation of the full output object was blocked -
#' dataFrameSubsetDS2 (via ds.dataFrame()) also returns any studysideMessages
#' that can explain the error in creating
#' the full output object. As well as appearing on the screen at run time,if you wish to
#' see the relevant studysideMessages at a later date you can use the \code{ds.message}
#' function. If you type ds.message("newobj") it will print out the relevant
#' studysideMessage from any datasource in which there was an error in creating <newobj>
#' and a studysideMessage was saved. If there was no error and <newobj> was created
#' without problems no studysideMessage will have been saved and ds.message("newobj")
#' will return the message: "ALL OK: there are no studysideMessage(s) on this datasource".
#' @author DataSHIELD Development Team
#' @export
#'
dataFrameSubsetDS2<-function(df.name=NULL,V1.name=NULL, V2.name=NULL, Boolean.operator.n=NULL,keep.cols=NULL, rm.cols=NULL, keep.NAs=NULL){
# Check Permissive Privacy Control Level.
dsBase::checkPermissivePrivacyControlLevel(c('permissive', 'banana'))
#########################################################################
# DataSHIELD MODULE: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
#nfilter.tab<-as.numeric(thr$nfilter.tab) #
#nfilter.glm<-as.numeric(thr$nfilter.glm) #
nfilter.subset<-as.numeric(thr$nfilter.subset) #
nfilter.string<-as.numeric(thr$nfilter.string) #
#nfilter.stringShort<-as.numeric(thr$nfilter.stringShort) #
#nfilter.kNN<-as.numeric(thr$nfilter.kNN) #
#datashield.privacyLevel<-as.numeric(thr$datashield.privacyLevel) #
#########################################################################
###############################################
#SCRIPT.TO.CHECK.VALIDITY.OF.EVALUATABLE.RCODE#
###############################################
#ARGUMENTS TO BE EVALUATED
#keep.cols
if(!is.null(keep.cols)){
keep.code.input<-keep.cols
keep.code.c<-unlist(strsplit(keep.code.input, split=","))
keep.code.n<-as.numeric(keep.code.c)
#In this case, code must only contain numeric elements split by ",",
#anything else will fail outright or will have returned an NA to
#code.num and so the following sum will exceed 0
###############################################################################|###################| 80 and 100
if(sum(is.na(keep.code.n))>0){
studysideMessage<-"FAILED: keep.cols argument contains non-numerics (disclosure risk)"
stop(studysideMessage, call. = FALSE)
}else{
keep.cols<-keep.code.n
}
#Code must only contain numeric elements split by ",",
#anything else will fail outright or will have returned an NA to
#code.num and so the following sum will exceed 0
if(sum(is.na(keep.code.n))>0){
studysideMessage<-"FAILED: keep.cols argument contains non-numerics (disclosure risk)"
stop(studysideMessage, call. = FALSE)
}else{
keep.cols<-keep.code.n
}
}
#ARGUMENTS TO BE EVALUATED
#rm.cols
if(!is.null(rm.cols)){
rm.code.input<-rm.cols
rm.code.c<-unlist(strsplit(rm.code.input, split=","))
rm.code.n<-as.numeric(rm.code.c)
#In this case, code must only contain numeric elements split by ",",
#anything else will fail outright or will have returned an NA to
#code.num and so the following sum will exceed 0
if(sum(is.na(rm.code.n))>0){
studysideMessage<-"FAILED: rm.cols argument contains non-numerics (disclosure risk)"
stop(studysideMessage, call. = FALSE)
}else{
rm.cols<-rm.code.n
}
#Code must only contain numeric elements split by ",",
#anything else will fail outright or will have returned an NA to
#code.num and so the following sum will exceed 0
if(sum(is.na(rm.code.n))>0){
studysideMessage<-"FAILED: rm.cols argument contains non-numerics (disclosure risk)"
stop(studysideMessage, call. = FALSE)
}else{
rm.cols<-rm.code.n
}
}
#ADDITIONAL DISCLOSURE TRAPS
if(!is.null(df.name)){
df.name.chars<-strsplit(df.name,split="")
if(length(df.name.chars[[1]])>nfilter.string){
studysideMessage<-"FAILED: df.name argument > nfilter.string - please shorten"
stop(studysideMessage, call. = FALSE)
}
}
if(!is.null(V1.name)){
V1.name.chars<-strsplit(V1.name,split="")
if(length(V1.name.chars[[1]])>nfilter.string){
studysideMessage<-"FAILED: V[i].name argument > nfilter.string - please shorten"
stop(studysideMessage, call. = FALSE)
}
}
if(!is.null(V2.name)){
V2.name.chars<-strsplit(V2.name,split="")
if(length(V2.name.chars[[1]])>nfilter.string){
studysideMessage<-"FAILED: V[ii].name argument > nfilter.string - please shorten"
stop(studysideMessage, call. = FALSE)
}
}
df.name.2<-paste0("data.frame(",df.name,")")
df2subset <- eval(parse(text=df.name.2), envir = parent.frame())
if(V1.name=="ONES"||V2.name=="ONES")
{
length.ONES<-dim(df2subset)[1]
V1<-rep(1,length=length.ONES)
V2<-rep(1,length=length.ONES)
Boolean.operator.n<-1
#if using "ONES" for V1 or V2 then need to ensure a variable called "ONES" exists
#when it comes to generating the Boolean indicator below. If it doesn't exist
#generate it. If it does exist (for another purpose) then just leave as it is
#because its form doesn't matter, it just has to exist
if(!exists("ONES"))
{
ONES<-V1
}
}else{
V1<-eval(parse(text=V1.name), envir = parent.frame())
V2<-eval(parse(text=V2.name), envir = parent.frame())
}
##########CHECK APPROPRIATE CLASSES ##############
if(!is.character(df.name) || !is.data.frame(df2subset)){
studysideMessage<-"FAILED: df.name argument must be character and must name a data.frame"
stop(studysideMessage, call. = FALSE)
}
if(!is.character(V1.name)){
studysideMessage<-"FAILED: V[i].name must be character"
stop(studysideMessage, call. = FALSE)
}
if(!is.character(V2.name)){
studysideMessage<-"FAILED: V[ii].name must be character"
stop(studysideMessage, call. = FALSE)
}
########### CHECK LENGTHS OF V1, V2 ARE CONSISTENT WITH COLUMN LENGTH OF df TO BE SUBSETTED
df.col.length<-dim(df2subset)[1]
V1.length<-length(V1)
V2.length<-length(V2)
if(!((df.col.length == V1.length))){
###############################################################################|###################| 80 and 100
studysideMessage<-"FAILED: V[i] must of length equal to column length of df being subsetted"
stop(studysideMessage, call. = FALSE)
}
if(!((V1.length == V2.length) || (V2.length==1))){
studysideMessage<-"FAILED: V[ii] must either be of length one or of length equal to V[i]"
stop(studysideMessage, call. = FALSE)
}
if(!is.numeric(Boolean.operator.n) || Boolean.operator.n==0){
studysideMessage<-"FAILED: Boolean.operator must be: '==', '!=', '<', '<=', '>' or '>='"
stop(studysideMessage, call. = FALSE)
}
Boolean.operator<-" "
if(Boolean.operator.n==1) Boolean.operator<-"=="
if(Boolean.operator.n==2) Boolean.operator<-"!="
if(Boolean.operator.n==3) Boolean.operator<-"<"
if(Boolean.operator.n==4) Boolean.operator<-"<="
if(Boolean.operator.n==5) Boolean.operator<-">"
if(Boolean.operator.n==6) Boolean.operator<-">="
#APPLY BOOLEAN OPERATOR SPECIFIED
Boolean.indicator<-integer(length=V1.length)
#EVALUATE DIFFERENTLY IF V2 IS SAME LENGTH AS V1 OR OF LENGTH 1
if(V2.length==V1.length){
for(j in 1:V1.length){
command.text<-paste0(V1.name,"[",j,"]",Boolean.operator,V2.name,"[",j,"]")
Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
}
}
if(V2.length==1){
for(j in 1:V1.length){
command.text<-paste0(V1.name,"[",j,"]",Boolean.operator,V2.name)
Boolean.indicator[j]<-eval(parse(text=command.text), envir = parent.frame())*1
}
}
#BY DEFAULT IF SELECTION VARIABLE HAS MISSING VALUES EXPLICITLY REPLACE NAs WITH 0
#TO DISAMBIGUATE WHAT HAPPENS BUT IF keep.NAs IS REPLACE NAs WITH 1s (TO KEEP IN)
if(keep.NAs){
Boolean.indicator[is.na(Boolean.indicator)==1]<-1
}else{
Boolean.indicator[is.na(Boolean.indicator)==1]<-0
}
#NOW SUBSET df TO BE SUBSETTED
df.subset<-df2subset[(Boolean.indicator==1),]
#CHECK SUBSET LENGTH IS CONSISTENT WITH nfilter FOR MINIMUM SUBSET SIZE
subset.size<-dim(df.subset)[1]
if(subset.size < nfilter.subset){
studysideMessage<-"Subset to be created is too small (<nfilter.subset)"
stop(studysideMessage, call. = FALSE)
}
############### CHECK ONLY keep.cols OR rm.cols ARE SET: NOT BOTH ##########################
if(!is.null(keep.cols) && !is.null(rm.cols)){
studysideMessage<-"You can either specify keep.cols or rm.cols, not both"
stop(studysideMessage, call. = FALSE)
}
#REMOVE COLUMNS BY SPECIFYING COLUMNS TO KEEP OR COLUMNS TO REMOVE
if(!is.null(keep.cols)){
df.subset<-df.subset[,keep.code.n]
return(df.subset)
}
if(!is.null(rm.cols)){
numcols<-dim(df2subset)[2]
template.cols<-c(1:numcols,rm.code.n)
element.counts<-table(template.cols)
element.counts<-2-element.counts
keep.cols<-rep(1:numcols,element.counts)
df.subset<-df.subset[,keep.cols]
return(df.subset)
}
return(df.subset)
}
#ASSIGN FUNCTION
# dataFrameSubsetDS2
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.