R/ds.completeCases.R

Defines functions ds.completeCases

Documented in ds.completeCases

#' 
#' @title Identifies complete cases in server-side R objects 
#' @description Selects complete cases of a data frame,
#' matrix or vector that contain missing values.
#' @details In the case of a data frame or matrix, \code{ds.completeCases} deletes
#' all rows containing one or more missing values. For a vector,
#' \code{ds.completeCases} deletes only the elements recorded as NA.
#' 
#' Server function called: \code{completeCasesDS}
#' 
#' @param x1 a character denoting the name of the input object which can be a data frame,
#' matrix or vector.
#' @param newobj a character string that provides the name for the complete-cases object  
#' that is stored on the data servers. If the user does not specify a name, then the function 
#' generates a name for the generated object that is the name of the input object with the 
#' suffix "_complete.cases"
#' @param datasources a list of \code{\link{DSConnection-class}} objects obtained after login. 
#' If the \code{datasources} argument is not specified, the default set of connections will be
#' used: see \code{\link{datashield.connections_default}}.
#' @return \code{ds.completeCases} generates a modified data frame, matrix or vector from which
#' all rows (or, for a vector, all elements) containing at least one NA have been deleted.
#' The output object is stored on the server-side. Only two validity messages are returned
#' to the client-side, indicating whether the \code{newobj} has been created in each data
#' source and whether it is in a valid form.
#' @examples 
#' \dontrun{
#'   ## Version 6, for version 5 see the Wiki
#'   # Connecting to the Opal servers
#' 
#'   require('DSI')
#'   require('DSOpal')
#'   require('dsBaseClient')
#' 
#'   builder <- DSI::newDSLoginBuilder()
#'   builder$append(server = "study1", 
#'                  url = "http://192.168.56.100:8080/", 
#'                  user = "administrator", password = "datashield_test&", 
#'                  table = "CNSIM.CNSIM1", driver = "OpalDriver")
#'   builder$append(server = "study2", 
#'                  url = "http://192.168.56.100:8080/", 
#'                  user = "administrator", password = "datashield_test&", 
#'                  table = "CNSIM.CNSIM2", driver = "OpalDriver")
#'   builder$append(server = "study3",
#'                  url = "http://192.168.56.100:8080/", 
#'                  user = "administrator", password = "datashield_test&", 
#'                  table = "CNSIM.CNSIM3", driver = "OpalDriver")
#'   logindata <- builder$build()
#'   
#'   # Log onto the remote Opal training servers
#'   connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D") 
#' 
#'   # Select complete cases from different R objects
#' 
#'   ds.completeCases(x1 = "D", #data frames in the Opal servers 
#'                              #(see above the connection to the Opal servers)
#'                    newobj = "D.completeCases", # name for the output object 
#'                                                # that is stored in the Opal servers
#'                    datasources = connections)  # All Opal servers are used 
#'                                                # (see above the connection to the Opal servers)
#'                  
#'   ds.completeCases(x1 = "D$LAB_TSC", #vector (variable) of the data frames in the Opal servers 
#'                                      #(see above the connection to the Opal servers)
#'                    newobj = "LAB_TSC.completeCases", #name for the output variable 
#'                                                      #that is stored in the Opal servers
#'                    datasources = connections[2]) #only the second Opal server is used ("study2")
#'                    
#'   # Clear the Datashield R sessions and logout
#'   datashield.logout(connections) 
#'   }
#'   
#' @author DataSHIELD Development Team
#' @export
#' 
ds.completeCases <- function(x1=NULL, newobj=NULL, datasources=NULL){

  # Purpose: ask every data source to build the complete-cases version of the
  # server-side object named by 'x1', store it server-side under 'newobj', and
  # report back to the client whether that object now exists and looks valid.
  #
  # Arguments:
  #   x1          name (character) of the server-side data frame, matrix or vector
  #   newobj      name for the server-side result; defaults to "<x1>_complete.cases"
  #   datasources list of DSConnection-class objects; looked up when NULL
  #
  # Returns: list(is.object.created, validity.check), plus studyside.messages
  # when at least one source reports a message other than the "ALL OK" marker.
  # When 'x1' is NULL an error *string* is returned (not raised).

  # if no connection login details are provided look for 'connection' objects in the environment
  if(is.null(datasources)){
    datasources <- datashield.connections_find()
  }

  # ensure datasources is a list of DSConnection-class
  is.connection <- function(d) methods::is(d, "DSConnection")
  if(!(is.list(datasources) && all(vapply(datasources, is.connection, logical(1))))){
    stop("The 'datasources' were expected to be a list of DSConnection-class objects", call.=FALSE)
  }

  # check if a value has been provided for x1
  # NOTE: this deliberately returns the message instead of calling stop(), so
  # that existing callers inspecting the return value keep working.
  if(is.null(x1)){
    return("Error: x1 must be a character string naming a serverside data.frame, matrix or vector")
  }

  # check if the input object is defined in all the studies
  isDefined(datasources, x1)

  # if no value specified for output object, then specify a default
  if(is.null(newobj)){
    newobj <- paste0(x1, "_complete.cases")
  }

  # CALL THE MAIN SERVER SIDE FUNCTION
  calltext <- call("completeCasesDS", x1)
  DSI::datashield.assign(datasources, newobj, calltext)

  ###########################################################################
  # DataSHIELD CLIENTSIDE MODULE: CHECK KEY DATA OBJECTS SUCCESSFULLY CREATED

  # CALL SERVERSIDE FUNCTION
  calltext <- call("testObjExistsDS", newobj)
  object.info <- DSI::datashield.aggregate(datasources, calltext)

  # CHECK IN EACH SOURCE WHETHER OBJECT NAME EXISTS
  # AND WHETHER OBJECT PHYSICALLY EXISTS WITH A NON-NULL CLASS.
  # vapply() over the returned list (rather than 1:length) is safe when the
  # list is empty; isTRUE() treats a NULL/NA flag as "does not exist" instead
  # of aborting inside if().
  name.exists <- vapply(object.info, function(info) {
    isTRUE(info$test.obj.exists)
  }, logical(1))

  class.valid <- vapply(object.info, function(info) {
    !(is.null(info$test.obj.class) || ("ABSENT" %in% info$test.obj.class))
  }, logical(1))

  if(all(name.exists) && all(class.valid)){

    return.message <-
      paste0("A data object <", newobj, "> has been created in all specified data sources")

  }else{

    return.message.1 <-
      paste0("Error: A valid data object <", newobj, "> does NOT exist in ALL specified data sources")

    return.message.2 <-
      paste0("It is either ABSENT and/or has no valid content/class,see return.info above")

    return.message.3 <-
      paste0("Please use ds.ls() to identify where missing")

    return.message <- list(return.message.1, return.message.2, return.message.3)

  }

  # Collect any study-side messages attached to the new object
  calltext <- call("messageDS", newobj)
  studyside.message <- DSI::datashield.aggregate(datasources, calltext)

  # A source is clean only when it returns exactly the single "ALL OK" marker;
  # anything else (including NULL, NA or a multi-element message) is reported.
  ok.marker <- "ALL OK: there are no studysideMessage(s) on this datasource"
  source.ok <- vapply(studyside.message, function(msg) {
    length(msg) == 1L && isTRUE(msg == ok.marker)
  }, logical(1))

  if(all(source.ok)){
    validity.check <- paste0("<", newobj, "> appears valid in all sources")
    return(list(is.object.created=return.message, validity.check=validity.check))
  }

  validity.check <- paste0("<", newobj, "> invalid in at least one source. See studyside.messages:")
  list(is.object.created=return.message, validity.check=validity.check,
       studyside.messages=studyside.message)

  # END OF CHECK OBJECT CREATED CORRECTLY MODULE
  ###########################################################################

}
#ds.completeCases
datashield/dsBaseClient documentation built on May 16, 2023, 10:19 p.m.