#'
#' @title Checks if all variables do exist and are not empty
#' @description This function checks that the variables to analyse are (1) available
#' from all the studies and (2) do not contain only missing values (NAs). Studies that
#' fail either of these two checks are excluded from the returned list.
#' @param opals a list of opal object(s) obtained after login in to opal servers;
#' these objects hold also the data assign to R, as \code{dataframe}, from opal
#' datasources.
#' @param variables a list of quoted expressions of the form \code{D$name}, one for
#' each variable to check (e.g. \code{list(quote(D$LAB_TSC))}); the name of the
#' variable is taken from the part that follows the \code{$} sign.
#' @return the opal objects which passed the checks; an error is raised if none of
#' the studies passed the checks.
#' @author Gaye, A.
#' @export
#' @examples {
#'
#' # load that contains the login details
#' data(logindata)
#'
#' # login and assign specific variable(s)
#' myvar <- list("LAB_TSC")
#' opals <- ag.ds.login(logins=logindata,assign=TRUE,variables=myvar)
#'
#' # run checks for the variable LAB_TSC
#' ag.ds.checkvar(opals=opals, variables=list(quote(D$LAB_TSC)))
#' }
#'
ag.ds.checkvar <- function(opals, variables){
  # print a message for the user informing of checks
  cat("\nChecks are carried out on the variables used for the analysis\nto ensure they are available from the dataset(s) and not empty.\n\n")

  # names of the opal servers/studies, used in the messages below
  stdname <- names(opals)

  # names of the variables to check: each element of 'variables' is deparsed and
  # the text after the '$' sign is kept (NA if the element contains no '$')
  varIDs <- vapply(variables, function(xx){
    strsplit(deparse(xx), "\\$", perl=TRUE)[[1]][2]
  }, character(1), USE.NAMES=FALSE)

  # one flag per study, set to TRUE when the study fails any of the two checks
  failed <- logical(length(opals))

  # loop through the dataset(s) and the variable(s)
  for(i in seq_along(opals)){
    # first check: are all the variables to analyse available from the dataset?
    # get the names of the variables in the assigned dataset
    var.names <- datashield.aggregate(opals[i], quote(colnames(D)))
    absent <- !(varIDs %in% var.names[[1]])

    if(any(absent)){
      # record that the study has failed the first check and print a message
      failed[i] <- TRUE
      cat("The variable(s)", varIDs[absent], "is/are missing from", stdname[i],"!\n")
    }else{
      # second check, carried out only if the first check passed: does any of
      # the variables to analyse contain only NAs? All the variables are tested
      # so that every empty one is reported.
      # The loop index runs over 'variables' (not over the columns of the
      # dataset) so that the expression sent to the server and the name printed
      # in the message always refer to the same variable.
      for(j in seq_along(variables)){
        # server side function 'ag.isNA.ds' is called on the variable
        cally <- call("ag.isNA.ds", variables[[j]])
        out <- datashield.aggregate(opals[i], cally)
        if(out[[1]]){
          failed[i] <- TRUE
          cat("The variable", varIDs[j], "in", stdname[i], "is empty (NAs only)!\n")
        }
      }
    }

    # tell the user if the study is going to be excluded
    if(failed[i]){
      cat(stdname[i], "will not be included in the analysis\n\n")
    }
  }

  # remove studies which contain one or more variables that failed the checks
  if(any(failed)){
    opals <- opals[!failed]
  }else{
    cat("The checks went fine: no missing or empty variable(s)!\n\n")
  }

  # if none of the datasets passed the checks stop the process
  # otherwise return the opal objects that passed the checks
  if(length(opals) == 0){
    stop("The variables specified in the arguments are not available or contain only missing values, in all the assigned datasets!")
  }
  opals
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.