#' rmInvalid
#
#' @description rmInvalid removes the invalid ESM questionnaires as specified by the user.
#
#' @param dfList a list. Each element of the list must be a data.frame. Each data.frame is a separate raw ESM dataset/an ESM questionnaire version. If there is just one ESM version the list therefore contains one data.frame.
#
#' @param RELEVANTVN_ES a list. This list is generated by function \code{\link{setES}} and it is extended once either by function \code{\link{genDateTime}} or by function \code{\link{splitDateTime}}.
#
#' @details A data line is assumed to be invalid if both the start date and the start time are missing (NA = not available).
#
#' @return The user receives a list containing 4 elements:
#' \enumerate{
#' \item dfValid, i.e. the raw ESM dataset(s), after removing all invalid lines of data.
#' \item listInvalid, i.e. the raw ESM dataset(s), containing only the removed lines of data.
#' \item rmInvalidFinished, i.e. a logical value which is always TRUE in the returned list.
#' \item noLinesRemovedAtAll, i.e. a logical vector with one element per raw ESM dataset; despite its name, an element is TRUE if invalid lines of data were removed from the corresponding dataset and FALSE otherwise.
#' }
#' See \strong{Details} for more information.
#
#' @examples
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#' # Prerequisites in order to execute rmInvalid. Start ----------------
#' # Use example list delivered with the package
#' RELEVANTVN_ES <- RELEVANTVN_ESext
#' # keyLsNew is a list of datasets, also delivered with the package
#' # Prerequisites in order to execute rmInvalid. End ------------------
#' # ------------------------------------------------------
#' # Run function 9 of 29; see esmprep functions' hierarchy.
#' # ------------------------------------------------------
#' # keyLsNew is the result of function 'genDateTime' (or of function 'splitDateTime').
#' rmInvLs <- rmInvalid(keyLsNew, RELEVANTVN_ES)
#' # Result of function 'rmInvalid' is a list with 4 elements:
#' names(rmInvLs)
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#
#' @seealso Exemplary code (fully executable) in the documentation of \code{\link{esmprep}} (function 9 of 29).
#
#' @export
#
rmInvalid <- function(dfList, RELEVANTVN_ES = NULL) {
  # Purpose: run 'rmInvalidLines' on every data frame in dfList and collect,
  # per dataset, the kept lines, the removed lines and a removal flag.
  #
  # Args:
  #   dfList:        list whose elements must all be data frames.
  #   RELEVANTVN_ES: set-up list; every (unlisted) entry is treated as a column
  #                  name that has to exist in each data frame.
  #
  # Returns a list with the elements dfValid, listInvalid, rmInvalidFinished
  # (always TRUE) and noLinesRemovedAtAll (TRUE where lines were removed).
  #
  # Stops with an error if an element of dfList is not a data frame or if a
  # required column is missing in one of the data frames.

  # vapply always yields a logical vector, also for an empty list.
  dfCheck <- vapply(dfList, FUN = is.data.frame, FUN.VALUE = logical(1))
  if (!all(dfCheck)) {
    stop("At least one argument is not a data frame. Function 'rmInvalid' only accepts a list which contains data frames.")
  }

  # Error handling function for all set-up lists generated by setES and setREF.
  SETUPLISTCheck(RELEVANTINFO_ES = NULL,
                 RELEVANTVN_ES = RELEVANTVN_ES,
                 RELEVANTVN_REF = NULL)

  # Column names that are required in every data frame.
  notItemsVec <- as.character(unlist(RELEVANTVN_ES))
  for (i in seq_along(dfList)) {
    # Report the *required* names that are absent (not whatever columns happen
    # to sit at the same positions in the data frame).
    colNamesNotFound <- setdiff(notItemsVec, names(dfList[[i]]))
    if (length(colNamesNotFound) > 0) {
      stop(paste0("In data frame no. ", i,
                  " within the list the column name(s) ",
                  paste(colNamesNotFound, collapse = ", "),
                  " can't be found."))
    }
  }

  # Results are stored by position and named afterwards, so unnamed or
  # duplicated list names can neither fail nor overwrite an earlier dataset.
  nDf <- length(dfList)
  dfListNewNames <- names(dfList)
  dfListNew <- vector("list", nDf)
  listFiltered <- vector("list", nDf)
  SET_REMOVAL <- rep(FALSE, times = nDf)

  for (j in seq_along(dfList)) {
    # Temporary list with 2 elements: [[1]] kept lines, [[2]] removed lines.
    listTemp <- rmInvalidLines(dfList[[j]], RELEVANTVN_ES = RELEVANTVN_ES)
    removedLines <- listTemp[[2]]
    dfListNew[j] <- list(listTemp[[1]])

    # A data frame with at least one row always counts as a removal, even if
    # every cell in it is NA; anything else counts unless it is entirely NA.
    linesRemoved <- if (is.data.frame(removedLines)) {
      nrow(removedLines) > 0
    } else {
      !all(is.na(removedLines))
    }

    if (linesRemoved) {
      SET_REMOVAL[j] <- TRUE
      listFiltered[j] <- list(removedLines)
      cat(
        paste0("Filter no.", j, ". Elements removed: ", nrow(removedLines), "."), "\n")
    } else {
      listFiltered[j] <- list(NA)
      cat(paste0("Filter no.", j, ". Elements removed: none.\n"))
    }
    cat("---------------------------------\n\n")
  }

  if (!is.null(dfListNewNames)) {
    names(dfListNew) <- dfListNewNames
    names(listFiltered) <- paste0(dfListNewNames, " filtered")
  }

  list(dfValid = dfListNew,
       listInvalid = listFiltered,
       rmInvalidFinished = TRUE,
       noLinesRemovedAtAll = SET_REMOVAL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.