R/rmInvalid.R

#' rmInvalid
#
#' @description rmInvalid removes the invalid ESM questionnaires as specified by the user.
#
#' @param dfList a list. Each element of the list must be a data.frame. Each data.frame is a separate raw ESM dataset/an ESM questionnaire version. If there is just one ESM version the list therefore contains one data.frame.
#
#' @param RELEVANTVN_ES a list. This list is generated by function \code{\link{setES}} and it is extended once either by function \code{\link{genDateTime}} or by function \code{\link{splitDateTime}}.
#
#' @details A data line is assumed to be invalid if both the start date and the start time are missing (NA = not available).
#
#' @return The user receives a list containing 4 elements:
#' \enumerate{
#' \item dfValid, i.e. the raw ESM dataset(s), after removing all invalid lines of data.
#' \item listInvalid, i.e. the raw ESM dataset(s), containing only the removed lines of data.
#' \item rmInvalidFinished, i.e. a logical value, which is always set to TRUE when the function returns.
#' \item noLinesRemovedAtAll, i.e. a logical vector with one element per ESM raw dataset. Despite its name, TRUE means that invalid lines of data were found in (and removed from) the respective dataset, FALSE means that no lines were removed.
#' }
#' See \strong{Details} for more information.
#
#' @examples
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#' # Prerequisites in order to execute rmInvalid. Start ----------------
#' # Use example list delivered with the package
#' RELEVANTVN_ES <- RELEVANTVN_ESext
#' # keyLsNew is a list of datasets, also delivered with the package
#' # Prerequisites in order to execute rmInvalid. End ------------------
#' # ------------------------------------------------------
#' # Run function 9 of 29; see esmprep functions' hierarchy.
#' # ------------------------------------------------------
#' # keyLsNew is the result of function 'genDateTime' (or of function 'splitDateTime').
#' rmInvLs <- rmInvalid(keyLsNew, RELEVANTVN_ES)
#' # Result of function 'rmInvalid' is a list with 4 elements:
#' names(rmInvLs)
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#
#' @seealso Exemplary code (fully executable) in the documentation of \code{\link{esmprep}} (function 9 of 29).
#
#' @export
#
rmInvalid <- function(dfList, RELEVANTVN_ES = NULL) {

    # Purpose: split every data frame in dfList into its valid lines and its
    # invalid (removed) lines by delegating to 'rmInvalidLines', and collect
    # both parts together with per-dataset removal flags.
    #
    # Arguments:
    #   dfList         list of data frames (one per raw ESM dataset/version).
    #   RELEVANTVN_ES  set-up list; all of its (unlisted) values must be
    #                  column names in every data frame of dfList.
    #
    # Returns a list with the elements dfValid, listInvalid,
    # rmInvalidFinished (always TRUE) and noLinesRemovedAtAll (logical
    # vector, TRUE where lines were removed from the respective dataset).

    # Check if all elements in dfList are of class data.frame. vapply
    # guarantees a logical vector, even if dfList is empty.
    dfCheck <- vapply(dfList, FUN = is.data.frame, FUN.VALUE = logical(1))

    # If at least one of the elements in dfList is not of class data.frame
    # stop the processing.
    if(!all(dfCheck)) {
        stop("At least one argument is not a data frame. Function 'rmInvalid' only accepts a list which contains data frames.")
    }

    # Error handling function for all set-up lists generated by setES and setREF.
    # Both lists RELEVANTVN_ES and RELEVANTVN_REF get extended either by function
    # genDateTime or by function splitDateTime!
    SETUPLISTCheck(RELEVANTINFO_ES=NULL,
                   RELEVANTVN_ES=RELEVANTVN_ES,
                   RELEVANTVN_REF=NULL)

    # Column names registered in the set-up list; each of them must be
    # present in every data frame.
    notItemsVec <- as.character(unlist(RELEVANTVN_ES))

    # seq_along (instead of 1:length) makes an empty dfList a no-op.
    for(i in seq_along(dfList)) {
        idxColNamesNotFound <- which(is.na(match(notItemsVec, names(dfList[[i]]))))
        if(length(idxColNamesNotFound) > 0) {
            # The missing names come from the set-up vector, not from the
            # data frame; collapse them so that a single message is raised.
            colNamesNotFound <- paste(notItemsVec[idxColNamesNotFound], collapse = ", ")
            stop(paste0("In data frame no. ", i, " within the list the column name(s) ", colNamesNotFound, " can't be found."))
        }
    }

    # One list to collect the clean data frames (dfListNew), one list to
    # collect the removed lines (listFiltered).
    dfListNew <- listFiltered <- list()
    dfListNewNames <- names(dfList)

    # One flag per dataset: TRUE if lines were removed from it.
    SET_REMOVAL <- rep(FALSE, times = length(dfList))
    for(j in seq_along(dfList)) {

        # Apply function 'rmInvalidLines', generate temporary list with 2 elements (kept and removed).
        listTemp <- rmInvalidLines(dfList[[j]], RELEVANTVN_ES = RELEVANTVN_ES)
        removedLines <- listTemp[[2]]

        # The 1st element (valid data frame) is kept in either case.
        dfListNew[[dfListNewNames[j]]] <- listTemp[[1]]

        if(!all(is.na(removedLines))) {
            SET_REMOVAL[j] <- TRUE
            # Keep the 2nd element (data frame of invalid lines).
            listFiltered[[paste0(dfListNewNames[j], " filtered")]] <- removedLines
            # Count the rows of the result at hand rather than looking it up
            # by position in listFiltered, which is filled by name.
            cat(
                paste0("Filter no.", j, ". Elements removed: ", nrow(removedLines), "."),"\n")
        } else {
            # Nothing was removed: NA serves as placeholder.
            listFiltered[[paste0(dfListNewNames[j], " filtered")]] <- NA
            cat(paste0("Filter no.", j, ". Elements removed: none.\n"))
        }

        cat("---------------------------------\n\n")
    }
    list(dfValid=dfListNew, listInvalid=listFiltered, rmInvalidFinished=TRUE, noLinesRemovedAtAll=SET_REMOVAL)
}
mmiche/esmprep documentation built on July 7, 2019, 8:23 p.m.