# R/intolerable.R

#' intolerable
#
#' @description intolerable registers the intolerable ESM questionnaires as specified by the user.
#
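#' @param esDf a data.frame. A single ESM dataset. It must contain the 2 columns that hold the date-time object for when an ESM questionnaire was started and finished, respectively. It must also contain the columns \code{PROMPT} and \code{EXPCATEGORY}, which this function uses to identify the intolerable questionnaires.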
#
#' @param intoleranceDf a data.frame with 2 columns. The first column must contain the prompt index, which must \strong{never} be combined with the 'expected category' in the second column; the second column must contain the 'expected category' of the survey version (as specified by the user in the function \code{\link{expectedPromptIndex}}). See \strong{Details} for more information.
#
#' @param RELEVANTINFO_ES a list. This list is generated by function \code{\link{setES}}.
#
#' @details The user must specify exactly which kind of ESM questionnaires are intolerable relative to the expectation, which was specified in the function \code{\link{expectedPromptIndex}}. For example, a questionnaire which was expected to be answered in the morning (specified by the expected category 1) must \strong{never} be combined with the last prompt of the day (e.g. 4). That is, it doesn't make sense to have the answers to the morning questionnaire, which was filled out in the evening.
#
#' @return The user receives a list containing 2 datasets:
#' \enumerate{
#' \item cleanedDf, i.e. the ESM dataset from which the intolerable lines of data were removed.
#' \item intoleranceDf, i.e. the intolerable lines of data that were removed.
#' }
#
#' @examples
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#' # Prerequisites in order to execute intolerable. Start --------------
#' # RELEVANTINFO_ES is delivered with the package
#' # expectedDf is a raw ESM dataset, delivered with the package.
#' # Prerequisites in order to execute intolerable. End ----------------
#' # -------------------------------------------------------
#' # Run function 24 of 29; see esmprep functions' hierarchy.
#' # -------------------------------------------------------
#' # Generate second argument of function 'intolerable'
#' (intoleranceDf <- data.frame(
#' # Column 'prompt': Prompts that must NEVER be combined with expected categories.
#' prompt = c(2, 3, 4, 1, 1),
#' # Column 'expect': Expected categories that must NEVER be combined with the prompts.
#' expect = c(1, 1, 1, 2, 3)))
#' # Read: Prompts 2, 3, and 4 must never be combined with expected category 1.
#' # Read: Prompt 1 must never be combined with expected category 2.
#' # Read: Prompt 1 must never be combined with expected category 3.
#' # expectedDf is the result of function 'expectedPromptIndex'.
#' intolLs <- intolerable(expectedDf, intoleranceDf, RELEVANTINFO_ES)
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#
#' @seealso Exemplary code (fully executable) in the documentation of \code{\link{esmprep}} (function 24 of 29).
#
#' @export
#
intolerable <- function(esDf, intoleranceDf, RELEVANTINFO_ES = NULL) {
    # Purpose: split 'esDf' into the rows whose (PROMPT, EXPCATEGORY) pair is
    # listed in 'intoleranceDf' (the intolerable questionnaires) and all
    # remaining rows.
    #
    # Arguments:
    #   esDf            data.frame holding the columns 'PROMPT' and
    #                   'EXPCATEGORY'.
    #   intoleranceDf   data.frame; 1st column = prompt index, 2nd column =
    #                   expected category. Each row is one forbidden pair.
    #   RELEVANTINFO_ES list; its element 'MAXPROMPT' defines the valid prompt
    #                   range 1..MAXPROMPT. Validated by SETUPLISTCheck.
    #
    # Returns: list(cleanedDf = <rows kept>, intoleranceDf = <rows removed>).
    # Stops with an error if any of the argument checks below fails.

    # If argument 'esDf' is not of class data.frame stop the processing.
    if (!is.data.frame(esDf)) {
        stop("First argument must be a data frame.")
    }

    # If argument 'intoleranceDf' is not of class data.frame stop the processing.
    if (!is.data.frame(intoleranceDf)) {
        stop("Second argument must be a data frame.")
    }

    # The first two columns are read below; fail early with a clear message
    # instead of an 'undefined columns selected' error.
    if (ncol(intoleranceDf) < 2) {
        stop("Second argument must be a data frame with 2 columns.")
    }

    # Check whether all columns are numeric. is.numeric() accepts both double
    # and integer columns (e.g. c(2L, 3L)), whereas comparing class() against
    # "numeric" would wrongly reject integer columns.
    isNumericCol <- vapply(intoleranceDf, is.numeric, logical(1))
    if (!all(isNumericCol)) {
        stop("Both columns must contain values of class numeric.")
    }

    # Missing or non-finite values cannot denote a prompt or a category; reject
    # them explicitly, otherwise the integer check below would fail with the
    # cryptic 'missing value where TRUE/FALSE needed'.
    intolValues <- unlist(intoleranceDf, use.names = FALSE)
    if (any(!is.finite(intolValues))) {
        stop("The argument 'intoleranceDf' must not contain missing or non-finite values.")
    }

    # Check whether all elements are integers (whole numbers).
    if (any(intolValues %% 1 != 0)) {
        stop("All values must be integers (no floating point numbers).")
    }

    # Error handling function for all set-up lists generated by setES and setREF.
    # Both lists RELEVANTVN_ES and RELEVANTVN_REF get extended either by function
    # genDateTime or by function splitDateTime!
    SETUPLISTCheck(RELEVANTINFO_ES=RELEVANTINFO_ES,
                   RELEVANTVN_ES=NULL,
                   RELEVANTVN_REF=NULL)

    # Both columns are read from 'esDf' below.
    if (!all(c("PROMPT", "EXPCATEGORY") %in% names(esDf))) {
        stop("First argument must contain the columns 'PROMPT' and 'EXPCATEGORY'.")
    }

    intolPrompt <- intoleranceDf[[1]]
    intolExpCat <- intoleranceDf[[2]]
    esPrompt <- esDf[["PROMPT"]]
    esExpCat <- esDf[["EXPCATEGORY"]]

    # Check the plausibility of the first column of 'intoleranceDf'.
    # The values all must be part of the range of possible prompts. Membership
    # (%in%) is tested rather than element-wise equality, so that listing only
    # a subset of the prompts (e.g. 2 and 4 out of 1..4) is accepted.
    validPrompts <- seq_len(RELEVANTINFO_ES[["MAXPROMPT"]])
    if (!all(intolPrompt %in% validPrompts)) {
        stop(paste0("Each of the values in the first column of the argument 'intoleranceDf' must be part of the range of prompts per ESM day."))
    }

    # Check the plausibility of the second column of 'intoleranceDf'.
    # The values all must be part of the values observed in the variable
    # 'EXPCATEGORY' of 'esDf'. If 'esDf' holds no observed category at all
    # (no rows, or only NA) there is nothing to validate against.
    observedExpCat <- esExpCat[!is.na(esExpCat)]
    if (length(observedExpCat) > 0 && !all(intolExpCat %in% observedExpCat)) {
        stop(paste0("Each of the values in the second column of the argument 'intoleranceDf' must be part of the range of values of the variable 'EXPCATEGORY', which has been generated by the user by applying the function 'expectedStartTimeIndex'. (For details see Details section in the documentation of the function 'intolerable'.)"))
    }

    # Flag every row of 'esDf' that matches at least one forbidden pair.
    # seq_len() makes the loop a no-op when 'intoleranceDf' has zero rows.
    isIntolerable <- rep(FALSE, times = nrow(esDf))
    for (j in seq_len(nrow(intoleranceDf))) {
        pairHit <- esExpCat == intolExpCat[j] & esPrompt == intolPrompt[j]
        # A comparison involving NA cannot be shown to be intolerable; treat
        # it as 'no match' so the row is kept and no all-NA phantom rows are
        # produced when subsetting.
        isIntolerable <- isIntolerable | (!is.na(pairHit) & pairHit)
    }

    list(cleanedDf = esDf[!isIntolerable, ],
         intoleranceDf = esDf[isIntolerable, ])
}
# mmiche/esmprep documentation built on July 7, 2019, 8:23 p.m.