# R/esIdentical.R

#' esIdentical
#
#' @description esIdentical checks whether there are fully identical lines of data in the ESM dataset.
#
#' @param esDf a data.frame. A single ESM dataset. It must contain the 2 columns that hold the date-time object for when an ESM questionnaire was started and finished, respectively.
#
#' @param RELEVANTVN_ES a list. This list is generated by function \code{\link{setES}} and it is extended once either by function \code{\link{genDateTime}} or by function \code{\link{splitDateTime}}.
#
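#' @details At least 2 ESM questionnaires must agree in the IMEI, the start date and the start time (i.e. in the columns that \code{RELEVANTVN_ES} names as ES_IMEI, ES_START_DATE and ES_START_TIME) in order to be registered as duplicates. All questionnaires of such a group are flagged, including the first occurrence.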
#
#' @return \code{esDf} with the additional column IDENT denoting an ESM questionnaire to be unique (= 0) or to be duplicated (= 1).
#
#' @examples
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#' # Prerequisites in order to execute esIdentical. Start --------------
#' # Use example list delivered with the package
#' RELEVANTVN_ES <- RELEVANTVN_ESext
#' # esAssigned is a list of datasets, delivered with the package. It is
#' # the result of the assignment of the ESM questionnaires to ALL 8
#' # participants in the reference dataset.
#' noEndDf <- missingEndDateTime(esAssigned[["ES"]], RELEVANTVN_ES)
#' # Prerequisites in order to execute esIdentical. End ----------------
#' # -------------------------------------------------------
#' # Run function 19 of 29; see esmprep functions' hierarchy.
#' # -------------------------------------------------------
#' # noEndDf is the result of function 'missingEndDateTime'.
#' identDf <- esIdentical(noEndDf, RELEVANTVN_ES)
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#
#' @seealso Exemplary code (fully executable) in the documentation of \code{\link{esmprep}} (function 19 of 29).
#
#' @export
#
esIdentical <- function(esDf, RELEVANTVN_ES=NULL) {
    # Flags ESM questionnaires that share IMEI, start date and start time
    # with at least one other row of esDf.
    #
    # Arguments:
    #   esDf           data frame holding the ESM questionnaires.
    #   RELEVANTVN_ES  list whose elements ES_IMEI, ES_START_DATE and
    #                  ES_START_TIME name the columns of esDf to compare.
    # Returns esDf with the numeric column IDENT: 1 for every row whose key
    # occurs more than once (first occurrence included), 0 otherwise.
    # Stops if esDf is not a data frame or if a key column cannot be found.

    if(!is.data.frame(esDf)) {
        stop("Function 'esIdentical' only accepts a single data frame as argument.")
    }

    # Error handling function for all set-up lists generated by setES and setREF.
    # Both lists RELEVANTVN_ES and RELEVANTVN_REF get extended either by function
    # genDateTime or by function splitDateTime!
    SETUPLISTCheck(RELEVANTINFO_ES=NULL,
                   RELEVANTVN_ES=RELEVANTVN_ES,
                   RELEVANTVN_REF=NULL)

    # Resolve the names of the three columns that form the comparison key and
    # fail loudly if one is missing, instead of silently comparing nothing.
    keyNames <- c("ES_IMEI", "ES_START_DATE", "ES_START_TIME")
    keyCols <- unlist(RELEVANTVN_ES[keyNames], use.names = FALSE)
    if(!is.character(keyCols) || length(keyCols) != length(keyNames) ||
       !all(keyCols %in% names(esDf))) {
        stop("Function 'esIdentical' cannot find the columns named by ES_IMEI, ES_START_DATE and ES_START_TIME in the data frame.")
    }

    # Build one key per row. '[[' always extracts the column as a vector,
    # whereas '[, name]' keeps a one-column table for data frame subclasses
    # that do not drop dimensions, which would make paste() collapse the
    # whole column into a single string.
    rowKey <- paste(esDf[[keyCols[1]]], esDf[[keyCols[2]]], esDf[[keyCols[3]]])

    # duplicated() marks every occurrence except the first one; scanning from
    # the end marks every occurrence except the last one. The union therefore
    # covers all members of a group of duplicates. With no duplicates (or no
    # rows) the mask is simply all FALSE (or empty).
    isDuplicate <- duplicated(rowKey) | duplicated(rowKey, fromLast = TRUE)

    # Store the flag as numeric 0/1, as before.
    esDf[["IDENT"]] <- as.numeric(isDuplicate)

    # Return the data frame extended by column IDENT.
    return(esDf)
}

# Try the esmprep package in your browser
#
# Any scripts or data that you put into this service are public.
#
# esmprep documentation built on July 5, 2019, 5:03 p.m.