R/esMerge.R

#' esMerge
#
#' @description esMerge merges all the ESM versions into one single dataset.
#
#' @param dfList a list. Each element of the list must be a data.frame. Each data.frame is a separate raw ESM dataset/an ESM questionnaire version. If there is just one ESM version the list therefore contains one data.frame.
#
#' @param RELEVANTVN_ES a list. This list is generated by function \code{\link{setES}} and it is extended once either by function \code{\link{genDateTime}} or by function \code{\link{splitDateTime}}.
#
#' @return One single (merged) ESM dataset, where all datasets of the argument \code{dfList} are included.
#
#' @examples
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#' # Prerequisites in order to execute esMerge. Start ------------------
#' # Use example list delivered with the package
#' RELEVANTVN_ES <- RELEVANTVN_ESext
#' # isCompleteLs is a list of datasets, also delivered with the package
#' # Prerequisites in order to execute esMerge. End --------------------
#' # -------------------------------------------------------
#' # Run function 14 of 29; see esmprep functions' hierarchy.
#' # -------------------------------------------------------
#' # Merge all raw ESM datasets. isCompleteLs is the result
#' # of function 'esComplete'.
#' esMerged <- esMerge(isCompleteLs, RELEVANTVN_ES)
#' # If preferred, convert the 15 digit IMEI number from scientific notation to text.
#' esMerged[,RELEVANTVN_ES[["ES_IMEI"]]] <- as.character(esMerged[,RELEVANTVN_ES[["ES_IMEI"]]])
#' # o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o=o
#
#' @seealso Exemplary code (fully executable) in the documentation of \code{\link{esmprep}} (function 14 of 29).
#
#' @export
#
esMerge <- function(dfList, RELEVANTVN_ES=NULL) {

    # Merge a list of raw ESM data frames into one data frame that is ordered
    # by the start date-time column and then by the IMEI column (both column
    # names are taken from RELEVANTVN_ES).

    # At least two datasets are required. An empty list is rejected explicitly,
    # because the sequences used further below would otherwise run backwards
    # (e.g. 2:0) and fail with an obscure subscript error.
    if(length(dfList) == 0) {
        stop("No merging possible: argument 'dfList' is empty.")
    }
    if(length(dfList) == 1) {
        stop("No merging possible with only one file.")
    }

    # vapply guarantees a logical vector, whatever the content of dfList.
    dfCheck <- vapply(dfList, FUN = is.data.frame, FUN.VALUE = logical(1))

    # If at least one of the elements in dfList is not of class data.frame
    # stop the processing.
    if(!all(dfCheck)) {
        stop("At least one argument is not a data frame. Function 'esMerge' only accepts a list which contains data frames.")
    }

    # checkKey_merge checks whether KEY is the first column and whether
    # its values are of type numeric.
    checkKey_merge <- checkKey(dfList)
    # checkKey2_merge returns the column of each dataset, which goes by
    # name KEY
    checkKey2_merge <- checkKey2(dfList)
    checkKey_df <- data.frame(checkKey_merge, checkKey2_merge)
    # If function 'esMerge' is used prior to the function 'assignES'
    # then the variable 'KEY' must be the first column.
    if(all(apply(checkKey_df, MARGIN = 1, function(x) x[1]==FALSE & x[2]==1))) {
        stop("First use function 'genKey' before using this function.")
    }
    # Else the function 'esMerge' is used after the function 'assignES',
    # meaning that there is no need to stop the merge-function.

    # Error handling function for all set-up lists generated by setES and setREF.
    # Both lists RELEVANTVN_ES and RELEVANTVN_REF get extended either by function
    # genDateTime or by function splitDateTime!
    SETUPLISTCheck(RELEVANTINFO_ES=NULL,
    			   RELEVANTVN_ES=RELEVANTVN_ES,
    			   RELEVANTVN_REF=NULL)

    # Columns that are needed to order the merged result; every dataset
    # must contain them.
    startDateTimeVN <- RELEVANTVN_ES[["ES_START_DATETIME"]]
    imeiVN <- RELEVANTVN_ES[["ES_IMEI"]]
    requiredVN <- c(startDateTimeVN, imeiVN)

    for(i in seq_along(dfList)) {
        missingVN <- requiredVN[!(requiredVN %in% names(dfList[[i]]))]
        if(length(missingVN) > 0) {
            # Collapse the names so that one single, readable message is
            # raised even if several columns are missing at once.
            stop(paste0("Column name ", paste(missingVN, collapse = ", "), " cannot be found in the data frame no. ", i))
        }
    }

    # Starting dataset with which the 2nd dataset will be merged.
    mergedUnordered <- dfList[[1]]

    # Sequence of all remaining datasets in 'dfList' (1st dataset is the starting
    # point) that will be merged with the pre-merged datasets.
    # Note: merge() has no argument 'stringsAsFactors' (it would be silently
    # ignored), therefore it is not passed.
    for(j in seq_along(dfList)[-1]) {
        mergedUnordered <- merge(mergedUnordered, dfList[[j]], all = TRUE)
    }

    # Order by start date-time first, ties are broken by the IMEI number.
    # '[[' extracts the column as a plain vector; drop = FALSE makes sure the
    # result stays a data frame.
    idxOrder <- order(mergedUnordered[[startDateTimeVN]], mergedUnordered[[imeiVN]])
    mergedOrdered <- mergedUnordered[idxOrder, , drop = FALSE]

    # RETURN single (merged) data frame
    return(mergedOrdered)
}

Try the esmprep package in your browser

Any scripts or data that you put into this service are public.

esmprep documentation built on July 5, 2019, 5:03 p.m.