R/ensureImputationInputs.R

##' Ensure Imputation Inputs
##'
##' @param data The data.frame containing the data to be imputed.
##' @param imputationParameters A list of the imputation parameters, such as
##' one generated by defaultImputationParameters().
##'
##' @return No value is returned, but errors are thrown if the input parameters
##' are not as expected.
##'
##' @export
##'

ensureImputationInputs = function(data, imputationParameters){

    ### Basic checks
    stopifnot(is(data, "data.table"))
    stopifnot(is(imputationParameters, "list"))

    ############################# Param checks #############################

    ### Check types of imputation parameters
    p = imputationParameters
    stopifnot(is.character(c(p$yearValue, p$byKey)))
    stopifnot(is(p$ensembleModels, "list"))
    stopifnot(sapply(p$ensembleModels, is) == "ensembleModel")
    if(length(p$ensembleModels) != length(unique(p$ensembleModels)))
        stop("Some ensemble models are duplicated!  Remove duplicates and try",
             " again.")
    if(length(names(p$ensembleModels)) != length(unique(names(p$ensembleModels))))
        stop("Some ensemble names are duplicated!  Remove duplicates and try",
             " again.")
    if(any(names(p$ensembleModels) == ""))
        stop("No ensembleModel's can be named ''")
    stopifnot(is.logical(c(p$restrictWeights, p$estimateNoData)))
    stopifnot(is.numeric(p$groupCount))
    stopifnot(is.character(c(p$missingFlag, p$imputationFlag,
                             p$newMethodFlag, p$newImputationColumn)))

    ### Check manually set parameters
    manuallyAssigned = c(p$imputationValueColumn,
                         p$imputationFlagColumn,
                         p$imputationMethodColumn)
    unAssigned = manuallyAssigned == ""
    if(any(unAssigned))
        stop("Certain parameters must be manually assigned and cannot be the ",
             "default value of ''.  Currently problematic:\n",
             paste0(c("imputationValueColumn", "imputationFlagColumn",
                      "imputationMethodColumn")[unAssigned], sep = "\n"))

    ### Check certain input parameters match allowable categories
    stopifnot(p$maximumWeights <= 1 & p$maximumWeights >= 0.5)
    stopifnot(p$errorType %in% c("raw", "loocv"))
    stopifnot(p$plotImputation %in% c("", "faceted", "prompt", "individual"))
    stopifnot(p$groupCount >= 2)
    stopifnot(p$groupCount <= 100)

    ### Ensure the error function is non-negative and returns a single value
    f = p$errorFunction
    stopifnot(is(f, "function"))
    stopifnot(f(1:10) >= 0)
    stopifnot(f(-1:-10) >= 0)
    stopifnot(length(f(1:10)) == 1)

    ############################# Data checks #############################

    ### Make sure all column name variables exist in data
    columnNames = c(p$imputationValueColumn,
                    p$imputationFlagColumn,
                    p$imputationMethodColumn,
                    p$yearValue,
                    p$byKey)
    missingColumns = ! columnNames %in% colnames(data)
    if( any(missingColumns) )
        stop("The following columns do not exist in data but should (or the ",
             "names in imputationParameters should be corrected):\n\t",
             paste(columnNames[missingColumns], collapse="\n\t"))

    ### Coerce columns to appropriate type:
    data[, c(p$imputationValueColumn) :=
             as.numeric(get(p$imputationValueColumn))]
    data[, c(p$imputationFlagColumn) :=
             as.character(get(p$imputationFlagColumn))]
    data[, c(p$imputationMethodColumn) :=
             as.character(get(p$imputationMethodColumn))]
    data[, c(p$yearValue) := as.numeric(get(p$yearValue))]

    ### Make sure byKey partitions the data (i.e. only one time observation per
    ### byKey value)
    uniquePts = data[, .N, by = c(p$byKey, p$yearValue)]
    if(max(uniquePts$N) > 1)
        stop("The byKey parameter does not partition the data, but it must ",
             "for ensemble imputation to be valid.  Check ",
             "imputationParameters$byKey and make sure that the input ",
             "dataset has at most one observation for each unique ",
             "combination of the byKey variables and the time variable.")

    if(any(data[[p$imputationValueColumn]] < 0, na.rm = TRUE)){
        warning("Negative values observed in the data and this will cause ",
                "problems will some default models ","
                (exponential, logistic, etc.)!")
    }
    ############################# Flag checks #############################

    ### Flag Table checks
    stopifnot(c(p$missingFlag, p$imputationFlag) %in%
                  p$flagTable$flagObservationStatus)
    stopifnot(is.data.frame(p$flagTable))
    stopifnot(colnames(p$flagTable) == c("flagObservationStatus",
                                         "flagObservationWeights"))
    stopifnot(is.character(p$flagTable$flagObservationStatus))
    stopifnot(is.numeric(p$flagTable$flagObservationWeights))
    stopifnot(p$flagTable$flagObservationWeights <= 1)
    stopifnot(p$flagTable$flagObservationWeights >= 0)

    ## Check that all flags are in the flagTable:
    flags = data[[p$imputationFlagColumn]]
    flags = unique(flags)
    missingFlags = flags[!flags %in% p$flagTable$flagObservationStatus]

    ### Globally assign ensuredImputationData so data will not need to be ensured again
    ensuredImputationData <<- TRUE
}
SWS-Methodology/faoswsImputation documentation built on May 9, 2019, 11:48 a.m.