R/variableKey.R
In kutils: Project Management Tools

Documented in all.equal.key all.equal.keylong assignMissing assignRecode checkValue_old cleanDataFrame is.data.frame.simple isNA keyApply keyCheck keyCrossRef keyDiagnostic keyDiff keyImport keyLookup keyRead keySave keysPool keysPoolCheck keyTemplate keyTemplateSPSS keyTemplateStata keyUpdate long2wide makeKeylist n2NA naLast print.keyDiff safeInteger sortStanza statdatKey varlabTemplate wide2long zapspace

##' If a numeric variable has only integer values, then
##' make it an integer.
##'
##' Users often accidentally create floating point numeric variables
##' when they really mean integers, such as c(1, 2, 3), when they
##' should have done c(1L, 2L, 3L). Before running \code{as.integer()}
##' to coerce the variable, we'd rather be polite and ask the variable
##' "do you mind being treated as if you are an integer?"  This
##' function checks to see if the variable is "close enough" to being
##' an integer, and then coerces as integer. Otherwise, it returns
##' NULL. And issues a warning.
##'
##' First, calculate absolute value of differences between \code{x}
##' and \code{as.integer(x)}. Second, find out if the sum of those
##' differences is smaller than \code{tol}. If so, then x can
##' reasonably be coerced to an integer.
##'
##' Be careful with the return. The correct return value for variables
##' that should not be coerced as integer is uncertain at this
##' point. We've tested various strategies, sometimes returning FALSE,
##' NULL, or just the original variable.
##' @param x a numeric variable
##' @param tol Tolerance value. Defaults to Machine$double.eps. See
##'     details.
##' @param vmax Maximum value allowed for an integer. Defaults to
##'     Machine$integer.max.
##' @param digits Digits value passed to the zapsmall
##'     function. Defaults to 7.
##' @param verbose Default FALSE: print warnings about x
##' @export
##' @return Either an integer vector or the original variable
##' @author Paul Johnson <pauljohn@@ku.edu> and Ben Kite
##'     <bakite@@ku.edu>
##' @examples
##' x1 <- c(1, 2, 3, 4, 5, 6)
##' is.integer(x1)
##' is.double(x1)
##' is.numeric(x1)
##' (x1int <- safeInteger(x1))
##' is.integer(x1int)
##' is.double(x1int)
##' is.numeric(x1int)
##' x2 <- rnorm(100)
##' x2int <- safeInteger(x2)
##' head(x2int)
##' x3 <- factor(x1, labels = c(LETTERS[1:6]))
##' x3int <- safeInteger(x3)
##'
safeInteger <- function(x, tol = .Machine$double.eps,
                        digits = 7, vmax = .Machine$integer.max,
                        verbose = FALSE)
{
    if(!is.numeric(x)) {
        if (verbose) {
            messg <- paste("safeInteger: x must be numeric or integer.")
            warning(messg)
        }
        return(NULL)
    }

    if(max(x, na.rm = TRUE) > vmax) {
        messg <- paste0("Values in x exceed the maximum integer value of ",
                        vmax, ". ", "safeInteger cannot be used safely ",
                        "for this variable. Original value is returned.")
        warning(messg)
        return(x)
    }

    if(is.integer(x)){
        return(x)
    } else {
        x <- zapsmall(x, digits)
        xnew <- as.integer(x)
        if (sum(abs(x - xnew), na.rm = TRUE) < tol) {
            return(xnew)
        } else {
            if (verbose) {
                messg <- paste("asInteger x:", paste(head(x), collapse=", "),
                               "... is not close enough to an integer")
                warning(messg)
            }
            return(NULL)
        }
    }
    messg <- paste0("safeInteger should not have reached this point")
    stop(messg)
}
NULL

##' Set missing values
##'
##' The missings values have to be carefully written, depending on the
##' type of variable that is being processed.
##'
##' Version 0.95 of kutils introduced a new style for specification of
##' missing values.
##'
##' @param x A variable
##' @param missings A string vector of semi-colon separated values,
##'     ranges, and/or inequalities.  For strings and factors, only an
##'     enumeration of values (or factor levels) to be excluded is
##'     allowed. For numeric variables (integers or floating point
##'     variables), one can specify open and double-sided intervals as
##'     well as particular values to be marked as missing. One can
##'     append particular values and ranges by
##'     "1;2;3;(8,10);[22,24];> 99;< 2". The double-sided interval is
##'     represented in the usual mathematical way, where hard
##'     bracketes indicate "closed" intervals and parentheses indicate
##'     open intervals.\enumerate{
##'
##' \item "(a,b)" means values of x greater than a and smaller than b
##'     will be set as missing.
##'
##' \item "[a,b]" is a closed interval, one which includes the
##'     endpoints, so a <= x <= b will be set as NA
##'
##' \item "(a,b]" and "[a,b)" are acceptable.
##' \item "< a"  indicates all values smaller than a will be missing
##' \item  "<= a" means values smaller than or equal to a will be
##'     excluded
##' \item "> a" and ">= a" have comparable
##'     interpretations.
##' \item "8;9;10" Mark off specific values by
##' an enumeration. Be aware, however, that this is
##' useful only for integer variables.  As demonstrated in the
##' example, for floating point numbers, one must specify intervals.
##' \item For factors and character variables, the argument missings
##' can be written either as "lo;med;hi" or "c('lo','med','hi')" }
##' @param sep A separator symbol, ";" (semicolon) by default
##' @return A cleaned column in which R's NA symbol replaces values
##'     that should be missing
##' @export
##' @importFrom utils head
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' ## 1.  Integers.
##' x <- seq.int(-2L, 22L, by = 2L)
##' ## Exclude scores 8, 10, 18
##' assignMissing(x, "8;10;18")
##' ## Specify range, 4 to 12 inclusive
##' missings <- "[4,12]"
##' assignMissing(x, missings)
##' ## Not inclusive
##' assignMissing(x,  "(4,12)")
##' ## Set missing for any value smaller that 7
##' assignMissing(x, "< 7")
##' assignMissing(x, "<= 8")
##' assignMissing(x, "> 11")
##' assignMissing(x, "< -1;2;4;(7, 9);> 20")
##'
##'
##' ## 2. strings
##' x <- c("low", "low", "med", "high")
##' missings <- "low;high"
##' assignMissing(x, missings)
##' missings <- "med;doesnot exist"
##' assignMissing(x, missings)
##' ## Test alternate separator
##' assignMissing(x, "low|med", sep = "|")
##'
##' ## 3. factors (same as strings, really)
##' x <- factor(c("low", "low", "med", "high"), levels = c("low", "med", "high"))
##' missings <- "low;high"
##' assignMissing(x, missings)
##' ## Previous same as
##' missings <- c("low", "high")
##' assignMissing(x, missings)
##'
##' missings <- c("med", "doesnot exist")
##' assignMissing(x, missings)
##' ## ordered factor:
##' x <- ordered(c("low", "low", "med", "high"), levels = c("low", "med", "high"))
##' missings <- c("low", "high")
##' assignMissing(x, missings)
##'
##' ## 4. Real-valued variable
##' set.seed(234234)
##' x <- rnorm(10)
##' x
##' missings <- "< 0"
##' assignMissing(x, missings)
##' missings <- "> -0.2"
##' assignMissing(x, missings)
##' ## values above 0.1 and below 0.7 are missing
##' missings <- "(0.1,0.7)"
##' assignMissing(x, missings)
##' ## Note that in floating point numbers, it is probably
##' ## futile to specify specific values for missings. Even if we
##' ## type out values to 7 decimals, nothing gets excluded
##' assignMissing(x, "-0.4879708;0.1435791")
##' ## Can mark a range, however
##' assignMissing(x, "(-0.487971,-0.487970);(0.14357, 0.14358)")
##' x
assignMissing <- function(x, missings = NULL, sep = ";"){
    if (is.null(missings)) return(x)
    if (is.character(missings)) missings <- zapspace(missings)
    missings <- unlist(strsplit(missings, split = sep, fixed = TRUE))
    missings <- na.omit(missings)
    missings <- zapspace(missings)
    missings <- gsub("-", " -", missings,  fixed = TRUE)
    ## If no NA  missings to remove, then return
    if (is.null(missings) | length(missings) == 0) return(x)

    if (is.character(x)){
        x[x %in% missings] <- NA
        return(x)
    }

    if (is.logical(x)){
        missings <- as.logical(missings)
        x[x %in% missings] <- NA
        return(x)
    }
    
    if (is.factor(x)){
        levels(x)[which(levels(x) %in% missings)] <- NA
        return(x)
    }

    ## 20170203: TODO following is hard coded to look at first character,
    ## if space first, fails, used zapspace to address above, but that's
    ## stupid bandaid, better regex work would be solid.
    if (is.numeric(x)){
        ## is  numeric includes integer and double and numeric
        ## separate the elements that inequality signs
        hasineq <- missings[substr(missings, 1, 1) %in% c(">", "<", "(", "[")]
        hasnoineq <- setdiff(missings, hasineq)
        hasnoineq <- if (length(hasnoineq) > 0) paste("x == ", hasnoineq)

        ## (9 ->   x > 9
        ## [9 ->   x >= 9
        ins <- c(">", "<" , "(",     "[",     ")",     "]",  ",")
        outs <- c("x >", "x <", "x >", "x >= ", " > x", " >= x", " & ")
        ranges <- mgsub(ins, outs, hasineq, fixed = TRUE)

        for(rr in c(ranges, hasnoineq)){
            eval(parse(text = paste0("x[", rr, "] <- NA")))
        }
        return(x)
    }

    messg <- "Sorry, no missings assigned because variable type was unhandled"
    warning(messg)
    ## return unchanged input, didn't see what to do
    x
}




##' A variable is transformed in an indicated way
##'
##' In the variable key framework, the user might request
##' transformations such as the logarithm, exponential, or square
##' root. This is done by including strings in the recodes column,
##' such as "log(x + 1)" or "3 + 1.1 * x + 0.5 * x ^ 2". This
##' function implements the user's request by parsing the character
##' string and applying the indicated re-calculation.
##'
##' In the variable key framework, this is applied to the raw data,
##' after missings are imposed.
##' @param x A column to be recoded
##' @param recode A character string using placeholder "x". See
##'     examples
##' @return A new column
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' set.seed(234234)
##' x <- rpois(100, lambda = 3)
##' x <- x[order(x)]
##' str1 <- "log(x + 1)"
##' xlog <- assignRecode(x, recode = str1)
##' plot(xlog ~ x, type = "l")
##' mean(xlog, na.rm = TRUE)
##' str2 <- "x^2"
##' xsq <- assignRecode(x, recode = str2)
##' plot(xsq ~ x, type = "l")
##' str3 <- "sqrt(x)"
##' xsrt <- assignRecode(x, recode = str3)
assignRecode <- function(x, recode = NULL){
    y <- eval(parse(text = recode))
}


##' Check and Clean data.frame for usage with variable key functions
##'
##' Checks that the data.frame is made up of simple individual
##' columns. Checks numeric columns to find out if they are acceptable
##' to treat as integers. If they are acceptable to treat as integers,
##' then convert those numeric to integer class variables.
##' @param dframe A data frame
##' @param safeNumericToInteger Default TRUE: Should we treat values
##'     which appear to be integers as integers? If a column is
##'     numeric, it might be safe to treat it as an integer.  In many
##'     csv data sets, the values coded c(1, 2, 3) are really
##'     integers, not floats c(1.0, 2.0, 3.0). See \code{safeInteger}.
##' @param trimws Defaults as "both", in meaning of \code{which}
##'     argument in \code{trimws} function.  Set as NULL if character
##'     variables must not be trimmed to eliminate white
##'     space. Otherwise, value should be one of \code{c("left",
##'     "right", "both")}.
##' @export
##' @return A checked and cleaned data frame
##' @keywords internal
##' @author Paul Johnson <pauljohn@@ku.edu>
cleanDataFrame <- function(dframe, safeNumericToInteger = TRUE, trimws = "both"){
    if(!is.data.frame(dframe)){
        messg <- paste("keyUpdate: The dframe object must be a data frame")
        stop(messg)
    }

    if (!(res <- is.data.frame.simple(dframe))) {
        messg <- paste(paste("cleanDataFrame checked if dframe elements are not single columns.",
                       "This frame has some elements that are not single columns.",
                       "The troublesome elements are:", collapse = ""),
                       paste(attr(res, "not_a_simple_column"), collapse = ", "))
        stop(messg)
    }

    ## If integer-like columns exist, turn them into integers
    if (safeNumericToInteger){
        for(i in colnames(dframe)){
            if(is.numeric(dframe[ , i])
               && !is.null(tmp <- safeInteger(dframe[ , i])))
                dframe[ , i] <- tmp
        }
    }

    ## Clean characters that have leading spaces to delete space
    if (!is.null(trimws)){
        for(i in colnames(dframe)){
            if(is.character(dframe[ , i])){
                dframe[ , i] <- trimws(dframe[ , i], which = trimws)
            } else if (is.factor(dframe[ , i])){
                levels(dframe[ , i]) <- trimws(levels(dframe[ , i]))
            }
        }
    }
    
    dframe
}


##' Check if a data frame is a simple collection of columns (no lists
##' or matrices within)
##'
##' Checks for the existence of dimensions within the data
##' frame. Returns FALSE if any object within dframe has non-null dim
##' value.
##'
##' See: http://stackoverflow.com/questions/38902880/data-frame-in-which-elements-are-not-single-columns
##'
##' @param dframe A data frame
##' @return Boolean, TRUE or FALSE. An attribute "not_a_simple_column"
##'     is created, indicating which of the elements in the dframe
##'     have dimensions
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @examples
##' N <- 100
##' mydf <- data.frame(x5 = rnorm(N),
##'                    x4 = rpois(N, lambda = 3),
##'                    x3 = ordered(sample(c("lo", "med", "hi"),
##'                    size = N, replace=TRUE)))
##' is.data.frame.simple(mydf)
##' mydf$amatr <- matrix(0, ncol = 2, nrow = NROW(mydf))
##' is.data.frame.simple(mydf)
##' mydf$amatr <- NULL
##' is.data.frame.simple(mydf)
##' mydf$adf <- mydf
##' is.data.frame.simple(mydf)
is.data.frame.simple <- function(dframe){
    no.dims <- function(x) {!is.null(dim(x))}
    elemdims <- sapply(dframe, no.dims)
    res <-  if(sum(elemdims) > 0) FALSE else TRUE
    attr(res, "not_a_simple_column") <- names(which(elemdims))
    res
}



##' Compare observed values with the values listed
##' (presumably from a variable key).
##'
##' This is purely diagnostic. It prints warnings in
##' either of 2 cases. 1)  observed data has values that
##' are not in the value_old, or 2) that value_old has
##' values that are not in the data.
##'
##' @param x a variable, either character or factor
##' @param value_old a vector of old values for which we are checking
##' @param xname character string to use for x's name when printing output
##' @param diagnostic prints messages about variables if TRUE
##' @keywords internal
##' @return NULL
##' @author Paul Johnson <pauljohn@@ku.edu>
checkValue_old <- function(x, value_old, xname, diagnostic = FALSE){
    if (!is.factor(x) && !is.character(x)) return(NULL)
    ## if value_old is length 1 and is equal to NA, don't bother
    if(all(value_old %in% c("NA", NA))) return(NULL)
    xobs <- unique(x)
    keynotinobs <- value_old[!value_old %in% xobs]
    keynotinobs <- na.omit(keynotinobs)
    if (diagnostic && length(keynotinobs) > 0){
        messg <- paste("Data Check (variable", xname, ":)\n",
                     "Key values were not observed in the input data: ",
                     paste(keynotinobs, collapse = ", "), "\n")
        cat(messg)
    }
    tobsnotinkey <- xobs[!xobs %in% value_old]
    tobsnotinkey <- na.omit(tobsnotinkey)
    if (diagnostic && length(tobsnotinkey) > 0) {
        messg <- paste("Data Check (variable", xname, ":)\n",
                       "These values in the input data were not in value_old: ",
                       paste(tobsnotinkey, collapse = ", "), "\n")
        cat(messg)
    }
    NULL
}



##' Check if values are R NA symbol or any one of the na.strings
##' elements
##'
##' A value vector in the key will generally be a character
##' vector.  This utility is used to check if the characters
##' are either R missing or values in a list of characters that
##' represent missings.
##' 
##' @param x Input data vector
##' @param na.strings Vector of string values to be considered as
##'     missing. Defaults will match values that are equal to ., empty
##'     string, any number of white space elements, or charcter
##'     string N/A. We do not include `NA` by default because some
##'     projects use NA to mean "not appropriate".
##' @return Logical vector, TRUE if a value is either NA or in
##'     na.strings.
##' @export 
##' @examples
##' x1 <- c("TRUE", "FALSE", FALSE, TRUE, NA, "NA", ".", "N/A", " ", "")
##' x1na <- kutils:::isNA(x1)
##' cbind(x1, x1na)
isNA <- function(x, na.strings = c("\\.", "", "\\s+",  "N/A")){
    ismissing <- grepl(paste0("^", paste0(na.strings, collapse="$|^"), "$"), x)
    ismissing[is.na(x)] <- TRUE
    ismissing
}

##' Create variable key template (in memory or in a file)
##'
##' A variable key is a human readable document that describes the
##' variables in a data set. A key can be revised and re-imported by R
##' to recode data. This might also be referred to as a
##' "programmable codebook."  This function inspects a data frame,
##' takes notice of its variable names, their classes, and legal
##' values, and then it creates a table summarizing that
##' information. The aim is to create a document that principal
##' investigators and research assistants can use to keep a project
##' well organized.  Please see the vignette in this package.
##'
##' The variable key can be created in two formats, wide and long.
##' The original style of the variable key, wide, has one row per
##' variable. It has a style for compact notation about current values
##' and required recodes.  That is more compact, probably easier for
##' experts to read, but perhaps more difficult to edit. The long
##' style variable key has one row per value per variable.  Thus, in a
##' larger project, the long key can have many rows. However, in a
##' larger project, the long style key is easier to edit with a spread
##' sheet program.
##'
##' After a key is created, it should be re-imported into R with the
##' \code{kutils::keyImport} function.  Then the key structure can
##' guide the importation and recoding of the data set.
##'
##' Concerning the varlab attribute. Run \code{attr(key, "varlab"} to
##' review existing labels, if any.
##'
##' Storing the variable labels in files requires some care because
##' the \code{rds}, \code{xlsx}, and \code{csv} formats have different
##' capabilities.  The \code{rds} storage format saves all attributes without
##' difficulty. In contrast, because \code{csv} and \code{xlsx} do not save
##' attributes, the varlabs are stored as separate character
##' matrices. For \code{xlsx} files, the varlab object is saved as a second
##' sheet in \code{xlsx} file, while in \code{csv} a second file suffixed
##' "-varlab.csv" is created. 
##'
##' @param dframe A data frame
##' @param long Default FALSE.
##' @param sort Default FALSE. Should the rows representing the
##'     variables be sorted alphabetically? Otherwise, they appear in
##'     the order in which they were included in the original dataset.
##' @param file DEFAULT NULL, meaning no file is produced. Choose a
##'     file name ending in either "csv" (for comma separated
##'     variables), "xlsx" (compatible with Microsoft Excel), or "rds"
##'     (R serialization data). The file name will be used to select
##'     among the 3 storage formats. XLSX output requires the openxlsx
##'     package.
##' @param max.levels How high is the limit on the number of values
##'     for discrete (integer, character, and Date) variables?
##'     Default = 15. If observed number exceeds max.levels, we
##'     conclude the author should not assign new values in the key
##'     and only the missing value will be included in the key as a
##'     "placeholder". This does not affect variables declared as
##'     factor or ordered variables, for which all levels are included
##'     in all cases.
##' @param missings Values in exising data which should be treated as
##'     missing in the new key. Character string in format acceptable
##'     to the \code{assignMissing} function. Can be a string with
##'     several missing indicators"1;2;3;(8,10);[22,24];> 99;< 2".
##' @param missSymbol Default ".".  A character string used to
##'     represent missing values in the key that is created.  Relevant
##'     (mostly) for the key's \code{value_new} column. Default is the
##'     period, ".". Because R's symbol \code{NA} can be mistaken for
##'     the character string \code{"NA"}, we use a different
##'     (hopefully unmistakable) symbol in the key.
##' @param varlab A key can have a companion data structure for
##'     variable labels. Default is FALSE, but the value may also be
##'     TRUE or a named vector of variable labels, such as
##'     \code{c("x1" = "happiness", "x2" = "wealth")}. The labels
##'     become an attribute of the key object. See Details for
##'     information on storage of varlabs in saved key files.
##' @param safeNumericToInteger Default TRUE: Should we treat values
##'     which appear to be integers as integers? If a column is
##'     numeric, it might be safe to treat it as an integer.  In many
##'     csv data sets, the values coded c(1, 2, 3) are really
##'     integers, not floats c(1.0, 2.0, 3.0). See \code{safeInteger}.
##' @param trimws Default is "both", user can change to "left", "right", or
##'     set as NULL to avoid any trimming.
##' @return A key in the form of a data frame. May also be saved on
##'     disk if the file argument is supplied. The key may have an
##'     attribute "varlab", variable labels.
##' @export
##' @importFrom utils write.csv
##' @importFrom methods as
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' set.seed(234234)
##' N <- 200
##' mydf <- data.frame(x5 = rnorm(N),
##'                    x4 = rpois(N, lambda = 3),
##'                    x3 = ordered(sample(c("lo", "med", "hi"),
##'                    size = N, replace=TRUE),
##'                    levels = c("med", "lo", "hi")),
##'                    x2 = letters[sample(c(1:4,6), N, replace = TRUE)],
##'                    x1 = factor(sample(c("cindy", "bobby", "marcia",
##'                                         "greg", "peter"), N,
##'                    replace = TRUE)),
##'                    x7 = ordered(letters[sample(c(1:4,6), N, replace = TRUE)]),
##'                    x6 = sample(c(1:5), N, replace = TRUE),
##'                    stringsAsFactors = FALSE)
##' mydf$x4[sample(1:N, 10)] <- 999
##' mydf$x5[sample(1:N, 10)] <- -999
##'
##' ## Note: If we change this example data, we need to save a copy in
##' ## "../inst/extdata" for packacing
##' dn <- tempdir()
##' write.csv(mydf, file = file.path(dn, "mydf.csv"), row.names = FALSE)
##' mydf.templ <- keyTemplate(mydf, file = file.path(dn, "mydf.templ.csv"),
##'                           varlab = TRUE)
##' mydf.templ_long <- keyTemplate(mydf, long = TRUE,
##'                             file = file.path(dn, "mydf.templlong.csv"),
##'                             varlab = TRUE)
##'
##' mydf.templx <- keyTemplate(mydf, file = file.path(dn, "mydf.templ.xlsx"),
##'                             varlab = TRUE)
##' mydf.templ_longx <- keyTemplate(mydf, long = TRUE,
##'                              file = file.path(dn, "mydf.templ_long.xlsx"),
##'                              varlab = TRUE)
##' ## Check the varlab attribute
##' attr(mydf.templ, "varlab")
##' mydf.tmpl2 <- keyTemplate(mydf,
##'                          varlab = c(x5 = "height", x4 = "age",
##'                          x3 = "intelligence", x1 = "Name"))
##' ## Check the varlab attribute
##' attr(mydf.tmpl2, "varlab")
##' 
##' ## Try with the national longitudinal study data
##' data(natlongsurv)
##' natlong.templ <- keyTemplate(natlongsurv,
##'                           file = file.path(dn, "natlongsurv.templ.csv"),
##'                           max.levels = 15, varlab = TRUE, sort = TRUE)
##'
##' natlong.templlong <- keyTemplate(natlongsurv, long = TRUE,
##'                    file = file.path(dn, "natlongsurv.templ_long.csv"), sort = TRUE)
##' if(interactive()) View(natlong.templlong)
##' natlong.templlong2 <- keyTemplate(natlongsurv, long = TRUE,
##'                       missings = "<0", max.levels = 50, sort = TRUE,
##'                       varlab = TRUE)
##' if(interactive()) View(natlong.templlong2)
##' 
##' natlong.templwide2 <- keyTemplate(natlongsurv, long = FALSE,
##'                       missings = "<0", max.levels = 50, sort = TRUE)
##' if(interactive()) View(natlong.templwide2)
##'
##' all.equal(wide2long(natlong.templwide2), natlong.templlong2)
##'
##' head(keyTemplate(natlongsurv, file = file.path(dn, "natlongsurv.templ.xlsx"),
##'              max.levels = 15, varlab = TRUE, sort = TRUE), 10)
##' head(keyTemplate(natlongsurv, file = file.path(dn, "natlongsurv.templ.xlsx"),
##'              long = TRUE, max.levels = 15, varlab = TRUE, sort = TRUE), 10)
##'
##' list.files(dn)
##'
keyTemplate <-
    function(dframe, long = FALSE, sort = FALSE,
             file = NULL, max.levels = 15, missings = NULL, missSymbol = ".",
             safeNumericToInteger = TRUE, trimws = "both", 
             varlab = FALSE)
{
    if (class(dframe)[1] != "data.frame"){
        MESSG <- paste("Warning: keyTemplate is intended for an R *data.frame*,",
                       "not subclasses like tibbles or data.tables.",
                       "keyTemplate coerced your input",
                       "with as.data.frame().\n")
        if (inherits(dframe, "data.frame")){
            ## Coerce without warning, but print message
            cat(MESSG)
        } else {
            ## Elevate the message to a warning
            warning(MESSG)
        }
        dframe <- as.data.frame(dframe)
    }
            
    dframe <- cleanDataFrame(dframe, safeNumericToInteger = safeNumericToInteger,
                             trimws = trimws)

    df.class <- sapply(dframe, function(x)class(x)[1])
    cn <- colnames(dframe)

    ## Keep at most max.levels elements, but always add an NA symbol
    ## if there is none.  Also, always keep value that are missing
    ## according to missings
    shortenValues <- function(x, max.levels, missings){
        xmiss <- assignMissing(x, missings)
        xnotmissing <- if (length(temp <- x[-which(is.na(xmiss))]) > 0) temp else NULL
        xismissing <- unique(x[which(is.na(xmiss))])
        if (!any(vapply(xismissing, is.na, logical(1)))) xismissing <- c(xismissing, NA)
        if (length(xnotmissing) <= max.levels) {
            xnotmissing <- xnotmissing[1:min(max.levels, length(xnotmissing))]
            df <- data.frame(value_old = c(xnotmissing, xismissing),
                             value_new = c(xnotmissing, rep(NA, length(xismissing))),
                             stringsAsFactors = FALSE)
            return(df)
        } else {
            return(data.frame(value_old = NA, value_new = NA,
                              stringsAsFactors = FALSE))
        }
    }

    ## Returns all unique values, inserts NA at end if not present
    getUnique <- function(xname){
        ## For discrete variables integer, character, logical:
        if (df.class[[xname]] %in% c("integer", "character", "logical")){
            val <- unique(dframe[ , xname, drop = TRUE])
            ##pj 20170926: new sort method keeps missing on end, sets NA as missSymbol
            ##pj 20180502: tibble fails on following order function.
            val.sort <- val[order(val)]
            if(!NA %in% val.sort) val.sort <- c(val.sort, NA)
            return(val.sort)
        }
        ## For discrete variables factor, ordered:
        if (df.class[[xname]] %in% c("factor", "ordered")){
            return(c(levels(dframe[ , xname]), NA))
        }
        ## if coercion check passes, then use "as" to coerce the missing
        if (checkCoercion(c(NA), df.class[[xname]])) {
            value <- NA
            mytext <- paste0("as.",  df.class[[xname]], "(value)")
            res <- eval(parse(text = mytext))
            return(res)
        } else{
            ## Give up trying to cast the NA type
            return(c(NA))
        }
    }

    ## First, make a long key
    ## Generate a small key for one variable
    smallTemplate <- function(xname, missings = NULL){
        value_old <- getUnique(xname)
        valoldnew <- shortenValues(value_old,
                                   max.levels = max.levels,
                                   missings = missings)
        valoldnew[is.na(valoldnew)] <- missSymbol
        keysmall <- data.frame(name_old = xname, name_new = xname, 
                               class_old = df.class[[xname]],
                               class_new = df.class[[xname]],
                               value_old = as.character(valoldnew[ , "value_old"]),
                               value_new = as.character(valoldnew[ , "value_new"]),
                               missings = "",
                               recodes = "", 
                               stringsAsFactors = FALSE)
        keysmall
    }
        
    keyList <- lapply(cn, smallTemplate, missings = missings)

    key <- do.call("rbind", keyList)

    if(isTRUE(long)){
        if (sort) key <- key[order(key$name_old, key$name_new), ]
        class(key) <- c("keylong", class(key))
     } else {
        ## else !long, so make a wide key
        key <- long2wide(key)
        rownames(key) <- key$name_old
        if (sort) key <- key[order(key$name_old), ]
        ## put in order of original data columns
        else key <- key[cn, ]
    }

    attr(key, "missSymbol") <- missSymbol
    if (!missing(varlab) && !identical(varlab, FALSE)) {
        attr(key, "varlab") <- varlabTemplate(key, varlab)
        varlab <- TRUE
    }
   
    if (!missing(file) && !is.null(file)){
        keySave(key, file, na_ = missSymbol, varlab = varlab)
    }
    key
}
NULL

##' Create Variable Label Template
##'
##' Receive a key, create a varlab object, with columns
##'     \code{name_old} \code{name_new}, and \code{varlab}.
##'
##' If not specified, a matrix is created with empty variable labels.
##' @param obj A variable key
##' @param varlab Default NULL, function will start from clean slate,
##'     a set of column labels that match \code{name_new}. User can
##'     specify values by providing a named vector of labels, e.g.,
##'     \code{c("x1" = "happiness", "x2" = "wealth")}, where the names
##'     are values to be matched against "name_new" in key.
##' @return Character matrix with columns \code{name_new} and \code{varlab}.
##' @export
##' @author Paul Johnson
##' @examples
##' mydf.path <- system.file("extdata", "mydf.csv", package = "kutils")
##' mydf <- read.csv(mydf.path, stringsAsFactors=FALSE)
##' mydf.keywide1 <- keyTemplate(mydf, long = FALSE, sort = FALSE,
##'                     varlab = TRUE)
##' attr(mydf.keywide1, "varlab")
##' mydf.keywide2 <- keyTemplate(mydf, long = FALSE, sort = FALSE,
##'                     varlab = c("x3" = "fun"))
##' attr(mydf.keywide2, "varlab")
##' attr(mydf.keywide2, "varlab") <- varlabTemplate(mydf.keywide2,
##'                   varlab = c("x5" = "wealth", "x10" = "happy"))
##' attr(mydf.keywide2, "varlab")
##' attr(mydf.keywide2, "varlab") <- varlabTemplate(mydf.keywide2,
##'                   varlab = TRUE)
##' attr(mydf.keywide2, "varlab")
##' ## Target we are trying to match:
##' mydf.keylong <- keyTemplate(mydf, long = TRUE, sort = FALSE, varlab = TRUE)
##' attr(mydf.keylong, "varlab")
##' attr(mydf.keylong, "varlab") <- NULL
##' varlabTemplate(mydf.keylong)
##' attr(mydf.keylong, "varlab") <- varlabTemplate(mydf.keylong,
##'                    varlab = c("x3" = "wealth", "x10" = "happy"))
##' attr(mydf.keylong, "varlab")
##' attr(mydf.keylong, "varlab") <- varlabTemplate(mydf.keylong, varlab = TRUE)
##' attr(mydf.keylong, "varlab")
varlabTemplate <- function(obj, varlab = TRUE){
    if (identical(varlab, FALSE)){
        return(NULL)
    }
    varlabs.orig <- attr(obj, "varlab")
    if(is.null(varlabs.orig)){
        if (isTRUE(varlab)) {
            varlabs <- unique(obj[ , "name_new", drop = TRUE])
            names(varlabs) <- varlabs
            return(varlabs)
        } else if (is.vector(varlab)){
            return(varlab)
        } else {
            MESSG <- paste("varlabTemplate input:",
                           paste(varlab, collapse = " "),
                           "is not understandable")
            stop(MESSG)
        }
    }

    varlabs.all <- unique(obj[ , "name_new"])
    names(varlabs.all) <- varlabs.all
    if (isTRUE(varlab)){
        ## don't change existing varlabs, only new ones
        ## replace from varlabs.orig into varlabs.all
        varlabs <- varlabs.all
        varlabs.addtokey <- varlabs.all[!names(varlabs.all) %in% names(varlabs.orig)]
        return(c(varlabs.orig, varlabs.addtokey))
    } else if (is.vector(varlab)){
        ## keep if in original and not in varlab
        keepinkey <- varlabs.orig[!names(varlabs.orig) %in% names(varlab)]
        ## add from varlab to replace
        replaceinkey <- varlab[names(varlab) %in% names(varlabs.orig)]
        addtokey <- varlab[!names(varlab) %in% names(varlabs.orig)]
        res <- c(keepinkey, replaceinkey, addtokey)
        if(any(duplicated(names(res)))) stop("varlabTemplate fail1")
        return(c(keepinkey, replaceinkey, addtokey))
    } else {
            MESSG <- paste("varlabTemplate input:",
                           paste(varlab, collapse = " "),
                           "is not understandable")
            stop(MESSG)
    }
}
NULL

##' Save key as file after deducing type from suffix
##'
##' This is specialized to saving of key objects, it is not a
##' general purpose function for saving things.  It scans the
##' suffix of the file name and then does the right thing.
##'
##' In updates 2017-09, a varlab element was introduced.  The varlab
##' attribute of the object is saved.  The files created incorporate
##' the variable labels object in different ways. 1) XLSX: variable
##' labels a worksheet named "varlab" 2) CSV: variable labels saved in
##' a separate file suffixed "-varlab.csv". 3) RDS: varlab is an
##' attribute of the key object.
##' @param obj a variable key object
##' @param file file name. must end in "csv", "xlsx" or "rds"
##' @param na_ Value to insert to represent a missing score. Default
##'     ".".
##' @param varlab FALSE or TRUE. Default is FALSE, no new labels will
##'     be created. If a key object has a varlab already, it is saved
##'     with the key, always. This parameter controls whether a new
##'     varlab template should be created when the object is saved.
##'     If TRUE and obj has no varlab attribute, a new varlab template
##'     is created by the \code{varlabTemplate} function. If TRUE and
##'     a varlab attribute currently exists, but some variables are
##'     missing labels, then \code{varlabTemplate} is called to fill
##'     in new variable labels.
##' @return NULL if no file is created. Otherwise, a key object with
##'     an attribute varlab is returned.
##' @importFrom openxlsx addWorksheet writeDataTable saveWorkbook
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
keySave <- function(obj, file, na_ = ".", varlab){
    obj[is.na(obj)] <- "."
    if (!missing(varlab) && !varlab %in% c(TRUE, FALSE)){
        MESSG <- "keySave varlab argument must be TRUE or FALSE"
        stop(MESSG)
    }
    
    if (!is.null(attr(obj, "varlab"))){
        varlab <- TRUE
    }  else if (!missing(varlab) && !identical(varlab, FALSE)
                && is.null(attr(obj, "varlab"))) {
     ## varlab neither provided with key nor varlab == FALSE, so create
        attr(obj, "varlab") <- varlabTemplate(obj, varlab = TRUE)
        varlab <- TRUE
    } else {
        varlab <- FALSE
    }
      
    if (length(grep("csv$", tolower(file))) > 0){
        write.csv(obj, file = file, na = na_, row.names = FALSE)
        if(!identical(varlab, FALSE) && !is.null(attr(obj, "varlab"))){
            varlab.orig <- attr(obj, "varlab")
            varlab.mat <- cbind("name_new" = names(varlab.orig),
                                "varlab" = varlab.orig)
            write.csv(varlab.mat,
                      file = gsub(".csv$", "-varlab.csv", file),
                      row.names = FALSE, na = na_)
        }
    } else if (length(grep("xlsx$", tolower(file)))){
        wb <- openxlsx::createWorkbook()
        addWorksheet(wb, "key")
        writeDataTable(wb, sheet = "key", x = obj)
        if(!identical(varlab, FALSE) && !is.null(attr(obj, "varlab"))){
            varlab.orig <- attr(obj, "varlab")
            varlab.mat <- data.frame("name_new" = names(varlab.orig),
                                "varlab" = varlab.orig)
            addWorksheet(wb, "varlab")
            writeDataTable(wb, sheet = "varlab", x = varlab.mat)
        }
        saveWorkbook(wb, file, overwrite = TRUE)
    } else if (length(grep("rds$", tolower(file)))){
        saveRDS(obj, file = file)
    } else {
        warning("keySave: unrecognized suffix. No file created")
        NULL
    }
    invisible(obj)
}
NULL

##' Read file after deducing file type from suffix.
##'
##' If the input is XLSX, sheets named "key" and "varlab" are
##' imported if the exist. If input is CSV, then the key
##' CSV file is imported and another file suffixed with "-varlab" is
##' imported if it exists.
##'
##' The variable lables are a named vector saved as an attribute of
##' the key object.
##' @param file name of file to be imported, including path to
##'     file. file name must end in "csv", "xlsx" or "rds"
##' @param ... additional arguments for read.csv or read.xlsx.
##' @param na.strings Values to be converted to R missing symbol
##'     NA. Default is white space, "\\s+".
##' @return A data frame or matrix.
##' @importFrom utils read.csv
##' @importFrom openxlsx read.xlsx getSheetNames readWorkbook
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
keyRead <- function(file, ..., na.strings = c("\\s+")){
    ## TODO: implement code to sort out the dots arguments, find
    ## which are aimed at read.xlsx or read.csv, and divide them. See
    ## peek() function example.
    dots <- list(...)
    readxlsxFormals <- names(formals(openxlsx::read.xlsx))
    readcsvFormals <- names(formals(read.csv))
    dotsforxlsx <- dots[readxlsxFormals[readxlsxFormals %in% names(dots)]]
    dotsforcsv <- dots[readcsvFormals[readcsvFormals %in% names(dots)]]
    if (!is.character(file) || !file.exists(file)){
        messg <- paste("keyRead: 'file' not found")
        stop(messg)
    } else {
        ## key is file name, so scan for suffix
        if (length(grep("xlsx$", tolower(file))) > 0){
            sheetNames <- getSheetNames(file)
            xlsxargs <- list(xlsxFile = file, sheet = "key", colNames = TRUE,
                             check.names = FALSE, na.strings = na.strings)
            xlsxargz <- modifyList(xlsxargs, dotsforxlsx)
            key <- do.call("readWorkbook", xlsxargz)
            ## Force columns to be of type "character"
            ## replace NAs with empty strings
            for(i in colnames(key)){
                if (!inherits(key[ , i], "character")) key[ , i] <- as.character(key[ , i])
                key[which(is.na(key[ ,i])), i] <- "."
            }
            if ("varlab" %in% sheetNames){
                xlsxargz[["sheet"]] <- "varlab"
                varlab.mat <- tryCatch(do.call("readWorkbook", xlsxargz),
                                       finally = NULL)
                varlab <- varlab.mat[ , "varlab"]
                names(varlab) <- varlab.mat[ , "name_new"]
                attr(key, "varlab") <- varlab
            }
        } else if (length(grep("csv$", tolower(file))) > 0){
            csvargs <- list(file = file, stringsAsFactors = FALSE,
                            colClasses = "character", na.strings = na.strings)
            csvargz <- modifyList(csvargs, dotsforcsv)
            key <- do.call("read.csv", csvargz)
            filevarlab <- gsub("csv$", "-varlab.csv", file)
            if (file.exists(filevarlab)){
                csvargs[["file"]] <- filevarlab
                varlab.mat <- do.call("read.csv", csvargz)
                varlab <- varlab.mat[ , "varlab"]
                names(varlab) <- varlab.mat[ , "name_new"]
                attr(key, "varlab") <- varlab
            }
        } else if (length(grep("rds$", tolower(file))) > 0){
            key <- readRDS(file)
        }
    }
    invisible(key)
}
NULL

##' Convert nothing to R missing(NA).
##'
##' By "nothing", we mean white space or other indications of
##' nothingness.  Goal is to find character strings that users
##' might insert in a key to indicate missing values. Those things,
##' which are given default values in the argument nothings, will be
##' changed to NA.
##'
##' Using regular expression matching, any value that has nothing
##' except for the indicated "nothing" values is converted to NA.  The
##' "nothing" values included by default are a period by itself (A SAS
##' missing value), an empty string, or white space, meaning " ", or
##' any number of spaces, or a tab.
##' @param x A character vector. If x is not a character vector, it is
##'     returned unaltered without warning.
##' @param nothings A vector of values to be matched by regular
##'     expressions as missing.  The default vector is c("\\.",
##'     "\\s"), where "\\." means a literal period (backslashes needed
##'     to escape the symbol which would otherwise match anything in a
##'     regular expression).
##' @param zapspace Should leading and trailing white space be
##'     ignored, so that, for example " . " and "." are both treated
##'     as missing.
##' @return A vector with "nothing" values replaced by R's NA symbol.
##'     Does not alter other values in the vector. Previous version
##'     had applied zapspace to non-missing values, but it no longer
##'     does so.
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @examples
##' gg <- c("", " ", "   ", "\t", "\t some", "some\t", " space first", ".",
##'        " . ")
##' n2NA(x = gg)
##' n2NA(x = gg, zapspace = FALSE)
##' n2NA(x = gg, nothings = c("\\s"), zapspace = FALSE)
##' n2NA(x = gg, nothings = c("\\."), zapspace = TRUE)
##' n2NA(x = gg, nothings = c("\\."), zapspace = FALSE)
n2NA <- function(x, nothings = c("\\.", "\\s"), zapspace = TRUE){
    if (!is.character(x)) {
        return(x)
    }
    if (!zapspace){
        for(j in seq_along(nothings)){
            x[grep(paste0("^", nothings[j], "*$"), x)] <- NA
        }
    } else {
        for(j in seq_along(nothings)){
            x[grep(paste0("^\\s*", nothings[j], "*\\s*$"), x)] <- NA
        }
    }
    x
}
NULL

##' Convert leading or trailing white space and tab characters to nothing.
##'
##' This eliminates any characters matched by the regular expression
##' `\\s` if they appear at the beginning of a string or at its
##' end. It does not alter spaces in the interior of a string. This
##' was created when I was not aware of R's \code{trimws} and the purpose
##' is the same. On our TODO list, we intend to eliminate this function
##' and replace its use with \code{trimws}
##' @param x A character vector
##' @return If x is a character vector, return is a character vector
##'     with leading and trailing white space values removed. If x is
##'     not a character vector, an unaltered x is returned.
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @examples
##' x <-  c("", " ", "   ", "\t", "\t some", "some\t", " space first")
##' zapspace(x)
zapspace <- function(x){
    if (!is.character(x)){
        return(x)
    }
    y <- gsub("^\\s*", "", x)
    y <- gsub("\\s*$", "", y)
    y
}
NULL


##' Import/validate a key object or import/validate a key from a file.
##'
##' After the researcher has updated the key by filling in new names
##' and values, we import that key file. This function can import the
##' file by its name, after deducing the file type from the suffix, or
##' it can receive a key object from memory.
##'
##' This can be either a wide or long format key file.
##'
##' This cleans up variables in following ways.  1) \code{name_old}
##' and \code{name_new} have leading and trailing spaces removed 2)
##' \code{value_old} and \code{value_new} have leading and trailing
##' spaces removed, and if they are empty or blank spaces, then new
##' values are set as NA.
##'
##' Policy change concerning empty "value_new" cells in input keys
##' (20170929).
##' 
##' There is confusion about what ought to happen in a wide key when
##' the user leaves value_new as empty or missing. Literally, this
##' means all values are converted to missing, which does not seem
##' reasonable. Hence, when a key is wide, and value_new is one of the
##' na.strings elements, we assume the value_new is to be copied
##' from value_old. That is to say, if value_new is not supplied,
##' the values remain same as in old data.
##'
##' In a long key, the behavior is different.  Since the user can
##' specify each value for a variable in a separate row, the na.strings
##' appearing in value_new are treated as missing scores in the new
##' data set to be created.
##' 
##' @param key A key object (class key or keylong) or a file name
##'     character string (ending in csv, xlsx or rds).
##' @param ignoreCase In the use of this key, should we ignore
##'     differences in capitalization of the "name_old" variable?
##'     Sometimes there are inadvertent misspellings due to changes in
##'     capitalization. Columns named "var01" and "Var01" and "VAR01"
##'     probably should receive the same treatment, even if the key
##'     has name_old equal to "Var01".
##' @param sep Character separator in \code{value_old} and
##'     \code{value_new} strings in a wide key. Default is are "|".
##'     It is also allowed to use "<" for ordered variables. Use
##'     regular expressions in supplying separator values.
##' @param na.strings Values that should be converted to missing data.
##'     This is relevant in \code{name_new} as well as
##'     \code{value_new}. In spreadsheet cells, we treat "empty" cells
##'     (the string ""), or values like "." or "N/A", as missing with
##'     defaults ".", "", "\\s" (white space), and "N/A". Change that
##'     if those are not to be treated as missings.
##' @param ... additional arguments for read.csv or read.xlsx.
##' @param keynames Don't use this unless you are very careful. In our
##'     current scheme, the column names in a key should be
##'     c("name_old", "name_new", "class_old", "class_new",
##'     "value_old", "value_new", "missings", "recodes"). If your key
##'     does not use those column names, it is necessary to provide
##'     keynames in a format "our_name"="your_name". For example,
##'     keynames = c(name_old = "oldvar", name_new = "newname",
##'     class_old = "vartype", class_new = "class", value_old =
##'     "score", value_new = "val").
##' @param missSymbol Defaults to period "." as missing value
##'     indicator.
##' @export
##' @return key object, should be same "wide" or "long" as the input
##'     Missing symbols in value_old and value_new converted to ".".
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
##' mydf.key <-  keyImport(mydf.key.path)
##' ## Create some dupes
##' mydf.key <- rbind(mydf.key, mydf.key[c(1,7), ])
##' mydf.key2 <- keyImport(mydf.key)
##' mydf.key2
##' ## create some empty value_new cells
##' mydf.key[c(3, 5, 7) , "value_new"] <- ""
##' mydf.key3 <- keyImport(mydf.key)
##' mydf.key3
##' mydf.keylong.path <- system.file("extdata", "mydf.key_long.csv", package = "kutils")
##' mydf.keylong <- keyImport(mydf.keylong.path)
##'
##' ## testDF is a slightly more elaborate version created for unit testing:
##' testdf.path <- system.file("extdata", "testDF.csv", package = "kutils")
##' testdf <- read.csv(testdf.path, header = TRUE)
##' keytemp <- keyTemplate(testdf, long = TRUE)
##' ## A "hand edited key file"
##' keyPath <- system.file("extdata", "testDF-key.csv", package="kutils")
##' key <- keyImport(keyPath)
##' keydiff <- keyDiff(keytemp, key)
##' key2 <- rbind(key, keydiff$neworaltered)
##' key2 <- unique(key)
##' if(interactive())View(key2)
##' 
keyImport <- function(key, ignoreCase = TRUE,
                      sep = c(character = "\\|", logical = "\\|",
                              integer = "\\|", factor = "\\|",
                              ordered = "[\\|<]", numeric = "\\|")
                      , na.strings = c("\\.", "", "\\s+",  "N/A")
                      , missSymbol = "."
                      , ...
                      , keynames = NULL)
{
    if (is.character(key)) key <- keyRead(key)

    legalClasses = c("integer", "numeric", "double", "factor",
                     "ordered", "character", "logical")
    keynames.std <- c("name_old",
                      "name_new",
                      "class_old",
                      "class_new",
                      "value_old",
                      "value_new",
                      "missings",
                      "recodes")

    if(!is.null(keynames)){
        ## User supplied keynames, so reverse their keynames
        ## to put corrected names onto the  key
        colnames(key) <- mapvalues(colnames(key), from = keynames,
                                   to = names(keynames), warn_missing=FALSE)
    }
    ## Use partial matching to try to fix column names that editor
    ## may have altered accidentally. 
    ## If key does not include all of the expected
    ## names, this uses partial matching to replace the existing
    ## unrecognized names.
    if (any(!keynames.std %in% colnames(key))) { 
        key.oldnames <- colnames(key)
        ## Filter out oldnames that don't pmatch, rm the NAs
        key.oldnames.pmatches <- na.omit(key.oldnames[pmatch(key.oldnames, keynames.std)])
        keynames.std.pmatches <- keynames.std[pmatch(key.oldnames.pmatches, keynames.std)]
        colnames(key) <- mapvalues(colnames(key),
                                   from = key.oldnames.pmatches,
                                   to = keynames.std.pmatches)
    }
    ## Now, what to do if columns are missing?
    ## if no "name_old" "class_old", "value_old", then quit
    ## if "name_new", "class_new", "value_new" missing, then copy from old.
    ## if "missings" and "recodes" are missing, create new full of ""
    ## TODO: omit blank rows
    cols.required <- c("name_old", "class_old", "value_old")
    cols.copyable <- c("name_new", "class_new", "value_new")
    cols.blankable <- c("missings", "recodes")
    if(any(!cols.required %in% colnames(key))){
        MESSG <- paste0("Key is missing columns: ",
                 paste0(cols.required[!cols.required %in% colnames(key)], collapse = " "))
        stop(MESSG)
    }
    if(any(!cols.copyable %in% colnames(key))){
        cols.missing <- cols.copyable[!cols.copyable %in% colnames(key)]
        for(i in cols.missing) {
            j <- gsub("new$", "old", i)
            key[ , i] <- key[ ,j]
        }
    }
    if(any(!cols.blankable %in% colnames(key))){
        cols.missing <- cols.blankable[!cols.blankable %in% colnames(key)]
        for(i in cols.missing) {
            key[ , i] <- ""
        }
    }

    
   
    uniquifyNameNew <- function(key, long = FALSE){
        ## wide key easy!
        if(!long){
            name_new_orig <- key[ , "name_new"]
            key[ , "name_new"] <- make.names(key[ , "name_new"], unique = TRUE)
            if (sumdupes <- sum(key[ , "name_new"] != name_new_orig)){
                MESSG <- paste("keyImport: Wide key with duplicated name_new values.",
                               sumdupes, " name_new values were altered:\n")
                cat(MESSG)
                print(key[name_new_orig != key[ , "name_new"], c("name_old", "name_new")])
            }
            return(key[ , "name_new"])
        } else {
            ## long key, TODO 20171003: Difficult to get right for
            ## sure, so lets return an error now, think on it later.
            newindex <- paste0(key[ , "name_old"], "|",  key[ , "name_new"], "|",
                               key[ , "value_old"])
            dupes <- duplicated(newindex)
            if(any(dupes)){
                cat("keyImport: These are duplicated rows in the key:\n")
                print(key[dupes, c("name_old", "name_new", "value_old")])
                MESSG <- paste("keyImport terminated: long key format is incorrect.")
                stop(MESSG)
            }
            return(key[ , "name_new"])
        }
     }

    key.orig <- key

    ## Omit key rows in which name_new is a missing or na.string
    key <- key[!isNA(key$name_new), ]
    
    ## Deduce if this is a long key. If separators | < assume it is wide
    if (any(grepl("[\\|<]", key$value_old))) {
        ## There are separators, this is almost surely a wide key
        long <- FALSE
    } else {
        long <- TRUE
    }
    
    if (!long){
        MESSG <- paste("keyImport guessed that is a wide format key.\n")
        cat(MESSG)
        ## Use the unique-ified key now
        key[ , "name_new"] <- uniquifyNameNew(key, long = FALSE)
        ## If wide key and value_new is empty/missing, make it a copy of value_old.
        ## Otherwise, the missing treatment sets all values as missing, which is pointless.
        ismissing <- isNA( key[ , "value_new"])
        key[ismissing, "value_new"] <- key[ismissing , "value_old"]
        key <- wide2long(key, sep)
        keysplit <- split(key, key[ , "name_new"])
        valcheck <- vapply(keysplit, function(keyvar){
            keyclass <- unique(keyvar$class_new)
            isTRUE(checkCoercion(keyvar$value_new,  unique(keyclass),
                                 na.strings = na.strings))
        }, logical(1))
        if(any(!valcheck)){
            MESSG <- "value_new in key cannot be coerced to class_new\n"
            cat(MESSG)
            print(keysplit[!valcheck])
            MESSG <- 
            stop(MESSG)
        }
        
        rownames(key) <- NULL
    } else {
        key[ , "name_new"] <- uniquifyNameNew(key, long = TRUE)
        keyw <- long2wide(key)
        ## Use same trick as in import of wide key to replace empty value_new
        ## with value_old
        ismissing <- grepl(paste0("^", paste0(na.strings, collapse="$|^"), "$"), keyw[ , "value_new"])
        keyw[ismissing, "value_new"] <- keyw[ismissing , "value_old"]
        key <- wide2long(keyw)
    }
    
    ## For Logicals: If value_new is 1 or 0, convert to TRUE/FALSE symbols
    key[key$class_new == "logical" & !is.na(key$value_new) & key$value_new == 1, "value_new"] <- TRUE
    key[key$class_new == "logical" & !is.na(key$value_new) & key$value_new == 0, "value_new"] <- FALSE
    key[key$class_old == "logical" & !is.na(key$value_old) & key$value_old == 1, "value_old"] <- TRUE
    key[key$class_old == "logical" & !is.na(key$value_old) & key$value_old == 0, "value_old"] <- FALSE
    
    key$missings <- gsub("<-", "< -", key$missings, fixed = TRUE)
    
    ## protect against user-inserted spaces (leading or trailing)
    key$name_old <- zapspace(key$name_old)
    key$name_new <- zapspace(key$name_new)
    
    ## if recode or missings are not supplied, we get logicals.
    ## handle empty missing/recode columns
    if (all(is.na(key$missings))) key$missings <- character(length(key$missings))
    if (all(is.na(key$recodes))) key$recodes <- character(length(key$recodes))
    
    ## If name_new is any missing symbol, remove from key
    remove <- key$name_old[key$name_new %in% na.strings]
    key <- key[!key$name_old %in% remove,]

    if (length(unique(remove)) >= 1){
        MESSG <- paste0("keyImport: Delete entries for ",
                        length(remove),
                        "variables because name_new is an na.string.\n") 
        warning(MESSG)
    }

    ## if this is long key, following is safe. How about wide key?
    key$value_old <- trimws(key$value_old)
    key$value_new <- trimws(key$value_new)

    ## pj 20171002: following appears redundant
    ## key$value_old <- ifelse(is.na(key$value_old), missSymbol, key$value_old)
    ## key$value_new <- ifelse(is.na(key$value_new), missSymbol, key$value_new)
    key[key$value_new %in% na.strings, "value_new"] <- missSymbol
    key[key$value_old %in% na.strings, "value_old"] <- missSymbol
    
    ## Delete repeated rows:
    key <- key[!duplicated(key), ]

    if (any(!unique(key$class_new) %in% legalClasses)){
        messg <- paste("Unfamiliar class_new value\n",
                       paste(unique(key$class_new)[!unique(key$class_new) %in% legalClasses], collapse = ", ")
                       )
        warning(messg)
    }

    if (long){
        class(key) <- c("keylong", "data.frame")
        attr(key, "ignoreCase") <- ignoreCase
        attr(key, "na.strings") <- na.strings
        return(key)
    } else {
        keywide <- long2wide(key)
        class(keywide) <- c("key", "data.frame")
        attr(keywide, "ignoreCase") <- ignoreCase
        attr(keywide, "na.strings") <- na.strings
        return(keywide)
    }
    stop("keyImport should not reach this point")
}



##' Convert the variable key into a list of long keys, one for each variable.
##'
##' A keylist element is a long key for one new variable. The new
##' variables are named by "name_old.name_new" for uniqueness.
##'
##' If the key has one old variable being recoded 6 ways, that begets
##' 6 elements in the resulting list. Attributes including the classes
##' of the old and new variables are included.
##'
##' @param key A key object or a file name, csv, xlsx or rds.
##' @param sep Separator regular expressions
##' @param na.strings Strings that will be treated as NA. This will be
##'     used only if the key object does not have an na.strings
##'     attribute.
##' @keywords internal
##' @return A list with one element per variable name, along with some
##'     attributes like class_old and class_new. The class is set as
##'     well, "keylist"
##' @author Paul Johnson <pauljohn@@ku.edu>
## ##' @examples
## ##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
## ##' mydf.key <-  keyImport(mydf.key.path)
## ##' mydf.key.keylist <- makeKeylist(mydf.key)
## ##' mydf.keylong.path <- system.file("extdata", "mydf.key_long.csv", package = "kutils")
## ##' mydf.keylong <- keyImport(mydf.keylong.path)
## ##' mydf.keylong.keylist
makeKeylist <- function(key,
                        sep = c(character = "\\|", logical = "\\|",
                              integer = "\\|", factor = "\\|",
                              ordered = "[\\|<]", numeric = "\\|"),
                        na.strings = c("\\.", "", "\\s+",  "N/A"))
{
    ## if x is in na.strings, return NA
    ## if "split" is NA or blank space or TAB, return unchanged x
    strsplit2 <- function(x, split, fixed = FALSE, perl = FALSE, useBytes = FALSE){
        if (is.na(n2NA(zapspace(split)))) return(x)
        if (is.na(n2NA(zapspace(x)))) return(NA)
        strsplit(x, split, fixed = fixed, perl = perl, useBytes = useBytes)
    }

    ## if it is already a key list, return with no changes
    if (inherits(key,"keylist")) return(key)
    if (is.character(key)) key <- keyImport(key)
    if (inherits(key, "key")){
        long <- FALSE
    } else if (inherits(key, "keylong")){
        long <- TRUE
    }
    ## Allow arguments to override na.strings from key?
    if (missing(na.strings) && !is.null(attr.na.strings <- attr(key, "na.strings"))){
        na.strings <- attr.na.strings
    }
    if (!long){
        key <- wide2long(key)
    }
    
    ## like unique, but it throws away white space, NA, and
    ## fails if there is more than one unique nonmissing value
    unique.one <- function(x){
        y <- unique(na.omit(n2NA(zapspace(x))))
        if (length(y) != 1){
            messg <- paste("Value of", deparse(substitute(x)), "not unique")
            stop (messg)
        }
        y
    }

    ## Clean up the "name_new" column. Danger that 2 name_new
    ## values are same for different variables, but they must be
    ## unique named columns.
    name_new.unique <- unique(key$name_new)
    name_new.clean <- make.names(name_new.unique, unique = TRUE)
    names(name_new.clean) <- name_new.unique
    key[ , "name_new"] <- name_new.clean[key[, "name_new"]]
    
    ## Create a keylist member for a given key chunk
    makeOneVar <- function(keyds){
        name_old <- unique.one(keyds$name_old)
        name_new <- unique.one(keyds$name_new)
        class_old <- unique.one(keyds$class_old)
        class_new <- unique.one(keyds$class_new)
        recodes <- unique(na.omit(n2NA(zapspace(keyds$recodes))))
        recodes <- if(length(recodes) > 0) unlist(strsplit(recodes, split=";", fixed = TRUE))
        missings <- paste(na.omit(n2NA(zapspace(keyds$missings))), collapse = ";")
        value_new <- keyds$value_new
        value_old <- keyds$value_old
        if (class_old == "logical") {
            value_old[value_old == "TRUE"] <- TRUE
            value_old[value_old == "FALSE"] <- FALSE
        }
        if (class_new == "logical") {
            value_new[value_new == "TRUE"] <- TRUE
            value_new[value_new == "FALSE"] <- FALSE
        }
        value_new[isNA(value_new, na.strings)] <- NA
        value_old[isNA(value_old, na.strings)] <- NA
        ## If not a factor, cast new values with class_new. Don't do this to
        ## factors, though, leave as text
        if (!class_new %in% c("factor", "ordered") && class(value_new) != class_new){
            mytext <- paste0("value_new <- as.", class_new, "(value_new)")
            value_new <- eval(parse(text = mytext))
        }
        values <- data.frame("value_old" = value_old,
                             "value_new" = value_new, stringsAsFactors = FALSE)
        list(name_old = name_old, name_new = name_new,
             class_old = class_old, class_new = class_new,
             values = values,  missings = missings, recodes = recodes)
    }
    
    ## Make this a factor and control the ordering of the levels. Otherwise,
    ## split applies factor() and re-alphabetizes it.
    name_old.new <- paste0(key[ , "name_old"], ".", key[ , "name_new"])
    name_old.new <- factor(name_old.new, levels = unique(name_old.new))
    keysplit <- split(key, name_old.new, drop = FALSE)
    keylist <- lapply(keysplit, makeOneVar)

    attr(keylist, "na.strings") <- na.strings
    attr(keylist, "class_old") <- sapply(keylist, function(keyds) keyds$class_old)
    attr(keylist, "class_new") <- sapply(keylist, function(keyds) keyds$class_new)
    class(keylist) <- "keylist"
    keylist
}
NULL




##' Apply variable key to data frame (generate recoded data frame)
##'
##' This is the main objective of the variable key system.
##' @param dframe An R data frame
##' @param key A variable key object, of class either "key" or
##'     "keylong"
##' @param diagnostic Default TRUE: Compare the old and new data
##'     frames carefully with the keyDiagnostic function.
##' @param drop Default TRUE. True implies drop = c("vars",
##'     "vals"). TRUE applies to both variables ("vars") and values
##'     ("vals"). "vars" means that a column will be omitted from data
##'     if it is not in the key "name_old". Similarly, if anything
##'     except "." appears in value_old, then setting drop="vals"
##'     means omission of a value from key "value_old" causes
##'     observations with those values to become NA.  This is the
##'     original variable key behavior.  The drop argument allows
##'     "partial keys", beginning with kutils version 1.12. drop =
##'     FALSE means that neither values nor variables are omitted.
##'     Rather than TRUE, one can specify either drop = "vars", or
##'     drop = "vals".
##' @param safeNumericToInteger Default TRUE: Should we treat values
##'     which appear to be integers as integers? If a column is
##'     numeric, it might be safe to treat it as an integer.  In many
##'     csv data sets, the values coded c(1, 2, 3) are really
##'     integers, not floats c(1.0, 2.0, 3.0). See \code{safeInteger}.
##' @param trimws Default is "both", can change to "left", "right", or
##'     set as NULL to avoid any trimming.
##' @param ignoreCase Default TRUE. If column name is capitalized
##'     differently than name_old in the key, but the two are
##'     otherwise identical, then the difference in capitalization
##'     will be ignored.
##' @param debug Default FALSE. If TRUE, emit some warnings.
##' @return A new data.frame object, with renamed and recoded variables
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @importFrom plyr mapvalues
##' @examples
##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
##' mydf.key <-  keyImport(mydf.key.path)
##' mydf.path <- system.file("extdata", "mydf.csv", package = "kutils")
##'
##' mydf <- read.csv(mydf.path, stringsAsFactors = FALSE)
##' mydf2 <- keyApply(mydf, mydf.key)
##'
##' nls.keylong.path <- system.file("extdata", "natlongsurv.key_long.csv", package = "kutils")
##' nls.keylong <- keyImport(nls.keylong.path, long = TRUE)
##' data(natlongsurv)
##' nls.dat <- keyApply(natlongsurv, nls.keylong)
##'
keyApply <- function(dframe, key, diagnostic = TRUE,
                     safeNumericToInteger = TRUE, trimws = "both",
                     ignoreCase = TRUE,
                     drop = TRUE, debug = FALSE)
{
    legalClasses <- c("integer", "numeric", "double", "factor",
                      "ordered", "character", "logical")
    
    if(is.na(drop)|| is.null(drop)) stop("keyApply: drop argument is NA.")
    if (is.character(drop)){
        stopifnot(drop %in%  c("vars","vals"))
    }
    if (is.logical(drop)){
        if(drop) drop <- c("vars", "vals")
    }

    if (class(dframe)[1] != "data.frame"){
        MESSG <- paste("Warning: keyApply is intended for an R *data.frame*,",
                       "not subclasses like tibbles or data.tables.",
                       "keyApply coerced your input",
                       "with as.data.frame().\n")
        if (inherits(dframe, "data.frame")){
            ## Coerce without warning, but print message
            cat(MESSG)
        } else {
            ## Elevate the message to a warning
            warning(MESSG)
        }
        dframe <- as.data.frame(dframe)
    }
    dframe <- cleanDataFrame(dframe, safeNumericToInteger = safeNumericToInteger,
                             trimws = trimws)
    if (diagnostic) dforig <- dframe

    ## Need to snapshot class of input variables
    class_old.dframe <- sapply(dframe, function(x) class(x)[1])

    ## implement ignoreCase by keeping vector dfname_old.orig that we
    ## can use later to put old names back onto data frame.
    ## If key has multiple entries that are identcal after tolower(), will use
    ## first one.
    ## Keep vector of original names. If ignoreCase=FALSE, this changes nothing.
    dfname_old.orig <- colnames(dframe)
    if (ignoreCase){
        colnames(dframe) <- tolower(colnames(dframe))
    }
    names(dfname_old.orig) <- colnames(dframe)
    ## Create vector to use for re-assignment later
    names(class_old.dframe) <- colnames(dframe)

    na.strings <- attr(key, "na.strings")
    ## TODO: figure out what to do if class_old does not match input data
    ## coerce existing column to type requested in data frame?

    ## keylist: a list of long keys, one per variable
    keylist <- makeKeylist(key)
    ## list for collecting new variables.
    xlist <- list()

    names_in_key <- unique(key$name_old)
    if (ignoreCase) names_in_key <- tolower(names_in_key)
    names_not_in_key <- setdiff(colnames(dframe), names_in_key)

    ## B/c "vars" in drop, copy over the variables that are
    ## not in the key.
    if(length(names_not_in_key) && (!"vars" %in% drop)){
        for(jj in names_not_in_key){
            mytext <- paste0("xlist[[\"", jj, "\"]] <- ", "dframe[ , jj]")
            eval(parse(text = mytext))
        }
    }

    ## Process variables in keylist, put into "xlist"
    for (v in keylist) {
        if(debug){
            print(paste("\n debug"))
            print(v)
        }
        ## Extract values for convenience
        values <- v$values
        oldVals <- v$values$value_old
        newVals <- v$values$value_new
        ## keep spare copy of original name
        v$name_old.orig <- v$name_old
        if(ignoreCase) v$name_old <- tolower(v$name_old)

        ## TODO: what if class_old does not match class of imported
        ## data?  Need to think through implications of doing something like
        ## xnew <- as(xnew, class_old)

        ## If variable name from key is not in the data frame, go to next variable.
        if (!v$name_old %in% colnames(dframe)){
            messg <- paste("keyApply: ", v$name_old.orig, "not in data.\n")
            cat(messg)
            next()
        }

        ## DELETE VARIABLES from data frame
        ## If name_new is empty, then ignore that variable
        if (length(v$name_new) == 0 || isTRUE(isNA(unique(v$name_new)))){
            messg <- paste("keyApply: ", v$name_old.orig, "dropped.\n")
            cat(messg)
            next()
        }
        ## Extract candidate variable to column, will recode xnew.
        xnew <- dframe[ , v$name_old]

        ## Apply missing codes
        if (length(v$missings) > 0){
            xnew <- assignMissing(xnew, v$missings)
        }

        ## Be simple. If they have "recodes" in key, apply them.
        ## Ignore value_new. next().
        if (length(v$recodes) > 0 && !all(is.na(v$recodes))) {
            for (cmd in v$recodes) xnew <- assignRecode(xnew, cmd)
            if(!inherits(xnew, v$class_new)) {
                messg <- paste("keyApply: the key's recode function for the variable",
                               v$name_old, " did generate correct output class")
                print(v)
                stop(messg)
            }
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- ", "xnew")
            eval(parse(text = mytext))
            next()
        }

        ## value_old and value_new are full of only NA, so
        ## only perform direct class conversions
        if (NROW(na.omit(values)) == 0 || length(na.omit(newVals)) == 0) {
            ## if coercion not safe, stop
            if(!isTRUE(checkCoercion(xnew, unique(v$class_new), na.strings))){
                print(v)
                stop("keyApply: coercion failed, ", unique(v$name_old.orig))
            }
            if (v$class_new %in% c("factor", "ordered")){
                # no direct conversion to factors
                xnew1 <- factor(xnew, ordered=(v$class_new == "ordered"))  
            } else {
                eval(parse(text=paste0("xnew1 <- ", "as.", v$class_new, "(xnew)")))
            }
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- ", "xnew1")
            eval(parse(text = mytext))
            next()
        }

        ## if vals is drop, following SHOULD set as missing scores in xnew that
        ## are not in the key 
        if("vals" %in% drop){
            xnew[!xnew %in% values$value_old] <- NA
            ## pj 20171006 the factor gotca killed this:
            ## xnew <- ifelse(xnew %in% values$value_old, xnew,  NA)
        }

        ## If output is ordered or factor
        if(length(v$class_new) > 0 && v$class_new %in% c("ordered", "factor")){
            xnew2 <- plyr::mapvalues(xnew, from = values$value_old,
                                     to = values$value_new, warn_missing = FALSE)
            xnew2 <- factor(xnew2, levels = unique(values$value_new), 
                            ordered=(v$class_new == "ordered"))
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- xnew2")
            eval(parse(text = mytext))
            next()
        }

        ## if class from data frame is not same as class_old, then MUST cast
        ## as correct type. Could cast as character.
        if ((class(xnew)[1] != v$class_old) ||
           (v$class_old %in% c("ordered", "factor"))) {
            if(!isTRUE(checkCoercion(unique(xnew), unique(v$class_old), na.strings))){
                MESSG <- paste("keyApply:", unique(v$name_old.orig),
                               "input data cannot be coerced to original class.")
                print(v)
                stop(MESSG)
            }
            xnew.orig <- xnew
            if(v$class_old %in% c("ordered", "factor")){
                ## creates factor with levels in value_old
                mytext1 <- paste0("xnew <- ", v$class_old, "(xnew, values$value_old)")
            } else {
                ## coerce to class_old
                mytext1 <- paste0("xnew <- as.", v$class_old, "(xnew)")
            }
            eval(parse(text = mytext1))
        }
        
        ##Class stays same, use mapvalues, only on values that differ:
        if (classsame <- v$class_new == v$class_old)
        {
            ## keep only rows where value_old and value_new differ
            values <- values[!mapply(identical, values$value_old,
                                     values$value_new), ]
            xnew <- plyr::mapvalues(xnew, values[ , "value_old"],
                                    values[ ,"value_new"],
                                    warn_missing = FALSE)
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- xnew")
            eval(parse(text = mytext))
            next()
        }
        
        ## Special treatment to convert level values to integer/numeric values
        if (v$class_old %in% c("factor", "ordered") &&
            v$class_new %in% c("integer", "numeric", "double"))
        {
            if(!isTRUE(checkCoercion(values$value_new, unique(v$class_new), na.strings))){
                ## Value new cannot be coerced numeric, should stop
                MESSG <- paste("key error:", v$name_old.orig, "value_new not consistent with class_new:\n")
                print(v)
                stop(MESSG)
            }
            if(!isTRUE(checkCoercion(xnew, unique(v$class_new), na.strings))){
                ## Value new cannot be coerced numeric, should stop
                MESSG <- paste("key error: values of", v$name_old.orig,
                               "don't match class_new:\n")
                print(v)
                stop(MESSG)
            }
            
            ## otherwise continue
            xnew2 <- plyr::mapvalues(xnew, values[ , "value_old"],
                                     values[ , "value_new"],
                                     warn_missing = FALSE)
            if(!isTRUE(checkCoercion(levels(xnew2), unique(v$class_new), na.strings))){
                MESSG <- paste("keyApply value error:", v$name_old, "levels don't match class_new.")
                print(v)
                warning(MESSG)
            }
            xnew3 <- as(levels(xnew2)[xnew2], v$class_new)
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- xnew3")
            eval(parse(text = mytext))
            next()
        }
        
        ## convert for variables with new mapping of value_old to value_new
        ## need to run this if values differ or one is NA but other not
        valueDiff <- v$values$value_old != v$values$value_new
        if (any(valueDiff, na.rm = TRUE) || any(is.na(valueDiff))) {
            xnew <- plyr::mapvalues(xnew, values$value_old, values$value_new, warn_missing = FALSE)
            mytext1 <- paste0("xnew2 <- as.", v$class_new, "(xnew)")
            eval(parse(text = mytext1))
            mytext <- paste0("xlist[[\"", v$name_new, "\"]] <- xnew2")
            eval(parse(text = mytext))
            next()
        }
        print(v)
        MESSG <- paste("keyApply:", v$name_old, "Logic error in value mapping.")
        stop(MESSG)
    }
    
    ## How to pass stringsAsFactors=FALSE as argument? Only way is
    ## run through environment?
    stringsAsFactors.orig <-  unname(unlist(options("stringsAsFactors")))
    options(stringsAsFactors = FALSE)
    dframe <- do.call(data.frame, xlist)
    options(stringsAsFactors = stringsAsFactors.orig )
    if(diagnostic) keyDiagnostic(dforig, dframe, keylist)
    dframe
}
NULL

##' Diagnose accuracy of result from applying variable key to data
##'
##' Compare the old and new data frames, checking for accuracy of
##' calculations in various ways.
##' 
##' CAUTION: This can print WIDE matrices.  Because the on-screen
##' output will be WIDE, make the display window WIDE!
##'
##' Crosstabulate new variable versus old variable to see the coding
##' mismatches.  For tables of up to 10 values or so, that will be
##' satisfactory.
##'
##' For numeric variables, it appears there is no good thing to do
##' except possibly to re-apply any transformations.
##' 
##' @param dfold Original data frame
##' @param dfnew The new recoded data frame
##' @param keylist The imported variable key that was used to
##'     transform dfold into dfnew.
##' @param max.values Show up to this number of values for the old
##'     variable
##' @param nametrunc Truncate column and row names. Needed if there
##'     are long factor labels and we want to fit more information on
##'     table. Default = 18 for new name, old name is 10 more
##'     characters (18 + 10 = 28).
##' @param wide Number of characters per row in printed
##'     output. Suggest very wide screen, default = 200.
##' @param confidential Should numbers in table be rounded to nearest
##'     "10" to comply with security standard enforced by some
##'     American research departments.
##' @return NULL
##' @author Paul Johnson <pauljohn@@ku.edu>
keyDiagnostic <- function(dfold, dfnew, keylist, max.values = 20,
                          nametrunc = 18, wide = 200, confidential = FALSE)
{

    ## TODO if class in dfnew does not match keylist specification, fail

    ## TODO think more deeply on warning signs of bad recoding.
    ## other summary of match and mismatch.

    width.orig <- options("width")
    options(width = wide)
    if (confidential) {
        roundAt <- -1
    } else {
        roundAt <- 2
    }
    for (v in keylist){
        if (is.na(v$name_new) ||(length(v$name_new) == 0) || (!v$name_new %in% colnames(dfnew))){
            messg <- paste("Variable", v$name_new, "is not included in the new data frame")
            next()
        }
        if (length(unique(dfnew[ , v$name_new])) <= max.values){
            name_new.trunc <- substr(v$name_new, 1, min(nchar(v$name_new), nametrunc))
            name_old.trunc <- paste0(substr(v$name_old, 1, min(nchar(v$name_old), nametrunc + 10)), " (old var)")
            print(round(table(dfnew[ , v$name_new], dfold[ , v$name_old],
                              exclude = NULL, dnn = c(name_new.trunc, name_old.trunc)), roundAt))
        } else {
            messg <- paste("Variable", v$name_new, "has", max.values,
                           "unique values.", "Too large for a table.")
            print(messg)
        }
    }
    options(width = unlist(width.orig))
    NULL
}


##' Convert a key object from wide to long format
##'
##' This is not flexible, assumes columns are named in our canonical
##' style, which means the columns are named c("name_old", "name_new",
##' "class_old", "class_new", "value_old", "value_new").
##' @param key A variable key in the wide format
##' @param sep Default separator is the pipe, "\\|" for most
##'     variables, while ordered accepts pipe or less than, "\\|<". If
##'     the key did not follow those customs, other sep values may be
##'     specified for each variable class.
##' @return A long format variable key
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' mydf.path <- system.file("extdata", "mydf.csv", package = "kutils")
##' mydf <- read.csv(mydf.path, stringsAsFactors=FALSE)
##' ## Target we are trying to match:
##' mydf.keylong <- keyTemplate(mydf, long = TRUE, sort = FALSE)
##' 
##' mydf.key <- keyTemplate(mydf)
##' mydf.keywide2long <- wide2long(mydf.key)
##'
##' ## rownames not meaningful in long key, so remove in both versions
##' row.names(mydf.keywide2long) <- NULL
##' row.names(mydf.keylong) <- NULL
##' all.equal(mydf.keylong, mydf.keywide2long)
wide2long <- function(key, sep = c(character = "\\|", logical = "\\|",
                              integer = "\\|", factor = "\\|",
                              ordered = "[\\|<]", numeric = "\\|"))
{
    makeOneVar <- function(x){
        value_old <- unlist(strsplit(x$value_old, sep[x$class_old]))
        value_new <- unlist(strsplit(x$value_new, sep[x$class_new]))
        if (length(value_old) == 0) value_old <- as.character("")
        if (length(value_new) == 0) value_new <- as.character("")
        values <- cbind(value_old, value_new)
        values <- unique(values)
        ## missings <- na.omit(unlist(strsplit(x$missings, ";")))
        missings <- c(x$missings, rep("", NROW(values) - length(x$missings)))
        ## recodes <- na.omit(unlist(strsplit(x$recodes, ";")))
        recodes <- c(x$recodes, rep("", NROW(values) - length(x$recodes)))

        zz <- data.frame(name_old = x$name_old,
                         name_new = x$name_new,
                         class_old = x$class_old,
                         class_new = x$class_new,
                         value_old = values[ , "value_old"],
                         value_new = values[ , "value_new"],
                         missings = missings,
                         recodes = recodes, stringsAsFactors = FALSE)
        zz <- sortStanza(zz)
        zz
    }

    ## Unique-ify name_new
    key[ , "name_new"] <- make.names(key[ , "name_new"], unique = TRUE)
    
    ## keysplit
    name_old.new <- paste0(key[ , "name_old"], ".", key[ , "name_new"])
    name_old.new <- factor(name_old.new, levels = unique(name_old.new))
    ks <- split(key, name_old.new, drop = TRUE)
    ## build a "long stanza" for each variable
    ksl <- lapply(ks, makeOneVar)

    keylong <- do.call(rbind, lapply(ksl, as.data.frame, stringsAsFactors = FALSE))
    attr(keylong, "na.strings") <- attr(key, "na.strings")
    attr(keylong, "varlab") <- attr(key, "varlab")
    class(keylong) <- c("keylong", "data.frame")
    keylong
}



##' convert a key object from long to wide format
##'
##' ##' This is not flexible, assumes columns are named in our canonical
##' style, which means the columns are named c("name_old", "name_new",
##' "class_old", "class_new", "value_old", "value_new").
##' @param keylong A variable key in the long format
##' @param na.strings Strings to be treated as missings in value_new
##' @param missSymbol Default is ".", character to insert in value when R NA is found.
##' @return A wide format variable key
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @examples
##' mydf.path <- system.file("extdata", "mydf.csv", package = "kutils")
##' mydf <- read.csv(mydf.path, stringsAsFactors=FALSE)
##' ## A wide key we are trying to match:
##' mydf.key <- keyTemplate(mydf, long = FALSE, sort = TRUE)
##' mydf.key["x4", "missings"] <- "999"
##' ## A long key we will convert next
##' mydf.keylong <- keyTemplate(mydf, long = TRUE, sort = TRUE)
##' mydf.keylong[mydf.keylong[ , "name_old"] == "x4" &
##'     mydf.keylong[ , "value_old"] == "999", "missings"] <- "999"
##' mydf.long2wide <- long2wide(mydf.keylong)
##' all.equal(mydf.key, mydf.long2wide)
##' 
##' mydf.keylong.path <- system.file("extdata", "mydf.key_long.csv", package = "kutils")
##' mydf.keylong <- keyImport(mydf.keylong.path)
##' mydf.keywide <- long2wide(mydf.keylong)
##' mydf.keylong2 <- wide2long(mydf.keywide)
##' ## Is error if following not TRUE
##' all.equal(mydf.keylong2, mydf.keylong)
##' 
long2wide <- function(keylong, na.strings = c("\\.", "", "\\s+",  "N/A"),
                      missSymbol = "."){
    name_old.new <- paste0(keylong[ , "name_old"], ".", keylong[ , "name_new"])
    name_old.new <- factor(name_old.new, levels = unique(name_old.new))
    ##kls = keylong split.
    ## 20161215: why didn't I use keylist maker here?
    keylong[is.na(keylong[ , "value_new"]), "value_new"] <- missSymbol
    keylong[is.na(keylong[ , "value_old"]), "value_old"] <- missSymbol

    kls <- split(keylong, name_old.new, drop = TRUE)

    makeOneWide <- function(x){
        sep_old <- if("ordered" %in% unique(x$class_old)) "<" else "|"
        sep_new <- if("ordered" %in% unique(x$class_new)) "<" else "|"
        ## Replace "" with NA, then get rid of NAs
        if(is.null(x$missings) || all(is.na(x$missings))){
            missings <- ""
        } else {
            missings <- n2NA(unique(x$missings))
            missings <- if (all(is.na(missings))) "" else na.omit(missings)
        }
        if(is.null(x$recodes) || all(is.na(x$recodes))) {
            recodes <- ""
        } else{
            recodes <- n2NA(unique(x$recodes))
            recodes <- if (all(is.na(recodes))) "" else na.omit(recodes)
        }
        values <- cbind(value_old = x$value_old, value_new = x$value_new)
        values <- unique(values)
        values <- sortStanza(values)
        value_new.ismissing <- isNA(values[ , "value_new"])
                   
        if(any(!na.omit(value_new.ismissing))){
            newvalues <- paste(values[ , "value_new"], collapse = sep_new)
        } else {
            newvalues <- "."
        }
        list(name_old = unique(x$name_old),
             name_new = unique(x$name_new),
             class_old = unique(x$class_old),
             class_new = unique(x$class_new),
             value_old = paste(values[ , "value_old"], collapse = sep_old),
             value_new = newvalues,
             missings = paste(missings, collapse = ";"),
             recodes =  paste(recodes, collapse = ";"))
    }

    keywide <- lapply(kls, makeOneWide)

    key <- do.call("rbind", lapply(keywide, data.frame, stringsAsFactors = FALSE))
    class(key) <- c("key", "data.frame")
    attr(key, "na.strings") <- attr(keylong, "na.strings")
    attr(key, "varlab") <- attr(keylong, "varlab")
    key
}

##' An all.equal method for variable wide keys
##'
##' Disregards attributes by defaults. Before comparing the two keys,
##' the values are sorted by \code{"name_new")}.
##' @author Paul E. Johnson <pauljohn@@ku.edu>
##' @method all.equal key
##' @param target A wide variable key
##' @param current A wide variable key
##' @param ... Other arguments that are ignored
##' @param check.attributes Default FALSE
##' @export
all.equal.key <- function(target, current, ..., check.attributes = FALSE){
    target <- target[order(target[ , "name_new"]), ]
    current <- current[order(current[ , "name_new"]), ]
    reslt <- base::all.equal.list(target, current, check.attributes = FALSE)
    reslt
}


##' An all.equal method for variable long keys
##'
##' Disregards attributes by defaults. Before comparing the two keys,
##' the values are sorted by \code{"name_new")}.
##' @author Paul E. Johnson <pauljohn@@ku.edu>
##' @method all.equal keylong
##' @export
##' @param target A long variable key
##' @param current A long variable key
##' @param ... Other arguments that are ignored
##' @param check.attributes Default FALSE
all.equal.keylong <- function(target, current, ..., check.attributes = FALSE){
    target <- target[order(target[ , "name_new"],
                           target[ , "value_old"],
                           target[ , "value_new"]), ]
    current <- current[order(current[ , "name_new"],
                             current[ , "value_old"],
                             current[ , "value_new"]), ]
    reslt <- base::all.equal.list(target, current, check.attributes = FALSE)
    reslt
}


##' Update a key in light of a new data frame (add variables and
##' values)
##'
##' The following chores must be handled.
##' 1. If the data.frame has variables which are not currently
##' listed in the variable key's "name_old" variable, then new
##' variables are added to the key.
##' 2. If the data.frame has new values for the previously
##' existing variables, then those values must be added to the
##' keys.
##' 3. If the old key has "name_new" or "class_new" designated
##' for variables, those MUST be preserved in the new key
##' for all new values of those variables.
##'
##' This function will not alter key values for "class_old",
##' "value_old" or "value_new" for variables that have no new
##' information.
##'
##' This function deduces if the key provided is in the wide or long
##' format from the class of the object.
##' @param key A variable key
##' @param dframe A data.frame object.
##' @param append If long key, should new rows be added to the end of
##'     the updated key? Default is TRUE. If FALSE, new rows will be
##'     sorted with the original values.
##' @param safeNumericToInteger Default TRUE: Should we treat
##'     variables which appear to be integers as integers? In many csv
##'     data sets, the values coded \code{c(1, 2, 3)} are really
##'     integers, not floats \code{c(1.0, 2.0, 3.0)}. See
##'     \code{safeInteger}.
##' ## Need to consider implementing this:
##' ## @param ignoreCase
##' @export
##' @return Updated variable key.
##' @importFrom plyr rbind.fill
##' @author Ben Kite <bakite@ku.edu>
##' @examples
##' ## Original data frame has 2 variables
##' dat1 <- data.frame("Score" = c(1, 2, 3, 42, 4, 2),
##'                    "Gender" = c("M", "M", "M", "F", "F", "F"))
##' ## New data has all of original dat1, plus a new variable "Weight"
##' #and has new values for "Gender" and "Score"
##' dat2 <- plyr::rbind.fill(dat1, data.frame("Score" = 7,
##'            "Gender" = "other", "Weight" = rnorm(3)))
##' ## Create a long key for the original data, specify some
##' ## recodes for Score and Gender in value_new
##' key1.long <- keyTemplate(dat1, long = TRUE, varlab = TRUE)
##' 
##' key1.long$value_new <- gsub("42", "10", key1.long$value_new)
##' key1.long$value_new[key1.long$name_new == "Gender"] <-
##'        mgsub(c("F", "M"), c("female", "male"),
##'        key1.long$value_new[key1.long$name_new == "Gender"])  
##' key1.long[key1.long$name_old == "Score", "name_new"] <- "NewScore"
##' keyUpdate(key1.long, dat2, append = TRUE)
##' ## Throw away one row, make sure key still has Score values
##' dat2 <- dat2[-1,]
##' (key1.long.u <- keyUpdate(key1.long, dat2, append = FALSE))
##' ## Key change Score to character variable
##' key1.longc <- key1.long
##' key1.longc[key1.longc$name_old == "Score", "class_new"] <- "character"
##' keyUpdate(key1.longc, dat2, append = TRUE)
##' str(dat3 <- keyApply(dat2, key1.longc))
##' ## Now try a wide key
##' key1.wide <- keyTemplate(dat1)
##' ## Put in new values, same as in key1.long
##' key1.wide[key1.wide$name_old == "Score", c("name_new", "value_new")] <-
##'                             c("NewScore", "1|2|3|4|10|.")
##' key1.wide[key1.wide$name_old == "Gender", "value_new"] <- "female|male|."
##' ## Make sure key1.wide equivalent to key1.long:
##' ## If this is not true, it is a fail
##' all.equal(long2wide(key1.long), key1.wide, check.attributes = FALSE)
##' (key1.wide.u <- keyUpdate(key1.wide, dat2))
##' key1.long.to.wide <- long2wide(key1.long.u)
##' all.equal(key1.long.to.wide, key1.wide.u, check.attributes = FALSE)
##' str(keyApply(dat2, key1.wide.u))
##' 
##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
##' mydf.key <-  keyImport(mydf.key.path)
##' ##'
##' set.seed(112233)
##' N <- 20
##' ## The new Jan data arrived!
##' mydf2 <- data.frame(x5 = rnorm(N),
##'                     x4 = rpois(N, lambda = 3),
##'                     x3 = ordered(sample(c("lo", "med", "hi"),
##'                                        size = N, replace=TRUE),
##'                                 levels = c("med", "lo", "hi")),
##'                     x2 = letters[sample(c(1:4,6), N, replace = TRUE)],
##'                     x1 = factor(sample(c("jan"), N, replace = TRUE)),
##'                     x7 = ordered(letters[sample(c(1:4,6), N, replace = TRUE)]),
##'                     x6 = sample(c(1:5), N, replace = TRUE),
##'                     stringsAsFactors = FALSE)
##' mydf.key2 <- keyUpdate(mydf.key, mydf2)
##' mydf.key2
##' mydf.key2["x1", "value_old"] <- "cindy|bobby|jan|peter|marcia|greg|."
##' mydf.key2["x1", "value_new"] <- "Cindy<Bobby<Jan<Peter<Marcia<Greg<."
##' ##'
##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
##' mydf.path <- system.file("extdata", "mydf.csv", package = "kutils")
##' mydf <- read.csv(mydf.path, stringsAsFactors=FALSE)
##' mydf3 <- rbind(mydf, mydf2)
##' ## Now recode with revised key
##' mydf4 <- keyApply(mydf3, mydf.key2)
##' rockchalk::summarize(mydf4)
keyUpdate <- function(key, dframe, append = TRUE,
                      safeNumericToInteger = TRUE)
{
    ## it is a long key, or convert it into one
    long <- TRUE
    if (class(key)[1] == "key") {
        key <- wide2long(key)
        long <- FALSE ## key in was wide
    }
    if (class(key)[1] != "keylong") {
        messg <- paste("The key object is from the wrong class.")
        stop(messg)
    }

    dframe <- cleanDataFrame(dframe, safeNumericToInteger = safeNumericToInteger)
    keynew <- keyTemplate(dframe, long = TRUE)
    if (isTRUE(all.equal(keynew, key))){
        return(key)
    }
   
    ## pj 20170929 BIG QUESTION. If keyDiff is correct (still dont know),
    ## can't we use it, and then just take the neworaltered object and
    ## append to old key?  We never want to delete rows from key, just
    ## add new ones.  Maybe then need check for contradictions.

    ## CHECK: what does "long2wide" do when rows in a long key are
    ## "shuffled" or if the new values all exist at end of long key.

    nameval.old <- paste0(key$name_old, key$value_old)
    nameval.new <- paste0(keynew$name_old, keynew$value_old)
    ## Throw away previously observed name-value combinations
    ## Creates new rows to insert in original key long form
    keynew2 <- keynew[!nameval.new %in% nameval.old, ]

    ## if name_new and class_new are re-defined in old key,
    ## copy  those into new key
    name.old.new <- unique(key[ , c("name_old", "name_new")])
    rownames(name.old.new) <- name.old.new[ , "name_old"]

    ## Tricky if new variable arrived with data, can't just copy
    ## name_new without checking
    keynew2$name_new <- ifelse(keynew2$name_old %in% name.old.new[ , "name_old"],
                               name.old.new[keynew2$name_old, "name_new"],
                               keynew2$name_new)

    class.old.new <- unique(key[ , c("name_old", "class_old", "class_new")])
    rownames(class.old.new)<- class.old.new[ , "name_old"]
    ## for same-name_old cases, copy in class
    keynew2$class_new <- ifelse(keynew2$name_old %in% name.old.new[ , "name_old"],
                                class.old.new[keynew2$name_old, "class_new"],
                                keynew2$class_new)
    ## 20181127
    ## output <- rbind(key, keynew2)
    output <- keysPool(list(key, keynew2))
    ## User expects key returned in same format, keylong or key
    if(!long) {
        output <- long2wide(output)
        row.names(output) <- output$name_old
        return(output)
    } else {
        if (!append) {
            output <- output[order(output$name_old),]
            output <- naLast(output)
        }
        row.names(output) <- seq(1, nrow(output), 1)
        return(output)
    }
    attr(output, "na.strings") <- attr(key, "na.strings")
    output
}

##' Sort key so that non missing values are first in the
##' value vector.
##'
##' @keywords internal
##' @param key key, long or wide
##' @param byvar Default is "name_new", the column for sorting into blocks.
##' @param valvar The value variable on which sorting is to be done, "value_new".
##' @param na.strings vector of characters to be treated as NA
##' @return sorted key, with blocks that have the missings last
naLast <- function(key, byvar = "name_new", valvar = "value_new", 
                   na.strings = c("\\.", "", "\\s+",  "N/A"))
{
    ##keep attributes not equal to "names" and "row.names"
    attrs <- attributes(key)[!names(attributes(key)) %in% c("names", "row.names")]
    long <- if(inherits(key, "keylong")) TRUE else FALSE
    if(!long) key <- wide2long(key)
    keysplit <- split(key, key[ , byvar])
    for(jj in names(keysplit)) {
        keysplit[[jj]] <- sortStanza(keysplit[[jj]],
                                     na.strings = na.strings)
    }
    key <- do.call(rbind, keysplit)
    if(!long) return(long2wide(key))
    ## else long return
    key
}


##' Move missing values to last row in long key block
##' 
##' Receive key stanza and sort the rows so that the missing
##' values are last in the list. Leaves ordering of other rows
##' unchanged otherwise.
##' 
##' @param keyblock a variable key, or section of rows
##' @param valvar Default is "value_new", the column for sorting.
##' @param na.strings Stings to be treated as missing, along with R's
##'     NA symbol
##' @return keyblock, row re-arranged with missings last
##' @keywords internal
sortStanza <- function(keyblock, valvar = "value_new",
                       na.strings = c("\\.", "", "\\s+",  "N/A"))
{
    ordr <- seq_along(keyblock[ , valvar])
    ismissing <- isNA(keyblock[ , valvar])
    neworder <- c(ordr[!ismissing], ordr[ismissing])
    keyblock[neworder, , drop = FALSE]
}
    

##' Show difference between 2 keys
##'
##' @param oldkey key, original 
##' @param newkey key, possibly created by keyUpdate or by user edits
##' @return NULL, or list with as many as 2 key difference data.frames,
##'  named "deleted" and "neworaltered"
##' @author Ben Kite <bakite@@ku.edu> and Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @examples
##' 
##' dat1 <- data.frame("Score" = c(1, 2, 3, 42, 4, 2),
##'                    "Gender" = c("M", "M", "M", "F", "F", "F"))
##' ## First try with a long key
##' key1 <- keyTemplate(dat1, long = TRUE)
##' key1$value_new <- gsub("42", "10", key1$value_new)
##' key1$value_new[key1$name_new == "Gender"] <-
##'        mgsub(c("F", "M"), c("female", "male"),
##'        key1$value_new[key1$name_new == "Gender"])  
##' key1[key1$name_old == "Score", "name_new"] <- "NewScore"
##' dat2 <- data.frame("Score" = 7, "Gender" = "other", "Weight" = rnorm(3))
##' dat2 <- plyr::rbind.fill(dat1, dat2)
##' dat2 <- dat2[-1,]
##' key2 <- keyUpdate(key1, dat2, append = TRUE)
##' (kdiff <- keyDiff(key1, key2))
keyDiff <- function(oldkey, newkey){
    oldkey.name <- deparse(substitute(oldkey))
    newkey.name <- deparse(substitute(newkey))
    rownames(oldkey) <- paste0(rownames(oldkey), ".old")
    rownames(newkey) <- paste0(rownames(newkey), ".new")
    oldkey$key <- "old"
    newkey$key <- "new"
    xx <- rbind(oldkey, newkey)
    ## xx$top: is a duplicate of one that went before
    xx$top <- duplicated(xx[, -match("key", colnames(xx))])
    ## xx$bot: will be duplicated below
    xx$bot <- duplicated(xx[, -match(c("key", "top"), colnames(xx))], fromLast = TRUE)
    ## lines that are in old key but not in new key:
    deleted <- xx[xx$key == "old" & !xx$bot, -match(c("key", "top", "bot"), colnames(xx)) , drop = FALSE]
    ## in newkey but not old key
    neworaltered <- xx[xx$key == "new" & !xx$top, -match(c("key", "top", "bot"), colnames(xx)), drop = FALSE]

    if(NROW(deleted) == 0  && NROW(neworaltered)  == 0){
        print("There are no differences between these keys!")
        return(NULL)
    }

    if(NROW(deleted) > 0)  {
        messg1 <- paste0("keyDiff: ", NROW(deleted),
                         " rows in ", oldkey.name,
                         " are not in ", newkey.name,
                         "\n")
        attr(deleted, "message") <- messg1
        cat(messg1)
    } else {
        deleted <- NULL
    }
     if(NROW(neworaltered) > 0){
         messg2 <- paste0("keyDiff: ", NROW(neworaltered),
                          " rows in ", newkey.name,
                          " are not in ", oldkey.name, "\n")
         attr(neworaltered, "message") <- messg2
         cat(messg2)
     }
      
    output <- list(deleted = deleted,
                   neworaltered = neworaltered)
    class(output) <- "keyDiff"
    invisible(output)
}

##' Print a keyDiff object
##'
##' @method print keyDiff
##' @export
##' @param x A keyDiff object
##' @param ... Other arguments passed through to print
##' @author Ben Kite <bakite@@ku.edu>
print.keyDiff <- function(x, ...){
    if(!is.null(x[["deleted"]])){
        dat <- x[["deleted"]]
        cat(attr(dat, "message")) 
        print(dat, ...)
    }
    if(!is.null(x[["neworaltered"]])){
        dat <- x[["neworaltered"]]
        cat(attr(dat, "message"))
        print(x[["neworaltered"]], ...)
    }
}





##' Compares keys from different data sets; finds differences classes of variables.

##' This used to check for similarity of keys from various data sets,
##' one precursor to either combining the keys or merging the data
##' sets themselves.
##'
##' When several supposedly "equivalent" data sets are used
##' to generate variable keys, there may be trouble. If variables
##' with same name have different classes, keyApply might fail
##' when applied to one of the data sets.
##'
##' This reports on differences in classes among keys. By default, it
##' looks for differences in "class_old", because that's where we
##' usually see trouble.
##'
##' The output here is diagnostic. The keys can be fixed manually, or the
##' function keysPool can implement an automatic correction.
##' @param keys A list with variable keys.
##' @param col Name of key column to check for equivalence. Default is "class_old", but
##' "class_new" can be checked as well.
##' @param excludere Exclude variables matching a regular expression
##'     (re). Default example shows exclusion of variables that end in
##'     the symbol "TEXT".
##' @return Data.frame summarizing class differences among keys
##' @author Paul Johnson
##' @export
##' @examples
##' set.seed(234)
##' dat1 <- data.frame(x1 = rnorm(100),
##'                    x2 = sample(c("Male", "Female"), 100, replace = TRUE),
##'                    x3_TEXT = "A", x4 = sample(1:10000, 100))
##' dat2 <- data.frame(x1 = rnorm(100), x2 = sample(c("Male", "Female"),
##'                    100, replace = TRUE),
##'                    x3_TEXT = sample(1:100, 100),
##'                    stringsAsFactors = FALSE)
##' key1 <- keyTemplate(dat1)
##' key2 <- keyTemplate(dat2)
##' keys <- list(key1, key2)
##' keysPoolCheck(keys)
##' ## See problem in class_old
##' keysPoolCheck(keys, col = "class_old")
##' ## problems in class_new
##' keysPoolCheck(keys, col = "class_new")
##' keysPoolCheck(keys, excludere = "TEXT$")
keysPoolCheck <- function(keys, col = "class_old", excludere = "TEXT$"){
    ## How spot trouble? class_old changes among keys?
    classnameold <- lapply(keys, function(x) {
        fst <- x[!duplicated(x$name_old), ]
        res <- fst[ , c("name_old", col)]
        rownames(res) <- NULL
        res
    })

    for(i in 2:length(keys)){
        if (i == 2){
            classmerge <- merge(classnameold[[1]], classnameold[[2]],
                                by = "name_old", suffixes = c("1", "2"))
        } else {
            classmerge <- merge(classmerge, classnameold[[i]],
                                by = "name_old", suffixes = c("", i))
        }
    }
    ## ## Suppose all detected logicals should be integer (long story why)
    ## classmerge[classmerge == "logical"] <- "integer"
    ## ## Promote all integers to numeric
    ## classmerge[classmerge == "integer"] <- "numeric"

    classmerge$troublevar <- apply(classmerge, 1, function(x){length(unique(x[grep(col, names(x))])) > 1})
    classProblems <- classmerge[classmerge$troublevar, ]
   
    classProblems <- classProblems[!grepl(excludere, classProblems$name_old), ]
    classProblems
}



##' Check a key for consistency of names, values with classes.
##'
##' Split the key into blocks of rows defined by "name_new". Within
##' these blocks, Perform these checks: 1. name_old must be
##' homogeneous (identical) within a block of rows. class_old and
##' class_new must also be identical.
##' 2. elements in "value_new" must be consistent with "class_new".
##' If values cannot be coerced to match the class specified by
##' class_new, there must be user error.
##' Same for "value_old" and "class_old".
##' @param key A variable key object.
##' @param colname Leave as default to check consistency between classes, values, and names.
##' One can specify a check only on "class_old" or "class_new", for example.  But now that
##' all work correctly, I suggest you leave the default.
##' @param na.strings A regular expression of allowed text strings that represent missings.
##' Now it amounts to any of these: ".", "NA", "N/A", or any white space or tab as signified by \\s+.
##' @return Profuse warnings and a list of failed key blocks.
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu> and Ben Kite <bakite@@ku.edu>
keyCheck <- function(key,
                     colname = c("name_new", "class_old", "class_new"),
                     na.strings = c("\\.", "", "\\s+",  "N/A")){
    if (is.character(key)){
        key <- keyRead(key)
    }
    if (prod(c("name_old", "name_new", "class_old", "class_new",
               "value_old", "value_new") %in% names(key)) != 1L){
        stop ("At a minimum a variable key needs to have the following columns: name_old, name_new, class_old, class_new, value_old, value_new")
    }
    ## Deduce if this is a long key

    if(inherits(key, "keylong")) long = TRUE else long = FALSE

    if (!long){
        xx <- strsplit(key$value_old, split = "[|<]", fixed = FALSE)
        xx.l <- sapply(xx, length)
        yy <- strsplit(key$value_new, "[|<]", fixed = FALSE)
        yy.l <- sapply(yy, length)
        inconsistent <- xx.l != yy.l
        issues <- key[inconsistent, "name_old"]
        if (length(issues) > 0){
            stop (paste0("Value error, columns: ", issues))
        }
    }
    ## Transition from Ben's work to PJ's
    if(!inherits(key, "keylong")) keylong <- kutils::wide2long(key) else keylong <- key

    keysplit <- split(keylong, keylong[ , "name_new"])

    keyfails <- list()
    for(ii in intersect(c("name_old", "class_old", "class_new"), colname)){
            ## check same-value for all of "class_old", or "class_new"
            for(jj in names(keysplit)) {
                keyblock <- keysplit[[jj]]
                ## Stanza 1: check homogeneous colname = class_old(or new) values"
                if (length(unique(keyblock[ , ii])) > 1) {
                    warning(paste("Key Violation:", jj, ii,  "\n"), immediate. = TRUE)
                    keyfails[[jj]] <- keysplit[[jj]]
                }
            }
    }
    for(ii in intersect(c("class_old", "class_new"), colname)){
        ## compare value_old(new) against
        ## class_old(new). If all non-missing cannot be coerced to
        ## indicated class, key should fail.
        for (jj in names(keysplit)) {
            keyblock <- keysplit[[jj]]
            value_col <- paste0("value_", gsub("class_(.*)", "\\1", ii))
            value <- keyblock[ , value_col]
            ## exclude any that have missing marker from na.strings
            value <- na.omit(value[!value %in% na.strings])
            testcol <- NA
            mytext <- paste0("testcol <- as.", keyblock[1, ii], "(value)")
            eval(parse(text = mytext))
            if (sum(is.na(testcol)) > 0L){
                warning(paste("Key value violation:", ii, jj, "\n"), immediate. = TRUE)
                keyfails[[jj]] <- keysplit[[jj]]
            }
            ## did not fail yet, so return NULL for fails
            ##NULL
        }
    }


    if (long){
        if (!identical(key, wide2long(long2wide(key)))){
            stop ("Key error: wide2long(long2wide(key)) fails.")
        }
    } else {
        if (!identical(key, long2wide(wide2long(key)))){
            stop ("Key error: long2wide(wide2long(key)) fails.")
        }
    }
    if (length(keyfails) > 0){
        return(keyfails)
    }else{
        message("No errors were detected")
    }
}


##' Homogenize class values and create a long key by pooling variable
##' keys.
##'
##' For long-format keys, this is one way to correct for errors in
##' "class_old" or "class_new" for common variables. For a long key
##' created by stacking together several long keys, or for a list of
##' long keys, this will try to homogenize the classes by using a
##' "highest common denominator" approach.  If one key has x1 as a
##' floating point, but another block of rows in the key has x1 as
##' integer, then class must be changed to floating point
##' (numeric). If another section of a key has x1 as a character, then
##' character becomes the class.
##'
##' Users might run keyTemplate on several data sets, arriving
##' at keys that need to be combined.  The long versions of the
##' keys can be stacked together by a function like \code{rbind}.
##' If the values class_old and class_new for a single variable are
##' inconsistent, then the "key stack" will fail the tests in keyCheck.
##' This function automates the process of fixing the class variables by
##' "promoting" classes where possible.
##'
##' Begin with a simple example.  In one data set, the value of x is
##' drawn from integers 1L, 2L, 3L, while in another set it is
##' floating values like 1.1, 2.2. After creating long format keys,
##' and stacking them together, the values of class_old will clash.
##' For x, we will observe both "integer" and "numeric" in the
##' class_old column.  In that situation, the class_old for all of the
##' rows under consideration should be set as "numeric".
##'
##' The promotion schemes are described by the variable classes, where
##' we have the most conservative changes first. The most destructive
##' change is when variables are converted from integer to character,
##' for example. The conservative conversion strategies are specified
##' in the classes variable, in which the last element in a vector
##' will be used to replace the preceeding classes.  For example,
##' c("ordered", "factor", "character") means that the class_old
##' values of "ordered" and "factor" will be replaced by "character".
##'
##' The conversions specified by classes are tried, in order.
##' 1. logical -> integer
##' 2. integer -> numeric
##' 3. ordered -> factor
##'
##' If their application fails to homogenize a vector, then class is
##' changed to "character". For example, when the value of class_old
##' observed is c("ordered", "numeric", "character"). In that case,
##' the class is promoted to "character", it is the least common
##' denominator.
##' @param keylong A list of long keys, or just one long key,
##'     presumably a result of rbinding several long keys.
##' @param keysplit Not often needed for user-level code. A list of
##'     key blocks, each of which is to be inspected and
##'     homogenized. Not used if a keylong argument is provided.
##' @param classes A list of vectors specifying legal promotions.
##' @param colnames Either c("class_old","class_new), ""class_old", or
##'     "class_new".  The former is the default.
##' @param textre A regular expression matching a column name to be
##'     treated as character. Default matches any variable name ending
##'     in "TEXT"
##' @return A class-corrected version of the same format as the input,
##'     either a long key or a list of key elements.
##' @author Paul Johnson <pauljohn@@ku.edu>
##' @export
##' @examples
##' dat1 <- data.frame(x1 = as.integer(rnorm(100)), x2 = sample(c("Apple", "Orange"),
##'                    100, replace = TRUE), x3 = ifelse(rnorm(100) < 0, TRUE, FALSE))
##' dat2 <- data.frame(x1 = rnorm(100), x2 = ordered(sample(c("Apple", "Orange"),
##'                    100, replace = TRUE)), x3 = rbinom(100, 1, .5),
##'                    stringsAsFactors = FALSE)
##' key1 <- keyTemplate(dat1, long = TRUE)
##' key2 <- keyTemplate(dat2, long = TRUE)
##' keys2stack <- rbind(key1, key2)
##' keys2stack.fix <- keysPool(keys2stack)
##' keys2stack.fix2 <- keysPool(keys2stack.fix, colname = "class_new")
##' ## Sometimes this will not be able to homogenize
##' dat1 <- data.frame(x1 = as.integer(rnorm(100)),
##'                    x2 = sample(c("Apple", "Orange"), 100, replace = TRUE))
##' dat2 <- data.frame(x1 = rnorm(100),
##'                    x2 = sample(c("Apple", "Orange"), 100, replace = TRUE),
##'                    stringsAsFactors = FALSE)
##' key1 <- keyTemplate(dat1, long = TRUE)
##' key2 <- keyTemplate(dat2, long = TRUE)
##' ## Create a stack of keys for yourself
##' keys2stack <- rbind(key1, key2)
##' keys.fix <- keysPool(keys2stack)
##' ## We will create stack of keys for you
##' keys.fix2 <- keysPool(list(key1, key2)) 
##' ## View(keys.fix)
##' ## View(keys.fix2)
##'
##' 
##' ## If you have wide keys, convert them with wide2long, either by
##' key1 <- keyTemplate(dat1)
##' key2 <- keyTemplate(dat2)
##' keysstack.wide <- rbind(wide2long(key1), wide2long(key2))
##' keys.fix <- keysPool(keysstack.wide)
##' ## or
##' keysPool(list(wide2long(key1), wide2long(key2)))
keysPool <- function(keylong = NULL, keysplit = NULL,
                        classes = list(c("logical", "integer"),
                                       c("integer", "numeric"),
                                       c("ordered", "factor"),
                                       c("factor", "character")),
                        colnames = c("class_old","class_new"),
                        textre = "TEXT$")
{

    if (is.list(keylong) && !is.data.frame(keylong)){
        islongkey <- vapply(keylong, function(x){
            inherits(x, "keylong")
        }, logical(1))
        if (any(!islongkey)){
            MESSG <- "All elements in keylong list must be long keys"
            stop(MESSG)
        }
        keylong <- do.call(rbind, keylong)
    }
        
    if (!is.null(keylong)){
        if (!is.null(keysplit)){
            warning("keysplit is ignored because keylong was not null")
        }
        if (!inherits(keylong, "keylong")) {
            stop("keylong is not recognized as a long key")
        } else {
            NULL
        }
        if (missing(keysplit)) keysplit <- split(keylong, keylong[ , "name_new"])
    } else {
        if (is.null(keysplit)) {
            stop("A key or keysplit argument must be provided")
        }
    }

    ## Change the values of colname (say, class_old) to equal the last value of classes.
    ## keyblock: a row block from a variable key
    ## classes: vector of classes, the last of which is the acceptable one, to replace
    ## the others.
    ## colname: either "class_old" or "class_new"
    classClean <- function (keyblock, colnames = colnames, classes)
    {
        for (col in colnames) {
            keyblock[keyblock[ , col] %in% classes[-length(classes)], col] <- classes[length(classes)]
        }
        keyblock
    }

    ## if mixed, promote all to last named class
    for(i in seq_along(keysplit)){
        keyblock <- keysplit[[i]]
        if (length(unique(keyblock[, colnames[1]])) == 1) next() # use first col if two
        ## Special case. Any variable matching textre
        if (any(grepl(textre, names(keysplit)[i], ignore.case = TRUE))) {
            for (col in colnames) {
                keyblock[ , col] <- "character"
            }
            keysplit[[i]] <- unique(keyblock)
            next()
        }

        for (j in classes) {
            keyblock <- classClean(keyblock, colnames = colnames, classes = j)
            keysplit[[i]] <- unique(keyblock)
            if (length(unique(keyblock[, colnames[1]])) == 1) {
                next()
            }
        }

        if (length(unique(keyblock[, colnames[1]])) > 1) {
            MESSG <- paste("Cannot painlessly reduce key classes to homogeneous class.",
                           names(keysplit)[i], "changing class to character")
            warning(paste0(names(keysplit)[i], " ", paste(unique(keyblock[ , colnames[1]]),
                                                    collapse = " + "), ". ", MESSG), immediate. = TRUE)
            for (col in colnames) {
                keyblock[ , col] <- "character"
            }
            keysplit[[i]] <- unique(keyblock)
            ##warning(paste(names(keysplit)[i], "changing class to character"), immediate. = TRUE)
        }
    }

    ## If a key came in, give back a key
    if(!is.null(keylong)){
        keystack <- do.call(rbind, keysplit)
        class(keystack) <- c("keylong", "data.frame")
        return(keystack)
    }
    ## If a keysplit was passed in, give that back
    return(keysplit)
}


##' Checks a key for dangerous matches of old and new values in a key
##' for different levels.
##'
##' Positions in a long key are referred to as levels. If a value is
##' mismatched at levels 1 and 3, this means that issues are in rows 1
##' and 3 of the section of the given variable in a long key.
##'
##' @title keyCrossRef
##' @param key A variable key, ideally a long key. If a wide key is
##'     provided it is converted to long.
##' @param ignoreClass Classes that should be excluded from
##'     check. Useful when many integer variables are being reverse-
##'     coded. Takes a string or vector.
##' @param verbose Should a statement about the number of issues
##'     detected be returned? Defaults to FALSE.
##' @param lowercase Should old and new values be passed through
##'     tolower function? Defaults to FALSE.
##' @return Presents a warning for potentially problematic key
##'     sections. Return is dependent on verbose argument.
##' @author Ben Kite <bakite@@ku.edu>
##' @examples
##' dat <- data.frame(x1 = sample(c("a", "b", "c", "d"), 100, replace = TRUE),
##'                   x2 = sample(c("Apple", "Orange"), 100, replace = TRUE),
##'                   x3 = ordered(sample(c("low", "medium", "high"), 100, replace = TRUE),
##'                   levels = c("low", "medium", "high")),
##'                   stringsAsFactors = FALSE)
##' key <- keyTemplate(dat, long = TRUE)
##' ## No errors with a fresh key.
##' kutils:::keyCrossRef(key, verbose = TRUE)
##' key[1:2, "value_new"] <- c("b", "a")
##' key[5, "value_new"]
##' key[7:9, "value_new"] <- c("high", "medium", "low")
##' kutils:::keyCrossRef(key)
##' kutils:::keyCrossRef(key, ignoreClass = c("ordered", "character"), verbose = TRUE)
keyCrossRef <- function(key, ignoreClass = NULL, verbose = FALSE, lowercase = FALSE){
    if(!inherits(key, "keylong")){
        key <- kutils::wide2long(key)
    }
    keysplit <- split(key, key[ , "name_new"])
    problems <- 0
    for (k in keysplit){
        if (k[1, "class_new"] %in% ignoreClass){
            next()
        }
        N <- nrow(k)
        for (r in 1:length(k[,"value_new"])){
            i <- k[r, "value_new"]
            others <- k[seq(1, N)[!seq(1,N) %in% r],"value_old"]
            if (isTRUE(lowercase)){
                i <- tolower(i)
                others <- tolower(others)
            }
            if (i %in% others){
                problems <- problems + 1
                warning(k[1, "name_new"], ": The value of \"", i, "\" is at level ",
                        r, " in value_new, but \"", i,
                        "\" is also the value at level ",
                        which(k[,"value_old"] == i), " in value_old.")
            }
        }
    }
    if (isTRUE(verbose)){
        if (problems == 0){
            return("No potentially problematic value matches across levels detected.")
        } else {
            return(paste0("There are ", problems,
                          " potential issues with this key that need to be considered"))
        }
    }

}


##' Look for old (or new) names in variable key
##'
##' Use the key to find the original name of a variable that has been
##' renamed, or find the new name of an original variable.  The
##' \code{get} argument indicates if the \code{name_old} or
##' \code{name_new} is desired.
##'
##' If \code{get = "name_old"}, the return is a character vector, with
##' one element per value of \code{x}.  If there is no match for a
##' value of \code{x}, the value NA is returned for that
##' value. However, if \code{get = "name_new"}, the return might be
##' either a vector (one element per value of \code{x}) or a list with
##' one element for each value of \code{x}.  The list is returned when
##' a value of \code{x} corresponds to more than one element in
##' \code{name_old}.
##' @param x A variable name. If \code{get = "name_old"}, then
##'     \code{x} is a value for \code{name_new}. If \code{get =
##'     "name_new"}, \code{x} should be a value for \code{name_old}.
##' @param key Which key should be used
##' @param get Either "name_old" (to retrieve the original name) or
##'     "name_new" (to get the new name)
##' @return A vector or list of matches between x and either name_new
##'     or name_old elements in the key.
##' @author Paul Johnson
##' @export
##' @examples
##' mydf.key.path <- system.file("extdata", "mydf.key.csv", package = "kutils")
##' mydf.key <-  keyImport(mydf.key.path)
##' mydf.key$name_new <- paste0("new_", mydf.key$name_new)
##' keyLookup("new_x5", mydf.key, get = "name_old")
##' keyLookup(c("new_x6", "new_x1"), mydf.key, get = "name_old")
##' keyLookup(c("x6", "x1"), mydf.key, get = "name_new")
##' keyLookup(c("asdf", "new_x1"), mydf.key, get = "name_old")
##'
##' mydf.key <- rbind(mydf.key,
##'                  c("x3", "x3f",  "ordered", "factor", "","","",""))
##' keyLookup(c("x3"), mydf.key, get = "name_new")
##' keyLookup(c("x1", "x3", "x5"), mydf.key, get = "name_new")
keyLookup <- function(x, key, get = "name_old"){
    if(!length(match.arg(get, c("name_old", "name_new")))){
        MESSG <- "keyLookup: get must be 'name_old' or 'name_new'"
        stop(MESSG)
    }
    
    if(class(key)[1] == "keylong"){
        key <- long2wide(key)
    }
    
    if(get == "name_old"){
        if (any(duplicated(key$name_new))){
            MESSG <- paste0("keyLookup finds duplicates in 'name_new'")
            stop(MESSG)
        }
        target <- key$name_new[match(x, key$name_new, nomatch = NA)]
        return(target)
    }
    ## else get == "name_new"
    target <- sapply(x, function(jj){
        fits <- key[which(key$name_old %in% jj), "name_new"]
        fits
    })

    for(jj in names(target)){
        if(!length(target[[jj]])) {
            MESSG <- paste("No value in name_old matches:", jj)
            warning(MESSG)
            target[jj] <- NULL
        } else if (length(target[[jj]]) > 1){
            MESSG <- paste0("Note: name_old '", jj,
                            "' matches several values in name_new: ",
                            paste(target[[jj]], collapse = ", "))
            print(MESSG)
        }
    }
    if(length(target) == 0 || is.null(target)){
        return(NULL)
    }
    target
}



##' Import an SPSS file, create a key representing the numeric ->
##' factor transition
##'
##' This is a way to keep track of the scores that are used in the SPSS file.
##' It is also an easy way to start a new variable key that makes it convenient
##' to work on the value_new column with R text functions.
##' @param dat A character string path to the SPSS file
##' @param long TRUE returns a long key, otherwise a wide key
##' @return A variable key (long or wide)
##' @importFrom foreign read.spss
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
keyTemplateSPSS <- function(dat, long = TRUE){
    datf <- read.spss(dat, max.value.labels = 15, to.data.frame = TRUE,
                      use.value.labels = TRUE)
    datn <- read.spss(dat, to.data.frame = TRUE, use.value.labels = FALSE)
    key <- statdatKey(datf, datn, long)
    key
}



##' Import a Stata (version 12 or lower) file, create a key
##' representing the numeric -> factor transition
##'
##' This is a way to keep track of the scores that are used in the Stata file.
##' It is also an easy way to start a new variable key that makes it convenient
##' to work on the value_new column with R text functions.
##' @param dat A character string path to the Stata file
##' @param long TRUE returns a long key, otherwise a wide key
##' @return A variable key (long or wide)
##' @importFrom foreign read.dta
##' @export
##' @author Paul Johnson <pauljohn@@ku.edu>
keyTemplateStata <- function(dat, long = TRUE){
    datf <- read.dta(dat, convert.factors = TRUE)
    datn <- read.dta(dat, convert.factors = FALSE)
    key <- statdatKey(datf, datn, long)
    key
}

##' keyFactors: private function that does work for keyTemplateSPSS and
##' key template Stata
##' @param datf Data frame with factors
##' @param datn Numeric data frame
##' @param long Should the result be a long or wide key
statdatKey <- function(datf, datn, long = TRUE){
    numericTF <- vapply(datf, is.numeric, logical(1)) 
    notnumeric <- colnames(datf)[!numericTF]
    isnumeric <- colnames(datf)[numericTF]
    
    partA <- lapply(isnumeric, function(i) {
        data.frame(name_old = i, name_new = i,
                   class_old = class(datf[ , i])[1], class_new = class(datf[ , i])[1],
                   value_old = ".", value_new = ".", stringsAsFactors = FALSE)
    })
    partB <- lapply(notnumeric, function(i) {
        val <- unique(datn[ , i])
        val <- val[order(val)]
        val.level <- datf[match(val, datn[ , i]),  i]
        data.frame(name_old = i, name_new = paste0(i, "f"),
                                   class_old = class(datn[ , i])[1], class_new = class(datf[ , i])[1],
                                   value_old = val, value_new = val.level, stringsAsFactors = FALSE)
        ## match(datn[match(levels(datf$Q76), datf$Q76),"Q76"])
    })
    keylong <- rbind(do.call(rbind, partA), do.call(rbind, partB))
    ## 20180418: variables did not come out in same order as SPSS.
    keywide <- long2wide(keylong)
    rownames(keywide) <- keywide$name_old
    keywide <- keywide[colnames(datf), ]
    if (!long) return(keywide)
    ## else turn the wide key long
    wide2long(keywide)
}