# R/makeSASNames.R

#' @importFrom stringi stri_trans_general
#' @export
makeSASNames <- function(names, nchar=8, maxPasses=10, quiet=FALSE) {
  ## Convert a vector of candidate SAS dataset or variable names into
  ## upper-case names that are truncated to `nchar` characters and made
  ## unique.
  ##
  ## Arguments:
  ##   names     - vector of candidate names.
  ##   nchar     - length to which names are truncated (default 8).
  ##   maxPasses - maximum number of de-duplication passes to attempt.
  ##   quiet     - if TRUE, suppress the warnings about truncated and
  ##               de-duplicated names.
  ##
  ## Value: the converted names.
  ##
  ## Signals an error if duplicated names remain once the
  ## de-duplication loop has finished.

  # The `nchar` argument shadows base::nchar(); keep the limit under an
  # unambiguous local name and call the base function explicitly below.
  maxLen <- nchar

  #!# # Step -2: convert to ASCII
  #!# names <- stri_trans_general(names, "Any-Latin; Latin-ASCII")

  #!# # Step -1: strip all whitespace
  #!# names <- gsub("[[:space:]]+", "", names)

  # Step 0: convert to uppercase
  names <- toupper(names)

  # Step 1: truncate to `nchar` characters.  The limit is taken from the
  # argument rather than a literal 8, so the test, the truncation and the
  # warning text always agree.
  tooLong <- base::nchar(names, "bytes") > maxLen
  if (any(tooLong)) {
    shortNames <- substr(as.character(names), 1, maxLen)
    if (!quiet)
      warning("Truncated ", sum(tooLong), " long names to ", maxLen,
              " characters.")
  } else {
    shortNames <- names
  }

  # Step 2: make the names unique.  Conceivably this could take a couple
  # of iterations, because shortening the names to add digits may create
  # new duplicates...
  #
  # NOTE(review): every name, duplicated or not, is shortened by the
  # width of its suffix (at least 2 characters) during a pass, and
  # make.names() may also alter syntactically invalid names (e.g. by
  # prepending "X"), so results can differ from the input more than
  # strictly necessary -- confirm whether callers rely on this.
  varNames <- shortNames
  passes <- 0
  dups <- FALSE
  while (any(duplicated(varNames)) && passes < maxPasses) {
    passes <- passes + 1
    dups <- duplicated(varNames)

    # Number of extra occurrences of each distinct name.
    repeatCount <- table(varNames) - 1

    # Room needed for the suffix: the digits of the count plus one
    # character for the "." separator that make.names() inserts.
    digitChars <- base::nchar(as.character(repeatCount), "bytes") + 1
    names(digitChars) <- names(repeatCount)

    # The "." before the counter is left in place; the original carried
    # a disabled step that would have stripped it.
    varNames <- make.names(substr(varNames, 1, maxLen - digitChars[varNames]),
                           unique=TRUE)
  }

  if (any(duplicated(varNames)))
    stop("Unable to make all names unique after ", passes, " passes.")

  if (any(dups) && !quiet)
    warning("Made ",sum(dups)," duplicate names unique.")

  varNames
}

# Try the SASxport package in your browser
#
# Any scripts or data that you put into this service are public.
#
# SASxport documentation built on May 2, 2019, 6:38 a.m.