R/cleanName.R

Defines functions cleanName

Documented in cleanName

#' Clean Column Names
#'
#' Cleans the column names when importing data with messy names. Names are
#' converted to lower case, punctuation and whitespace are turned into single
#' "." separators, and separators at the start or end of a name are dropped.
#'
#' Regardless of \code{adobeClean}, any run of dots followed by lower-case
#' letters and digits (for example "..evar12") is removed before the other
#' steps are applied.
#'
#' @param x a data.frame (its column names are used) or vector of names
#' @param adobeClean Defaults to FALSE. When TRUE, also strips the
#'   dot-prefixed Adobe codes ".v<n>", ".c<n>", ".evar<n>", ".prop<n>",
#'   ".e<n>" and ".event<n>" from the cleaned names.
#' @return The vector of cleaned names to be assigned to colnames(object)
#' @examples
#' cleanName(c("Page Name", "Total  Revenue ($)"))
#' cleanName("Page Name (evar12)", adobeClean = TRUE)
#' @export

cleanName <- function(x, adobeClean = FALSE) {
  if (is.data.frame(x)) {
    x <- colnames(x)
  }

  # Remove dot-prefixed "<letters><digits>" tokens before anything else.
  # NOTE(review): the pattern is kept exactly as it was written; the "^*" in
  # the middle looks unintended -- confirm before simplifying it.
  x <- gsub("\\.+.^*[a-z]+\\d+", "", x)

  # Standardize names to all lower case with "." for spaces:
  # punctuation -> space, collapse whitespace runs, drop trailing whitespace,
  # then turn each remaining whitespace character into ".".
  x <- gsub("[[:punct:]]", " ", x)
  x <- gsub("\\s+", " ", x)
  x <- gsub("\\s+$", "", x)
  x <- tolower(gsub("\\s", "\\.", x))

  if (adobeClean) {
    # Strip the "evar" / "prop" / "event" style codes. The patterns are
    # applied one after another, in this order.
    adobe_patterns <- c(
      "\\.v\\d+",
      "\\.c\\d+",
      "\\.evar\\d+",
      "\\.prop\\d+",
      "\\.e\\d+",
      "\\.event\\d+"
    )
    for (pattern in adobe_patterns) {
      x <- gsub(pattern, "", x)
    }
  }

  # Leading whitespace/punctuation would otherwise leave a leading "."
  # (e.g. " #Visits" -> ".visits"). Trim it last so the dot-prefixed Adobe
  # patterns above can still match a code at the very start of a name.
  sub("^\\.+", "", x)
}
blazickjoe/DataScienceLibrary documentation built on Nov. 5, 2019, 2:26 p.m.