# R/tgd_clean_names.R
#
# Defines functions: tgd_clean_names
#
# Documented in: tgd_clean_names

#' Clean all dirty column names of a data frame
#'
#' Rewrites the names of \code{df} in several steps: camel-case boundaries
#' are split with "_", the result is passed through
#' \code{janitor::clean_names()}, and a series of extra replacements is
#' applied (lower-casing, removal of the degree sign, "\%" to "pc", "/" to
#' "_over_", a few accented letters and the euro sign to plain letters,
#' "." to "_", collapsing of repeated "_", trimming of surrounding
#' whitespace). Names are finally de-duplicated with \code{make.unique()}.
#'
#' @param df a dataframe or a tibble
#' @param short logical indicating if each "_"-separated name component
#'   should be truncated to its first 4 characters. Default is FALSE.
#'   Values that do not coerce to a single \code{TRUE} (e.g. \code{NA},
#'   \code{NULL}) are treated as \code{FALSE}.
#' @return \code{df} with clean names (all in lower case without special
#'   characters, accents and spaces)
#' @examples
#' tgd_clean_names(iris)
#' tgd_clean_names(iris, short = TRUE)
#' @import dplyr magrittr janitor stringr
#' @export tgd_clean_names
tgd_clean_names <- function(df,
                            short = FALSE) {
  # Split camel case: put "_" between a lower-case letter and the upper-case
  # letter that follows it; "\\L" lower-cases that second letter.
  names(df) <- gsub("([a-z])([A-Z])", "\\1_\\L\\2", names(df), perl = TRUE)

  # Delegate the bulk of the cleaning to janitor.
  df <- janitor::clean_names(df)

  # NOTE(review): janitor::clean_names() presumably already strips or
  # rewrites most of the symbols handled below, which would make these
  # replacements no-ops -- confirm whether they were meant to run before it.
  nm <- stringr::str_to_lower(names(df))

  # Non-ASCII characters are written as \u escapes so the source file stays
  # pure ASCII and does not depend on the encoding it is read with.
  # Remove the degree sign.
  nm <- stringr::str_replace_all(nm, "\u00b0", "")
  # Replace the percent sign.
  nm <- stringr::str_replace_all(nm, "%", "pc")
  # Change ratio notation.
  nm <- stringr::str_replace_all(nm, "/", "_over_")
  # Standardise e-acute, e-circumflex, e-grave and the euro sign to "e".
  nm <- stringr::str_replace_all(nm, "\u00e9|\u00ea|\u00e8|\u20ac", "e")
  # Standardise i-diaeresis to "i".
  nm <- stringr::str_replace_all(nm, "\u00ef", "i")
  # Convert "." to "_".
  nm <- stringr::str_replace_all(nm, "\\.", "_")
  # Collapse any run of "_" into a single "_". Done after every step that can
  # introduce "_" so that no doubled separator survives.
  nm <- stringr::str_replace_all(nm, "_+", "_")
  # Remove leading and trailing whitespace.
  nm <- stringr::str_trim(nm)

  # Optionally shorten names: keep the first 4 characters of every
  # "_"-separated component, then glue the components back together.
  if (isTRUE(as.logical(short))) {
    nm <- vapply(
      stringr::str_split(nm, "_"),
      function(parts) paste(stringr::str_sub(parts, 1, 4), collapse = "_"),
      character(1),
      USE.NAMES = FALSE
    )
  }

  # Cleaning (and especially shortening) can make distinct names collide.
  names(df) <- make.unique(nm)
  df
}
# juliengoo/test documentation built on May 18, 2019, 10:14 a.m.