
Defines functions clean_names.tbl_lazy clean_names.tbl_graph clean_names.sf clean_names.default clean_names

Documented in clean_names clean_names.default clean_names.sf clean_names.tbl_graph clean_names.tbl_lazy

#' @title Cleans names of an object (usually a data.frame).
#' @description
#' Resulting names are unique and consist only of the \code{_} character, numbers, and letters.
#' Capitalization preferences can be specified using the \code{case} parameter.
#' Accented characters are transliterated to ASCII.  For example, an "o" with a
#' German umlaut over it becomes "o", and the Spanish character "enye" becomes
#' "n".
#' This function takes and returns a data.frame, for ease of piping with
#' \code{`\%>\%`}. For the underlying function that works on a character vector
#' of names, see \code{\link[janitor]{make_clean_names}}.  \code{clean_names} 
#' relies on the versatile function \code{\link[snakecase]{to_any_case}}, which 
#' accepts many arguments.  See that function's documentation for ideas on getting 
#' the most out of \code{clean_names}.  A few examples are included below.
#' A common issue is that the micro/mu symbol is replaced by "m" instead of "u".
#' The replacement with "m" is more correct when doing Greek-to-ASCII
#' transliteration but less correct when doing scientific data-to-ASCII
#' transliteration.  A warning will be generated if the "m" replacement occurs.
#' To replace with "u", please add the argument \code{replace=janitor:::mu_to_u}
#' which is a character vector mapping all known mu or micro Unicode code points
#' (characters) to "u".
#' @param dat the input data.frame.
#' @inheritDotParams make_clean_names -string
#' @return Returns the data.frame with clean names.
#' @details \code{clean_names()} is intended to be used on \code{data.frames}
#'   and \code{data.frame}-like objects. For this reason there are methods to
#'   support using \code{clean_names()} on \code{sf} and \code{tbl_graph} (from
#'   \code{tidygraph}) objects as well as on database connections through
#'   \code{dbplyr}. For cleaning other named objects like named lists 
#'   and vectors, use \code{make_clean_names()}.
#' @export
#' @family Set names
#' @examples
#' # --- Simple Usage ---
#' x <- data.frame(caseID = 1, DOB = 2, Other = 3)
#' clean_names(x)
#' # or pipe in the input data.frame:
#' x %>%
#'   clean_names()
#' # if you prefer camelCase variable names:
#' x %>%
#'   clean_names(., "lower_camel")
#' # (not run) run clean_names after reading in a spreadsheet:
#' # library(readxl)
#' # read_excel("messy_excel_file.xlsx") %>%
#' #   clean_names()
#' # --- Taking advantage of the underlying snakecase::to_any_case arguments ---
#' # Restore column names to Title Case, e.g., for plotting
#' mtcars %>%
#'   clean_names(case = "title")
#' # Tell clean_names to leave certain abbreviations untouched:
#' x %>%
#'   clean_names(case = "upper_camel", abbreviations = c("ID", "DOB")) 
clean_names <- function(dat, ...) {
  # S3 generic: dispatch on the class of `dat` to the matching
  # clean_names.<class> method, falling back to clean_names.default.
  UseMethod("clean_names")
}

#' @rdname clean_names
#' @export
clean_names.default <- function(dat, ...) {
  # There is nothing to clean when the object carries neither names nor
  # dimnames, so fail early with an explicit message.
  if (is.null(names(dat)) && is.null(dimnames(dat))) {
    stop(
      "`clean_names()` requires that either names or dimnames be non-null.",
      call. = FALSE
    )
  }
  if (is.null(names(dat))) {
    # No names (e.g. a matrix or array): clean each dimnames component.
    dimnames(dat) <- lapply(dimnames(dat), make_clean_names, ...)
  } else {
    names(dat) <- make_clean_names(names(dat), ...)
  }
  # Return the renamed object so the call can be used in a pipe.
  dat
}

#' @rdname clean_names
#' @export
clean_names.sf <- function(dat, ...) {
  if (!requireNamespace("sf", quietly = TRUE)) { # nocov start
    stop(
      "Package 'sf' needed for this function to work. Please install it.",
      call. = FALSE
    )
  } # nocov end
  # Clean every column name except the last one.
  # NOTE(review): this assumes the geometry column is the last column of
  # `dat` -- confirm for sf objects whose geometry column was moved.
  n_cols <- length(dat) - 1L
  # Guard against an object with a single column: `1:0` would index
  # c(1, 0) and rename that column, so only rename when n_cols > 0.
  if (n_cols > 0L) {
    to_clean <- seq_len(n_cols)
    names(dat)[to_clean] <- make_clean_names(names(dat)[to_clean], ...)
  }
  dat
}

#' @rdname clean_names
#' @export
clean_names.tbl_graph <- function(dat, ...) {
  if (!requireNamespace("tidygraph", quietly = TRUE)) { # nocov start
    stop(
      "Package 'tidygraph' needed for this function to work. Please install it.",
      call. = FALSE
    )
  } # nocov end
  # Rename through dplyr so the renaming is dispatched on the tbl_graph
  # object; extra arguments in `...` are forwarded to make_clean_names().
  dplyr::rename_all(dat, .funs = make_clean_names, ...)
}

#' @rdname clean_names
#' @export
clean_names.tbl_lazy <- function(dat, ...) {
  if (!requireNamespace("dbplyr", quietly = TRUE)) { # nocov start
    stop(
      "Package 'dbplyr' needed for this function to work. Please install it.",
      call. = FALSE
    )
  } # nocov end
  # Apply make_clean_names() to every column name of the lazy table;
  # extra arguments in `...` are forwarded to make_clean_names().
  dplyr::rename_with(
    dat,
    janitor::make_clean_names,
    .cols = dplyr::everything(),
    ...
  )
}

# TODO: According to https://www.compart.com/en/unicode/U+03BC reviewed on
# 2021-07-10, there are some UTF-32 encoding characters that are also mu or
# micro.  This only handles the utf-8 values; to add more characters, just add
# to this character vector.

#' Constant to help map from mu to u
#'
#' This is a character vector with names of all known Unicode code points that
#' look like the Greek mu or the micro symbol and values of "u".  This is
#' intended to simplify mapping from mu or micro in Unicode to the character "u"
#' with \code{clean_names()} and \code{make_clean_names()}.
#'
#' See the help in \code{clean_names()} for how to use this.
#' @family Set names
mu_to_u <-
  # setNames is used instead of setting the names directly because it prevents a
  # warning like "unable to translate '<U+3382>' to native encoding" for several
  # of the items.
  stats::setNames(
    rep("u", 10),
    nm = c(
      "\u00b5", "\u03bc", "\u3382", "\u338c", "\u338d",
      "\u3395", "\u339b", "\u33b2", "\u33b6", "\u33bc"
    )
  )

Try the janitor package in your browser

Any scripts or data that you put into this service are public.

janitor documentation built on Feb. 16, 2023, 10:16 p.m.