R/strings.R

Defines functions nstring strip_non_english clean_space clean_text convert_case toinitcap tostart totitle

Documented in nstring

#' convert string to title case
#' @noRd

totitle <- function(x) {
  ux <- unique(x)
  uy <- tools::toTitleCase(tolower(ux))
  uy[match(x, ux)]
}

#' convert string to start case (for a vector)
#' @noRd

tostart <- function(x) {
  ux <- unique(x)
  uy <- gsub("\\b(\\w)", "\\U\\1", tolower(ux), perl = TRUE)
  uy[match(x, ux)]
}

#' convert string to lower case with initial letter in upper case (for vector)
#' @noRd

toinitcap <- function(x) {
  paste0(toupper(substr(x, 1, 1)), tolower(substr(x, 2, nchar(x))))
}

#' case conversion
#' @noRd

convert_case <- function(x, case) {
  switch(case,
    lower = tolower(x),
    upper = toupper(x),
    title = totitle(x),
    start = tostart(x),
    initcap = toinitcap(x)
  )
}

#' Removing punctuation and special characters from a string
#' @noRd

clean_text <- function(x, whitelist_specials = "") {
  ptmp <- paste0(whitelist_specials, collapse = "|")
  pattern <- paste0("[^", ptmp, "[:alnum:][:space:]]")
  ux <- unique(x)
  uy <- gsub(pattern = pattern, "", ux)
  uy[match(x, ux)]
}


#' White space cleaning
#' @noRd

clean_space <- function(x) {
  # Optimization: unique() check
  ux <- unique(x)
  uy <- trimws(gsub("[ ]+", " ", ux))
  uy[match(x, ux)]
}

#' Removing non-english characters
#' @noRd

strip_non_english <- function(x) {
  ux <- unique(x)
  uy <- gsub("[^ -~]", "", ux)
  uy[match(x, ux)]
}

#' neat representation of string
#' @param text a string / character
#' @param case a string, It specifies how the string should be formatted.
#' Current options are 'lower', 'upper', 'title', 'start' and 'initcap'.
#' @param remove_specials a Boolean. If TRUE, special characters are removed
#' from the string.
#' @param keep_chars a vector of characters that are kept even if
#' remove_specials is TRUE.
#' @param ascii_only a Boolean. If TRUE, only ASCII characters are kept.
#' @return White space cleaned and optionally formatted by case conversion
#' and removal of special characters of the input string.
#' @examples
#' nstring("   All MOdels are wrong.   some ARE useful!!! ",
#'   case = "title",
#'   remove_specials = TRUE
#' )
#' nstring("all Models are Wrong some are Useful",
#'   case = "start",
#'   remove_specials = TRUE
#' )
#' nstring("variable_123!!", remove_specials = TRUE, keep_chars = c("_"))
#' @param string Deprecated. Use 'text' instead.
#' @param whitelist_specials Deprecated. Use 'keep_chars' instead.
#' @param en_only Deprecated. Use 'ascii_only' instead.
#' @export


nstring <- function(
  text, case = NULL, remove_specials = FALSE,
  keep_chars = "", ascii_only = FALSE,
  string = NULL, whitelist_specials = NULL, en_only = NULL
) {
  if (missing(text) && !is.null(string)) {
    warning("The argument `string` is deprecated; please use `text` instead.",
      call. = FALSE
    )
    text <- string
  } else if (!missing(text) && !is.null(string)) {
    warning("Both `text` and `string` were provided.
    `string` is deprecated and ignored.",
      call. = FALSE
    )
  }
  keep_chars <- .handle_deprecated_args(
    whitelist_specials, keep_chars,
    "whitelist_specials", "keep_chars"
  )
  ascii_only <- .handle_deprecated_args(
    en_only, ascii_only,
    "en_only", "ascii_only"
  )
  # Handle default logical NA
  if (is.logical(text) && all(is.na(text))) {
    text <- as.character(text)
  }

  if (!is.character(text)) {
    stop("text must be a string (character type).")
  }
  str_singleton_check(case, is_nullable = TRUE)
  bool_singleton_check(remove_specials)
  is.character(keep_chars)
  bool_singleton_check(ascii_only)

  if (!is.null(case) &&
        !any(case %in% c("lower", "upper", "title", "start", "initcap"))) {
    stop("To convert case of the string variable,
    select case = lower/upper/title/start")
  }
  ux <- unique(text)
  if (!is.null(case)) {
    ux <- convert_case(ux, case)
  }
  if (remove_specials) {
    ux <- clean_text(ux, whitelist_specials = keep_chars)
  }
  if (ascii_only) {
    ux <- strip_non_english(ux)
  }
  ux <- clean_space(ux)
  ux[match(text, unique(text))]
}

Try the neatR package in your browser

Any scripts or data that you put into this service are public.

neatR documentation built on Jan. 31, 2026, 5:07 p.m.