R/standardize_str.R

Defines functions standardize_str

Documented in standardize_str

# Generated by fusen: do not edit by hand

#' Standardize Strings
#'
#' Cleans up a character vector by applying the operations named in .op.
#' The operations always run in the same order, whatever the order of the
#' entries in .op: "ascii", then "punct", then "space", then "case".
#'
#' "ascii" transliterates the input with the "Latin-ASCII" transform.
#' "punct" replaces non-word characters and punctuation with a space, trims
#' the result and collapses whitespace runs (the collapse happens even when
#' "space" is not requested). "space" collapses every run of whitespace into
#' a single space and trims both ends. "case" converts to upper case.
#'
#' @param .str
#' A character vector to standardize
#' @param .op
#' Character vector naming the operations to apply: any subset of
#' c("space", "punct", "case", "ascii"). Entries other than these four
#' names have no effect.
#'
#' @return The standardized strings. When none of the four operations is
#' requested, .str is returned unchanged.
#'
#' @export
#' @examples
#' standardize_str(c("jkldsa   jkdhas   äää  §$ ## #'''"))
standardize_str <- function(.str, .op = c("space", "punct", "case", "ascii")) {
  # Tells whether a given operation was requested by the caller.
  wants <- function(step) step %in% .op

  # Pattern shared by every whitespace-collapsing step.
  ws_run <- "([[:blank:]]|[[:space:]])+"

  out <- .str

  if (wants("ascii")) {
    out <- stringi::stri_trans_general(out, "Latin-ASCII")
  }

  if (wants("punct")) {
    # Blank out non-word characters first, then anything still classed as
    # punctuation; each pass is trimmed before the next one runs.
    for (pattern in c("\\W", "[[:punct:]]")) {
      out <- trimws(stringi::stri_replace_all_regex(out, pattern, " "))
    }
  }

  # Removing punctuation leaves runs of spaces behind, so whitespace is
  # collapsed after "punct" as well as when "space" is requested directly.
  if (wants("punct") || wants("space")) {
    out <- trimws(stringi::stri_replace_all_regex(out, ws_run, " "))
  }

  if (wants("case")) {
    out <- toupper(out)
  }

  out
}
MatthiasUckert/Rmatch documentation built on Jan. 3, 2022, 11:09 p.m.