# -*- encoding:ASCII -*-
##################
# function tidy_rname
# Created the 2014-09-26 by Joris Muller
##################
## documentation ------------------------------------------------
#' @title Transform char to rname
#'
#' @description Transform a character vector to another one with good names for data.frames header following several rules.
#'
#'
#' @param char A character vector with the original columns names.
#' @param ascii Boolean. Only produce ASCII char. Default is \code{TRUE}.
#' @param no_dots Boolean. Replace dots by underscores. Default is \code{TRUE}.
#' @param max_size Integer. Number max of character. If 0, no limits. Will not cut words < of 8 letters. Default is 16. Will keep the first letters and the last ones (a least 3)
#' @param no_duplicate Boolean. If there is a duplicate, add a number at the end of the name.
#' @param lowercase Boolean. Change to lower cases. Default is \code{TRUE}.
#' @details The rules applied are :
#' \enumerate{
#' \item rules from official doc : use the function \code{\link{make.names}}
#' \item rules of good practice
#' \itemize{
#' \item only ASCII character
#' \item don't use dots because they have a special meaning in R
#' \item avoid duplicates
#' \item don't use to long name (less than 16 char).
#' \item should be only lower cases
#' }
#' }
#' @return A character vector.
#' @seealso This function is used by \code{\link{descvars_skeleton}} to produce
#' the column rname. The base \code{\link{make.names}} function does a part of
#' the work.
#' @export
#' @author Joris Muller
#' @examples
#' bad_names <- c("white space", "multiples...dots..", "duplicate",
#' "duplicate", "$pécial char", "looooooooognameeeeeeee")
#' transformed_names <- tidy_rname(bad_names)
#' transformed_names
#'
tidy_rname <- function(char, ascii = TRUE, no_dots = TRUE, no_duplicate = TRUE,
max_size = 16, lowercase = TRUE){
# char must be a charcter vector
if(class(char) != "character") stop("char must be a character vector")
# Always perform a make.names
clean_names <- make.names(char, unique = no_duplicate)
# Transform to ASCII if asked
if (ascii) {
clean_names <- ascii_text <- iconv(clean_names, to="ASCII//TRANSLIT")
#remove all unecessary char.
clean_names <- gsub("[^a-z A-Z 0-9. _]", replacement = "", x = clean_names)
}
# Transform to lowercases
if (lowercase) {
clean_names <- tolower(clean_names)
}
# Remove dots because they are used for non compatible characters
if (no_dots) {
clean_names <- gsub(pattern = "\\.+", replacement = "_", x = clean_names)
# remove dots if at the end
clean_names <- gsub(pattern = "_+$", replacement = "", x = clean_names)
# remove dots if at the begining. Because not allowed at the begining.
clean_names <- gsub(pattern = "^_+", replacement = "", x = clean_names)
}
# Shorten the name if too long
if (max_size > 0) {
clean_names <- shorten(char = clean_names, max_size = max_size)
}
return(clean_names)
}
#' @title Shorten names
#'
#' @description Shorten names in a character vector, keeping the begining and the end
#'
#'
#' @param char A character vector with the names.
#' @param max_size Integer. Number max of character. If 0, no limits.
#' Will not cut words < of 8 letters. Default is 16.
#' Will keep the first letters and the last ones (a least 3)
#' @param tail_keep Integer. Number max of character to keep at the end of
#' the words. If 0, don't keep any character. Must be < to max_size.
#' Default is 3.
#' @details The function keep
#' @return A character vector with shorten names.
#' @seealso This function is used by \code{\link{tidy_rname}} to produce
#' the short enough names.
#' @export
#' @keywords internal
#' @author Joris Muller
#' @examples
#' bad_names <- c("short", "looooooooognameeeeeeee")
#' transformed_names <- shorten(bad_names)
#' transformed_names
shorten <- function(char, max_size = 16, tail_keep = 3) {
size <- nchar(char)
new_char <- ifelse(test = (size > max_size), yes = {
# calculate the number of leading character
# max_size - tail_keep - 1 (for the _)
nb_leadings <- max_size - tail_keep - 1
leadings_chars <- substr(x = char, start = 1, stop = nb_leadings)
# Calcultate the numbre of tail characters
# Risk of overlapping if nb_leading + tail_keep + 1 > size
# TODO: fix it
tail_chars <- substr(x = char, start = size - tail_keep + 1, stop = size)
paste(leadings_chars, tail_chars, sep = "_")
}, no = {
char
}
)
return(new_char)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.