# R/col_converters.R
#
# Defines functions: convert_to_numeric, validate_categorical, get_categories,
# convert_to_categorical, convert_to_logical, convert_to_date

#' Convert a vector to numeric with some tolerance to errors / local variations
#'
#' Convert a character vector to numeric. Commas are replaced by dots, and
#' spaces are removed so that values written with European conventions are
#' parsed too. Every Unicode space separator is removed, not only the plain
#' ASCII space: no-break and narrow no-break spaces are frequent thousands
#' separators in exported spreadsheets. Commas used as thousands separators
#' (e.g. "1,234.5") end up as a second decimal mark and therefore result in
#' missing values.
#'
#' @param x a character vector
#'
#' @return x converted as numeric; values that cannot be parsed become NA
#'   (with the usual coercion warning emitted by `as.numeric()`)
convert_to_numeric <- function(x) {
  # remove every Unicode space separator (\p{Zs}): plain space, no-break
  # space, narrow no-break space, thin space, ...
  x <- stringr::str_remove_all(x, "\\p{Zs}")
  # use the dot as the decimal mark
  x <- stringr::str_replace_all(x, stringr::fixed(","), ".")

  # return x as numeric
  as.numeric(x)
}

#' Keep only the authorised categories of a vector
#'
#' @param x a character vector
#' @param categories a character vector with the authorised values for x
#'
#' @return x, unchanged except that every value not found in `categories`
#'   is replaced by NA
validate_categorical <- function(x, categories) {
  # flag each element that is absent from the authorised set
  is_unauthorized <- !(x %in% categories)

  # blank out the flagged elements and hand back the result
  replace(x, is_unauthorized, NA)
}

#' Split a categories string into a character vector
#'
#' @param categories a string of values separated by commas, each value
#'   possibly surrounded by spaces
#'
#' @return a character vector of categories, with leading and trailing
#'   whitespace removed from each one
get_categories <- function(categories) {
  # cut on every comma; str_split() yields one list element per input string
  pieces <- stringr::str_split(categories, ",")

  # collapse the list into a single character vector
  labels <- purrr::flatten_chr(pieces)

  # strip the whitespace surrounding each value
  stringr::str_trim(labels, side = "both")
}

#' Restrict a vector to the categories listed in a comma-separated string
#'
#' @param x a character vector
#' @param categories a string of authorised values separated by commas, each
#'   value possibly surrounded by spaces
#'
#' @return x with the unauthorized values set to NA
convert_to_categorical <- function(x, categories) {
  # turn the specification string into the vector of authorised values
  allowed <- get_categories(categories)

  # drop (set to NA) everything that is not authorised
  validate_categorical(x, allowed)
}

#' Convert a vector to logical
#'
#' Convert a vector to logical after having uppercased all values and removed
#' the whitespace surrounding them, so that inputs such as "true", "False" or
#' " t " are recognised. Values that `as.logical()` does not understand once
#' normalised become NA.
#'
#' @param x a character vector
#'
#' @return x converted to logical
convert_to_logical <- function(x) {
  # ensure that every value is uppercase ("true" -> "TRUE", "f" -> "F")
  x <- toupper(x)

  # as.logical() does not trim its input: " TRUE" would otherwise yield NA
  x <- trimws(x)

  # return x as a logical vector
  as.logical(x)
}

#' Convert a vector to date / date_time
#'
#' Parse a vector with the lubridate function whose name is given as a string.
#'
#' @param x a character vector
#' @param lbrdte_fn the name of a lubridate function as string
#'
#' @return the result of calling the requested lubridate function on x
convert_to_date <- function(x, lbrdte_fn) {
  # look the parser up by name among the objects exported by lubridate
  parse_dates <- getExportedValue("lubridate", lbrdte_fn)

  # apply it to x and return the parsed values
  parse_dates(x)
}
# AdrienLeGuillou/autostudy documentation built on May 16, 2019, 8:15 p.m.