R/uniqueness_vec.R

Defines functions uniqueness_vec

# Generated by fusen: do not edit by hand

#' Uniqueness/rarity of a vector
#'
#' Scores every element of `.vec` by how rare it is within `.vec` itself.
#' Two frequencies are computed per element: the mean, over its
#' space-separated tokens, of how often each token occurs among all tokens of
#' `.vec`, and how often the complete string occurs in `.vec`. The score is
#' the reciprocal of the average of these two frequencies, so an element whose
#' tokens and whole string each occur exactly once scores 1, and more frequent
#' elements score closer to 0.
#'
#' @param .vec A character vector
#' @param .normalize Normalize between 0 and 1. NOTE(review): the function
#'   body never reads this argument, so it currently has no effect; it is
#'   kept so existing calls keep working. Confirm the intended behaviour
#'   before implementing it.
#'
#' @return A numeric vector with one score per element of `.vec`
#'
#' @noRd
#' @examples
#' mean(uniqueness_vec(table_source[["name"]], TRUE), na.rm = TRUE)
#' mean(uniqueness_vec(table_source[["iso3"]], TRUE), na.rm = TRUE)
#' mean(uniqueness_vec(table_source[["city"]], TRUE), na.rm = TRUE)
#' mean(uniqueness_vec(table_source[["address"]], TRUE), na.rm = TRUE)
uniqueness_vec <- function(.vec, .normalize = FALSE) {
  # Token-level frequency: split on single spaces, count every token across
  # the whole input, then fold the counts back into the per-string shape.
  tokens <- stringi::stri_split_fixed(.vec, " ")
  flat_tokens <- unlist(tokens)
  # ave() returns a vector of the same type as its first argument (character
  # here), so the group sizes are converted back to integer.
  token_counts <- as.integer(
    stats::ave(flat_tokens, flat_tokens, FUN = length)
  )
  counts_by_string <- utils::relist(token_counts, tokens)
  mean_token_count <- vapply(
    counts_by_string,
    function(counts) mean(counts, na.rm = TRUE),
    numeric(1)
  )

  # String-level frequency: occurrences of each complete string in `.vec`.
  string_count <- as.integer(stats::ave(.vec, .vec, FUN = length))

  # Reciprocal of the average frequency: rarer elements score higher.
  1 / ((mean_token_count + string_count) / 2)
}
MatthiasUckert/Rmatch documentation built on Jan. 3, 2022, 11:09 p.m.