R/phonetic.R

Defines functions get_phonetic_group phonetic_distance soundex vowel_shift phonetic_shift

Documented in get_phonetic_group phonetic_distance phonetic_shift soundex vowel_shift

# R/phonetic.R
# Phonetic Transformation Engine functions

#' Shift consonant to phonetically similar consonant
#'
#' @param word Character. Word to transform
#' @param target Character. Consonant to shift (if found)
#' @return Character. Transformed word
#' @export
#' @examples
#' phonetic_shift("coverage", "v")  # "coferage"
phonetic_shift <- function(word, target) {
  target <- tolower(target)
  if (!target %in% names(consonant_pairs)) {
    return(word)
  }
  replacement <- consonant_pairs[[target]]
  gsub(target, replacement, word, fixed = TRUE)
}

#' Shift vowels in a word
#'
#' @param word Character. Word to transform
#' @param from Character. Vowel to replace
#' @param to Character. Replacement vowel
#' @return Character. Transformed word
#' @export
#' @examples
#' vowel_shift("test", "e", "i")  # "tist"
vowel_shift <- function(word, from, to) {
  gsub(from, to, word, fixed = TRUE)
}

#' Calculate Soundex code
#'
#' American Soundex algorithm for phonetic encoding.
#'
#' @param word Character. Word to encode
#' @return Character. 4-character Soundex code
#' @export
#' @examples
#' soundex("Robert")     # "R163"
#' soundex("Rupert")     # "R163" (same as Robert)
#' soundex("Washington") # "W252"
soundex <- function(word) {
  word <- toupper(word)
  chars <- strsplit(word, "")[[1]]

  if (length(chars) == 0) return("")

  # Keep first letter
  first <- chars[1]
  rest <- chars[-1]

  # Convert to codes
  codes <- vapply(tolower(rest), function(c) {
    if (c %in% names(soundex_map)) soundex_map[c] else ""
  }, character(1), USE.NAMES = FALSE)

  # Remove adjacent duplicates
  codes <- codes[codes != ""]
  if (length(codes) > 1) {
    keep <- c(TRUE, codes[-1] != codes[-length(codes)])
    codes <- codes[keep]
  }

  # Pad/truncate to 3 digits
  codes <- c(codes, "0", "0", "0")[1:3]

  paste0(first, paste(codes, collapse = ""))
}

#' Calculate phonetic distance between words
#'
#' Uses Soundex codes to measure phonetic similarity.
#'
#' @param word1 Character. First word
#' @param word2 Character. Second word
#' @return Numeric. Distance (0 = identical, higher = more different)
#' @export
#' @examples
#' phonetic_distance("coverage", "covfefe")
phonetic_distance <- function(word1, word2) {
  code1 <- soundex(word1)
  code2 <- soundex(word2)

  # Simple character difference count
  chars1 <- strsplit(code1, "")[[1]]
  chars2 <- strsplit(code2, "")[[1]]

  sum(chars1 != chars2)
}

#' Get phonetic group for a consonant
#'
#' @param consonant Character. Single consonant
#' @return Character. Group name (labial, dental, velar, etc.)
#' @export
#' @examples
#' get_phonetic_group("b")  # "labial"
get_phonetic_group <- function(consonant) {
  consonant <- tolower(consonant)
  for (group_name in names(phonetic_groups)) {
    if (consonant %in% phonetic_groups[[group_name]]) {
      return(group_name)
    }
  }
  NA_character_
}

Try the covfefe package in your browser

Any scripts or data that you put into this service are public.

covfefe documentation built on Jan. 26, 2026, 5:08 p.m.