R/syllable.R

Defines functions syllable_swap onset_nucleus_coda syllable_count syllabify

Documented in onset_nucleus_coda syllabify syllable_count syllable_swap

# R/syllable.R
# Syllable Engine functions

# Internal constants shared by the syllable helpers.
# `.vowels` is the set of letters treated as vowels; note that "y" is included.
# `.vowel_pattern` is a regular expression matching a run of one or more of
# those letters (it is not referenced by the functions visible in this file).
.vowels <- strsplit("aeiouy", "", fixed = TRUE)[[1]]
.vowel_pattern <- "[aeiouy]+"

#' Split a word into syllables
#'
#' Applies a simplified, spelling-based heuristic. The word is lower-cased
#' and every run of adjacent vowel letters (as listed in `.vowels`) is
#' treated as one syllable nucleus. The consonants between two nuclei are
#' divided as follows:
#' \itemize{
#'   \item a single consonant directly before the final nucleus starts the
#'     final syllable;
#'   \item otherwise the first consonant closes the current syllable and any
#'     remaining consonants start the next one.
#' }
#' Finally, if the second-to-last syllable is a lone vowel and the last
#' syllable is exactly consonant + vowel, the two are merged
#' (e.g. "a" + "ge" becomes "age").
#'
#' @param word Character. Word to syllabify. Expected to be a single string;
#'   the splitting logic only looks at the first element.
#' @return Character vector of lower-case syllables. A word containing no
#'   vowel letters is returned lower-cased and unsplit.
#' @export
#' @examples
#' syllabify("hello")     # c("hel", "lo")
#' syllabify("coverage")  # c("cov", "er", "age")
syllabify <- function(word) {
  word <- tolower(word)

  letters_in_word <- strsplit(word, "")[[1]]
  total <- length(letters_in_word)
  vowel_idx <- which(letters_in_word %in% .vowels)

  # Nothing to split on without a vowel
  if (length(vowel_idx) == 0L) {
    return(word)
  }

  # Label runs of adjacent vowel positions; each run is one nucleus.
  run_id <- cumsum(c(TRUE, diff(vowel_idx) != 1L))
  run_first <- vowel_idx[!duplicated(run_id)]
  run_last <- vowel_idx[!duplicated(run_id, fromLast = TRUE)]
  n_runs <- length(run_first)

  # Cut the word into one piece per nucleus.
  pieces <- character(n_runs)
  from <- 1L
  for (k in seq_len(n_runs)) {
    if (k == n_runs) {
      # Final nucleus keeps whatever is left of the word
      to <- total
    } else {
      # Runs are never adjacent, so there is always at least one consonant
      gap <- run_first[k + 1L] - run_last[k] - 1L
      lone_before_final <- gap == 1L && (k + 1L) == n_runs
      if (lone_before_final) {
        # The single consonant becomes the onset of the final syllable
        to <- run_last[k]
      } else {
        # First consonant stays here as coda; the rest move on
        to <- run_last[k] + 1L
      }
    }
    pieces[k] <- paste(letters_in_word[from:to], collapse = "")
    from <- to + 1L
  }

  # Drop any empty pieces
  pieces <- pieces[nchar(pieces) > 0]

  count <- length(pieces)
  if (count < 2L) {
    return(pieces)
  }

  # Final merge: lone-vowel syllable followed by a consonant + vowel ending
  # (e.g. "a" + "ge" -> "age").
  penult <- pieces[count - 1L]
  final <- pieces[count]
  lone_vowel <- nchar(penult) == 1 && penult %in% .vowels

  if (lone_vowel && nchar(final) == 2) {
    final_letters <- strsplit(final, "")[[1]]
    starts_with_consonant <- !(final_letters[1] %in% .vowels)
    if (starts_with_consonant && final_letters[2] %in% .vowels) {
      pieces <- c(pieces[seq_len(count - 2L)], paste0(penult, final))
    }
  }

  pieces
}

#' Count the syllables in a word
#'
#' Splits the word with [syllabify()] and reports how many pieces came back.
#'
#' @param word Character. Word to count
#' @return Integer. Number of syllables, i.e. the length of the vector
#'   returned by `syllabify(word)`
#' @export
#' @examples
#' syllable_count("hello")  # 2
syllable_count <- function(word) {
  parts <- syllabify(word)
  length(parts)
}

#' Parse a syllable into onset, nucleus and coda
#'
#' The syllable is lower-cased and split around its first run of adjacent
#' vowel letters (as listed in `.vowels`):
#' \itemize{
#'   \item `onset`: everything before that run;
#'   \item `nucleus`: the run itself;
#'   \item `coda`: everything after it, including any later vowels
#'     (e.g. the trailing "e" of "age").
#' }
#'
#' @param syllable Character. Single syllable
#' @return List with character components `onset`, `nucleus` and `coda`.
#'   Missing parts are empty strings. If the syllable contains no vowel, the
#'   whole (lower-cased) syllable is returned as `onset`.
#' @export
#' @examples
#' onset_nucleus_coda("cat")  # list(onset="c", nucleus="a", coda="t")
#' onset_nucleus_coda("age")  # list(onset="", nucleus="a", coda="ge")
onset_nucleus_coda <- function(syllable) {
  syllable <- tolower(syllable)
  chars <- strsplit(syllable, "")[[1]]
  n_chars <- length(chars)
  vowel_idx <- which(chars %in% .vowels)

  if (length(vowel_idx) == 0L) {
    return(list(onset = syllable, nucleus = "", coda = ""))
  }

  nucleus_start <- vowel_idx[1]

  # The nucleus ends where the first run of adjacent vowels ends. Using the
  # last vowel of the whole syllable instead would pull consonants into the
  # nucleus for inputs such as "age" or "cake".
  breaks <- which(diff(vowel_idx) != 1L)
  nucleus_end <- if (length(breaks) > 0L) {
    vowel_idx[breaks[1]]
  } else {
    vowel_idx[length(vowel_idx)]
  }

  # paste() over an empty selection yields "", so absent parts need no
  # special-casing.
  before <- seq_len(nucleus_start - 1L)
  after <- nucleus_end + seq_len(n_chars - nucleus_end)

  list(
    onset = paste(chars[before], collapse = ""),
    nucleus = paste(chars[nucleus_start:nucleus_end], collapse = ""),
    coda = paste(chars[after], collapse = "")
  )
}

#' Swap syllables between two words
#'
#' Both words are split with [syllabify()], the syllables at `position` are
#' exchanged, and each word is glued back together. Because `syllabify()`
#' lower-cases its input, the swapped words are lower-case.
#'
#' If `position` is not usable, a warning is raised and the two input words
#' are returned exactly as given. This happens when `position` is not a
#' single non-missing number of at least 1, or when it is larger than the
#' syllable count of either word.
#'
#' @param word1 Character. First word
#' @param word2 Character. Second word
#' @param position Integer. Which syllable to swap (1-indexed)
#' @return Character vector of length 2 with swapped words
#' @export
#' @examples
#' syllable_swap("coverage", "president", 1)
syllable_swap <- function(word1, word2, position = 1L) {
  # Reject zero, negative, missing or non-scalar positions up front. R would
  # otherwise read 0 as "select nothing" and a negative index as "all but",
  # producing a silent no-op, a wrong swap or an obscure error.
  position_ok <- is.numeric(position) &&
    length(position) == 1L &&
    !is.na(position) &&
    position >= 1
  if (!position_ok) {
    warning("Position must be a single number >= 1")
    return(c(word1, word2))
  }

  syl1 <- syllabify(word1)
  syl2 <- syllabify(word2)

  if (position > length(syl1) || position > length(syl2)) {
    warning("Position exceeds syllable count")
    return(c(word1, word2))
  }

  # Swap syllables at position
  tmp <- syl1[position]
  syl1[position] <- syl2[position]
  syl2[position] <- tmp

  c(paste(syl1, collapse = ""), paste(syl2, collapse = ""))
}

Try the covfefe package in your browser

Any scripts or data that you put into this service are public.

covfefe documentation built on Jan. 26, 2026, 5:08 p.m.