R/blend.R

Defines functions blend_score detect_blend .blend_naturalness blend_words

Documented in blend_score blend_words detect_blend

# R/blend.R
# Word Blending Engine functions

#' Blend two words into a portmanteau
#'
#' Combines the beginning of word1 with the end of word2.
#'
#' @param word1 Character. First word (contributes beginning)
#' @param word2 Character. Second word (contributes ending)
#' @param overlap Integer. Desired overlap characters (auto-detected if NULL)
#' @return Character. Blended word
#' @export
#' @examples
#' blend_words("breakfast", "lunch")  # "brunch" or similar
#' blend_words("refute", "repudiate") # "refudiate" or similar
blend_words <- function(word1, word2, overlap = NULL) {
  word1 <- tolower(word1)
  word2 <- tolower(word2)

  n1 <- nchar(word1)
  n2 <- nchar(word2)

  if (is.null(overlap)) {
    # Find best overlap point
    best_score <- 0
    best_blend <- ""

    for (split1 in 2:(n1-1)) {
      prefix <- substr(word1, 1, split1)

      for (split2 in 2:(n2-1)) {
        suffix <- substr(word2, split2, n2)

        # Check for overlapping characters
        end_prefix <- substr(word1, split1, split1)
        start_suffix <- substr(word2, split2, split2)

        # Score based on phonetic similarity at join point
        if (end_prefix == start_suffix) {
          blend <- paste0(prefix, substr(suffix, 2, nchar(suffix)))
        } else {
          blend <- paste0(prefix, suffix)
        }

        # Score: prefer blends that sound natural
        score <- .blend_naturalness(blend, word1, word2)

        if (score > best_score) {
          best_score <- score
          best_blend <- blend
        }
      }
    }

    if (best_blend == "") {
      # Fallback: simple concatenation at midpoint
      best_blend <- paste0(
        substr(word1, 1, ceiling(n1/2)),
        substr(word2, ceiling(n2/2), n2)
      )
    }

    return(best_blend)
  }

  # Manual overlap
  paste0(
    substr(word1, 1, n1 - overlap),
    substr(word2, overlap + 1, n2)
  )
}

# Internal: score blend naturalness
.blend_naturalness <- function(blend, word1, word2) {
  # Prefer shorter blends
  len_score <- 1 / (1 + abs(nchar(blend) - (nchar(word1) + nchar(word2)) / 2))

  # Prefer blends that preserve beginnings/endings
  start_match <- startsWith(blend, substr(word1, 1, 2))
  end_match <- endsWith(blend, substr(word2, nchar(word2) - 1, nchar(word2)))

  len_score + as.numeric(start_match) + as.numeric(end_match)
}

#' Detect possible source words for a blend
#'
#' @param blend Character. The blended word
#' @param candidates Character vector. Possible source words
#' @return Character vector. Most likely source words
#' @export
#' @examples
#' detect_blend("brunch", c("breakfast", "lunch", "dinner"))
detect_blend <- function(blend, candidates) {
  blend <- tolower(blend)
  candidates <- tolower(candidates)

  # Score each candidate by character overlap
  scores <- vapply(candidates, function(cand) {
    # Check prefix overlap
    prefix_match <- 0
    for (i in seq_len(min(nchar(blend), nchar(cand)))) {
      if (substr(blend, 1, i) == substr(cand, 1, i)) {
        prefix_match <- i
      }
    }

    # Check suffix overlap
    suffix_match <- 0
    for (i in seq_len(min(nchar(blend), nchar(cand)))) {
      if (substr(blend, nchar(blend) - i + 1, nchar(blend)) ==
          substr(cand, nchar(cand) - i + 1, nchar(cand))) {
        suffix_match <- i
      }
    }

    max(prefix_match, suffix_match)
  }, numeric(1))

  # Return top 2 candidates
  top_idx <- order(scores, decreasing = TRUE)[1:min(2, length(candidates))]
  candidates[top_idx[scores[top_idx] > 1]]
}

#' Score how well a word is a blend of two others
#'
#' @param blend Character. Potential blend
#' @param word1 Character. First source word
#' @param word2 Character. Second source word
#' @return Numeric. Score from 0 (poor blend) to 1 (perfect blend)
#' @export
#' @examples
#' blend_score("brunch", "breakfast", "lunch")
blend_score <- function(blend, word1, word2) {
  blend <- tolower(blend)
  word1 <- tolower(word1)
  word2 <- tolower(word2)

  # Check how much of word1's start is in blend
  start_overlap <- 0
  for (i in seq_len(min(nchar(blend), nchar(word1)))) {
    if (substr(blend, 1, i) == substr(word1, 1, i)) {
      start_overlap <- i
    }
  }

  # Check how much of word2's end is in blend
  end_overlap <- 0
  for (i in seq_len(min(nchar(blend), nchar(word2)))) {
    if (substr(blend, nchar(blend) - i + 1, nchar(blend)) ==
        substr(word2, nchar(word2) - i + 1, nchar(word2))) {
      end_overlap <- i
    }
  }

  # Normalize
  max_possible <- (nchar(word1) + nchar(word2)) / 2
  min(1, (start_overlap + end_overlap) / max_possible)
}

Try the covfefe package in your browser

Any scripts or data that you put into this service are public.

covfefe documentation built on Jan. 26, 2026, 5:08 p.m.