Nothing
# R/detect.R
# Detection and Analysis Tools functions
#' Detect likely typo type
#'
#' @param typo Character. The typo'd word
#' @param intended Character. The intended word (if known)
#' @return List with possible_types and confidence scores
#' @export
#' @examples
#' detect_typo_type("tesy", "test")
detect_typo_type <- function(typo, intended = NULL) {
types <- character(0)
scores <- numeric(0)
if (is.null(intended)) {
# Can only do basic analysis
return(list(possible_types = "unknown", confidence = 0))
}
typo_chars <- strsplit(tolower(typo), "")[[1]]
intended_chars <- strsplit(tolower(intended), "")[[1]]
len_diff <- length(typo_chars) - length(intended_chars)
# Check for double letter
if (len_diff == 1) {
for (i in seq_len(length(typo_chars) - 1)) {
if (typo_chars[i] == typo_chars[i + 1]) {
# Found doubled char
types <- c(types, "double")
scores <- c(scores, 0.8)
break
}
}
}
# Check for omission
if (len_diff == -1) {
types <- c(types, "omission")
scores <- c(scores, 0.8)
}
# Check for keyboard adjacency
if (len_diff == 0) {
for (i in seq_along(typo_chars)) {
if (typo_chars[i] != intended_chars[i]) {
if (is_keyboard_adjacent(typo_chars[i], intended_chars[i])) {
types <- c(types, "keyboard")
scores <- c(scores, 0.9)
break
}
}
}
}
# Check for phonetic similarity
if (len_diff == 0) {
for (i in seq_along(typo_chars)) {
if (typo_chars[i] != intended_chars[i]) {
if (typo_chars[i] %in% names(consonant_pairs) &&
consonant_pairs[[typo_chars[i]]] == intended_chars[i]) {
types <- c(types, "phonetic")
scores <- c(scores, 0.85)
break
}
}
}
}
# Check for swap
if (len_diff == 0 && length(typo_chars) >= 2) {
for (i in seq_len(length(typo_chars) - 1)) {
swapped <- typo_chars
tmp <- swapped[i]
swapped[i] <- swapped[i + 1]
swapped[i + 1] <- tmp
if (identical(swapped, intended_chars)) {
types <- c(types, "swap")
scores <- c(scores, 0.9)
break
}
}
}
if (length(types) == 0) {
types <- "unknown"
scores <- 0
}
list(possible_types = types, confidence = scores)
}
#' Suggest corrections for a typo
#'
#' @param typo Character. The typo'd word
#' @param dictionary Character vector. Optional custom dictionary
#' @return Character vector of suggested corrections
#' @export
#' @examples
#' suggest_corrections("tesy")
suggest_corrections <- function(typo, dictionary = NULL) {
if (is.null(dictionary)) {
# Use a minimal built-in dictionary
dictionary <- c("test", "testing", "tested", "text", "best", "rest",
"the", "this", "that", "them", "then", "there",
"coverage", "president", "political", "government")
}
# Calculate edit distance to each word
distances <- vapply(dictionary, function(word) {
.edit_distance(tolower(typo), tolower(word))
}, numeric(1))
# Return words within distance 2
close_words <- dictionary[distances <= 2]
if (length(close_words) == 0) {
return(character(0))
}
# Sort by distance
close_words[order(distances[distances <= 2])]
}
# Internal: simple edit distance (Levenshtein)
.edit_distance <- function(s1, s2) {
n1 <- nchar(s1)
n2 <- nchar(s2)
if (n1 == 0) return(n2)
if (n2 == 0) return(n1)
# Initialize matrix
d <- matrix(0, nrow = n1 + 1, ncol = n2 + 1)
d[, 1] <- 0:n1
d[1, ] <- 0:n2
chars1 <- strsplit(s1, "")[[1]]
chars2 <- strsplit(s2, "")[[1]]
for (i in seq_len(n1)) {
for (j in seq_len(n2)) {
cost <- if (chars1[i] == chars2[j]) 0 else 1
d[i + 1, j + 1] <- min(
d[i, j + 1] + 1, # deletion
d[i + 1, j] + 1, # insertion
d[i, j] + cost # substitution
)
}
}
d[n1 + 1, n2 + 1]
}
#' Analyze a gaffe comprehensively
#'
#' @param gaffe Character. The typo/gaffe
#' @param intended Character. The intended word
#' @return List with comprehensive analysis
#' @export
#' @examples
#' analyze_gaffe("covfefe", "coverage")
analyze_gaffe <- function(gaffe, intended) {
typo_analysis <- detect_typo_type(gaffe, intended)
list(
gaffe = gaffe,
intended = intended,
distance = .edit_distance(tolower(gaffe), tolower(intended)),
typo_types = typo_analysis$possible_types,
confidence = typo_analysis$confidence,
phonetic_similarity = 1 - phonetic_distance(gaffe, intended) / 4,
keyboard_plausible = .check_keyboard_plausibility(gaffe, intended)
)
}
# Internal: check if typo is keyboard-plausible
.check_keyboard_plausibility <- function(typo, intended) {
typo_chars <- strsplit(tolower(typo), "")[[1]]
intended_chars <- strsplit(tolower(intended), "")[[1]]
if (length(typo_chars) != length(intended_chars)) {
return(FALSE)
}
different_positions <- which(typo_chars != intended_chars)
if (length(different_positions) == 0) return(TRUE)
all(vapply(different_positions, function(i) {
is_keyboard_adjacent(typo_chars[i], intended_chars[i])
}, logical(1)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.