# Nothing
# R/corrupt.R
# Text Corruption Simulator functions
#' Apply one corruption of a chosen kind to a piece of text
#'
#' Dispatches to the internal `.corrupt_*` helper that implements the requested
#' kind. With `type = "random"` one of the six concrete kinds is drawn with
#' `sample()`, so call `set.seed()` beforehand for reproducible output.
#'
#' @param text Character. Text to corrupt
#' @param type Character. Type of corruption: "keyboard", "phonetic", "double",
#'   "truncate", "swap", "omit", "random"
#' @param position Integer. Position handed to the selected helper
#'   (NULL = random). "keyboard" and "truncate" read it as a word index
#'   (words are separated by single spaces), "double", "swap" and "omit" read
#'   it as a character index, and "phonetic" reads it as an index into the
#'   consonants found in the text. "truncate" returns text with fewer than two
#'   words unchanged.
#' @return Character. Corrupted text
#' @export
#' @examples
#' set.seed(42)
#' corrupt_text("hello", type = "keyboard")
#' corrupt_text("coverage", type = "truncate", position = 3)
corrupt_text <- function(text, type = "random", position = NULL) {
  # Concrete corruption kinds, each mapped to the helper that implements it.
  handlers <- list(
    keyboard = .corrupt_keyboard,
    phonetic = .corrupt_phonetic,
    double = .corrupt_double,
    truncate = .corrupt_truncate,
    swap = .corrupt_swap,
    omit = .corrupt_omit
  )

  type <- match.arg(type, c(names(handlers), "random"))

  # "random" is resolved to one concrete kind before dispatching.
  if (identical(type, "random")) {
    type <- sample(names(handlers), 1)
  }

  handlers[[type]](text, position)
}
# Internal corruption functions
# Apply a keyboard typo to one word of `text`.
#
# `text` is split on single spaces and `position` is a word index: NULL picks
# a word at random, and values past the end are clamped to the last word. The
# chosen word is replaced by `adjacent_key_typo(word)` and the words are joined
# back with single spaces. Text that contains no words (e.g. "") is returned
# unchanged.
.corrupt_keyboard <- function(text, position) {
  words <- strsplit(text, " ")[[1]]
  # strsplit() yields character(0) for "": there is nothing to corrupt, and
  # sample() cannot draw from an empty set.
  if (length(words) == 0) return(text)
  if (is.null(position)) position <- sample(seq_along(words), 1)
  position <- min(position, length(words))
  words[position] <- adjacent_key_typo(words[position])
  paste(words, collapse = " ")
}
# Replace one consonant of `text` with its entry in `consonant_pairs`.
#
# The text is lower-cased and split into characters. `position` is an index
# into the consonants found (1 = first consonant); NULL picks one of them at
# random, and values past the end are clamped to the last consonant. If the
# chosen consonant has no entry in `consonant_pairs` the characters are left
# as they are. Text without any of the listed consonants is returned
# unchanged; otherwise the result is the lower-cased text.
.corrupt_phonetic <- function(text, position) {
  # Swap a consonant for phonetically similar one
  consonants <- c("b", "d", "f", "g", "k", "m", "n", "p", "s", "t", "v", "z")
  chars <- strsplit(tolower(text), "")[[1]]
  consonant_pos <- which(chars %in% consonants)
  if (length(consonant_pos) == 0) return(text)
  if (is.null(position)) {
    # Sample an index into consonant_pos instead of consonant_pos itself:
    # sample(k, 1) with a single number k draws from 1:k, which would select
    # an arbitrary (usually non-consonant) character when only one consonant
    # is present.
    position <- consonant_pos[sample.int(length(consonant_pos), 1)]
  } else {
    position <- consonant_pos[min(position, length(consonant_pos))]
  }
  target <- chars[position]
  if (target %in% names(consonant_pairs)) {
    chars[position] <- consonant_pairs[[target]]
  }
  paste(chars, collapse = "")
}
# Duplicate one character of `text`.
#
# `position` is a character index: NULL picks a character at random, and
# values past the end are clamped to the last character. A copy of the chosen
# character is inserted directly after it. Empty text is returned unchanged.
.corrupt_double <- function(text, position) {
  chars <- strsplit(text, "")[[1]]
  # strsplit() yields character(0) for "": nothing can be doubled, and
  # sample() cannot draw from an empty set.
  if (length(chars) == 0) return(text)
  if (is.null(position)) position <- sample(seq_along(chars), 1)
  position <- min(position, length(chars))
  # Double the character at position
  chars <- append(chars, chars[position], after = position)
  paste(chars, collapse = "")
}
# Cut `text` off after a number of words.
#
# `text` is split on single spaces and the first `position` words are kept,
# joined by single spaces. `position` is clamped to the range
# [1, number of words - 1], so at least one word is always kept and at least
# one is always dropped. With `position = NULL` the cut point is drawn from
# 2..(number of words - 1); for two-word text the only possible cut, 1, is
# used. Text with fewer than two words is returned unchanged.
.corrupt_truncate <- function(text, position) {
  words <- strsplit(text, " ")[[1]]
  n_words <- length(words)
  if (n_words < 2) return(text)
  last_cut <- n_words - 1
  if (is.null(position)) {
    # Build the candidate cuts explicitly and sample an index into them:
    # 2:(n_words - 1) counts downwards for two words, and sample(k, 1) with a
    # single number k draws from 1:k rather than returning k.
    candidates <- seq(min(2, last_cut), last_cut)
    position <- candidates[sample.int(length(candidates), 1)]
  }
  position <- max(1, min(position, last_cut))
  # position >= 1 is guaranteed by the clamp above, so 1:position never
  # counts downwards.
  paste(words[1:position], collapse = " ")
}
# Exchange two neighbouring characters of `text`.
#
# `position` is the index of the left character of the pair: NULL picks a
# pair at random, and values past the end are clamped to the last pair. Text
# shorter than two characters is returned unchanged.
.corrupt_swap <- function(text, position) {
  symbols <- strsplit(text, "")[[1]]
  # Number of adjacent pairs available for swapping.
  n_pairs <- length(symbols) - 1
  if (n_pairs < 1) {
    return(text)
  }

  if (is.null(position)) {
    position <- sample(seq_len(n_pairs), 1)
  }
  position <- min(position, n_pairs)

  # Exchange the characters at position and position + 1.
  right <- symbols[position + 1]
  symbols[position + 1] <- symbols[position]
  symbols[position] <- right

  paste(symbols, collapse = "")
}
# Remove one character from `text`.
#
# `position` is a character index: NULL picks a character at random, and
# values past the end are clamped to the last character. Text shorter than
# two characters is returned unchanged.
.corrupt_omit <- function(text, position) {
  symbols <- strsplit(text, "")[[1]]
  n_symbols <- length(symbols)
  if (n_symbols < 2) {
    return(text)
  }

  drop_at <- if (is.null(position)) {
    sample(seq_along(symbols), 1)
  } else {
    position
  }
  drop_at <- min(drop_at, n_symbols)

  paste(symbols[-drop_at], collapse = "")
}
#' Garble a sentence with random corruptions
#'
#' The sentence is split on single spaces and
#' `ceiling(number of tokens * corruption_rate)` distinct words are each passed
#' through `corrupt_text(word, type = "random")`. The count is capped at the
#' number of non-empty words, so rates above 1 corrupt every word instead of
#' failing, and rates of 0 or below return the sentence unchanged. Empty
#' tokens produced by consecutive spaces are never selected.
#'
#' @param sentence Character. Sentence to garble
#' @param corruption_rate Numeric. Fraction of words to corrupt (0-1)
#' @return Character. Garbled sentence
#' @export
#' @examples
#' set.seed(42)
#' garble_sentence("This is a test", corruption_rate = 0.5)
garble_sentence <- function(sentence, corruption_rate = 0.3) {
  words <- strsplit(sentence, " ")[[1]]
  # Only non-empty tokens can be corrupted; "" arises from repeated spaces.
  eligible <- which(nzchar(words))
  # Never request more words than exist: sample() without replacement errors
  # when the size exceeds the population, and on negative sizes.
  n_corrupt <- min(ceiling(length(words) * corruption_rate), length(eligible))
  if (n_corrupt <= 0) return(sentence)
  # Sample indices into `eligible` so a single eligible position k is not
  # misread by sample() as the range 1:k.
  positions <- eligible[sample.int(length(eligible), n_corrupt)]
  for (pos in positions) {
    words[pos] <- corrupt_text(words[pos], type = "random")
  }
  paste(words, collapse = " ")
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.