```
#' @title Combine single letter words
#'
#' @description Recombines words whose letters have been separated with
#'   spaces to avoid content moderation (e.g. "h e l l o" becomes "hello").
#'   It was inspired by Hosseini et al.'s paper on deceiving the Perspective
#'   API, available here: https://arxiv.org/pdf/1702.08138.pdf
#'
#' @details The text is lower-cased and every run of whitespace is collapsed
#'   to a single space. The text is then split on spaces, and every run of two
#'   or more consecutive one-character words (letters, digits or punctuation)
#'   is pasted together into one word. Isolated one-character words are left
#'   alone because they may be legitimate (e.g. "a", "i"). Each element of
#'   `temp` is processed independently.
#'
#'   When at least one run is merged in an element, the result for that
#'   element is also trimmed of leading and trailing whitespace. When nothing
#'   is merged, the lower-cased, whitespace-collapsed text is returned as is
#'   and a status message is emitted (at most once per call for each kind of
#'   message).
#'
#' @param temp a character vector to be cleaned. `NA` elements are returned
#'   as `NA`.
#' @return an unnamed character vector of the same length as `temp`, holding
#'   the lower-cased text with runs of single-character words combined.
#' @examples
#' slw("say H e l l o world")
#' slw(c("f r e e money", "nothing to change here"))
#' @export
slw <- function(temp) {
  # is.character() also copes with objects that carry more than one class,
  # where `class(x) != "character"` would yield a length > 1 condition.
  if (!is.character(temp)) {
    stop("class is not 'character'")
  }

  # Normalise: lower case, and a single space for every whitespace run.
  lowered <- gsub(
    pattern = "\\s+",
    replacement = " ",
    x = tolower(unname(temp))
  )

  # Work on whole words rather than on character offsets. This treats the
  # first and last word like any other, and avoids feeding user text back
  # into gsub() as a regular expression.
  word_list <- strsplit(lowered, split = " ", fixed = TRUE)

  out <- lowered
  # Per-element outcome: "none" (no one-character words), "isolated"
  # (one-character words, but never two in a row) or "merged".
  status <- rep("none", length(lowered))

  for (i in seq_along(lowered)) {
    words <- word_list[[i]]
    if (anyNA(words)) {
      next # NA input stays NA
    }

    is_single <- nchar(words, type = "chars") == 1L
    if (!any(is_single)) {
      next
    }

    # Only runs of at least two consecutive one-character words are merged.
    runs <- rle(is_single)
    run_merges <- runs$values & runs$lengths >= 2L
    if (!any(run_merges)) {
      status[i] <- "isolated"
      next
    }

    # Give every word a group id: words inside a merged run share one id,
    # every other word gets an id of its own.
    run_index <- rep(seq_along(runs$lengths), times = runs$lengths)
    in_merged_run <- rep(run_merges, times = runs$lengths)
    starts_group <- !in_merged_run | c(TRUE, diff(run_index) != 0L)
    group <- cumsum(starts_group)

    merged <- vapply(
      split(words, group),
      paste0,
      character(1),
      collapse = "",
      USE.NAMES = FALSE
    )

    out[i] <- trimws(paste(merged, collapse = " "))
    status[i] <- "merged"
  }

  if (any(status == "none")) {
    message("no floating letters have been found! Returning lowered text")
  }
  if (any(status == "isolated")) {
    message(
      "no strings of floating letters have been found! ",
      "Floating letters might be legit. Returning lowered text"
    )
  }

  out
}
```

Embedding an R snippet on your website

Add the following code to your website.

For more information on customizing the embed code, read Embedding Snippets.