#' Yet (another urlencode compatible) encoding scheme
#'
#' @description
#' Encode and decode using an encoding scheme that is a superset of the
#' [utils::URLencode()] encoding. With default settings, `yencode()` and
#' `ydecode()` produce strings that are fully compatible with `urlencode`
#' encoded strings. However, these functions allow a custom whitelist of symbols
#' that should not be escaped by the encoding process, and a configurable escape
#' character to use in place of the `%` symbol, for example to work with storage
#' layers that do not like the `%` symbol.
#'
#' The `yencoder()` and `ydecoder()` functions are convenience function, which
#' return the corresponding a function with the escape and whitelist already
#' set, allowing easy use in contexts that expect a single-argument function.
#'
#' @details
#' In addition to the supplied white-list, `A-Z`, `a-z`, and `0-9` are always
#' white-listed. There are no restrictions on the white-list, except that the
#' escape character must not be part of it (and will be removed from it with a
#' warning). Of course, it is important that the underlying storage layer
#' handles all white-listed characters gracefully.
#'
#' Note that any `ascii` letter or number will work perfectly fine as an escape
#' character, the output will be well-formed and decoded correctly, even if some
#' of them, such as `1` will result in escape sequences that contain the letter
#' itself.
#'
#' In particular, `yencoder("Z", whitelist="")` returns a encoder that will
#' encode any string to a pure `A-Z`, `a-z`, and `0-9` representation, suitable
#' for extremely limited storage layers (it will encode `Z` as `Z5A`).
#'
#' @param string The string to process.
#' @param escape The escape character to use.
#' @param whitelist Any characters that should not be escaped. See details.
#' @return The processed (encoded or decoded) string.
#'
#' @md
#' @export
yencode <- function(string, escape="%", whitelist=c("._~-", "][!$&'()*+,;=:/?@#")) {
# The escape string must be one single-byte character
stopifnot( is.character(escape) ,
length(escape) == 1 ,
length(charToRaw(escape)) == 1 )
# Treat NULL or NA whitelist as empty string, but other types should error
if (is.null(whitelist)) { whitelist <- "" }
whitelist[is.na(whitelist)] <- ""
stopifnot(is.character(whitelist))
# The following characters are always whitelisted and cannot be escaped
whitelist_core <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
whitelist_final <- paste0(whitelist_core, paste0(whitelist, collapse=""))
# The escape character must NOT be in the final whitelist
if (stringr::str_detect(whitelist_final, stringr::fixed(escape))) {
warning(paste0("The escape character (", escape, ") cannot be present in the whitelist,\n",
"and has been removed. The results are, strictly speaking, well-specified\n",
"and robust, but you should be sure you know what you are doing."))
whitelist_final <- stringr::str_remove(whitelist_final, stringr::fixed(escape))
}
# Paste whitelists together, then remove duplicates, and escape any special chars
wl <- whitelist_final |>
stringr::str_split("") |>
unlist() |>
# The following could probably be vectorized in pattern to optimize replacement
stringr::str_replace("\\/","\\\\/") |>
stringr::str_replace("\\-","\\\\-") |>
stringr::str_replace("\\]","\\\\]") |>
stringr::str_replace("\\[","\\\\[") |>
paste0(collapse = "")
# Some defensive programming, the above should result in character of length 1
stopifnot(length(wl) == 1)
# Final bit of defensive programming, the escape character byte must not
# be an intra-char byte in any of the whitelist chars
if (charToRaw(escape) %in% charToRaw(wl)) {
stop(paste0("The escape character byte (", charToRaw(escape), ") is present within,\n",
"a character in this whitelist. The offending whitelist character\n",
"must be removed manually."))
}
vapply(string, function(string) {
OK <- paste0("[^", wl, "]")
x <- strsplit(string, "")[[1L]]
#print(x)
#z <- grep(OK, x) # base R regexp syntax not standard?
z <- stringr::str_detect(x,OK)
#print(z)
if (length(z)) {
y <- vapply(x[z], function(x) paste0(escape, toupper(as.character(charToRaw(x))),
collapse = ""), "")
x[z] <- y
}
paste(x, collapse = "")
}, FUN.VALUE = character(1), USE.NAMES = FALSE)
}
#' @rdname yencode
#' @md
#' @export
yencoder <- function(escape="%", whitelist=c("._~-", "][!$&'()*+,;=:/?@#")) {
# We trigger the "escape in whitelist" warning once on creation, but
# then suppress that specific warning in the subsequent calls to
# the encoder function
yencode("", escape, whitelist)
function(string) {
suppress_warnings(
yencode(string, escape, whitelist),
"The escape character .* cannot be present in the whitelist")
}
}
#' @rdname yencode
#' @md
#' @export
ydecode <- function (string, escape = "%")
{
vapply(string, function(string) {
x <- charToRaw(string)
pc <- charToRaw(escape)
out <- raw(0L)
i <- 1L
while (i <= length(x)) {
if (x[i] != pc) {
out <- c(out, x[i])
i <- i + 1L
}
else {
y <- as.integer(x[i + 1L:2L])
y[y > 96L] <- y[y > 96L] - 32L
y[y > 57L] <- y[y > 57L] - 7L
y <- sum((y - 48L) * c(16L, 1L))
out <- c(out, as.raw(as.character(y)))
i <- i + 3L
}
}
rawToChar(out)
}, character(1), USE.NAMES = FALSE)
}
#' @rdname yencode
#' @md
#' @export
ydecoder <- function(escape="%") {
function(string) {
ydecode(string, escape)
}
}
#' Suppress warnings that match specific regular expressions
#'
#' Based on [this stack overflow answer](https://stackoverflow.com/questions/16517795/selective-suppresswarnings-that-filters-by-regular-expression/55182432#55182432),
#' except it does not allow function arguments, only regular expressions. This removes the dependency on non-base-R packages.
#'
#' @md
#' @keywords internal
suppress_warnings <- function(expr, regexp) {
eval.parent(substitute(
withCallingHandlers( expr, warning = function(w) {
cm <- conditionMessage(w)
cond <- grepl(regexp, cm)
if (cond) {
invokeRestart("muffleWarning")
}
})
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.