Nothing
print.dsb = function(x, ...){
cat(x, sep = "\n")
}
#' Simple and powerful string manipulation with the dot square bracket operator
#'
#' Compactly performs many low level string operations. Advanced support for pluralization.
#'
#'
#'
#' @param ... Character scalars that will be collapsed with the argument `sep`. You can use `".[x]"` within each character string to insert the value of `x` in the string. You can add string operations in each `".[]"` instance with the syntax `"'arg'op ? x"` (resp. `"'arg'op ! x"`) to apply the operation `'op'` with the argument `'arg'` to `x` (resp. the verbatim of `x`). Otherwise, what to say? Ah, nesting is enabled, and since there's over 30 operators, it's a bit complicated to sort you out in this small space. But type `dsb("--help")` to prompt an (almost) extensive help.
#' @param frame An environment used to evaluate the variables in `".[]"`.
#' @param sep Character scalar, default is `""`. It is used to collapse all the elements in `...`.
#' @param vectorize Logical, default is `FALSE`. If `TRUE`, Further, elements in `...` are NOT collapsed together, but instead vectorised.
#' @param nest Logical, default is `TRUE`. Whether the original character strings should be nested into a `".[]"`. If `TRUE`, then things like `dsb("S!one, two")` are equivalent to `dsb(".[S!one, two]")` and hence create the vector `c("one", "two")`.
#' @param collapse Character scalar or `NULL` (default). If provided, the resulting character vector will be collapsed into a character scalar using this value as a separator.
#'
#'
#' There are over 30 basic string operations, it supports pluralization, it's fast (e.g. faster than `glue` in the benchmarks), string operations can be nested (it may be the most powerful feature), operators have sensible defaults.
#'
#' See detailed help on the console with `dsb("--help")`. The real help is in fact in the "Examples" section.
#'
#'
#' @return
#' It returns a character vector whose length depends on the elements and operations in `".[]"`.
#'
#' @examples
#'
#' #
#' # BASIC USAGE ####
#' #
#'
#' x = c("Romeo", "Juliet")
#'
#' # .[x] inserts x
#' dsb("Hello .[x]!")
#'
#' # elements in ... are collapsed with "" (default)
#' dsb("Hello .[x[1]], ",
#' "how is .[x[2]] doing?")
#'
#' # Splitting a comma separated string
#' # The mechanism is explained later
#' dsb("/J. Mills, David, Agnes, Dr Strong")
#'
#' # Nota: this is equivalent to (explained later)
#' dsb("', *'S !J. Mills, David, Agnes, Dr Strong")
#'
#' #
#' # Applying low level operations to strings
#' #
#'
#' # Two main syntax:
#'
#' # A) expression evaluation
#' # .[operation ? x]
#' # | |
#' # | \-> the expression to be evaluated
#' # \-> ? means that the expression will be evaluated
#'
#' # B) verbatim
#' # .[operation ! x]
#' # | |
#' # | \-> the expression taken as verbatim (here ' x')
#' # \-> ! means that the expression is taken as verbatim
#'
#' # operation: usually 'arg'op with op an operation code.
#'
#' # Example: splitting
#' x = "hello dear"
#' dsb(".[' 's ? x]")
#' # x is split by ' '
#'
#' dsb(".[' 's !hello dear]")
#' # 'hello dear' is split by ' '
#' # had we used ?, there would have been an error
#'
#' # By default, the string is nested in .[], so in that case no need to use .[]:
#' dsb("' 's ? x")
#' dsb("' 's !hello dear")
#'
#' # There are 35 string operators
#' # Operators usually have a default value
#' # Operations can be chained by separating them with a comma
#'
#' # Example: default of 's' is ' ' + chaining with collapse
#' dsb("s, ' my 'c!hello dear")
#'
#' #
#' # Nesting
#' #
#'
#' # .[operations ! s1.[expr]s2]
#' # | |
#' # | \-> expr will be evaluated then added to the string
#' # \-> nesting requires verbatim evaluation: '!'
#'
#' dsb("The variables are: .[C!x.[1:4]].")
#'
#' # This one is a bit ugly but it shows triple nesting
#' dsb("The variables are: .[w, C!.[2* ! x.[1:4]].[S, 4** ! , _sq]].")
#'
#' #
#' # Splitting
#' #
#'
#' # s: split with fixed pattern, default is ' '
#' dsb("s !a b c")
#' dsb("' b 's !a b c")
#'
#' # S: split with regex pattern, default is ', *'
#' dsb("S !a, b, c")
#' dsb("'[[:punct:] ]'S !a! b; c")
#'
#' #
#' # Collapsing
#' #
#'
#' # c and C do the same, their default is different
#' # syntax: 's1||s2' with
#' # - s1 the string used for collapsing
#' # - s2 (optional) the string used for the last collapse
#'
#' # c: default is ' '
#' dsb("c?1:3")
#'
#' # C: default is ', || and '
#' dsb("C?1:3")
#'
#' dsb("', || or 'c?1:4")
#'
#' #
#' # Extraction
#' #
#'
#' # x: extracts the first pattern
#' # X: extracts all patterns
#' # syntax: 'pattern'x
#' # Default is '[[:alnum:]]+'
#'
#' x = "This years is... 2020"
#' dsb("x ? x")
#' dsb("X ? x")
#'
#' dsb("'\\d+'x ? x")
#'
#' #
#' # STRING FORMATTING ####
#' #
#'
#' #
#' # u, U: uppercase first/all letters
#'
#' # first letter
#' dsb("u!julia mills")
#'
#' # title case: split -> upper first letter -> collapse
#' dsb("s, u, c!julia mills")
#'
#' # upper all letters
#' dsb("U!julia mills")
#'
#' #
#' # L: lowercase
#'
#' dsb("L!JULIA MILLS")
#'
#' #
#' # q, Q: single or double quote
#'
#' dsb("S, q, C!Julia, David, Wilkins")
#' dsb("S, Q, C!Julia, David, Wilkins")
#'
#' #
#' # f, F: formats the string to fit the same length
#'
#'
#' score = c(-10, 2050)
#' nm = c("Wilkins", "David")
#' dsb("Monopoly scores:\n.['\n'c ! - .[f ? nm]: .[F ? score] US$]")
#'
#' # OK that example may have been a bit too complex,
#' # let's make it simple:
#'
#' dsb("Scores: .[f ? score]")
#' dsb("Names: .[F ? nm]")
#'
#' #
#' # w, W: reformat the white spaces
#' # w: suppresses trimming white spaces + normalizes successive white spaces
#' # W: same but also includes punctuation
#'
#' dsb("w ! The white spaces are now clean. ")
#'
#' dsb("W ! I, really -- truly; love punctuation!!!")
#'
#' #
#' # %: applies sprintf formatting
#'
#' dsb("pi = .['.2f'% ? pi]")
#'
#' #
#' # a: appends text on each item
#' # syntax: 's1|s2'a, adds s1 at the beginning and s2 at the end of the string
#' # It accepts the special values :1:, :i:, :I:, :a:, :A:
#' # These values create enumerations (only one such value is accepted)
#'
#' # appending square brackets
#' dsb("'[|]'a, ' + 'c!x.[1:4]")
#'
#' # Enumerations
#' acad = dsb("/you like admin, you enjoy working on weekends, you really love emails")
#' dsb("Main reasons to pursue an academic career:\n .[':i:) 'a, C ? acad].")
#'
#' #
#' # A: same as 'a' but adds at the begging/end of the full string (not on the elements)
#' # special values: :n:, :N:, give the number of elements
#'
#' characters = dsb("/David, Wilkins, Dora, Agnes")
#' dsb("There are .[':N: characters: 'A, C ? characters].")
#'
#'
#' #
#' # stop: removes basic English stopwords
#' # the list is from the Snowball project: http://snowball.tartarus.org/algorithms/english/stop.txt
#'
#' dsb("stop, w!It is a tale told by an idiot, full of sound and fury, signifying nothing.")
#'
#' #
#' # k: keeps the first n characters
#' # syntax: nk: keeps the first n characters
#' # 'n|s'k: same + adds 's' at the end of shortened strings
#' # 'n||s'k: same but 's' counts in the n characters kept
#'
#' words = dsb("/short, constitutional")
#' dsb("5k ? words")
#'
#' dsb("'5|..'k ? words")
#'
#' dsb("'5||..'k ? words")
#'
#' #
#' # K: keeps the first n elements
#' # syntax: nK: keeps the first n elements
#' # 'n|s'K: same + adds the element 's' at the end
#' # 'n||s'K: same but 's' counts in the n elements kept
#' #
#' # Special values :rest: and :REST:, give the number of items dropped
#'
#' bx = dsb("/Pessac Leognan, Saint Emilion, Marguaux, Saint Julien, Pauillac")
#' dsb("Bordeaux wines I like: .[3K, ', 'C ? bx].")
#'
#' dsb("Bordeaux wines I like: .['3|etc..'K, ', 'C ? bx].")
#'
#' dsb("Bordeaux wines I like: .['3||etc..'K, ', 'C ? bx].")
#'
#' dsb("Bordeaux wines I like: .['3|and at least :REST: others'K, ', 'C ? bx].")
#'
#' #
#' # Ko, KO: special operator which keeps the first n elements and adds "others"
#' # syntax: nKo
#' # KO gives the rest in letters
#'
#' dsb("Bordeaux wines I like: .[4KO, C ? bx].")
#'
#' #
#' # r, R: string replacement
#' # syntax: 's'R: deletes the content in 's' (replaces with the empty string)
#' # 's1 => s2'R replaces s1 into s2
#' # r: fixed / R: perl = TRUE
#'
#' dsb("'e'r !The letter e is deleted")
#'
#' # adding a perl look-behind
#' dsb("'(?<! )e'R !The letter e is deleted")
#'
#' dsb("'e => a'r !The letter e becomes a")
#'
#' dsb("'([[:alpha:]]{3})[[:alpha:]]+ => \\1.'R !Trimming the words")
#'
#' #
#' # *, *c, **, **c: replication, replication + collapse
#' # syntax: n* or n*c
#' # ** is the same as * but uses "each" in the replication
#'
#' dsb("N.[10*c!o]!")
#'
#' dsb("3*c ? 1:3")
#' dsb("3**c ? 1:3")
#'
#' #
#' # d: replaces the items by the empty string
#' # -> useful in conditions
#'
#' dsb("d!I am going to be annihilated")
#'
#' #
#' # ELEMENT MANIPULATION ####
#' #
#'
#' #
#' # D: deletes all elements
#' # -> useful in conditions
#'
#' x = dsb("/I'll, be, deleted")
#' dsb("D ? x")
#'
#' #
#' # i, I: inserts an item
#' # syntax: 's1|s2'i: inserts s1 first and s2 last
#' # I: is the same as i but is 'invisibly' included
#'
#' characters = dsb("/David, Wilkins, Dora, Agnes, Trotwood")
#' dsb("'Heep|Spenlow'i, C ? characters")
#'
#' dsb("'Heep|Spenlow'I, C ? characters")
#'
#'
#' #
#' # PLURALIZATION ####
#' #
#'
#' # There is support for pluralization
#'
#' #
#' # *s, *s_: adds 's' or 's ' depending on the number of elements
#'
#' nb = 1:5
#' dsb("Number.[*s, D ? nb]: .[C ? nb]")
#' dsb("Number.[*s, D ? 2 ]: .[C ? 2 ]")
#'
#' # or
#' dsb("Number.[*s, ': 'A, C ? nb]")
#'
#'
#' #
#' # v, V: adds a verb at the beginning/end of the string
#' # syntax: 'verb'v
#'
#' # Unpopular opinion?
#' brand = c("Apple", "Samsung")
#' dsb(".[V, C ? brand] overrated.")
#' dsb(".[V, C ? brand[1]] overrated.")
#'
#' win = dsb("/Peggoty, Agnes, Emily")
#' dsb("The winner.[*s_, v, C ? win].")
#' dsb("The winner.[*s_, v, C ? win[1]].")
#'
#' # Other verbs
#' dsb(".[' have'V, C ? win] won a prize.")
#' dsb(".[' have'V, C ? win[1]] won a prize.")
#'
#' dsb(".[' was'V, C ? win] unable to come.")
#' dsb(".[' was'V, C ? win[1]] unable to come.")
#'
#' #
#' # *A: appends text depending on the length of the vector
#' # syntax: 's1|s2 / s3|s4'
#' # if length == 1: applies 's1|s2'A
#' # if length > 1: applies 's3|s4'A
#'
#' win = dsb("/Barkis, Micawber, Murdstone")
#' dsb("The winner.[' is /s are '*A, C ? win].")
#' dsb("The winner.[' is /s are '*A, C ? win[1]].")
#'
#' #
#' # CONDITIONS ####
#' #
#'
#' # Conditions can be applied with 'if' statements.",
#' # The syntax is 'type comp value'if(true : false), with
#' # - type: either 'len', 'char', 'fixed' or 'regex'
#' # + len: number of elements in the vector
#' # + char: number of characters
#' # + fixed: fixed pattern
#' # + regex: regular expression pattern
#' # - comp: a comparator:
#' # + valid for len/char: >, <, >=, <=, !=, ==
#' # + valid for fixed/regex: !=, ==
#' # - value: a value for which the comparison is applied.
#' # - true: operations to be applied if true (can be void)
#' # - false: operations to be applied if false (can be void)
#'
#' dsb("'char <= 2'if('(|)'a : '[|]'a), ' + 'c ? c(1, 12, 123)")
#'
#' sentence = "This is a sentence with some longish words."
#' dsb("s, 'char<=4'if(D), c ? sentence")
#'
#' dsb("s, 'fixed == e'if(:D), c ! Only words with an e are selected.")
#'
#' #
#' # ARGUMENTS FROM THE FRAME ####
#' #
#'
#' # Arguments can be evaluated from the calling frame.
#' # Simply use backticks instead of quotes.
#'
#' dollar = 6
#' reason = "glory"
#' dsb("Why do you develop packages? For .[`dollar`*c!$]?",
#' "For money? No... for .[U,''s, c?reason]!", sep = "\n")
#'
#'
#'
#'
#'
dsb = function(..., frame = parent.frame(), sep = "", vectorize = FALSE, nest = TRUE,
collapse = NULL){
check_arg(vectorize, nest, "logical scalar")
check_arg(sep, "character scalar")
check_arg(collapse, "NULL character scalar")
check_arg(frame, "class(environment) l0")
check_arg(..., "vector len(1)")
if(...length() == 0) return("")
sc = sys.call()
if(identical(sc[[2]], "--help")){
msg = c(
"Welcome to dsb help\nUsage: dsb(s) with 's' a character string",
" ",
"BASIC usage ------------|",
" dsb evaluates anything in '.[]' and inserts it in 's'.",
' Ex: if x = "John", then dsb("Hi .[x]!") -> "Hi John!"',
" ",
"STRING OPERATIONS ------|",
" Each .[] instance supports one or more string operations.",
" The syntax is .['arg'op?x] or .['arg'op!x], with:",
" - 'arg' a quoted string used as argument,",
" - op an operator code,",
" - ? or !:",
" + ?: evaluates the expression x",
" + !: takes x as verbatim",
" - x an expression to be evaluated or some verbatim text (no quote needed).",
' Ex: dsb(".[\' + \'c?1:3] = 6") -> "1 + 2 + 3 = 6". 1:3 is collapsed (c) with \' + \'.',
"",
" Using ! instead of ? applies the operation to the *verbatim* of the expression.",
' Ex: dsb(".[\': => 2\'r!1:3] = 6") -> "123 = 6".',
" In the string '1:3', ':' is replaced (r) with '2'.",
"",
" Operations can be chained using comma-separation. The syntax is: .['s1'op1, 's2'op2?x]",
" Evaluations are from left to right.",
' Ex: dsb(".[\': => 2\'r, \'\'s, \' + \'c!1:3] = 6") -> "1 + 2 + 3 = 6',
" 1) '1:3' -> ':' is replaced (r) with '2' -> '123',",
" 2) '123' -> is split (s) with '' -> c('1', '2', '3')",
" 3) c('1', '2', '3') -> is collapsed (c) with ' + ' -> '1 + 2 + 3'",
"",
" Nesting works, but only in verbatim components.",
" Ex: x = c(\"Doe\", \"Smith\")",
" dsb(\"Hi .[' and 'c!John .[x]]\") -> \"Hi John Doe and John Smith\"",
"",
" If nest = TRUE (default), the initial string is implicitly embedded in .[] ",
" and considered as verbatim. This means that operations can be applied from the start.",
" Ex: dsb(\"', 's!a1, b3, d8\") -> c(\"a1\", \"b3\", \"d8\")",
"",
" Operators have default values, so the quoted argument is optional.",
' Ex: dsb("c?1:3") -> "1 2 3". 1:3 is collapsed (c) with \' \', its default.',
"",
"OPERATORS --------------|",
" Below is the list of operators and their default argument when relevant.",
" OPERATOR DEFAULT VALUE ",
"",
" s ' ' split, fixed = TRUE",
" S ', *' split, perl = TRUE",
" x '[[:alnum:]]+' extracts the first pattern, perl = TRUE",
" X '[[:alnum:]]+' extracts all patterns, perl = TRUE",
"",
" Collapse:",
" - collapses a vector with paste(x, collapse = 's'),",
" - use a double pipe to apply a special collapse to the last values,",
" - the syntax is 's1||s2', s2 will be applied to the last 2 values,",
" - .[', || and 'c!1:3] -> \"1, 2 and 3\".",
" c '' collapse",
" C ', || and ' collapse",
"",
" Replication:",
" - use 5* to replicate 5 times,",
" - using quotes also works, like in '5'*.",
" * '1' replicates n times",
" *c '1' replicates n times, then collapses with ''",
" ** '1' replicates n times, each",
" **c '1' replicates n times, each, then collapses with ''",
"",
" Replacement: ",
" - the syntax is 'old => new', 'old=>new' or 'old_=>_new',",
" - if new is missing, it is considered as the empty string,",
" - the default for 'R' is: removing trailing spaces.",
" r '\\n' replacement, fixed = TRUE ",
" R '^[ \\t\\r\\n]+|[ \\t\\r\\n]+$' replacement, perl = TRUE ",
"",
" Operators without arguments:",
" u puts the first letter of the string to uppercase",
" U puts the complete string to uppercase",
" l, L puts the complete string to lowercase",
" q adds single quotes",
" Q adds double quotes",
" f applies format(x)",
" F applies format(x, justify = 'right')",
" w reformat white spaces",
" W reformat white spaces and punctuation",
" d replaces the content with the empty string",
" D removes all elements",
" stop removes basic English stop words",
"",
" sprintf formatting:",
" - applies a formatting via sprintf,",
" - .['.3f'%?x] is equivalent to sprintf('%.3f', x),",
" - .['.2f'% ? pi] -> '3.14'.",
" % No default applies sprintf formatting",
"",
" Keeping a selected number of characters or elements: ",
" - the syntax is nK, 'n'K, 'n|s'K, or 'n||s'K",
" - with 'n' a number and 's' a string,",
" - K keeps the first n elements and drops the rest,",
" - k keeps the first n characters of each element,",
" - optionaly a string 's' can be appended at the end,",
" - but only when the length is greater than n.",
" - 'n|s' simply appends 's' while 'n||s' includes the length of 's'",
" - in the calculation of the final length.",
" - the special markup :rest: and :REST: can be used in 's'.",
" - Ex: .['5||..'k!c('hello', 'bonjour')] -> c('hello', 'bon..')",
" - .['5|..'k!c('hello', 'bonjour')] -> c('hello', 'bonjo..')",
" k NO DEFAULT keeps the first n characters",
" K NO DEFAULT keeps the first n elements",
" Ko NO DEFAULT keeps the first n elements and adds 'others'",
" ex. usage: 3Ko",
"",
" Insertion and appending operators:",
" - the syntax is 's1|s2' to insert, or append, the string s1 first",
" - and the string 's2' last.",
" - Note that '_|_' can also be used to separate first/last.",
" - The uppercase versions insert/append invisibly: ie without changing the vector.",
" - for *A, the syntax is 's1|s2 / s3|s4'. If the nbr of elements is:",
" * equal to 1: this is equiv. to 's1|s2'A",
" * greater then 1: this is equiv. to 's3|s4'A",
" a NO DEFAULT equiv. to paste0(s1, x, s2)",
" A NO DEFAULT appends s1/s2 to the beginning/end of the full string",
" i NO DEFAULT inserts s1/s2 at the beginning/end of the vector",
" I NO DEFAULT inserts s1/s2 at the beginning/end of the vector",
" *A NO DEFAULT appends s1/s2 at the beg./end of the full string if len 1",
" appends s3/s4 if len > 1",
"",
" Adding length dependent verbs/s:",
" - verbs can be conjugated WRT the length of the vector,",
" - Ex: .[' have'V, C? c('David', 'Steerforth')] -> 'David and Steerforth have'",
" .[' have'V, C? 'Agnes'] -> 'Agnes has'",
" - 's' can be added in the same way:",
" - Ex: 'The number.[*s_, v, C ? 1:3]' -> 'The numbers are 1, 2 and 3'",
" 'The number.[*s_, v, C ? 5]' -> 'The number is 5'",
" v 'is ' adds a conjugated verb at the beginning",
" V ' is' adds a conjugated verb at the end",
" *s NO ARGUMENT adds an 's' at the beginning of the string",
" *s_ NO ARGUMENT adds an 's' followed by a space at the beginning of the string",
"",
"CONDITIONS--------------|",
" Conditions can be applied with 'if' statements.",
" The syntax is 'type comp value'if(true : false), with",
" - type: either 'len', 'char', 'fixed' or 'regex'",
" + len: number of elements in the vector",
" + char: number of characters",
" + fixed: fixed pattern",
" + regex: regular expression pattern",
" - comp: a comparator: ",
" + valid for len/char: >, <, >=, <=, !=, ==",
" + valid for fixed/regex: !=, ==",
" - value: a value for which the comparison is applied.",
" - true: operations to be applied if true (can be void)",
" - false: operations to be applied if false (can be void)",
"",
" Example: .['char <= 2'if('(|)'a : '[|]'a), ' + 'c ? c(1, 12, 123)]",
" -> '(1) + (12) + [123]'",
"",
" Which operators are valid?",
" The 'len' type looks at the length of the vector so that the true/false",
" will apply to the full vector. Hence the operations will concern the full vector and",
" all operations are valid.",
"",
" The other types apply the true/false operations only to the subset of elements",
" which were true or false. Hence to avoid consistency problems, all operators",
" that would change the length of the vector/or affect the full vector are disabled.",
" Operators not working in that case: A, i, I, K, c, C, *, *c, s, S, v, V.",
" Note that the D operator of deletion still works.",
"",
" Special if operators:",
" 'type comp value'IF(true : false):",
" The operator 'IF' works like the regular 'if' except that it also applies any().",
" This means that the result of the if statement will be at the level of the vector,",
" thus for the types different from 'len', all operators can be applied.",
"",
" *if(true : false):",
" The operator *if accepts no argument. The value is true if the length of the vector",
" is greater than 1. ",
" This is a shortcut to 'len>1'if(true : false).",
"",
"SPECIALS ---------------|",
" Use '/' first to split the character with commas:",
' Ex: dsb("/x1, x2") -> c("x1", "x2")',
' dsb("Hi .[/David, Dora]!") -> c("Hi David!", "Hi Dora!")',
"",
" In quoted arguments, use backticks to evaluate them from the frame.",
' Ex: n = 3 ; dsb("`n`*c!$") -> "$$$". The \'$\' is replicated n times, then collapsed.'
)
message(paste(msg, collapse = "\n"))
return(invisible(NULL))
}
res = .dsb(..., frame = frame, sep = sep, vectorize = vectorize, nest = nest, collapse = collapse, check = TRUE)
class(res) = c("dsb", "character")
res
}
.dsb0 = function(..., frame = parent.frame(), sep = "", vectorize = FALSE, check = FALSE){
.dsb(..., frame = frame, nest = FALSE, sep = sep, vectorize = vectorize, check = check)
}
.dsb = function(..., frame = parent.frame(), sep = "", vectorize = FALSE,
nest = TRUE, collapse = NULL, check = FALSE){
if(...length() == 0){
return("")
} else if(...length() == 1){
x = as.character(..1)
if(length(x) > 1){
stop("dsb can only be applied to character scalars. Problem: the argument is of length ",
length(qui), "")
}
} else {
if(check){
dots = error_sender(list(...), "In dsb, one element of ... could not be evaluated.")
} else {
dots = list(...)
}
if(any(lengths(dots) > 1)){
qui = which(lengths(dots) > 1)[1]
stop("dsb can only be applied to character scalars. Problem: The ", n_th(qui),
" elment in ... is of length ", length(dots[[qui]]), ".")
}
if(!vectorize){
# Note: using paste(..1, ..2, sep = sep) explicitly only saves 2us vav do.call
# not worth it.
dots$sep = sep
x = do.call(paste, dots)
} else {
# vectorize
n = length(dots)
res = vector("list", n)
for(i in 1:n){
res[[i]] = .dsb(dots[[i]], nest = nest, frame = frame, check = check)
}
return(unlist(res))
}
}
if(is.na(x) || length(x) == 0){
return(x)
}
if(nest){
x_parsed = list(cpp_dsb_full_string(x))
} else if(!grepl(".[", x, fixed = TRUE)){
return(x)
} else {
x_parsed = cpp_dsb(x)
}
n_x_all = lengths(x_parsed)
n = length(x_parsed)
res = character(0)
i_done = FALSE
for(i in 1:n){
if(i_done){
i_done = FALSE
next
}
xi = x_parsed[[i]]
if(length(xi) == 1){
if(i == 1){
res = xi
} else {
res = paste0(res, xi)
}
} else {
operators = xi[[1]]
xi = xi[[2]]
if(length(operators) == 0){
if(nest){
# means verbatim => no evaluation
if(grepl(".[", xi, fixed = TRUE)){
xi = .dsb(xi, frame = frame, nest = FALSE, check = check)
}
} else {
# we need to evaluate xi
if(check){
xi_call = error_sender(str2lang(xi), "The value '", xi,
"' could not be parsed.")
} else {
xi_call = str2lang(xi)
}
if(is.character(xi_call)){
# if a string literal => it's nesting
if(grepl(".[", xi_call, fixed = TRUE)){
xi = .dsb(xi_call, frame = frame, nest = FALSE, check = check)
}
} else {
if(check){
xi = error_sender(eval(xi_call, frame), "The value '", xi,
"' could not be evaluated.")
} else {
xi = eval(xi_call, frame)
}
}
}
} else {
n_op = length(operators)
verbatim = operators[n_op] %in% c("!", "/")
if(operators[n_op] == "/"){
operators = "',[ \t\n\r]*'S"
} else {
operators = operators[-n_op]
# The two separators ? and ! have no default operation
}
# If split operator, we concatenate
concat_nested = length(operators) > 0 && grepl("(s|S)$", operators[[1]])
if(verbatim && grepl(".[", xi, fixed = TRUE)){
xi = .dsb(xi, frame = frame, nest = FALSE, vectorize = concat_nested, check = check)
} else if(!verbatim){
# evaluation
if(check){
xi_call = error_sender(str2lang(xi), "The value '", xi,
"' could not be parsed.")
xi = error_sender(eval(xi_call, frame), "The value '", xi,
"' could not be evaluated.")
} else {
xi = eval(str2lang(xi), frame)
}
if(is.function(xi)){
stop_up("dsb cannot coerce functions into strings. Problem: '",
trimws(x_parsed[[i]][[2]]), "' is a function.")
}
}
# Now we apply the operators
for(j in seq_along(operators)){
opi = operators[[j]]
op_parsed = dsb_char2operator(opi)
if(op_parsed$do_eval){
if(check){
quoted_call = error_sender(str2lang(op_parsed$quoted),
"In operation '", opi, "', the value '",
op_parsed$quoted, "' could not be parsed.")
quoted = error_sender(eval(quoted_call, frame),
"In operation '", opi, "', the value '",
op_parsed$quoted, "' could not be evaluated.")
} else {
quoted = eval(str2lang(op_parsed$quoted), frame)
}
} else {
quoted = op_parsed$quoted
}
if(check){
xi = error_sender(dsb_operators(xi, quoted, op_parsed$op,
check = check, frame = frame),
"The operation '", opi, "' failed. Please revise your call.")
} else {
xi = dsb_operators(xi, quoted, op_parsed$op, check = check, frame = frame)
}
}
extra = attr(xi, "extra")
if(length(extra) > 0){
if(length(extra$element_add_first) > 0){
if(length(extra$element_add_last) > 0){
xi = c(extra$element_add_first, xi, extra$element_add_last)
} else {
xi = c(extra$element_add_first, xi)
}
} else if(length(extra$element_add_last) > 0){
xi = c(xi, extra$element_add_last)
}
if(length(extra$str_add_first) > 0){
if(length(xi) > 0){
xi[1] = paste0(extra$str_add_first, xi[1])
} else {
xi = extra$str_add_first
}
}
if(length(extra$str_add_last) > 0){
if(length(xi) > 0){
xi[length(xi)] = paste0(xi[length(xi)], extra$str_add_last)
} else {
xi = extra$str_add_last
}
}
attr(xi, "extra") = NULL
}
}
if(i == 1){
res = xi
} else{
if(i < n && n_x_all[i + 1] == 1){
if(vectorize){
res = c(res, xi, x_parsed[[i + 1]])
} else {
res = paste0(res, xi, x_parsed[[i + 1]])
}
i_done = TRUE
} else {
if(vectorize){
res = c(res, xi)
} else {
res = paste0(res, xi)
}
}
}
}
}
if(!is.null(collapse) && length(res) > 1){
res = paste0(res, collapse = collapse)
}
return(res)
}
dsb_char2operator = function(x){
quote = substr(x, 1, 1)
OPERATORS = c("s", "S", "x", "X", "c", "C", "r", "R",
"*", "*c", "**", "**c",
"u", "U", "l", "L", "q", "Q", "f", "F", "%",
"a", "A", "k", "K", "d", "D", "v", "V",
"w", "W", "stop", "i", "I",
"*s", "*s_", "*A", "*if", "if", "IF")
ok = do_eval = FALSE
if(quote %in% c("'", "\"", "`")){
pat = paste0("^", quote, "[^", quote, "]*", quote)
op_abbrev = sub(pat, "", x)
in_quote = str_trim(x, 1, nchar(op_abbrev) + 1)
do_eval = quote == "`"
if(do_eval && substr(in_quote, 1, 1) == "!"){
# special case: back ticks used as extra quote
# => enables to use ' and " freely in arguments
do_eval = FALSE
in_quote = str_trim(in_quote, 1)
}
if(nchar(op_abbrev) == 0){
stop_up("In dsb, if a quoted value is present, the operators must always be of the form 'value'op, with 'op' an operator. Problem: In '", x, "' the operator is missing.")
}
} else if(x %in% OPERATORS){
# default values
in_quote = switch(x,
s = " ", S = ", *",
x = "[[:alnum:]]+", X = "[[:alnum:]]+",
c = " ", C = ", || and ",
"*" = "1", "*c" = "1",
"**" = "1", "**c" = "1",
r = "\n", R = "^[ \t\r\n]+|[ \t\r\n]+$",
v = "is ", V = " is",
"")
op_abbrev = x
if(op_abbrev %in% c("%", "k", "K", "a", "A", "if", "IF", "*A")){
ex = c("%" = ".['.3f'%?pi]",
"k" = ".[4k ! longuest word ]", "K" = ".[2K ? 1:5]",
"a" = ".['|s'a ! cat]", "A" = ".['|two'A ! one]",
"if" = ".['len<3'if(d) ? c('a', 'abc')]",
"IF" = ".['char>3'IF(D) ? c('a', 'abc')]",
"*A" = ".['The winner/All winners'*A ? c('John', 'Steve')]")
stop_up("The operator '", op_abbrev,
"' has no default value, you must provide values explicitly. Like in ",
ex[op_abbrev], " for instance.")
}
} else {
last1 = substr(x, nchar(x), nchar(x))
if(last1 %in% c("*", "k", "K")){
in_quote = str_trim(x, -1)
op_abbrev = last1
if(op_abbrev == "*" && substr(in_quote, nchar(in_quote), nchar(in_quote)) == "*"){
op_abbrev = "**"
in_quote = str_trim(in_quote, -1)
}
} else {
last2 = substr(x, nchar(x) - 1, nchar(x))
if(last2 %in% c("*c", "Ko", "KO")){
in_quote = str_trim(x, -2)
op_abbrev = last2
if(op_abbrev == "*c" && substr(in_quote, nchar(in_quote), nchar(in_quote)) == "*"){
op_abbrev = "**c"
in_quote = str_trim(in_quote, -1)
}
if(op_abbrev %in% c("Ko", "KO")){
# special case
text = if(op_abbrev == "Ko") "||:rest: others" else "||:REST: others"
in_quote = paste0(in_quote, text)
op_abbrev = "K"
}
} else {
first3 = substr(x, 1, 3)
if(first3 %in% c("*if")){
ok = TRUE
in_quote = ""
op_abbrev = x
# OK, dealt with later => special case
} else {
op_abbrev = "problem"
}
}
}
}
if(!ok && !op_abbrev %in% OPERATORS){
first2 = substr(op_abbrev, 1, 2)
if(!first2 %in% c("if", "IF")){
msg = c("The operation '", x, "' is not valid. It must be something quoted followed by a valid operator.",
"\n Valid operators: to split: s, S / to replace: r, R / to collapse: c, C / to extract: x, X",
"\n to replicate: * / to replicate and collapse with the empty string: *c",
"\n to upper/lower case: u, U, L / to single/double quote: q, Q",
"\n to format f, F / to apply sprintf format: %",
"\n to format whitespaces: w, W / to append: a, A / to insert: i, I",
"\n to keep: k (#characters), K (#items) / to delete: d, D",
"\n to remove stopwords: stop / to add a verb: v, V / to add an 's': *s, *s_",
"\n Conditions: *if(true:false), 'cond'if(true:false), 'cond'IF(true:false)",
"\n cond: 'regex==expr', 'fixed==expr', 'len==digit', or 'char==digit'.",
"\n------------------------------",
"\n type dsb('--help') for more help.",
"\n Example: .[', *'S, 'a => b'r? var] first splits the variable var by commas then replaces every 'a' with a 'b'.")
message(msg)
stop_up("In dsb, the operation is not valid, see upper message.")
}
}
res = list(quoted = in_quote, do_eval = do_eval, op = op_abbrev)
res
}
dsb_operators = function(x, quoted, op, check = FALSE, frame = NULL){
extra = attr(x, "extra")
# S, C, R ####
if(op %in% c("s", "S")){
# Split is always applied on verbatim stuff => length 1
if(op == "s"){
res = unlist(strsplit(x, quoted, fixed = TRUE))
} else {
res = unlist(strsplit(x, quoted, perl = TRUE))
}
res = res[nchar(res) > 0]
} else if(op %in% c("c", "C")){
# collapse
n_x = length(x)
if(n_x > 1 && grepl("||", quoted, fixed = TRUE)){
# This is the "last" operator
quoted_split = strsplit(quoted, "||", fixed = TRUE)[[1]]
if(n_x == 2){
res = paste(x, collapse = quoted_split[[2]])
} else {
res = paste(x[-n_x], collapse = quoted_split[[1]])
res = paste0(res, quoted_split[[2]], x[n_x])
}
} else {
res = paste(x, collapse = quoted)
}
} else if(op %in% c("r", "R")){
new = ""
if(grepl("=>", quoted, fixed = TRUE)){
pat = "=>"
if(grepl(" => ", quoted, fixed = TRUE)){
pat = " => "
} else if(grepl("_=>_", quoted, fixed = TRUE)){
pat = "_=>_"
}
quoted_split = strsplit(quoted, pat, fixed = TRUE)[[1]]
quoted = quoted_split[[1]]
new = quoted_split[[2]]
}
if(op == "r"){
res = gsub(quoted, new, x, fixed = TRUE)
} else {
res = gsub(quoted, new, x, perl = TRUE)
}
} else if(op %in% c("*", "**", "*c", "**c")){
# *, X ####
if(!is_numeric_in_char(quoted)){
stop("In dsb: the operator '", op, "' must have numeric arguments, '", quoted, "' is not numeric.")
}
if(substr(op, 1, 2) == "**"){
res = rep(x, each = as.numeric(quoted))
} else {
res = rep(x, as.numeric(quoted))
}
if(substr(op, nchar(op), nchar(op)) == "c"){
res = paste(res, collapse = "")
}
} else if(op == "x"){
# extract the first pattern
x_pat = regexpr(quoted, x, perl = TRUE)
res = substr(x, x_pat, x_pat - 1 + attr(x_pat, "match.length"))
} else if(op == "X"){
# extract all patterns
x_list = regmatches(x, gregexpr(quoted, x, perl = TRUE))
res = unlist(x_list)
} else if(op == "U"){
# U, L, Q, F, %, W ####
res = toupper(x)
} else if(op == "u"){
# First letter only, if relevant
res = x
substr(res, 1, 1) = toupper(substr(x, 1, 1))
} else if(op %in% c("l", "L")){
res = tolower(x)
} else if(op == "q"){
res = paste0("'", x, "'")
} else if(op == "Q"){
res = paste0("\"", x, "\"")
} else if(op == "f"){
res = format(x)
if(is.numeric(x)){
res = format(trimws(res))
}
} else if(op == "F"){
res = format(x, justify = "right")
} else if(op == "%"){
res = sprintf(paste0("%", quoted), x)
} else if(op %in% c("w", "W")){
# w: only whitespaces
# W: whitespaces extended
res = trimws(x)
# now the whitespaces
pat = if(op == "w") "[ \t\r\n]+" else "[[:blank:][:punct:]]+"
res = gsub(pat, " ", res, perl = TRUE)
} else if(op %in% c("k", "K")){
# keep: either the nber of characters (k) or the number of elements (K)
#
# Keep ####
#
quoted_split = quoted
pat = c("_||_", "_|_", "||", "|")
for(p in pat){
if(grepl(p, quoted, fixed = TRUE)){
quoted_split = strsplit(quoted, p, fixed = TRUE)[[1]]
break
}
}
is_included = grepl("||", p, fixed = TRUE)
if(length(quoted_split) == 1){
nb = quoted
add = ""
is_included = FALSE
} else {
nb = quoted_split[[1]]
add = quoted_split[[2]]
}
if(!is_numeric_in_char(nb)){
stop("In dsb: the operator '", op, "' must first contain a numeric argument, '", quoted, "' does not contain a numeric first.")
}
nb = as.numeric(nb)
if(op == "k"){
qui = nchar(x) > nb
res = substr(x, 1, nb)
if(is_included){
res[qui] = substr(res[qui], 1, nb - nchar(add))
}
if(nchar(add) > 0){
res[qui] = paste0(res[qui], add)
}
} else if(op == "K"){
if(nb == 0){
res = character(0)
} else {
res = if(nb < length(x)) x[1:nb] else x
if(any(grepl(":(n|N|rest|REST):", add))){
n = length(x)
N = ""
if(grepl(":N:", add, fixed = TRUE)){
N = n_letter(n)
add = gsub(":N:", N, add, fixed = TRUE)
}
add = gsub(":n:", n, add, fixed = TRUE)
n_rest = n - nb + is_included
if(n_rest > 0){
add = gsub(":rest:", n_rest, add, fixed = TRUE)
add = gsub(":REST:", n_letter(n_rest), add, fixed = TRUE)
}
}
if(length(x) > nb){
if(is_included){
res = res[-nb]
}
if(nchar(add) > 0){
res = c(res, add)
}
}
}
}
} else if(op %in% c("a", "i", "A", "I", "*A")){
# Appends at the beginning/end of all strings
# Conditional A
if(op == "*A"){
# We transform it into a regular A call
op = "A"
pat = c("_/_", " / ", "/")
quoted_split = quoted
for(p in pat){
if(grepl(p, quoted, fixed = TRUE)){
quoted_split = strsplit(quoted, p, fixed = TRUE)[[1]]
break
}
}
if(length(x) == 1){
quoted = quoted_split[[1]]
} else {
quoted = if(length(quoted_split) > 1) quoted_split[[2]] else ""
}
}
#
# Insert/Append ####
#
pat = c("_|_", "|")
quoted_split = quoted
for(p in pat){
if(grepl(p, quoted, fixed = TRUE)){
quoted_split = strsplit(quoted, p, fixed = TRUE)[[1]]
break
}
}
left = quoted_split[[1]]
right = if(length(quoted_split) > 1) quoted_split[[2]] else ""
if(nchar(quoted) == 0){
res = x
} else if(op == "a"){
if(length(x) == 0){
res = x
} else {
# We replace the special values
# :1:, :i:, :a:
# we allow only one special value
n_x = length(x)
pat = c(":1:", ":i:", ":a:", ":I:", ":A:")
for(i in 1:2){
tmp = if(i == 1) left else right
any_done = FALSE
for(p in pat){
if(grepl(p, tmp, fixed = TRUE)){
any_done = TRUE
tmp_split = strsplit(tmp, p, perl = TRUE)[[1]]
if(length(tmp_split) > 1){
txt = switch(p,
":1:" = 1:n_x,
":i:" = tolower(as.roman(1:n_x)),
":I:" = as.character(as.roman(1:n_x)),
":a:" = enum_letter(n_x),
":A:" = toupper(enum_letter(n_x)))
if(length(tmp_split) == 1){
tmp = paste0(tmp_split[[1]], txt)
} else {
tmp_new = paste0(tmp_split[[1]], txt, tmp_split[[2]])
k = 2
while(k + 1 <= length(tmp_split)){
k = k + 1
tmp_new = paste0(tmp_new, txt, tmp_split[[k]])
}
tmp = tmp_new
}
}
break
}
}
if(any_done){
if(i == 1){
left = tmp
} else {
right = tmp
}
}
}
if(any(nchar(left) > 0)){
if(any(nchar(right) > 0)){
res = paste0(left, x, right)
} else {
res = paste0(left, x)
}
} else {
res = paste0(x, right)
}
}
} else if(op == "i"){
# inserts an ELEMENT at the beginning/end
if(nchar(left) > 0){
if(nchar(right) > 0){
res = c(left, x, right)
} else {
res = c(left, x)
}
} else {
res = c(x, right)
}
} else if(op %in% c("A", "I")){
# appends/inserts **implicitly** at the beginning of the first/last string
res = x
if(any(grepl(":(n|N):", c(left, right)))){
n = length(x)
n_letter = ""
if(any(grepl(":N:", c(left, right), fixed = TRUE))){
n_letter = n_letter(n)
left = gsub(":N:", n_letter, left)
right = gsub(":N:", n_letter, right)
}
left = gsub(":n:", n, left)
right = gsub(":n:", n, right)
}
if(is.null(extra)){
extra = list()
}
if(op == "A"){
if(nchar(left) > 0){
extra$str_add_first = paste0(extra$str_add_first, left)
}
if(nchar(right) > 0){
extra$str_add_last = paste0(extra$str_add_last, right)
}
} else if(op == "I"){
if(nchar(left) > 0){
extra$element_add_first = c(extra$element_add_first, left)
}
if(nchar(right) > 0){
extra$element_add_last = c(extra$element_add_last, right)
}
}
}
# END: insert/append
} else if(op == "d"){
res = rep("", length(x))
} else if(op == "D"){
res = character(0)
} else if(op %in% c("v", "V")){
# The verb is added implicitly
# original code from dreamerr
#
# *A, D, Verb ####
#
res = x
PLURAL = length(x) > 1
space_left = space_right = ""
if(grepl("^ ", quoted)){
space_left = " "
quoted = str_trim(quoted, 1)
}
if(grepl(" $", quoted)){
space_right = " "
quoted = str_trim(quoted, -1)
}
past = grepl("\\bpast\\b", quoted)
if(past){
quoted = gsub("\\bquoted\\b", "", quoted)
}
verb = trimws(quoted)
if(nchar(verb) == 0){
verb = "is"
}
if(verb %in% c("be", "are")){
verb = "is"
} else if(verb == "have"){
verb = "has"
} else if(verb == "does"){
verb = "do"
} else if(verb %in% c("do not", "does not", "don't", "doesn't")){
verb = "do not"
} else if(verb %in% c("is not", "are not", "isn't", "aren't")){
verb = "is not"
} else if(verb %in% c("was", "were")){
verb = "is"
past = TRUE
}
if(past){
if(verb %in% c("is", "is not", "has", "do", "do not")){
verb_format = switch(verb, is = ifelse(!PLURAL, "was", "were"), "is not" = ifelse(!PLURAL, "wasn't", "weren't"), has = "had", do = "did", "do not" = "didn't")
} else {
verb_format = paste0(verb, "ed")
}
} else {
if(verb %in% c("is", "is not", "has", "do", "do not")){
verb_format = switch(verb, is = ifelse(!PLURAL, "is", "are"), "is not" = ifelse(!PLURAL, "isn't", "aren't"), has = ifelse(!PLURAL, "has", "have"), do = ifelse(!PLURAL, "does", "do"), "do not" = ifelse(!PLURAL, "doesn't", "don't"))
} else {
verb_format = ifelse(PLURAL, verb, paste0(verb, "s"))
}
}
# Adding the verb
if(is.null(extra)){
extra = list()
}
if(op == "v"){
extra$str_add_first = paste0(extra$str_add_first, space_left, verb_format, space_right)
} else if(op == "V"){
extra$str_add_last = paste0(extra$str_add_last, space_left, verb_format, space_right)
}
} else if(op %in% c("*s", "*s_")){
res = x
if(is.null(extra)){
extra = list()
}
if(length(x) > 1){
if(op == "*s_"){
extra$str_add_first = paste0(extra$str_add_first, "s ")
} else {
extra$str_add_first = paste0(extra$str_add_first, "s")
}
} else if(op == "*s_"){
extra$str_add_first = paste0(extra$str_add_first, " ")
}
} else if(grepl("^(\\*?if|IF)", op)){
# The operator is ALWAYS of the form if(yes:no), there must be a parenthesis
#
# IF ####
#
if(substr(op, 1, 1) == "*"){
# star if
quoted = "len > 1"
if_operators = str_trim(op, 4, 1)
op = "*if"
} else {
if_operators = str_trim(op, 3, 1)
op = substr(op, 1, 2)
}
# We have 4 types:
# - len
# - char
# - fixed
# - regex
# left trim
quoted = sub("^ +", "", quoted)
type = substr(quoted, 1, 3)
if(type %in% c("fix", "reg")){
type = substr(quoted, 1, 5)
} else if(type == "cha"){
type = substr(quoted, 1, 4)
}
if(!type %in% c("len", "char", "regex", "fixed")){
stop("In the operator 'if', the argument must be of the form 'type comp value', with 'comp' a comparator (eg ==, >=, etc), and type one of 4 types: 'len', 'char', 'regex' or 'fixed'.",
"\nIn the current argument ('", quoted, "') the type is not valid.")
}
rest = str_trim(quoted, type)
is_space = substr(rest, 1, 1) == " "
if(is_space){
rest = str_trim(rest, 1)
}
comp1 = substr(rest, 1, 1)
comp2 = substr(rest, 2, 2)
if(!comp1 %in% c("<", ">", "!", "=") || (comp1 %in% c("!", "=") && comp2 != "=")){
comp = if(comp1 %in% c("!", "=") && comp2 != "=") paste0(comp1, comp2) else comp1
stop("In the operator 'if', the argument must be of the form 'type comp value', with 'comp' a comparator (eg ==, >=, etc), and type one of 4 types: 'len', 'char', 'regex' or 'fixed'.",
"\nIn the current argument ('", quoted, "') the comparator ('", comp, "') is not valid.")
}
if(comp2 == "="){
comp = paste0(comp1, comp2)
rest = str_trim(rest, 2)
} else {
comp = comp1
rest = str_trim(rest, 1)
}
if(is_space && substr(rest, 1, 1) == " "){
rest = str_trim(rest, 1)
}
value = rest
# Now we have the 3 elements:
# - type
# - comp
# - value
if(type %in% c("len", "char")){
if(!is_numeric_in_char(value)){
stop("In the operator 'if' (equal to '", quoted, "'), '", type, "' should be compared to a number. Problem: '", value, "' is not a number.")
}
}
is_len = type == "len"
operators_TF = cpp_dsb_if_extract(if_operators)
if(isFALSE(operators_TF[[1]])){
# means error
stop("Parsing error in the operators of the '", op, "' statement. ",
"\nIt must be of the form: ", op, "(yes:no) with yes and no chains of valid operations. Further, 'if' statements cannot be nested.",
"\nEx: 'len>3'if(3K, '| and others'A, ', 'c : 'n <= 3'A, D)")
}
len_true = FALSE
if(is_len){
n_x = length(x)
len_true = eval(str2lang(paste0(n_x, comp, value)))
if(len_true){
x_true = x
x_false = character(0)
} else {
x_true = character(0)
x_false = x
}
} else {
if(type == "char"){
comp_call = str2lang(paste0("nchar(x)", comp, value))
} else {
comp_txt = paste0("grepl(\"", value, "\", x")
if(type == "regex"){
comp_txt = paste0(comp_txt, ", perl = TRUE)")
} else {
comp_txt = paste0(comp_txt, ", fixed = TRUE)")
}
comp_call = str2lang(comp_txt)
}
qui = eval(comp_call)
if(op == "IF"){
is_len = TRUE
len_true = any(qui)
if(len_true){
x_true = x
x_false = character(0)
} else {
x_true = character(0)
x_false = x
}
} else {
x_true = x[qui]
x_false = x[!qui]
}
}
# All operators that change the length of the element:
FORBIDDEN = c("c", "C", "*", "*c", "s", "S", "i", "I", "A", "v", "V")
# if TRUE
op_true = operators_TF[[1]]
for(state in c("true", "false")){
if(state == "true"){
if(is_len && !len_true) next
xi = x_true
} else {
if(is_len && len_true) next
xi = x_false
}
index = 1 + (state == "false")
op_all = operators_TF[[index]]
for(i in seq_along(op_all)){
opi = op_all[i]
op_parsed = dsb_char2operator(opi)
if(!is_len && op_parsed$op %in% FORBIDDEN){
stop_up("In the 'if' statement, you cannot use operators that would change the length of the vector, hence '", op_parsed$op, "' is forbidden. The IF statement (which does like 'if(any(cond))'), or if applied to the length, would be OK since they apply changes to the full vector.")
}
if(op_parsed$do_eval){
if(check){
quoted_call = error_sender(up = 1, str2lang(op_parsed$quoted),
"In operation '", opi, "', the value '",
op_parsed$quoted, "' could not be parsed.")
quoted = error_sender(up = 1, eval(quoted_call, frame),
"In operation '", opi, "', the value '",
op_parsed$quoted, "' could not be evaluated.")
} else {
quoted = eval(str2lang(op_parsed$quoted), frame)
}
} else {
quoted = op_parsed$quoted
}
if(check){
xi = error_sender(up = 1, dsb_operators(xi, quoted, op_parsed$op),
"The operation '", opi, "' failed. Please revise your call.")
} else {
xi = dsb_operators(xi, quoted, op_parsed$op)
}
}
if(state == "true"){
x_true = xi
} else {
x_false = xi
}
}
if(is_len){
res = if(len_true) x_true else x_false
# we can add extra stuff, if needed
extra_if = attr(res, "extra")
if(length(extra_if) > 0){
if(is.null(extra)){
extra = list()
}
if(length(extra_if$element_add_first) > 0){
extra$element_add_first = c(extra$element_add_first, extra_if$element_add_first)
}
if(length(extra_if$element_add_last) > 0){
extra$element_add_last = c(extra$element_add_last, extra_if$element_add_last)
}
if(length(extra_if$str_add_first) > 0){
extra$str_add_first = paste0(extra$str_add_first, extra_if$str_add_first)
}
if(length(extra_if$str_add_last) > 0){
extra$str_add_last = paste0(extra$str_add_last, extra_if$str_add_last)
}
}
} else {
# extra is not touched
res = x
if(length(x_true) == 0 && length(x_false) == 0){
res = character(0)
} else if(length(x_true) == 0){
res[!qui] = x_false
res = res[!qui]
} else if(length(x_false) == 0){
res[qui] = x_true
res = res[qui]
} else {
res[qui] = x_true
res[!qui] = x_false
}
}
} else if(op == "stop"){
# stop ####
# current limitation: does not work for quoted words
# but quoted words are not stopwords usually
# Snowball stopwords
# These come from http://snowballstem.org/algorithms/english/stop.txt
stopwords = c("i", "me", "my", "myself", "we", "our", "ours", "ourselves",
"you", "your", "yours", "yourself", "yourselves", "he", "him",
"his", "himself", "she", "her", "hers", "herself", "it", "its",
"itself", "they", "them", "their", "theirs", "themselves", "what",
"which", "who", "whom", "this", "that", "these", "those", "am", "is",
"are", "was", "were", "be", "been", "being", "have", "has", "had",
"having", "do", "does", "did", "doing", "would", "should",
"could", "ought", "i'm", "you're", "he's", "she's", "it's",
"we're", "they're", "i've", "you've", "we've", "they've",
"i'd", "you'd", "he'd", "she'd", "we'd", "they'd", "i'll",
"you'll", "he'll", "she'll", "we'll", "they'll", "isn't",
"aren't", "wasn't", "weren't", "hasn't", "haven't", "hadn't",
"doesn't", "don't", "didn't", "won't", "wouldn't", "shan't",
"shouldn't", "can't", "cannot", "couldn't", "mustn't", "let's",
"that's", "who's", "what's", "here's", "there's", "when's", "where's",
"why's", "how's", "a", "an", "the", "and", "but", "if", "or",
"because", "as", "until", "while", "of", "at", "by", "for",
"with", "about", "against", "between", "into", "through",
"during", "before", "after", "above", "below", "to", "from",
"up", "down", "in", "out", "on", "off", "over", "under",
"again", "further", "then", "once", "here", "there", "when",
"where", "why", "how", "all", "any", "both", "each", "few",
"more", "most", "other", "some", "such", "no", "nor", "not",
"only", "own", "same", "so", "than", "too", "very")
n = length(x)
x_split = strsplit(x, "(?<=[[:alnum:]])(?=[^[:alnum:]'])|(?<=[^[:alnum:]'])(?=[[:alnum:]])", perl = TRUE)
x_len = lengths(x_split)
x_vec = unlist(x_split)
id = rep(1:n, x_len)
# Lowering is costly, checking costs a bit less
if(any(grepl("[[:upper:]]", x))){
qui_drop = which(tolower(x_vec) %in% stopwords)
} else {
qui_drop = which(x_vec %in% stopwords)
}
x_vec = x_vec[-qui_drop]
id = id[-qui_drop]
res = cpp_paste_conditional(x_vec, id, n)
} else {
stop("In dsb: the operator '", op, "' is not recognized. Internal error: this problem should have been spotted beforehand.")
}
if(length(extra) > 0){
attr(res, "extra") = extra
}
return(res)
}
n_letter = function(n){
num2char = c("zero", "one", "two", "three", "four", "five", "six", "seven",
"eight", "nine", "ten", "eleven", "twelve", "thirteen",
"fourteen", "fifteen", "sixteen", "seventeen", "eighteen",
"nineteen")
if(n < 20){
n_letter = num2char[n + 1]
} else if(n < 100){
tens = n %/% 10
digit = n %% 10
tens_letter = c("twenty", "thirty", "forty", "fifty",
"sixty", "seventy", "eighty", "ninety")
num2char = paste0("-", num2char)
num2char[1] = ""
n_letter = paste0(tens_letter[tens - 1], num2char[digit + 1])
} else {
n_letter = n
}
n_letter
}
enum_letter = function(n){
# returns only lowercase
# oddity: there is no powers of z
# => because otherwise the algorithm would have been too complex
# and we don't care tbh
if(n < 27){
return(letters[1:n])
}
n_26 = log(n, 26)
n_26 = floor(n_26) + (n_26 %% 1 == 0)
rest = (1:n) - 1
res = vector("list", n_26 + 1)
i = 1
for(p in n_26:0){
num = 26**p
if(p == 0){
res[[i]] = letters[(rest %/% num) + 1]
} else {
res[[i]] = c("", letters)[(rest %/% num) + 1]
}
rest = rest %% num
i = i + 1
}
res = do.call(base::paste0, res)
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.