rm_between | R Documentation |
Remove/replace/extract strings bounded between a left and right marker.
rm_between(
text.var,
left,
right,
fixed = TRUE,
trim = TRUE,
clean = TRUE,
replacement = "",
extract = FALSE,
include.markers = ifelse(extract, FALSE, TRUE),
dictionary = getOption("regex.library"),
...
)
rm_between_multiple(
text.var,
left,
right,
fixed = TRUE,
trim = TRUE,
clean = TRUE,
replacement = "",
extract = FALSE,
include.markers = FALSE,
merge = TRUE
)
ex_between(
text.var,
left,
right,
fixed = TRUE,
trim = TRUE,
clean = TRUE,
replacement = "",
extract = TRUE,
include.markers = ifelse(extract, FALSE, TRUE),
dictionary = getOption("regex.library"),
...
)
ex_between_multiple(
text.var,
left,
right,
fixed = TRUE,
trim = TRUE,
clean = TRUE,
replacement = "",
extract = TRUE,
include.markers = FALSE,
merge = TRUE
)
text.var |
The text variable. |
left |
A vector of character or numeric symbols as the left edge to extract. |
right |
A vector of character or numeric symbols as the right edge to extract. |
fixed |
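logical. If TRUE, left and right are treated as literal (fixed) strings with regular expression special characters escaped; set fixed = FALSE to supply regular expressions as boundaries. |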
trim |
logical. If TRUE, removes leading and trailing white spaces. |
clean |
logical. If TRUE, extra white spaces and escaped characters will be removed. |
replacement |
Replacement for the matched pattern. |
extract |
logical. If TRUE, the strings are extracted into a list of vectors. |
include.markers |
logical. If TRUE, the markers (left/right) are included along with the text between them. |
dictionary |
A dictionary of canned regular expressions to search within
if pattern begins with "@rm_". |
... |
Other arguments passed to gsub. |
merge |
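logical. If TRUE, the results of each bracket type will be merged by string. FALSE returns a named list of lists of vectors of markers by left/right type. |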
Returns a character string with markers removed. rm_between returns merged
strings and is significantly faster. With rm_between_multiple the strings are
optionally merged by left/right symbols. The latter approach is more flexible
and names extracted strings by symbol boundaries; however, it is slower than
rm_between.
gsub, rm_bracket, stri_extract_all_regex
Other rm_ functions: rm_abbreviation(), rm_bracket(), rm_caps_phrase(),
rm_caps(), rm_citation_tex(), rm_citation(), rm_city_state_zip(),
rm_city_state(), rm_date(), rm_default(), rm_dollar(), rm_email(),
rm_emoticon(), rm_endmark(), rm_hash(), rm_nchar_words(), rm_non_ascii(),
rm_non_words(), rm_number(), rm_percent(), rm_phone(), rm_postal_code(),
rm_repeated_characters(), rm_repeated_phrases(), rm_repeated_words(),
rm_tag(), rm_time(), rm_title_name(), rm_url(), rm_white(), rm_zip()
# Examples for rm_between() / ex_between() and their *_multiple variants.

# A single marker pair: remove, then extract, the parenthesised text.
x <- "I like [bots] (not)."
rm_between(x, "(", ")")
ex_between(x, "(", ")")

# Several marker pairs in one call, given as parallel left/right vectors.
rm_between(x, c("(", "["), c(")", "]"))
ex_between(x, c("(", "["), c(")", "]"))

# Override the include.markers defaults (TRUE when removing, FALSE when
# extracting).
rm_between(x, c("(", "["), c(")", "]"), include.markers = FALSE)
ex_between(x, c("(", "["), c(")", "]"), include.markers = TRUE)

## multiple (naming and ability to keep separate bracket types but slower)
x <- c(
  "Where is the /big dog#?",
  "I think he's @arunning@b with /little cat#."
)
rm_between_multiple(x, "@a", "@b")
ex_between_multiple(x, "@a", "@b")
rm_between_multiple(x, c("/", "@a"), c("#", "@b"))
ex_between_multiple(x, c("/", "@a"), c("#", "@b"))

# Markers may mix character and numeric symbols; c() coerces 98 and 99 to
# the strings "98" and "99".
x2 <- c(
  "Where is the L1big dogL2?",
  "I think he's 98running99 with L1little catL2."
)
rm_between_multiple(x2, c("L1", 98), c("L2", 99))
ex_between_multiple(x2, c("L1", 98), c("L2", 99))

# Whole words can serve as markers too.
state <- c(
  "Computer is fun. Not too fun.", "No it's not, it's dumb.",
  "What should we do?", "You liar, it stinks!", "I am telling the truth!",
  "How can we be certain?", "There is no way.", "I distrust you.",
  "What are you talking about?", "Shall we move on? Good then.",
  "I'm hungry. Let's eat. You already?"
)
rm_between_multiple(state, c("is", "we"), c("too", "on"))

## Use Grouping
# `replacement` may back-reference capture groups of the generated pattern;
# NOTE(review): \\2 is presumably the text between the markers -- confirm.
s <- "something before stuff $some text$ in between $1$ and after"
rm_between(s, "$", "$", replacement = "<B>\\2<E>")

## Using regular expressions as boundaries (fixed = FALSE)
x <- c(
  "There are 2.3 million species in the world",
  "There are 2.3 billion species in the world"
)
# Spell out `include.markers` in full rather than relying on partial
# argument matching of `include`.
ex_between(
  x,
  left = "There",
  right = "[mb]illion",
  fixed = FALSE,
  include.markers = TRUE
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.