## knitr chunk defaults for this document: show the source of each chunk,
## collapse source and output into a single block, and prefix output
## lines with "#>"
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>")

## attach chr so the examples below can call its functions unqualified
library(chr)
R package for simple string manipulation
Clean, wrangle, and parse character [string] vectors using exclusively base R functions.
## install devtools if not already installed
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

## install chr from github
devtools::install_github("mkearney/chr")

## load chr
library(chr)
Detect text patterns (an easy-to-use wrapper for `base::grep()` and `base::grepl()`).
## return logical vector
chr_detect(letters, "a|b|c|x|y|z")

## return inverted logical values
chr_detect(letters, "a|b|c|x|y|z", invert = TRUE)

## return matching positions
chr_detect(letters, "a|b|c|x|y|z", which = TRUE)

## return inverted matching positions
chr_detect(letters, "a|b|c|x|y|z", which = TRUE, invert = TRUE)

## return matching values
chr_detect(letters, "a|b|c|x|y|z", value = TRUE)

## return inverted matching values
chr_detect(letters, "a|b|c|x|y|z", value = TRUE, invert = TRUE)
Extract text patterns.
## some text strings (reused as `x` by the examples that follow)
x <- c(
  "this one is @there has #MultipleLines https://github.com and http://twitter.com @twitter",
  "this @one #istotally their and some non-ascii symbols: \u00BF \u037E",
  "this one is they're https://github.com",
  "this one #HasHashtags #afew #ofthem",
  "and more @kearneymw at https://mikew.com"
)

## extract all URLs
chr_extract_links(x)

## extract all hashtags
chr_extract_hashtags(x)

## extract mentions
chr_extract_mentions(x)
Count number of matches.
## count all there/their/they're (case-insensitive)
chr_count(x, "there|their|they\\S?re", ignore.case = TRUE)
Remove text patterns.
## remove URLs
chr_remove_links(x)

## string together functions with magrittr pipe
library(magrittr)

## remove mentions and extra [white] spaces
chr_remove_mentions(x) %>%
  chr_remove_ws()

## remove hashtags
chr_remove_hashtags(x)

## remove hashtags, line breaks, and extra spaces
x %>%
  chr_remove_hashtags() %>%
  chr_remove_linebreaks() %>%
  chr_remove_ws()

## remove links and mentions, then extract words
x %>%
  chr_remove_links() %>%
  chr_remove_mentions() %>%
  chr_extract_words()
Replace text with string.
## replace their with they're (case-insensitive)
chr_replace(x, "their", "they're", ignore.case = TRUE)
ASCII functions are currently a work in progress. For example, replace non-ASCII symbols with similar ASCII characters.
## ascii version
chr_replace_nonascii(x)
Create ngrams at the character-level.
## character vector (note: replaces the `x` used by the earlier examples)
x <- c("Acme Pizza, Inc.", "Tom's Sports Equipment, LLC")

## 2 char level ngram
chr_ngram_char(x, n = 2L)

## 3 char level ngram in lower case and stripped of punctuation and white space
chr_ngram_char(x, n = 3L, lower = TRUE, punct = TRUE, space = TRUE)
Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.