# count_punc
#' Count the punctuation characters in a string.
#'
#' Counts how many characters of \code{text} are ASCII punctuation marks
#' (comma, exclamation mark, quotes, brackets, backslash and so on; the
#' complete set is listed in the function body).
#'
#' @param text A character vector with, at most, one element.
#'
#' @return A single number: the count of punctuation characters in
#'   \code{text}. An empty string or a zero-length vector gives 0 and a
#'   missing string gives \code{NA}.
#' @export
#'
#' @examples
#' x <- "Hello, World!"
#' count_punc(x)
#' # 2
count_punc <- function(text) {
  if (!is.character(text)) {
    stop("Text should be of type 'String'")
  }
  if (length(text) > 1) {
    stop("Text should be a character vector of length 1")
  }
  # A zero-length vector has no characters to inspect.
  if (length(text) == 0) {
    return(0)
  }
  # Propagate missingness rather than failing on an NA condition.
  if (is.na(text)) {
    return(NA_real_)
  }
  punctuations <- c(
    ",", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", "-", ".",
    "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`",
    "{", "|", "}", "~"
  )
  # Split into single characters and test membership in one vectorised step.
  chars <- strsplit(text, "")[[1]]
  # Return a double (not an integer) so the result type is stable for callers.
  as.numeric(sum(chars %in% punctuations))
}
# avg_word_len
#' Calculate the average word length in a string.
#'
#' Punctuation characters are treated as word separators (each one is
#' replaced by a space), the text is split on runs of whitespace, and the
#' mean number of characters per non-empty word is returned.
#'
#' @param text A character vector with, at most, one element.
#' @return A single number: the average word length in \code{text}. Text
#'   that contains no words (empty, whitespace only, punctuation only, or a
#'   zero-length vector) gives 0 and a missing string gives \code{NA}.
#' @export
#'
#' @examples
#' x <- "Here are some words"
#' avg_word_len(x)
#' # 4
avg_word_len <- function(text) {
  if (!is.character(text)) {
    stop("Text should be of type 'String'")
  }
  # prevents users from inputting character vectors with more than one element
  if (length(text) > 1) {
    stop("Text should be a character vector of length 1")
  }
  # A zero-length vector contains no words.
  if (length(text) == 0) {
    return(0)
  }
  # Propagate missingness rather than failing on an NA condition.
  if (is.na(text)) {
    return(NA_real_)
  }
  punc <- c(
    ",", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", "-", ".",
    "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`",
    "{", "|", "}", "~"
  )
  # Replace every punctuation character with a space so it acts as a
  # separator ("don't" becomes the two words "don" and "t").
  chars <- strsplit(text, "")[[1]]
  chars[chars %in% punc] <- " "
  # Split on whitespace runs; leading separators leave an empty first
  # element, which must not count as a word.
  words <- strsplit(paste(chars, collapse = ""), "\\s+")[[1]]
  words <- words[nzchar(words)]
  if (length(words) == 0) {
    return(0)
  }
  mean(nchar(words))
}
# perc_cap_words
#' Percentage of fully capitalised words in a string.
#'
#' The text is split into words on runs of non-word characters. A word
#' counts as fully capitalised when it consists only of the letters A-Z.
#'
#' @param text A character vector with length one containing the piece of
#'   text to analyze.
#'
#' @return A single number between 0 and 100: the percentage of words in
#'   \code{text} that are fully capitalised. Text without any word (for
#'   example only punctuation) gives 0.
#' @export
#'
#' @examples
#' text <- "This is REALLY Awesome!"
#' perc_cap_words(text)
#' # 25
perc_cap_words <- function(text) {
  if (length(text) > 1) {
    stop("Text should be a character vector of length 1")
  }
  if (!is.character(text)) {
    stop("'text' should be of type 'String'")
  }
  # Reject zero-length, missing and whitespace-only input.
  if (length(text) == 0 || is.na(text) || grepl("^[[:space:]]*$", text)) {
    stop("Please provide a non-empty text!")
  }
  # Splitting text that starts with a non-word character yields a leading
  # empty string; drop it so it is not counted as a word.
  words <- strsplit(text, "\\W+")[[1]]
  words <- words[nzchar(words)]
  if (length(words) == 0) {
    return(0)
  }
  # Both counts come from the same word list, so the result cannot exceed
  # 100. perl = TRUE makes [A-Z] independent of the locale's collation.
  no_cap_words <- sum(grepl("^[A-Z]+$", words, perl = TRUE))
  no_cap_words / length(words) * 100
}
# remove_stop_words
#' Remove the stop words in a string.
#'
#' The text is lower-cased, punctuation characters are replaced by spaces,
#' and the result is split on whitespace. Every word that is exactly equal
#' to one of the stop words is dropped; the remaining words are returned in
#' their original order (duplicates are kept).
#'
#' @param text A character vector with length one containing the piece of
#'   text to analyze.
#' @param stop_words A character vector of stop words to remove. Defaults to
#'   the English "stopwords-iso" list from the stopwords package. The
#'   comparison is case-insensitive.
#'
#' @return A character vector containing the lower-cased words in the text
#'   that are not stop words, or \code{character(0)} when none remain.
#' @export
#'
#' @examples
#' text <- "Tomorrow is a big day!"
#' remove_stop_words(text)
#' remove_stop_words(text, stop_words = c("is", "a"))
#' # "tomorrow" "big" "day"
remove_stop_words <- function(text,
                              stop_words = stopwords::stopwords(
                                "en",
                                source = "stopwords-iso"
                              )) {
  if (length(text) > 1) {
    stop("Text should be a character vector of length 1")
  }
  if (!is.character(text)) {
    stop("'text' should be of type 'String'")
  }
  # Reject zero-length, missing and whitespace-only input.
  if (length(text) == 0 || is.na(text) || grepl("^[[:space:]]*$", text)) {
    stop("Please provide a non-empty text!")
  }
  # Make the text lowercase
  text <- tolower(text)
  # All punctuation
  punc <- c(
    ",", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", "-", ".",
    "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`",
    "{", "|", "}", "~"
  )
  # Replace every punctuation character with a space so it separates words.
  chars <- strsplit(text, "")[[1]]
  chars[chars %in% punc] <- " "
  # Split on whitespace runs and drop the empty tokens that consecutive
  # separators (such as ", ") would otherwise produce.
  words <- strsplit(paste(chars, collapse = ""), "\\s+")[[1]]
  words <- words[nzchar(words)]
  # Exact, literal comparison: a word is removed only when it IS a stop
  # word, not when it merely occurs inside one.
  words[!words %in% tolower(stop_words)]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.