# R/spam_functions.R
#
# Defines functions begins_re has_religious multiple_punctuation ends_with_period has_mister has_dear has_ampersand has_dollar_sign all_caps
#
# Documented in all_caps begins_re ends_with_period has_ampersand has_dear has_dollar_sign has_mister has_religious multiple_punctuation

#' All Caps
#'
#' Checks whether a string is written entirely in capital letters: it must
#' contain at least one upper-case letter (A-Z) and no lower-case letter
#' (a-z). Strings without any letters (empty strings, or digits and
#' punctuation only) are not considered all caps. This function is intended
#' for use on any of the email MEA datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_caps <- emails %>%
#'    mutate(all_caps = all_caps(text = subjectline))
#' }
#'
#' @export
all_caps <- function(text) {
  has_upper <- str_detect(text, "[A-Z]")
  has_lower <- str_detect(text, "[a-z]")
  # Requiring an upper-case letter keeps letter-free strings such as "" or
  # "123" from being reported as all caps.
  has_upper & !has_lower
}

#' Dollar Sign
#'
#' Reports, for each string supplied, whether it contains at least one
#' dollar sign. This function is intended for use on any of the email MEA
#' datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_dollar <- emails %>%
#'    mutate(has_dollar_sign = has_dollar_sign(text = subjectline))
#' }
#'
#' @export
has_dollar_sign <- function(text) {
  # The dollar sign is escaped because a bare `$` is the end-of-string anchor.
  dollar_pattern <- "\\$"
  str_detect(string = text, pattern = dollar_pattern)
}

#' Ampersand
#'
#' Reports, for each string supplied, whether it contains at least one
#' ampersand. This function is intended for use on any of the email MEA
#' datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_ampersand <- emails %>%
#'    mutate(has_ampersand = has_ampersand(text = subjectline))
#' }
#'
#' @export
has_ampersand <- function(text) {
  # Escaped ampersand: matches a literal "&" anywhere in the string.
  ampersand_pattern <- "\\&"
  str_detect(string = text, pattern = ampersand_pattern)
}

#' Dear
#'
#' This function takes in any string and returns a boolean indicating
#' whether or not "dear" appears within it, in any letter case ("dear",
#' "Dear", "DEAR", ...). The match is on the character sequence, so it is
#' also found inside longer words. This function is intended for use on any
#' of the email MEA datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_dear <- emails %>%
#'    mutate(has_dear = has_dear(text = subjectline))
#' }
#'
#' @export
has_dear <- function(text) {
  # (?i) makes the match case-insensitive so all-caps subject lines such as
  # "DEAR FRIEND" are detected as well as "dear" and "Dear".
  str_detect(text, "(?i)dear")
}

#' Mister
#'
#' This function takes in any string and returns a boolean indicating
#' whether or not the word "Mister" or "Mr" appears within it, in any letter
#' case. Only whole words are matched, so "Mr." and "MR SMITH" count while
#' "Mrs" does not. This function is intended for use on any of the email
#' MEA datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_mister <- emails %>%
#'    mutate(has_mister = has_mister(text = subjectline))
#' }
#'
#' @export
has_mister <- function(text) {
  # (?i) ignores case; the \b word boundaries stop "Mr" from matching inside
  # other words such as "Mrs".
  str_detect(text, "(?i)\\b(?:mr|mister)\\b")
}

#' Ends with a Period
#'
#' Reports, for each string supplied, whether its final character is a
#' period. This function is intended for use on any of the email MEA
#' datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_period <- emails %>%
#'    mutate(ends_with_period = ends_with_period(text = subjectline))
#' }
#'
#' @export
ends_with_period <- function(text) {
  # A literal period (escaped) anchored to the end of the string.
  trailing_period <- "\\.$"
  str_detect(string = text, pattern = trailing_period)
}

#' Multiple Punctuation
#'
#' This function takes in any string and returns a boolean indicating
#' whether or not it contains a run of consecutive exclamation marks. By
#' default a run of at least four ("!!!!") is required; other punctuation
#' marks are not considered. This function is intended for use on any of the
#' email MEA datasets.
#'
#' @param text string/ email subject line
#' @param min_run minimum number of consecutive exclamation marks that
#'   counts as a match; a single number of at least 1. Defaults to 4.
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_punctuation <- emails %>%
#'    mutate(multiple_punctuation =
#'           multiple_punctuation(text = subjectline))
#' }
#'
#' @export
multiple_punctuation <- function(text, min_run = 4L) {
  stopifnot(
    is.numeric(min_run),
    length(min_run) == 1L,
    !is.na(min_run),
    min_run >= 1
  )
  # "!{n,}" matches n or more exclamation marks in a row; with the default
  # n = 4 it detects exactly the strings that contain "!!!!".
  run_pattern <- sprintf("!{%d,}", as.integer(min_run))
  str_detect(text, run_pattern)
}

#' Religious Subject Matter
#'
#' This function takes in any string and returns a boolean indicating
#' whether or not religious words are present. The words looked for are
#' "lord", "god" and "blessing", in any letter case; the match is on the
#' character sequence, so they are also found inside longer words. This
#' function is intended for use on any of the email MEA datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_religious <- emails %>%
#'    mutate(has_religious = has_religious(text = subjectline))
#' }
#'
#' @export
has_religious <- function(text) {
  # (?i) makes every alternative case-insensitive, so upper-case subject
  # lines such as "GOD BLESS" are detected too.
  str_detect(text, "(?i)lord|god|blessing")
}

#' Re:
#'
#' Reports, for each string supplied, whether it starts with "Re:" with no
#' space directly after the colon. This function is intended for use on any
#' of the email MEA datasets.
#'
#' @param text string/ email subject line
#'
#' @return A logical vector the same length as `text`.
#'
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' emails_re <- emails %>%
#'    mutate(begins_re = begins_re(text = subjectline))
#' }
#'
#' @export
begins_re <- function(text) {
  starts_with_re <- str_detect(text, "^(Re:)")
  space_after_re <- str_detect(text, "^(Re: )")
  # Keep only the strings where "Re:" is not followed by a space.
  starts_with_re & !space_after_re
}
# leahannejohnson/textclassificationexamples documentation built on Feb. 7, 2022, 11:04 p.m.