R/utils-transforEmotion.R
In transforEmotion: Sentiment Analysis for Text, Image and Video using Transformer Models

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Preprocessing functions ----
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

#' Warning to user about non-text input
#'
#' @noRd
#'
# Non-text Warning
# Updated 05.03.2022
non_text_warning <- function(text)
{
  # Clear na.action attribute
  attr(text, "na.action") <- NULL

  # Check that input is vector or list
  if(!is.vector(text)){

    # Send error if not a vector or list
    stop(
      paste(
        "The 'text' argument expects a 'vector' or 'list'. Instead, class(text) = ",
        paste("'", class(text), "'", sep = "", collapse = ", ")
      )
    )

  }else{

    # Check for text
    text_index <- unlist(lapply(text, is.character))

    # Check for all text
    if(!all(text_index)){

      # Obtain bad indices
      bad_index <- which(!text_index)

      # Remove non-text
      text <- text[-bad_index]

      # Check if all elements were removed
      if(length(text) == 0){
        stop(
          "All elements of 'text' object were not identified as 'character' type.\n\nPlease check for whether your text is 'character' type: `is.character(text)`"
        )
      }

      # Adjust warning for number of non-character types
      if(length(bad_index) == 1){

        # Set up head of warning
        head <- "Index"

        # Set up body of warning
        body <- paste(bad_index, sep = "")

        # Set up end of warning
        end <- "of 'text' object was not identified 'character' type.\n\nThis index was removed from analysis."

      }else if(length(bad_index) == 2){

        # Set up head of warning
        head <- "Indices"

        # Set up body of warning
        body <- paste(bad_index[1], "and", bad_index[2])

        # Set up end of warning
        end <- "of 'text' object were not identified 'character' type.\n\nThese indices were removed from analysis."

      }else if(length(bad_index) > 2){

        # Set up head of warning
        head <- "Indices"

        # Set up body of warning
        ## All bad indices except for last
        body <- paste(bad_index[-length(bad_index)], sep = "", collapse = ", ")

        ## Add last bad index
        body <- paste(
          body,
          ", and ", bad_index[length(bad_index)],
          sep = ""
        )

        # Set up end of warning
        end <- "of 'text' object were not identified 'character' type.\n\nThese indices were removed from analysis."

      }

      # Provide warning
      warning(
        paste(
          head, body, end
        )
      )

    }

  }

}

#' Alphanumeric characters
#'
#' @noRd
#'
# Alphanumeric characters
# Updated 05.03.2022
keep_alphanumeric <- function(text)
{
  # Remove non-alphanumeric characters
  alphanum_text <- lapply(text, function(x){
    gsub("[^[:alnum:][:space:][:punct:]]", "", x)
  })

  return(alphanum_text)

}

#' Reduces characters repeated more than twice to single character
#'
#' @noRd
#'
# Remove repeated characters
# Updated 05.03.2022
remove_repeated <- function(text)
{
  # Repeated characters (more than 2) reduced to 1
  repeated_text <- lapply(text, function(x){
    gsub('([[:alnum:]])\\1{2,}', '\\1', x)
  })

  # Repeated punctuations reduced to 1
  repeated_text <- lapply(text, function(x){
    gsub('([[:punct:]])\\1+', '\\1', x)
  })

  return(repeated_text)

}

#' Remove escaped characters
#'
#' @noRd
#'
# Removes escaped characters
# Updated 05.03.2022
remove_escapes <- function(text)
{
  # Escapes removed
  escape_text <- lapply(text, function(x){

    # Remove new line
    x <- gsub("\\n", " ", x)
    # Remove tab
    x <- gsub("\\t", " ", x)
    # Remove quote
    x <- gsub('\\"', " ", x)
    # Remove single backslash
    x <- gsub("\\\\", " ", x)

    return(x)

  })

  return(escape_text)

}

#' Ensures proper spacing for punctuations
#'
#' @noRd
#'
# Proper punctuation spacing
# Updated 05.03.2022
punctuation_spaces <- function(text)
{
  # Converge spaces next to punctuation
  converge_text <- lapply(text, function(x){

    # Find punctuation
    punct_index <- grep("[[:punct:]]", x)

    # Punctuation with only one character
    punct_index <- punct_index[nchar(x[punct_index]) == 1]

    # Ensure their are punctuations
    if(length(punct_index) != 0){

      # Loop through punctuations
      replace_punct <- unlist(
        lapply(punct_index, function(i){
          paste(x[i-1], x[i], sep = "")
        })
      )

      # Replace punctuations in text
      x[punct_index] <- replace_punct

      # Remove previous word in text
      x <- x[-(punct_index-1)]

    }

    return(x)

  })

  return(converge_text)

}

#' Remove all white space between words
#' (correctly spaces punctuations as well)
#'
#' @noRd
#'
# Removes white space
# Updated 05.03.2022
remove_whitespace <- function(text)
{
  # Split and trimmed text
  split_text <- lapply(text, function(x){
    trimws(unlist(strsplit(x, split = " ")))
  })

  # Remove blank text
  blank_text <- lapply(split_text, function(x){
    na.omit(ifelse(x == "", NA, x))
  })

  # Proper spaces for punctuations
  punct_text <- punctuation_spaces(blank_text)

  # Re-paste text together
  repaste_text <- lapply(punct_text, function(x){
    paste(x, collapse = " ")
  })

  return(repaste_text)

}

#' Remove stop words from text
#'
#' @importFrom utils data
#'
#' @noRd
#'
# Removes stop words
# Updated 05.03.2022
remove_stop_words <- function(text)
{
  # Load stop words
  stop_words <- get(data("stop_words", envir = environment()))

  # Format stop words for `gsub` function
  stop_format <- paste("\\b", stop_words, "\\b", sep = "", collapse = "|")

  # Remove stop words
  stop_text <- lapply(text, function(x){
    gsub(stop_format, "", x)
  })

  return(stop_text)

}

#' Basic preprocessing
#'
#' @noRd
#'
# Preprocessing
# Updated 05.03.2022
preprocess_text <- function(text, remove_stop = FALSE)
{

  # Convert to lowercase
  text <- lapply(text, tolower)

  # Keep alphanumeric and punctuation characters
  text <- keep_alphanumeric(text)

  # Remove escaped text
  text <- remove_escapes(text)

  # Remove repeated text
  text <- remove_repeated(text)

  # Remove stop words
  if(isTRUE(remove_stop)){
    text <- remove_stop_words(text)
  }

  # Remove white space (with proper punctuation spaces)
  text <- remove_whitespace(text)

  return(text)

}

#%%%%%%%%%%%%%%%%
# nlp_scores ----
#%%%%%%%%%%%%%%%%

#' Message to user about bad classes
#'
#' @noRd
#'
# Bad Class Message
# Updated 05.03.2022
bad_classes_message <- function(bad_classes)
{
  # Adjust message for number of bad classes
  if(length(bad_classes) == 1){

    # Set up head of message
    head <- "Class"

    # Set up body of message
    body <- paste("'", bad_classes, "'", sep = "")

    # Set up end of message
    end <- "was not found in the semantic space.\n\nIt was removed from the NLP analysis."

  }else if(length(bad_classes) == 2){

    # Set up head of message
    head <- "Classes"

    # Set up body of message
    body <- paste(
      "'", bad_classes[1], "' and '", bad_classes[2], "'", sep = ""
    )

    # Set up end of message
    end <- "were not found in the semantic space.\n\nThey were removed from the NLP analysis."

  }else if(length(bad_classes) > 2){

    # Set up head of message
    head <- "Classes"

    # Set up body of message
    ## All bad classes except for last
    body <- paste(
      "'", bad_classes[-length(bad_classes)], "'", sep = "", collapse = ", "
    )

    ## Add last bad class
    body <- paste(
      body,
      ", and '", bad_classes[length(bad_classes)], "'",
      sep = ""
    )

    # Set up end of message
    end <- "were not found in the semantic space.\n\nThey were removed from the NLP analysis."

  }

  # Generate full message
  message(
    paste(
      head, body, end
    )
  )

}

#%%%%%%%%%%%%%%%%%%%%%%
# SYSTEM FUNCTIONS ----
#%%%%%%%%%%%%%%%%%%%%%%

# Silent call ----
# Updated 02.02.2024
silent_call <- function(input)
{
  # Capture any output
  sink <- capture.output(
    output <- suppressWarnings(
      suppressMessages(input)
    )
  )

  # Return output
  return(output)

}

#' Error report
#'
#' @description Gives necessary information for user reporting error
#'
#' @param result Character.
#' The error from the result
#'
#' @param SUB_FUN Character.
#' Sub-routine the error occurred in
#'
#' @param FUN Character.
#' Main function the error occurred in
#'
#' @return Error and message to send to GitHub
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
#'
#' @importFrom utils packageVersion
#'
# Error Report
# Updated 26.02.2021
error.report <- function(result, SUB_FUN, FUN)
{
  # Let user know that an error has occurred
  message(paste("\nAn error has occurred in the '", SUB_FUN, "' function of '", FUN, "':\n", sep =""))

  # Give them the error to send to you
  cat(paste(result))

  # Tell them where to send it
  message("\nPlease open a new issue on GitHub (bug report): https://github.com/hfgolino/EGAnet/issues/new/choose")

  # Give them information to fill out the issue
  OS <- as.character(Sys.info()["sysname"])
  OSversion <- paste(as.character(Sys.info()[c("release", "version")]), collapse = " ")
  Rversion <- paste(R.version$major, R.version$minor, sep = ".")
  EGAversion <- paste(unlist(packageVersion("EGAnet")), collapse = ".")

  # Let them know to provide this information
  message(paste("\nBe sure to provide the following information:\n"))

  # To reproduce
  message(styletext("To Reproduce:", defaults = "bold"))
  message(paste(" ", textsymbol("bullet"), " Function error occurred in: ", SUB_FUN, " function of ", FUN, sep = ""))

  # R, SemNetCleaner, and SemNetDictionaries
  message(styletext("\nR and EGAnet versions:", defaults = "bold"))
  message(paste(" ", textsymbol("bullet"), " R version: ", Rversion, sep = ""))
  message(paste(" ", textsymbol("bullet"), " EGAnet version: ", EGAversion, sep = ""))

  # Desktop
  message(styletext("\nOperating System:", defaults = "bold"))
  message(paste(" ", textsymbol("bullet"), " OS: ", OS, sep = ""))
  message(paste(" ", textsymbol("bullet"), " Version: ", OSversion, sep = ""))
}

#' System check for OS and RSTUDIO
#'
#' @description Checks for whether text options are available
#'
#' @param ... Additional arguments
#'
#' @return \code{TRUE} if text options are available and \code{FALSE} if not
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
# System Check
# Updated 08.09.2020
system.check <- function (...)
{
  OS <- unname(tolower(Sys.info()["sysname"]))

  RSTUDIO <- ifelse(Sys.getenv("RSTUDIO") == "1", TRUE, FALSE)

  TEXT <- TRUE

  if(!RSTUDIO){if(OS != "linux"){TEXT <- FALSE}}

  res <- list()

  res$OS <- OS
  res$RSTUDIO <- RSTUDIO
  res$TEXT <- TEXT

  return(res)
}

#' Colorfies Text
#'
#' Makes text a wide range of colors (8-bit color codes)
#'
#' @param text Character.
#' Text to color
#'
#' @return Colorfied text
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
#'
# Color text
# Updated 08.09.2020
colortext <- function(text, number = NULL, defaults = NULL)
{
  # Check system
  sys.check <- system.check()

  if(sys.check$TEXT)
  {
    # Defaults for number (white text)
    if(is.null(number) || number < 0 || number > 231)
    {number <- 15}

    # Check for default color
    if(!is.null(defaults))
    {
      # Adjust highlight color based on background color
      if(defaults == "highlight")
      {
        if(sys.check$RSTUDIO)
        {

          if(rstudioapi::getThemeInfo()$dark)
          {number <- 226
          }else{number <- 208}

        }else{number <- 208}
      }else{

        number <- switch(defaults,
                         message = 204,
                         red = 9,
                         orange = 208,
                         yellow = 11,
                         "light green" = 10,
                         green = 34,
                         cyan = 14,
                         blue = 12,
                         magenta = 13,
                         pink = 211,
        )

      }

    }

    return(paste("\033[38;5;", number, "m", text, "\033[0m", sep = ""))

  }else{return(text)}
}

#' Stylizes Text
#'
#' Makes text bold, italics, underlined, and strikethrough
#'
#' @param text Character.
#' Text to stylized
#'
#' @return Sytlized text
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
# Style text
# Updated 08.09.2020
styletext <- function(text, defaults = c("bold", "italics", "highlight",
                                         "underline", "strikethrough"))
{
  # Check system
  sys.check <- system.check()

  if(sys.check$TEXT)
  {
    if(missing(defaults))
    {number <- 0
    }else{

      # Get number code
      number <- switch(defaults,
                       bold = 1,
                       italics = 3,
                       underline = 4,
                       highlight = 7,
                       strikethrough = 9
      )

    }

    return(paste("\033[", number, ";m", text, "\033[0m", sep = ""))
  }else{return(text)}
}

#' Text Symbols
#'
#' Makes text symbols (star, checkmark, square root)
#'
#' @param symbol Character.
#'
#' @return Outputs symbol
#'
#' @author Alexander Christensen <alexpaulchristensen@gmail.com>
#'
#' @noRd
# Symbols
# Updated 24.04.2020
textsymbol <- function(symbol = c("alpha", "beta", "chi", "delta",
                                  "eta", "gamma", "lambda", "omega",
                                  "phi", "pi", "rho", "sigma", "tau",
                                  "theta", "square root", "infinity",
                                  "check mark", "x", "bullet")
)
{
  # Get number code
  sym <- switch(symbol,
                alpha = "\u03B1",
                beta = "\u03B2",
                chi = "\u03C7",
                delta = "\u03B4",
                eta = "\u03B7",
                gamma = "\u03B3",
                lambda = "\u03BB,",
                omega = "\u03C9",
                phi = "\u03C6",
                pi = "\u03C0",
                rho = "\u03C1",
                sigma = "\u03C3",
                tau = "\u03C4",
                theta = "\u03B8",
                "square root" = "\u221A",
                infinity = "\u221E",
                "check mark" = "\u2713",
                x = "\u2717",
                bullet = "\u2022"
  )

  return(sym)
}

Any scripts or data that you put into this service are public.

transforEmotion documentation built on Sept. 11, 2024, 9:26 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

transforEmotion
Sentiment Analysis for Text, Image and Video using Transformer Models

R/utils-transforEmotion.R
In transforEmotion: Sentiment Analysis for Text, Image and Video using Transformer Models

Defines functions textsymbol styletext colortext error.report silent_call bad_classes_message preprocess_text remove_stop_words remove_whitespace punctuation_spaces remove_escapes remove_repeated keep_alphanumeric non_text_warning

Try the transforEmotion package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

transforEmotion Sentiment Analysis for Text, Image and Video using Transformer Models

R/utils-transforEmotion.R In transforEmotion: Sentiment Analysis for Text, Image and Video using Transformer Models

Defines functions textsymbol styletext colortext error.report silent_call bad_classes_message preprocess_text remove_stop_words remove_whitespace punctuation_spaces remove_escapes remove_repeated keep_alphanumeric non_text_warning

Try the transforEmotion package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

transforEmotion
Sentiment Analysis for Text, Image and Video using Transformer Models

R/utils-transforEmotion.R
In transforEmotion: Sentiment Analysis for Text, Image and Video using Transformer Models