inst/doc/SentimentAnalysis.R

## -----------------------------------------------------------------------------
# install.packages("SentimentAnalysis")
library(SentimentAnalysis)

## -----------------------------------------------------------------------------
# Analyze a single string to obtain a binary response (positive / negative)
sentiment <- analyzeSentiment("Yeah, this was a great soccer game for the German team!")
convertToBinaryResponse(sentiment)$SentimentQDAP

## -----------------------------------------------------------------------------
# Create a vector of strings
documents <- c("Wow, I really like the new light sabers!",
               "That book was excellent.",
               "R is a fantastic language.",
               "The service in this restaurant was miserable.",
               "This is neither positive or negative.",
               "The waiter forget about my dessert -- what poor service!")

# Analyze sentiment
sentiment <- analyzeSentiment(documents)

# Extract dictionary-based sentiment according to the QDAP dictionary
sentiment$SentimentQDAP

# View sentiment direction (i.e. positive, neutral and negative)
convertToDirection(sentiment$SentimentQDAP)

response <- c(+1, +1, +1, -1, 0, -1)

compareToResponse(sentiment, response)

compareToResponse(sentiment, convertToBinaryResponse(response))

plotSentimentResponse(sentiment$SentimentQDAP, response)

## -----------------------------------------------------------------------------
documents <- c("This is good",
               "This is bad",
               "This is inbetween")
convertToDirection(analyzeSentiment(documents)$SentimentQDAP)

## -----------------------------------------------------------------------------
library(tm)
corpus <- VCorpus(VectorSource(documents))
convertToDirection(analyzeSentiment(corpus)$SentimentQDAP)

## -----------------------------------------------------------------------------
dtm <- preprocessCorpus(corpus)
convertToDirection(analyzeSentiment(dtm)$SentimentQDAP)

## -----------------------------------------------------------------------------
# Make dictionary available in the current R environment
data(DictionarHE)
# Display the internal structure 
str(DictionaryHE)
# Access dictionary as an object of type SentimentDictionary
dict.HE <- loadDictionaryHE()
# Print summary statistics of dictionary
summary(dict.HE)

data(DictionaryLM)
str(DictionaryLM)

## -----------------------------------------------------------------------------
d <- SentimentDictionaryWordlist(c("uncertain", "possible", "likely"))
summary(d)

# Alternative call
d <- SentimentDictionary(c("uncertain", "possible", "likely"))
summary(d)

## -----------------------------------------------------------------------------
d <- SentimentDictionaryBinary(c("increase", "rise", "more"),
                               c("fall", "drop"))
summary(d)

# Alternative call
d <- SentimentDictionary(c("increase", "rise", "more"),
                         c("fall", "drop"))
summary(d)

## -----------------------------------------------------------------------------
d <- SentimentDictionaryWeighted(c("increase", "decrease", "exit"),
                                 c(+1, -1, -10),
                                 rep(NA, 3))
summary(d)

# Alternative call
d <- SentimentDictionary(c("increase", "decrease", "exit"),
                         c(+1, -1, -10),
                         rep(NA, 3))
summary(d)                         

## -----------------------------------------------------------------------------
# Create a vector of strings
documents <- c("This is a good thing!",
               "This is a very good thing!",
               "This is okay.",
               "This is a bad thing.",
               "This is a very bad thing.")
response <- c(1, 0.5, 0, -0.5, -1)

# Generate dictionary with LASSO regularization
dict <- generateDictionary(documents, response)

dict

summary(dict)

## ----eval=FALSE---------------------------------------------------------------
#  write(dict, file="dictionary.dict")
#  dict <- read("dictionary.dict")

## -----------------------------------------------------------------------------
compareDictionaries(dict,
                    loadDictionaryQDAP())

sentiment <- predict(dict, documents)
compareToResponse(sentiment, response)
plotSentimentResponse(sentiment, response)

## -----------------------------------------------------------------------------
test_documents <- c("This is neither good nor bad",
                    "What a good idea!",
                    "Not bad")
test_response <- c(0, 1, 1)

pred <- predict(dict, test_documents)

compareToResponse(pred, test_response)
plotSentimentResponse(pred, test_response)

compareToResponse(analyzeSentiment(test_documents), test_response)

## -----------------------------------------------------------------------------
corpus <- VCorpus(VectorSource(documents))
tdm <- TermDocumentMatrix(corpus, 
                          control=list(wordLengths=c(1,Inf), 
                                       tokenize=function(x) ngram_tokenize(x, char=FALSE, 
                                                                           ngmin=1, ngmax=2)))
rownames(tdm)

dict <- generateDictionary(tdm, response)
summary(dict)
dict

## -----------------------------------------------------------------------------
sentiment <- analyzeSentiment(documents,
                              rules=list("SentimentLM"=list(ruleSentiment, loadDictionaryLM())))
sentiment

## -----------------------------------------------------------------------------
documents <- c("Das ist ein gutes Resultat",
               "Das Ergebnis war schlecht")
dictionaryGerman <- SentimentDictionaryBinary(c("gut"), 
                                              c("schlecht"))

sentiment <- analyzeSentiment(documents,
                              language="german",
                              rules=list("GermanSentiment"=list(ruleSentiment, dictionaryGerman)))
sentiment

convertToBinaryResponse(sentiment$GermanSentiment)

## -----------------------------------------------------------------------------
woorden <- c("goed","slecht")
scores <- c(0.8,-0.5)
dictionaryDutch <- SentimentDictionaryWeighted(woorden, scores)
documents <- "dit is heel slecht"
sentiment <- analyzeSentiment(documents,
                              language="dutch",
                              rules=list("DutchSentiment"=list(ruleLinearModel, dictionaryDutch)))
sentiment

## -----------------------------------------------------------------------------
library(tm)
data("crude")

# Analyze sentiment
sentiment <- analyzeSentiment(crude)

# Count positive and negative news releases
table(convertToBinaryResponse(sentiment$SentimentLM))

# News releases with highest and lowest sentiment
crude[[which.max(sentiment$SentimentLM)]]$meta$heading
crude[[which.min(sentiment$SentimentLM)]]$meta$heading

# View summary statistics of sentiment variable
summary(sentiment$SentimentLM)

# Visualize distribution of standardized sentiment variable
hist(scale(sentiment$SentimentLM))

# Compute cross-correlation 
cor(sentiment[, c("SentimentLM", "SentimentHE", "SentimentQDAP")])

# crude oil news between  1987-02-26 until 1987-03-02
datetime <- do.call(c, lapply(crude, function(x) x$meta$datetimestamp))

plotSentiment(sentiment$SentimentLM)
plotSentiment(sentiment$SentimentLM, x=datetime, cumsum=TRUE)

## -----------------------------------------------------------------------------
# count words (without stopwords)
countWords(documents)

# count all words (including stopwords)
countWords(documents, removeStopwords=FALSE)

Try the SentimentAnalysis package in your browser

Any scripts or data that you put into this service are public.

SentimentAnalysis documentation built on Aug. 24, 2023, 1:07 a.m.