Nothing
#' @title Opinion score of a digital text document (DTD)
#' @description Given a DTD,
#' this function computes the overall opinion score based on the
#' proportion of text records classified as expressing positive,
#' negative or a neutral sentiment.
#' The function first transforms
#' the text document into a tidy-format dataframe, described as the
#' `observed sentiment document (OSD)` (Adepeju and Jimoh, 2021),
#' in which each text record is assigned a sentiment class based
#' on the summation of all sentiment scores expressed by the words in
#' the text record.
#' @param textdoc An \code{n} x \code{1} list (dataframe) of
#' individual text records, where \code{n} is the total
#' number of individual records.
#' @param metric (an integer) Specify the metric to utilize for
#' the calculation of opinion score. Valid values include
#' \code{1, 2, ...,5}.
#' Assuming \code{P}, \code{N} and \code{O} represent positive,
#' negative, and neutral record sentiments, respectively,
#' the followings are the details of the opinion score function
#' represented by the numerical arguments above:
#' \code{1}: Polarity (percentage difference)
#' \code{((P - N)/(P + N))*100}, (Bound: -100%, +100%);
#' \code{2}: Polarity (proportional difference)
#' \code{((abs(P - N) / (P + N + O))*100},
#' (Bound: 0, +100%);
#' \code{3}: Positivity \code{(P/ (P + N + O))*100},
#' (Bound: 0, +100%); \code{4}: Negativity \code{(N / (P + N + O))*100},
#' (Bound: 0, +100%) (Malshe, A. 2019;
#' Lowe et al. 2011). \code{5}: To pass a
#' user-defined opinion score function (also see the \code{fun}
#' parameter below.
#' @param fun A user-defined function given that \code{metric}
#' parameter (above) is set equal to \code{5}.
#' For example, given a defined opinion score function
#' `myfun` <- `function(P, N, O){`
#' `("some tasks to do")`; `return("a value")}`, the input
#' argument of \code{fun} parameter then becomes `fun = myfun`.
#' Default: \code{NULL}.
#'
#' @usage opi_score(textdoc, metric = 1, fun = NULL)
#'
#' @examples
#' # Use police/pandemic posts on Twitter
#' # Experiment with a standard metric (e.g. metric 1)
#' score <- opi_score(textdoc = policing_dtd, metric = 1, fun = NULL)
#' #print result
#' print(score)
#'
#' #Example using a user-defined opinion score -
#' #a demonstration with a component of SIM opinion
#' #Score function (by Razorfish, 2009). The opinion
#' #function can be expressed as:
#'
#' myfun <- function(P, N, O){
#' score <- (P + O - N)/(P + O + N)
#' return(score)
#' }
#'
#' #Run analysis
#' score <- opi_score(textdoc = policing_dtd, metric = 5, fun = myfun)
#' #print results
#' print(score)
#'
#'
#' @details An opinion score is derived from all the sentiments
#' (i.e. positive, negative (and neutral) expressed within a
#' text document. We deploy a lexicon-based approach
#' (Taboada et al. 2011) using the `AFINN` lexicon
#' (Nielsen, 2011).
#' @return Returns an `opi_object` containing details of the
#' opinion measures from the text document.
#' @references (1) Adepeju, M. and Jimoh, F. (2021). An
#' Analytical Framework for Measuring Inequality in the
#' Public Opinions on Policing – Assessing the impacts
#' of COVID-19 Pandemic using Twitter Data.
#' https://doi.org/10.31235/osf.io/c32qh
#' (2) Malshe, A. (2019) Data Analytics Applications.
#' Online book available at:
#' https://ashgreat.github.io/analyticsAppBook/index.html.
#' Date accessed: 15th December 2020.
#' (3) Taboada, M.et al. (2011).
#' Lexicon-based methods for sentiment analysis. Computational
#' linguistics, 37(2), pp.267-307.
#' (4) Lowe, W. et al. (2011).
#' Scaling policy preferences from coded political texts.
#' Legislative studies quarterly, 36(1), pp.123-155.
#' (5) Razorfish (2009) Fluent: The Razorfish Social Influence
#' Marketing Report. Accessed: 24th February, 2021.
#' (6) Nielsen, F. A. (2011), “A new ANEW: Evaluation of a word
#' list for sentiment analysis in microblogs”, Proceedings of the
#' ESWC2011 Workshop on 'Making Sense of Microposts': Big things
#' come in small packages (2011) 93-98.
#' @importFrom dplyr mutate select rename filter
#' left_join inner_join group_by summarise ungroup n
#' @importFrom tidyr separate
#' @importFrom tidytext unnest_tokens get_sentiments
#' @importFrom tibble tibble as_tibble
#' @importFrom magrittr %>%
#' @import tibble tibble
#' @export
opi_score <- function(textdoc, metric = 1, fun = NULL){
#call afinn
#afinn_111 <- afinn_111
#data("afinn_111")
#global variables
ID <- as_tibble <- bigram <- get_sentiments <- neg <- sentiment <-
sentiment_score <- separate <- text <- value <- value2 <-
word <- word1 <- word2 <- NULL
#ensuring appropriate length of document is supplied
if(dim(textdoc)[1] < 15){
stop(paste("Length of document is too small!!",
"The minimum document length of 15 is recommended!",
"Process terminated!!", sep = " "))
}
#if the length of document is too large
if(dim(textdoc)[1] > 10000000){
stop(paste("Length of document is too large!!",
"The maximum document length is 10 million records!!",
"Process terminated!!", sep = " "))
}
#check metric
if(!metric %in% c(1:5)){
stop(paste(" 'Metric' argument can only assume values from",
"1, 2,..., 5", sep=" "))
}
#check if a user-defined function is inputted
if(metric == 5 & is.null(fun)){
stop(paste("A user-defined opinion function is",
"need in the parameter 'fun'", sep=" "))
}
if(metric %in% c(1:4) & !is.null(fun)){
print(paste("Warning: `fun` parameter will not be used!!",
"Otherwise, set`metric = 5`", sep=" "))
}
opi_object <- list()
textdoc <- as.data.frame(textdoc[,1])
colnames(textdoc) <- "text"
#head(textdoc)#nrow(textdoc)
#create the id
textdoc$ID <- seq.int(nrow(textdoc))
#handle sentiment words preceded by negation words
token_neg_pre <- suppressMessages(as_tibble(textdoc) %>%
#unnest_tokens
unnest_tokens(bigram, text, token = "ngrams", n = 2)%>%
separate(bigram, c("word1", "word2"), sep = " ")%>%
as.data.frame() %>%
#get only words preceded by the 'negation' word
filter(word1 %in% c("not", "never", "no", "without")) %>%
#get sentiment score
mutate(neg = paste(word1, word2, sep=" ")) %>%
rename(word = word2)%>%
#left_join(get_sentiments("afinn")) %>% #
left_join(sysdata) %>% #
select(ID, neg, value)%>%
#reverse the score (and multiply by 2)
mutate(value2 = value * 2)%>%
mutate(value = value2)%>%
select(-c(value2))%>%
rename(word=neg)%>%
filter(!is.na(value)))
#handle all others
token_regular <- suppressMessages(textdoc %>%
#handle regular words
unnest_tokens(word, text, drop = FALSE) %>%
#join to the lexicon
#inner_join(get_sentiments("afinn")) %>%
inner_join(sysdata) %>%
select(-c(text)))#drop text
#join both tables
both_tokens <- data.frame(rbind(token_regular, token_neg_pre))
afinn_OSD_each <- both_tokens %>%
group_by(ID)%>%
summarise(sentiment_score = sum(value))%>%
#retain neutral
#filter(sentiment_score != 0) %>%
mutate(sentiment = if_else(sentiment_score > 0,
"positive",
if_else(sentiment_score < 0, "negative",
"neutral")))%>%
select(-c(sentiment_score))%>%
ungroup()
afinn_OSD <- afinn_OSD_each %>%
group_by(sentiment)%>%
#count the proportion of
summarise(n=n())
#to ensure that each value exist
sent_gr <- data.frame(sentiment=c("negative", "positive", "neutral"),
n=0)
wh <- sent_gr$sentiment %in% afinn_OSD$sentiment
afinn_OSD <- afinn_OSD %>%
bind_rows(sent_gr[which(wh==FALSE),])
#calculate opinion score
if (metric %in% c(1:4)){
if(metric == 1){
total_n <- sum(afinn_OSD$n)
afinn_OSD <- afinn_OSD %>%
rename(No_of_text_records=n)
P <- afinn_OSD[which(afinn_OSD$sentiment == "positive"),2]
N <- afinn_OSD[which(afinn_OSD$sentiment == "negative"),2]
PD <- round(((P - N)/(P + N))*100,digits = 2)
opi_object$sentiments <- knitr::kable(data.frame(afinn_OSD))
opi_object$opiscore <- paste(PD, "%", sep="")
opi_object$metric <- "Polarity (Percentage Difference)"
opi_object$equation <- paste('((#Positive - #Negative)',
'(#Positive + #Negative))*100%',
sep="/")
opi_object$OSD <- as_tibble(afinn_OSD_each)
}
if(metric == 2){
P <- afinn_OSD[which(afinn_OSD$sentiment == "positive"),2]
N <- afinn_OSD[which(afinn_OSD$sentiment == "negative"),2]
O <- afinn_OSD[which(afinn_OSD$sentiment == "neutral"),2]
PD <- round((abs(P - N) / (P + N + O))*100,digits = 2)
total_n <- sum(afinn_OSD$n)
afinn_OSD <- afinn_OSD %>%
rename(No_of_text_records=n)
opi_object$sentiments <- knitr::kable(data.frame(afinn_OSD))
opi_object$opiscore <- paste(PD, "%", sep="")
opi_object$metric <- "Polarity (Proportional Difference)"
opi_object$equation <- paste('(abs(#Positive - #Negative)',
'(#Positive + #Negative + #neutral))*100%',
sep="/")
opi_object$OSD <- as_tibble(afinn_OSD_each)
}
if(metric == 3){
P <- afinn_OSD[which(afinn_OSD$sentiment == "positive"),2]
N <- afinn_OSD[which(afinn_OSD$sentiment == "negative"),2]
O <- afinn_OSD[which(afinn_OSD$sentiment == "neutral"),2]
PoS <- round((P / (P + N + O))*100, digits = 2)
total_n <- sum(afinn_OSD$n)
afinn_OSD <- afinn_OSD %>%
rename(No_of_text_records=n)
opi_object$sentiments <- knitr::kable(data.frame(afinn_OSD))
opi_object$opiscore <- paste(PoS, "%", sep="")
opi_object$metric <- "Positivity"
opi_object$equation <-
"(#Positive / (#Positive + #Negative + #neutral))*100%"
opi_object$OSD <- as_tibble(afinn_OSD_each)
}
if(metric == 4){
P <- afinn_OSD[which(afinn_OSD$sentiment == "positive"),2]
N <- afinn_OSD[which(afinn_OSD$sentiment == "negative"),2]
O <- afinn_OSD[which(afinn_OSD$sentiment == "neutral"),2]
Neg <- round((N / (P + N + O))*100, digits = 2)
total_n <- sum(afinn_OSD$n)
afinn_OSD <- afinn_OSD %>%
rename(No_of_text_records=n)
opi_object$sentiments <- knitr::kable(data.frame(afinn_OSD))
opi_object$opiscore <- paste(Neg, "%", sep="")
opi_object$metric <- "Negativity"
opi_object$equation <-
"(#Negativity / (#Positive + #Negative + #neutral))*100%"
opi_object$OSD <- as_tibble(afinn_OSD_each)
}
}
if(metric == 5){
P <- afinn_OSD[which(afinn_OSD$sentiment == "positive"),2]
N <- afinn_OSD[which(afinn_OSD$sentiment == "negative"),2]
O <- afinn_OSD[which(afinn_OSD$sentiment == "neutral"),2]
userfun <- as.numeric(fun(P, N, O))
total_n <- sum(afinn_OSD$n)
afinn_OSD <- afinn_OSD %>%
rename(No_of_text_records=n)
opi_object$sentiments <- knitr::kable(data.frame(afinn_OSD))
opi_object$opiscore <- userfun
opi_object$metric <- "User-defined"
opi_object$equation <- fun
opi_object$OSD <- as_tibble(afinn_OSD_each)
}
return(opi_object)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.