# Splits a data frame into consecutive batches for batched API requests
# (the Microsoft API allows 100 requests per collection).
#
# Args:
#   dataset: data.frame to split.
#   batch:   maximum number of rows per batch.
#
# Returns:
#   A list of data frames named "0", "1", ... in row order, each holding at
#   most `batch` rows. A zero-row input yields an empty list.
transform_dataframe <- function(dataset, batch) {
  # Group by row *position*, not by row names: row names are not guaranteed
  # to be the contiguous integers 1..n (e.g. after subsetting, or when the
  # data frame carries character row names), which would produce uneven
  # batches or NA groups that split() silently drops.
  batch_index <- (seq_len(nrow(dataset)) - 1) %/% batch
  split(dataset, batch_index)
}
# Serialises one batch into the JSON body used for the POST request: the rows
# of `dataset` are placed under a top-level "documents" key, and length-one
# values are written as scalars rather than one-element arrays.
transform_data <- function(dataset) {
  payload <- list(documents = dataset)
  jsonlite::toJSON(payload, auto_unbox = TRUE)
}
# Sends one JSON batch to the v2.1 sentiment endpoint of the given region and
# returns the response body as parsed by httr::content().
#
# Args:
#   data_for_api: request body (JSON) for the POST call.
#   auth_key:     subscription key, sent in the Ocp-Apim-Subscription-Key header.
#   api_region:   region prefix used to build the API host name.
post_to_api <- function(data_for_api, auth_key, api_region) {
  endpoint <- paste0(
    "https://",
    toString(api_region),
    ".api.cognitive.microsoft.com/text/analytics/v2.1/sentiment"
  )
  request_headers <- c(
    "Content-Type" = "application/json",
    "Ocp-Apim-Subscription-Key" = toString(auth_key)
  )
  response <- httr::POST(
    endpoint,
    body = data_for_api,
    httr::add_headers(.headers = request_headers)
  )
  httr::content(response)
}
# Extracts the per-document results from a parsed API response.
#
# Args:
#   api_output: parsed response; its `documents` element is a list with one
#               entry per scored document.
#
# Returns:
#   A data.frame with columns "Id" and "Sentiment Score", one row per
#   document. Values are taken positionally (first value of each document
#   becomes Id, second becomes Sentiment Score) and are not converted, so
#   they are character whenever any flattened value is a string. An empty or
#   missing `documents` element yields a zero-row frame with the same columns.
api_results <- function(api_output) {
  documents <- api_output$documents
  # matrix() refuses NULL data, so an empty result set has to be handled
  # explicitly instead of being passed through unlist().
  if (length(documents) == 0) {
    return(data.frame(
      Id = character(0),
      `Sentiment Score` = character(0),
      check.names = FALSE,
      stringsAsFactors = FALSE
    ))
  }
  api_returned_data <- data.frame(
    matrix(unlist(documents), nrow = length(documents), byrow = TRUE),
    stringsAsFactors = FALSE
  )
  names(api_returned_data) <- c("Id", "Sentiment Score")
  api_returned_data
}
# Extracts the per-document errors from a parsed API response.
#
# Args:
#   api_output: parsed response; its `errors` element is a list with one
#               entry per document the API could not score.
#
# Returns:
#   A data.frame with columns "Id" and "Error Message", one row per error.
#   Values are taken positionally (first value of each entry becomes Id,
#   second becomes Error Message). An empty or missing `errors` element
#   yields a zero-row frame with the same columns.
api_errors <- function(api_output) {
  errors <- api_output$errors
  # matrix() refuses NULL data, so "no errors" has to be handled explicitly
  # instead of being passed through unlist().
  if (length(errors) == 0) {
    return(data.frame(
      Id = character(0),
      `Error Message` = character(0),
      check.names = FALSE,
      stringsAsFactors = FALSE
    ))
  }
  api_error_data <- data.frame(
    matrix(unlist(errors), nrow = length(errors), byrow = TRUE),
    stringsAsFactors = FALSE
  )
  names(api_error_data) <- c("Id", "Error Message")
  api_error_data
}
# Retrieves a sentiment score for every row of `data` by sending the rows to
# the API in batches and merging the returned scores back onto the input.
#
# Args:
#   data:       data.frame with at least the columns `language` and `text`.
#   auth_key:   subscription key forwarded to post_to_api().
#   api_region: API region forwarded to post_to_api().
#   batch_size: number of rows sent per request (default 100).
#
# Returns:
#   `data` with an added `Id` column (row position) and a numeric
#   `Sentiment Score` column; rows for which no score came back hold NA.
#   Returns NA when the required columns are missing or when no scores
#   could be retrieved at all.
#
# Side effects:
#   Prints diagnostics with cat()/message(). When the API reports
#   per-document errors they are written to 'Error_Log.csv' in the working
#   directory.
get_batch_sentiment <- function(data, auth_key, api_region, batch_size = 100) {
  # checking if input dataset is in correct format
  if (!all(c("language", "text") %in% colnames(data))) {
    cat("Error: Dataframe not passed in correct format. Read documentation for correct format.")
    return(NA)
  }

  # adding id to dataset; necessary for sending request to api.
  # seq_along() (unlike 1:length()) stays correct for zero-row input.
  data$Id <- seq_along(data$text)
  # Batching must follow row position; dropping custom row names keeps it
  # correct for subsetted or name-indexed data frames. The merged result
  # below gets fresh row names anyway, so the output is unaffected.
  rownames(data) <- NULL

  # Per-batch results are collected in lists and bound once after the loop.
  score_batches <- list()
  error_batches <- list()
  failure_message <- ""

  for (each_data in transform_dataframe(data, batch_size)) {
    json_data <- transform_data(each_data)
    # A failing request (e.g. wrong region -> unresolvable host) must not
    # abort the run, and must not leave the previous batch's response in
    # place: capture the tryCatch() value and skip the batch on failure.
    hitting_api <- tryCatch(
      post_to_api(json_data, auth_key, api_region),
      error = function(cond) {
        cat("Error: Link is Invalid. Make sure valid region is passed to function.\nNote: API wrapper uses version 2.1 for sentiment analysis, therefore parameters passed should be valid for this version.")
        cat("\n\nHere's original Error Messge:\n")
        message(cond)
        NULL
      }
    )
    if (is.null(hitting_api)) {
      next
    }

    n_docs <- length(hitting_api$documents)
    n_rows <- nrow(each_data)

    # Keep whatever scores came back, for fully and partially scored batches.
    if (n_docs > 0 && n_docs <= n_rows) {
      score_batches[[length(score_batches) + 1]] <- api_results(hitting_api)
    }

    # Anything other than a fully scored batch may carry per-document errors
    # and/or a service-level message.
    if (n_docs != n_rows) {
      if (length(hitting_api$errors) > 0) {
        error_batches[[length(error_batches) + 1]] <- api_errors(hitting_api)
      }
      if (length(hitting_api$message) > 0) {
        # Collapse to a single string so the comparison is always scalar;
        # the same message is only printed once across consecutive batches.
        api_message <- paste(unlist(hitting_api$message), collapse = " ")
        if (api_message != failure_message) {
          failure_message <- api_message
          cat("Addtional Message Returned:\n", api_message)
        }
      }
    }
  }

  if (length(error_batches) > 0) {
    error_data <- do.call(rbind, error_batches)
    write.csv(error_data, "Error_Log.csv")
    cat("Check Error_Log.csv file for generated errors.")
  }

  if (length(score_batches) == 0) {
    return(NA)
  }

  temp_data <- do.call(rbind, score_batches)
  # all.x = TRUE keeps input rows that received no score (score is NA).
  data_with_sentiment <- merge(data, temp_data, by = "Id", all.x = TRUE)
  data_with_sentiment$`Sentiment Score` <-
    as.numeric(data_with_sentiment$`Sentiment Score`)
  data_with_sentiment
}
# Distribution plot of retrieved sentiment scores, grouped into Negative,
# Neutral and Positive sentiment.
#
# Args:
#   data:            data.frame containing a `Sentiment Score` column.
#   negative_cutoff: scores below this value are labelled "Negative".
#   positive_start:  scores from this value upwards are labelled "Positive";
#                    scores in between are "Neutral".
#   graph_alpha:     fill transparency passed to the geom.
#   graph_type:      'density' or 'boxplot'.
#
# Returns:
#   A ggplot object, or a character string starting with "Error:" when the
#   input is invalid (missing column, invalid cutoffs, unknown graph type).
sentiment_dist_plot <- function(data, negative_cutoff = 0.35, positive_start = 0.65, graph_alpha = 0.5, graph_type = 'density') {
  # exception handling for data input with no sentiment score column
  if (!("Sentiment Score" %in% colnames(data))) {
    return("Error: 'Sentiment Score' column not provided in data.")
  }

  # Cutoffs are validated as single, non-missing numbers *before* any
  # comparison: comparing a string against a number silently falls back to
  # string comparison and would select the wrong error message.
  is_single_number <- function(value) {
    is.numeric(value) && length(value) == 1 && !is.na(value)
  }
  cutoff_error <- "Error: Either non numeric input for sentiment score cutoff or negative score cutoff is greater than positive score cutoff"
  if (!is_single_number(negative_cutoff) || !is_single_number(positive_start)) {
    return(cutoff_error)
  }
  if (positive_start > 1) {
    return("Error: Sentiment Cutoff values cannot be greater than 1.0")
  }
  if (negative_cutoff > positive_start) {
    return(cutoff_error)
  }

  # Upper break is 1.01 so that a score of exactly 1 still lands in the
  # "Positive" interval.
  sentiment_breaks <- c(0, negative_cutoff, positive_start, 1.01)
  sentiment_labels <- c("Negative", "Neutral", "Positive")
  data$Sentiment <- sentiment_labels[
    findInterval(data$`Sentiment Score`, sentiment_breaks)
  ]
  # changing data type for plotting
  data$Sentiment <- as.factor(data$Sentiment)
  data$Score <- as.numeric(data$`Sentiment Score`)

  # Title and theme shared by both graph types; applied lazily so the
  # invalid-graph-type path never builds a plot.
  add_styling <- function(plot, title) {
    plot +
      ggtitle(title) +
      theme(
        plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")
      )
  }

  # exception handling for wrong graph type input
  if (graph_type == "density") {
    density_plot <- ggplot(data, aes(x = Score, fill = Sentiment)) +
      geom_density(alpha = graph_alpha)
    return(add_styling(density_plot, "Sentiment Distribution Plot"))
  }
  if (graph_type == "boxplot") {
    box_plot <- ggplot(data, aes(y = Score, x = Sentiment, fill = Sentiment)) +
      geom_boxplot(alpha = graph_alpha)
    return(add_styling(box_plot, "Sentiment Boxplot"))
  }
  "Error: Invalid Graph Type Passed"
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.