# R/text_analysis.R

# You can learn more about package authoring with RStudio at:
#
#   http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
#   Build and Reload Package:  'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'

#Sys.setenv(KEY_TA = "<your-text-analytics-key>")  # never commit a real subscription key
#sentence1 <- "jason bourne movies i think they are pretty cool"
#sentence2 <- "This is the worst thing in the world"
#sentence3 <- "can't complain"
#library(httr)
#library(jsonlite)

#' Sentiment Analysis
#'
#' @description Analyzes the sentiment of one or more strings of text.
#' This function is a wrapper for the Sentiment post request in Microsoft's Cognitive Services API.
#'
#' @details The subscription key is read from the \code{KEY_TA} environment
#' variable. The values in the result are taken from the parsed response with
#' \code{unlist()}; no numeric conversion is performed on \code{Sentiment}.
#'
#' @param text The text to be analyzed. Each element of a character vector is
#'   sent as one document.
#' @param language The language of the text being analyzed. Must be en, es, fr, or pt.
#' @return A data frame with one row per document scored by the API and the
#'   columns \code{Sentiment} (the sentiment score, between 0 and 1),
#'   \code{Id} (the position of the document in \code{text}) and \code{Text}
#'   (the original text). Documents the API reports under \code{errors} are
#'   left out of the result and announced with a warning.
#' @seealso \url{https://www.microsoft.com/cognitive-services}
#' @examples
#' \dontrun{
#' # Requires the KEY_TA environment variable and network access
#' get_sentiment("I am very angry")
#' get_sentiment("je suis tres en colere", language = "fr")
#' get_sentiment(c(
#'   "jason bourne movies i think they are pretty cool",
#'   "This is the worst thing in the world",
#'   "can't complain"
#' ))
#' }
#' @export
get_sentiment <- function(text, language = "en") {
  # Setting basic variables
  url <- "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment"
  n <- NROW(text)

  # Empty input used to build ids from 1:0 and fail with an obscure
  # data.frame error, so reject it up front with a clear message
  if (n == 0) {
    stop("No text supplied. Pass in at least one document to analyze", call. = FALSE)
  }

  # Checking for valid language and throwing error if not valid
  if (!match_language(language)) {
    stop("Supplied language is not supported. Pass in one of: en,es,fr,pt", call. = FALSE)
  }

  # Setting key value, will throw error if env var not set
  key <- get_text_analytics_key()

  # Building out the request's body: one row per document, ids are "1".."n"
  request_body <- data.frame(
    language = rep(language, n),
    id = as.character(seq_len(n)),
    text = text,
    stringsAsFactors = FALSE
  )

  # Converting the request body to JSON
  request_body_json <- jsonlite::toJSON(list(documents = request_body), auto_unbox = TRUE)

  # Sending request
  result <- httr::POST(
    url,
    body = request_body_json,
    httr::add_headers(.headers = c(
      "Content-Type" = "application/json",
      "Ocp-Apim-Subscription-Key" = key
    ))
  )

  # Throwing error if request does not return json
  if (httr::http_type(result) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  # Converting JSON to R-usable object
  output <- httr::content(result)

  # Throwing error if status code != 200
  if (httr::status_code(result) != 200) {
    stop(
      paste(
        "API request failed.\nStatus code:",
        output$statusCode, "\nMessage:",
        output$message
      ),
      call. = FALSE
    )
  }

  # Only the scored documents are reshaped. Entries under `errors` used to be
  # unlisted into the same matrix and corrupted the result.
  documents <- output$documents
  errors <- output$errors

  if (length(errors) > 0) {
    warning(
      length(errors), " document(s) could not be analyzed and are omitted: ",
      paste(unlist(errors), collapse = "; "),
      call. = FALSE
    )
  }
  if (length(documents) == 0) {
    stop("API response did not contain any scored documents", call. = FALSE)
  }

  # Converting output to a dataframe
  # NOTE(review): assumes each document unlists to (score, id) in that order,
  # as the column names below imply - confirm against the API response
  output <- data.frame(matrix(unlist(documents), nrow = length(documents), byrow = TRUE))
  names(output) <- c("Sentiment", "Id")

  # Align the text by the id the API echoed back, so rows stay correct even
  # when some documents were rejected
  output$Text <- text[as.integer(as.character(output$Id))]
  output
}

#' Key Phrase Analysis
#'
#' @description Sends one or more strings of text to the Key Phrase endpoint
#' and returns the parsed response.
#' This function is a wrapper for the Key Phrase post request in Microsoft's Cognitive Services API.
#'
#' @details The subscription key is read from the \code{KEY_TA} environment
#' variable.
#'
#' @param text The text to be analyzed for key phrases. Each element of a
#'   character vector is sent as one document.
#' @param language The language of the text being analyzed. Must be en, es, fr, or pt.
#' @return The parsed API response flattened with \code{unlist()} into a
#'   vector. Note that this is not a data frame and the original \code{text}
#'   is not attached to it.
#' @seealso \url{https://www.microsoft.com/cognitive-services}
#' @examples
#' \dontrun{
#' # Requires the KEY_TA environment variable and network access
#' get_key_phrases("I am very angry")
#' get_key_phrases("je suis tres en colere", language = "fr")
#' get_key_phrases(c(
#'   "jason bourne movies i think they are pretty cool",
#'   "This is the worst thing in the world",
#'   "can't complain"
#' ))
#' }
#' @export
get_key_phrases <- function(text, language = "en") {
  # Setting basic variables
  url <- "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/keyPhrases"
  n <- NROW(text)

  # Empty input used to build ids from 1:0 and fail with an obscure
  # data.frame error, so reject it up front with a clear message
  if (n == 0) {
    stop("No text supplied. Pass in at least one document to analyze", call. = FALSE)
  }

  # Checking for valid language and throwing error if not valid
  if (!match_language(language)) {
    stop("Supplied language is not supported. Pass in one of: en,es,fr,pt", call. = FALSE)
  }

  # Setting key value, will throw error if env var not set
  key <- get_text_analytics_key()

  # Building out the request's body: one row per document, ids are "1".."n"
  request_body <- data.frame(
    language = rep(language, n),
    id = as.character(seq_len(n)),
    text = text,
    stringsAsFactors = FALSE
  )

  # Converting the request body to JSON
  request_body_json <- jsonlite::toJSON(list(documents = request_body), auto_unbox = TRUE)

  # Sending request
  result <- httr::POST(
    url,
    body = request_body_json,
    httr::add_headers(.headers = c(
      "Content-Type" = "application/json",
      "Ocp-Apim-Subscription-Key" = key
    ))
  )

  # Throwing error if request does not return json
  if (httr::http_type(result) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  # Converting JSON to R-usable object
  output <- httr::content(result)

  # Throwing error if status code != 200
  if (httr::status_code(result) != 200) {
    stop(
      paste(
        "API request failed.\nStatus code:",
        output$statusCode, "\nMessage:",
        output$message
      ),
      call. = FALSE
    )
  }

  # The response is returned flattened rather than reshaped into a data frame:
  # the number of key phrases can differ per document, so it does not fit a
  # fixed-width matrix
  unlist(output)
}

# Detects the language of one or more strings of text through the languages
# endpoint of Microsoft's Cognitive Services API.
#
# text:     the text to analyze; each element is sent as one document.
# num_lang: how many languages to ask the API to detect per document. Any
#           value other than 1 is passed on as the numberOfLanguagesToDetect
#           query parameter.
#
# Returns a data frame with one row per document the API analyzed. Columns are
# Id, Language, Abr, Score and Text; when more than one language is reported
# per document the additional triples are named Language_2, Abr_2, Score_2 and
# so on. Documents reported under `errors` are left out and announced with a
# warning. Values come from unlist() on the response and are not converted to
# numeric.
detect_languages <- function(text, num_lang = 1) {
  n <- NROW(text)

  # Empty input used to build ids from 1:0 and fail with an obscure
  # data.frame error, so reject it up front with a clear message
  if (n == 0) {
    stop("No text supplied. Pass in at least one document to analyze", call. = FALSE)
  }

  # Setting basic variables
  if (num_lang == 1) {
    url <- "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/languages"
  } else {
    url <- paste0(
      "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/languages?numberOfLanguagesToDetect=",
      num_lang
    )
  }

  # Setting key value, will throw error if env var not set
  key <- get_text_analytics_key()

  # Building out the request's body: one row per document, ids are "1".."n"
  request_body <- data.frame(
    id = as.character(seq_len(n)),
    text = text,
    stringsAsFactors = FALSE
  )

  # Converting the request body to JSON
  request_body_json <- jsonlite::toJSON(list(documents = request_body), auto_unbox = TRUE)

  # Sending request
  result <- httr::POST(
    url,
    body = request_body_json,
    httr::add_headers(.headers = c(
      "Content-Type" = "application/json",
      "Ocp-Apim-Subscription-Key" = key
    ))
  )

  # Throwing error if request does not return json
  if (httr::http_type(result) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  # Converting JSON to R-usable object
  output <- httr::content(result)

  # Throwing error if status code != 200
  if (httr::status_code(result) != 200) {
    stop(
      paste(
        "API request failed.\nStatus code:",
        output$statusCode, "\nMessage:",
        output$message
      ),
      call. = FALSE
    )
  }

  # Only the analyzed documents are reshaped. Entries under `errors` used to
  # be unlisted into the same matrix and corrupted the result.
  documents <- output$documents
  errors <- output$errors

  if (length(errors) > 0) {
    warning(
      length(errors), " document(s) could not be analyzed and are omitted: ",
      paste(unlist(errors), collapse = "; "),
      call. = FALSE
    )
  }
  if (length(documents) == 0) {
    stop("API response did not contain any analyzed documents", call. = FALSE)
  }

  # Converting output to a dataframe
  # NOTE(review): assumes every document unlists to its id followed by one
  # (name, abbreviation, score) triple per detected language, and that all
  # documents report the same number of languages - confirm against the API
  output <- data.frame(matrix(unlist(documents), nrow = length(documents), byrow = TRUE))

  # Name every triple. Previously only four names were assigned, which left
  # NA column names whenever num_lang > 1 produced extra columns.
  n_sets <- max(1L, as.integer(ceiling((ncol(output) - 1) / 3)))
  suffix <- ifelse(seq_len(n_sets) == 1L, "", paste0("_", seq_len(n_sets)))
  col_names <- c(
    "Id",
    paste0(rep(c("Language", "Abr", "Score"), times = n_sets), rep(suffix, each = 3))
  )
  names(output) <- col_names[seq_len(ncol(output))]

  # Align the text by the id the API echoed back, so rows stay correct even
  # when some documents were rejected
  output$Text <- text[as.integer(as.character(output$Id))]
  output
}

# Not currently working
#
# Submits a batch of documents to the topics endpoint of Microsoft's Cognitive
# Services API and returns the parsed response as-is.
#
# text: the documents to analyze; at least 100 are required, otherwise an
#       error is raised before any request is made.
#
# The stop words and excluded topics sent with the request are fixed
# placeholder values.
detect_topics <- function(text) {
  endpoint <- "https://westus.api.cognitive.microsoft.com/text/analytics/v2.0/topics"
  n <- NROW(text)

  # Refuse batches that are too small before touching the key or the network
  if (n < 100) {
    stop("Requests to this API should contain at least 100 documents, where each document is not null or empty", call. = FALSE)
  }

  # Will throw an error if the env var holding the key is not set
  api_key <- get_text_analytics_key()

  # One row per document, ids are "1".."n"
  documents <- data.frame(
    id = as.character(seq_len(n)),
    text = text
  )

  # Assemble the full payload and serialize it to JSON
  payload <- list(
    stopWords = c("xxxxxxx", "xxxxxxxxxx"),
    topicsToExclude = c("hi", "ho"),
    documents = documents
  )
  payload_json <- jsonlite::toJSON(payload, auto_unbox = TRUE)

  # Send the request
  request_headers <- c(
    "Content-Type" = "application/json",
    "Ocp-Apim-Subscription-Key" = api_key
  )
  response <- httr::POST(
    endpoint,
    body = payload_json,
    httr::add_headers(.headers = request_headers)
  )

  # Anything other than a JSON reply is an error
  if (httr::http_type(response) != "application/json") {
    stop("API did not return json", call. = FALSE)
  }

  # Parse the JSON reply into an R object
  parsed <- httr::content(response)

  # Any status other than 200 is reported with the details from the reply
  if (httr::status_code(response) != 200) {
    failure <- paste(
      "API request failed.\nStatus code:",
      parsed$statusCode, "\nMessage:",
      parsed$message
    )
    stop(failure, call. = FALSE)
  }

  parsed
}

# Checks whether `lang` names a supported language (en, es, fr or pt),
# ignoring case.
#
# Always returns a single TRUE or FALSE so the result is safe to use in an
# `if` condition: NULL or zero-length input gives FALSE, and a vector gives
# TRUE only when every element is supported.
match_language <- function(lang) {
  valid_lang <- c("en", "es", "fr", "pt")

  # `&&` short-circuits, so the emptiness check shields all() from input on
  # which it would otherwise report TRUE
  length(lang) > 0 && all(tolower(lang) %in% valid_lang)
}

# Fetches the Text Analytics subscription key from the KEY_TA environment
# variable and raises an error when the variable is unset or empty.
get_text_analytics_key <- function() {
  api_key <- Sys.getenv("KEY_TA")

  # A non-empty value is the key; hand it straight back
  if (nzchar(api_key)) {
    return(api_key)
  }

  stop(
    "Please set env var KEY_TA to your key for the text analytics APIS. Key provided by Microsoft (https://www.microsoft.com/cognitive-services)",
    call. = FALSE
  )
}

# Sends a GET request to `url` carrying the subscription key header and
# returns the httr response object without inspecting it.
get_operation_status <- function(url) {
  # Will throw an error if the env var holding the key is not set
  request_headers <- c(
    "Content-Type" = "application/json",
    "Ocp-Apim-Subscription-Key" = get_text_analytics_key()
  )

  httr::GET(url, httr::add_headers(.headers = request_headers))
}

#######
#get_sentiment(c("I am very angry", "I am very happy"))
#detect_languages("je suis très en colère")
#get_sentiment("je suis très en colère", language = "fr")
# dereklegenzoff/cognitiver documentation built on May 5, 2019, 3:49 a.m.