# Chunk defaults for the rendered document: merge each chunk's source and
# output into a single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

# Point reticulate at the virtualenv located at "../env" (relative to the
# working directory), then attach gensimr.
reticulate::use_virtualenv("../env")
library(gensimr)

# Fix the RNG seed for reproducibility.
set.seed(42)

# Load the `corpus` example dataset shipped with gensimr.
data("corpus", package = "gensimr")

# Run the raw corpus through preprocess(); `docs` is reused further down.
docs <- preprocess(corpus)

## BM25 Weights

# Call gensimr's get_bm25_weights() on `docs`, the preprocessed corpus
# assigned earlier in this script.
get_bm25_weights(docs)

## Keywords

# Extract keywords.

# Sample passage used as input to keywords(). The variable is named `str`,
# which is also the name of the base function str(); calls to str() still
# resolve to the function, but the name is easy to misread.
str <- "Challenges in natural language processing frequently 
  speech recognition, natural language understanding, natural language
  generation (frequently from formal, machine-readable logical forms),
  connecting language and machine perception, dialog systems, or some
  combination thereof."
# The outer parentheses force the value to be visible so it is printed
# when this expression is evaluated at top level.
(keywords(str))

## Summarize

# Text summarisation.

# Poem used as the input text for summarize(); this overwrites the `str`
# value assigned for the keywords example.
str <- "Rice Pudding - Poem by Alan Alexander Milne
  What is the matter with Mary Jane?
  She's crying with all her might and main,
  And she won't eat her dinner - rice pudding again -
  What is the matter with Mary Jane?
  What is the matter with Mary Jane?
  I've promised her dolls and a daisy-chain,
  And a book about animals - all in vain -
  What is the matter with Mary Jane?
  What is the matter with Mary Jane?
  She's perfectly well, and she hasn't a pain;
  But, look at her, now she's beginning again! -
  What is the matter with Mary Jane?
  What is the matter with Mary Jane?
  I've promised her sweets and a ride in the train,
  And I've begged her to stop for a bit and explain -
  What is the matter with Mary Jane?
  What is the matter with Mary Jane?
  She's perfectly well and she hasn't a pain,
  And it's lovely rice pudding for dinner again!
  What is the matter with Mary Jane?"

# Summarise the text, flatten the result to a plain vector, and write it
# to the console with cat().
str %>%
  summarize() %>%
  unlist() %>%
  cat()


# news-r/gensimr documentation built on Jan. 9, 2021, 5:55 a.m.