R/lex_rank.R

Defines functions add_lex_rank .add_lex_rank

# library(lexRankr)
# library(dplyr)

# Summarise a single text with LexRank.
#
# The text is split into parts on `split_terms`, each part is trimmed, empty
# parts are dropped, and the remaining parts are passed to `lexRank()` as the
# sentences of one document.
#
# Arguments:
#   text        Character string to summarise.
#   split_terms Regular expression used to split `text` into parts
#               (default: a literal period or a newline).
#   top_n       Passed to `lexRank()` as `n`, the number of top-ranked
#               sentences requested.
#
# Returns a one-row tibble with column `text` plus:
#   * `dataLexRank` (list column holding the `lexRank()` result) and
#     `hasLexRank = TRUE` when `lexRank()` produced at least one row;
#   * only `hasLexRank = FALSE` when `lexRank()` failed or returned no rows.
.add_lex_rank <- function(text = "aircraft will be an experience unlike any other plane. You’ll feel good about the lower noise and smoother ride, and feel great about reducing your impact on the environment. The TailWind-E is powered by our super-efficient electric propulsion system. This configured inside a sleek lightweight package tailored to maximize efficiency with a \"ramp\" presence that creates excitement. The TailWind-H will be a hybrid-electric version of our aircraft, designed for longer-range flights.",
                          split_terms = "\\.|\n",
                          top_n = 2) {
  parts <-
    text %>%
    str_split(pattern = split_terms) %>%
    flatten_chr() %>%
    str_trim() %>%
    purrr::discard(function(x) {
      x == ""
    })

  # Any error raised by lexRank() is converted into an empty tibble, which is
  # handled by the "no summary" branch below.
  lexRank_safe <- purrr::possibly(lexRank, tibble())

  df_summary <-
    lexRank_safe(
      parts,
      # only one document: repeat the same docId for every part
      docId = rep(1, length(parts)),
      # number of top-ranked sentences to return
      n = top_n,
      continuous = TRUE
    ) %>%
    as_tibble()

  # No summary available: flag the text and omit the `dataLexRank` column.
  if (nrow(df_summary) == 0) {
    data <- tibble(text) %>% mutate(hasLexRank = FALSE)
    return(data)
  }

  tibble(text) %>%
    mutate(dataLexRank = list(df_summary)) %>%
    mutate(hasLexRank = TRUE)
}

# Add LexRank summaries to every row of a data frame.
#
# Arguments:
#   data        Data frame containing the texts to summarise.
#   text_column Name (string) of the column in `data` that holds the text.
#   split_terms Regular expression used to split each text into parts;
#               forwarded to `.add_lex_rank()`.
#   top_n       Number of top-ranked sentences requested per text; forwarded
#               to `.add_lex_rank()`.
#
# Returns `data` with the columns produced by `.add_lex_rank()` joined on by
# text value. Rows whose text is NA, or whose summary call failed, get NA in
# the added columns. If no text produced a result at all, `data` is returned
# without any added columns. The row count of `data` is preserved.
#
# Side effect: each distinct text is emitted via `message()` before it is
# summarised.
add_lex_rank <-
  function(data, text_column = "text", split_terms = "\\.|\n", top_n = 3) {
    # Work on a column literally named `text`; it is renamed back at the end.
    data <-
      data %>%
      rename(text := !!sym(text_column))

    # Summarise each distinct non-missing text once. Duplicated keys in the
    # summary table would multiply rows in the join below.
    texts <- data$text
    texts <- unique(texts[!is.na(texts)])

    # A failure on one text yields an empty tibble, so it contributes no row.
    .add_lex_rank_safe <- possibly(.add_lex_rank, tibble())

    all_summary <-
      texts %>%
      map_dfr(function(text) {
        message(text)
        .add_lex_rank_safe(
          text = text,
          split_terms = split_terms,
          top_n = top_n
        )
      })

    # With no usable texts (or only failures) the summary has no `text` key
    # column and the join would error, so skip it.
    if ("text" %in% names(all_summary)) {
      data <-
        data %>%
        left_join(all_summary, by = "text")
    }

    data %>%
      rename(!!sym(text_column) := text)
  }
abresler/govtrackR documentation built on July 11, 2020, 12:30 a.m.