R/texplor_lemmatize.R

Defines functions texplor_lemmatize

##' @param corpus quanteda corpus object
##' @param parsed data frame, result of udpipe::udpipe or spacyr::spacy_parse applied on `corpus`.
##' @import shiny
##' @import dplyr
##' @export

texplor_lemmatize <- function(corpus, parsed) {

  settings <- list()
  settings$parsed_name <- deparse(substitute(parsed))
  settings$corpus_name <- deparse(substitute(corpus))
  settings$tab_id <- paste0("tab", round(runif(1, 1, 1e9)))
  
  if (!inherits(parsed, "data.frame")) {
    stop("parsed must be a data frame generated by udpipe::udpipe or spacyr::spacy_parse.")
  }
  
  if (inherits(parsed, "spacyr_parsed")) {
    parsed <- parsed %>% 
      select(doc_id, token, lemma, pos)
    settings$pos_var <- "pos"
  }
  
  if (!inherits(parsed, "spacyr_parsed")) {
    if(!("upos" %in% names(parsed))) {
      stop("parsed must be a data frame generated by udpipe::udpipe or spacyr::spacy_parse.")
    }
    parsed <- parsed %>% 
      select(doc_id, token, lemma, pos = upos)
    settings$pos_var <- "upos"
  }
  
  
  parsed <- parsed %>%
    group_by(doc_id) %>% 
    mutate(
      prev1 = lag(token, default = ""),
      prev2 = lag(token, 2, default = ""),
      next1 = lead(token, default = ""),
      next2 = lead(token, 2, default = ""),
      context = paste(prev2, prev1, token, next1, next2)
    ) %>% 
    ungroup() %>% 
    mutate(pos = factor(pos)) %>%
    select(-prev2, -prev1, -next1, -next2)
  
  pos_choices <- parsed %>% 
    count(pos) %>% 
    mutate(name = paste0(pos, " (n=", n,")")) %>% 
    arrange(n) %>% 
    select(pos, name)
  choices <- as.character(pos_choices$pos)
  names(choices) <- pos_choices$name
  settings$pos_choices <- choices
  
  ## Launch interface
  runGadget(
    shiny::shinyApp(
      ui = lemmatizeUI(id = "texplorLemmatize", settings),
      server = function(input, output, session) {
        callModule(lemmatizeServer, "texplorLemmatize", corpus, parsed, settings)
      }
    ),
    viewer = dialogViewer("texplor lemmatize", width = 1900, height = 1000)
  )

}
juba/texplor documentation built on Oct. 14, 2020, 5:32 p.m.