##' @param corpus quanteda corpus object
##' @param parsed data frame, result of udpipe::udpipe or spacyr::spacy_parse applied on `corpus`.
##' @import shiny
##' @import dplyr
##' @export
texplor_lemmatize <- function(corpus, parsed) {
settings <- list()
settings$parsed_name <- deparse(substitute(parsed))
settings$corpus_name <- deparse(substitute(corpus))
settings$tab_id <- paste0("tab", round(runif(1, 1, 1e9)))
if (!inherits(parsed, "data.frame")) {
stop("parsed must be a data frame generated by udpipe::udpipe or spacyr::spacy_parse.")
}
if (inherits(parsed, "spacyr_parsed")) {
parsed <- parsed %>%
select(doc_id, token, lemma, pos)
settings$pos_var <- "pos"
}
if (!inherits(parsed, "spacyr_parsed")) {
if(!("upos" %in% names(parsed))) {
stop("parsed must be a data frame generated by udpipe::udpipe or spacyr::spacy_parse.")
}
parsed <- parsed %>%
select(doc_id, token, lemma, pos = upos)
settings$pos_var <- "upos"
}
parsed <- parsed %>%
group_by(doc_id) %>%
mutate(
prev1 = lag(token, default = ""),
prev2 = lag(token, 2, default = ""),
next1 = lead(token, default = ""),
next2 = lead(token, 2, default = ""),
context = paste(prev2, prev1, token, next1, next2)
) %>%
ungroup() %>%
mutate(pos = factor(pos)) %>%
select(-prev2, -prev1, -next1, -next2)
pos_choices <- parsed %>%
count(pos) %>%
mutate(name = paste0(pos, " (n=", n,")")) %>%
arrange(n) %>%
select(pos, name)
choices <- as.character(pos_choices$pos)
names(choices) <- pos_choices$name
settings$pos_choices <- choices
## Launch interface
runGadget(
shiny::shinyApp(
ui = lemmatizeUI(id = "texplorLemmatize", settings),
server = function(input, output, session) {
callModule(lemmatizeServer, "texplorLemmatize", corpus, parsed, settings)
}
),
viewer = dialogViewer("texplor lemmatize", width = 1900, height = 1000)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.