R/complete.R

Defines functions stemCompletion

Documented in stemCompletion

# Author: Ingo Feinerer

stemCompletion <-
function(x, dictionary,
         type = c("prevalent", "first", "longest",
                  "none", "random", "shortest"))
{
    if (inherits(dictionary, "Corpus"))
        dictionary <- unlist(lapply(dictionary, words))

    type <- match.arg(type)
    possibleCompletions <- lapply(x, function(w) grep(sprintf("^%s", w),
                                                      dictionary,
                                                      value = TRUE))
    switch(type,
           first = {
               setNames(sapply(possibleCompletions, "[", 1), x)
           },
           longest = {
               ordering <-
                   lapply(possibleCompletions,
                          function(x) order(nchar(x), decreasing = TRUE))
               possibleCompletions <-
                   mapply(function(x, id) x[id], possibleCompletions,
                          ordering, SIMPLIFY = FALSE)
               setNames(sapply(possibleCompletions, "[", 1), x)
           },
           none = {
               setNames(x, x)
           },
           prevalent = {
               possibleCompletions <-
                   lapply(possibleCompletions,
                          function(x) sort(table(x), decreasing = TRUE))
               n <- names(sapply(possibleCompletions, "[", 1))
               setNames(if (length(n)) n else rep_len(NA, length(x)), x)
           },
           random = {
               setNames(sapply(possibleCompletions, function(x) {
                   if (length(x)) sample(x, 1) else NA
               }), x)
           },
           shortest = {
               ordering <- lapply(possibleCompletions,
                                  function(x) order(nchar(x)))
               possibleCompletions <-
                   mapply(function(x, id) x[id], possibleCompletions,
                          ordering, SIMPLIFY = FALSE)
               setNames(sapply(possibleCompletions, "[", 1), x)
           }
           )
}

Try the tm package in your browser

Any scripts or data that you put into this service are public.

tm documentation built on Feb. 16, 2023, 9:40 p.m.