R/inspectCorpus.R

# Display the current corpus in the corpus window.
#
# Fills the text widget returned by .getCorpusWindow() with a summary line
# (document and term counts taken from `dtm`), then, for every document of
# `corpus`: its ID, a details line built from its "Origin" and
# "DateTimeStamp" metadata (whichever are present), and its contents.
# Each ID is also appended to the listbox, and a text mark named "mark<k>"
# (k counting from 0) is set right after the corresponding ID is inserted.
#
# `corpus` and `dtm` are not arguments; they are free variables resolved
# outside this function.
#
# Takes no arguments. Called for its side effects on the Tk widgets; the
# value of the final tkraise() call is what gets returned.
inspectCorpus <- function() {
    setBusyCursor()
    # Restore the idle cursor however the function exits
    on.exit(setIdleCursor(), add = TRUE)

    objects <- .getCorpusWindow()
    window <- objects$window
    txt <- objects$txt
    listbox <- objects$listbox

    tkwm.title(window, .gettext("Current Corpus"))

    # Counter used to build the mark names ("mark0", "mark1", ...)
    mark <- 0

    tktag.configure(txt, "heading", font="sans 13 bold")
    tktag.configure(txt, "articlehead", font="sans 12 bold")
    tktag.configure(txt, "details", font="sans 10 italic")
    tktag.configure(txt, "small", font="sans 5")
    tktag.configure(txt, "fixed", font="courier 11")

    tkinsert(txt, "end",
             paste0(sprintf(.gettext("Current corpus contains %i documents and %i terms."),
                            nrow(dtm), ncol(dtm)), "\n\n"),
             "body")

    # Extracting document IDs is very slow: only do it once
    ids <- names(corpus)

    for(i in seq_along(corpus)) {
        id <- ids[i]
        tkinsert(txt, "end", paste0(id, "\n"), "articlehead")
        # Mark the position just before the current insertion point, i.e.
        # right after the ID line that was just inserted
        tkmark.set(txt, paste0("mark", mark), tkindex(txt, "insert-1c"))
        mark <- mark + 1
        tkinsert(listbox, "end", id)

        doc <- corpus[[i]]
        origin <- meta(doc, "Origin")
        date <- meta(doc, "DateTimeStamp")
        has_origin <- length(origin) > 0
        has_date <- length(date) > 0

        # Details line: "origin - date", or whichever of the two is available.
        # The date-only branch must test the date: testing the origin again
        # would make it unreachable and silently drop the date.
        if(has_origin && has_date)
            tkinsert(txt, "end", paste0(origin, " - ", date, "\n"), "details")
        else if(has_origin)
            tkinsert(txt, "end", paste0(origin, "\n"), "details")
        else if(has_date)
            tkinsert(txt, "end", paste0(date, "\n"), "details")

        # Small spacer between the details line and the document contents
        if(has_origin || has_date)
            tkinsert(txt, "end", "\n", "small")

        # paste() keeps its default " " separator here so the inserted text
        # is unchanged (contents, a space, then a blank line)
        tkinsert(txt, "end", paste(paste(doc, collapse="\n"), "\n\n"), "body")
    }

    # Only raise the window when we're done, as filling it may take some time
    tkraise(window)
}

Try the RcmdrPlugin.temis package in your browser

Any scripts or data that you put into this service are public.

RcmdrPlugin.temis documentation built on May 2, 2019, 11:10 a.m.