tidy_dtm: Convert a 'DocumentTermMatrix'/'TermDocumentMatrix' into Tidy Form

Description Usage Arguments Value Examples

View source: R/tidy_dtm.R

Description

Converts non-zero elements of a DocumentTermMatrix/TermDocumentMatrix into a tidy data set.

Usage

tidy_dtm(x, ...)

tidy_tdm(x, ...)

Arguments

x

A DocumentTermMatrix/TermDocumentMatrix.

...

Ignored.

Value

Returns a tidied data.frame.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Minimal example: load the `simple_dtm` data set and tidy it.
data(simple_dtm)

tidy_dtm(simple_dtm)

## Not run: 
# Install pacman only when it is missing. requireNamespace() tests for the
# package without attaching it; every pacman call below is namespace-qualified,
# so attaching (as require() would do) is unnecessary.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load_current_gh('trinker/gofastr')
pacman::p_load(tidyverse, magrittr, ggstance)

# Build a matrix whose document ids are "<time>_<tot>".
my_dtm <- with(
    presidential_debates_2012, 
    q_dtm(dialogue, paste(time, tot, sep = "_"))
)

tidy_dtm(my_dtm) %>%
    # Split the document id back into its numeric components.
    tidyr::extract(
        col = doc, 
        into = c("time", "turn", "sentence"), 
        regex = "(\\d)_(\\d+)\\.(\\d+)"
    ) %>%
    mutate(
        time = as.numeric(time),
        turn = as.numeric(turn),
        sentence = as.numeric(sentence)
    ) %>%
    # as_tibble() replaces the deprecated tbl_df().
    as_tibble() %T>%
    # The tee pipe prints the intermediate table and passes it on unchanged.
    print() %>%
    # Total count of each term within each time.
    group_by(time, term) %>%
    summarize(n = sum(n)) %>%
    # Keep the ten most frequent terms per time.
    group_by(time) %>%
    arrange(desc(n)) %>%
    slice(1:10) %>%
    ungroup() %>%
    # Suffix each term with its time so factor levels are unique per facet;
    # the suffix is stripped again in the axis labels below.
    mutate(
        term = factor(paste(term, time, sep = "__"),
            levels = rev(paste(term, time, sep = "__")))
    ) %>%
    ggplot(aes(x = n, y = term)) +
        geom_barh(stat='identity') +
        facet_wrap(~time, ncol=2, scales = 'free_y') +
        scale_y_discrete(labels = function(x) gsub("__.+$", "", x))

## End(Not run)

textshape documentation built on May 29, 2021, 1:07 a.m.