hilight: Highlight Text

Description Usage Arguments Examples

Description

hilight_term - Highlight regex matching substrings.

hilight_token - Highlight matching tokens.

hilight_sentence - Highlight matching sentences.

Usage

1
2
3
4
5
6
7
8
9
hilight_term(text.var, map, grouping.var = NULL, ignore.case = TRUE,
  keep.row.order = FALSE, ...)

hilight_token(text.var, map, grouping.var = NULL, ignore.case = TRUE,
  keep.row.order = FALSE, ...)

hilight_sentence(text.var, map, grouping.var = NULL,
  first.appearance = TRUE, ignore.case = TRUE, keep.row.order = FALSE,
  ...)

Arguments

text.var

The text string variable.

map

A named list or two-column data frame. For lists, the names are colors and the vectors are the conditional matches. For data frames, the first column is a color and the second is the match condition.

grouping.var

The grouping variable(s). Default NULL generates one plot for all text. Also takes a single grouping variable or a list of 1 or more grouping variables.

ignore.case

logical. If FALSE, the initial pattern matching is case sensitive; if TRUE, case is ignored during initial matching.

keep.row.order

logical. If TRUE, the grouping.var argument is used for headings but the row text is not collapsed within the grouping.var. This produces transcript-like formatting.

first.appearance

logical. If TRUE the first regex that matches a sentence will be applied, otherwise the last matching regex will be applied.

...

ignored.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
## Not run: 

## Highlight substrings matched by regular expressions.
## Each list name is a color; each value holds the regex(es) shown in it.
map1 <- list(
  `#FF69B4` = '\\bwe(\'[a-z]+)?\\b',
  `#7CFC00` = c('\\bhe is', "he's"),
  yellow = 'you(\'(ll|[vr]e))?\\b',
  gray70 = '\\btalk'
)

## Grouped by person and time (keep.row.order left at its default, FALSE)
term_regex <- with(
  presidential_debates_2012,
  hilight_term(
    text.var = dialogue,
    map = map1,
    grouping.var = list(person, time)
  )
)
plot(term_regex)

## Same grouping, but rows are not collapsed within each group
term_regex <- with(
  presidential_debates_2012,
  hilight_term(
    text.var = dialogue,
    map = map1,
    grouping.var = list(person, time),
    keep.row.order = TRUE
  )
)
plot(term_regex)

## tidier
## NOTE: the exposition pipe `%$%` is exported by magrittr and is not
## attached by library(tidyverse) (only `%>%` is re-exported there), so
## magrittr is loaded explicitly; otherwise the pipeline below stops with
## 'could not find function "%$%"'.
library(tidyverse)
library(magrittr)

## List names are colors; values are the regexes highlighted in that color.
map1B <- list(
    orange = c('\\bwe(\'[a-z]+)?\\b'),
    pink = c('that[\'a-z]*\\b'),
    yellow = 'you(\'(ll|[vr]e))?\\b',
    gray = '\\bI\\b'
)

## Keep only the two candidates, then expose the remaining columns with
## `%$%` so `dialogue` and `person` can be referenced by name.
presidential_debates_2012 %>%
    dplyr::filter(person %in% c('ROMNEY', 'OBAMA')) %$%
    hilight_term(
        text.var = dialogue,
        map = map1B,
        grouping.var = person
    ) %>%
    plot()

## Highlight matching tokens.
## List names are colors; values are the tokens shown in that color.
map2 <- list(
  `#FF69B4` = c('talk', 'you'),
  `#7CFC00` = c('he', "he's", 'we\'re', 'we'),
  yellow = 'right',
  gray70 = c('.', '?', '!')
)

token_match <- with(
  presidential_debates_2012,
  hilight_token(
    text.var = dialogue,
    map = map2,
    grouping.var = list(person, time)
  )
)
plot(token_match)


## Highlight whole sentences that match a regex.
## first.appearance is left at its default (TRUE), so the first regex that
## matches a sentence is the one applied.
map3 <- list(
  `#FF69B4` = 'think',
  `#7CFC00` = c('he is', "he's", 'you(\'[vr]e)?\\b')
)

sent_regex <- with(
  presidential_debates_2012,
  hilight_sentence(
    text.var = dialogue,
    map = map3,
    grouping.var = list(person, time)
  )
)
plot(sent_regex)

## Highlight sentences by index.
## Draw three sets of 200 indices from 1..2912 (presumably the number of
## sentences in the data -- confirm), one set per color. The draw order
## (yellow, orange, pink) is fixed by the seed.
set.seed(10)
map_index <- list(
  yellow = sample(seq_len(2912), 200),
  orange = sample(seq_len(2912), 200),
  `#ff69b4` = sample(seq_len(2912), 200)
)

## Make the sets disjoint: an index keeps the first color it was drawn for.
map_index$orange <- setdiff(map_index$orange, map_index$yellow)
map_index[["#ff69b4"]] <- setdiff(
  map_index[["#ff69b4"]],
  unlist(map_index[c("yellow", "orange")])
)


## Apply the index-based map, grouped by person and time
sent_index <- with(
  presidential_debates_2012,
  hilight_sentence(
    text.var = dialogue,
    map = map_index,
    grouping.var = list(person, time)
  )
)
plot(sent_index)


## End(Not run)

trinker/textplot documentation built on May 29, 2019, 1:38 p.m.