Description Usage Arguments Value Note Examples
term_count
- Search a string by any number of grouping variables for
categories (themes) of grouped root terms/substrings.
Arguments:
text.var |
The text string variable. |
grouping.var |
The grouping variable(s). Default |
term.list |
A list of named character vectors. `term_count` can
be used in a hierarchical fashion as well; that is, a list of regexes can
be passed and counted and then a second (or more) pass can be taken with a new
set of regexes on only those rows/text elements that were left untagged
(count is zero). |
ignore.case |
logical. If |
pretty |
logical. If |
group.names |
A vector of names that corresponds to group. Generally for internal use. |
meta.sep |
A character separator (or character vector of separators) to
break up the term list names (tags) into that will generate a merge table
attribute on the output that has the supplied tags and meta + sub tags as
dictated by the separator breaks. Meta tags called in |
meta.names |
A vector of names corresponding to the meta tags generated
by |
... |
ignored. |
Returns a tibble object of term counts by grouping variable.
Note that while a term_count
object prints as a
combination of integer counts and weighted values (default: percent of terms) in
parentheses, the underlying object is actually a tibble
of integer term/substring counts. The user can alter a
term_count
object to print as integer permanently using
the as_count
function. A percent Coverage also
prints. This is the rate of grouping variables with no term found (i.e.,
rowSums
is zero for terms). For more details on coverage
see coverage
.
## Not run:
data(presidential_debates_2012)
discoure_markers <- list(
response_cries = c("\\boh", "\\bah", "\\baha", "\\bouch", "yuk"),
back_channels = c("uh[- ]huh", "uhuh", "yeah"),
summons = "\\bhey",
justification = "because"
)
(markers <- with(presidential_debates_2012,
term_count(dialogue, list(person, time), discoure_markers)
))
print(markers, pretty = FALSE)
print(markers, zero.replace = "_")
plot(markers)
plot(markers, labels=TRUE)
# permanently remove pretty printing
(markers2 <- as_count(markers))
# manipulating the output in a dplyr chain
library(dplyr)
presidential_debates_2012 %>%
with(., term_count(dialogue, list(person, time), discoure_markers)) %>%
as_count() # removes pretty print method (not necessary to manipulate)
presidential_debates_2012 %>%
with(., term_count(dialogue, list(person, time), discoure_markers)) %>%
mutate(totals = response_cries + back_channels + summons + justification) %>%
arrange(-totals)
## hierarchical terms
trms <- frequent_terms(presidential_debates_2012[["dialogue"]])[[1]]
discoure_markers <- list(
response_cries = c("\\boh", "\\bah", "\\baha", "\\bouch", "yuk"),
back_channels = c("uh[- ]huh", "uhuh", "yeah"),
summons = "hey",
justification = "because"
)
dbl_list <- list(
discoure_markers,
setNames(as.list(trms[1:8]), trms[1:8]),
setNames(as.list(trms[9:length(trms)]), trms[9:length(trms)])
)
x <- with(presidential_debates_2012,
term_count(dialogue, TRUE, dbl_list)
)
coverage(x)
## Auto mapping hierarchical terms w/ duplicate names
trpl_list <- list(
list(
response_cries = c("\\boh", "\\bah", "\\baha", "\\bouch", "yuk"),
back_channels = c("uh[- ]huh", "uhuh", "yeah"),
summons = "hey",
justification = "because"
),
list(summons ='the'),
list(summons = 'it', justification = 'ed\\s')
)
(x2 <- with(presidential_debates_2012, term_count(dialogue, TRUE, trpl_list)))
## get the pre-collapse hierarchical coverage
attributes(x2)[['pre_collapse_coverage']]
## End(Not run)
## External dictionaries
## Not run:
## dictionary from quanteda
require(quanteda); require(textreadr)
## Laver. M. & Garry, J. (2000). Estimating Policy Positions from Political Texts. American
## Journal of Political Science, 44 (3), 619-634.
dict_laver_garry <- textreadr::download("https://provalisresearch.com/Download/LaverGarry.zip") %>%
unzip(exdir = tempdir()) %>%
`[`(1) %>%
dictionary(file = .)
lg <- as_term_list(dict_laver_garry)
presidential_debates_2012 %>%
with(term_count(dialogue, list(time, person), lg)) %>%
plot()
## End(Not run)
## Not run:
## use with the qdapRegex package for feature extraction
if (!require("pacman")) install.packages("pacman")
pacman::p_load(qdapRegex, termco, dplyr, textshape, magrittr)
x <- c(
"@hadley I like #rstats for #ggplot2 work.",
"Difference between #magrittr and #pipeR, both implement pipeline operators for #rstats:
http://renkun.me/r/2014/07/26/difference-between-magrittr-and-pipeR.html @timelyportfolio",
"Slides from great talk: @ramnath_vaidya: Interactive slides from Interactive Visualization
presentation #user2014. http://ramnathv.github.io/user2014-rcharts/#1",
"fred is fred@foo.com and joe is joe@example.com - but @this is a",
"twitter handle for twit@here.com or foo+bar@google.com/fred@foo.fnord",
"hello world",
"I went to Washington Heights, NY for food! ",
"It's in West ven,PA, near Bolly Bolly Bolly, CA!",
"I like Movies, PG13",
'There is at UCLA',
'And at UB too',
'But UB is not UCLA.',
'It is like RSU',
"Dr. Brend is mizz hart's in mrs. Holtz's.",
"Where is mr. Bob Jr. and Ms. John Kennedy?",
"I want $2.33 at 2:30 p.m. to go to A.n.p.",
"She will send it A.S.A.P. (e.g. as soon as you can) said I.",
"Hello world.", "In the U. S. A.",
"Hello World (V. Raptor, 1986) bye",
"Narcissism is not dead (Rinker, 2014)",
"The R Core Team (2014) has many members.",
paste("Bunn (2005) said, \"As for elegance, R is refined, tasteful, and",
"beautiful. When I grow up, I want to marry R.\""),
"It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).",
"Wickham's (in press) Tidy Data should be out soon.",
"Rinker's (n.d.) dissertation not so much.",
"I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).",
"Uwe Ligges (2007) says, \"RAM is cheap and thinking hurts\"",
" Mr. Bean bought 2 tickets 2-613-213-4567 or 5555555555 call either one",
"43 Butter Rd, Brossard QC K0A 3P0 - 613 213 4567",
"Please contact Mr. Bean (613)2134567",
"1.575.555.5555 is his #1 number",
"7164347566",
"I like 1234567 dogs",
"download file from http://example.com",
"this is the link to my website http://example.com",
"go to http://example.com from more info.",
"Another url ftp://www.example.com",
"And https://www.example.net",
"twitter type: t.co/N1kq0F26tG",
"still another one https://t.co/N1kq0F26tG :-)",
"I'm getting 3:04 AM just fine, but...",
"for 10:47 AM I'm getting 0:47 AM instead.",
"no time here",
"Some time has 12:04 with no AM/PM after it",
"Some time has 12:04 a.m. or the form 1:22 pm",
"download file from http://example.com",
"this is the link to my website http://example.com",
"go to http://example.com from more info.",
"Another url ftp://www.example.com",
"And https://www.example.net",
"twitter type: t.co/N1kq0F26tG",
"still another one https://t.co/N1kq0F26tG :-)",
"are :-)) it 8-D he XD on =-D they :D of :-) is :> for :o) that :-/",
"as :-D I xD with :^) a =D to =) the 8D and :3 in =3 you 8) his B^D was"
)
matches <- list(
phone = grab('@rm_phone'),
hash = grab('@rm_hash'),
tag = grab('@rm_tag'),
url = grab('@rm_url'),
twitter_url = grab('@rm_twitter_url'),
email = grab('@rm_email'),
title = grab('@rm_title_name'),
citation = grab('@rm_citation'),
abbreviation = grab('@rm_abbreviation'),
time = grab('@rm_time'),
emoticon = grab('@rm_emoticon'),
state = pastex(state.abb)
)
set.seed(10)
txt <- sample(x, 1000, TRUE)
(tcnt <- term_count(txt, TRUE, matches, ignore.case = FALSE))
as_dtm(tcnt)
textshape::tidy_dtm(as_dtm(tcnt))
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.