This vignette illustrates how to use TEI tags to explore a single volume of the Foreign Relations of the United States.
# Set the default knitr chunk options for this vignette: `collapse = TRUE`
# and "#>" as the `comment` prefix.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# Attach the packages used throughout the vignette.
library(tidyverse)
library(tidytext)
library(TEIdytext)

# Locate the sample volume shipped in TEIdytext's "extdata" directory.
# `mustWork = TRUE` fails loudly if the file is missing instead of silently
# handing an empty path to TEIdy().
fname <- system.file(
  "extdata", "frus1946v06.xml",
  package = "TEIdytext",
  mustWork = TRUE
)

# Parse the TEI XML, passing "c" and "w" as the tags to ignore.
# NOTE(review): `all` shares its name with base::all(); the name is kept
# because later chunks in this vignette refer to it.
all <- TEIdy(fname, ignore = c("c", "w"))
Get word counts by document author and document number.
# Build a table of word counts with one row per (sender, word, document):
# keep only historical documents, attach the sender name to every row of a
# document, tokenize the plaintext into words, and count occurrences.
counts <- all %>%
  filter(div.document.subtype == "historical-document") %>%
  group_by(div.document.n) %>%
  # Lift the plaintext inside 'persName.from' tags to a new 'name_from' field.
  lift_data(persName.from, plaintext, name_from) %>%
  select(name_from, div.document.n, plaintext) %>%
  unnest_tokens(word, plaintext) %>%
  group_by(name_from, word, div.document.n) %>%
  # Drop tokens that have no sender name attached.
  filter(name_from != "") %>%
  summarize(count = n()) %>%
  # Most frequent (sender, word, document) combinations first.
  arrange(-count) %>%
  ungroup()
# Total word count per sender, keeping the ten most prolific senders.
top_senders <- counts %>%
  group_by(name_from) %>%
  summarize(count = sum(count)) %>%
  arrange(-count) %>%
  head(10)

# Horizontal bar chart of total words written by each top sender.
top_senders %>%
  ggplot() +
  geom_bar(aes(x = name_from, y = count), stat = "identity") +
  coord_flip()
# Plot the 40 longest documents written by the top senders, one bar per
# (sender, document) pair, ordered by length and coloured by sender.
counts %>%
  group_by(name_from, div.document.n) %>%
  summarize(words = sum(count)) %>%
  # Keep only documents from the top senders. The join key is spelled out
  # so the match does not depend on whichever columns happen to share a name.
  inner_join(top_senders, by = "name_from") %>%
  arrange(-words) %>%
  head(40) %>%
  ggplot() +
  geom_bar(
    aes(
      x = reorder(interaction(name_from, div.document.n), words),
      fill = name_from,
      y = words
    ),
    stat = "identity"
  ) +
  coord_flip() +
  labs(title = "Whoa, that's one long telegram!")
# For senders with more than 10,000 words in total, score each word with
# summarize_llr(). The plot below reads a `dunning_llr` column from the result.
# NOTE(review): presumably a Dunning log-likelihood of each word for a sender
# relative to the others; confirm against the summarize_llr() documentation.
j <- counts %>%
  group_by(name_from) %>%
  filter(sum(count) > 10000) %>%
  summarize_llr(word, count)

# For each sender with more than 100 scored words, plot the ten words with
# the highest score, one facet per sender.
j %>%
  group_by(name_from) %>%
  filter(n() > 100) %>%
  top_n(10, dunning_llr) %>%
  ggplot() +
  geom_bar(aes(x = word, y = dunning_llr), stat = "identity") +
  facet_wrap(~name_from, scales = "free_y") +
  coord_flip()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.