"You shall know a word by the company it keeps" {.smaller}

Initialization {.smaller}

# Set-up: make sure a sufficiently recent polmineR and all helper packages
# are available, then attach them and activate the UNGA corpus package.

# packageVersion() throws an error for a package that is not installed at
# all, so test for presence first. system.file() returns "" for a missing
# package and does not load the namespace, so a fresh install below is what
# library() attaches afterwards.
if (
  !nzchar(system.file(package = "polmineR")) ||
    packageVersion("polmineR") < package_version("0.7.10.9006")
) {
  # install_github() lives in devtools, which may itself be missing.
  if (!nzchar(system.file(package = "devtools"))) install.packages("devtools")
  devtools::install_github("PolMine/polmineR", ref = "dev")
}
library(polmineR)
use("UNGA")

# Packages that are attached for the remainder of the script.
for (pkg in c("magrittr", "data.table", "networkD3", "igraph", "DT", "RColorBrewer", "tm")) {
  # Checking a single package via system.file() avoids building the full
  # installed.packages() matrix on every iteration.
  if (!nzchar(system.file(package = pkg))) install.packages(pkg)
  library(package = pkg, character.only = TRUE)
}

# wordcloud is only called with an explicit namespace (wordcloud::wordcloud),
# so it has to be installed but does not need to be attached.
if (!nzchar(system.file(package = "wordcloud"))) install.packages("wordcloud")

The Analysis of Cooccurrences - Basics {.smaller}

Calculation of cooccurrences with polmineR {.smaller}

# Cooccurrences of the query "Islam" in the UNGA corpus, passing the
# left and right context arguments explicitly.
cooccurrences(
  "UNGA",
  query = "Islam",
  left = 10,
  right = 10
)

# The same query without explicit left/right arguments.
cooccurrences(
  "UNGA",
  query = "Islam"
)

Cooccurrences of "Islam" {.smaller}

# Show the cooccurrences of the query "Islam" in the UNGA corpus.
cooccurrences(
  "UNGA",
  query = "Islam"
)

Filtering Results {.smaller}

# Cooccurrences of "Islam", reduced by four independent row filters.
# The filters only drop rows, so their order does not affect the result.
# NOTE(review): 11.83 is presumably meant as a significance threshold for the
# log-likelihood statistic; the usual chi-squared critical value for
# p < 0.001 (df = 1) is 10.83 - confirm that 11.83 is intended.
cooccurrences("UNGA", query = "Islam") %>%
  # drop punctuation tokens
  subset(!(word %in% c("''", ",", "``"))) %>%
  # drop English stopwords (case-insensitive comparison)
  subset(!(tolower(word) %in% tm::stopwords("en"))) %>%
  # keep rows with count_coi of at least 5
  subset(count_coi >= 5) %>%
  # keep rows with a log-likelihood value of at least 11.83
  subset(ll >= 11.83)

Filtered word context of "Islam" {.smaller}

# Filtered word context of "Islam": the statistical thresholds are applied in
# one step, the lexical filters (stopwords, punctuation) in separate steps.
cooccurrences(
  "UNGA",
  query = "Islam"
) %>%
  subset(ll >= 11.83 & count_coi >= 5) %>%
  subset(!(tolower(word) %in% tm::stopwords("en"))) %>%
  subset(!(word %in% c("''", ",", "``")))

Pitfalls of Filtering {.smaller}

Cooccurrence to Concordance Analysis {.smaller}

# Set the polmineR.pagelength option to 3.
options(polmineR.pagelength = 3L)

# Concordances for "Islam" restricted by the positivelist "The", with "The"
# highlighted in yellow.
highlight(
  kwic("UNGA", query = "Islam", positivelist = "The"),
  yellow = "The"
)

Specification of the word context {.smaller}

# Set the session-wide left and right context options to 10 in one call.
options(
  polmineR.left = 10L,
  polmineR.right = 10L
)

# Cooccurrences for the quoted query "Islam", passing the s-attribute
# "speaker".
cooccurrences(
  "UNGA",
  query = '"Islam"',
  s_attribute = "speaker"
)

Using the CQP Syntax {.smaller}

# Set the polmineR.pagelength option to 5.
options(polmineR.pagelength = 5L)

# CQP query: "unjust", then one arbitrary token, then "sanctions".
cooccurrences(
  "UNGA",
  query = '"unjust" [] "sanctions"',
  cqp = TRUE
)

Cooccurrences based on Partitions {.smaller}

# Two partitions of the UNGA corpus: 1993-2000 and 2002-2009.
before_2001 <- partition("UNGA", year = 1993:2000)
after_2001 <- partition("UNGA", year = 2002:2009)

# Cooccurrences of the CQP query "Islam(s|)" in the earlier partition,
# keeping rows with count_coi >= 3 and ll >= 11.83.
islam_before <- cooccurrences(
  before_2001,
  query = '"Islam(s|)"',
  cqp = TRUE,
  s_attribute = "speaker"
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83)

# The same analysis for the later partition.
islam_after <- cooccurrences(
  after_2001,
  query = '"Islam(s|)"',
  cqp = TRUE,
  s_attribute = "speaker"
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83)

Cooccurrences of "Islam" (before 2001) {.smaller}

# Set the polmineR.pagelength option to 10.
options(polmineR.pagelength = 10L)

# Filtered cooccurrences of "Islam(s|)" in the before_2001 partition.
cooccurrences(
  before_2001,
  query = '"Islam(s|)"',
  cqp = TRUE,
  s_attribute = "speaker"
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83)

Cooccurrences of "Islam" (after 2001) {.smaller}

# Filtered cooccurrences of "Islam(s|)" in the after_2001 partition.
cooccurrences(
  after_2001,
  query = '"Islam(s|)"',
  cqp = TRUE,
  s_attribute = "speaker"
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83)

Filtering using Part-of-Speech annotation {.smaller}

# CQP query: a token whose pos matches "JJ.*", followed by up to five
# arbitrary tokens, followed by "sanctions".
Q <- '[pos = "JJ.*"] []{0,5} "sanctions"'

# Cooccurrences over the p-attributes word and pos, thresholded on count_coi
# and ll, then restricted to rows whose pos is "NN" or "JJ".
cooccurrences(
  "UNGA",
  query = Q,
  p_attribute = c("word", "pos")
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83) %>%
  subset(pos %in% c("NN", "JJ"))

POS-filtered Cooccurrences {.smaller}

# CQP query: a token whose pos matches "JJ.*", up to five arbitrary tokens,
# then "sanctions".
Q <- '[pos = "JJ.*"] []{0,5} "sanctions"'

# Store the filtered result so that later chunks can reuse it.
sanction_cooc <- cooccurrences(
  "UNGA",
  query = Q,
  p_attribute = c("word", "pos")
) %>%
  subset(count_coi >= 3) %>%
  subset(ll >= 11.83) %>%
  subset(pos %in% c("NN", "JJ"))

# Show the stored result.
sanction_cooc

Word Cloud Visualization of Cooccurrences {.smaller}

# Word cloud of the filtered cooccurrences: the words come from the "word"
# column and the frequencies from the "count_coi" column. The 8-colour
# "Dark2" palette is repeated seven times and colours are assigned randomly.
x <- wordcloud::wordcloud(
  words = sanction_cooc[["word"]],
  freq = sanction_cooc[["count_coi"]],
  random.color = TRUE,
  colors = rep(
    RColorBrewer::brewer.pal(8, "Dark2"),
    times = 7
  )
)

Word Cloud Visualization of Cooccurrences {.flexbox .vcenter}

# Draw the word cloud for sanction_cooc. The return value is assigned to x,
# so the call produces no printed output at top level.
x <- wordcloud::wordcloud(
  freq = sanction_cooc[["count_coi"]],
  words = sanction_cooc[["word"]],
  colors = rep(RColorBrewer::brewer.pal(8, "Dark2"), times = 7),
  random.color = TRUE
)

Dot Plot Visualization of Cooccurrences {.smaller}

# Dot plot of the POS-filtered cooccurrences of "sanctions". The object
# plotted is sanction_cooc, created in the POS-filtering chunk; the previous
# argument `mmh` is not defined anywhere in this script.
dotplot(sanction_cooc, cex = 0.8)

Dot Plot Visualization of Cooccurrences {.flexbox .vcenter}

# Dot plot of sanction_cooc with an explanatory two-line title.
dotplot(
  sanction_cooc,
  cex = 0.8,
  main = "Cooccurrences of 'Sanctions' \n preceded by an adjective (in prev. 5 words)"
)

Calculating all Cooccurrences {.smaller}

# Partition holding the speeches of "The Secretary-General".
# NOTE(review): the variable is called secgen2008 but the partition selects
# year = 2001 - confirm which of the two is intended.
secgen2008 <- partition(
  "UNGA",
  year = 2001,
  speaker = "The Secretary-General"
)

# Terms flagged by noise() in this partition; passed on as the stoplist.
terms_to_drop <- unlist(noise(terms(secgen2008, p_attribute = "word")))

# All cooccurrences within the partition (left = 5, right = 5), decoded,
# scored with ll() and then thresholded on ll and ab_count.
coocs <- Cooccurrences(
  secgen2008,
  p_attribute = "word",
  left = 5L,
  right = 5L,
  stoplist = terms_to_drop
) %>%
  decode() %>%
  ll() %>%
  subset(ll >= 11.83) %>%
  subset(ab_count >= 5)

Visualization of Cooccurrence Graphs {.smaller}

# Convert the cooccurrence object into an igraph graph and plot it.
plot(as_igraph(coocs))

Visualization of Cooccurrence Graphs (cont.) {.smaller}

# Plot the cooccurrence graph obtained from as_igraph().
plot(
  as_igraph(coocs)
)

Visualization with 'networkD3' {.smaller}

# Graph representation of the cooccurrences.
G <- as_igraph(coocs)

# Edge table: numeric vertex ids of both endpoints plus a constant value of 1
# per edge.
links <- setNames(
  as.data.frame(cbind(as_edgelist(G, names = FALSE), rep_len(1, length(E(G))))),
  c("source", "target", "value")
)
# Shift the endpoint ids to a zero-based index for the edges.
links[c("source", "target")] <- links[c("source", "target")] - 1L

# Node table: one row per vertex name, all in group 1, with a size of 3.
nodes <- data.frame(
  name = V(G)$name,
  group = rep_len(1, length(V(G)$name)),
  size = 3
)

# Interactive force-directed network built from the two tables.
forceNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  Group = "group",
  opacity = 0.75,
  fontSize = 20,
  zoom = TRUE
)

An interactive Cooccurrence Graph {.smaller}

library(networkD3)

# Turn the cooccurrence object into an igraph graph.
G <- as_igraph(coocs)

# One row per edge: endpoint ids (numeric, not vertex names) and a value of 1.
links <- as.data.frame(
  cbind(
    as_edgelist(G, names = FALSE),
    rep_len(1, length(E(G)))
  )
)
colnames(links) <- c("source", "target", "value")
# Subtract 1 from both endpoint columns.
links[c("source", "target")] <- links[c("source", "target")] - 1L

# One row per vertex: its name, a constant group of 1 and a size of 3.
nodes <- data.frame(
  name = V(G)$name,
  group = rep_len(1, length(V(G)$name)),
  size = 3
)

# Render the interactive graph.
forceNetwork(
  Links = links, Source = "source", Target = "target", Value = "value",
  Nodes = nodes, NodeID = "name", Group = "group",
  opacity = 0.75,
  fontSize = 20,
  zoom = TRUE
)

Visualization, Hermeneutics and 'close reading' {.smaller}

References



PolMine/UCSSR documentation built on June 13, 2022, 10:23 p.m.