Warum die Nachbarschaft von Worten zählt {.smaller}

Initialisierung {.smaller}

# Set-up: make sure every package used on the slides is installed and attached.
#
# system.file(package = ) returns "" for a package that is not installed, so
# it serves as a cheap "is it installed?" test. This avoids scanning the whole
# library with installed.packages() and, unlike a bare packageVersion() call,
# does not fail when polmineR is missing altogether.
if (
  !nzchar(system.file(package = "polmineR")) ||
    packageVersion("polmineR") < package_version("0.7.10.9006")
) {
  # The development version is installed from GitHub, which needs devtools.
  if (!nzchar(system.file(package = "devtools"))) install.packages("devtools")
  devtools::install_github("PolMine/polmineR", ref = "dev")
}
library(polmineR)

# Install (if necessary) and attach the remaining packages.
for (pkg in c("magrittr", "data.table", "networkD3", "igraph", "DT", "RColorBrewer", "tm")) {
  if (!nzchar(system.file(package = pkg))) install.packages(pkg)
  library(package = pkg, character.only = TRUE)
}

# wordcloud is only ever called with an explicit namespace prefix
# (wordcloud::wordcloud), so it has to be installed but need not be attached.
if (!nzchar(system.file(package = "wordcloud"))) install.packages("wordcloud")

Grundlagen der Analyse von Kookkurrenzen {.smaller}

Berechnung von Kookkurrenzen mit polmineR {.smaller}

# Cooccurrences of "Islam" in GERMAPARL with left/right explicitly set to 10.
cooccurrences(
  "GERMAPARL",
  query = "Islam",
  left = 10,
  right = 10
)

# The same query without left/right, i.e. relying on the configured defaults.
cooccurrences(
  "GERMAPARL",
  query = "Islam"
)

Kookkurrenzen zu "Islam" {.smaller}

# Unfiltered cooccurrence table for the query "Islam" (shown as slide output).
cooccurrences(
  "GERMAPARL",
  query = "Islam"
)

Filtern von Ergebnissen {.smaller}

# Cooccurrences of "Islam", narrowed down in two steps:
#   1. keep rows that pass both the ll and the count_coi threshold,
#   2. drop German stopwords (case-insensitively) and three punctuation tokens.
# NOTE(review): 11.83 is presumably a significance cut-off for the
# log-likelihood statistic - confirm the intended level.
"GERMAPARL" %>%
  cooccurrences(query = "Islam") %>%
  subset(ll >= 11.83 & count_coi >= 5) %>%
  subset(
    !(tolower(word) %in% tm::stopwords("de")) &
      !(word %in% c("''", ",", "``"))
  )

Gefilterter Wortkontext von "Islam" {.smaller}

# Filtered word context of "Islam" (slide output): rows must pass the ll and
# count_coi thresholds and must be neither a German stopword nor one of the
# listed punctuation tokens.
"GERMAPARL" %>%
  cooccurrences(query = "Islam") %>%
  subset(ll >= 11.83 & count_coi >= 5) %>%
  subset(
    !(tolower(word) %in% tm::stopwords("de")) &
      !(word %in% c("''", ",", "``"))
  )

Tücken des Filterns {.smaller}

Von der Kookkurrenz- zur Konkordanz-Analyse {.smaller}

# Limit the paged output to three rows for this slide.
options(polmineR.pagelength = 3L)

# Concordances of "Islam" restricted via positivelist to "Der"; the token
# "Der" is then highlighted in yellow.
"GERMAPARL" %>%
  kwic(query = "Islam", positivelist = "Der") %>%
  highlight(yellow = "Der")

Definition des Wortkontextes {.smaller}

# Set the session-wide polmineR.left / polmineR.right options to 10 each.
options(
  polmineR.left = 10L,
  polmineR.right = 10L
)

# Cooccurrences of the query '"Islam"', passing "speaker" as s_attribute.
cooccurrences(
  "GERMAPARL",
  query = '"Islam"',
  s_attribute = "speaker"
)

Nutzung der CQP-Syntax {.smaller}

# Show five rows per page on this slide.
options(polmineR.pagelength = 5L)

# Multi-word query in CQP syntax; cqp = TRUE states explicitly that the query
# string is to be interpreted as CQP.
cooccurrences(
  "GERMAPARL",
  query = '"Menschen" "mit" "Migrationshintergrund"',
  cqp = TRUE
)

Partitionen als Basis von Kookkurrenzen {.smaller}

# Base partition: years 2002-2011 with interjection = FALSE.
btsub <- partition(
  "GERMAPARL",
  year = 2002:2011,
  interjection = FALSE
)

# Cooccurrences of "Islam"/"Islams" (CQP regex) within the CDU/CSU
# sub-partition, keeping rows with count_coi >= 3 and ll >= 11.83.
islam_union <- btsub %>%
  partition(parliamentary_group = "CDU/CSU") %>%
  cooccurrences(query = '"Islam(s|)"', cqp = TRUE, s_attribute = "speaker") %>%
  subset(count_coi >= 3 & ll >= 11.83)

# The same computation for the GRUENE sub-partition.
islam_gruene <- btsub %>%
  partition(parliamentary_group = "GRUENE") %>%
  cooccurrences(query = '"Islam(s|)"', cqp = TRUE, s_attribute = "speaker") %>%
  subset(count_coi >= 3 & ll >= 11.83)

Kookkurrenzen von "Islam" (CDU/CSU) {.smaller}

# Show ten rows per page from here on.
options(polmineR.pagelength = 10L)

# Slide output: filtered cooccurrences of "Islam"/"Islams" for CDU/CSU,
# computed with progress = FALSE.
btsub %>%
  partition(parliamentary_group = "CDU/CSU") %>%
  cooccurrences(
    query = '"Islam(s|)"',
    cqp = TRUE,
    s_attribute = "speaker",
    progress = FALSE
  ) %>%
  subset(count_coi >= 3 & ll >= 11.83)

Kookkurrenzen von "Islam" (B'90/Die Grünen) {.smaller}

# Slide output: filtered cooccurrences of "Islam"/"Islams" for the GRUENE
# sub-partition, computed with progress = FALSE.
btsub %>%
  partition(parliamentary_group = "GRUENE") %>%
  cooccurrences(
    query = '"Islam(s|)"',
    cqp = TRUE,
    s_attribute = "speaker",
    progress = FALSE
  ) %>%
  subset(count_coi >= 3 & ll >= 11.83)

Filtern mittels Part-of-Speech-Annotation {.smaller}

# Query for the three-token sequence "Menschen mit Migrationshintergrund".
mmh_query <- '"Menschen" "mit" "Migrationshintergrund"'

# Cooccurrences over the p-attributes word and pos, so that the result can be
# filtered by part-of-speech tag: only rows tagged NN or ADJA that also pass
# the count_coi and ll thresholds are kept.
"GERMAPARL" %>%
  cooccurrences(query = mmh_query, p_attribute = c("word", "pos")) %>%
  subset(count_coi >= 3 & ll >= 11.83) %>%
  subset(pos %in% c("NN", "ADJA"))

POS-gefilterte Kookkurrenzen {.smaller}

# Query for the three-token sequence "Menschen mit Migrationshintergrund".
mmh_query <- '"Menschen" "mit" "Migrationshintergrund"'

# POS-filtered cooccurrences (tags NN / ADJA, count_coi >= 3, ll >= 11.83),
# stored as `mmh` for the visualisations on later slides.
mmh <- "GERMAPARL" %>%
  cooccurrences(query = mmh_query, p_attribute = c("word", "pos")) %>%
  subset(count_coi >= 3 & ll >= 11.83) %>%
  subset(pos %in% c("NN", "ADJA"))

# Display the result on the slide.
mmh

Wortwolken-Visualisierung von Kookkurrenzen {.smaller}

# Word cloud of the cooccurring words, sized by count_coi. Colours are drawn
# at random (random.color = TRUE) from the 8-colour "Dark2" palette, which is
# repeated seven times. The return value is captured in `x`.
x <- wordcloud::wordcloud(
  freq = mmh[["count_coi"]],
  words = mmh[["word"]],
  random.color = TRUE,
  colors = rep(
    RColorBrewer::brewer.pal(8, "Dark2"),
    times = 7
  )
)

Wortwolken-Visualisierung von Kookkurrenzen {.flexbox .vcenter}

# Slide output: word cloud of the words in `mmh`, with count_coi as frequency
# and randomly assigned colours from the repeated "Dark2" palette.
x <- wordcloud::wordcloud(
  freq = mmh[["count_coi"]],
  words = mmh[["word"]],
  random.color = TRUE,
  colors = rep(
    RColorBrewer::brewer.pal(8, "Dark2"),
    times = 7
  )
)

Dotplot-Visualisierung von Kookkurrenzen {.smaller}

# Dot plot of the filtered cooccurrences, drawn with cex = 0.8.
dotplot(
  mmh,
  cex = 0.8
)

Dotplot-Visualisierung von Kookkurrenzen {.flexbox .vcenter}

# Slide output: dot plot of `mmh`, drawn with cex = 0.8.
dotplot(
  mmh,
  cex = 0.8
)

Berechnung aller Kookkurrenzen {.smaller}

# Partition: speaker "Angela Merkel" in 2008, with interjection = FALSE.
m2008 <- partition(
  "GERMAPARL",
  year = 2008,
  speaker = "Angela Merkel",
  interjection = FALSE
)

# Terms of the partition that noise() flags, flattened into one vector and
# used as stoplist below.
terms_to_drop <- m2008 %>%
  terms(p_attribute = "word") %>%
  noise() %>%
  unlist()

# All cooccurrences within the partition (left = 5, right = 5), decoded,
# scored with ll() and then reduced to rows with ll >= 11.83 and ab_count >= 5.
coocs <- m2008 %>%
  Cooccurrences(
    p_attribute = "word",
    left = 5L,
    right = 5L,
    stoplist = terms_to_drop
  ) %>%
  decode() %>%
  ll() %>%
  subset(ll >= 11.83) %>%
  subset(ab_count >= 5)

Visualisierung von Kookkurrenz-Graphen {.smaller .columns-2}

# Convert the cooccurrences into an igraph object and plot it.
plot(as_igraph(coocs))

Visualisierung mit 'networkD3' {.smaller}

# Graph representation of the cooccurrences.
G <- as_igraph(coocs)

# Edge table for networkD3: take the numeric edge list, shift both index
# columns by one to get "zero-based" indices, and give every edge the
# constant value 1.
links <- as_edgelist(G, names = FALSE)
links <- data.frame(
  source = links[, 1] - 1,
  target = links[, 2] - 1,
  value = rep(1, nrow(links))
)

# Node table: one row per vertex name, all in group 1, with size 3.
nodes <- data.frame(
  name = V(G)$name,
  group = rep(1, length(V(G)$name)),
  size = 3
)

# Interactive force-directed rendering of the graph.
forceNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  Group = "group",
  opacity = 0.75,
  fontSize = 20,
  zoom = TRUE
)

Kookkurrenz-Graph, interaktiv {.smaller}

library(networkD3)

# Graph representation of the cooccurrences.
G <- as_igraph(coocs)

# Edge table with "zero-based" source/target indices and a constant value of 1
# for every edge.
links <- as_edgelist(G, names = FALSE)
links <- data.frame(
  source = links[, 1] - 1,
  target = links[, 2] - 1,
  value = rep(1, nrow(links))
)

# Node table: vertex names, a single group (1) and size 3.
nodes <- data.frame(
  name = V(G)$name,
  group = rep(1, length(V(G)$name)),
  size = 3
)

# Slide output: interactive force-directed graph.
forceNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  Group = "group",
  opacity = 0.75,
  fontSize = 20,
  zoom = TRUE
)

Visualisierung, Hermeneutik, "close reading" {.smaller}

Literatur



PolMine/UCSSR documentation built on June 13, 2022, 10:23 p.m.