# demo/collocation_score_by_country.R

library(RKorAPClient)
library(tidyverse)

# Node expression whose collocates are compared; for background see
# https://github.com/KorAP/RKorAPClient/issues/8
NODE <- "[tt/l=Ei]"

# Candidate collocates. The umlaut is written as a \u escape because demo
# files may only contain ASCII characters.
COLLOCATES <- c(
  "[tt/l=pellen]",
  "[tt/l=sch\u00e4len]"
)

# Country codes; each one is substituted for the %s placeholder in VC.
COUNTRIES <- c(
  "AT",
  "BE",
  "CH",
  "DE",
  "IT",
  "LU"
)

# sprintf() template for the virtual corpus definition of a single country.
VC <- "textType=/Zeit.*/ & pubPlaceKey=%s"

# Query grid: one row for every combination of node, collocate and country,
# plus the virtual corpus definition obtained by filling the country code
# into the VC template.
df <- expand_grid(
  node = NODE,
  collocate = COLLOCATES,
  country = COUNTRIES
)
df <- mutate(df, vc = sprintf(VC, country))

# Build the bar chart of logDice scores per country, one bar per collocate.
# The intermediate objects live inside local() so that only `g` is created
# in the calling environment.
g <- local({
  connection <- new("KorAPConnection", verbose = TRUE)

  # One collocation score query per row of df (unsmoothed scores).
  scores <- collocationScoreQuery(
    connection,
    df$node,
    df$collocate,
    df$vc,
    smoothingConstant = 0
  )

  # Attach the country codes column-wise.
  # NOTE(review): this assumes the result rows come back in the same order
  # as the rows of df -- confirm against the RKorAPClient documentation.
  scores <- bind_cols(scores, select(df, country))

  bar_mapping <- aes(
    x = country,
    y = logDice,
    label = sprintf("(%d)", O),
    fill = collocate
  )

  ggplot(scores, bar_mapping) +
    geom_col(position = "dodge") +
    # Dodge the frequency labels as well, so each one sits on its own bar.
    geom_text(position = position_dodge(width = 0.9), vjust = 1.5) +
    labs(
      title = sprintf("Collocates of '%s' by country of publication.", NODE),
      caption = "(absolute cooccurrence frequencies in parentheses)"
    )
})

print(g)

# Try the RKorAPClient package in your browser
#
# Any scripts or data that you put into this service are public.
#
# RKorAPClient documentation built on Aug. 9, 2023, 1:07 a.m.