# Nothing
## ---- include = FALSE---------------------------------------------------------
# Blank out CORPUS_REGISTRY before polmineR is attached below.
# NOTE(review): presumably this keeps the session from picking up a registry
# directory configured on the machine that runs the script -- confirm against
# the polmineR documentation.
Sys.setenv(CORPUS_REGISTRY = "")
## ----loading_polmineR---------------------------------------------------------
# The only package this script attaches explicitly.
library(polmineR)
## ----get_registry-------------------------------------------------------------
# Top-level call, so the result is auto-printed (chunk label: get_registry).
registry()
## ---- eval = FALSE------------------------------------------------------------
# Not evaluated: reads the environment variable that was blanked out above.
# Sys.getenv("CORPUS_REGISTRY")
## ----use_polmineR_data, message = FALSE, eval = TRUE--------------------------
# Presumably activates the corpora shipped inside the named packages; for
# RcppCWB only the REUTERS corpus is requested. All later chunks query
# "REUTERS", so these calls have to run first.
use("polmineR")
use("RcppCWB", corpus = "REUTERS")
## ---- eval = TRUE, message = FALSE--------------------------------------------
# Called without arguments and auto-printed; presumably lists the corpora that
# are available at this point -- TODO confirm.
corpus()
## ---- eval = FALSE, message = FALSE, results = 'hide'-------------------------
# Not evaluated: selects every entry of options() whose name contains
# "polmineR".
# options()[grep("polmineR", names(options()))]
## -----------------------------------------------------------------------------
# Set the three visible polmineR options in a single call. The values are kept
# exactly as before: 5 stays a double, FALSE stays logical.
options(
  polmineR.left = 5,
  polmineR.right = 5,
  polmineR.mc = FALSE
)
## ---- echo = FALSE, message = FALSE-------------------------------------------
# Hidden chunk (echo = FALSE): page length as an integer.
options(polmineR.pagelength = 3L)
## ---- eval = TRUE, render = knit_print----------------------------------------
# Four kwic() calls on the REUTERS corpus. Each result is assigned to `k`
# (overwriting the previous one) and is not printed.
# Plain single-token query.
k <- kwic("REUTERS", query = "oil")
## ---- eval = TRUE, render = knit_print----------------------------------------
# Same query, additionally passing the s-attribute "places".
k <- kwic("REUTERS", query = "oil", s_attributes = "places")
## ---- eval = TRUE, render = knit_print----------------------------------------
# Two s-attributes at once.
k <- kwic(
  "REUTERS",
  query = "oil",
  s_attributes = c("id", "places")
)
## ---- eval = TRUE, render = knit_print----------------------------------------
# Two-token pattern written with quoted tokens; the second token is a regular
# expression.
k <- kwic("REUTERS", query = '"oil" "price.*"')
## ---- eval = TRUE-------------------------------------------------------------
# Three count() calls on the REUTERS corpus; `cnt` is overwritten each time and
# never printed.
# One query term.
cnt <- count("REUTERS", query = "Kuwait")
# Several query terms in one call.
cnt <- count("REUTERS", query = c("Kuwait", "USA", "Bahrain"))
# Multi-token patterns, explicitly flagged as CQP syntax.
cnt <- count(
  "REUTERS",
  query = c('"United" "States"', '"Saudi" "Arabia.*"'),
  cqp = TRUE
)
## ---- eval = TRUE, message = FALSE--------------------------------------------
# dispersion() of the token "oil" over the s-attribute "id".
oil <- dispersion(
  "REUTERS",
  query = "oil",
  s_attribute = "id",
  progress = FALSE
)
## -----------------------------------------------------------------------------
# Same call for a two-token CQP pattern.
saudi_arabia <- dispersion(
  "REUTERS",
  query = '"Saudi" "Arabia.*"',
  s_attribute = "id",
  cqp = TRUE,
  progress = FALSE
)
## ---- eval = TRUE-------------------------------------------------------------
# Bar chart of the "count" column, labelled with the "id" column; las = 2 turns
# the axis labels perpendicular to the axis.
barplot(
  height = saudi_arabia[["count"]],
  names.arg = saudi_arabia[["id"]],
  las = 2
)
## ---- eval = TRUE, message = FALSE--------------------------------------------
# Cooccurrences for "oil" on the whole corpus.
oil <- cooccurrences("REUTERS", query = "oil")
# Cooccurrences for a two-token pattern with left/right both set to 10.
sa <- cooccurrences(
  "REUTERS",
  query = '"Saudi" "Arabia.*"',
  left = 10,
  right = 10
)
# Rows of the "oil" result whose rank_ll is at most 5.
top5 <- subset(oil, rank_ll <= 5)
## ---- eval = rmarkdown::pandoc_available(), render = knit_print---------------
# Auto-printed; the chunk is only evaluated when pandoc is available.
top5
## ---- eval = TRUE-------------------------------------------------------------
# The same rows, converted to and printed as a data.frame.
as.data.frame(top5)
## ---- eval = TRUE, message = FALSE, results = 'hide'--------------------------
# Partition of REUTERS selected via the "places" s-attribute; the value is
# passed with regex = TRUE.
kuwait <- partition(
  "REUTERS",
  places = "kuwait",
  regex = TRUE
)
## ---- eval = TRUE-------------------------------------------------------------
# Auto-print the partition object.
kuwait
## ---- eval = TRUE, message = FALSE--------------------------------------------
saudi_arabia <- partition(
  "REUTERS",
  places = "saudi-arabia",
  regex = TRUE
)
# Query the "id" s-attribute of the partition (auto-printed).
s_attributes(saudi_arabia, "id")
## ---- eval = TRUE, message = FALSE--------------------------------------------
# The vignette chunk rebuilds the partition before using it.
saudi_arabia <- partition(
  "REUTERS",
  places = "saudi-arabia",
  regex = TRUE
)
# Cooccurrences of "oil" computed on the partition instead of the full corpus.
oil <- cooccurrences(
  saudi_arabia,
  query = "oil",
  p_attribute = "word",
  left = 10,
  right = 10
)
## ---- eval = TRUE-------------------------------------------------------------
df <- as.data.frame(oil)
# First five rows, restricted to three columns (auto-printed).
df[1:5, c("word", "ll", "rank_ll")]
## ---- eval = TRUE-------------------------------------------------------------
# Dispersion within the partition: one query term, then two.
q1 <- dispersion(
  saudi_arabia,
  query = "oil",
  s_attribute = "id",
  progress = FALSE
)
q2 <- dispersion(
  saudi_arabia,
  query = c("oil", "barrel"),
  s_attribute = "id",
  progress = FALSE
)
## ---- eval = TRUE, message = FALSE, render = knit_print-----------------------
# Feature extraction for the "saudi-arabia" partition against the REUTERS
# corpus.
saudi_arabia <- partition("REUTERS", places = "saudi-arabia", regex = TRUE)
# enrich() is called for the p-attribute "word" before features() is run.
# NOTE(review): presumably this adds the word counts features() needs --
# confirm against the polmineR documentation.
saudi_arabia <- enrich(saudi_arabia, p_attribute = "word")
saudi_arabia_features <- features(saudi_arabia, "REUTERS", included = TRUE)
# Keep only rows that pass both thresholds. 10.83 is the critical value of the
# chi-square distribution (1 degree of freedom) at p = 0.001, so it must be
# compared with the test statistic itself (column `chisquare`). Comparing it
# with `rank_chisquare`, as this line did before, only kept the ten top-ranked
# rows regardless of their significance.
saudi_arabia_features_min <- subset(
  saudi_arabia_features,
  chisquare >= 10.83 & count_coi >= 5
)
# Auto-print the filtered result.
saudi_arabia_features_min
## ---- eval = TRUE, message = FALSE--------------------------------------------
# Plain data.frame copy, reduced to four columns.
df <- as.data.frame(saudi_arabia_features_min)
df_min <- df[, c("word", "count_coi", "count_ref", "chisquare")]
## ---- eval = TRUE-------------------------------------------------------------
# Split the corpus object by the s-attribute "id". Written as a nested call;
# equivalent to piping corpus("REUTERS") into partition_bundle().
articles <- partition_bundle(
  corpus("REUTERS"),
  s_attribute = "id",
  progress = FALSE
)
# Count the p-attribute "word" for the bundle.
articles_count <- count(articles, p_attribute = "word")
# Build a term-document matrix from the "count" column.
tdm <- as.TermDocumentMatrix(
  articles_count,
  col = "count",
  verbose = FALSE
)
class(tdm) # to see what it is
show(tdm)
m <- as.matrix(tdm) # turn it into an ordinary matrix
# Rows for the two terms, all columns (auto-printed).
m[c("oil", "barrel"), ]
## ---- eval = TRUE, message = FALSE--------------------------------------------
# Partition selected by id "248", passed to html() with a height of "250px".
article_248 <- partition("REUTERS", id = "248")
article_html <- html(article_248, height = "250px")
# Auto-print the html() result.
article_html
## ---- eval = FALSE------------------------------------------------------------
# Not evaluated: template for pointing CORPUS_REGISTRY at a registry directory.
# Sys.setenv(CORPUS_REGISTRY = "C:/PATH/TO/YOUR/REGISTRY")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.