# inst/doc/introduction_usage.R

## ----message=FALSE, warning=FALSE---------------------------------------------
library(rainette)
library(quanteda)

## Import the corpus
# Path to the example text stored under "extdata" in the rainette package.
fichier <- system.file(
  "extdata", "manifeste_pc.txt",
  package = "rainette"
)
# Read that file into the `corpus` object used by the rest of the script.
corpus <- rainette::import_corpus_iramuteq(fichier)

## -----------------------------------------------------------------------------
# Display the corpus as imported.
corpus

## ----paged.print=TRUE---------------------------------------------------------
# Show the document-level variables attached to the corpus.
docvars(corpus)

## ----message=FALSE------------------------------------------------------------
# Split the corpus into segments and overwrite `corpus` with the result.
# NOTE(review): segment_size = 40 is presumably a size in words -- confirm
# against the split_segments() reference for the installed rainette version.
corpus <- split_segments(corpus, segment_size = 40)

## -----------------------------------------------------------------------------
# Display the corpus again, now that it has been segmented.
corpus

## -----------------------------------------------------------------------------
# First rows of the document variables after segmentation.
head(docvars(corpus))

## -----------------------------------------------------------------------------
# Text of the first two elements of the segmented corpus.
as.character(corpus)[1:2]

## -----------------------------------------------------------------------------
# Tokenise the corpus without punctuation or numbers, drop French stopwords,
# lowercase the remaining tokens, then build the document-feature matrix.
tok <- quanteda::tokens(
  corpus,
  remove_punct = TRUE,
  remove_numbers = TRUE
)
tok <- quanteda::tokens_remove(tok, quanteda::stopwords("fr"))
tok <- quanteda::tokens_tolower(tok)
dtm <- quanteda::dfm(tok)

## -----------------------------------------------------------------------------
# Trim the matrix using a minimum document frequency of 3.
dtm <- quanteda::dfm_trim(dtm, min_docfreq = 3)

## ----message=FALSE------------------------------------------------------------
# Simple clustering of the trimmed matrix with k = 5.
res <- rainette::rainette(
  dtm,
  k = 5,
  min_segment_size = 10,
  min_split_members = 10
)

## -----------------------------------------------------------------------------
# Display the clustering result.
res

## ----eval = FALSE-------------------------------------------------------------
# Not evaluated when the vignette is built (eval = FALSE), so the call is
# kept commented out.
#  rainette_explor(res, dtm, corpus)

## ----eval=FALSE---------------------------------------------------------------
# Also not evaluated (eval = FALSE). `show_negative` is written as the logical
# TRUE rather than the string "TRUE".
#  ## Clustering description plot
#  rainette_plot(res, dtm, k = 5, type = "bar", n_terms = 20, free_scales = FALSE,
#      measure = "chi2", show_negative = TRUE, text_size = 11)
#  ## Groups
#  cutree(res, k = 5)

## -----------------------------------------------------------------------------
# Store the result of cutting the clustering at k = 5 in a `groupe` variable
# on the corpus, then show the first rows of the document variables.
corpus$groupe <- cutree(res, k = 5)
head(docvars(corpus))

## -----------------------------------------------------------------------------
# Table built from the `groupe` variable.
# NOTE(review): presumably clusters counted per source document -- confirm
# against the clusters_by_doc_table() reference.
clusters_by_doc_table(corpus, clust_var = "groupe")

## -----------------------------------------------------------------------------
# Same call with prop = TRUE.
# NOTE(review): presumably proportions instead of counts -- confirm.
clusters_by_doc_table(corpus, clust_var = "groupe", prop = TRUE)

## -----------------------------------------------------------------------------
# Companion table from docs_by_cluster_table(), using the same `groupe`
# variable.
# NOTE(review): presumably source documents counted per cluster -- confirm.
docs_by_cluster_table(corpus, clust_var = "groupe")

## ----message=FALSE, warning=FALSE---------------------------------------------
# Two simple clusterings of the same matrix, both with k = 7, differing only
# in min_segment_size (10 and 15).
res1 <- rainette::rainette(
  dtm,
  k = 7,
  min_segment_size = 10
)
res2 <- rainette::rainette(
  dtm,
  k = 7,
  min_segment_size = 15
)

## ----message=FALSE------------------------------------------------------------
# Combine the two clusterings with max_k = 7. This overwrites the earlier
# `res` from the simple clustering.
res <- rainette::rainette2(
  res1,
  res2,
  max_k = 7
)

## ----eval=FALSE---------------------------------------------------------------
#  rainette2_explor(res, dtm, corpus)

## -----------------------------------------------------------------------------
# Cut the combined clustering at k = 5, then pass the matrix and the resulting
# groups to rainette2_complete_groups().
# NOTE(review): presumably this assigns a group to segments the double
# clustering left unclassified -- confirm against the rainette reference.
groupes <- rainette::cutree(res, k = 5)
groupes_complets <- rainette::rainette2_complete_groups(dtm, groupes)

# Try the rainette package in your browser

# Any scripts or data that you put into this service are public.

# rainette documentation built on March 31, 2023, 6:43 p.m.