| encode | R Documentation | 
encode(x, ...)
## S4 method for signature 'data.frame'
encode(
  x,
  corpus,
  s_attributes = NULL,
  encoding = "utf8",
  registry_dir = fs::path(tempdir(), "cwb_registry"),
  data_dir = fs::path(tempdir(), "cwb_data_dir", tolower(corpus)),
  properties = c(),
  method = c("R", "CWB"),
  verbose = TRUE,
  compress = FALSE,
  reload = TRUE,
  quietly = TRUE
)
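| x | A `data.frame` (or an object inheriting from it, such as a tibble) holding the tokenized data to encode. | 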
| ... | Further arguments (unused). | 
| corpus | ID of the CWB corpus to create. | 
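| s_attributes | A `data.frame` describing structural attributes, with columns `cpos_left` and `cpos_right` giving the corpus positions of each region (see the example); defaults to `NULL`. | 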
| encoding | Encoding as defined in the charset corpus property of the registry file for the corpus ('latin1' to 'latin9', and 'utf8'). | 
| registry_dir | Registry directory. | 
| data_dir | The data directory for the binary files of the corpus. | 
| properties | A named `character` vector of corpus properties, e.g. `c(lang = "en")`. | 
| method | Either 'CWB' or 'R', defaults to 'R'. See section 'Details'. | 
| verbose | A `logical` value, whether to show progress messages; defaults to `TRUE`. | 
| compress | A `logical` value, whether to compress the corpus; defaults to `FALSE`. | 
| reload | A logical value, whether to reload the corpus to make it immediately available. | 
| quietly | A `logical` value, whether to suppress messages; defaults to `TRUE`. | 
# Conditional example: dplyr, tidytext and quanteda are only suggested
# packages, so the code below runs only when all three are installed.
dplyr_available <- requireNamespace("dplyr")
tidytext_available <- requireNamespace("tidytext")
quanteda_available <- requireNamespace("quanteda")

if (dplyr_available && tidytext_available && quanteda_available) {
  library(dplyr) # pipe would not be available otherwise
  library(tidytext)

  # Temporary registry directory. Guard the creation so that re-running the
  # example within the same R session does not raise an "already exists"
  # warning.
  registry_tmp <- fs::path(tempdir(), "cwb_registry")
  if (!dir.exists(registry_tmp)) {
    dir.create(registry_tmp)
  }

  # One row per text; the element names of the character vector are kept in
  # column `party`, the text itself is moved to column `text`.
  tidydata <- quanteda::data_char_ukimmig2010 %>%
    as.data.frame() %>%
    as_tibble(rownames = "party") %>%
    rename(text = ".")

  # One row per token. Corpus positions are zero-based; `row_number() - 1L`
  # stays correct for an empty table, unlike `0L:(nrow(.) - 1L)`, which
  # would yield c(0L, -1L).
  tokenstream <- tidydata %>%
    unnest_tokens(word, text, to_lower = FALSE, strip_punct = FALSE) %>%
    mutate(cpos = row_number() - 1L)

  # First and last corpus position of the tokens belonging to each party.
  metadata <- tokenstream %>%
    group_by(party) %>%
    summarise(cpos_left = min(cpos), cpos_right = max(cpos))

  # Encode the bare token column; the regions in `metadata` are passed as
  # structural attributes. The registry directory created above is passed
  # explicitly (it equals the default of `registry_dir`).
  tokenstream %>%
    select(-cpos, -party) %>%
    encode(
      corpus = "UKIMMIG2010",
      s_attributes = metadata,
      properties = c(lang = "en"),
      registry_dir = registry_tmp
    )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.