# inst/doc/vignette.R

## -----------------------------------------------------------------------------
library(Rcpp)
library(RcppCWB)

## -----------------------------------------------------------------------------
# Obtain a handle for the "word" positional attribute of the REUTERS corpus.
# The registry location comes from get_tmp_registry(); the handle is kept in
# `p_attr_word` so that later calls can reuse it.
p_attr_word <- p_attr(
  registry = get_tmp_registry(),
  corpus = "REUTERS", p_attribute = "word"
)

## -----------------------------------------------------------------------------
# Look up corpus positions 0 through 10 on the `p_attr_word` handle. The
# result is not assigned, so it is only shown when the script is echoed.
cpos_to_str(p_attr_word, 0L:10L)

## -----------------------------------------------------------------------------
# Compile an inline C++ function and expose it to R as get_str().
# The C++ body chains two calls from the RcppCWB C++ interface: p_attr() to
# obtain the attribute handle, then cpos_to_str() to turn the integer vector
# `cpos` into a string vector. `depends = "RcppCWB"` makes that interface
# available to the compiler.
cppFunction(
  depends = "RcppCWB",
  code = 'Rcpp::StringVector get_str(SEXP corpus, SEXP p_attribute, SEXP registry, Rcpp::IntegerVector cpos){
     SEXP attr;
     Rcpp::StringVector result;
     attr = RcppCWB::p_attr(corpus, p_attribute, registry);
     result = RcppCWB::cpos_to_str(attr, cpos);
     return(result);
  }'
)

## -----------------------------------------------------------------------------
# Call the freshly compiled get_str() for corpus positions 0 through 50.
# Arguments are spelled out by name, matching the parameter names of the C++
# signature the function was compiled from.
get_str(
  corpus = "REUTERS",
  p_attribute = "word",
  registry = RcppCWB::get_tmp_registry(),
  cpos = 0:50
)

## -----------------------------------------------------------------------------
# Compile and load the C++ example file "cpp/fastdecode.cpp" that is looked up
# inside the installed RcppCWB package.
# `mustWork = TRUE` makes system.file() raise an error naming the missing file;
# without it a missing file yields "" and sourceCpp() fails with a much less
# helpful message.
sourceCpp(
  file = system.file(
    "cpp", "fastdecode.cpp",
    package = "RcppCWB",
    mustWork = TRUE
  )
)

## -----------------------------------------------------------------------------
# Temporary ".txt" file that receives the exported token stream.
outfile <- tempfile(fileext = ".txt")

# Export from the REUTERS corpus into `outfile`, using the "word" positional
# attribute and the "id" structural attribute.
# NOTE(review): `attribute_type = "s"` presumably selects the structural
# attribute as the unit of output - confirm against fastdecode.cpp.
write_token_stream(
  filename = outfile,
  registry = RcppCWB::get_tmp_registry(),
  corpus = "REUTERS",
  p_attribute = "word",
  s_attribute = "id",
  attribute_type = "s"
)

## -----------------------------------------------------------------------------
# Show the exported file, truncating every line to its first 75 characters.
# substr() is vectorized over its first argument, so it is applied to the whole
# character vector at once instead of element-wise via lapply() + unlist().
# This also keeps the result a character vector when the file is empty
# (the lapply()/unlist() form returned NULL in that case).
readLines(outfile) |>
  substr(1, 75)

# Try the RcppCWB package in your browser
#
# Any scripts or data that you put into this service are public.
#
# RcppCWB documentation built on July 9, 2023, 7:40 p.m.