
### ceeboo 2008


## Tabulate a named vector of counts for inspection.
##
## x: a vector whose names are the counted strings.
##
## Returns a data.frame with one row per element of x:
##   counts   - the values of x
##   bytes    - the length in bytes of each name
##   encoding - the declared encoding of each name, as reported by Encoding()
.format.count <- function(x) {
    keys <- names(x)
    data.frame(
        counts = x,
        bytes = nchar(keys, type = "bytes", allowNA = TRUE),
        encoding = Encoding(keys),
        stringsAsFactors = FALSE
    )
}

## Test fixtures.
##
## t1 holds a plain ASCII string, a string whose second byte is 0xC4
## (latin capital letter a with diaeresis in latin1), the empty string
## and a missing value.

t1 <- c("abc", "a\xc4", "", NA)
## Declare the encodings explicitly so the fixture does not depend on the
## session locale. Only "latin1", "UTF-8", "bytes" and "unknown" are
## documented values for `Encoding<-`.
Encoding(t1) <- c("unknown", "latin1", "unknown", "unknown")

## t2 wraps the two non-empty strings in "_" and keeps the empty string
## and the NA unchanged.
t2 <- c(paste0("_", t1[1:2], "_"), t1[3:4])

## Exercise the package-internal native counting routines directly via
## .Call on the fixtures t1 and t2. All arguments are positional; their
## meaning is defined by the C code, which is not visible from this file.
## NOTE(review): every result is assigned to `r` and overwritten by the
## next call without being inspected here -- presumably the checks are
## "does not crash" only, or `.format.count(r)` was meant to be printed;
## confirm against the saved reference output, if any.

## count n-grams
r <- .Call(tau:::tau_utf8CountNgram, list(t2), 3L, 0L, TRUE, FALSE, FALSE)

## incremental
## The first call differs from the one above only in its fifth routine
## argument (TRUE instead of FALSE) and its result is discarded; the
## second call repeats the non-incremental form and keeps the result.
## Presumably the flag makes the routine retain state between calls --
## TODO confirm against the C source. The order of these two statements
## matters.
.Call(tau:::tau_utf8CountNgram, list(t2), 3L, 0L, TRUE, TRUE, FALSE)
r <- .Call(tau:::tau_utf8CountNgram, list(t2), 3L, 0L, TRUE, FALSE, FALSE)

## count strings
## (fourth routine argument 0L; the calls below vary only this value)
r <- .Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 0L, TRUE, FALSE, FALSE)

## count prefixes
## (fourth routine argument 1L)
r <- .Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 1L, TRUE, FALSE, FALSE)

## count suffixes
## (fourth routine argument 2L)
r <- .Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 2L, TRUE, FALSE, FALSE)

## FIXME add to interface
## (fourth routine argument 3L; what this value selects is not stated in
## this file)
r <- .Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 3L, TRUE, FALSE, FALSE)

## incremental
## Same pattern as the n-gram case: the first call sets the sixth routine
## argument to TRUE and discards its result, then the plain string count
## is repeated and kept.
.Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 0L, TRUE, TRUE, FALSE)
r <- .Call(tau:::tau_utf8CountString, list(t1), 3L, 0L, 0L, TRUE, FALSE, FALSE)


## Try the tau package in your browser
##
## Any scripts or data that you put into this service are public.
##
## tau documentation built on May 29, 2024, 6:34 a.m.