ced_enc_detect: Detect Encoding

Description Usage Arguments Value Examples

View source: R/RcppExports.R

Description

Detect the charset encoding of a character or raw vector.

Usage

1
ced_enc_detect(x, enc_hint = NULL, lang_hint = NULL)

Arguments

x

Raw or character vector.

enc_hint

Character vector with encoding hint.

lang_hint

Character vector with language code hint.

Value

Character vector with suggested encodings.

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Detect the encoding of an ASCII string, passed first as a character
# vector and then as its raw bytes.
ascii <- "I can eat glass and it doesn't hurt me."
ced_enc_detect(ascii)
ced_enc_detect(charToRaw(ascii))

# Detect the encoding of a UTF-8 string (written with Unicode escapes),
# again as a character vector and as raw bytes.
utf8 <- "\u4e0b\u5348\u597d"
print(utf8)
ced_enc_detect(utf8)
ced_enc_detect(charToRaw(utf8))

# Locate the sample text file inside the installed 'ced' package and read
# it as DCF; the fields used below are looked up by language name.
ex_path <- system.file("test.txt", package = "ced")
ex_txt <- read.dcf(ex_path, all = TRUE)

# Russian text: show the sample, detect it as stored, then detect it after
# re-encoding with iconv() to several Cyrillic encodings.
print(ex_txt[["Russian"]])
ced_enc_detect(ex_txt[["Russian"]])
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "ibm866"))
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "windows-1251"))
ced_enc_detect(iconv(ex_txt[["Russian"]], "utf8", "koi8-r"))

# Chinese text: as stored, then re-encoded to GB18030.
print(ex_txt[["Chinese"]])
ced_enc_detect(ex_txt[["Chinese"]])
ced_enc_detect(iconv(ex_txt[["Chinese"]], "utf8", "gb18030"))

# Korean text: as stored, then re-encoded to UHC and ISO-2022-KR.
print(ex_txt[["Korean"]])
ced_enc_detect(ex_txt[["Korean"]])
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "uhc"))
ced_enc_detect(iconv(ex_txt[["Korean"]], "utf8", "iso-2022-kr"))

# Japanese text: as stored, then re-encoded to Shift_JIS and ISO-2022-JP.
print(ex_txt[["Japanese"]])
ced_enc_detect(ex_txt[["Japanese"]])
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "shift_jis"))
ced_enc_detect(iconv(ex_txt[["Japanese"]], "utf8", "iso-2022-jp"))



# Detect the encoding of web page content. This section needs network
# access and the optional 'curl' package; it is skipped when 'curl' is not
# installed. requireNamespace() is used so the check does not attach the
# package to the search path as a side effect.
if (requireNamespace("curl", quietly = TRUE)) {
  # Fetch a URL into memory and run detection on the `content` element of
  # the fetch result.
  detect_enc_url <- function(u) {
    ced_enc_detect(curl::curl_fetch_memory(u)$content)
  }

  urls <- c(
    "https://www.corriere.it",
    "https://www.vk.com",
    "https://www.qq.com",
    "https://kakaku.com",
    "https://etoland.co.kr"
  )

  # Print explicitly: inside a braced block only the value of the last
  # expression is auto-printed, so earlier results would otherwise be lost.
  for (u in urls) {
    print(detect_enc_url(u))
  }
}

ced documentation built on Jan. 14, 2020, 5:10 p.m.