R/revUTF8.R

Defines functions revUTF8

Documented in revUTF8

##' Revert UTF-8 string to Chinese character.
##' 
##' Replaces every R-style Unicode escape of the form "<U+XXXX>" found in
##' the input with the character for that code point. Text outside the
##' escapes is kept unchanged.
##' 
##' An escape is recognised when it holds 4 to 8 hexadecimal digits, so both
##' "<U+4E2D>" and the 8-digit form "<U+0001F600>" are handled. Escapes that
##' do not denote a valid code point (surrogates, values above U+10FFFF) are
##' left in the output exactly as written. \code{NA} elements stay \code{NA}.
##' 
##' @title Revert UTF-8 string to Chinese character.
##' @param string A character vector. It is first passed through the
##'   package-internal \code{.verifyChar} helper.
##' @param utype UTF-8 string type, the default is R type, such as "<U+XXXX>".
##'   Checked with \code{match.arg}; "R" is the only accepted value.
##' @return An unnamed character vector of the same length as \code{string}.
##' @author Jian Li <\email{rweibo@@sina.com}>
##' @examples
##' revUTF8("<U+4E2D><U+6587>")
##' 
revUTF8 <- function(string, utype = "R")
{
	string <- .verifyChar(string)
	utype <- match.arg(utype)

	# One pattern drives both matching and splitting, so the decoded
	# characters and the surrounding text can never get out of step.
	escapePattern <- "<U\\+[0-9A-Fa-f]{4,8}>"

	# Decode all escapes inside a single string.
	decodeOne <- function(s) {
		if (is.na(s)) {
			return(NA_character_)
		}
		hits <- gregexpr(escapePattern, s)
		tokens <- regmatches(s, hits)[[1]]
		if (length(tokens) == 0) {
			return(s)
		}
		# Text between the escapes; always one element longer than `tokens`.
		gaps <- regmatches(s, hits, invert = TRUE)[[1]]

		# Strip the leading "<U+" and trailing ">" to get the hex digits.
		codes <- strtoi(substr(tokens, 4L, nchar(tokens) - 1L), 16L)
		chars <- intToUtf8(codes, multiple = TRUE)

		# Overflowing or invalid code points come back as NA: keep the
		# original escape text rather than failing or emitting "NA".
		undecodable <- is.na(chars)
		chars[undecodable] <- tokens[undecodable]

		# Interleave gap, char, gap, char, ..., last gap.
		paste0(c(rbind(gaps, c(chars, ""))), collapse = "")
	}

	# vapply guarantees a character result, including for empty input.
	vapply(string, decodeOne, character(1), USE.NAMES = FALSE)
}

Try the tmcn package in your browser

Any scripts or data that you put into this service are public.

tmcn documentation built on Aug. 8, 2019, 9:02 a.m.