# R/ids_dataset.R

# Provenance ----
# The script below is the code that was used to build `idslib`. It is kept
# verbatim, as comments only, as a record of how the data were produced.
#
# NOTE(review): the input is read from an absolute path on the original
#   author's machine; presumably that CSV is a conversion of the upstream
#   ids.txt -- confirm before trying to reproduce.
# NOTE(review): `pylib` is not defined in this file; presumably another
#   dataset/object of this package -- verify.
# NOTE(review): the `structchar` definition is commented out (placeholder
#   text only), so `structchar` must be defined by hand before the
#   `subset(..., %in% structchar)` step can run.
# NOTE(review): in the second loop, `ids[[i+1]]` indexes one past the last
#   column when i == length(todo), and the NA mask used to blank column i+1
#   is recomputed after column i has already been filled. It looks like the
#   mask should be captured once before the first assignment -- confirm.
# NOTE(review): `ids[i,2]` takes the second column of row i; if `ids` is
#   still a tibble at that point, each stored value is presumably a 1x1
#   tibble rather than a bare string -- verify against the saved object.
#
# ids = read_csv('C:/Users/phil_collender/Documents/bleh/RLpaper/RLmanuscript/ids.csv')
# ids = subset(ids, Char %in% names(pylib))
# ids$Decomp6 <- ids$Decomp7 <- NULL
#
# todo = lapply(ids,function(v) grep('\\[',v))
# idscleaner = function(n){
#   matches = unlist(regmatches(n,gregexpr('\\[.*?\\]',n)))
#   keep = grepl('G',matches)
#   n[keep] = gsub('\\[.*?\\]','',n[keep])
#   n[!keep] = NA
#   n
# }
# for(i in 1:length(todo)){
#   ids[[i]][todo[[i]]] = idscleaner(ids[[i]][todo[[i]]])
# }
# for(i in 1:length(todo)){
#   ids[[i]][is.na(ids[[i]])] <- ids[[i+1]][is.na(ids[[i]])]
#   ids[[i+1]][is.na(ids[[i]])] <- NA
# }
# #structchar = c('ids structure characters here, but non ASCII characters break everything *smack')
# ids = subset(ids, substr(Decomp,1,1) %in% structchar)
# nms = ids$Char
# ids = lapply(1:length(nms),function(i){
#   ids[i,2]
# })
# names(ids) = nms
# idslib = list2env(ids)
# save(idslib,file = 'data/idslib.rda')

#' Character decomposition dataset
#'
#' A lookup of character decompositions, derived from the \code{ids.txt}
#' table of the cjkvi-ids project and stored as an environment.
#'
#' @format An environment created with \code{list2env()}. There is one
#'   binding per retained character: the binding name is the character
#'   (taken from the \code{Char} column of the processed table) and the
#'   value is the entry in the second column of that character's row.
#'
#' @details
#' The table was processed as follows before being saved to
#' \code{data/idslib.rda}:
#' \itemize{
#'   \item Only rows whose \code{Char} is one of \code{names(pylib)} were
#'     kept, and the \code{Decomp6} and \code{Decomp7} columns were dropped.
#'   \item Cells containing a bracketed tag were cleaned: where the tag
#'     contains the letter "G" the tag was stripped and the rest of the cell
#'     kept; otherwise the cell was set to \code{NA}.
#'   \item Cells that were \code{NA} were then filled from the column to
#'     their right.
#'   \item Only rows whose \code{Decomp} value starts with one of the
#'     structure characters (\code{structchar}) were kept.
#' }
#' The script that performed these steps is kept as plain comments directly
#' above this documentation block in the source file.
#'
#' @source \url{https://github.com/cjkvi/cjkvi-ids/blob/master/ids.txt}
#' @encoding UTF-8
"idslib"
# OPTI-SURVEIL/chinsimi documentation built on Oct. 27, 2019, 7:05 p.m.