R/search_easy_mistake.R

Defines functions search_easy_mistake

Documented in search_easy_mistake

#' List words that are easily confused with a given character
#'
#' Looks up the same-sound (homophone) group(s) that contain a character and
#' lists the dictionary words built from the characters of those groups,
#' each followed by its pinyin.
#'
#' The function reads two data objects that must be visible when it is
#' called: `pyword` (same-sound characters; columns `X1` and `X2`, where `X2`
#' holds the characters of one group as a single string) and `data_uni`
#' (word list; columns `word` and `pinyin`). Built-in data has to be
#' referenced by the object name it was saved under.
#'
#' @author lgm
#' @param my_easy_mistake A single string, normally one Chinese character.
#'   It is used as a regular expression against `pyword$X2` and, when
#'   `onlyTheWord = TRUE`, against `data_uni$word`.
#' @param onlyTheWord If `FALSE` (default), list the words containing any
#'   character of the matching same-sound group(s). If `TRUE`, only return
#'   the rows of `data_uni` whose `word` matches `my_easy_mistake`.
#' @return An unnamed list of length two:
#'   1. the matching `pyword` rows converted with `as.character()` (one
#'      string per column);
#'   2. for `onlyTheWord = FALSE`, a character vector of word and pinyin
#'      pasted together; for `onlyTheWord = TRUE`, the matching rows of
#'      `data_uni` as a data frame.
#' @keywords pin yin
#' @export
#' @examples
#' search_easy_mistake("朗")
#' search_easy_mistake("除", onlyTheWord = TRUE)
#' # Sample rows of `pyword` (X1, X2):
#' # āi  哀挨埃
#' # ái  挨皑癌
#' # ǎi  矮蔼
#' # ài  艾爱隘碍
#' # ān  厂广安氨庵
search_easy_mistake <- function(my_easy_mistake, onlyTheWord = FALSE) {
  # Base R only: the function no longer attaches stringr/tidyverse to the
  # caller's search path as a side effect of being called.

  # Rows of `data_uni` whose `word` matches `pattern`. Row names are reset so
  # the result looks the same as a dplyr::filter() result.
  find_words <- function(pattern, fixed = FALSE) {
    keep <- grepl(pattern, data_uni$word, fixed = fixed)
    hits <- data_uni[keep, , drop = FALSE]
    rownames(hits) <- NULL
    hits
  }

  # Same-sound group(s) containing the query; a character with several
  # readings can match more than one row.
  matched <- pyword[grepl(my_easy_mistake, pyword$X2), , drop = FALSE]

  # Kept for backward compatibility: one deparsed string per column.
  samesoundwords <- as.character(matched)

  if (as.logical(onlyTheWord)) {
    allwords <- find_words(my_easy_mistake)
  } else {
    # Take the characters straight from the matched rows instead of from the
    # deparsed text above. With no match that text is "character(0)", and
    # splitting it made the old code search the dictionary for the letters
    # "h", "a", "r", ... and return unrelated words.
    chars <- unlist(
      strsplit(as.character(matched[["X2"]]), split = ""),
      use.names = FALSE
    )
    # Same exclusion set as before (quote, parentheses, backslash, "c",
    # comma, space) so existing results do not change.
    chars <- chars[grepl('[^\\"\\(\\)\\\\c, ]', chars)]

    # Each element is one literal character, so match it verbatim
    # (fixed = TRUE) rather than as a regular expression.
    per_char <- lapply(chars, function(ch) {
      hits <- find_words(ch, fixed = TRUE)
      paste0(hits$word, hits$pinyin)
    })
    # as.character() turns the NULL from an empty list into character(0).
    allwords <- as.character(unlist(per_char, use.names = FALSE))
  }

  list(samesoundwords, allwords)
}
Gabegit/gmdata documentation built on May 6, 2019, 5:32 p.m.