# -*- coding: utf-8 -*-
#' Search a word in an online dictionary
#'
#' Looks a word up on xh.5156edu.com and prints or returns its explanation
#' or its related words.
#'
#' The word is handed to the `make-url.py` script shipped with the `gmdata`
#' package, which is expected to print the word URL-quoted in GBK; a Python
#' interpreter is therefore required. The result page is scraped from the
#' `.font_18` nodes.
#'
#' The output mode is chosen from the flags, first match wins:
#' \enumerate{
#'   \item all flags at their defaults: print the basic explanation;
#'   \item `basic = TRUE, ret = TRUE, more = FALSE`: return the basic
#'     explanation;
#'   \item `more = TRUE, ret = FALSE`: print the detailed explanation;
#'   \item `more = TRUE, ret = TRUE`: return the detailed explanation;
#'   \item `relative = TRUE` (and none of the above matched): return the
#'     related words as one comma-separated string.
#' }
#' Note that because of this order `relative = TRUE` combined with
#' `ret = TRUE` or `more = TRUE` does not return the related words.
#'
#' As a side effect the packages dplyr, stringr and rvest are attached.
#'
#' @author lgm
#' @param word The word you want to search; a single non-empty string.
#' @param basic Use the basic explanation (default `TRUE`).
#' @param more Use the detailed explanation instead of the basic one.
#' @param ret Return the explanation instead of printing it with `cat()`.
#' @param relative Return the related words.
#' @param python Path of the Python interpreter used to run `make-url.py`.
#' @return The selected text as a character vector when a returning mode is
#'   chosen; otherwise `NULL`, invisibly.
#' @export
#' @examples
#' \dontrun{
#' # basic search, just cat the result
#' search_word_online("璇")
#'
#' # basic search and return the result
#' search_word_online("璇", ret = TRUE)
#'
#' # more search but no return
#' search_word_online("璇", more = TRUE)
#'
#' # more search but with return
#' search_word_online("璇", more = TRUE, ret = TRUE)
#'
#' # relative word search with return
#' search_word_online("稔", relative = TRUE)
#'
#' # search words in internal data `pyword`
#' pyword[4,2] %>% str_split(pattern="") %>% .[[1]] %>% map(~search_word_online(.x)) %>% .[[1]]
#'
#' # more lines
#' lapply(4:6,function(x) {pyword[x,2] %>% str_split(pattern="") %>% .[[1]] %>% map(~search_word_online(.x)) %>% .[[1]]})
#' }
search_word_online <- function(word, basic = TRUE, more = FALSE, ret = FALSE,
                               relative = FALSE,
                               python = "/anaconda3/bin/python") {
  if (!is.character(word) || length(word) != 1L || is.na(word) ||
      !nzchar(word)) {
    stop("`word` must be a single non-empty character string.", call. = FALSE)
  }

  # Normalise the flags to scalar logicals so the mode selection below can
  # never hit an NA or a length > 1 condition.
  basic <- isTRUE(as.logical(basic))
  more <- isTRUE(as.logical(more))
  ret <- isTRUE(as.logical(ret))
  relative <- isTRUE(as.logical(relative))

  # Fixed parts of the query URL.
  front_url <- "http://xh.5156edu.com/index.php?f_key="
  back_url <- "&f_type=zi&SearchString.x=0&SearchString.y=0"

  # A Python script quotes the Chinese word in GBK for use in the URL.
  script <- system.file("make-url.py", package = "gmdata")
  if (!nzchar(script)) {
    stop("Cannot find `make-url.py` in the installed `gmdata` package.",
         call. = FALSE)
  }
  # Arguments are shell-quoted: `word` is user input and must not be
  # interpreted by the shell.
  word_gbk <- system2(
    python,
    args = c(shQuote(script), shQuote(word)),
    stdout = TRUE
  )
  if (length(word_gbk) == 0L || !is.null(attr(word_gbk, "status"))) {
    stop("`make-url.py` failed to encode the word.", call. = FALSE)
  }
  url <- paste0(front_url, word_gbk, back_url)

  # Attaching is kept from the original implementation; code calling this
  # function (see the examples) may rely on these packages being attached.
  suppressWarnings(suppressPackageStartupMessages({
    library(dplyr)
    library(stringr)
    library(rvest)
  }))

  # Scrape the result page; <br> tags become newlines before the fragment is
  # parsed again and flattened to text.
  pg <- url %>%
    html_session() %>%
    read_html(encoding = "gbk") %>%
    html_nodes(".font_18") %>%
    str_replace_all("<br>", "\n") %>%
    read_html(encoding = "gbk") %>%
    html_text()

  # Cut the page text into its sections. Each match runs up to and including
  # the heading of the next section; only the basic one has that trailing
  # heading removed.
  bsmean <- regmatches(pg, regexpr("基本解释:(\n)*(.*\n*)*详细解释", pg)) %>%
    str_replace("详细解释", "")
  moremean <- regmatches(pg, regexpr("详细解释:(\n)*(.*\n*)*相关词语", pg))
  rel <- regmatches(pg, regexpr("相关词语:(\n)*(.*\n*)*更多有关", pg))

  # Print or return according to the flags; the order of the branches is
  # part of the function's behaviour.
  if (basic && !more && !ret && !relative) {
    cat(bsmean)
  } else if (basic && ret && !more) {
    bsmean
  } else if (more && !ret) {
    cat(moremean)
  } else if (more && ret) {
    moremean
  } else if (relative) {
    # Drop the surrounding headings and turn the blank-separated words into
    # a comma-separated string.
    gsub("\n", " ", rel) %>%
      gsub("相关词语:\r\n? ", "", .) %>%
      gsub(" \n?更多有关", "", .) %>%
      gsub(" ", ",", .) %>%
      str_trim(side = "both")
  }
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.