# R/trans_zh_to_en.R

#' Translate a Chinese html page from econ.jxufe.cn into English to modify
#'
#' Downloads the page at `url`, extracts the text of its `<p>` nodes, splits
#' that text into sentences on the Chinese full stop, translates every
#' sentence with `gmtools::ggtrans()` and writes the
#' "original.translation" pairs (separated by blank lines) to a dated text
#' file so the draft can be edited by hand.
#'
#' @param url the page link (a single character string)
#' @param encoding the encoding passed to `xml2::read_html()`
#' @param outfile suffix of the output file name; today's date formatted as
#'   `YYYYMMDD` is prepended to it
#' @param path directory the output file is written to; it must already
#'   exist. The default is the directory that used to be hard-coded.
#' @return The character string `"check the translated news in Nutstore."`.
#'   The function is called for its side effect of writing the output file.
#' @export
#' @import tidyverse
#' @examples
#' url <- "http://econ.jxufe.cn/news-show-3391.html"
#' #trans_zh_to_en(url)

trans_zh_to_en <- function(url,
                           encoding = "utf-8",
                           outfile = "out.txt",
                           path = "/Users/gabe/Nutstore/4行政工作/网站更新") {
  stopifnot(
    is.character(url), length(url) == 1L,
    is.character(outfile), length(outfile) == 1L,
    is.character(path), length(path) == 1L
  )
  # Fail before any download/translation work if the result cannot be written.
  if (!dir.exists(path)) {
    stop("Output directory does not exist: ", path, call. = FALSE)
  }

  stamp <- format(lubridate::today(), "%Y%m%d")
  target <- file.path(path, paste0(stamp, outfile))

  page <- xml2::read_html(url, encoding = encoding)
  paragraphs <- rvest::html_text(rvest::html_nodes(page, "p"))
  paragraphs <- paragraphs[!(paragraphs %in% c("", " "))]
  # The last remaining paragraph is dropped, exactly as before.
  # NOTE(review): presumably a page footer -- confirm against the site layout.
  paragraphs <- paragraphs[-length(paragraphs)]

  # Split into a flat vector so sentences stay in document order; a simplified
  # (matrix) split is padded with "" and is traversed column by column, which
  # interleaves sentences that belong to different paragraphs.
  sentences <- as.character(
    unlist(strsplit(paragraphs, "。", fixed = TRUE), use.names = FALSE)
  )
  # Remove empty pieces before translating instead of trying to filter the
  # resulting "." entries afterwards.
  sentences <- sentences[nzchar(trimws(sentences))]

  translated <- vapply(
    sentences,
    function(sentence) {
      # The second element of the ggtrans() result is used, as before.
      # NOTE(review): presumably the translated text -- verify in ggtrans().
      english <- gmtools::ggtrans(sentence, "zh-CN", "en")[2]
      paste(sentence, english, sep = ".")
    },
    character(1),
    USE.NAMES = FALSE
  )

  cat(translated, file = target, sep = "\n\n")
  "check the translated news in Nutstore."
}
# Gabegit/gmtools documentation built on May 6, 2019, 5:32 p.m.