R/batch_encode_convert.R

Defines functions batch_encode_convert

Documented in batch_encode_convert

#' Batch-convert the text encoding of files in a directory
#'
#' @description Converts every file under `dir` whose name matches `pattern`
#'   from the encoding `incode` to the encoding `outcode`, overwriting each
#'   file in place. A file whose detected encoding already equals `outcode`
#'   is left untouched. Before a file is overwritten, a copy of the original
#'   is stored in a `bak` directory inside the current working directory;
#'   the backup name is the file's path relative to `dir` with every `/`
#'   replaced by `_`. An existing backup is never overwritten, and files
#'   that live inside that `bak` directory are never converted. You can use
#'   `readr::guess_encoding(file)` to guess the initial encoding of a file.
#' @author lgm
#' @param dir a directory path
#' @param pattern a regular expression, as accepted by `list.files()`,
#'   selecting the files to convert (for example `"\\.csv$"`)
#' @param incode original encoding, as understood by `iconv()`
#' @param outcode new encoding, as understood by `iconv()`
#' @param recursive whether to descend into all sub-directories of `dir`
#' @return a character vector holding one status message per processed file,
#'   named by the file's path relative to `dir`
#' @export
#' @examples
#' \dontrun{
#' batch_encode_convert("data", pattern = "\\.csv$",
#'                      incode = "GB18030", outcode = "UTF-8")
#' }
batch_encode_convert <- function(dir, pattern, incode = "GB18030",
                                 outcode = "UTF-8", recursive = TRUE) {
  backup_dir <- "bak"

  # list.files() returns paths relative to `dir`; keep those for naming and
  # reporting, but do all I/O through the path joined with `dir` so the
  # function also works when `dir` is not the working directory.
  allfiles <- list.files(dir, pattern = pattern, recursive = recursive)
  paths <- file.path(dir, allfiles)

  # A non-recursive listing can contain directories; they are not convertible.
  keep <- !dir.exists(paths)
  # Never treat our own backups as input, otherwise a second run would
  # convert the preserved originals.
  if (dir.exists(backup_dir)) {
    bak_root <- normalizePath(backup_dir, winslash = "/")
    parents <- normalizePath(dirname(paths), winslash = "/")
    keep <- keep & parents != bak_root &
      !startsWith(parents, paste0(bak_root, "/"))
  }
  allfiles <- allfiles[keep]
  paths <- paths[keep]
  print(glue::glue("There is {length(allfiles)} files needed to convert."))

  # Convert a single file; `rel` is its path relative to `dir`, `path` is the
  # location used for reading and writing. Returns a one-string status.
  convert_one <- function(rel, path) {
    # Work on raw bytes so the content is neither re-interpreted on read nor
    # altered by text-mode newline translation on write.
    bytes <- readBin(path, what = "raw", n = file.size(path))

    # guess_encoding() returns a table of candidates ordered by confidence;
    # only the best one is compared. An empty file needs no conversion.
    top_guess <- if (length(bytes) == 0L) {
      outcode
    } else {
      readr::guess_encoding(bytes)$encoding[1]
    }
    if (isTRUE(toupper(top_guess) == toupper(outcode))) {
      status <- "This file's encoding is also the goal encoding."
      print(status)
      return(status)
    }

    # Convert first, so a failed conversion leaves the file (and `bak`) alone.
    converted <- iconv(list(bytes), from = incode, to = outcode,
                       toRaw = TRUE)[[1]]
    if (is.null(converted)) {
      warning("Could not convert ", rel, " from ", incode, " to ", outcode,
              "; file left unchanged.", call. = FALSE)
      return(paste(rel, "could not be converted."))
    }

    # Back up the original once; refuse to overwrite without a backup.
    if (!dir.exists(backup_dir)) {
      dir.create(backup_dir)
    }
    backup_path <- paste0("bak/", gsub("/", "_", rel, fixed = TRUE))
    if (!file.exists(backup_path) && !file.copy(path, backup_path)) {
      warning("Could not back up ", rel, "; file left unchanged.",
              call. = FALSE)
      return(paste(rel, "could not be backed up."))
    }

    writeBin(converted, path)
    status <- paste(rel, "is converted.")
    print(status)
    status
  }

  # batch converting
  statuses <- vapply(
    seq_along(allfiles),
    function(i) convert_one(allfiles[[i]], paths[[i]]),
    character(1)
  )
  names(statuses) <- allfiles
  statuses
}
Gabegit/gmdata documentation built on May 6, 2019, 5:32 p.m.