R/rwordpress.R

Defines functions listblogIds getBlog getBlogV updateBlog fetchBlogIds loadBlogIds extractKeyWords calcKeywords genHtml convOneBlog convBlog convBlogs buildbook orgbook single_pdf url2pdf matchfile addtitle partial_escape_dataframe escape_wrap blogtable blogtable2 doc_type is_latex `%||%` link embed_png png_meta html_doc tex_doc tex_chapter pdf_doc knitr_opts escape_latex escape_html

Documented in addtitle blogtable blogtable2 buildbook calcKeywords convBlog convBlogs convOneBlog doc_type embed_png escape_html escape_latex extractKeyWords fetchBlogIds genHtml getBlog getBlogV html_doc is_latex link listblogIds loadBlogIds matchfile orgbook partial_escape_dataframe pdf_doc png_meta single_pdf tex_chapter tex_doc updateBlog url2pdf

library(jsonlite)
library(httr)
library(plyr)
library(jiebaR)
library(tm)
library(magrittr)
library(igraph)
library(diagram)

rwpEnv<-new.env()

user <- Sys.getenv("WP_USER")
#wordpress user with the update privilege
pwd <- Sys.getenv("WP_PASS")
#password for the user
markdownRoot <- Sys.getenv("MARKDOWN_ROOT")
#local markdown root directory, where the markdown files for the blog exist
homeurl <- Sys.getenv("BLOG_APIURL")
#blog api url, see the WP_API

if (user == "" || pwd == "" || markdownRoot == "" || homeurl == "") {
  stop("Set WP_USER, WP_PASS, MARKDOWN_ROOT, BLOG_APIURL, env vars", call. = FALSE)
}


mixseg = worker()
key_worker = worker("keywords",topn=3)

#' list the blog ids for the given page
#' 
#' description
#' 
#' @param page value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
listblogIds<-function(page)
{
    print(paste0("fetch page ",page," ..."))
    pageurl=paste0(homeurl,"?page=",page)
    print(pageurl)
    yy<-httr::GET(pageurl)
    print(yy)
    con=jsonlite::fromJSON(httr::content(yy,as="text",encoding="utf-8"))
    #con=fromJSON(content(yy))
    con[,c("id")]
}

#' get the blog title and content for given id
#' 
#' the id should be one of the fetched id
#' 
#' @param id value
#' @return list(title=,content=)
#' @export 
#' @examples 
#' x=c(1,2,3) 
getBlog<-function(id)
{
    aurl=paste0(homeurl,"/",id)
    yy<-httr::GET(aurl)
    jj=jsonlite::fromJSON(httr::content(yy,"text",encoding="utf-8"))
    list(title=jj$title$rendered,content=jj$content$rendered)
}

#' return the title and content in vec
#' 
#' description
#' 
#' @param id value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
getBlogV<-function(id)
{
	bb=getBlog(id)
	c(id,unlist(bb))
}

#' update blog with new content, do not use it now
#' 
#' description
#' 
#' @param id value
#' @param title value
#' @param content value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
updateBlog<-function(id)
{
	murl=paste0(homeurl,"/",id)
	#use curl instead
        system2("curl",c("--user", paste0(user,":",pwd),"-X","POST",murl,"-d",paste0("content=[bgurl]markdown/p",id,".html[/bgurl]")))
}

#' fetch the blog IDs , save it in the ./allids.Rds
#' 
#' description
#' 
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
fetchBlogIds<-function()
{
	pages=1:70
	allblogs <- unlist(plyr::mlply(pages,listblogIds))
        saveRDS(allblogs,"./allids.Rds")
}

#' load the allids.Rds and return it
#' 
#' description
#' 
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
loadBlogIds<-function()
{
	readRDS("./allids.Rds")
}

#' title 
#' 
#' description
#' 
#' @param id value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
extractKeyWords<-function(id)
{
	print(paste0("processing ", id, " ..."))
	con=getBlog(id)
	con$content=substr(con$content,20,nchar(con$content)-13)
	#replace the .html to .md
	con$content = gsub(".html",".md",con$content)
	mdfile=paste0("./",con$content)
	kk = keys[mdfile]
        if(length(grep("md",kk)) >= 1){
	    con$content = gsub(".md",".rmd",con$content)
	    mdfile=paste0("./",con$content)
	    kk = keys[mdfile]
	}
	#remove any keywords that length > 15
	kk = kk[nchar(kk) < 15]
	kks=paste(kk,collapse="/")
	c(id,unlist(con),kks)
}

#' title 
#' 
#' description
#' 
#' @param ids value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
calcKeywords<-function(ids)
{
	rr=ldply(ids,extractKeyWords)
	rr$link=sprintf("[%s](%s)",rr$title,paste0("http://www.bagualu.net/wordpress/archives/",rr$V1))
	kk=rr[,c("link","V2")]
        colnames(kk)=c("文章","关键词")
	saveRDS(kk,"./p6127.Rds")
}

#' title 
#' 
#' description
#' 
#' @param id value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
genHtml<-function(id){

    aurl=paste0(homeurl,"/",id)
    yy<-GET(aurl)
    jj=fromJSON(content(yy,"text",encoding="utf-8"))
    #print(jj)
    res=c(jj$title$rendered,jj$content$rendered)
    #generate the html
    filename=paste0("p",id,".html")
    cat(file=filename,res[2])
    return(res) 

}

#' convert one blog from html to markdown
#' 
#' description
#' 
#' @param id value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
convOneBlog<-function(id)
{
    aurl=paste0(homeurl,"/",id)
    yy<-GET(aurl)
    jj=fromJSON(content(yy,"text",encoding="utf-8"))
    res=c(jj$title$rendered,jj$content$rendered)
    #generate the html
    filename=paste0("./p",id,".html")
    htmlfile=filename
    cat(file=filename,res[2])

    #to see if there is "[sourcecode" or "[code" exist 

    #convert it to markdown

    #process the html file
    fcontent=readLines(filename)

    #if find bgurl, just return
    done=grep("bgurl",fcontent)
    if(length(done) >= 1) {
        print("it is converted already")
    }else{

    mdfile=paste0("./p",id,".md")
    system2("pandoc",c("-f","html","-t","markdown","-o",mdfile,htmlfile))

    system2("cp",c(mdfile,"/home/xuyang/blog/"))

    updateBlog(id)

    } 
}

#' title 
#' 
#' description
#' 
#' @param V value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
convBlog<-function(V)
{
	if(V[2]) {
	    print(paste0("converting ",V[1]))
	    convOneBlog(V[1])
	}
	
}
 
#' title 
#' 
#' description
#' 
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
convBlogs<-function(){
	nids=readRDS('./needIds.Rds')
	#dlply(nids[,c("V1","V2")],.(V1,V2),convBlog)
	apply(nids[,c("V1","V2")],1,convBlog)
}


#' build a book with the given ids
#' 
#' each id is a chapter
#' 
#' @param ids value
#' @param name value
#' @param title value
#' @param copyright the copyright md file for the book
#'        the file should be at the markdownRoot folder
#' @param preface : the preface md file for the book
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
buildbook<-function(ids,name="~/wpbook",title="",copyright="copyright.md",preface="preface.md")
{
	#get the titles and the markdown file
	con=plyr::ldply(ids,getBlogV)
	#get the markdown file from the content
	mds=stringr::str_extract(con$content,"p\\d+")
	print("md files")
	print(mds)
	finalfile=unlist(plyr::mlply(mds,matchfile))
	#create the folder for the new book
	print(finalfile)
	if(file.exists(name)) {
		system2("rm",c(name,"-rf"))
	}

	system2("mkdir",c(name,"-p"))
	system2("mkdir",c(paste0(name,"/rfigures"),"-p"))
	file.copy(finalfile,name,overwrite=TRUE)

	file.copy(paste0(markdownRoot,copyright),name,overwrite=TRUE)
	file.copy(paste0(markdownRoot,preface),name,overwrite=TRUE)
	#adding the title
	#get the title firstly
	oldwd=getwd()
	setwd(name)
	mdfiles_in_newplace=dir(name,'*md')
	print("new md files ")
	print(paste(mdfiles_in_newplace,collapse="\n"))
	plyr::m_ply(mdfiles_in_newplace,addtitle)
	
	#compile the file to tex
	for (x in  mdfiles_in_newplace) {
		print(paste0("rendering ",x))
		rmarkdown::render(x,tex_doc(x))
	}
	
	#fetch the template
	template=system.file("","template.tex",package='rwp')
	print(template)
	system2("cp",c(template,name,"-fv"))
	
	#generate the template for the book
	#insert the mds into the tex template
	chapters=sprintf("\\include{%s}",mds)
	
	rawtemplate=readLines("./template.tex")

	if (nchar(title) > 1 ){
		rawtemplate=sub('xxtitle',title,rawtemplate)
	}

	ll=grep('xxchapter',rawtemplate)
	if(length(ll) == 1) {
		rawtemplate[ll] = paste(chapters,collapse="\n")
	}
	writeLines(rawtemplate,"book.tex")
	
	#generate the pdf
	#xelatex -interaction=batchmode mybook
	system2("xelatex",c("-interaction=batchmode","book"))
	#the 2nd pass for the index
	system2("xelatex",c("-interaction=batchmode","book"))
	setwd(oldwd)
	print(paste("book.pdf is generated in", name,"folder"))
}

#' orgbook
#' 
#' convert the blog ids to a org file
#' 
#' @param ids value
#' @param name value
#' @param title value
#' @param copyright value
#' @param preface value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
orgbook<-function(ids,name="~/orgbook",title="",copyright="copyright.md",preface="preface.md")
{
	#get the titles and the markdown file
	con=plyr::ldply(ids,getBlogV)
	#get the markdown file from the content
	mds=stringr::str_extract(con$content,"p\\d+")
	print("md files")
	print(mds)
	finalfile=unlist(plyr::mlply(mds,matchfile))
	#create the folder for the new book
	print(finalfile)
	if(file.exists(name)) {
		system2("rm",c(name,"-rf"))
	}

	system2("mkdir",c(name,"-p"))
	system2("mkdir",c(paste0(name,"/rfigures"),"-p"))
	file.copy(finalfile,name,overwrite=TRUE)

	#adding the title
	#get the title firstly
	oldwd=getwd()
	setwd(name)
	mdfiles_in_newplace=dir(name,'*md')
	print(paste(mdfiles_in_newplace,collapse="\n"))
	plyr::m_ply(mdfiles_in_newplace,addtitle)
	
	orgfiles_in_newplace=dir(name,'*org')
	print(paste(orgfiles_in_newplace,collapse="\n"))
	plyr::m_ply(orgfiles_in_newplace,addtitle)

	#compile the file to org
	for (x in mdfiles_in_newplace) {
		print(paste0("rendering ",x))
		#replace the .md to .org
                yy = stringi::stri_replace_all_regex(x,"\\.md","\\.org")
		system2("pandoc",c("-f","markdown","-t","org","-o",yy,x))
	}
	
	##generate the final org file
	ff<-file("final.org","w")
	ll=sprintf("#+include: p%d.org :minlevel 1",ids)
	writeLines("ctex_",ff)
	writeLines(ll,ff)
	close(ff)
	print(paste("book.pdf is generated in", name,"folder"))
}


#' generate the pdf for a single blog
#' 
#' description
#' 
#' @param id value
#' @param title value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
single_pdf<-function(id,title="")
{
	con=getBlog(id)
	mds=stringr::str_extract(con$content,"p\\d+")
	finalfile=matchfile(mds,".(md|rmd)$")
	#create the folder for the new book
	print(finalfile)
	if(length(finalfile) == 1) {
		str(pdf_doc(finalfile))
		rmarkdown::render(finalfile,pdf_doc(finalfile))
	}else{
		print("warning, non expected")	
	}
	
}

#' convert a url to pdf / todo
#' 
#' description
#' 
#' @param url value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
url2pdf<-function(url) 
{
}

#' find the md or rmd file for given p123
#' 
#' description
#' 
#' @param x value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
matchfile<-function(x,pattern=".(md|rmd|tbl|Rds|org)$")
{
	#give the p123.* 
	ffs=dir(markdownRoot,pattern=paste0(x,pattern))
	sprintf("%s%s",markdownRoot,ffs)
	#mdfile=sprintf("%s%s.md",markdownRoot,x)
	#rmdfile=sprintf("%s%s.rmd",markdownRoot,x)
	#if( file.exists(mdfile) ){
	#	mdfile
	#}else if ( file.exists(rmdfile) ) {
	#	rmdfile
	#}else{
	#	NA
	#}
}


#' fetch the blog title and insert it into the md file
#' 
#' description
#' 
#' @param fname input file name, md or rmd file
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
addtitle<-function(fname){
	tt=stringr::str_extract(fname,'\\d+')
	print(paste("fetch title",tt))
        if( is.na(tt) ){
	}else{
	    bb=getBlog(tt)
            if( grep("org",fname) ){
   	         system2("sed",c("-ie",paste0("'1 i\\* ",bb$title,"\\n'"),fname))
	    }else{
   	         system2("sed",c("-ie",paste0("'1 i\\# ",bb$title,"'"),fname))
	    }
	}
}

#' escape data.frame column wise
#' 
#' description
#' 
#' @param x value
#' @param except value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
partial_escape_dataframe<-function(x,except=none_escape_column)
{	
    if(is.null(except)){
	stop("error in partial-escape-dataframe")
    }
    if(! is.numeric(except))
    {
    	nn=names(x)
    	needescap=setdiff(nn,except)
    }else{
    	needescap=setdiff(1:dim(x)[2],except)
    }

    for(idx in needescap){
	x[,idx]=escape_wrap(x[,idx])
    }
    x
}


escape_wrap<-function(x)
{
	if(is_latex()){
		escape_latex(x)
	}else{
		escape_html(x)
	}
}

#' generate table for given dataframe
#' 
#' description
#' 
#' @param x value
#' @param escape value
#' @param none_escape_column which column do not need to be escaped
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
blogtable<-function(x, escape=TRUE, none_escape_column=NULL, caption="")
{
	#only data frame is supported
	if(is.data.frame(x)){
	    if(! is.null(none_escape_column) ) {
		x=partial_escape_dataframe(x,except=none_escape_column)
		escape=FALSE
	    }
	    if(is_latex()) {
	            knitr::kable(x,format="latex",align="c",escape=escape,caption=caption)
	    }else{
            	knitr::kable(x,format="html",table.attr = "class=\"table table-bordered\"", 
                             align="c", escape=escape, caption=caption)
	    }
	}else{
		stop("blogtable supports only data.frame")
	}
}

#' table by xtable
#' 
#' description
#' 
#' @param x value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
blogtable2<-function(x)
{
	xtable::xtable(x)
}
#' target doc type for rmarkdown
#' 
#' description
#' 
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
doc_type <- function() {
	knitr::opts_knit$get('rmarkdown.pandoc.to')
}
#' is target type latex ?
#' 
#' description
#' 
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
is_latex <- function() {
  identical(doc_type(), "latex")
}

`%||%` <- function(a, b) if (is.null(a)) b else a



#' generate link for both html and latex according to is_latex
#' 
#' description
#' 
#' @param href value
#' @param name value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
link<-function(href,name)
{
	if(is_latex()){
		paste0("\\href{",href,"}{",name,"}")
	}else{
		paste0("<a href='",href,"'>",name,"</a>")
	}
}

#' embed png in the markdown
#' 
#' work for both http and local image , 
#' for html , the image
#'   if path started with http, it will be put at the img tag directly
#'
#'   otherwise, path should have the "rfigures" in it and the png file 
#'   should be existed in http://www.bagualu.net/wordpress/rfigures/
#'   it will not copy the files there , so you need to do it manually
#' 
#' for pdf , both http and local img are supported, local image does 
#'   not need to be on the website
#' 
#' @param path value
#' @param dpi value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
embed_png<- function(path, dpi = 200) {
  if( is_latex() ){
	#copy the image to local disk
	#get the imgname
	tts=strsplit(path,"/")[[1]]
	dfile=tts[length(tts)]
	if(! file.exists("./images")){
		system2("mkdir",c("./images"))
	}
	#print(paste("downloading image","./images/",dfile))
	if(length(grep("http://",path)) >= 1){
		curl::curl_download(path,paste0("./images/",dfile))
	        path = paste0("./images/",dfile)
	}

        meta <- png_meta(path)
        dpi <- dpi %||% meta$dpi[1] %||% stop("Unknown dpi", call. = FALSE)

    width <- round(meta$dim[1] / dpi, 2)

    knitr::asis_output(paste0(
      "\\includegraphics[",
      "width=", width, "in",
      "]{", path, "}"
    ))
  } else {

            #meta <- png_meta(path)
            #dpi <- dpi %||% meta$dpi[1] %||% stop("Unknown dpi", call. = FALSE)
        if(length(grep("http",path)) >= 1){
            knitr::asis_output(paste0(
              "<img src='", path, "'/>"
            ))
	
        }else{
            strs=strsplit(path,"/")[[1]]
            an=grep("rfigures",strs)

                meta <- png_meta(path)
                dpi <- dpi %||% meta$dpi[1] %||% stop("Unknown dpi", call. = FALSE)

            width <- round(meta$dim[1] / dpi, 2)
       
            dpi = 100

            if( length(an) == 1){
                path=paste(strs[an:length(strs)],collapse="/")
                knitr::asis_output(paste0(
                  "<img src='", path, "' ",
                  " width='", round(meta$dim[1] / (dpi / 96)), "'",
                  " height='", round(meta$dim[2] / (dpi / 96)), "'",
                  " />"
                ))
            }
      }
  }
}


#' return the png meta data given the path
#' 
#' description
#' 
#' @param path value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
png_meta <- function(path) {
  attr(png::readPNG(path, native = TRUE, info = TRUE), "info")
}

#' html format for rmarkdown
#' 
#' specified the fig.path according to the inputfile
#' 
#' @param inputfile value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
html_doc<-function(inputfile)
{
  #rmarkdown::output_format(
  #  knitr_opts("html", FALSE,inputfile=inputfile),
  #  rmarkdown::pandoc_options(
  #    to = "html",
  #    from = markdown_style,
  #    ext = ".html",
  #    #args = c("--chapters", pandoc_latex_engine_args(latex_engine))
  #    args = c("--toc")
  #  ),
  #  clean_supporting = FALSE
  #)
  out=rmarkdown::html_document(
	self_contained = FALSE,
	highlight= 'kate',
	toc=TRUE,
	template= './template/frag.html',
	md_extensions='-ascii_identifiers',
        lib_dir='./lib'
	)
  out$knitr=knitr_opts("html", FALSE,inputfile=inputfile)
  #out$pandoc=rmarkdown::pandoc_options(
  #    to = "html",
  #    from = markdown_style,
  #    ext = ".html",
  #    #args = c("--chapters", pandoc_latex_engine_args(latex_engine))
  #    args = c("--toc")
  # )
   out
}


#' output for tex
#' 
#' fig.path should be fixed
#' 
#' @param inputfile value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
tex_doc<-function(inputfile)
{
  out = tex_chapter(inputfile)
  out
}


#' title 
#' 
#' description
#' 
#' @param inputfile value
#' @param latex_engine value
#' @param code_width value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
tex_chapter <- function(inputfile,
                        latex_engine = c("xelatex", "pdflatex", "lualatex"),
                        code_width = 65) {
  options(digits = 3)
  set.seed(1014)

  latex_engine <- match.arg(latex_engine)
  rmarkdown::output_format(
    knitr_opts("latex", NULL, inputfile),
    rmarkdown::pandoc_options(
      to = "latex",
      from = markdown_style,
      ext = ".tex",
      args = c("--chapters")
    ),
    clean_supporting = FALSE
  )
}

#' replacement of rmarkdown::pdf_document
#' 
#' description
#' 
#' @param inputfile value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
pdf_doc<-function(inputfile)
{
	template=system.file("","pdf_template_single.tex",package='rwp')

	out=rmarkdown::pdf_document(template = template,
				latex_engine="xelatex")
	out$knitr = knitr_opts("latex",FALSE,inputfile)
	out
}


markdown_style <- paste0(
  "markdown",
  "+autolink_bare_uris",
  "-auto_identifiers",
  "+tex_math_single_backslash",
  "-implicit_figures",
  "+east_asian_line_breaks"
)


knitr_opts <- function(type = c("html", "latex"), chapter, inputfile, code_width = 65) {
  type <- match.arg(type)

  pkg <- list(
    width = code_width
  )

  #p655.rmd ->p655
  ff = stringr::str_extract(inputfile,"p\\d+")

  chunk <- list(
    comment = "#>",
    collapse = TRUE,
    #cache.path = paste0("_cache/", chapter, "/"),
    #cache = TRUE,
    fig.path = paste0("rfigures/", ff, "-"),
    fig.width = 6,
    fig.height = 6,
    fig.retina = NULL,
    dev = if (type == "html") "png" else "pdf",
    dpi = if (type == "html") 96 else 300
  )

  hooks <- list(
    #plot = if (type == "latex") html_plot(),
    small_mar = function(before, options, envir) {
      if (before)
        par(mar = c(4.1, 4.1, 0.5, 0.5))
    }
  )

  rmarkdown::knitr_options(pkg, chunk, hooks)
}

#' escape special LaTeX characters
#' 
#' @param x value
#' @param newlines value
#' @param spaces value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
escape_latex <- function(x, newlines = FALSE, spaces = FALSE) {
  x = gsub('\\\\', '\\\\textbackslash', x)
  x = gsub('([#$%&_{}])', '\\\\\\1', x)
  x = gsub('\\\\textbackslash', '\\\\textbackslash{}', x)
  x = gsub('~', '\\\\textasciitilde{}', x)
  x = gsub('\\^', '\\\\textasciicircum{}', x)
  if (newlines) x = gsub('(?<!\n)\n(?!\n)', '\\\\\\\\', x, perl = TRUE)
  if (spaces) x = gsub('  ', '\\\\ \\\\ ', x)
  x
}


#' escape html
#' 
#' description
#' 
#' @param x value
#' @return returndes
#' @export 
#' @examples 
#' x=c(1,2,3) 
escape_html = function(x) {
  x = gsub('&', '&amp;', x)
  x = gsub('<', '&lt;', x)
  x = gsub('>', '&gt;', x)
  x = gsub('"', '&quot;', x)
  x
}
jiang-hang/rwp documentation built on May 19, 2019, 9:29 a.m.