R/svgFromHtml.R

#' Extract svg graphic data from an html/webarchive file
#'
#' Extract the svg graphic data contained in an .html file or .webarchive file
#' and store it as an .svg file. Some plotting functions, which are based on
#' htmlwidgets like \code{networkD3}, generate a html file that contains
#' JavaScript code for the generation of the graphic (i.e. there is no graphic
#' stored in this html file, but the code to generate the graphic). If this file
#' is opened in a standard web browser, the graphic is rendered and displayed.
#' In some cases, the plot can even be manipulated by the user, before saving it
#' as html or webarchive file that contains all elements (i.e. the plot as svg
#' graphic) of the displayed 'website'. The resulting .svg file extracted with
#' this function can be edited with vector graphic tools like Affinity Designer
#' or Inkscape. Using \pkg{rsvg}\code{::rsvg_pdf} the .svg file can be directly
#' converted to pdf, or converted to a .png file using
#' \pkg{rsvg}\code{::rsvg_png}.
#'
#' @param file character file name, including the file typ specifier (e.g.
#'   "myplot.html"). The corresponding .html/.webarchive file must contain the
#'   svg graphic.
#'
#' @export
#'
#' @return Returns a boolean indicating the success of the attempt to store a
#'   .svg file. Also, writes the .svg file, if possible.
#'
#' @seealso \code{\link[rsvg]{rsvg_pdf}}
#'
#' @examples
#'
#' # file generated by macOS Safari (format = webarchive)
#' path1 <- system.file("extdata/alplots2.webarchive", package = "js2graphic")
#' path2 <- paste(getwd(), "/alplots2.webarchive", sep="")
#' file.copy(path1, path2)
#' file  <- "alplots2.webarchive"
#' b     <- svgFromHtml(file)
#' print(b)
#' rsvg::rsvg_png("alplots2.svg", "alplots2.png")
#' rsvg::rsvg_pdf("alplots2.svg", "alplots2.pdf")
#'
#' file.remove(c(file, "alplots2.svg", "alplots2.png", "alplots2.pdf"))
#'
#'
#'
#' # file generated by macOS Safari *without* storing web content
#' path1 <- system.file("extdata/alplots2_noSvg.html", package = "js2graphic")
#' path2 <- paste(getwd(), "/alplots2_noSvg.html", sep="")
#' file.copy(path1, path2)
#' file  <- "alplots2_noSvg.html"
#' b     <- svgFromHtml(file)  # no svg content
#' print(b)
#' file.remove(file)
#'
#'
#'
#' # file generated by macOS  Firefox (format = website, complete)
#' path1 <- system.file("extdata/alplots2_ff.html", package = "js2graphic")
#' path2 <- paste(getwd(), "/alplots2_ff.html", sep="")
#' file.copy(path1, path2)
#' file  <- "alplots2_ff.html"
#' b     <- svgFromHtml(file)
#' print(b)
#' rsvg::rsvg_png("alplots2_ff.svg", "alplots2_ff.png")
#'
#' file.remove(c(file, "alplots2_ff.svg", "alplots2_ff.png"))
#'
#' @author Christoph Schmidt <schmidtchristoph@@users.noreply.github.com>

# 13.03.17

svgFromHtml <- function(file){
   ind <- stringr::str_locate_all(file, stringr::fixed("/"))[[1]]

   if( dim(ind)[1] > 1){ # a file path was specified within the 'file' input argument
      path_     <- stringr::str_sub(file, 1L, ind[dim(ind)[1], 1])
      file_     <- stringr::str_sub(file, ind[ dim(ind)[1] ] + 1, -1L)
      filesInWD <- list.files(path = path_)
   }
   else {
      file_     <- file
      filesInWD <- list.files()
   }


   if( !any(stringr::str_detect(filesInWD, file_)) ){ stop("Specified file is not in the current working directory/ in the given path.") }




   web_page <- suppressWarnings(readLines(file))

   ind1bool <- stringr::str_detect(web_page, stringr::fixed("<svg")) # which lines contain "<svg ... >"?
   ind2bool <- stringr::str_detect(web_page, stringr::fixed("</svg>")) # which lines contain "</svg>"?

   ind1     <- which(ind1bool) # line in which the svg data starts
   ind2     <- which(ind2bool) # line in which the svg data ends


   if( length(ind1)==0 || length(ind2)==0 ){ writeLines("There was no svg graphic in the input file."); return(FALSE) }

   if( length(ind1)>1 || length(ind2)>1){ writeLines("There is more than one svg graphic in the input file.") }



   indDel <- c( seq(1, ind1-1), seq(ind2+1, length(web_page)) )
   svg    <- web_page[-indDel]


   if( !(stringr::str_sub(svg[1], 1L, 5L) == "<svg ") ){ # remove text before the <svg tag, if any
      indsvgtag <- stringr::str_locate(svg[1], stringr::fixed("<svg"))[1]
      svg[1]    <- stringr::str_sub(svg[1], indsvgtag, -1L)
   }


   len <- length(svg)

   if( !(stringr::str_sub(svg[len], -6L, -1L) == "</svg>") ){ # remove text after the <svg tag, if any
      indsvgtag <- stringr::str_locate(svg[len], stringr::fixed("</svg>"))[2]
      svg[len]  <- stringr::str_sub(svg[len], 1L, indsvgtag)
   }


   indFileType <- stringr::str_locate(file, stringr::fixed("."))[1]
   fileOut     <- paste(stringr::str_sub(file, 1, indFileType-1), ".svg", sep="")
   writeLines(svg, fileOut)

   return(TRUE)
}
schmidtchristoph/js2graphic documentation built on May 21, 2019, 10:08 a.m.