R/parse_io.R

Defines functions parse_io list_io_calls parse_code parse_R yaml_head output_Rmd parse_Rmd

Documented in parse_io parse_Rmd yaml_head

# PARSE R CODE & RETURN I/O CALLS
# Search for functions that read/write data and extract their names and values
# of the "file" argument (the name of the argument can be specified)

# SHUT UP! or see https://github.com/tidyverse/magrittr/issues/29
if(getRversion() >= "2.15.1") utils::globalVariables(c("full.file", "deps"))

#' Parse I/O calls
#'
#' Parse code & create a list of I/O calls
#' @param path Path to file/directory
#' @param read.fun Vector of function names
#' @param write.fun Vector of function names
#' @param read.arg Regex matching the name of argument the value of which we want to extract
#' @param write.arg Regex matching the name of argument the value of which we want to extract
#' @param suffix Pattern passed to \code{dir} call
#' @importFrom dplyr data_frame bind_rows

parse_io = function(path = ".", read.fun = c("read", "load"),
                    write.fun = c("write", "save"), read.arg = "[Ff]ile",
                    write.arg = read.arg, suffix = c("R", "Rmd")) {

  if (file.info(path)[["isdir"]])
    path = dir(
      path = path, full.names = TRUE, pattern = reg_suffix(suffix)
    )

  io.calls = lapply(
    X = path, FUN = function(this.file) {
      parsed = parse_code(file = this.file)
      data_frame(
        full.file = this.file,
        deps = list(list_io_calls(
          x = parsed, .fun = reg_fun(c(read.fun, "=")), .arg = read.arg)),
        targets = list(list_io_calls(
          x = parsed, .fun = reg_fun(c(write.fun, "=")), .arg = write.arg))
      )})

  bind_rows(io.calls)
}

# Extract function calls and arguments
list_io_calls = function(x, .fun, .arg) {

  if (is.null(.fun))
    stop("Missing pattern to match function calls", call. = FALSE)

  if (is.null(.arg))
    stop("Missing pattern to match arguments", call. = FALSE)

  io.values = lapply(
    X = x[grepl(pattern = .fun, x = names(x))],
    FUN = function(i) {
      file.value = i[grepl(.arg, names(i))]
      if (is.character(unlist(file.value)))
        file.value
      else
        NULL
    })

  unlist(io.values)
}

# Parse R code and unwrap 1st level "=" calls ----
# ================================================

parse_code = function(file) {

  parsed = do.call(
    paste0("parse_", reg_match(file, "[^\\.]+$")),
    list(file))

  # class "=" is different than class "call" -> one more unwrap needed
  # (we ignore the other classes: "for", "if" etc...)
  parsed = lapply(
    X = parsed,
    FUN = function(.exp) {
      if (inherits(.exp, "="))
        lapply(.exp, unlist)[[3]] #third element is the call
      else
        .exp
    })

  structure(
    .Data = lapply(parsed, function(i) as.list(i)[-1]),
    names = lapply(parsed, function(i) as.list(i)[[1]])
  )
}

# Parse .R files ----
# ===================

# parse_R: parse & add itself as a prerequisite (e.g. "read.itself")

parse_R = function(file)
  c(list(list("read.itself", file = file)), as.list(parse(file)))


# Parse .Rmd files ----
# =====================

# yaml_head:  parse Rmd header
# output_Rmd: determine output file name & type
# parse_Rmd:  parse, add output as target (e.g. "write.itself") & itself as a
#             prerequisite (e.g. "read.itself")

#' Parse YAML header
#'
#' Parse YAML header
#' @param file Path to Rmd file
#' @importFrom yaml yaml.load

yaml_head = function(file) {
  file.lines = readLines(file)[-1]
  file.head = character()
  for (i in seq_along(file.lines)) {
    if (file.lines[i] == "---")
      break
    else
      file.head[i] = file.lines[i]
  }
  yaml.load(paste(file.head, collapse = "\n"))
}

output_Rmd = function(file) {
  out.param = yaml_head(file)[["output"]]
  if (is.list(out.param))
    out.type = names(out.param)
  else
    out.type = out.param
  paste0(
    gsub("\\.[^/]+$", "", file), ".", reg_match(out.type, "^[^_]+"))
}

#' Parse Rmd file
#'
#' Parse Rmd file
#' @param file Path to Rmd file
#' @importFrom knitr purl

parse_Rmd = function(file) {
  temp.R = paste0(".", as.numeric(Sys.time()), ".R")
  purl(file, output = temp.R, quiet = TRUE)
  parsed = as.list(parse(temp.R))
  file.remove(temp.R)
  c(list(list("read.itself", file = file)),
    list(list("write.itself", file = output_Rmd(file))),
    parsed)
}
jchrom/datamake documentation built on May 18, 2019, 10:23 p.m.