R/output_format.R

#' Define an R Markdown output format
#'
#' Define an R Markdown output format based on a combination of knitr and pandoc
#' options.
#'
#' @param knitr Knitr options for an output format (see
#'   \code{\link{knitr_options}})
#' @param pandoc Pandoc options for an output format (see
#'   \code{\link{pandoc_options}})
#' @param keep_md Keep the markdown file generated by knitting. Note that
#'   if this is \code{TRUE} then \code{clean_supporting} will always be
#'   \code{FALSE}.
#' @param clean_supporting Cleanup any supporting files after conversion see
#'   \code{\link{render_supporting_files}}
#' @param pre_processor An optional pre-processor function that receives the
#'   \code{metadata}, \code{input_file}, \code{runtime}, \code{knit_meta},
#'   \code{files_dir}, and \code{output_dir} and can return additional arguments
#'   to pass to pandoc.
#' @param intermediates_generator An optional function that receives the
#'   original \code{input_file}, its \code{encoding}, and the intermediates
#'   directory (i.e. the \code{intermediates_dir} argument to
#'   \code{\link{render}}). The function should generate and return the names of
#'   any intermediate files required to render the \code{input_file}.
#' @param post_processor An optional post-processor function that receives the
#'   \code{metadata}, \code{input_file}, \code{output_file}, \code{clean},
#'   and \code{verbose} parmaeters, and can return an alternative
#'   \code{output_file}.
#' @param base_format An optional format to extend.
#'
#' @return An R Markdown output format definition that can be passed to
#'   \code{\link{render}}.
#'
#' @seealso \link{render}, \link{knitr_options}, \link{pandoc_options}
#'
#' @examples
#' \dontrun{
#' output_format(knitr = knitr_options(opts_chunk = list(dev = 'png')),
#'               pandoc = pandoc_options(to = "html"))
#' }
#'
#' @export
output_format <- function(knitr,
                          pandoc,
                          keep_md = FALSE,
                          clean_supporting = TRUE,
                          pre_processor = NULL,
                          intermediates_generator = NULL,
                          post_processor = NULL,
                          base_format = NULL) {
  format <- structure(list(knitr = knitr,
                 pandoc = pandoc,
                 keep_md = keep_md,
                 clean_supporting = clean_supporting && !keep_md,
                 pre_processor = pre_processor,
                 intermediates_generator = intermediates_generator,
                 post_processor = post_processor),
            class = "rmarkdown_output_format")

  # if a base format was supplied, merge it with the format we just created
  if (!is.null(base_format))
    merge_output_formats(base_format, format)
  else
    format
}

# merges two scalar values; picks the overlay if non-NULL and then the base
merge_scalar <- function (base, overlay) {
  if (is.null(base) && is.null(overlay))
    NULL
  else if (is.null(overlay))
    base
  else
    overlay
}

# merges two functions: if both are non-NULL, produces a new function that
# invokes each and then uses the supplied operation to combine their outputs
merge_function_outputs <- function (base, overlay, op) {
  if (!is.null(base) && !is.null(overlay)) {
    function (...) {
      op(base(...), overlay(...))
    }
  } else {
    merge_scalar(base, overlay)
  }
}

# merges two post-processors; if both are non-NULL, produces a new function that
# calls the overlay post-processor and then the base post-processor.
merge_post_processors <- function (base, overlay) {
  if (!is.null(base) && !is.null(overlay)) {
    function(metadata, input_file, output_file, ...) {
      output_file <- overlay(metadata, input_file, output_file, ...)
      base(metadata, input_file, output_file, ...)
    }
  }
  else {
    merge_scalar(base, overlay)
  }
}

# merges two output formats
merge_output_formats <- function(base, overlay)  {
  structure(list(
    knitr = merge_lists(base$knitr, overlay$knitr),
    pandoc = pandoc_options(
      to = merge_scalar(base$pandoc$to, overlay$pandoc$to),
      from = merge_scalar(base$pandoc$from, overlay$pandoc$from),
      args = c(base$pandoc$args, overlay$pandoc$args)),
    keep_md =
      merge_scalar(base$keep_md, overlay$keep_md),
    clean_supporting =
      merge_scalar(base$clean_supporting, overlay$clean_supporting),
    pre_processor =
      merge_function_outputs(base$pre_processor, overlay$pre_processor, c),
    intermediates_generator =
      merge_function_outputs(base$intermediates_generator,
                             overlay$intermediates_generator, c),
    post_processor =
      merge_post_processors(base$post_processor, overlay$post_processor)
  ), class = "rmarkdown_output_format")
}

#' Knitr options for an output format
#'
#' Define the knitr options for an R Markdown output format.
#'
#' @param opts_knit List of package level knitr options (see
#'   \code{\link[knitr:opts_knit]{opts_knit}})
#' @param opts_chunk List of chunk level knitr options (see
#'   \code{\link[knitr:opts_chunk]{opts_chunk}})
#' @param knit_hooks List of hooks for R code chunks, inline R code, and output
#'   (see \code{\link[knitr:knit_hooks]{knit_hooks}})
#' @param opts_template List of templates for chunk level knitr options (see
#'   \code{\link[knitr:opts_template]{opts_template}})
#'
#' @return An list that can be passed as the \code{knitr} argument of the
#'   \code{\link{output_format}} function.
#'
#' @seealso \link{output_format}
#'
#' @export
knitr_options <- function(opts_knit = NULL,
                          opts_chunk = NULL,
                          knit_hooks = NULL,
                          opts_template = NULL) {
  list(opts_knit = opts_knit,
       opts_chunk = opts_chunk,
       knit_hooks = knit_hooks,
       opts_template = opts_template)
}

#' Knitr options for a PDF output format
#'
#' Define knitr options for an R Markdown output format that creates PDF output.
#'
#' @inheritParams html_document
#' @inheritParams pdf_document
#'
#' @return An list that can be passed as the \code{knitr} argument of the
#'   \code{\link{output_format}} function.
#'
#' @seealso \link{knitr_options}, \link{output_format}
#'
#' @export
knitr_options_pdf <- function(fig_width, fig_height, fig_crop, dev = 'pdf') {

  # default options
  opts_knit <- NULL
  opts_chunk <- list(dev = dev,
                     fig.width = fig_width,
                     fig.height = fig_height)

  # set the dingbats option for the pdf device if requried
  if (dev == 'pdf') {
    if (utils::packageVersion("knitr") >= "1.5.31") {
      opts_chunk$dev.args <- list(pdf = list(useDingbats = FALSE))
    } else grDevices::pdf.options(useDingbats = FALSE)
  }

  knit_hooks <- NULL

  # apply cropping if requested and we have pdfcrop
  # crop <- fig_crop && !is_windows() && nzchar(find_program("pdfcrop"))
  crop <- fig_crop && nzchar(find_program("pdfcrop"))
  if (crop) {
    knit_hooks = list(crop = knitr::hook_pdfcrop)
    opts_chunk$crop = TRUE
  }

  # return options
  knitr_options(opts_knit = opts_knit,
                opts_chunk = opts_chunk,
                knit_hooks = knit_hooks)
}


#' Pandoc options for an output format
#'
#' Define the pandoc options for an R Markdown output format.
#'
#' @param to Pandoc format to convert to
#' @param from Pandoc format to convert from
#' @param args Character vector of command line arguments to pass to pandoc
#' @param keep_tex Keep the intermediate tex file used in the conversion to PDF
#'   (applies only to 'latex' and 'beamer' target formats)
#' @param ext File extension (e.g. ".tex") for output file (if \code{NULL}
#'   chooses default based on \code{to}). This is typically used to force
#'   the final output of a latex or beamer converstion to be \code{.tex}
#'   rather than \code{.pdf}.
#'
#' @return An list that can be passed as the \code{pandoc} argument of the
#'   \code{\link{output_format}} function.
#'
#' @details The \code{from} argument should be used very cautiously as it's
#'   important for users to be able to rely on a stable definition of supported
#'   markdown extensions.
#'
#' @seealso \link{output_format}, \link{rmarkdown_format}
#'
#' @export
pandoc_options <- function(to,
                           from = rmarkdown_format(),
                           args = NULL,
                           keep_tex = FALSE,
                           ext = NULL) {
  list(to = to,
       from = from,
       args = args,
       keep_tex = keep_tex,
       ext = ext)
}

#' R Markdown input format definition
#'
#' Compose a pandoc markdown input definition for R Markdown that can be
#' passed as the \code{from} argument of \link{pandoc_options}.
#'
#' @param extensions Markdown extensions to be added or removed from the
#' default definition of R Markdown.
#'
#' @return Pandoc markdown format specification
#'
#' @details
#'
#' By default R Markdown is defined as all pandoc markdown extensions with
#' the following tweaks for backward compatibility with the markdown package
#' (+ features are added, - features are removed):
#'
#' \tabular{l}{
#' \code{+autolink_bare_uris} \cr
#' \code{+ascii_identifier} \cr
#' \code{+tex_math_single_backslash} \cr
#' }
#'
#'
#' For more on pandoc markdown see the \href{http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html}{pandoc markdown specification}.
#'
#' @examples
#' \dontrun{
#' rmarkdown_format("-implicit_figures")
#' }
#'
#' @seealso \link{output_format}, \link{pandoc_options}
#'
#' @export
rmarkdown_format <- function(extensions = NULL) {

  format <- c("markdown")

  # only add extensions if the user hasn't already specified
  # a manual override for them
  addExtension <- function(extension) {
    if (length(grep(extension, extensions)) == 0)
      format <<- c(format, paste0("+", extension))
  }

  addExtension("autolink_bare_uris")
  addExtension("ascii_identifiers")
  addExtension("tex_math_single_backslash")

  format <- c(format, extensions, recursive = TRUE)

  paste(format, collapse = "")
}

#' Determine the default output format for an R Markdown document
#'
#' Read the YAML metadata (and any common _output.yaml file) for the
#' document and return the output format that will be generated by
#' a call to \code{\link{render}}.
#'
#' @param input Input file (Rmd or plain markdown)
#' @param encoding The encoding of the input file; see \code{\link{file}}
#'
#' @return A named list with a \code{name} value containing the format
#'   name and an \code{options} value that is a list containing all the options
#'   for the format and their values. An option's default value will be returned
#'   if the option isn't set explicitly in the document.
#'
#' @details
#'
#' This function is useful for front-end tools that require additional
#' knowledge of the output to be produced by \code{\link{render}} (e.g. to
#' customize the preview experience).
#'
#' @export
default_output_format <- function(input, encoding = getOption("encoding")) {

  # execute within the input file's directory (this emulates the way
  # yaml front matter discovery is done within render)
  oldwd <- setwd(dirname(tools::file_path_as_absolute(input)))
  on.exit(setwd(oldwd), add = TRUE)

  # parse the YAML and front matter and get the explicitly set options
  input_lines <- read_lines_utf8(input, encoding)
  format <- output_format_from_yaml_front_matter(input_lines)

  # look up the formals of the output function to get the full option list and
  # merge against the explicitly set list
  format_function <- eval(parse(text = format$name))
  format$options <- merge_lists(as.list(formals(format_function)),
                                format$options,
                                recursive = FALSE)
  format
}


# Synthesize the output format for a document from it's YAML. If we can't
# find an output format then we just return html_document
output_format_from_yaml_front_matter <- function(input_lines,
                                                 output_options = NULL,
                                                 output_format_name = NULL) {

  # ensure input is the correct data type
  if (!is_null_or_string(output_format_name)) {
    stop("Unrecognized output format specified", call. = FALSE)
  }

  # parse the yaml
  yaml_front_matter <- parse_yaml_front_matter(input_lines)

  # default to no options
  output_format_options <- list()

  # parse common _output.yaml if we have it
  if (file.exists("_output.yaml"))
    common_output_format_yaml <- yaml_load_file_utf8("_output.yaml")
  else
    common_output_format_yaml <- list()

  # parse output format from front-matter if we have it
  if (length(common_output_format_yaml) > 0 ||
      length(yaml_front_matter$output) > 0) {

    # alias the output format yaml
    output_format_yaml <- yaml_front_matter$output

    # merge against common _output.yaml
    output_format_yaml <- merge_output_options(common_output_format_yaml,
                                               output_format_yaml)

    # if a named format was provided then try to find it
    if (!is.null(output_format_name)) {

      # if this is a named element of the list then use that
      if (output_format_name %in% names(output_format_yaml)) {

        output_format_options <- output_format_yaml[[output_format_name]]

      # otherwise this could be a heterogeneous list of characters and
      # lists so scan for an embedded list
      } else {
        for (format in output_format_yaml) {
          if (is.list(format) && !is.null(format[[output_format_name]]))
            output_format_options <- format[[output_format_name]]
        }
      }

      # if the options are just "default" then that's the same as empty list
      if (identical(output_format_options, "default"))
        output_format_options <- list()

    # no named format passed so take the first element
    } else {
      if (is.list(output_format_yaml[[1]])) {
        # check for named list
        if (nzchar(names(output_format_yaml)[[1]])) {
          output_format_name <- names(output_format_yaml)[[1]]
          output_format_options <- output_format_yaml[[1]]
        # nested named list
        } else {
          output_format_name <- names(output_format_yaml[[1]])[[1]]
          output_format_options <- output_format_yaml[[1]][[output_format_name]]
        }
      } else if (is.list(output_format_yaml) &&
                   (is.null(output_format_yaml[[1]]) ||
                      identical(output_format_yaml[[1]], "default"))) {
        output_format_name <- names(output_format_yaml)[[1]]

      } else {
        output_format_name <- output_format_yaml[[1]]
      }
    }

  # no output formats defined in the file, just take the passed format
  # by name (or default to html_document if no named format was specified)
  } else {
    if (is.null(output_format_name))
      output_format_name <- "html_document"
  }

  # merge any output_options passed in the call to render
  if (!is.null(output_options)) {
    output_format_options <- merge_output_options(output_format_options,
                                                  output_options)
  }

  # return the format name and options
  list(name = output_format_name,
       options = output_format_options)
}

create_output_format <- function(name, options) {

  # validate the name
  if (is.null(name))
    stop("The output format name must not be NULL", call. = FALSE)
  if (name == "revealjs_presentation")
    stop("reveal.js presentations are now located in a separate package: ",
         "https://github.com/jjallaire/revealjs")

  # lookup the function
  output_format_func <- eval(parse(text = name))
  if (!is.function(output_format_func))
    stop("YAML output format must evaluate to a function", call. = FALSE)

  # call the function
  output_format <- do.call(output_format_func, options)
  if (!is_output_format(output_format))
    stop("Format is not of class rmarkdown_output_format", call. = FALSE)

  # return the format
  output_format
}

is_output_format <- function(x) {
  inherits(x, "rmarkdown_output_format")
}

enumerate_output_formats <- function(input, envir, encoding) {

  # read the input
  input_lines <- read_lines_utf8(input, encoding)

  # if this is an R file then spin it
  if (identical(tolower(tools::file_ext(input)), "r"))
    input_lines <- knitr::spin(text = input_lines, knit = FALSE, envir = envir)

  # read the ymal front matter
  yaml_front_matter <- parse_yaml_front_matter(input_lines)

  # read any _output.yaml
  output_yaml <- file.path(dirname(input), "_output.yaml")
  if (file.exists(output_yaml))
    common_output_format_yaml <- yaml_load_file_utf8(output_yaml)
  else
    common_output_format_yaml <- list()

  # parse output formats from front-matter if we have it
  if (length(common_output_format_yaml) > 0 ||
      length(yaml_front_matter$output) > 0) {

    # alias the output format yaml
    output_format_yaml <- yaml_front_matter$output

    # merge against common _output.yaml
    output_format_yaml <- merge_output_options(common_output_format_yaml,
                                               output_format_yaml)
  }
  else {
    output_format_yaml <- NULL
  }

  # return them by name
  if (is.character(output_format_yaml)) {
    output_format_yaml
  } else if (is.list(output_format_yaml)) {
    names(output_format_yaml)
  } else {
    NULL
  }
}

parse_yaml_front_matter <- function(input_lines) {

  partitions <- partition_yaml_front_matter(input_lines)
  if (!is.null(partitions$front_matter)) {
    front_matter <- partitions$front_matter
    if (length(front_matter) > 2) {
      front_matter <- front_matter[2:(length(front_matter)-1)]
      front_matter <- paste(front_matter, collapse="\n")
      validate_front_matter(front_matter)
      parsed_yaml <- yaml_load_utf8(front_matter)
      if (is.list(parsed_yaml))
        parsed_yaml
      else
        list()
    }
    else
      list()
  }
  else
    list()
}

validate_front_matter <- function(front_matter) {
  front_matter <- trim_trailing_ws(front_matter)
  if (grepl(":$", front_matter))
    stop("Invalid YAML front matter (ends with ':')", call. = FALSE)
}



partition_yaml_front_matter <- function(input_lines) {

  validate_front_matter <- function(delimiters) {
    if (length(delimiters) >= 2 &&
        (delimiters[2] - delimiters[1] > 1) &&
        grepl("^---\\s*$", input_lines[delimiters[1]])) {
      # verify that it's truly front matter (not preceded by other content)
      if (delimiters[1] == 1)
        TRUE
      else
        is_blank(input_lines[1:delimiters[1]-1])
    } else {
      FALSE
    }
  }

  # is there yaml front matter?
  delimiters <- grep("^(---|\\.\\.\\.)\\s*$", input_lines)
  if (validate_front_matter(delimiters)) {

    front_matter <- input_lines[(delimiters[1]):(delimiters[2])]

    input_body <- c()

    if (delimiters[1] > 1)
      input_body <- c(input_body,
                      input_lines[1:delimiters[1]-1])

    if (delimiters[2] < length(input_lines))
      input_body <- c(input_body,
                      input_lines[-(1:delimiters[2])])

    list(front_matter = front_matter,
         body = input_body)
  }
  else {
    list(front_matter = NULL,
         body = input_lines)
  }
}

merge_output_options <- function(base_options, overlay_options) {

  # if either one of these is a character vector then normalize to a named list
  normalize_list <- function(target_list) {
    if (is.null(target_list))
      list()
    else if (is.character(target_list)) {
      new_list <- list()
      for (name in target_list)
        new_list[[name]] <- list()
      new_list
    } else {
      # remove symbols (...) from list
      target_list <- target_list[names(target_list) != "..."]
      target_list
    }
  }

  base_options <- normalize_list(base_options)
  overlay_options <- normalize_list(overlay_options)

  merge_lists(base_options, overlay_options)
}

is_pandoc_to_html <- function(options) {
  identical(options$to, "html") || identical(options$to, "html5")
}

citeproc_required <- function(yaml_front_matter, input_lines = NULL) {
  !is.null(yaml_front_matter$bibliography) ||
  !is.null(yaml_front_matter$references) ||
  length(grep("^references\\:\\s*$", input_lines)) > 0
}
UNFAOstatistics/faodoc documentation built on May 9, 2019, 7:45 p.m.