R/output_format.R
In rmarkdown: Dynamic Documents for R

Documented in all_output_formats default_output_format knitr_options knitr_options_pdf output_format output_format_dependency pandoc_options resolve_output_format rmarkdown_format yaml_front_matter

#' Define an R Markdown output format
#'
#' Define an R Markdown output format based on a combination of knitr and pandoc
#' options.
#' @param knitr Knitr options for an output format (see
#'   \code{\link{knitr_options}})
#' @param pandoc Pandoc options for an output format (see
#'   \code{\link{pandoc_options}})
#' @param keep_md Keep the markdown file generated by knitting. Note that if
#'   this is \code{TRUE} then \code{clean_supporting} will always be
#'   \code{FALSE}.
#' @param clean_supporting Cleanup any supporting files after conversion see
#'   \code{\link{render_supporting_files}}
#' @param df_print Method to be used for printing data frames. Valid values
#'   include "default", "kable", "tibble", and "paged". The "default" method
#'   uses a corresponding S3 method of \code{print}, typically
#'   \code{print.data.frame}. The "kable" method uses the
#'   \code{\link[knitr:kable]{knitr::kable}} function. The "tibble" method uses
#'   the \pkg{tibble} package to print a summary of the data frame. The "paged"
#'   method creates a paginated HTML table (note that this method is only valid
#'   for formats that produce HTML). In addition to the named methods you can
#'   also pass an arbitrary function to be used for printing data frames. You
#'   can disable the \code{df_print} behavior entirely by setting the option
#'   \code{rmarkdown.df_print} to \code{FALSE}. See
#'   \href{https://bookdown.org/yihui/rmarkdown/html-document.html#data-frame-printing}{Data
#'   frame printing section} in bookdown book for examples.
#' @param pre_knit An optional function that runs before knitting which receives
#'   the \code{input} (input filename passed to \code{render}), \code{metadata}
#'   (the parsed front matter of the Rmd file) and \code{...} (for future
#'   expansion) arguments. This function can be used to add side effects before
#'   knitting step.
#' @param post_knit An optional function that runs after knitting which receives
#'   the \code{metadata}, \code{input_file}, \code{runtime}, and \code{...} (for
#'   future expansion) arguments. This function can return additional arguments
#'   to pass to pandoc and can call \code{knitr::knit_meta_add} to add
#'   additional dependencies based on the contents of the input_file or on other
#'   assets side by side with it that may be used to produce html with
#'   dependencies during subsequent processing.
#' @param pre_processor An optional pre-processor function that receives the
#'   \code{metadata}, \code{input_file}, \code{runtime}, \code{knit_meta},
#'   \code{files_dir}, and \code{output_dir} and can return additional arguments
#'   to pass to pandoc.
#' @param intermediates_generator An optional function that receives the
#'   original \code{input_file}, and the intermediates directory (i.e. the
#'   \code{intermediates_dir} argument to \code{\link{render}}). The function
#'   should generate and return the names of any intermediate files required to
#'   render the \code{input_file}.
#' @param post_processor An optional post-processor function that receives the
#'   \code{metadata}, \code{input_file}, \code{output_file}, \code{clean}, and
#'   \code{verbose} parameters, and can return an alternative
#'   \code{output_file}.
#' @param on_exit A function to call when \code{rmarkdown::render()} finishes
#'   execution (as registered with a \code{\link{on.exit}} handler).
#' @param file_scope A function that will split markdown input to pandoc into
#'   multiple named files. This is useful when the caller has concatenated a set
#'   of Rmd files together (as \pkg{bookdown} does), and those files may need to
#'   processed by pandoc using the \code{--file-scope} option. The first
#'   argument is input file paths and the second is \code{NULL} or current file
#'   scope which is a named list of files w/ \code{name} and \code{content} for
#'   each file. The return is the new file scope. Also, the arguments should
#'   include \code{...} for the future extensions.
#' @param base_format An optional format to extend.
#' @return An R Markdown output format definition that can be passed to
#'   \code{\link{render}}.
#' @seealso \link{render}, \link{knitr_options}, \link{pandoc_options}
#' @examples
#' \dontrun{
#' output_format(knitr = knitr_options(opts_chunk = list(dev = 'png')),
#'               pandoc = pandoc_options(to = "html"))
#' }
#' @export
output_format <- function(knitr,
                          pandoc,
                          keep_md = FALSE,
                          clean_supporting = TRUE,
                          df_print = NULL,
                          pre_knit = NULL,
                          post_knit = NULL,
                          pre_processor = NULL,
                          intermediates_generator = NULL,
                          post_processor = NULL,
                          on_exit = NULL,
                          file_scope = NULL,
                          base_format = NULL) {

  format <- list(
    knitr = knitr,
    pandoc = pandoc,
    keep_md = keep_md,
    clean_supporting = if (isTRUE(keep_md)) FALSE else clean_supporting,
    df_print = df_print,
    pre_knit = pre_knit,
    post_knit = post_knit,
    pre_processor = pre_processor,
    intermediates_generator = intermediates_generator,
    post_processor = post_processor,
    file_scope = file_scope,
    on_exit = on_exit
  )

  class(format) <- "rmarkdown_output_format"

  # if a base format was supplied, merge it with the format we just created
  if (!is.null(base_format))
    merge_output_formats(base_format, format)
  else
    format
}

# merges two scalar values; picks the overlay if non-NULL and then the base
merge_scalar <- function(base,
                         overlay) {

  if (is.null(base) && is.null(overlay))
    NULL
  else if (is.null(overlay))
    base
  else
    overlay
}

# merges two functions: if both are non-NULL, produces a new function that
# invokes each and then uses the supplied operation to combine their outputs
merge_function_outputs <- function(base,
                                   overlay,
                                   op) {

  if (!is.null(base) && !is.null(overlay)) {
    function(...) {
      op(base(...), overlay(...))
    }
  } else {
    merge_scalar(base, overlay)
  }
}

# merges two post-processors; if both are non-NULL, produces a new function that
# calls the overlay post-processor and then the base post-processor.
merge_post_processors <- function(base,
                                  overlay) {

  if (!is.null(base) && !is.null(overlay)) {
    function(metadata, input_file, output_file, ...) {

      # record original output file
      original_output_file <- output_file

      # call overlay post processor
      output_file <- overlay(metadata, input_file, output_file, ...)

      # also call base post processor on original file if requested
      if (!is.null(attr(output_file, "post_process_original")))
        base(metadata, input_file, original_output_file, ...)

      # call base post processor
      base(metadata, input_file, output_file, ...)
    }
  }
  else {
    merge_scalar(base, overlay)
  }
}

merge_file_scope <- function(base,
                             overlay) {
  if (is.null(overlay)) {
    return(base)
  }
  has_ellipsis <- "..." %in% names(formals(overlay))
  if (!has_ellipsis) {
    warning("file_scope lacks ... as an argument. ",
            "Otherwise, file_scope replaces file_scope without merging.")
  }
  if (is.null(base) || !has_ellipsis) {
    return(overlay)
  }
  if (has_ellipsis) {
    return(function(x, current_scope = NULL, ...) {
      scope <- base(x, current_scope, ...)
      overlay(x, scope, ...)
    })
  }
}

# merges two output formats
merge_output_formats <- function(base,
                                 overlay) {

  structure(list(
    knitr = merge_lists(base$knitr, overlay$knitr),
    pandoc = merge_pandoc_options(base$pandoc, overlay$pandoc),
    keep_md =
      merge_scalar(base$keep_md, overlay$keep_md),
    clean_supporting =
      merge_scalar(base$clean_supporting, overlay$clean_supporting),
    df_print =
      merge_scalar(base$df_print, overlay$df_print),
    pre_knit =
      merge_function_outputs(base$pre_knit, overlay$pre_knit, c),
    post_knit =
      merge_function_outputs(base$post_knit, overlay$post_knit, c),
    pre_processor =
      merge_function_outputs(base$pre_processor, overlay$pre_processor, c),
    intermediates_generator =
      merge_function_outputs(base$intermediates_generator,
                             overlay$intermediates_generator, c),
    post_processor =
      merge_post_processors(base$post_processor, overlay$post_processor),
    file_scope = merge_file_scope(base$file_scope, overlay$file_scope),
    on_exit =
      merge_on_exit(base$on_exit, overlay$on_exit)
  ), class = "rmarkdown_output_format")
}

merge_on_exit <- function(base,
                          overlay) {

  function() {
    if (is.function(base)) base()
    if (is.function(overlay)) overlay()
  }
}

merge_pandoc_options <- function(base,
                                 overlay) {

  res <- merge_lists(base, overlay, recursive = FALSE)
  res$args <- c(base$args, overlay$args)
  res$lua_filters <- c(base$lua_filters, overlay$lua_filters)
  res
}

#' Knitr options for an output format
#'
#' Define the knitr options for an R Markdown output format.
#' @param opts_knit List of package level knitr options (see
#'   \code{\link[knitr:opts_knit]{opts_knit}})
#' @param opts_chunk List of chunk level knitr options (see
#'   \code{\link[knitr:opts_chunk]{opts_chunk}})
#' @param knit_hooks List of hooks for R code chunks, inline R code, and output
#'   (see \code{\link[knitr:knit_hooks]{knit_hooks}})
#' @param opts_hooks List of hooks for code chunk options
#'   (see \code{\link[knitr:opts_hooks]{opts_hooks}})
#' @param opts_template List of templates for chunk level knitr options (see
#'   \code{\link[knitr:opts_template]{opts_template}})
#' @return An list that can be passed as the \code{knitr} argument
#'   of the \code{\link{output_format}} function.
#' @seealso \link{output_format}
#' @export
knitr_options <- function(opts_knit = NULL,
                          opts_chunk = NULL,
                          knit_hooks = NULL,
                          opts_hooks = NULL,
                          opts_template = NULL) {

  list(opts_knit = opts_knit,
       opts_chunk = opts_chunk,
       knit_hooks = knit_hooks,
       opts_hooks = opts_hooks,
       opts_template = opts_template)
}

#' Knitr options for a PDF output format
#'
#' Define knitr options for an R Markdown output format
#' that creates PDF output.
#' @inheritParams html_document
#' @inheritParams pdf_document
#' @return An list that can be passed as the \code{knitr} argument of the
#'   \code{\link{output_format}} function.
#' @seealso \link{knitr_options}, \link{output_format}
#' @export
knitr_options_pdf <- function(fig_width,
                              fig_height,
                              fig_crop,
                              dev = 'pdf') {

  # default options
  opts_knit <- NULL
  opts_chunk <- list(dev = dev,
                     fig.width = fig_width,
                     fig.height = fig_height)

  # set the dingbats option for the pdf device if required
  if (dev == 'pdf')
    opts_chunk$dev.args <- list(pdf = list(useDingbats = FALSE))

  knit_hooks <- NULL

  # apply cropping if requested and we have pdfcrop and ghostscript
  if (identical(fig_crop, 'auto')) fig_crop <- has_crop_tools(FALSE) else {
    if (fig_crop && !has_crop_tools()) fig_crop <- FALSE
  }
  if (fig_crop) {
    knit_hooks <- list(crop = knitr::hook_pdfcrop)
    opts_chunk$crop <- TRUE
  }

  # return options
  knitr_options(opts_knit = opts_knit,
                opts_chunk = opts_chunk,
                knit_hooks = knit_hooks)
}


#' Pandoc options for an output format
#'
#' Define the pandoc options for an R Markdown output format.
#'
#' The \code{from} argument should be used very cautiously as it's important for
#' users to be able to rely on a stable definition of supported markdown
#' extensions.
#' @param to Pandoc format to convert to
#' @param from Pandoc format to convert from
#' @param args Character vector of command line arguments to pass to pandoc
#' @param keep_tex Keep the intermediate tex file used in the conversion to PDF
#'   (applies only to 'latex' and 'beamer' target formats)
#' @param latex_engine LaTeX engine to producing PDF output (applies only to
#'   'latex' and 'beamer' target formats)
#' @param ext File extension (e.g. ".tex") for output file (if \code{NULL}
#'   chooses default based on \code{to}). This is typically used to force the
#'   final output of a latex or beamer conversion to be \code{.tex} rather than
#'   \code{.pdf}.
#' @param lua_filters  Character vector of file paths to Lua filters to use with
#'   this format. They will be added to pandoc command line call using
#'   \code{--lua-filter} argument. See \code{vignette("lua-filters", package =
#'   "rmarkdown")} to know more about Lua filters.
#' @param convert_fun A function to convert the input file to the desired output
#'   format in \code{\link{render}()}. If not provided,
#'   \code{\link{pandoc_convert}()} will be used. If a custom function is
#'   provided, its arguments and returned value should match the
#'   \code{pandoc_convert()} function. Note that this function does not have to
#'   use Pandoc but can also use other tools such as \pkg{commonmark}.
#' @return An list that can be passed as the \code{pandoc} argument of the
#'   \code{\link{output_format}} function.
#' @seealso \link{output_format}, \link{rmarkdown_format}
#' @export
pandoc_options <- function(to,
                           from = rmarkdown_format(),
                           args = NULL,
                           keep_tex = FALSE,
                           latex_engine = c("pdflatex", "lualatex", "xelatex", "tectonic"),
                           ext = NULL,
                           lua_filters = NULL,
                           convert_fun = NULL) {
  list(to = to,
       from = from,
       args = args,
       keep_tex = keep_tex,
       latex_engine = match.arg(latex_engine),
       ext = ext,
       convert_fun = convert_fun,
       lua_filters = lua_filters)
}

#' R Markdown input format definition
#'
#' Compose a pandoc markdown input definition for R Markdown
#' that can be passed as the \code{from} argument of \link{pandoc_options}.
#'
#' By default R Markdown is defined as all pandoc markdown extensions with
#' the following tweaks for backward compatibility with the markdown package
#' (+ features are added, - features are removed):
#'
#' \tabular{l}{
#' \code{+autolink_bare_uris} \cr
#' \code{+tex_math_single_backslash} \cr
#' }
#'
#' For more on pandoc markdown see the
#' \href{https://pandoc.org/MANUAL.html}{pandoc online documentation}.
#' @param implicit_figures Automatically make figures from images (defaults to \code{TRUE}).
#' @param extensions Markdown extensions to be added or removed from the
#' default definition of R Markdown.
#' @return Pandoc markdown format specification
#' @examples
#' \dontrun{
#' rmarkdown_format("-implicit_figures")
#' }
#' @seealso \link{output_format}, \link{pandoc_options}
#' @export
rmarkdown_format <- function(extensions = NULL) {

  format <- c("markdown")

  # only add extensions if the user hasn't already specified
  # a manual override for them
  addExtension <- function(extension) {
    if (length(grep(extension, extensions)) == 0)
      format <<- c(format, paste0("+", extension))
  }

  addExtension("autolink_bare_uris")
  addExtension("tex_math_single_backslash")

  format <- c(format, extensions, recursive = TRUE)

  paste(format, collapse = "")
}

#' Determine the default output format for an R Markdown document
#'
#' Read the YAML metadata (and any common output YAML file) for the
#' document and return the output format that will be generated by
#' a call to \code{\link{render}}.
#'
#' This function is useful for front-end tools that require additional
#' knowledge of the output to be produced by \code{\link{render}} (e.g. to
#' customize the preview experience).
#' @param input Input file (Rmd or plain markdown)
#' @param output_yaml Paths to YAML files specifying output formats and their
#' configurations. The first existing one is used. If none are found, then
#' the function searches YAML files specified to the \code{output_yaml} top-level
#' parameter in the YAML front matter, _output.yml or _output.yaml, and then uses
#' the first existing one.
#' @return A named list with a \code{name} value containing the format
#'   name and an \code{options} value that is a list containing all the options
#'   for the format and their values. An option's default value will be returned
#'   if the option isn't set explicitly in the document.
#' @export
default_output_format <- function(input, output_yaml = NULL) {

  # execute within the input file's directory (this emulates the way
  # yaml front matter discovery is done within render)
  oldwd <- setwd(dirname(abs_path(input)))
  on.exit(setwd(oldwd), add = TRUE)

  # because we're now within the same directory as the input file,
  # we just need its basename
  input <- basename(input)

  # parse the YAML and front matter and get the explicitly set options
  input_lines <- read_utf8(input)
  format <- output_format_from_yaml_front_matter(
    input_lines, output_yaml = output_yaml)

  # look up the formals of the output function to get the full option list and
  # merge against the explicitly set list
  format_function <- eval(xfun::parse_only(format$name))
  format$options <- merge_lists(as.list(formals(format_function)),
                                format$options,
                                recursive = FALSE)
  format
}

#' Resolve the output format for an R Markdown document
#'
#' Read the YAML metadata (and any common output YAML file) for the
#' document and return an output format object that can be
#' passed to the \code{\link{render}} function.
#'
#' This function is useful for front-end tools that need to modify
#' the default behavior of an output format.
#' @inheritParams default_output_format
#' @param input Input file (Rmd or plain markdown)
#' @param output_format Name of output format (or \code{NULL} to use
#'   the default format for the input file).
#' @param output_options List of output options that should override the
#'   options specified in metadata.
#' @return An R Markdown output format definition that can be passed to
#'   \code{\link{render}}.
#' @export
resolve_output_format <- function(input,
                                  output_format = NULL,
                                  output_options = NULL,
                                  output_yaml = NULL) {

  # read the input file
  input_lines <- read_utf8(input)

  # validate that the output format is either NULL or a character vector
  if (!is.null(output_format) && !is.character(output_format))
    stop("output_format must be a character vector")

  # resolve the output format by looking at the yaml
  output_format <-
    output_format_from_yaml_front_matter(
      input_lines,
      output_options,
      output_format,
      output_yaml)

  # return it
  create_output_format(output_format$name, output_format$options)
}


#' Determine all output formats for an R Markdown document
#'
#' Read the YAML metadata (and any common output YAML file) for the
#' document and return the output formats that will be generated by
#' a call to \code{\link{render}}.
#'
#' This function is useful for front-end tools that require additional
#' knowledge of the output to be produced by \code{\link{render}} (e.g. to
#' customize the preview experience).
#' @inheritParams default_output_format
#' @param input Input file (Rmd or plain markdown)
#' @return A character vector with the names of all output formats.
#' @export
all_output_formats <- function(input, output_yaml = NULL) {
  enumerate_output_formats(input, output_yaml = output_yaml)
}

# Synthesize the output format for a document from it's YAML. If we can't
# find an output format then we use a default one based on the output_file extension
# or just return html_document
output_format_from_yaml_front_matter <- function(input_lines,
                                                 output_options = NULL,
                                                 output_format_name = NULL,
                                                 output_yaml = NULL,
                                                 output_file = NULL) {

  format_name <- output_format_name

  # ensure input is the correct data type
  if (!is_null_or_string(format_name)) {
    stop2("Unrecognized output format specified")
  }

  # parse the yaml
  yaml_input <- parse_yaml_front_matter(input_lines)

  # default to no options
  format_options <- list()

  # parse _site.yml output format if we have it
  config <- site_config(".")
  yaml_site <- config[["output"]]

  # parse common output yaml file if we have it
  output_yaml <- c(output_yaml, yaml_input$output_yaml, "_output.yml", "_output.yaml")
  output_yaml <- output_yaml[file.exists(output_yaml)][1L]
  yaml_common <- if (!is.na(output_yaml)) yaml_load_file(output_yaml)

  # merge _site.yml and output_yaml
  yaml_common <- merge_output_options(yaml_site, yaml_common)

  # parse output format from front-matter if we have it
  if (length(yaml_common) || length(yaml_input[["output"]])) {

    # alias the output format yaml
    yaml_output <- yaml_input[["output"]]

    # merge against common output yaml
    yaml_output <- merge_output_options(yaml_common, yaml_output)

    # if a named format was provided then try to find it
    if (!is.null(format_name)) {

      # if this is a named element of the list then use that
      if (format_name %in% names(yaml_output)) {

        format_options <- yaml_output[[format_name]]

      # otherwise this could be a heterogeneous list of characters and
      # lists so scan for an embedded list
      } else {
        for (format in yaml_output) {
          if (is.list(format) && !is.null(format[[format_name]]))
            format_options <- format[[format_name]]
        }
      }

      # if the options are just "default" then that's the same as empty list
      if (identical(format_options, "default")) format_options <- list()

    # no named format passed so take the first element
    } else {
      if (is.list(yaml_output[[1]])) {
        # check for named list
        if (nzchar(format_name <- names(yaml_output)[[1]])) {
          format_options <- yaml_output[[1]]
        # nested named list
        } else {
          format_name <- names(yaml_output[[1]])[[1]]
          format_options <- yaml_output[[1]][[format_name]]
        }
      } else if (is.list(yaml_output) &&
                   (is.null(yaml_output[[1]]) ||
                      identical(yaml_output[[1]], "default"))) {
        format_name <- names(yaml_output)[[1]]

      } else {
        format_name <- yaml_output[[1]]
      }
    }

  # no output formats defined in the file, just take the passed format by name,
  # or default to a format based on the output_file extension if any,
  # or html_document
  } else {
    if (is.null(format_name)) {
      format_name <- output_format_string_from_ext(output_file)
    }
  }

  # merge any output_options passed in the call to render
  if (!is.null(output_options)) {

    # create the output format function so we can check it's formals. strip options
    # not within the formals
    output_func <- create_output_format_function(format_name)
    output_func_params <- names(formals(output_func))
    if (!"..." %in% output_func_params) {
      unsupported_options <- setdiff(names(output_options), output_func_params)
      output_options[unsupported_options] <- NULL
    }

    # merge the output_options
    format_options <- merge_output_options(format_options, output_options)
  }

  # return the format name and options
  list(name = format_name, options = format_options)
}

create_output_format <- function(name,
                                 options) {

  # validate the name
  if (is.null(name))
    stop2("The output format name must not be NULL")
  if (name == "revealjs_presentation")
    stop("reveal.js presentations are now located in a separate package: ",
         "https://github.com/jjallaire/revealjs")

  # lookup the function
  output_format_func <- create_output_format_function(name)

  # call the function
  output_format <- do.call(output_format_func, options)
  if (!is_output_format(output_format))
    stop2("Format is not of class rmarkdown_output_format")

  # return the format
  output_format
}

output_format_string_from_ext <- function(output_file) {
  default_format <- "html_document"
  if (is.null(output_file)) return(default_format)
  switch(xfun::file_ext(output_file),
    html = "html_document",
    pdf = "pdf_document",
    docx = "word_document",
    default_format  # always been the default format in R Markdown
  )
}

create_output_format_function <- function(name) {
  output_format_func <- eval(xfun::parse_only(name))
  if (!is.function(output_format_func))
    stop2("YAML output format must evaluate to a function")
  output_format_func
}

is_output_format <- function(x) {
  inherits(x, "rmarkdown_output_format")
}

enumerate_output_formats <- function(input, envir, encoding, output_yaml = NULL) {

  # read the input
  input_lines <- read_utf8(input)

  # if this is an R file then spin it
  if (identical(tolower(xfun::file_ext(input)), "r"))
    input_lines <- knitr::spin(text = input_lines, knit = FALSE)

  # parse _site.yml output format if we have it
  config <- site_config(input)
  if (!is.null(config) && !is.null(config[["output"]])) {
    site_output_format_yaml <- config[["output"]]
  } else {
    site_output_format_yaml <- list()
  }

  # read the ymal front matter
  yaml_front_matter <- parse_yaml_front_matter(input_lines)

  # read any output yaml
  output_yaml <- file.path(
    dirname(input),
    c(output_yaml, yaml_front_matter$output_yaml, "_output.yml", "_output.yaml")
  )
  output_yaml <- output_yaml[file.exists(output_yaml)][1L]
  common_output_format_yaml <- if (is.na(output_yaml)) {
    list()
  } else {
    yaml_load_file(output_yaml)
  }

  # merge site and common
  common_output_format_yaml <- merge_output_options(site_output_format_yaml,
                                                    common_output_format_yaml)

  # parse output formats from front-matter if we have it
  if (length(common_output_format_yaml) > 0 ||
      length(yaml_front_matter[["output"]]) > 0) {

    # alias the output format yaml
    output_format_yaml <- yaml_front_matter[["output"]]

    # merge against common output yaml
    output_format_yaml <- merge_output_options(common_output_format_yaml,
                                               output_format_yaml)
  } else {
    output_format_yaml <- NULL
  }

  # return them by name
  if (is.character(output_format_yaml)) {
    output_format_yaml
  } else if (is.list(output_format_yaml)) {
    names(output_format_yaml)
  } else {
    NULL
  }
}

#' Parse the YAML front matter from a file
#' @inheritParams default_output_format
#' @inheritParams render
#' @keywords internal
#' @export
yaml_front_matter <- function(input, encoding = 'UTF-8') {
  parse_yaml_front_matter(read_utf8(input))
}

parse_yaml_front_matter <- function(input_lines) {

  partitions <- partition_yaml_front_matter(input_lines)
  if (!is.null(partitions$front_matter)) {
    front_matter <- partitions$front_matter
    if (length(front_matter) > 2) {
      front_matter <- front_matter[2:(length(front_matter) - 1)]
      front_matter <- one_string(front_matter)
      parsed_yaml <- yaml_load(front_matter)
      if (is.list(parsed_yaml))
        parsed_yaml
      else
        list()
    }
    else
      list()
  }
  else
    list()
}

partition_yaml_front_matter <- function(input_lines) {

  validate_front_matter <- function(delimiters) {
    if (length(delimiters) >= 2 &&
        (delimiters[2] - delimiters[1] > 1) &&
        grepl("^---\\s*$", input_lines[delimiters[1]])) {
      # verify that it's truly front matter, not preceded by
      # other content except blank lines or special comments
      # in html_notebook's intermediate .knit.md
      if (delimiters[1] == 1) {
        TRUE
      } else all(grepl(
        "^\\s*(<!-- rnb-\\w*-(begin|end) -->)?\\s*$",
        input_lines[1:delimiters[1] - 1]
      ))
    } else {
      FALSE
    }
  }

  # is there yaml front matter?
  delimiters <- grep("^(---|\\.\\.\\.)\\s*$", input_lines)
  if (validate_front_matter(delimiters)) {

    front_matter <- input_lines[(delimiters[1]):(delimiters[2])]

    input_body <- c()

    if (delimiters[1] > 1)
      input_body <- c(input_body,
                      input_lines[1:delimiters[1] - 1])

    if (delimiters[2] < length(input_lines))
      input_body <- c(input_body,
                      input_lines[-(1:delimiters[2])])

    list(front_matter = front_matter,
         body = input_body)
  }
  else {
    list(front_matter = NULL,
         body = input_lines)
  }
}

merge_output_options <- function(base_options,
                                 overlay_options) {

  # if either one of these is a character vector then normalize to a named list
  normalize_list <- function(target) {
    if (is.null(target)) {
      list()
    } else if (is.character(target)) {
      setNames(lapply(target, function(x) list()), target)
    } else {
      target[names(target) != "..."]  # remove symbols (...) from list
    }
  }

  merge_lists(normalize_list(base_options), normalize_list(overlay_options))
}

is_pandoc_to_html <- function(options) {
  options$to %in% c("html", "html4", "html5")
}

citeproc_required <- function(yaml_front_matter,
                              input_lines = NULL) {
  # TODO: remove the hack below after BETS is updated on CRAN https://github.com/nmecsys/BETS/pull/18
  if (tryCatch(xfun::check_old_package('BETS', '0.4.9'), error = function(e) FALSE)) return(FALSE)
  (
    is.null(yaml_front_matter$citeproc) ||
      yaml_front_matter$citeproc
  ) && (
    !is.null(yaml_front_matter$bibliography) ||
      !is.null(yaml_front_matter$references) ||
      # detect references: and bibliography: outside of yaml header
      # as Pandoc is supporting
      # TODO: remove when supporting multiple yaml block
      # https://github.com/rstudio/rmarkdown/issues/1891
      length(grep("^references:\\s*$", input_lines)) > 0 ||
      length(grep("^bibliography:\\s*$", input_lines)) > 0
  )
}

#' Define and merge an R Markdown's output format dependency
#'
#' Define and merge a dependency such as pre/post-processors from within
#' chunks. The merge happens explicitly when a list of dependencies are
#' passed to \code{knitr::knit_meta_add()} or implicitly when a dependency
#' is \code{knitr::knit_print}ed. Defining a function that does the former is
#' the best way for package developers to share the dependency. On the
#' contrary, the latter is useful to declare a document-specific dependency.
#' This function shares some arguments with \code{\link{output_format}},
#' but lacks the others because dependency is resolved after \code{post_knit}
#' and before \code{pre_processor}.
#'
#' @param name A dependency name. If some dependencies share the same name,
#'   then only the first one will be merged to the output format.
#' @inheritParams output_format
#'
#' @return An list of arguments with the "rmd_dependency" class.
#'
#' @examples
#' # Implicitly add lua filters from within a chunk
#' # This relies on (implicit) printing of the dependency in a chunk via
#' # knitr::knit_print()`
#' output_format_dependency(
#'   "lua_filter1",
#'   pandoc = list(lua_filters = "example1.lua")
#' )
#'
#' # Explicitly add lua filters from within a chunk
#' knitr::knit_meta_add(list(output_format_dependency(
#'   "lua_filter2",
#'   pandoc = list(lua_filters = "example2.lua")
#' )))
#'
#' # List the available dependencies
#' # Note that the list may include dependencies with duplicated names. In that
#' # case, the first one is merged to the output format and the others are
#' # discarded.
#' str(knitr::knit_meta("output_format_dependency", clean = FALSE))
#'
#' @export
output_format_dependency <- function(name,
                                     pandoc = list(),
                                     pre_processor = NULL,
                                     post_processor = NULL,
                                     file_scope = NULL,
                                     on_exit = NULL) {
  # Some arguments are NULL
  # to ensure inheriting the values from the base output format
  structure(list(name = name,
                 knitr = NULL, # must be NULL because merge happens after knit
                 pandoc = pandoc,
                 pre_processor = pre_processor,
                 keep_md = NULL,
                 clean_supporting = NULL,
                 post_processor = post_processor,
                 file_scope = file_scope,
                 on_exit = on_exit),
            class = "output_format_dependency")
}

#' @export
knit_print.output_format_dependency <- function(x, ...) {
  knitr::asis_output(list(), meta = list(x))
}

merge_output_format_dependency <- function(fmt, dep) {
  dep$name <- NULL # remove to be consistent with arguments of output_format
  dep$base_format <- fmt
  do.call(output_format, dep)
}

merge_output_format_dependencies <- function(fmt, deps) {
  skip <- c()
  for (d in deps) {
    if (inherits(d, "output_format_dependency") && !isTRUE(skip[d$name])) {
      skip[d$name] <- TRUE
      fmt <- merge_output_format_dependency(fmt, d)
    }
  }
  fmt
}