R/downlit-md.R

Defines functions check_packages pandoc_target pandoc_attr pandoc_link pandoc_raw_block pandoc_node transform_code md_format ast_version ast2md md2ast downlit_md_string downlit_md_path

Documented in downlit_md_path downlit_md_string

#' Syntax highlight and link a md document
#'
#' @description
#' `downlit_md_*` works by traversing the markdown AST generated by Pandoc.
#' It applies [highlight()] to `CodeBlock`s and [autolink()] to inline `Code`.
#'
#' Use `downlit_md_path()` to transform a file on disk; use
#' `downlit_md_string()` to transform a string containing markdown as part
#' of a larger pipeline.
#'
#' Needs pandoc 1.19 or later.
#'
#' @export
#' @param in_path,out_path Input and output paths for markdown file.
#' @param x A string containing markdown.
#' @param format Pandoc format; defaults to "gfm" if you have pandoc 2.0.0 or
#'   greater, otherwise "markdown_github".
#' @return `downlit_md_path()` invisibly returns `output_path`;
#'   `downlit_md_string()` returns a string containing markdown.
#' @examplesIf rlang::is_installed("rmarkdown")
#' if (rmarkdown::pandoc_available("1.19")) {
#' downlit_md_string("`base::t()`")
#' downlit_md_string("`base::t`")
#' downlit_md_string("* `base::t`")
#'
#' # But don't highlight in headings
#' downlit_md_string("## `base::t`")
#' }
downlit_md_path <- function(in_path, out_path, format = NULL) {
  check_packages()

  ast_path <- tempfile()
  on.exit(unlink(ast_path))

  md2ast(in_path, ast_path, format = format)

  ast <- jsonlite::read_json(ast_path)
  ast$blocks <- transform_code(ast$blocks, ast_version(ast))

  jsonlite::write_json(ast, ast_path, auto_unbox = TRUE, null = "null")

  ast2md(ast_path, out_path, format = format)
}

#' @export
#' @rdname downlit_md_path
downlit_md_string <- function(x, format = NULL) {
  check_packages()

  path <- tempfile()
  on.exit(unlink(path))

  brio::write_lines(x, path)
  downlit_md_path(path, path, format = format)
  brio::read_file(path)
}

# Markdown <-> pandoc AST -------------------------------------------------

md2ast <- function(path, out_path, format = NULL) {
  format <- format %||% md_format()
  rmarkdown::pandoc_convert(
    input = normalizePath(path, mustWork = FALSE),
    output = normalizePath(out_path, mustWork = FALSE),
    from = format,
    to = "json"
  )
  invisible(out_path)
}
ast2md <- function(path, out_path, format = NULL) {
  format <- format %||% md_format()

  options <- c(
    if (rmarkdown::pandoc_available("2.0")) "--eol=lf",
    if (rmarkdown::pandoc_version() < "2.11.2") "--atx-headers", # 1.19-2.11.2
    "--wrap=none" # 1.16
  )

  rmarkdown::pandoc_convert(
    input = normalizePath(path, mustWork = FALSE),
    output = normalizePath(out_path, mustWork = FALSE),
    from = "json",
    to = format,
    options = options
  )
  invisible(out_path)
}

ast_version <- function(ast) {
  string <- paste(unlist(ast$`pandoc-api-version`), collapse = ".")
  package_version(string)
}

md_format <- function() {
  if (rmarkdown::pandoc_available("2.0.0")) {
    "gfm"
  } else {
    "markdown_github"
  }
}

# Code transformation -----------------------------------------------------

# Data types at
# https://hackage.haskell.org/package/pandoc-types-1.20/docs/Text-Pandoc-Definition.html
transform_code <- function(x, version) {
  stopifnot(is.list(x))

  # Blocks that are a list of blocks
  block_list <- c(
    # Block
    "Plain", "Para", "LineBlock", "BlockQuote", "BulletList",
    # Inline
    "Emph", "Strong", "Strikeout", "Superscript", "Subscript",
    "SmallCaps", "Note", "Underline"
  )
  # Blocks that have a list of blocks as second child
  block_list2 <- c(
    "OrderedList", "Quoted",
    "Div", "Span",
    "Caption", "TableHead", "TableFoot", "Row"
  )
  skip <- c(
    "Header", "CodeBlock", "RawBlock", "HorizontalRule", "Null",
    "Math", "RawInline", "Link", "Image", "Cite",
    "Str", "Space", "SoftBreak", "LineBreak"
  )

  if (!is_named(x)) {
    lapply(x, transform_code, version = version)
  } else {
    if (x$t == "Code") {
      package_name <- extract_curly_package(x$c[[2]])
      # packages à la {pkgname}
      if(!is.na(package_name)) {
        href <- href_package(package_name)
        if (!is.na(href)) {
          x <-  list(t = "Str", c = package_name)
          x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href))
        } # otherwise we do not touch x
      } else {
      # other cases
        href <- autolink_url(x$c[[2]])
        if (!is.na(href)) {
          x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href))
        }
      }
    } else if (x$t == "CodeBlock") {
      out <- highlight(x$c[[2]], pre_class = "chroma")
      if (!is.na(out)) {
        x <- pandoc_raw_block("html", out)
      }
    } else if (x$t %in% block_list) {
      # Plain [Inline]
      # Para [Inline]
      # LineBlock [[Inline]]
      # BlockQuote [Block]
      # BulletList [[Block]]
      # Emph [Inline]
      # Strong [Inline]
      # Strikeout [Inline]
      # Superscript [Inline]
      # Subscript [Inline]
      # SmallCaps [Inline]
      # Note [Block]
      # Underline [Inline] <v1.21>
      x$c <- lapply(x$c, transform_code, version = version)
    } else if (x$t %in% block_list2) {
      # OrderedList ListAttributes [[Block]]
      # Quoted QuoteType [Inline]
      # Div Attr [Block]
      # Span Attr [Inline]
      # TableHead Attr [Row] <v1.21>
      # TableFoot Attr [Row] <v1.21>
      # Caption (Maybe ShortCaption) [Block] <v1.21>
      x$c[[2]] <- lapply(x$c[[2]], transform_code, version = version)
    } else if (x$t %in% "Table") {
      if (version >= "1.21") {
        # Attr Caption [ColSpec] TableHead [TableBody] TableFoot
        x$c[c(2, 4, 5, 6)] <- lapply(x$c[c(2, 4, 5, 6)], transform_code, version = version)
      } else {
        # [Inline] [Alignment] [Double] [TableCell] [[TableCell]]
        x$c[c(1, 4, 5)] <- lapply(x$c[c(1, 4, 5)], transform_code, version = version)
      }
    } else if (x$t %in% "TableBody") {
      # Attr RowHeadColumns [Row] [Row] <v1.21>
      x$c[c(3, 4)] <- lapply(x$c[c(3, 4)], transform_code, version = version)
    } else if (x$t %in% "Cell") {
      # Attr Alignment RowSpan ColSpan [Block]
      x$c[[5]] <- lapply(x$c[[5]], transform_code, version = version)
    } else if (x$t %in% "DefinitionList") {
      # DefinitionList [([Inline], [[Block]])]
      x$c <- lapply(x$c,
        function(x) list(
          transform_code(x[[1]], version = version),
          transform_code(x[[2]], version = version)
        )
      )
    } else if (x$t %in% skip) {

    } else {
      inform(paste0("Unknown type: ", x$t))
    }

    x
  }
}

# Pandoc AST constructors -------------------------------------------------

pandoc_node <- function(type, ...) {
  list(t = type, c = list(...))
}
pandoc_raw_block <- function(format, text) {
  # Format Text
  pandoc_node("RawBlock", format, text)
}
pandoc_link <- function(attr, contents, target) {
  # Attr [Inline] Target
  pandoc_node("Link", attr, contents, target)
}

pandoc_attr <- function(id = "", classes = list(), keyval = list()) {
  list(id, classes, keyval)
}
pandoc_target <- function(url, title = "") {
  list(url, title)
}

# Helpers -----------------------------------------------------------------

check_packages <- function() {
  if (!is_installed("rmarkdown") || !is_installed("jsonlite")) {
    abort("rmarkdown and jsonlite required for .md transformation")
  }
}
r-lib/downlit documentation built on June 12, 2024, 1:55 p.m.