R/markdown.R
In xfun: Supporting Functions for Packages Maintained by 'Yihui Xie'

Documented in embed_dir embed_file embed_files fenced_block fenced_div make_fence md_table prose_index protect_math tabset

#' Find the indices of lines in Markdown that are prose (not code blocks)
#'
#' Filter out the indices of lines between code block fences such as \verb{```}
#' (could be three or four or more backticks).
#' @param x A character vector of text in Markdown.
#' @param warn Whether to emit a warning when code fences are not balanced.
#' @note If the code fences are not balanced (e.g., a starting fence without an
#'   ending fence), this function will treat all lines as prose.
#' @return An integer vector of indices of lines that are prose in Markdown.
#' @export
#' @examples library(xfun)
#' prose_index(c('a', '```', 'b', '```', 'c'))
#' prose_index(c('a', '````', '```r', '1+1', '```', '````', 'c'))
prose_index = function(x, warn = TRUE) {
  xi = seq_along(x); n = length(idx <- code_lines_regex(x))
  if (n == 0) return(xi)
  if (n %% 2 != 0) {
    if (warn) warning('Code fences are not balanced')
    # treat all lines as prose
    return(xi)
  }
  idx2 = matrix(idx, nrow = 2)
  idx2 = unlist(mapply(seq, idx2[1, ], idx2[2, ], SIMPLIFY = FALSE))
  xi[-idx2]
}

# find starting and ending lines of code blocks via regex (may not be accurate)
code_lines_regex = function(x) {
  idx = NULL
  # if raw HTML <pre></pre> exists, it should be treated as code block
  inside_pre = if (length(p1 <- grep('<pre>', x))) {
    p2 = grep('</pre>', x)
    if (length(p1) == length(p2)) {
      idx = rbind(p1, p2)
      function(i) any(i > p1 & i < p2)
    }
  }
  r = '^(\\s*```+).*'; s = ''
  # shouldn't match ``` ``text``` ```, which is inline code, not code block
  i1 = grepl(r, x); i2 = !grepl('^\\s*```+\\s+`', x); i3 = !grepl('-->\\s*$', x)
  for (i in which(i1 & i2 & i3)) {
    if (is.function(inside_pre) && inside_pre(i)) next
    if (s == '') {
      s = gsub(r, '\\1', x[i]); idx = c(idx, i); next
    }
    # look for the next line with the same amount of backticks (end of block)
    if (grepl(paste0('^', s), x[i])) {
      idx = c(idx, i); s = ''
    }
  }
  idx
}

# find lines via commonmark (accurate but gregexpr()/substring() are slow)
code_lines_cmark = function(x) {
  xml = commonmark::markdown_xml(x, sourcepos = TRUE)
  r = '(?<=<code_block sourcepos=")(\\d+):\\d+-(\\d+):\\d+(?=")'
  m = gregexpr(r, xml, perl = TRUE)[[1]]
  if (all(m < 0)) return()
  s = attr2(m, 'capture.start'); l = attr2(m, 'capture.length')
  as.integer(substring(xml, s, s + l - 1))
}

#' Protect math expressions in pairs of backticks in Markdown
#'
#' For Markdown renderers that do not support LaTeX math, we need to protect
#' math expressions as verbatim code (in a pair of backticks), because some
#' characters in the math expressions may be interpreted as Markdown syntax
#' (e.g., a pair of underscores may make text italic). This function detects
#' math expressions in Markdown (by heuristics), and wrap them in backticks.
#'
#' Expressions in pairs of dollar signs or double dollar signs are treated as
#' math, if there are no spaces after the starting dollar sign, or before the
#' ending dollar sign. There should be a space or `(` before the starting dollar
#' sign, unless the math expression starts from the very beginning of a line.
#' For a pair of single dollar signs, the ending dollar sign should not be
#' followed by a number, and the inner math expression should not be wrapped in
#' backticks. With these assumptions, there should not be too many false
#' positives when detecing math expressions.
#'
#' Besides, LaTeX environments (\verb{\begin{*}} and \verb{\end{*}}) are also
#' protected in backticks.
#' @param x A character vector of text in Markdown.
#' @param token A character string to wrap math expressions at both ends. This
#'   can be a unique token so that math expressions can be reliably identified
#'   and restored after the Markdown text is converted.
#' @param use_block Whether to use code blocks (```` ```md-math ````) to protect
#'   `$$ $$` expressions that span across multiple lines. This is necessary when
#'   a certain line in the math expression starts with a special character that
#'   can accidentally start a new element (e.g., a leading `+` may start a
#'   bullet list). Only code blocks can prevent this case.
#' @return A character vector with math expressions in backticks.
#' @note If you are using Pandoc or the \pkg{rmarkdown} package, there is no
#'   need to use this function, because Pandoc's Markdown can recognize math
#'   expressions.
#' @export
#' @examples library(xfun)
#' protect_math(c('hi $a+b$', 'hello $$\\alpha$$', 'no math here: $x is $10 dollars'))
#' protect_math(c('hi $$', '\\begin{equation}', 'x + y = z', '\\end{equation}'))
#' protect_math('$a+b$', '===')
protect_math = function(x, token = '', use_block = FALSE) {
  i = prose_index(x)
  if (length(i)) x[i] = escape_math(x[i], token, use_block)
  x
}

escape_math = function(x, token = '', use_block = FALSE) {
  # replace $x$ with `\(x\)` (protect inline math in <code></code>)
  m = gregexpr('(?<=^|[\\s(])[$](?![ `])[^$]+?(?<![ `])[$](?![$0123456789])', x, perl = TRUE)
  regmatches(x, m) = lapply(regmatches(x, m), function(z) {
    if (length(z) == 0) return(z)
    z = sub('^[$]', paste0('`', token, '\\\\('), z)
    z = sub('[$]$', paste0('\\\\)', token, '`'), z)
    z
  })
  # replace $$x$$ with `$$x$$` (protect display math)
  m = gregexpr('(?<=^|[\\s(])[$][$](?! )[^$]+?(?<! )[$][$]', x, perl = TRUE)
  regmatches(x, m) = lapply(regmatches(x, m), function(z) {
    if (length(z) == 0) return(z)
    paste0('`', token, z, token, '`')
  })
  # now, if there are still lines starting and ending with $$, they might be
  # math expressions of display style spanning multiple lines, e.g.,
  # $$\alpha +
  # \beta$$
  # we assume that $$ can only appear once on one line
  i = vapply(gregexpr('[$]', x), length, integer(1)) == 2
  if (any(i)) {
    r1 = sprintf('\\1%s\\2', if (use_block) {
      paste(c('```{.md-math', if (token != '') c(' .', token), '}\n\\1'), collapse = '')
    } else paste0('`', token))
    x[i] = gsub('^(\\s*)([$][$][^ ]+)', r1, x[i], perl = TRUE)
    r2 = if (use_block) '\\1\\2\n\\1```' else paste0('\\1\\2', token, '`')
    x[i] = gsub('^(\\s*)(.*?[^ ][$][$])$', r2, x[i], perl = TRUE)
  }
  # equation environments (\begin and \end must match)
  i1 = grep('^\\\\begin\\{[^}]+\\}$', x)
  i2 = grep('^\\\\end\\{[^}]+\\}$', x)
  if (length(i1) == length(i2)) {
    # TODO: do not protect inner environments in case of nested environments (#57)
    x[i1] = paste0('`', token, x[i1])
    x[i2] = paste0(x[i2], token, '`')
  }
  x
}

#' Create a fenced block in Markdown
#'
#' Wrap content with fence delimiters such as backticks (code blocks) or colons
#' (fenced Div). Optionally the fenced block can have attributes. The function
#' `fenced_div()` is a shorthand of `fenced_block(char = ':')`.
#' @param x A character vector of the block content.
#' @param attrs A vector of block attributes.
#' @param fence The fence string, e.g., `:::` or ```` ``` ````. This will be
#'   generated from the `char` argument by default.
#' @param char The fence character to be used to generate the fence string by
#'   default.
#' @return `fenced_block()` returns a character vector that contains both the
#'   fences and content.
#' @export
#' @examples
#' # code block with class 'r' and ID 'foo'
#' xfun::fenced_block('1+1', c('.r', '#foo'))
#' # fenced Div
#' xfun::fenced_block('This is a **Div**.', char = ':')
fenced_block = function(x, attrs = NULL, fence = make_fence(x, char), char = '`') {
  a = block_attr(attrs)
  c('', paste0(fence, a), x, fence)
}

#' @param ... Arguments to be passed to `fenced_block()`.
#' @rdname fenced_block
#' @export
fenced_div = function(...) fenced_block(..., char = ':')

#' @param start The number of characters to start searching `x` with. If the
#'   string of this number of characters is found, add one more character, and
#'   repeat the search.
#' @return `make_fence()` returns a character string. If the block content
#'   contains `N` fence characters (e.g., backticks), use `N + 1` characters as
#'   the fence.
#' @rdname fenced_block
#' @export
#' @examples
#' # three backticks by default
#' xfun::make_fence('1+1')
#' # needs five backticks for the fences because content has four
#' xfun::make_fence(c('````r', '1+1', '````'))
make_fence = function(x, char = '`', start = 3) {
  f = strrep(char, start)
  while (any(grepl(f, x, fixed = TRUE))) f = paste0(f, char)
  f
}

# concatenate block attributes for fenced blocks
block_attr = function(attrs) {
  a = paste(attrs, collapse = ' ')
  if (grepl('[ .=#]', a)) a = paste0('{', a, '}')
  if (a == '') a else paste0(' ', a)
}

#' Embed a file, multiple files, or directory on an HTML page
#'
#' For a file, first encode it into base64 data (a character string). Then
#' generate a hyperlink of the form \samp{<a href="base64 data"
#' download="filename">Download filename</a>}. The file can be downloaded when
#' the link is clicked in modern web browsers. For a directory, it will be
#' compressed as a zip archive first, and the zip file is passed to
#' `embed_file()`. For multiple files, they are also compressed to a zip file
#' first.
#'
#' These functions can be called in R code chunks in R Markdown documents with
#' HTML output formats. You may embed an arbitrary file or directory in the HTML
#' output file, so that readers of the HTML page can download it from the
#' browser. A common use case is to embed data files for readers to download.
#' @param path Path to the file(s) or directory.
#' @param name The default filename to use when downloading the file. Note that
#'   for `embed_dir()`, only the base name (of the zip filename) will be used.
#' @param text The text for the hyperlink.
#' @param ... For `embed_file()`, additional arguments to be passed to
#'   [html_tag()] (e.g., `class = 'foo'`). For `embed_dir()` and
#'   `embed_files()`, arguments passed to `embed_file()`.
#' @note Windows users may need to install Rtools to obtain the \command{zip}
#'   command to use `embed_dir()` and `embed_files()`.
#'
#'   Internet Explorer does not support downloading embedded files. Chrome has a
#'   2MB limit on the file size.
#' @return An HTML tag \samp{<a>} with the appropriate attributes.
#' @export
#' @examples
#' logo = xfun:::R_logo()
#' link = xfun::embed_file(logo, text = 'Download R logo')
#' link
#' if (interactive()) xfun::html_view(link)
embed_file = function(path, name = basename(path), text = paste('Download', name), ...) {
  h = base64_uri(path)
  html_tag('a', text, href = h, download = name, ...)
}

#' @rdname embed_file
#' @export
embed_dir = function(path, name = paste0(normalize_path(path), '.zip'), ...) {
  name  = gsub('/', '', basename(name))
  in_dir(path, {
    name = file.path(tempdir(), name); on.exit(file.remove(name), add = TRUE)
    zip(name, '.'); embed_file(name, ...)
  })
}

#' @rdname embed_file
#' @export
embed_files = function(path, name = with_ext(basename(path[1]), '.zip'), ...) {
  name = file.path(tempdir(), basename(name))
  on.exit(file.remove(name), add = TRUE)
  zip(name, path)
  embed_file(name, ...)
}

zip = function(name, ...) {
  if (utils::zip(name, ...) != 0) stop('Failed to create the zip archive ', name)
  invisible(0)
}

#' Generate a simple Markdown pipe table
#'
#' A minimal Markdown table generator using the pipe `|` as column separators.
#'
#' The default argument values can be set via global options with the prefix
#' `xfun.md_table.`, e.g., `options(xfun.md_table.digits 2, xfun.md_table.na =
#' 'n/a')`.
#' @param x A 2-dimensional object (e.g., a matrix or data frame).
#' @param digits The number of decimal places to be passed to [round()]. It can
#'   be a integer vector of the same length as the number of columns in `x` to
#'   round columns separately. The default is `3`.
#' @param na A character string to represent `NA` values. The default is an
#'   empty string.
#' @param newline A character string to substitute `\n` in `x` (because pipe
#'   tables do not support line breaks in cells). The default is a space.
#' @param limit The maximum number of rows to show in the table. If it is
#'   smaller than the number of rows, the data in the middle will be omitted. If
#'   it is of length 2, the second number will be used to limit the number of
#'   columns. Zero and negative values are ignored.
#' @return A character vector.
#' @seealso [knitr::kable()] (which supports more features)
#' @export
#' @examples
#' xfun::md_table(head(iris))
#' xfun::md_table(mtcars, limit = c(10, 6))
md_table = function(x, digits = NULL, na = NULL, newline = NULL, limit = NULL) {
  if (length(d <- dim(x)) != 2)
    stop('xfun::md_table() only supports 2-dimensional objects.')
  if (d[2] == 0) return(character())
  if (is.null(digits))
    digits = getOption('xfun.md_table.digits', min(getOption('digits'), 3))
  digits = rep(digits, d[2])  # recycle for all columns
  num = logical(d[2])  # numeric columns
  for (j in seq_len(d[2])) if (is.numeric(x[, j])) {
    num[j] = TRUE
    x[, j] = round(x[, j], digits[j])
  }
  is_na = is.na(x)
  x = as.matrix(format(x))
  if (any(is_na)) x[is_na] = na %||% getOption('xfun.md_table.na', '')
  rn = rownames(x)
  # ignore empty and automatic row names (integers)
  if (!all(grepl('^[0-9]*$', rn))) {
    x = cbind(' ' = rn, x)
    num = c(FALSE, num)
  }
  # get first and last limit/2 rows/cols in N rows/cols
  if (length(limit <- limit %||% getOption('xfun.md_table.limit'))) {
    # subset rows
    l1 = limit[1]
    if (l1 > 0 && l1 < d[1]) {
      n1 = round(l1/2); n2 = l1 - n1
      x = rbind(head(x, n1), '&vellip;', tail(x, n2))
    }
    # subset columns
    if (length(limit) >= 2 && (l2 <- limit[2]) > 0 && l2 < d[2]) {
      n1 = round(l2/2); n2 = l2 - n1
      x = cbind(
        x[, seq_len(n1), drop = FALSE],
        `...` = '...',
        x[, d[2] - seq_len(n2) + 1, drop = FALSE]
      )
      num = c(head(num, n1), FALSE, tail(num, n2))
    }
    d = dim(x)
    if (d[2] == 0) return(character())
  }
  cn = colnames(x) %||% rep(' ', d[2])  # table header
  a = ifelse(num, '--:', '---')  # alignment
  a[cn == '...'] = ':-:'
  x = rbind(cn, a, x)
  d = dim(x)
  x = gsub('|', '\\|', x, fixed = TRUE)
  dim(x) = d
  res = do.call(function(...) paste(..., sep = '|'), as.data.frame(x))
  res = gsub('\n', newline %||% getOption('xfun.md_table.newline', ' '), res, fixed = TRUE)
  paste0('|', res, '|')
}

#' Represent a (recursive) list with (nested) tabsets
#'
#' The tab titles are names of list members, and the tab content contains the
#' values of list members. If a list member is also a list, it will be
#' represented recursively with a child tabset.
#' @param x A list.
#' @param value A function to print the value of a list member. By default,
#'   [str()] is used to print the structure of the value. You may also use
#'   [dput()] to output the full value, but it may be slow when the size of the
#'   value is too big.
#' @return A character vector of Markdown that can be rendered to HTML with
#'   [litedown::mark()].
#' @export
#' @examples
#' xfun::tabset(iris)
#' xfun::tabset(iris, dput)
#' xfun::tabset(iris, print)
#'
#' # a deeply nested list
#' plot(1:10)
#' p = recordPlot()
#' xfun::tabset(p)
tabset = function(x, value = str) {
  obj = paste(deparse(substitute(x)), collapse = ' ')
  md_viewable(.tabset(x, value), meta = list(
    css = c('@default', '@tabsets'), js = '@tabsets',
    title = paste0('`xfun::tabset(', obj, ')`')
  ))
}

.tabset = function(x, value) {
  if (is.list(x)) {
    idx = seq_along(x)
    nms = names(x) %||% sprintf('[[%d]]', idx)
    res = NULL
    for (i in idx) {
      con = paste0('  ', .tabset(unclass(x)[[i]], value))  # indent content
      res = c(res, paste0('- `', nms[i], '`'), '', con)
    }
    # attach attributes to the last tab if necessary
    if (is.list(att <- attributes(x))) {
      att$names = NULL  # names have been displayed
      if (length(att)) res = c(
        res, '', '- `attr(*)`', '', paste0('  ', .tabset(att, value))
      )
    }
    c('::: tabset', res, ':::')
  } else {
    c('```r', capture.output(value(x)), '```')
  }
}

md_viewable = function(x, ...) {
  structure(x, class = c('xfun_md_viewable', 'record_asis'), ...)
}

#' @export
print.xfun_md_viewable = function(x, ...) {
  if (loadable('litedown')) {
    html = litedown::mark(text = c('---', '---', '', x), meta = attr2(x, 'meta'), ...)
    html_view(html)
  } else {
    warning("Cannot preview the result since the 'litedown' package is not available.")
    raw_string(x)
  }
}

Any scripts or data that you put into this service are public.

xfun documentation built on April 3, 2025, 7:04 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

xfun
Supporting Functions for Packages Maintained by 'Yihui Xie'

R/markdown.R
In xfun: Supporting Functions for Packages Maintained by 'Yihui Xie'

Defines functions print.xfun_md_viewable md_viewable .tabset tabset md_table zip embed_files embed_dir embed_file block_attr make_fence fenced_div fenced_block escape_math protect_math code_lines_cmark code_lines_regex prose_index

Documented in embed_dir embed_file embed_files fenced_block fenced_div make_fence md_table prose_index protect_math tabset

Try the xfun package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

xfun Supporting Functions for Packages Maintained by 'Yihui Xie'

R/markdown.R In xfun: Supporting Functions for Packages Maintained by 'Yihui Xie'

Defines functions print.xfun_md_viewable md_viewable .tabset tabset md_table zip embed_files embed_dir embed_file block_attr make_fence fenced_div fenced_block escape_math protect_math code_lines_cmark code_lines_regex prose_index

Documented in embed_dir embed_file embed_files fenced_block fenced_div make_fence md_table prose_index protect_math tabset

Try the xfun package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

xfun
Supporting Functions for Packages Maintained by 'Yihui Xie'

R/markdown.R
In xfun: Supporting Functions for Packages Maintained by 'Yihui Xie'