Nothing
#' Build book chapters into separate HTML files
#'
#' Split the HTML output into chapters while updating relative links (e.g. links
#' in TOC, footnotes, citations, figure/table cross-references, and so on).
#' Functions \code{html_book()} and \code{tufte_html_book()} are simple wrapper
#' functions of \code{html_chapter()} using a specific base output format.
#' @inheritParams pdf_book
#' @inheritParams html_document2
#' @param toc,number_sections,fig_caption,lib_dir,template,pandoc_args See
#' \code{rmarkdown::\link[rmarkdown]{html_document}},
#' \code{tufte::\link[tufte:tufte_handout]{tufte_html}}, or the documentation
#' of the \code{base_format} function.
#' @param ... Other arguments to be passed to \code{base_format}. For
#' \code{html_book()} and \code{tufte_html_book()}, \code{...} is passed to
#' \code{html_chapters()}.
#' @param split_by How to name the HTML output files from the book: \code{rmd}
#' uses the base filenames of the input Rmd files to create the HTML
#' filenames, e.g. generate \file{chapter1.html} for \file{chapter1.Rmd};
#' \code{none} means do not split the HTML file (the book will be a single
#' HTML file); \code{chapter} means split the file by the first-level headers;
#' \code{section} means the second-level headers. For \code{chapter} and
#' \code{section}, the HTML filenames will be determined by the header ID's,
#' e.g. the filename for the first chapter with a chapter title \code{#
#' Introduction} will be \file{introduction.html}; for \code{chapter+number}
#' and \code{section+number}, the chapter/section numbers will be prepended to
#' the HTML filenames, e.g. \file{1-introduction.html} and
#' \file{2-1-literature.html}.
#' @param split_bib Whether to split the bibliography onto separate pages where
#' the citations are actually used.
#' @param page_builder A function to combine different parts of a chapter into a
#' page (an HTML character vector). See \code{\link{build_chapter}} for the
#' specification of this function.
#' @note These functions are expected to be used in conjunction with
#' \code{\link{render_book}()}. It is almost meaningless if they are used with
#' \code{rmarkdown::render()}. Functions like \code{\link{html_document2}} are
#' designed to work with the latter.
#'
#' If you want to use a different template, the template must contain three
#' pairs of HTML comments: \samp{<!--bookdown:title:start-->} and
#' \samp{<!--bookdown:title:end-->} to mark the title section of the book
#' (this section will be placed only on the first page of the rendered book);
#' \samp{<!--bookdown:toc:start-->} and \samp{<!--bookdown:toc:end-->} to mark
#' the table of contents section (it will be placed on all chapter pages);
#' \samp{<!--bookdown:body:start-->} and \samp{<!--bookdown:body:end-->} to
#' mark the HTML body of the book (the HTML body will be split into separate
#' pages for chapters). You may open the default HTML template
#' (\code{bookdown:::bookdown_file('templates/default.html')}) to see where
#' these comments were inserted.
#' @return An R Markdown output format object to be passed to
#' \code{bookdown::render_book()}.
#' @export
html_chapters = function(
toc = TRUE, number_sections = TRUE, fig_caption = TRUE, lib_dir = 'libs',
template = bookdown_file('templates/default.html'),
global_numbering = !number_sections, pandoc_args = NULL, ...,
base_format = rmarkdown::html_document, split_bib = TRUE, page_builder = build_chapter,
split_by = c('section+number', 'section', 'chapter+number', 'chapter', 'rmd', 'none')
) {
config = get_base_format(base_format, list(
toc = toc, number_sections = number_sections, fig_caption = fig_caption,
self_contained = FALSE, lib_dir = lib_dir,
template = template, pandoc_args = pandoc_args2(pandoc_args), ...
))
split_by = match.arg(split_by)
post = config$post_processor # in case a post processor have been defined
config$post_processor = function(metadata, input, output, clean, verbose) {
if (is.function(post)) output = post(metadata, input, output, clean, verbose)
move_files_html(output, lib_dir)
output2 = split_chapters(output, page_builder, global_numbering, split_by, split_bib)
if (file.exists(output) && !same_path(output, output2)) file.remove(output)
move_files_html(output2, lib_dir)
output2
}
config = common_format_config(config, 'html')
config
}
# add --wrap=preserve to pandoc args for pandoc 2.0:
# https://github.com/rstudio/bookdown/issues/504
pandoc_args2 = function(args) {
if (pandoc2.0() && !length(grep('--wrap', args))) c('--wrap', 'preserve', args) else args
}
#' @rdname html_chapters
#' @export
html_book = function(...) {
html_chapters(..., base_format = rmarkdown::html_document)
}
#' @rdname html_chapters
#' @export
tufte_html_book = function(...) {
html_chapters(..., base_format = tufte::tufte_html)
}
#' Output formats that allow numbering and cross-referencing
#' figures/tables/equations
#'
#' These are simple wrappers of the output format functions like
#' \code{rmarkdown::\link[rmarkdown]{html_document}()}, and they added the
#' capability of numbering figures/tables/equations/theorems and
#' cross-referencing them. See \sQuote{References} for the syntax. Note you can
#' also cross-reference sections by their ID's using the same syntax when
#' sections are numbered. In case you want to enable cross reference in other
#' formats, use \code{markdown_document2} with \code{base_format} argument.
#' @param ...,fig_caption,md_extensions,pandoc_args Arguments to be passed to a
#' specific output format function. For a function \code{foo2()}, its
#' arguments are passed to \code{foo()}, e.g. \code{...} of
#' \code{html_document2()} are passed to \code{rmarkdown::html_document()}.
#' @param number_sections Whether to number section headers: if \code{TRUE},
#' figure/table numbers will be of the form \code{X.i}, where \code{X} is the
#' current first-level section number, and \code{i} is an incremental number
#' (the i-th figure/table); if \code{FALSE}, figures/tables will be numbered
#' sequentially in the document from 1, 2, ..., and you cannot cross-reference
#' section headers in this case.
#' @param global_numbering If \code{TRUE}, number figures and tables globally
#' throughout a document (e.g., Figure 1, Figure 2, ...). If \code{FALSE},
#' number them sequentially within sections (e.g., Figure 1.1, Figure 1.2,
#' ..., Figure 5.1, Figure 5.2, ...). Note that \code{global_numbering =
#' FALSE} will not work with \code{number_sections = FALSE} because sections
#' are not numbered.
#' @inheritParams pdf_book
#' @return An R Markdown output format object to be passed to
#' \code{rmarkdown::\link[rmarkdown]{render}()}.
#' @note These output formats are used to generate single output files, such as
#' a single HTML output file (unlike \code{gitbook}, which generates multiple
#' HTML output files by default).
#'
#' The functions \samp{tufte_*()} are wrappers of functions in the \pkg{tufte}
#' package.
#' @references \url{https://bookdown.org/yihui/bookdown/}
#' @export
html_document2 = function(
..., number_sections = TRUE, global_numbering = !number_sections,
pandoc_args = NULL, base_format = rmarkdown::html_document
) {
config = get_base_format(base_format, list(
..., number_sections = number_sections, pandoc_args = pandoc_args2(pandoc_args)
))
post = config$post_processor # in case a post processor have been defined
config$post_processor = function(metadata, input, output, clean, verbose) {
if (is.function(post)) output = post(metadata, input, output, clean, verbose)
x = read_utf8(output)
x = clean_html_tags(x)
x = restore_appendix_html(x, remove = FALSE)
x = restore_part_html(x, remove = FALSE)
x = resolve_refs_html(x, global_numbering)
write_utf8(x, output)
output
}
config = common_format_config(config, 'html')
config
}
#' @rdname html_document2
#' @export
html_fragment2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = rmarkdown::html_fragment
)
}
#' @rdname html_document2
#' @export
html_notebook2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = rmarkdown::html_notebook
)
}
#' @rdname html_document2
#' @export
html_vignette2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = rmarkdown::html_vignette
)
}
#' @rdname html_document2
#' @export
ioslides_presentation2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = rmarkdown::ioslides_presentation
)
}
#' @rdname html_document2
#' @export
slidy_presentation2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = rmarkdown::slidy_presentation
)
}
#' @rdname html_document2
#' @export
tufte_html2 = function(..., number_sections = FALSE) {
html_document2(
..., number_sections = number_sections, base_format = tufte::tufte_html
)
}
#' Combine different parts of an HTML page
#'
#' Given the HTML header, body, and footer, etc, build an HTML page.
#'
#' This function is for expert use only. The \code{head} and \code{foot}
#' arguments may not be strictly the HTML header and footer. It depends on the
#' HTML comment tokens in the template (see \code{\link{html_chapters}}).
#' @param head A character vector of the HTML code before the document title.
#' @param toc A character vector of the table of contents.
#' @param chapter The body of a chapter.
#' @param link_prev,link_next The URL of the previous/next chapter (may be
#' \code{NULL}).
#' @param rmd_cur The Rmd filename of the current chapter (may be \code{NULL}).
#' @param html_cur The HTML filename of the current chapter (may be
#' \code{NULL}).
#' @param foot A character vector of the HTML code after the chapter body.
#' @export
#' @keywords internal
build_chapter = function(
head, toc, chapter, link_prev, link_next, rmd_cur, html_cur, foot
) {
toc = add_toc_class(toc)
paste(c(
head,
'<div class="row">',
'<div class="col-sm-12">',
toc,
'</div>',
'</div>',
'<div class="row">',
'<div class="col-sm-12">',
chapter,
'<p style="text-align: center;">',
button_link(link_prev, 'Previous'),
source_link(rmd_cur, type = 'edit'),
source_link(rmd_cur, type = 'history'),
source_link(rmd_cur, type = 'view'),
button_link(link_next, 'Next'),
'</p>',
'</div>',
'</div>',
foot
), collapse = '\n')
}
add_toc_class = function(toc) {
gsub('^(<li>)(.+)(?<!</li>)$', '<li class="has-sub">\\2', toc, perl = TRUE)
}
r_chap_pattern = '^<!--chapter:end:(.+)-->$'
split_chapters = function(
output, build = build_chapter, global_numbering, split_by, split_bib, ...
) {
use_rmd_names = split_by == 'rmd'
split_level = switch(
split_by, none = 0, chapter = 1, `chapter+number` = 1,
section = 2, `section+number` = 2, rmd = 1
)
if (!(split_level %in% 0:2)) stop('split_level must be 0, 1, or 2')
x = read_utf8(output)
x = clean_html_tags(x)
i1 = find_token(x, '<!--bookdown:title:start-->')
i2 = find_token(x, '<!--bookdown:title:end-->')
i3 = find_token(x, '<!--bookdown:toc:start-->')
i4 = find_token(x, '<!--bookdown:toc:end-->')
i5 = find_token(x, '<!--bookdown:body:start-->')
i6 = find_token(x, '<!--bookdown:body:end-->')
n = length(grep(r_chap_pattern, x))
# Need to take care of the div tags here before restore_part_html and
# restore_appendix_html erase the section ids of the hidden PART or APPENDIX
# sections.
if (split_level > 1) {
body = x[(i5 + 1):(i6 - 1)]
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', body) + i5
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', body) + i5
h12 = setNames(c(h1, h2), rep(c('h1', 'h2'), c(length(h1), length(h2))))
if (length(h12) > 0 && h12[1] != i5 + 1) stop(
'The document must start with a first (#) or second level (##) heading'
)
h12 = sort(h12)
if (length(h12) > 1) {
n12 = names(h12)
# h2 that immediately follows h1
i = h12[n12 == 'h2' & c('h2', head(n12, -1)) == 'h1'] - 1
# close the h1 section early with </div>
# reg_chap and sec_num must take this into account so that cross reference
# works when split by section. (#849)
if (length(i)) x[i] = paste0(x[i], '\n</div>')
# h1 that immediately follows h2 but not the first h1
i = n12 == 'h1' & c('h1', head(n12, -1)) == 'h2'
if (any(i) && n12[1] == 'h2') i[which(n12 == 'h1')[1]] = FALSE
i = h12[i] - 1
# need to comment out the </div> corresponding to the last <h1> in the body
if (tail(n12, 1) == 'h2' && any(n12 == 'h1')) {
for (j in (i6 - 1):(tail(h12, 1))) {
# the line j should close h1, and j - 1 should close h2
if (all(x[j - 0:1] == '</div>')) break
}
i = c(i, j)
}
for (j in i) {
# the i-th lines should be the closing </div> for h1
if (x[j] != '</div>') stop(
'Something wrong with the HTML output. The line ', x[j],
' is supposed to be </div>'
)
}
x[i] = paste('<!--', x[i], '-->') # remove the extra </div> of h1
}
}
x = add_section_ids(x)
x = restore_part_html(x)
x = restore_appendix_html(x)
# no (or not enough) tokens found in the template
if (any(c(i1, i2, i3, i4, i5, i6) == 0)) {
x = resolve_refs_html(x, global_numbering)
x = add_chapter_prefix(x)
write_utf8(x, output)
return(output)
}
html_head = x[1:(i1 - 1)] # HTML header + includes
html_title = x[(i1 + 1):(i2 - 1)] # title/author/date
html_toc = x[(i3 + 1):(i4 - 1)] # TOC
html_body = x[(i5 + 1):(i6 - 1)] # body
html_foot = x[(i6 + 1):length(x)] # HTML footer
html_toc = add_toc_ids(html_toc)
idx = grep(r_chap_pattern, html_body)
nms = gsub(r_chap_pattern, '\\1', html_body[idx]) # to be used in HTML filenames
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', x)
if (length(h1) < length(nms)) warning(
'You have ', length(nms), ' Rmd input file(s) but only ', length(h1),
' first-level heading(s). Did you forget first-level headings in certain Rmd files?'
)
html_body = resolve_refs_html(html_body, global_numbering)
# do not split the HTML file
if (split_level == 0) {
html_body[idx] = '' # remove chapter tokens
html_body = add_chapter_prefix(html_body)
write_utf8(build(
html_head, html_toc, c(html_title, html_body), NULL, NULL, NULL, output, html_foot, ...
), output)
return(move_to_output_dir(output))
}
if (split_bib) {
# parse and remove the references chapter
res = parse_references(html_body)
refs = res$refs; html_body = res$html
ref_title = res$title; refs_div = res$div
}
# parse and remove footnotes (will reassign them to relevant pages later)
res = parse_footnotes(html_body)
fnts = res$items
if (length(fnts)) html_body[res$range] = ''
if (use_rmd_names) {
html_body[idx] = ''
nms_chaps = nms # Rmd filenames
if (n >= 1) {
idx = next_nearest(idx, grep('^<div', html_body))
idx = c(1, idx[-n])
}
} else {
h1 = grep('^<div (id="[^"]+" )?class="section level1("| )', html_body)
h2 = grep('^<div (id="[^"]+" )?class="section level2("| )', html_body)
idx2 = if (split_level == 1) h1 else if (split_level == 2) sort(c(h1, h2))
n = length(idx2)
nms_chaps = if (length(idx)) {
vapply(idx2, character(1), FUN = function(i) head(nms[idx > i], 1))
}
reg_id = '^<div id="([^"]+)".*$'
reg_num = '^(<h[12]><span class="header-section-number">)([.A-Z0-9]+)(</span>.+</h[12]>).*$'
nms = vapply(idx2, character(1), FUN = function(i) {
x1 = html_body[i]; x2 = html_body[i + 1]
id = if (grepl(reg_id, x1)) gsub(reg_id, '\\1', x1)
num = if (grepl(reg_num, x2)) gsub(reg_num, '\\2', x2)
if (is.null(id) && is.null(num)) stop(
'The heading ', x2, ' must have at least an id or a number'
)
nm = if (grepl('[+]number$', split_by)) {
paste(c(num, id), collapse = '-')
} else id
if (is.null(nm)) stop('The heading ', x2, ' must have an id')
nm
})
if (anyDuplicated(nms)) (if (isTRUE(opts$get('preview'))) warning else stop)(
'Automatically generated filenames contain duplicated ones: ',
paste(nms[duplicated(nms)], collapse = ', ')
)
# generate index.html if the first Rmd filename is index.Rmd
if (identical(with_ext(head(nms_chaps, 1), ''), 'index')) nms[1] = 'index'
html_body[idx] = ''
idx = idx2
}
if (n == 0) {
idx = 1; nms = output; n = 1
}
nms = basename(with_ext(nms, '.html')) # the HTML filenames to be generated
input = opts$get('input_rmd')
html_body = add_chapter_prefix(html_body)
html_toc = restore_links(html_toc, html_body, idx, nms)
for (i in seq_len(n)) {
# skip writing the chapter.html if the current Rmd name is not in the vector
# of Rmd names passed to render_book() (only this vector of Rmd's should be
# rendered for preview purposes)
if (isTRUE(opts$get('preview')) && !(nms_chaps[i] %in% input)) {
if (!file.exists(output_path(nms[i]))) file.create(nms[i])
next
}
i1 = idx[i]
i2 = if (i == n) length(html_body) else idx[i + 1] - 1
html = c(if (i == 1) html_title, html_body[i1:i2])
a_targets = parse_a_targets(html)
if (split_bib) {
# in order to find references in footnotes, we add footnotes to chapter body
a_targets = parse_a_targets(relocate_footnotes(html, fnts, a_targets))
html = relocate_references(html, refs, ref_title, a_targets, refs_div)
}
html = relocate_footnotes(html, fnts, a_targets)
html = restore_links(html, html_body, idx, nms)
html = build(
prepend_chapter_title(html_head, html), html_toc, html,
if (i > 1) nms[i - 1],
if (i < n) nms[i + 1],
if (length(nms_chaps)) nms_chaps[i],
nms[i], html_foot, ...
)
write_utf8(html, nms[i])
}
# add a 404 page
r404 = build_404()
p404 = r404$path; h404 = r404$html
if (!is.null(h404)) {
h404 = build(
prepend_chapter_title(html_head, h404), html_toc, h404, NULL, NULL,
r404$rmd_cur, p404, html_foot, ...
)
write_utf8(h404, p404)
}
nms = move_to_output_dir(c(nms, p404))
# find the HTML output file corresponding to the Rmd file passed to render_book()
if (is.null(input) || length(nms_chaps) == 0) j = 1 else {
if (is.na(j <- match(input[1], nms_chaps))) j = 1
}
nms[j]
}
build_404 = function() {
p404 = '404.html'
# if a 404 page already exist, we do nothing specific and assume
# user has already a workflow in place
if (file.exists(p404)) return()
# We create 404 page if it does not exist
if (length(rmd_cur <- head(existing_files(c('_404.md', '_404.Rmd')), 1))) {
xfun::Rscript_call(rmarkdown::render, list(
rmd_cur, rmarkdown::html_fragment(pandoc_args = c('--metadata', 'title=404')),
output_file = p404, quiet = TRUE
))
h404 = xfun::read_utf8(p404)
} else {
rmd_cur = NULL
# default content for 404 page
h404 = c(
'<div id="page-not-found" class="section level1">',
'<h1>Page not found</h1>',
'<p>The page you requested cannot be found (perhaps it was moved or renamed).</p>',
'<p>You may want to try searching to find the page\'s new location, or use',
'the table of contents to find the page you are looking for.</p>',
'</div>'
)
}
list(path = p404, html = h404, rmd_cur = rmd_cur)
}
# clean HTML tags inside <meta>, which can be introduced by certain YAML
# metadata, such as an improper description that contains Markdown syntax, e.g.,
# <meta name="description" content="A <i>description</i>.">
clean_meta_tags = function(x) {
r = '^(\\s*<meta )(.+<.+>.+)(/?>\\s*)$'
if (length(i <- grep(r, x)) == 0) return(x)
x1 = sub(r, '\\1', x[i])
x2 = sub(r, '\\2', x[i])
x3 = sub(r, '\\3', x[i])
x2 = gsub('<[^>]+>', '', x2)
x[i] = paste0(x1, x2, x3)
# then fix URLs in meta: https://github.com/rstudio/bookdown/pull/969#issuecomment-885252698
r = '^(\\s*<meta (property="og:image"|name="twitter:image") content=")[^"]*?/(https?://[^"]+" />\\s*)$'
x = gsub(r, '\\1\\3', x)
# remove the unnecessary extra slash introduced in #969
r = '^(\\s*<meta (property="og:image"|name="twitter:image") content="https?://[^"]+?/)/([^"]+" />\\s*)$'
x = gsub(r, '\\1\\3', x)
x
}
# remove extra attributes on headers (Pandoc 2.9+):
# https://github.com/rstudio/bookdown/issues/832
clean_header_tags = function(x) {
r1 = '^<div [^>]*?class="section level[1-6][^"]*"[^>]*>$'
r2 = '^(<h[1-6])([^>]+)(>.+</h[1-6]>.*)$'
i = grep(r2, x)
i = i[grep(r1, x[i - 1])] # the line above <h1> should be <div>
x[i] = gsub(r2, '\\1\\3', x[i]) # remove attributes on <h1>
x
}
clean_html_tags = function(x) {
x = clean_meta_tags(x)
x = clean_header_tags(x)
x
}
# move files to output dir if specified
move_to_output_dir = function(files) {
files2 = output_path(files)
i = file.exists(files) & (files != files2)
file_rename(files[i], files2[i])
files2
}
find_token = function(x, token) {
i = which(x == token)
n = length(i)
if (n == 1) return(i)
if (n == 0) return(0)
stop("Cannot find the unique token '", token, "'")
}
button_link = function(target, text) {
if (length(target) == 0) return()
sprintf(
'<a href="%s"><button class="btn btn-default">%s</button></a>', target, text
)
}
source_link = function(target, type) {
if (length(target) == 0) return()
setting = source_link_setting(type = type)
if (is.null(setting)) return()
button_link(sprintf(setting$link, target), setting$text)
}
source_link_setting = function(config, type) {
config_default = load_config()[[type]]
if (missing(config) || is.null(config)) config = config_default
if (is.null(config)) return()
if (is.character(config)) config = list(link = config, text = NULL)
if (!is.character(link <- config[['link']])) return()
if (!grepl('%s', link)) stop('The ', type, ' link must contain %s')
if (!is.character(text <- config[['text']])) text = ui_language(type)
list(link = link, text = text)
}
#' Resolve figure/table/section references in HTML
#'
#' Post-process the HTML content to resolve references of figures, tables, and
#' sections, etc. The references are written in the form \code{\@ref(key)} in
#' the R Markdown source document.
#' @param content A character vector of the HTML content.
#' @param global Whether to number the elements incrementally throughout the
#' whole document, or number them by the first-level headers. For an R
#' Markdown output format, \code{global = !number_sections} is likely to be a
#' reasonable default (i.e., when the sections are numbered, you number
#' figures/tables by sections; otherwise just number them incrementally).
#' @return A post-processed character vector of the HTML character.
#' @export
#' @keywords internal
#' @examples library(bookdown)
#' resolve_refs_html(
#' c(
#' '<caption>(#tab:foo) A nice table.</caption>',
#' '<p>See Table @ref(tab:foo).</p>'
#' ),
#' global = TRUE
#' )
resolve_refs_html = function(content, global = FALSE) {
content = resolve_ref_links_html(content)
res = parse_fig_labels(content, global)
content = res$content
ref_table = c(res$ref_table, parse_section_labels(content))
# look for @ref(label) and resolve to actual figure/table/section numbers
m = gregexpr('(?<!\\\\)@ref\\(([-:[:alnum:]]+)\\)', content, perl = TRUE)
refs = regmatches(content, m)
for (i in seq_along(refs)) {
if (length(refs[[i]]) == 0) next
# strip off html tags when resolve numbers in <img>'s alt attribute because
# the numbers may contain double quotes, e.g. <img alt="<a
# href="#foo">1.2</a>"" width=...
ref = ref_to_number(refs[[i]], ref_table, FALSE)
if (is_img_line(content[i])) ref = strip_html(ref)
refs[[i]] = ref
}
regmatches(content, m) = refs
content
}
is_img_line = function(x) grepl('^<img src=".* alt="', x)
ref_to_number = function(ref, ref_table, backslash) {
if (length(ref) == 0) return(ref)
lab = gsub(if (backslash) '^\\\\@ref\\(|\\)$' else '^@ref\\(|\\)$', '', ref)
ref = prefix_section_labels(lab)
num = ref_table[ref]
i = is.na(num)
if (any(i)) {
if (!isTRUE(opts$get('preview')))
warning('The label(s) ', paste(lab[i], collapse = ', '), ' not found', call. = FALSE)
num[i] = '<strong>??</strong>'
}
# equation references should include parentheses
i = grepl('^eq:', ref)
num[i] = paste0('(', num[i], ')')
res = sprintf('<a href="#%s">%s</a>', ref, num)
# do not add relative links to equation numbers in ePub/Word (not implemented)
ifelse(backslash & i, num, res)
}
# add prefix to section id's
prefix_section_labels = function(labels) {
prefix = knitr::opts_knit$get("rmarkdown.pandoc.id_prefix")
is_sec = !grepl(sprintf("^(%s):", reg_label_types), labels)
labels[is_sec] = paste0(prefix, labels[is_sec])
labels
}
reg_chap = '^(<h1><span class="header-section-number">)([A-Z0-9]+)(</span>.+</h1>)(\n</div>)?$'
# default names for labels
label_names = list(fig = 'Figure ', tab = 'Table ', eq = 'Equation ')
# prefixes for theorem environments
theorem_abbr = c(
theorem = 'thm', lemma = 'lem', corollary = 'cor', proposition = 'prp', conjecture = 'cnj',
definition = 'def', example = 'exm', exercise = 'exr', hypothesis = 'hyp'
)
# numbered math environments
label_names_math = setNames(list(
'Theorem ', 'Lemma ', 'Corollary ', 'Proposition ', 'Conjecture ', 'Definition ', 'Example ', 'Exercise ',
'Hypothesis '
), theorem_abbr)
# unnumbered math environments
label_names_math2 = list(proof = 'Proof. ', remark = 'Remark. ', solution = 'Solution. ')
all_math_env = c(names(theorem_abbr), names(label_names_math2))
label_names = c(label_names, label_names_math)
# types of labels currently supported, e.g. \(#fig:foo), \(#tab:bar)
label_types = names(label_names)
reg_label_types = paste(label_types, collapse = '|')
# compatibility with bookdown <= 0.4.7: ex was the prefix for Example; now it's exm
reg_label_types = paste(reg_label_types, 'ex', sep = '|')
# parse figure/table labels, and number them either by section numbers (Figure
# 1.1, 1.2, ..., 2.1, ...), or globally (Figure 1, 2, ...)
parse_fig_labels = function(content, global = FALSE) {
lines = grep(reg_chap, content)
chaps = gsub(reg_chap, '\\2', content[lines]) # chapter numbers
if (length(chaps) == 0) {
global = TRUE # no chapter titles or no numbered chapters
} else {
chaps = c('0', chaps) # use Chapter 0 in case of any figure before Chapter 1
lines = c(0, lines)
}
arry = character() # an array of the form c(label = number, ...)
if (global) chaps = '0' # Chapter 0 (could be an arbitrary number)
content = restore_math_labels(content)
# look for (#fig:label) or (#tab:label) and replace them with Figure/Table x.x
m = gregexpr(sprintf('\\(#((%s):[-/[:alnum:]]+)\\)', reg_label_types), content)
labs = regmatches(content, m)
cntr = new_counters(label_types, chaps) # chapter counters
figs = grep('^<div class="figure', content)
eqns = grep('<span class="math display">', content)
for (i in seq_along(labs)) {
if (length(lab <- labs[[i]]) == 0) next
j = if (global) chaps else tail(chaps[lines <= i], 1)
if (length(j) == 0) j = chaps[1] # use Chapter 0
lab = gsub('^\\(#|\\)$', '', lab)
type = gsub('^([^:]+):.*', '\\1', lab)
# there could be multiple labels on the same line, but their types must be
# the same (https://github.com/rstudio/bookdown/issues/538)
if (length(unique(type)) != 1) stop(
'There are multiple types of labels on one line: ', paste(labs, collapse = ', ')
)
type = type[1]
num = arry[lab]
for (k in which(is.na(num))) {
num[k] = cntr$inc(type, j) # increment number only if the label has not been used
if (!global) num[k] = paste0(j, '.', num[k]) # Figure X.x
}
arry = c(arry, setNames(num, lab))
switch(type, fig = {
if (length(grep('^<p class="caption', content[i - 0:1])) == 0) {
# remove these labels, because there must be a caption on this or
# previous line (possible negative case: the label appears in the alt
# text of <img>)
labs[[i]] = character(length(lab))
next
}
labs[[i]] = label_prefix(type, sep = ': ')(num)
k = max(figs[figs <= i])
content[k] = paste(c(content[k], sprintf('<span style="display:block;" id="%s"></span>', lab)), collapse = '')
}, tab = {
if (length(grep('^\\s*<caption', content[i - 0:1])) == 0) next
labs[[i]] = sprintf(
'<span id="%s">%s</span>', lab, label_prefix(type, sep = ': ')(num)
)
}, eq = {
labs[[i]] = sprintf('\\tag{%s}', num)
k = max(eqns[eqns <= i])
content[k] = sub(
'(<span class="math display")', sprintf('\\1 id="%s"', lab), content[k]
)
}, {
labs[[i]] = label_prefix(type, sep = ' ')(num)
})
}
regmatches(content, m) = labs
write_ref_keys(names(arry))
# remove labels in figure alt text (it will contain \ like (\#fig:label))
content = gsub('"\\(\\\\#(fig:[-/[:alnum:]]+)\\)', '"', content)
list(content = content, ref_table = arry)
}
# given a label, e.g. fig:foo, figure out the appropriate prefix
label_prefix = function(type, dict = label_names, sep = '') {
label = i18n('label', type, dict)
supported_type = c('fig', 'tab', 'eq')
if (is.function(label)) {
if (type %in% supported_type) return(label)
msg = knitr::combine_words(supported_type, before = "'")
stop("Using a label function is only supported for elements of types ", msg)
}
function(num = NULL) {
if (is.null(num)) return(label)
paste0(label, num, sep)
}
}
ui_names = list(edit = 'Edit', chapter_name = '', appendix_name = '')
ui_language = function(key, dict = ui_names) i18n('ui', key, ui_names)
i18n = function(group, key, dict = list()) {
labels = load_config()[['language']][[group]]
if (is.null(labels[[key]])) dict[[key]] else labels[[key]]
}
sec_num = '^<h[1-6]><span class="header-section-number">([.A-Z0-9]+)</span>.+</h[1-6]>(\n</div>)?$'
# parse section numbers and labels (id's)
parse_section_labels = function(content) {
arry = character()
sec_ids = '^<div id="([^"]+)" class="section .+$'
for (i in grep(sec_num, content)) {
if (!grepl(sec_ids, content[i - 1])) next # no section id
# extract section number and id, store in the array
arry = c(arry, setNames(
sub(sec_num, '\\1', content[i]),
sub(sec_ids, '\\1', content[i - 1])
))
}
write_ref_keys(names(arry))
arry
}
reg_ref_links = '(\\(ref:[-/[:alnum:]]+\\))'
# parse "reference links" of the form "(ref:foo) text", and replace (ref:foo) in
# the content with `text`; this is for figure/table captions that are
# complicated in the sense that they contain special LaTeX/HTML characters (e.g.
# _), or special Markdown syntax (e.g. citations); we can just use (ref:foo) in
# the captions, and write the actual captions elsewhere using (ref:foo) text
resolve_ref_links_html = function(x) {
res = parse_ref_links(x, '^<p>%s (.+)</p>$')
if (is.null(res)) return(x)
restore_ref_links(res$content, '(?<!code>)%s', res$tags, res$txts, TRUE)
}
parse_ref_links = function(x, regexp) {
r = sprintf(regexp, reg_ref_links)
if (length(i <- grep(r, x)) == 0) return()
tags = gsub(r, '\\1', x[i])
txts = gsub(r, '\\2', x[i])
if (any(k <- duplicated(tags))) {
warning('Possibly duplicated text reference labels: ', paste(tags[k], collapse = ', '))
k = !k
tags = tags[k]
txts = txts[k]
i = i[k]
}
x[i] = ''
list(content = x, tags = tags, txts = txts, matches = i)
}
restore_ref_links = function(x, regexp, tags, txts, alt = TRUE) {
r = sprintf(regexp, reg_ref_links)
m = gregexpr(r, x, perl = TRUE)
tagm = regmatches(x, m)
for (i in seq_along(tagm)) {
tag = tagm[[i]]
if (length(tag) == 0) next
k = match(tag, tags)
tag[!is.na(k)] = txts[na.omit(k)]
if (alt && is_img_line(x[i])) tag = strip_html(tag)
tagm[[i]] = tag
}
regmatches(x, m) = tagm
x
}
# add automatic identifiers to those section headings without ID's
add_section_ids = function(content) {
r = '^(<div)( class="section level[1-6].+)$'
for (i in grep(r, content)) {
if (grepl('id=".+"', content[i])) next # the id exists
h = content[i + 1]
# use section number as ID if section is numbered, otherwise use the raw
# values of the heading text
if (grepl(sec_num, h)) {
id = gsub(sec_num, 'section-\\1', h)
} else {
id = as.character(charToRaw(gsub('^<h[1-6]>|</h[1-6]>$', '', h)))
id = paste(id, collapse = '')
}
content[i] = gsub(r, paste0('\\1 id="', id, '"\\2'), content[i])
}
content
}
# add identifiers to TOC
add_toc_ids = function(toc) {
# use section numbers as ID's
r = '^(<li><a)(><span class="toc-section-number">)([.A-Z0-9]+)(</span>.+</a>.*)$'
for (i in grep(r, toc)) {
toc[i] = gsub(r, '\\1 href="#section-\\3"\\2\\3\\4', toc[i])
}
# use raw vectors as ID's
r = '^(<li><a)(>)(.+)(</a>.*)$'
for (i in grep(r, toc)) {
id = as.character(charToRaw(gsub(r, '\\3', toc[i])))
id = paste(id, collapse = '')
toc[i] = gsub(r, paste0('\\1 href="#', id, '"\\2\\3\\4'), toc[i])
}
toc
}
add_chapter_prefix = function(content) {
for (type in c('chapter', 'appendix'))
content = add_chapter_prefix_one(content, type)
content
}
add_chapter_prefix_one = function(content, type = c('chapter', 'appendix')) {
config = load_config()
field = paste0(type, '_name')
chapter_name = config[[field]] %n% ui_language(field)
if (is.null(chapter_name) || identical(chapter_name, '')) return(content)
chapter_fun = if (is.character(chapter_name)) {
function(i) switch(
length(chapter_name), paste0(chapter_name, i),
paste0(chapter_name[1], i, chapter_name[2]),
stop(field, ' must be of length 1 or 2')
)
} else if (is.function(chapter_name)) chapter_name else {
stop(field, ' in _bookdown.yml must be a character string or function')
}
# chapters use Arabic numerals; appendices use A-Z
r_chap = sprintf(
'^(<h1><span class="header-section-number">)([%s]+)(</span>.+</h1>.*)$',
switch(type, chapter = '0-9', appendix = 'A-Z')
)
for (i in grep(r_chap, content)) {
h = content[i]
x1 = gsub(r_chap, '\\1', h)
x2 = gsub(r_chap, '\\2', h)
x3 = gsub(r_chap, '\\3', h)
content[i] = paste0(x1, chapter_fun(x2), x3)
}
content
}
restore_links = function(segment, full, lines, filenames) {
# if there is only one chapter in the HTML in total, no need to restore links
if (length(lines) <= 1) return(segment)
r = '<a href="#([^"]+)"'
m = gregexpr(r, segment)
regmatches(segment, m) = lapply(regmatches(segment, m), function(x) {
if (length(x) == 0) return(x)
links = gsub(r, '\\1', x)
for (i in seq_along(links)) {
a = grep(sprintf(' id="%s"', links[i]), full, fixed = TRUE)
if (length(a) == 0) next
a = a[1]
x[i] = sprintf(
'<a href="%s#%s"', filenames[which.max(lines[lines <= a])], links[i]
)
}
x
})
segment
}
restore_part_html = function(x, remove = TRUE) {
i = grep('^<h1>\\(PART\\*?\\) .+</h1>$', x)
if (length(i) == 0) return(x)
i = i[grep('^<div .*class=".*unnumbered.*">$', x[i - 1])]
if (remove) {
x[i] = x[i - 1] = x[i + 1] = ''
} else {
k = grepl('^<h1>\\(PART\\*\\) .+</h1>$', x[i]) # unnumbered parts
i1 = i[k]; i2 = i[!k]
x[i1] = gsub('<h1>\\(PART\\*\\)', '<h1>', x[i1])
if (length(i2)) x[i2] = mapply(
gsub, '<h1>\\(PART\\)', x = x[i2],
sprintf('<h1><span class="header-section-number">%s</span>', as.roman(seq_along(i2)))
)
}
r = '^<li><a href="[^"]*"[^>]*>\\(PART\\*\\) (.+)</a>(.+)$'
i = grep(r, x)
x[i] = gsub(r, '<li class="part"><span><b>\\1</b></span>\\2', x[i])
r = '^<li><a href="[^"]*"[^>]*>\\(PART\\) (.+)</a>(.+)$'
i = grep(r, x)
if (length(i) == 0) return(x)
x[i] = mapply(
gsub, r, x = x[i],
sprintf('<li class="part"><span><b>%s \\1</b></span>\\2', as.roman(seq_along(i)))
)
x
}
# process the appendix "chapter" in the body, and change the numbering style in
# the appendices (also change in TOC), e.g. A.1, A.2, B.1, ...
restore_appendix_html = function(x, remove = TRUE) {
r = '^(<h1>)\\(APPENDIX\\) (.+)(</h1>)$'
i = find_appendix_line(r, x)
if (length(i) == 0) return(x)
# keep or remove the appendix heading in body
if (remove) {
x[i] = x[i - 1] = x[i + 1] = ''
} else x[i] = gsub(r, '\\1\\2\\3', x[i])
x = number_appendix(x, i + 1, length(x), 'header')
r = '^<li><a href="[^"]*"[^>]*>\\(APPENDIX\\) (.+)</a>(.+)$'
i = find_appendix_line(r, x)
if (length(i) == 0) return(x)
# remove link on (APPENDIX) in the TOC item
x[i] = gsub(r, '<li class="appendix"><span><b>\\1</b></span>\\2', x[i])
x = number_appendix(x, i + 1, next_nearest(i, which(x == '</div>')), 'toc')
x
}
# parse reference items so we can move them back to the chapter where they were used
parse_references = function(x) {
i = grep('^<div id="refs" class="references[^"]*"[^>]*>$', x)
if (length(i) != 1) return(list(refs = character(), html = x))
r = '^(<div) id="(ref-[^"]+)"([^>]*>)$'
k = grep(r, x)
k = k[k > i]
n = length(k)
if (n == 0) return(list(refs = character(), html = x))
ids = gsub(r, '\\2', x[k])
ref = x[k + 1]
# replace 3 em-dashes with author names
dashes = paste0('^<p>', intToUtf8(rep(8212, 3)), '[.]')
for (j in grep(dashes, ref)) {
ref[j] = sub(dashes, sub('^([^.]+[.])( .+)$', '\\1', ref[j - 1]), ref[j])
}
ref = paste(x[k], ref, x[k + 2], sep = '\n') # add <div id=ref-...></div>
title = if (grepl('^<h1[^>]*>', x[i - 2]) && grepl('^<div ', x[i - 3]))
gsub('<span class="header-section-number">[.0-9]+</span>', '', x[i - 2])
x[k] = gsub(r, "\\1\\3", x[k]) # remove the div id's
list(refs = setNames(ref, ids), html = x, title = title, div = x[i])
}
# move references back to the relevant chapter
relocate_references = function(x, refs, title, ids, div) {
if (length(refs) == 0) return(x)
ids = intersect(names(refs), ids)
if (length(ids) == 0) return(x)
title = if (is.null(title)) '<h3>References</h3>' else gsub('h1>', 'h3>', title)
if (is.null(div)) div = '<div id="refs" class="references">'
c(x, title, div , refs[ids], '</div>')
}
# extract relative links from text
parse_a_targets = function(x) {
r = '<a href="#([^"]+)"'
unlist(lapply(regmatches(x, gregexpr(r, x)), function(target) {
if (length(target) == 0) return()
gsub(r, '\\1', target)
}), use.names = FALSE)
}
# parse footnotes in the div of class "footnotes"; each footnote is one <li>
# with id fnX and a link back to the text
parse_footnotes = function(x) {
i = grep('<div class="footnotes[^"]*">', x)
if (length(i) == 0) return(list(items = character(), range = integer()))
j = which(x == '</div>')
j = min(j[j > i])
n = length(x)
r = '<li id="fn([0-9]+)"><p>(?s).+?<a href="#fnref\\1"[^>]*?>[^<]+</a></p></li>'
s = paste(x[i:n], collapse = '\n')
items = unlist(regmatches(s, gregexpr(r, s, perl = TRUE)))
list(items = setNames(items, gsub(r, 'fn\\1', items, perl = TRUE)), range = i:j)
}
# move footnotes to the relevant page
relocate_footnotes = function(x, notes, ids) {
if (length(notes) == 0) return(x)
ids = intersect(ids, names(notes))
if (length(ids) == 0) return(x)
c(
x, '<div class="footnotes">', '<hr />',
sprintf('<ol start="%s">', gsub('^fn', '', ids[1])), notes[ids],
'</ol>', '</div>'
)
}
number_appendix = function(x, i1, i2, type = c('toc', 'header')) {
r = sprintf(
'^(<%s>.*<span class="%s-section-number">)([.0-9]+)(</span>.+)',
if (type == 'toc') 'li' else 'h[1-6]', type
)
i = grep(r, x)
i = i[i >= i1 & i <= i2]
if (length(i) == 0) return(x)
s1 = gsub(r, '\\1', x[i])
s2 = gsub(r, '\\2', x[i]) # section numbers
s3 = gsub(r, '\\3', x[i])
s = strsplit(s2, ".", fixed = TRUE)
s = lapply(s, as.integer)
top = vapply(s, length, integer(1)) == 1
app_num = findInterval(seq_along(s), which(top))
# normalize chapter numbers to appendix numbers
for (j in seq_along(s)) s[[j]][1] = app_num[j]
counter_fun = function(nums) {
if (nums[1] > length(LETTERS))
stop('Too many chapters in the appendix (more than 26)')
paste0(c(LETTERS[nums[1]], nums[-1]), collapse = ".")
}
counters = vapply(s, counter_fun, character(1))
x[i] = paste0(s1, counters, s3)
x
}
# detect and move files to the output directory (if specified)
move_files_html = function(output, lib_dir) {
if (is.null(o <- opts$get('output_dir'))) return()
x = read_utf8(output)
# detect local resources used in HTML
r = '[- ](src|href)\\s*=\\s*"([^"]+)"'
m = gregexpr(r, x)
f = unlist(lapply(regmatches(x, m), function(z) {
if (length(z) == 0) z else gsub(r, '\\2', z)
}))
f = c(f, parse_cover_image(x))
f = vapply(f, function(x) {
if (grepl('^data:', x)) x else URLdecode(x)
}, FUN.VALUE = character(1))
Encoding(f) = 'UTF-8'
f = local_resources(unique(f[file.exists(f)]))
# detect resources in CSS
css = lapply(grep('[.]css$', f, ignore.case = TRUE, value = TRUE), function(z) {
d = dirname(z)
z = read_utf8(z)
r = 'url\\((?:"|\')?([^")\']+)(?:"|\')?\\)'
lapply(regmatches(z, gregexpr(r, z)), function(s) {
s = local_resources(gsub(r, '\\1', s))
file.path(d, s)
})
})
f = c(f, unlist(css))
f = gsub('[?#].+$', '', f) # strip the #/? part in links, e.g. a.html#foo
f = gsub('^[.]/', '', f) # strip the initial ./, e.g. ./foo.png -> foo.png
f = f[f != '']
f = f[xfun::is_rel_path(f)]
if (getOption('bookdown.js.debug', FALSE)) f = c(f, js_min_sources(f))
# add leaflet images if any used
f = c(f, grep('png$', list.files(
list.files(lib_dir, '^leaflet', full.names = TRUE),
full.names = TRUE, recursive = TRUE
), value = TRUE))
f = unique(f[xfun::file_exists(f)])
lapply(file.path(o, setdiff(dirname(f), '.')), dir_create)
f2 = file.path(o, f)
i = !same_path(f, f2)
if (any(i)) file.copy(f[i], f2[i], overwrite = TRUE)
# should not need the lib dir any more
if (length(lib_dir) == 1 && is.character(lib_dir))
unlink(lib_dir, recursive = TRUE)
}
# parse the cover image from HTML meta
parse_cover_image = function(x) {
r = '^\\s*<meta property="og:url" content="([^"]+)" />\\s*$'
i = grep(r, x)
if (length(i) == 0) return()
u = gsub(r, '\\1', x[i[1]]) # URL
r = '^\\s*<meta property="og:image" content="([^"]+)" />\\s*$'
i = grep(r, x)
if (length(i) == 0) return()
m = gsub(r, '\\1', x[i[1]]) # cover image
m = gsub(u, '', m, fixed = TRUE)
m
}
# source js files and map files
js_min_sources = function(x) {
r = '[.]min[.]js$'
x = grep(r, x, value = TRUE)
c(gsub(r, '.js', x), gsub(r, '.min.map', x))
}
# only parse equation labels (\#eq:label) in math environments; this needs
# special treatment because the backslash \ before # is preserved in equation
# environments in HTML output, whereas it is removed in normal paragraphs
restore_math_labels = function(x) {
i1 = grep('^(<[a-z]+>)?<span class="math display">\\\\\\[', x)
i2 = grep('\\\\\\]</span>(</[a-z]+>)?$', x)
n1 = length(i1); n2 = length(i2)
if (n1 * n2 == 0) return(x)
i2 = next_nearest(i1, i2, TRUE)
if (any(is.na(i2))) {
# retry without assuming the closing \]</span> is only followed by an optional </tag>
i2 = grep('\\\\\\]</span>(</[a-z]+>)?', x)
i2 = next_nearest(i1, i2, TRUE)
if (any(is.na(i2))) {
warning(
"There seems to be problems with math expressions of the display style. ",
"Labels of these expressions will not be processed."
)
return(x)
}
}
i = unlist(mapply(seq, i1, i2, SIMPLIFY = FALSE))
# remove \ before #
x[i] = gsub('\\(\\\\(#eq:[-/[:alnum:]]+)\\)', '(\\1)', x[i])
x
}
# extract a chapter title from the body, and prepend it to the page <title>
prepend_chapter_title = function(head, body) {
r1 = '(.*?<title>)(.+?)(</title>.*)'
if (length(i <- grep(r1, head)) == 0) return(head)
body = paste(body, collapse = ' ')
r2 = '.*?<h[0-6][^>]*>(.+?)</h[0-6]>.*'
if (!grepl(r2, body)) return(head)
title = strip_html(gsub(r2, '\\1', body))
if (knitr:::is_blank(title)) return(head)
x1 = gsub(r1, '\\1', head[i])
x2 = gsub(r1, '\\2', head[i])
x3 = gsub(r1, '\\3', head[i])
if (title == x2) return(head)
head[i] = paste0(x1, title, ' | ', x2, x3)
# update possible OpenGraph tags <meta property="og:title" content="...">
gsub(
paste0(' content="', x2), paste0(' content="', title, ' | ', x2),
head, fixed = TRUE
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.