Nothing
#' Resolve Reference-Style Links
#'
#' @description
#'
#' [Reference style links and
#' images](https://www.markdownguide.org/basic-syntax/#reference-style-links)
#' are a form of markdown syntax that reduces duplication and makes markdown
#' more readable. They come in two parts:
#'
#' 1. The inline part that uses two pairs of square brackets where the second
#' pair of square brackets contains the reference for the anchor part of the
#' link. Example:
#' ```
#' [inline text describing link][link-reference]
#' ```
#' 2. The anchor part, which can be anywhere in the document, contains a pair
#' of square brackets followed by a colon and space with the link and
#' optionally the link title. Example:
#' ```
#' [link-reference]: https://docs.ropensci.org/tinkr/ 'documentation for tinkr'
#' ```
#'
#' Commonmark treats reference-style links as regular links, which can be a
#' pain when converting large documents. This function resolves these
#' links by reading in the source document, finding the reference-style links,
#' and adding them back at the end of the document with the 'anchor' attribute
#' and appending the reference to the link with the 'ref' attribute.
#'
#' @details
#'
#' ## Nomenclature
#'
#' The reference-style link contains two parts, but they don't have common names
#' (the [markdown guide](https://www.markdownguide.org/basic-syntax/) calls
#' these "first part and second part"), so in this documentation, we call the
#' link pattern of `[link text][link-ref]` as the "inline reference-style link"
#' and the pattern of `[link-ref]: <URL>` as the "anchor references-style link".
#'
#' ## Reference-style links in commonmark's XML representation
#'
#' A link or image in XML is represented by a node with the following attributes
#'
#' - destination: the URL for the link
#' - title: an optional title for the link
#'
#' For example, this markdown link `[link text](https://example.com "example
#' link")` is represented in XML as text inside of a link node:
#'
#' ```{r}
#' lnk <- "[link text](https://example.com 'example link')"
#' xml <- xml2::read_xml(commonmark::markdown_xml(lnk))
#' cat(as.character(xml2::xml_find_first(xml, ".//d1:link")))
#' ```
#'
#' However, reference-style links are rendered equivalently:
#'
#' ```{r}
#' lnk <- "
#' [link text][link-ref]
#'
#' [link-ref]: https://example.com 'example link'
#' "
#' xml <- xml2::read_xml(commonmark::markdown_xml(lnk))
#' cat(as.character(xml2::xml_find_first(xml, ".//d1:link")))
#' ```
#'
#' ## XML attributes of reference-style links
#'
#' To preserve the anchor reference-style links, we search the source document
#' for the destination attribute proceded by `]: `, transform that information
#' into a new link node with the `anchor` attribute, and add it to the end of
#' the document. That node looks like this:
#'
#' ```{r, echo = FALSE, comment = NA}
#' lnk <- "[link-ref]: https://example.com 'example link'"
#' al <- withr::with_namespace("tinkr", build_anchor_links(lnk))
#' cat(as.character(xml2::xml_find_first(al, ".//link")))
#' ```
#'
#' From there, we add the anchor text to the node that is present in our
#' document as the `ref` attribute:
#'
#' ```{r, echo = FALSE, comment = NA}
#' lnk <- "
#' [link text][link-ref]
#'
#' [link-ref]: https://example.com 'example link'
#' "
#' xml <- xml2::read_xml(commonmark::markdown_xml(lnk))
#' lnk <- xml2::xml_find_first(xml, ".//d1:link")
#' xml2::xml_set_attr(lnk, "rel", "link-ref")
#' cat(as.character(lnk))
#' ```
#'
#' @note this function is internally used in the function [to_xml()].
#' @param body an XML body
#' @param txt the text of a source file
#' @param ns an the namespace that resolves the Markdown namespace (defaults to
#' [md_ns()])
#' @keywords internal
#' @examples
#' f <- system.file("extdata", "link-test.md", package = "tinkr")
#' md <- yarn$new(f, sourcepos = TRUE, anchor_links = FALSE)
#' md$show()
#' if (requireNamespace("withr")) {
#' lnks <- withr::with_namespace("tinkr",
#' resolve_anchor_links(md$body, readLines(md$path)))
#' md$body <- lnks
#' md$show()
#' }
resolve_anchor_links <- function(body, txt, ns = md_ns()) {
# copy the body so that we can recover from errors
body <- copy_xml(body)
# find all links and images (since either one could have an anchor link)
links <- xml2::xml_find_all(body, ".//md:link | .//md:image", ns)
if (length(links) == 0) {
return(invisible(body))
}
# Search for the pattern that resolves to `]: <LINK>( <TITLE>)` and return the
# line number the link was found on
dests <- xml2::xml_attr(links, "destination")
titles <- xml2::xml_attr(links, "title")
targets <- paste0(clean_targets(dests), "\\s?['\"]?", clean_targets(titles))
rel <- paste0("\\]:\\s+?", targets, "['\"]?\\s*$")
pos <- purrr::map_int(rel, find_anchor_link, txt)
if (sum(pos) == 0) {
return(invisible(body))
}
# extract all of matches from the document
anchors <- txt[pos]
# set the attributes of the links that have anchors
xml2::xml_set_attr(links[pos != 0], "rel", al_name(anchors))
# add the anchors at the end of the document
add_anchor_links(body, unique(anchors))
}
# Lifted from Hmisc::escapeRegex in Hmisc 4.5.0
clean_targets <- function(targets) {
gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", targets)
}
find_anchor_link <- function(target, txt) {
position <- grep(target, txt)
if (length(position) == 0) {
return(0L)
}
max(position) # examples in markdown will create duplicates
}
add_anchor_links <- function(body, links) {
new <- build_anchor_links(links)
add_nodes_to_body(body, new, where = length(xml2::xml_children(body)))
copy_xml(body)
}
# Make an anchor link node from a text string
# build_anchor_links("[CAT!]: https://placekitten.com/200/200 'cute kitty'")
build_anchor_links <- function(link) {
txt <- glue::glue("<text>{al_name(link)}</text>")
attrs <- glue::glue(
"destination='{al_dest(link)}' title='{al_title(link)}' anchor='true'"
)
# wrap the nodes in a paragraph to make sure they don't get screwed up by
# any footer text
make_text_nodes(c(
"<paragraph>",
glue::glue("<link {attrs}>{txt}</link>"),
"</paragraph>"
))
}
# Helpers for parsing anchor links:
#
# [name]: dest 'title'
escape_ampersand <- function(amp) {
# escape ampersands that are not valid code points, though this will still
# allow invalid code points, but it's better than nothing
gsub("[&](?![#]?[A-Za-z0-9]+?[;])", "&", amp, perl = TRUE)
}
al_name <- function(link) {
res <- sub("^[[\\[](.+?)[\\]]:\\s.+?$", "\\1", link, perl = TRUE)
escape_ampersand(res)
}
al_dest <- function(link) {
res <- sub("^[\\[].+?[\\]]:\\s([^\\s]+?)(\\s['\"]?.*?)?$", "\\1", link, perl = TRUE)
escape_ampersand(res)
}
al_title <- function(link) {
# try to find titles, but if they don't exist, they will match exactly with
# the original string, so we need to censor them.
titles <- sub("^[\\[].+?[\\]]:\\s[^\\s]+?(\\s['\"](.*?)['\"])$", "\\2",
link, perl = TRUE)
titles[titles == link] <- ""
escape_ampersand(titles)
}
#nocov start
# Get the position of an element
get_pos <- function(x, e = 1) {
as.integer(
gsub(
"^(\\d+?):(\\d+?)[-](\\d+?):(\\d+?)$",
glue::glue("\\{e}"),
xml2::xml_attr(x, "sourcepos")
)
)
}
# helpers for get_pos
get_linestart <- function(x) get_pos(x, e = 1)
get_colstart <- function(x) get_pos(x, e = 2)
get_lineend <- function(x) get_pos(x, e = 3)
get_colend <- function(x) get_pos(x, e = 4)
#nocov end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.