#' Download the table of contents of a page.
#'
#' Fetches one revision of a Wikipedia page and collects the \code{h2} to
#' \code{h6} headings that are direct children of the
#' \code{div#mw-content-text} element.
#'
#' @param revid A single revision id (numeric or character) identifying the
#'   revision from which to extract the table of contents.
#' @param domain The domain where the wiki is located, i.e. the language
#'   subdomain of \code{wikipedia.org} (for example \code{"fr"}).
#' @param pander Whether to render the table of contents with
#'   \code{pander::pandoc.list.return}. If \code{TRUE} (the default) a single
#'   character string is returned; if \code{FALSE} the underlying data frame
#'   is returned instead.
#'
#' @return If \code{pander} is \code{TRUE}, a length-one character vector
#'   holding the concatenated pandoc list items (the empty string when the
#'   page has no matching heading). Otherwise a data frame with one row per
#'   heading and two columns: \code{level} (the heading number minus one, so
#'   \code{h2} gives 1) and \code{text} (the heading text).
#' @export
#'
#' @importFrom magrittr %>%
#' @import rvest
#' @importFrom xml2 read_html
#' @importFrom stringr str_extract
#' @importFrom pander pandoc.list.return
#' @importFrom plyr alply
page_toc <- function(revid, domain = "fr", pander = TRUE) {
  # Exactly one page is downloaded per call, so reject vectors and NA early
  # instead of failing later inside read_html().
  stopifnot(
    length(revid) == 1L, !is.na(revid),
    length(domain) == 1L, !is.na(domain)
  )

  # Render numeric ids in fixed notation: paste0() would otherwise turn
  # e.g. 100000000 into "1e+08" and request the wrong revision.
  if (is.numeric(revid)) {
    revid <- format(revid, scientific = FALSE, trim = TRUE)
  }
  url <- paste0("https://", domain, ".wikipedia.org/w/index.php?&oldid=", revid)

  # Union of the h2..h6 headings sitting directly under the content container.
  heading_xpath <- paste0(
    "//div[@id='mw-content-text']/h", 2:6,
    collapse = " | "
  )
  nodes <- url %>%
    read_html() %>%
    html_nodes(xpath = heading_xpath)

  # The tag name ("h2", "h3", ...) carries the depth; shift it so h2 is 1.
  level <- html_name(nodes) %>%
    str_extract("[[:digit:]]") %>%
    as.numeric() - 1
  text <- html_text(nodes)
  toc <- data.frame(level, text, stringsAsFactors = FALSE)

  if (!pander) {
    return(toc)
  }

  # No heading found: nothing to render.
  if (nrow(toc) == 0L) {
    return("")
  }

  # Render each heading as its own list item, indented by its level. `[[`
  # hands plain scalars to pandoc.list.return() rather than 1x1 data frames.
  items <- alply(toc, 1, function(row) {
    pandoc.list.return(
      row[["text"]],
      indent.level = row[["level"]],
      add.end.of.list = FALSE
    )
  })

  paste(unlist(items), collapse = "")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.