#' Versioning an article
#'
#' Compares every revision of a page with the previous one and records, for
#' each word, the revision in which it appears and the revision in which it
#' disappears, together with the user and timestamp found in \code{meta} for
#' those revisions.
#'
#' @param corpus A character vector built with \code{page_content}, one element
#' per revision. Element \code{i} is matched with row \code{i} of \code{meta}
#' once \code{meta} has been sorted by \code{timestamp} (so \code{corpus} is
#' presumably expected to be ordered from the oldest to the newest revision).
#' @param meta The revisions metadata built with \code{page_revisions}. It is
#' sorted with \code{dplyr::arrange} on its \code{timestamp} column, and its
#' \code{user} and \code{timestamp} columns are read, so it must be a
#' data-frame-like object exposing these two columns.
#' @param parallel A logical indicating whether the foreach loop should run in
#' parallel or not. If \code{TRUE}, you need to declare a parallel backend, see
#' the \code{foreach} documentation.
#'
#' @return A data frame giving for each word: a unique id (\code{id}), the word
#' itself (\code{mot}), the revision number in which it disappears
#' (\code{stop}), the revision number in which it appears (\code{begin}), and
#' the matching users and timestamps (\code{user.add}, \code{user.delete},
#' \code{date.add}, \code{date.delete}).
#' For \code{stop}, \code{user.delete} and \code{date.delete}, a missing value
#' indicates that the word is still present in the page at the moment of the
#' extraction.
#'
#' @export
#'
#' @import foreach magrittr
#'
#' @importFrom textreuse align_local tokenize_words
#' @importFrom stringr str_split
#' @importFrom plyr ddply
#' @importFrom dplyr filter select arrange
#' @importFrom tidyr spread
#'
page_version <- function(corpus, meta, parallel = TRUE) {
  # `parallel` is a scalar flag: plain if/else rather than the vectorised
  # ifelse().
  `%op%` <- if (parallel) `%dopar%` else `%do%`

  # Diff each revision against the previous one; `step` is the index of the
  # newer revision of the pair.
  # NOTE(review): the loop body calls diff_text(), which is not listed in
  # .export -- confirm that parallel workers can resolve it.
  tbl_version <- foreach(
    i = seq_along(corpus)[-1],
    .combine = rbind,
    .packages = c("magrittr", "textreuse", "stringr", "dplyr", "foreach"),
    .export = c("diff", "diff_lcs", "match_seq")
  ) %op% {
    delta <- diff_text(corpus[i - 1], corpus[i])
    delta <- cbind(step = rep(i, nrow(delta)), delta)
    delta[, c("step", "a", "b", "mot", "status")]
  }

  # Every word of the first revision counts as an addition at step 1.
  # seq_len()/rep() keep the columns aligned even when the first revision
  # yields no token (1:length(x) would give c(1, 0)).
  first_words <- tokenize_words(corpus[1])
  n_first <- length(first_words)
  first_rows <- data.frame(
    step = rep(1, n_first),
    a = rep(NA, n_first),
    b = seq_len(n_first),
    mot = first_words,
    status = rep("+", n_first),
    stringsAsFactors = FALSE
  )
  tbl_version <- rbind(first_rows, tbl_version)

  tbl_version <- set_id(tbl_version)

  # Keep only additions ("+") and deletions ("-") and put the step of each one
  # in its own column.
  tbl_version <- tbl_version %>%
    filter(status != "=") %>%
    select(id, mot, step, status) %>%
    spread(status, step)

  # A status that never occurred has no column after spread(). Fill it with
  # integer NA: a logical NA vector used as an index below would be recycled
  # as a mask and return a vector of the wrong length.
  for (absent in setdiff(c("+", "-"), names(tbl_version))) {
    tbl_version[[absent]] <- rep(NA_integer_, nrow(tbl_version))
  }

  # Rename by name, not by position: the order of the "+" and "-" columns
  # depends on the collation used by spread() and on whether one of them had
  # to be appended above.
  names(tbl_version)[match(c("+", "-"), names(tbl_version))] <-
    c("begin", "stop")
  tbl_version <- tbl_version[, c("id", "mot", "stop", "begin")]
  # TODO (original author's note): rework page_version

  # Revision numbers index the rows of `meta` sorted by timestamp; an NA
  # revision number yields NA for the user and the date.
  meta <- arrange(meta, timestamp)
  tbl_version$user.add <- meta$user[tbl_version$begin]
  tbl_version$user.delete <- meta$user[tbl_version$stop]
  tbl_version$date.add <- meta$timestamp[tbl_version$begin]
  tbl_version$date.delete <- meta$timestamp[tbl_version$stop]
  tbl_version
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.