# R/contrib_version.R

#' Version the revisions of a given contributor
#'
#' Diffs every revision in \code{corpus} against its parent with
#' \code{diff_text()}, keeps the diff rows whose \code{status} is not
#' \code{"="}, and labels each kept row with the metadata of the revision it
#' comes from.
#'
#' @param corpus A corpus built with \code{contrib_content}, with param
#'   \code{get_parent = TRUE}. Each element is read through its \code{parent}
#'   and \code{revision} entries.
#' @param meta A data-frame built with \code{contrib_revisions}. Its
#'   \code{timestamp}, \code{pageid} and \code{title} columns are looked up by
#'   position, so row \code{i} is taken to describe \code{corpus[[i]]}.
#' @param parallel A logical indicating whether the foreach loop should go
#'   parallel or not. If \code{TRUE}, you need to declare a parallel backend,
#'   see \code{foreach} documentation.
#'
#' @import foreach magrittr dplyr
#'
#' @importFrom textreuse align_local tokenize_words
#' @importFrom stringr str_split
#' @importFrom plyr ddply
#'
#' @return A data-frame with one row per kept diff row and the columns
#'   \code{revision} (index of the revision in \code{corpus}), \code{mot},
#'   \code{status}, \code{date}, \code{pageid} and \code{title}. An empty
#'   \code{corpus} yields a zero-row data-frame with the same columns.
#' @export
#'
contrib_version <- function(corpus, meta, parallel = TRUE) {

  # With an empty corpus there is nothing to diff and nothing for `rbind` to
  # combine, so the metadata assignments further down would not be working on
  # a data-frame. Hand back a correctly shaped, zero-row result instead.
  if (length(corpus) == 0L) {
    none <- integer(0)
    return(data.frame(
      revision = none,
      # NOTE(review): `mot` and `status` are assumed to be character columns
      # in the output of diff_text() -- confirm.
      mot = character(0),
      status = character(0),
      date = meta$timestamp[none],
      pageid = meta$pageid[none],
      title = meta$title[none],
      stringsAsFactors = FALSE
    ))
  }

  # Pick the sequential or the parallel foreach operator once, up front.
  `%op%` <- if (parallel) `%dopar%` else `%do%`

  # NOTE(review): revisions whose `parent` is NA are handed to diff_text()
  # as they are; confirm diff_text() copes with a missing parent.
  res <- foreach(
    i = seq_along(corpus),
    .combine = rbind,
    .packages = c("magrittr", "textreuse", "stringr", "dplyr", "foreach"),
    .export = c("diff", "diff_lcs", "match_seq")
  ) %op% {

    diff_text(corpus[[i]]$parent, corpus[[i]]$revision) %>%
      select(mot, status) %>%
      # "=" marks the diff rows that are dropped from the result.
      filter(status != "=") %>%
      # Tag every remaining row with the index of its revision in `corpus`.
      cbind(revision = rep(i, nrow(.)), .)

  }

  # `revision` is a position in `corpus`; the same position is used to pick
  # the matching metadata, which assumes `meta` and `corpus` are aligned.
  res$date <- meta$timestamp[res$revision]
  res$pageid <- meta$pageid[res$revision]
  res$title <- meta$title[res$revision]

  res

}
# leojoubert/WikiSocio documentation built on May 21, 2019, 5:08 a.m.