# R/contrib_content.R

#' Download the content of the revisions made by a user
#'
#' Fetches the text of every revision listed in \code{revisions} with
#' \code{get_content} and, optionally, the text of each revision's parent so
#' that both versions can be compared.
#'
#' @param revisions A data frame built with \code{contrib_revisions}. The
#'   columns \code{revid} and \code{parentid} are read.
#' @param get_parent Logical. If \code{TRUE} (the default), the parent of each
#'   revision is downloaded as well and the result is a list of
#'   parent/revision pairs instead of a plain vector.
#' @param clean Logical. Indicates whether the content needs to be cleaned
#'   with the \code{clean_wikitext} function.
#' @param domain The domain where the wiki is located. Passed on to
#'   \code{get_content}.
#' @param parallel Logical. Indicates whether a parallel backend should be
#'   used. Passed on to \code{get_content}.
#'
#' @return If \code{get_parent} is \code{FALSE}, the value returned by
#'   \code{get_content} for the revision ids (cleaned when \code{clean} is
#'   \code{TRUE}). If \code{get_parent} is \code{TRUE}, a list with one element
#'   per revision, each itself a list with components \code{parent} (content of
#'   the parent revision, \code{""} when it is missing), \code{revision}
#'   (content of the revision) and \code{ids} (a vector named \code{parent} and
#'   \code{revision} holding the two ids).
#'
#' @export
#'
#' @examples
#' \dontrun{
#' revisions <- contrib_revisions("Diti")
#' content <- contrib_content(revisions, clean = TRUE)
#' }
contrib_content <- function(revisions, get_parent = TRUE, clean = FALSE, domain = "fr", parallel = FALSE) {

  revision_ids <- revisions$revid
  parent_ids <- revisions$parentid

  content <- get_content(revision_ids, domain = domain, parallel = parallel)

  if (clean) {
    content <- clean_wikitext(content)
  }

  if (get_parent) {

    parent <- get_content(parent_ids, domain = domain, parallel = parallel)
    # A missing parent is replaced by an empty text (case of page creations).
    parent[is.na(parent)] <- ""

    if (clean) {
      parent <- clean_wikitext(parent)
    }

    # NOTE(review): pairing is done by position; if get_content drops invalid
    # ids, parent and revision may no longer line up -- confirm against
    # get_content.
    # seq_along() keeps the result empty when there is no revision at all
    # (1:length(x) would iterate over c(1, 0)).
    content <- lapply(seq_along(content), function(i) {
      list(
        parent = parent[i],
        revision = content[i],
        ids = c(parent = parent_ids[i], revision = revision_ids[i])
      )
    })

  }

  if (length(content) != nrow(revisions)) {
    warning("There was some bad revisions ids in your revisions data-frame.")
  }

  content

}
# leojoubert/WikiSocio documentation built on May 21, 2019, 5:08 a.m.