# R/page_history.R

#' Get the history of a page
#'
#' Walks through the revision table of a page from its first row downwards
#' and builds one record per revision by calling \code{data_revisionsPart} on
#' the revisions that remain to be processed. The last row of the revision
#' table is not processed.
#'
#' @param x Either a revision data-frame built with \code{page_revisions}, or
#'   the id or the title of a page (the revisions are then fetched with
#'   \code{page_revisions}).
#' @param wikiAdress The domain of the wiki (without the /wiki folder). It is
#'   pasted directly in front of \code{"w/index.php?oldid="}, so it must end
#'   with a slash.
#' @param threesoldImportant The threshold used to build the \code{important}
#'   variable: it is computed from the \code{threesoldImportant \%} main
#'   contributors. Passed on to \code{data_revisionsPart}.
#'
#' @return A list with one element per processed revision (every row of the
#'   revision table except the last one); the list is empty when the table has
#'   fewer than two rows. Each element is the value returned by
#'   \code{data_revisionsPart}, expected to hold the "revisions", "contrib",
#'   "anon" and "important" values, with the \code{revid} and \code{time} of
#'   the revision added.
#' @export
#'
#' @examples
#' \donttest{
#'  # downloading the history of the "Action" page of the French Wikipedia
#'  h <- page_history("Action")
#' }
page_history <- function(x,
                         wikiAdress = "https://fr.wikipedia.org/",
                         threesoldImportant = 10) {

  # Accept either a ready-made revision table or a page id/title.
  # NOTE(review): wikiAdress is not forwarded to page_revisions(); confirm
  # whether page_revisions() should receive the wiki domain as well.
  if (is.data.frame(x)) {
    revision <- x
  } else {
    revision <- page_revisions(x)
  }

  # The last row is never processed (behaviour kept from the original code).
  # Clamp at zero so that a table with zero or one row yields an empty result
  # instead of iterating over 1:0.
  n_steps <- max(nrow(revision) - 1L, 0L)

  history <- vector("list", n_steps)

  print("History extraction...")
  pb <- txtProgressBar(style = 3)
  # Always release the progress bar, even if a step fails.
  on.exit(close(pb), add = TRUE)

  for (i in seq_len(n_steps)) {

    # The revision being processed is always the first remaining row.
    revid <- revision[1, "revid"]
    url <- paste0(wikiAdress, "w/index.php?oldid=", revid)

    row <- data_revisionsPart(revision, url, threesoldImportant)
    row$revid <- revid
    row$time <- revision[1, "timestamp"]

    # Drop the processed row so the next step starts at the next revision.
    revision <- revision[-1, , drop = FALSE]

    history[[i]] <- row

    setTxtProgressBar(pb, i / n_steps)
  }

  history
}
# leojoubert/WikiSocio documentation built on May 21, 2019, 5:08 a.m.