# R/contrib_revisions.R

#' Get the revisions made by a contributor.
#'
#' Queries the \code{usercontribs} list of the wiki API, following
#' \code{uccontinue} tokens until every batch has been fetched, and returns
#' the revisions as a data-frame sorted by timestamp.
#'
#' @param x The name of the contributor (a single character string).
#' @param namespace The namespace of contributions, sent as \code{ucnamespace}.
#' @param domain The domain of the wiki, passed on to \code{exec_query}.
#' @param merge_consecutive A boolean. If \code{TRUE}, consecutive revisions
#'   made by the same contributor on the same page are merged into one row.
#'
#' @import magrittr
#' @import plyr
#'
#' @return A data-frame with one row per revision (or per merged run of
#'   revisions) and the columns \code{user}, \code{pageid}, \code{revid},
#'   \code{parentid}, \code{ns}, \code{title}, \code{timestamp} (a
#'   \code{POSIXct} in UTC) and \code{sizediff}. For a merged row,
#'   \code{parentid} comes from the first revision of the run, \code{revid}
#'   and \code{timestamp} from the last one, and \code{sizediff} is the sum
#'   over the run. If \code{exec_query} returns \code{NULL}, a warning is
#'   raised and only the rows fetched so far are returned.
#' @export
#'
#'
#' @examples
#'
#' # All the contributions of a user of the French wiki, with consecutive
#' # revisions merged.
#' contrib_revisions('cafeine05')
#'
#' # The same contributions, one row per revision.
#' contrib_revisions('cafeine05', merge_consecutive = FALSE)
contrib_revisions <- function(x, namespace = "*", domain = "fr", merge_consecutive = TRUE) {

  # Reject anything that is not exactly one character string.
  if (!is.character(x) || length(x) != 1) stop("x must be a string")

  fields <- c("user", "pageid", "revid", "parentid", "ns", "title",
              "timestamp", "sizediff")

  # Turn one API record into a 1 x 8 matrix; fields that are absent or empty
  # become NA so that every row has the same width.
  to_row <- function(rev) {
    values <- as.list(rev[fields])
    values[lengths(values) == 0] <- NA
    matrix(unlist(values, use.names = FALSE), ncol = length(fields))
  }

  query <- list(action = "query",
                list = "usercontribs",
                ucnamespace = namespace,
                ucprop = "ids|title|timestamp|sizediff",
                uclimit = "max",
                ucuser = x,
                uccontinue = NULL)

  # Rows are collected in a list and bound once after the loop, instead of
  # growing a data-frame with rbind() on every batch.
  rows <- list()

  repeat {

    exec <- exec_query(query, domain = domain)

    if (is.null(exec)) {
      warning("Network problem - results may be incomplete")
      break
    }

    contribs <- exec$query$usercontribs
    if (length(contribs) > 0) {
      rows <- c(rows, lapply(contribs, to_row))
    }

    # Stop when the API gives no continuation token for a further batch.
    next_token <- exec$continue$uccontinue
    if (is.null(next_token)) {
      break
    }
    query$uccontinue <- next_token
  }

  raw <- if (length(rows) > 0) {
    do.call(rbind, rows)
  } else {
    matrix(character(0), ncol = length(fields))
  }

  result <- as.data.frame(raw, stringsAsFactors = FALSE)
  names(result) <- fields

  for (col in c("pageid", "revid", "parentid", "ns", "sizediff")) {
    result[[col]] <- as.numeric(as.character(result[[col]]))
  }
  result$title <- as.character(result$title)
  result$user <- as.character(result$user)

  # The trailing "Z" marks the timestamps as UTC, so parse them as UTC rather
  # than in the session's local time zone (which would shift the instants and
  # yield NA for wall-clock times skipped by a local DST change).
  result$timestamp <- as.POSIXct(
    strptime(as.character(result$timestamp), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
  )

  # revid breaks ties between revisions saved within the same second, so that
  # a parent revision is always placed before its child.
  result <- arrange(result, timestamp, revid)

  if (isTRUE(merge_consecutive)) {

    result <- ddply(result, .(title), function(page) {
      # A new run starts at every revision whose parent is not one of this
      # contributor's own revisions of the page.
      page$run_id <- cumsum(!(page$parentid %in% page$revid))
      ddply(page, .(run_id), function(revs) {
        last <- nrow(revs)
        data.frame(user = revs$user[1],
                   pageid = revs$pageid[1],
                   revid = revs$revid[last],
                   parentid = revs$parentid[1],
                   ns = revs$ns[1],
                   title = revs$title[1],
                   timestamp = revs$timestamp[last],
                   sizediff = sum(revs$sizediff),
                   stringsAsFactors = FALSE)
      })
    })

    result$run_id <- NULL

    # ddply() returns the rows grouped by title; restore chronological order.
    result <- arrange(result, timestamp, revid)

  }

  result

}
# cafeine05/WikiSocio documentation built on May 13, 2019, 10:39 a.m.