#' Downloading the list of contributions for one page
#'
#' @param x Either id or title of a page
#' @param domain The domain where the wiki is located
#'
#' @return A data-frame containing the username of the user (or the IP if anonymous contribution), the timestamp, the size of the revision, a boolean indicating weither the contribution is anonymous or not, and the difference beetween the contribution and the previous
#' @export
#'
#' @family page functions
#'
#' @import magrittr
#' @importFrom dplyr filter arrange
#'
#' @examples
#' # Downloading the list of contribution for the 'action' page in the french wiki
#' page_revisions('Action')
page_revisions <- function(x, domain = "fr") {
# Création de la requête
query = list(action = "query",
prop = "revisions",
rvlimit = "max",
rvprop = "sha1|timestamp|size|userid|user|ids",
redirects = "",
rvcontinue = NULL)
if(is.numeric(x)) {
query["pageids"] <- x
} else {
query["titles"] <- x
}
# Pré-allocation de l'objet résultats
result <- data.frame(matrix(ncol = 9, nrow = 0))
names(result) <- c("revid","parentid","userid","user", "timestamp", "size", "anon", "sha1")
repeat {
exec <- exec_query(query, domain = domain)
cond <- if(is.null(exec)) {
FALSE
warning("Network problem - results may be incomplete")
} else {
names(exec$query$pages[1]) %>% as.numeric() > 0
}
if (cond) {
content <- exec[["query"]][["pages"]][[1]][["revisions"]]
suppressWarnings({
userid <- sapply(content, "[[", "userid") %>% as.character %>% as.numeric %>% unlist
})
user <- sapply(content, "[[", "user") %>% as.character
revid <- sapply(content, "[[", "revid") %>% as.character %>% as.numeric %>% unlist
parentid <- sapply(content, "[[", "parentid") %>% as.character %>% as.numeric %>% unlist
timestamp <- sapply(content, "[[", "timestamp") %>% unlist %>% as.character
size <- sapply(content, "[[", "size") %>% as.character %>% as.numeric %>% unlist
sha1 <- sapply(content, "[[", "sha1") %>% as.character
anon <- sapply(content, function(x) {
!is.null(x$anon)
}) %>% as.logical
result <- data.frame(revid,parentid,userid,user, timestamp, size, anon,sha1, stringsAsFactors = FALSE) %>%
rbind(result, .)
query$rvcontinue <- exec$continue$rvcontinue %>% unname %>% unlist
if (is.null(query$rvcontinue)) {
break
}
} else {
break
}
}
# Mise en forme du temps
result$timestamp %<>% strptime("%Y-%m-%dT%H:%M:%SZ") %>%
as.POSIXct()
# Calcul des poids
if(nrow(result) > 1) {
result$weight <- c(diff(result$size) * -1, result$size[nrow(result)])
} else {
result$weight <- result$size
}
# Nettoyage des révisions consécutives par le même utilisateur
result %<>% arrange(timestamp)
clean <- sapply(2:nrow(result), function(i) {
result$user[i-1] == result$user[i]
}) %>%
c(result$user[1] == result$user[2], .)
result[clean == FALSE, ]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.