R/get_content.R

#' @import foreach

# Fetch the content of a set of revisions through exec_query().
#
# Arguments:
#   id        Vector of revision ids. An entry equal to 0 is not expected to
#             come back from the query and is represented by NA in the result
#             (the original naming suggests these mark page creations --
#             TODO confirm with the callers).
#   parallel  If true, batches are processed with %dopar%, otherwise
#             sequentially with %do%.
#   domain    Passed unchanged to exec_query().
#
# Returns the per-batch content vectors concatenated with c(), with an NA
# inserted at every position where `id` is 0.
#
# NOTE(review): the NA placement assumes that divide_list() splits `id` into
# consecutive batches of 50 entries -- confirm against divide_list().
get_content <- function(id, parallel, domain) {

  # Batch size assumed to be used by divide_list() (see note above).
  batch_size <- 50

  # Positions in `id` holding 0; these get an NA instead of fetched content.
  creation_pos <- which(id == 0)

  # Pick the foreach operator once so the loop body is written a single time.
  `%op%` <- if (parallel) `%dopar%` else `%do%`

  ids <- divide_list(id)

  query <- list(
    action = "query",
    prop = "revisions",
    rvprop = "ids|content"
  )

  res <- foreach(
    i = seq_along(ids),
    .combine = "c",
    .export = c("exec_query")
  ) %op% {

    query["revids"] <- ids[[i]]

    exec <- exec_query(query, domain = domain)$query$pages %>%
      lapply("[[", "revisions")

    id_res <- lapply(exec, lapply, "[[", "revid") %>% unlist %>% unname
    cont <- lapply(exec, lapply, "[[", "*") %>% unlist %>% unname

    # Put the returned contents back in the order in which they appear in `id`.
    pointer <- match(id, id_res)
    cont <- cont[pointer[!is.na(pointer)]]

    # Re-insert an NA for every zero id that falls inside this batch.
    # Batch i covers positions (offset + 1) .. (offset + batch_size) of `id`,
    # both bounds included; the previous strict comparisons skipped a zero id
    # sitting on the first or last position of a batch.
    offset <- batch_size * (i - 1)
    in_batch <- creation_pos > offset & creation_pos <= offset + batch_size

    # `cont` only holds the current batch, so the insertion point has to be
    # relative to the start of the batch, not to the whole `id` vector.
    # Positions are visited in increasing order, so every insertion keeps the
    # later offsets valid.
    for (pos in creation_pos[in_batch]) {
      cont <- append(cont, NA, after = pos - offset - 1)
    }

    cont
  }

  res
}
cafeine05/WikiSocio documentation built on May 13, 2019, 10:39 a.m.