#' Extract article content from online news sources.
#'
#' The URLs are split into batches of at most 20, the batches are processed
#' on a `parallel` cluster via [pbapply::pblapply()], and the per-article
#' results are row-bound into a single table.
#'
#' For every URL the page is fetched with `get_site()`, annotated with
#' `annotate_site()`, reduced to the rows whose `discard` column equals
#' `"keep"`, and the remaining `text` values are pasted together (separated
#' by a single space) per `url` / `h1_title` combination.
#'
#' @name qnews_extract_article
#' @param x A vector of URLs.
#' @param cores An integer value specifying the number of cluster workers;
#'   passed unchanged to [parallel::makeCluster()].
#' @return A `data.table` (which is also a data frame) with the columns
#'   `url`, `h1_title` and `text`. If `x` is empty, an empty table with no
#'   columns is returned and no cluster is started.
#'
#' @details An error raised while processing any URL is propagated to the
#'   caller; the cluster is shut down in that case as well.
#'
#' @export
#' @rdname qnews_extract_article
#'
#'
qnews_extract_article <- function(x,
                                  cores) {
  # Nothing to fetch: return the same empty table that binding zero batches
  # would give, without paying for cluster start-up.
  if (length(x) == 0L) {
    return(data.table::rbindlist(list()))
  }

  # Groups of at most 20 URLs; each group is one unit of work for a worker.
  batches <- split(x, ceiling(seq_along(x) / 20))

  # Turn one batch of URLs into a table with one row per url / h1_title.
  build_table <- function(url0) {
    x0 <- lapply(url0, function(q) {
      y0 <- get_site(q)
      y1 <- annotate_site(site = y0)
      y2 <- subset(y1, y1$discard == "keep")
      # Convert in place so the grouped data.table syntax below applies.
      data.table::setDT(y2)
      y2[, list(text = paste(text, collapse = " ")),
         by = list(url, h1_title)]
    })
    data.table::rbindlist(x0)
  }

  clust <- parallel::makeCluster(cores)
  # Register the shutdown immediately so the workers are released even when
  # a batch fails and the error unwinds through this function.
  on.exit(parallel::stopCluster(clust), add = TRUE)

  # No clusterExport() is needed: every batch reaches its worker as the
  # argument of build_table, which does not refer to `batches` itself.
  docs <- pbapply::pblapply(cl = clust,
                            X = batches,
                            FUN = build_table)

  data.table::rbindlist(docs)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.