Crawler()
|
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
## Crawler: interactively scrape Google News results for a keyword.
##
## Prompts the user for search terms, then walks up to 500 result pages by
## following each page's "Next" link, collecting headline text (css ".r")
## and the "Source - Time" metadata line (css ".slp").  The combined table
## is written to "<key>.csv" and returned as a data frame with columns
## Text, Deliver, Time.
##
## NOTE(review): Google's result-page selectors (".r", ".slp") change
## frequently and scraping may violate the site's terms of service --
## confirm before relying on this in production.  html_session()/
## follow_link() are deprecated in rvest >= 1.0 (use session()/
## session_follow_link()); kept here for compatibility with the
## original code's rvest version.
function ()
{
    cat("Web Crawler Loading", "\n")
    library(dplyr)
    library(rvest)
    cat("What information do you want to collect?", "\n")
    key <- readline(prompt = "Input Key Words:")
    ## URL-encode the query so multi-word / non-ASCII keywords form a
    ## valid URL (original pasted the raw input, breaking on spaces).
    href <- paste0("https://www.google.com/search?hl=en&gl=us&tbm=nws&authuser=0&q=",
        utils::URLencode(key, reserved = TRUE))
    url <- read_html(href)
    selector_name <- ".r"
    selector_del <- ".slp"
    ## Collect each page's results into lists and combine once at the end.
    ## The original seeded fheader/fdeliver before the loop and rbind-ed
    ## the same first page again inside it (duplicating page 1), and grew
    ## the matrices with rbind on every iteration (O(n^2) copying).
    headers <- list()
    delivers <- list()
    i <- 0
    while (i < 500) {
        headers[[i + 1]] <- html_text(html_nodes(x = url, css = selector_name))
        delivers[[i + 1]] <- html_text(html_nodes(x = url, css = selector_del))
        if (i == 0) {
            ## Original called centerText(), which is undefined anywhere
            ## in scope and crashed the first iteration; print plainly.
            cat("\r", paste("Crawler Launched, Information Collecting, target:",
                key), "\n")
            nxt <- tryCatch(html_session(href) %>% follow_link("Next"),
                error = function(e) e)
        }
        else {
            nxt <- tryCatch(html_session(url$url) %>% follow_link("Next"),
                error = function(e) e)
        }
        ## Stop cleanly when there is no "Next" link (last results page).
        if (inherits(nxt, "error")) {
            break
        }
        url <- nxt
        i <- i + 1
        Sys.sleep(1)  # be polite to the server between page fetches
    }
    fheader <- unlist(headers)
    fdeliver <- unlist(delivers)
    if (length(fdeliver) == 0 || length(fheader) == 0) {
        cat("No results collected.", "\n")
        return(invisible(NULL))
    }
    ## Each ".slp" entry looks like "Source - Time"; split into two
    ## columns.  Malformed entries get NA for Time instead of letting
    ## matrix() silently recycle values as the original did.
    parts <- strsplit(fdeliver, " - ", fixed = TRUE)
    deliver <- as.data.frame(do.call(rbind, lapply(parts, function(p) {
        c(p[1], if (length(p) >= 2) p[2] else NA_character_)
    })))
    ## Headline and metadata node counts can disagree on a broken page;
    ## align to the shorter of the two instead of erroring in cbind().
    n <- min(length(fheader), nrow(deliver))
    text <- cbind(as.character(fheader[seq_len(n)]),
        deliver[seq_len(n), , drop = FALSE])
    names(text) <- c("Text", "Deliver", "Time")
    cat("Writing Output .......", "\n")
    write.csv(text, paste0(key, ".csv"))
    return(text)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.