Crawler: Crawler

Usage Examples

View source: R/Crawler.R

Usage

1

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function ()
{
    # Interactive Google News crawler.
    #
    # Prompts for key words on the console, requests the Google News search
    # results for them, and walks the "Next" links (at most `max_pages` pages).
    # From every page it collects the text of the nodes matching the CSS
    # selectors ".r" and ".slp". The ".slp" text is split on its last " - "
    # into two fields.
    #
    # Takes no arguments.
    # Returns a data.frame with the columns "Text" (".r" text), "Deliver" and
    # "Time" (the two halves of the ".slp" text). The same table is written to
    # "<key>.csv" in the working directory.
    #
    # Relies on `centerText()`, which is defined outside this block.
    cat("Web Crawler Loading", "\n")
    library(dplyr)
    library(rvest)
    cat("What information do you want to collect?", "\n")
    key <- readline(prompt = "Input Key Words:")
    # Percent-encode the key words so spaces, "&", "#", ... cannot break or
    # alter the query string.
    href <- paste0("https://www.google.com/search?hl=en&gl=us&tbm=nws&authuser=0&q=", 
        utils::URLencode(key, reserved = TRUE))
    selector_name <- ".r"
    selector_del <- ".slp"
    max_pages <- 500
    # One slot per page; flattened after the loop. This avoids rbind() in the
    # loop, which recycled/truncated pages with a different number of hits.
    headers <- vector("list", max_pages)
    delivers <- vector("list", max_pages)
    page <- html_session(href)
    cat("\r", centerText(paste("Crawler Lauched, Information Collecting, target:", 
        key)))
    for (i in seq_len(max_pages)) {
        # Each page is scraped exactly once.
        headers[[i]] <- html_text(html_nodes(x = page, css = selector_name))
        delivers[[i]] <- html_text(html_nodes(x = page, css = selector_del))
        if (i == max_pages) {
            break
        }
        # A missing "Next" link marks the last page; reuse the successful
        # result instead of issuing the same request a second time.
        next_page <- tryCatch(follow_link(page, "Next"), error = function(e) e)
        if (inherits(next_page, "error")) {
            break
        }
        page <- next_page
        # Pause between requests.
        Sys.sleep(1)
    }
    fheader <- as.character(unlist(headers, use.names = FALSE))
    fdeliver <- as.character(unlist(delivers, use.names = FALSE))
    if (length(fheader) != length(fdeliver)) {
        stop("Scraped ", length(fheader), " '", selector_name, "' entries but ", 
            length(fdeliver), " '", selector_del, "' entries; cannot pair them.", 
            call. = FALSE)
    }
    # Split on the LAST " - " so that an entry with extra separators (or none)
    # cannot shift the fields of the following rows.
    has_sep <- grepl(" - ", fdeliver, fixed = TRUE)
    deliver_name <- sub("^(.*) - .*$", "\\1", fdeliver)
    deliver_time <- sub("^.* - ", "", fdeliver)
    deliver_time[!has_sep] <- NA_character_
    text <- data.frame(Text = fheader, Deliver = deliver_name, 
        Time = deliver_time, stringsAsFactors = FALSE)
    cat("Writing Output .......", "\n")
    write.csv(text, paste0(key, ".csv"))
    text
}

pingqingsheng/Robot0001 documentation built on May 5, 2019, 5:53 p.m.