```r
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
```
`r badger::badge_cran_release("scrappy", "black")`
`r badger::badge_devel("villegar/scrappy", "yellow")`
`r badger::badge_github_actions("villegar/scrappy")`
The goal of scrappy is to provide simple functions to scrape data from different websites for academic purposes.
You can install the released version of scrappy from CRAN with:
```r
install.packages("scrappy")
```
And the development version from GitHub with:
```r
# install.packages("devtools")
devtools::install_github("villegar/scrappy")
```
NOTE: To run the following examples on your computer, you need to download and install Mozilla Firefox (https://www.mozilla.org/en-GB/firefox/new/). Alternatively, you can replace the value of `browser` in the call to `RSelenium::rsDriver` with another supported browser, as sketched below.
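For example, a minimal sketch using Chrome instead of Firefox (this assumes a compatible ChromeDriver is available on your system; the port number is arbitrary):

```r
# Create an RSelenium session driven by Chrome instead of Firefox
rD <- RSelenium::rsDriver(browser = "chrome", port = 4549L, verbose = FALSE)

# ... use rD$client with scrappy as in the examples below ...

# Stop the server when done
rD$server$stop()
```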
```r
# Create RSelenium session
rD <- RSelenium::rsDriver(browser = "firefox", port = 4549L, verbose = FALSE)

# Call scrappy
out_newa <- scrappy::newa_nrcc(
  client = rD$client,
  year = 2020,
  month = 12,        # December
  station = "gbe",   # Geneva (Bejo) station
  save_file = FALSE  # Don't save output
)

out_gmaps <- scrappy::google_maps(
  client = rD$client,
  name = "Sefton Park",
  max_reviews = 20
)

out_gpp <- scrappy::find_a_gp(rD$client, postcode = "L69 3GL")

# Stop server
conn <- rD$server$stop()
```
The `newa_nrcc` function retrieves data from the Network for Environment and Weather Applications (NEWA) at Cornell University. Website: http://newa.cornell.edu
```r
# Create RSelenium session
rD <- RSelenium::rsDriver(browser = "firefox", port = 4549L, verbose = FALSE)

# Call scrappy
out_newa <- scrappy::newa_nrcc(
  client = rD$client,
  year = 2020,
  month = 12,        # December
  station = "gbe",   # Geneva (Bejo) station
  save_file = FALSE  # Don't save output to a CSV file
)

# Stop server
rD$server$stop()
```
Partial output from the previous example:
```r
knitr::kable(head(out_newa, 10))
```
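If you want to keep a local copy of the data, setting `save_file = TRUE` should write the scraped table to a CSV file instead of only returning it (a sketch based on the `save_file` argument shown above; see `?scrappy::newa_nrcc` for where the file is written):

```r
# As above, but also write the scraped table to a CSV file
out_newa <- scrappy::newa_nrcc(
  client = rD$client,
  year = 2020,
  month = 12,       # December
  station = "gbe",  # Geneva (Bejo) station
  save_file = TRUE  # Save the output to a CSV file
)
```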
Extract the reviews for Sefton Park in Liverpool (only the 20 most recent):
```r
# Create RSelenium session
rD <- RSelenium::rsDriver(browser = "firefox", port = 4549L, verbose = FALSE)

# Call scrappy
out_gmaps <- scrappy::google_maps(
  client = rD$client,
  name = "Sefton Park",
  max_reviews = 20
)

# Stop server
rD$server$stop()
```
Output after removing the original authors' names and the URLs to their profiles:
```r
`%>%` <- scrappy::`%>%`
out_gmaps %>%
  dplyr::mutate(
    author = paste0("Author ", seq_along(author)),
    author_url = ""
  ) %>%
  knitr::kable()
```
```r
# Create RSelenium session
rD <- RSelenium::rsDriver(browser = "firefox", port = 4549L, verbose = FALSE)

# Retrieve GP practices near L69 3GL
# (Waterhouse building, University of Liverpool)
out_gpp <- scrappy::find_a_gp(rD$client, postcode = "L69 3GL")

# Stop server
rD$server$stop()
```
```r
knitr::kable(out_gpp)
```