# Start remote driver with docker ---------------------------
#' Start remote driver with docker using the system command line.
#'
#' Lists the running containers with \code{docker ps}, kills every container
#' whose listing row mentions \code{selenium/standalone-chrome}, and then
#' starts a fresh detached container of that image with container port 4444
#' published on host port 4445.
#'
#' @param kill Boolean indicating whether a running docker container with
#'   selenium should be killed. Kept for backward compatibility: a running
#'   selenium container is always replaced (as in earlier versions), and
#'   \code{kill = TRUE} with no running container simply starts a new one
#'   instead of failing.
#' @param wait Number of seconds to pause after the \code{docker kill} and
#'   \code{docker run} commands. Defaults to 2.
#' @return \code{NULL}, invisibly. Called for its side effects.
#' @import RSelenium
#' @importFrom utils capture.output
start_chrome_remDr <- function(kill = FALSE, wait = 2) {
  image <- "selenium/standalone-chrome"
  ps_out <- system("docker ps", intern = TRUE)

  # Rows of the `docker ps` listing that mention the selenium image. An empty
  # listing yields a zero-length vector, so the branch below is skipped.
  running <- ps_out[grepl(image, ps_out, fixed = TRUE)]

  if (length(running) > 0) {
    cat("selenium/standalone-chrome docker container already runnign \n")
    cat("kill container and restart a new one \n")
    # The first whitespace-delimited field of each row is taken as the
    # container id; all matching containers are killed in one command.
    docker_ids <- vapply(
      strsplit(running, "[[:space:]]+"),
      function(fields) fields[1],
      character(1)
    )
    system(paste("docker kill", paste(docker_ids, collapse = " ")))
    Sys.sleep(wait)
  }

  cat("starting selenium/standalone-chrome docker container \n")
  system("docker run -d -p 4445:4444 --shm-size 2g selenium/standalone-chrome")
  Sys.sleep(wait)
  invisible(NULL)
}
# connect to remote driver ---------------------------
#' Connect to remote driver
#'
#' Creates the RSelenium remote driver object used to interact with websites.
#' It targets \code{localhost} on port 4445 with the \code{chrome} browser,
#' i.e. the host port published by \code{\link{start_chrome_remDr}}, which is
#' assumed to have started the docker container beforehand. The function only
#' constructs the object; it does not call its \code{open()} method.
#'
#' @return Object of class remote driver.
#' @import RSelenium
connect_remDr <- function() {
  RSelenium::remoteDriver(
    remoteServerAddr = "localhost",
    port = 4445L,
    browserName = "chrome"
  )
}
# Wrapper function to interact with remote driver ---------------------------
#' Wrapper function to interact with remote driver
#'
#' Opens the remote driver session, sets an implicit timeout of 5000 ms,
#' navigates to \code{link}, pauses, calls \code{FUN(FUN_input)} and closes the
#' session again, all in one call. This allows any user-defined function to be
#' used to scrape or operate on the specified link.
#'
#' The session is closed through \code{on.exit()}, so it is released even when
#' navigation or \code{FUN} signals an error. Note that \code{FUN} receives
#' only \code{FUN_input}; if it needs the driver it has to reach it through
#' its own environment or through \code{FUN_input}.
#'
#' @param remDr remote driver connection object created with
#'   \code{\link{connect_remDr}}.
#' @param link url for the website to interact with.
#' @param FUN user-specified function (or the name of one), called with
#'   \code{FUN_input} as its single argument.
#' @param FUN_input input that will go in \code{FUN}.
#' @param wait Number of seconds to pause after navigating and before calling
#'   \code{FUN}. Defaults to 2.
#' @return Object generated by \code{FUN}.
#'
#' @import RSelenium
do_remDr <- function(remDr, link, FUN, FUN_input, wait = 2) {
  # Resolve FUN before touching the browser so a bad FUN fails early.
  FUN <- match.fun(FUN)

  # open server connection
  remDr$open(silent = TRUE)
  # Always close the session, including when a later step errors.
  on.exit(remDr$close(), add = TRUE)

  # set a timeout
  remDr$setTimeout(type = "Implicit", milliseconds = 5000)
  # navigate to the website
  remDr$navigate(url = link)
  Sys.sleep(wait)

  # function to do something on the url
  FUN_output <- FUN(FUN_input)
  FUN_output
}
# Get page source ---------------
#' Get html page source
#'
#' Downloads the source of the page currently loaded in the remote driver and
#' parses it with \code{xml2::read_html()}. The function sets an implicit
#' timeout of 5000 ms and pauses for 3 seconds before reading the source.
#'
#' Opening the connection and navigating to \code{link} are commented out in
#' this function, so \code{link} is currently not used here: the driver is
#' expected to be open and already pointing at the desired page (for example
#' via \code{\link{remDr_go_to_link}}). Note that the individual download of
#' source html within each specific query is handled differently.
#'
#' @param remDr remote driver connection
#' @param link link to the desired webpage (currently unused, see above)
#'
#' @return parsed html file.
get_page_source <- function(remDr, link) {
  remDr$setTimeout(type = "Implicit", milliseconds = 5000)

  # Pause before reading the source of the page.
  Sys.sleep(3)

  # getPageSource() returns a list; its first element is the html source.
  page <- remDr$getPageSource()
  xml2::read_html(page[[1]])
}
# Connect to Link -----
#' Connect remote driver to specified url
#'
#' Sets an implicit timeout of 5000 ms on the remote driver and navigates it
#' to \code{link}. The driver is not opened here (that call is disabled), so
#' it has to be open already.
#'
#' @param remDr remote driver connection
#' @param link link to the desired webpage
#'
#' @return no meaningful return; the result of the driver's
#'   \code{navigate()} call is passed through.
#'
#' @import RSelenium
remDr_go_to_link <- function(remDr, link) {
  implicit_wait_ms <- 5000
  remDr$setTimeout(type = "Implicit", milliseconds = implicit_wait_ms)
  remDr$navigate(url = link)
}
# open server connection
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.