#' @include CDPRemote.R utils.R wait.R
#' @importFrom assertthat assert_that is.scalar is.number
NULL
#' Execute an asynchronous CDP flow with Chrome
#'
#' The `perform_with_chrome()` function executes an asynchronous Chrome DevTools
#' Protocol flow with Chromium/Chrome and can turn it into a synchronous function.
#' An asynchronous remote flow is a function that takes a connection object and
#' returns a [promise][promises::promise].
#' If several functions are passed to `perform_with_chrome()`, their execution is
#' serial. If one of the asynchronous functions fails, the whole execution also
#' fails.
#'
#' @param ... Asynchronous remote flow functions.
#' @param .list A list of asynchronous remote flow functions - an alternative to
#' `...`.
#' @param timeouts A vector of timeouts applied to each asynchronous function.
#' Repeated.
#' @param cleaning_timeout The delay for cleaning Chrome.
#' @param async Is the result a promise? Required for using `perform_with_chrome()`
#' in Shiny.
#' @param bin Character scalar, the path to Chromium or Chrome executable.
#' If not provided, `crrri` will try to find the chrome binary itself using
#' `find_chrome_binary()`. You can set a path in `HEADLESS_CHROME` environment
#' variable to indicate where it is located.
#' @param debug_port Integer scalar, the Chromium/Chrome remote debugging port.
#' @param local Logical scalar, indicating whether the local version of the
#' protocol (embedded in `crrri`) must be used or the protocol must be
#' fetched _remotely_.
#' @param extra_args Character vector, extra command line arguments passed to
#' Chromium/Chrome. You can know more about command line flags (or switches)
#' from [chromium
#' developers](https://www.chromium.org/developers/how-tos/run-chromium-with-flags)
#'
#' @param headless Logical scalar, indicating whether Chromium/Chrome is launched
#' in headless mode.
#' @param retry_delay Number, delay in seconds between two successive tries to
#' connect to headless Chromium/Chrome.
#' @param max_attempts Logical scalar, number of tries to connect to headless
#' Chromium/Chrome.
#'
#' @return An invisible list with the values of the fulfilled promises for each
#' async function.d If there is only async function, the return value is the value of the
#' fulfilled promise.
#' @export
#'
#' @examples
#' \dontrun{
#' async_save_as_pdf <- function(url) {
#' function(client) {
#' Page <- client$Page
#'
#' Page$enable() %...>% {
#' Page$navigate(url = url)
#' Page$loadEventFired()
#' } %...>% {
#' Page$printToPDF()
#' } %...>% {
#' write_base64(., paste0(httr::parse_url(url)$hostname, ".pdf"))
#' }
#' }
#' }
#'
#' save_as_pdf <- function(...) {
#' list(...) %>%
#' purrr::map(async_save_as_pdf) %>%
#' perform_with_chrome(.list = .)
#' }
#'
#' save_as_pdf("https://www.r-project.org/", "https://rstudio.com/")
#' }
perform_with_chrome <- function(
..., .list = NULL, timeouts = 30, cleaning_timeout = 30, async = FALSE,
bin = NULL, debug_port = 9222L, local = FALSE,
extra_args = NULL, headless = TRUE, retry_delay = 0.2, max_attempts = 15L
) {
# find chrome
bin <- bin %||% find_chrome_binary()
# check arguments
if(is.null(.list)) {
funs <- list(...)
} else {
assert_that(is_list(.list))
funs <- .list
}
purrr::walk(funs, check_is_single_param_fun)
assert_that(is.numeric(timeouts))
assert_that(is.number(cleaning_timeout))
# initialize objects
timeouts <- rep_len(timeouts, length(funs))
total_timeout <- sum(timeouts) + cleaning_timeout
# launch Chrome
chrome <- Chrome$new(
bin = bin, debug_port = debug_port, local = local, extra_args = extra_args,
headless = headless, retry_delay = retry_delay, max_attempts = max_attempts
)
# connect to client
client <- chrome$connect()
# associate a function with its timeout
execute_fun <- function(index) {
fun <- purrr::pluck(funs, index)
delay <- purrr::pluck(timeouts, index)
res <- promises::then(client, function(value) {
res <- (fun)(value)
# fun must be an async function, i.e. a function that returns a promise
if(!promises::is.promising(res)) {
stop(paste0("Function n-", index, " passed to perform_with_chrome does not return a promise."))
}
# turnaround a bug in promise_map
# promise_map has a bug if the value of the fulfilled promise is NULL
# https://github.com/rstudio/promises/issues/47
promises::then(res, function(value) {
if(is.null(value)) {
# return anything but NULL
return(TRUE)
}
value
})
})
timeout(res,
delay = delay,
msg = paste0("The delay of ", delay, " seconds expired in async function n-", index, ".\n"))
}
all_funs_executed <- promises::promise_map(seq_along(funs), execute_fun)
# if only one fun applied, returns the element inside the length 1 list
results_available <- promises::then(
all_funs_executed,
onFulfilled = function(value) {
if(length(value) == 1L) {
value <- value[[1]]
}
value
}
)
results_after_cleaning <- promises::finally(results_available, onFinally = function() {
# it seems that using hold(), i.e. later::run_now() in finally is not a problem
# FMPOV, this seems completely weird, but it works well
chrome$close(async = FALSE)
})
if(isTRUE(async)) {
return(results_after_cleaning)
}
invisible(hold(results_after_cleaning, timeout = total_timeout))
}
#' Launch Chromium or Chrome
#'
#' This class aims to launch Chromium or Chrome in headless mode. It possesses
#' methods to manage connections to headless Chromium/Chrome using the
#' Chrome Debugging Protocol.
#'
#' @section Usage:
#' ```
#' remote <- Chrome$new(bin = NULL, debug_port = 9222L,
#' local = FALSE, extra_args = NULL, headless = TRUE,
#' retry_delay = 0.2, max_attempts = 15L)
#'
#' remote$connect(callback = NULL)
#' remote$listConnections()
#' remote$closeConnections(callback = NULL)
#' remote$version()
#' remote$user_agent
#'
#' remote$close(async = FALSE)
#' remote$view()
#' remote$is_alive()
#' ```
#'
#' @section Arguments:
#' * `remote`: `Chrome` object representing a remote instance of headless
#' Chromium/Chrome.
#' * `bin`: Character scalar, the path to Chromium or Chrome executable.
#' If not provided, `crrri` will try to find the chrome binary itself using
#' `find_chrome_binary()`. You can set a path in `HEADLESS_CHROME` environment
#' variable to indicate where it is located.
#' * `debug_port`: Integer scalar, the Chromium/Chrome remote debugging port.
#' Note that headless Chromium/Chrome will be available at
#' `http://localhost:<debug_port>`.
#' * `local`: Logical scalar, indicating whether the local version of the
#' protocol (embedded in `crrri`) must be used or the protocol must be
#' fetched _remotely_.
#' * `extra_args`: Character vector, extra command line arguments passed to
#' Chromium/Chrome. You can know more about command line flags (or switches)
#' from [chromium
#' developers](https://www.chromium.org/developers/how-tos/run-chromium-with-flags)
#' * `headless`: Logical scalar, indicating whether Chromium/Chrome is launched
#' in headless mode.
#' * `retry_delay`: Number, delay in seconds between two successive tries to
#' connect to headless Chromium/Chrome.
#' * `max_attempts`: Integer scalar, number of tries to connect to headless
#' Chromium/Chrome.
#' * `callback`: Function with one argument.
#' * `async`: Does the function return a promise?
#'
#' @section Details:
#' `$new()` opens a new headless Chromium/Chrome. You can deactivate verbose
#' from chrome process launching byt setting option `crrri.verbose` to FALSE.
#'
#' `$connect(callback = NULL)` connects the R session to the remote instance of
#' headless Chromium/Chrome. The returned value depends on the value of the
#' `callback` argument. When `callback` is a function, the returned value is a
#' connection object. When `callback` is `NULL` the returned value is a promise
#' which fulfills once R is connected to the remote instance of Chromium/Chrome.
#' Once fulfilled, the value of this promise is the connection object.
#'
#' `$listConnections()` returns a list of the connection objects succesfully
#' created using the `$connect()` method.
#'
#' `$closeConnections(callback = NULL)` closes all the connections created using the
#' `$connect()` method. If `callback` is `NULL`, it returns a promise which
#' fulfills when all the connections are closed: once fulfilled, its value is the
#' remote object.
#' If `callback` is not `NULL`, it returns the remote object. In this case,
#' `callback` is called when all the connections are closed and the remote object is
#' passed to this function as the argument.
#'
#' `$version()` executes the DevTools `Version` method. It returns a list of
#' informations available at `http://localhost:<debug_port>/json/version`.
#'
#' `$user_agent` returns a character scalar with the User Agent of the
#' headless Chromium/Chrome.
#'
#' `$close(async = FALSE)` closes the remote instance of headless
#' Chromium/Chrome. If `async` is `FALSE` this method returns the `remote`
#' object invisibly. Is `async` is `TRUE`, a promise is returned. This promise
#' fulfills when Chromium/Chrome is closed. Once fulfilled, its value is the
#' `remote` object.
#'
#' `$view()` opens a visible Chromium/Chrome browser at
#' `http://localhost:<debug_port>`. This is useful to 'see' the headless
#' Chromium/Chrome instance. Returns the process of the visible browser.
#'
#' `$is_alive()` checks if the remote instance is alive. Returns a logical
#' scalar.
#'
#' `$listTargets()` returns a list with information about tabs.
#' @name Chrome
#' @examples
#' \dontrun{
#'
#' remote <- Chrome$new()
#'
#' remote$connect() %...>% (function(client) {
#' Page <- client$Page
#' Runtime <- client$Runtime
#'
#' Page$enable() %...>% {
#' Page$navigate(url = 'http://r-project.org')
#' } %...>% {
#' Page$loadEventFired()
#' } %...>% {
#' Runtime$evaluate(
#' expression = 'document.documentElement.outerHTML'
#' )
#' } %...>% (function(result) {
#' cat(result$result$value, "\n")
#' })
#' }) %...!% {
#' cat("Error:", .$message, "\n")
#' } %>%
#' promises::finally(~ remote$close())
#' }
#'
NULL
#' @importFrom rlang `%||%`
#' @export
Chrome <- R6::R6Class(
"Chrome",
inherit = CDPRemote,
public = list(
initialize = function(
bin = NULL, debug_port = 9222L, local = FALSE,
extra_args = NULL, headless = TRUE, retry_delay = 0.2, max_attempts = 15L
) {
assert_that(is.null(bin) || is_scalar_character(bin))
assert_that(
is_scalar_integerish(debug_port),
is_user_port(debug_port),
is_available_port(debug_port)
)
assert_that(is.scalar(local), is.logical(local))
assert_that(is.scalar(headless), is.logical(headless))
assert_that(is.number(retry_delay))
assert_that(is_scalar_integerish(max_attempts))
private$.bin <- bin %||% find_chrome_binary()
work_dir <- chr_new_data_dir()
chr_process <- chr_launch(bin, debug_port, extra_args, headless, work_dir)
private$.work_dir <- work_dir
private$.process <- chr_process
super$initialize(host = "localhost",
debug_port = debug_port,
secure = FALSE,
local = local,
retry_delay = retry_delay,
max_attempts = max_attempts
)
if(!private$.reachable) {
warning("...closing Chrome.")
private$finalize()
}
},
close = function(async = FALSE) {
if(isTRUE(async)) {
return(private$.async_finalizer())
}
invisible(private$finalize())
},
view = function() {
chr_launch(
private$.bin,
debug_port = NULL,
extra_args = c(
build_http_url(private$.host, private$.port, private$.secure),
'--new-window',
'--no-default-browser-check',
'-incognito'
),
headless = FALSE,
work_dir = NULL
)
},
is_alive = function() private$.process$is_alive(),
print = function() {
super$print()
cat(' Running:', self$is_alive())
}
),
private = list(
.bin = NULL,
.work_dir = NULL,
.process = NULL,
.async_finalizer = function() {
clients_disconnected <- timeout(
self$closeConnections(),
delay = 10,
msg = "The WebSocket connections have not been properly closed."
)
# if the delay expires, this is not really a problem:
# they will be closed when we will kill chrome
caught <- promises::catch(
clients_disconnected,
function(err) {
warning(err$message, call. = FALSE, immediate. = TRUE)
}
)
# now, kill chrome and clean the working directory
killed_and_cleaned <- promises::finally(
clients_disconnected,
onFinally = function() {
killed <- !private$.process$is_alive()
if (!killed) {
"!DEBUG Closing headless Chrome..."
private$.process$kill()
if (private$.process$is_alive()) {
"!DEBUG Cannot close headless Chrome."
stop("Cannot close headless Chrome.\n")
} else {
"!DEBUG ...headless Chrome closed."
}
private$.process$wait()
}
chr_clean_work_dir(private$.work_dir)
}
)
killed_and_cleaned
},
finalize = function() {
killed_and_cleaned <- private$.async_finalizer()
# since we are in finalize(), we can use hold() safely:
hold(
killed_and_cleaned,
timeout = 30,
msg = "Did not succeed to close Chrome properly."
)
}
)
)
chr_new_data_dir <- function(length = 8, slug = "chrome-data-dir-") {
user_data_dir <- rappdirs::user_data_dir(appname = "r-crrri")
random_string <- paste(sample(letters, size = length, replace = TRUE), collapse = "")
normalizePath(file.path(user_data_dir, paste0(slug, random_string)), mustWork = FALSE)
}
# Launch Chrome ---------------------------------------------------
# This function launches a new Chrome process
# The user has to provide a working directory for Chrome: see the helper function chr_new_data_dir()
# The command can silently fail: in this case, NULL is returned.
chr_launch <- function(
bin = NULL, debug_port = 9222, extra_args = NULL, headless = TRUE, work_dir
) {
bin <- bin %||% find_chrome_binary()
proxy <- get_proxy()
behind_proxy <- nzchar(proxy)
travis <- nzchar(Sys.getenv("TRAVIS"))
if (behind_proxy)
extra_args <- c(chr_proxy_args(proxy), extra_args)
if (travis)
extra_args <- c(chr_travis_args(), extra_args)
if (is_os_type("windows"))
extra_args <- c(chr_windows_args(headless), extra_args)
chrome_args <- unique(c(
chr_default_args(),
chr_headless_args(headless),
chr_work_dir_args(work_dir),
chr_debugging_port_args(debug_port),
extra_args
))
"!DEBUG Trying to launch Chrome `if (headless) 'in headless mode'` ..."
chr_process <-
tryCatch(
processx::process$new(
bin,
chrome_args,
echo_cmd = getOption("crrri.verbose", TRUE),
supervise = TRUE
),
error = function(e) NULL
)
if (!is.null(chr_process)) {
"!DEBUG Chrome succesfully launched `if (headless) 'in headless mode'`."
"!DEBUG It should be accessible at http://localhost:`debug_port`"
} else {
stop("Cannot launch Chrome. ",
"Please add the path to your Chrome bin.",
call. = FALSE
)
}
chr_process
}
get_proxy <- function() {
# the order of the variables is important
# we will take the first non empty variable
env_var <- c("https_proxy", "HTTPS_PROXY", "http_proxy", "HTTP_PROXY")
values <- Sys.getenv(env_var)
values <- values[nzchar(values)]
if (length(values) > 0)
return(unname(values[1]))
else
return("")
}
chr_proxy_args <- function(proxy) {
proxy_arg <- paste("--proxy-server", proxy, sep = "=")
no_proxy_urls <- get_no_proxy_urls()
no_proxy_string <- paste(no_proxy_urls, collapse = ";")
no_proxy_arg <- paste("--proxy-bypass-list", no_proxy_string, sep = "=")
c(proxy_arg, no_proxy_arg)
}
get_no_proxy_urls <- function() {
env_var <- Sys.getenv(c("no_proxy", "NO_PROXY"))
urls <- do.call(c, strsplit(env_var, "[,;]"))
urls <- c(default_no_proxy_urls(), unname(urls))
unique(urls)
}
default_no_proxy_urls <- function() {
c("localhost", "127.0.0.1")
}
is_os_type <- function(os) {
identical(.Platform$OS.type, os)
}
chr_windows_args <- function(headless) {
if (headless) c("--disable-gpu", "--no-sandbox")
}
chr_headless_args <- function(headless) {
if(isTRUE(headless)) {
c("--headless")
} else {
c("--new-window")
}
}
chr_default_args <- function() {
c("--no-first-run")
}
chr_travis_args <- function() {
c("--disable-gpu", "--no-sandbox")
}
chr_work_dir_args <- function(work_dir) {
if(!is.null(work_dir)) {
paste("--user-data-dir", work_dir, sep = "=")
}
}
chr_debugging_port_args <- function(debug_port) {
if(!is.null(debug_port)) {
paste("--remote-debugging-port", debug_port, sep = "=")
}
}
# cleaner helpers -----------------------------------------------
chr_clean_work_dir <- function(work_dir) {
cleaned <- !dir.exists(work_dir)
if (!cleaned) {
"!DEBUG Cleaning Chrome working directory..."
Sys.sleep(0.5)
result <- unlink(work_dir, recursive = TRUE, force = TRUE)
cleaned <- result == 0
if (cleaned) {
"!DEBUG ...Chrome working directory succesfully deleted."
} else {
"!DEBUG ...cannot supress the Chrome working directory: `work_dir` \nPlease remove it manually."
warning("...cannot supress the Chrome working directory: ", work_dir,
"\nPlease remove it manually.\n", call. = FALSE, immediate. = TRUE
)
}
}
invisible(cleaned)
}
# find chrome binary ----------------
#' Find Google Chrome binary in the system
#'
#' If the chrome binary path has not already been set in \var{HEADLESS_CHROME}
#' environment variable, the function will try to find the chrome binary
#' on your system using a some hints.
#'
#' ## Windows
#'
#' It will look in the registry for an installed version
#'
#' ## macOS,
#'
#' It will return a hard-coded path of Chrome under \file{/Applications}.
#'
#' ## Linux,
#'
#' It will search for \command{chromium-browser} and \command{google-chrome} from
#' the system's \var{PATH} variable.
#'
#' @return A character string. The path the chrome binary that will
#' be used by `crrri`.
#' @author Yihui Xie, Romain Lesur, Christophe Dervieux
#' @references From `pagedown` R package, licence MIT. [Source on Github](https://github.com/rstudio/pagedown/blob/b93f46fc1ad70182e5dd3d9fc843f752fd12f780/R/chrome.R#L213)
#' @export
find_chrome_binary = function() {
# If the env var is set, do not look for another binary
res <- Sys.getenv("HEADLESS_CHROME", NA_character_)
if (!is.na(res)) {
"!DEBUG Chrome binary path set in HEADLESS_CHROME env var will be used."
return(res)
}
# If not, try to guess
# inspired by pagedown::find_chrome()
"!DEBUG Chrome binary path will be guessed."
msg <- c(
"Please pass the full path of chrome binary to the 'bin' argument ",
"or to the environment variable 'HEADLESS_CHROME'.")
switch(
.Platform$OS.type,
windows = {
res <- tryCatch({
unlist(utils::readRegistry('ChromeHTML\\shell\\open\\command', 'HCR'))
}, error = function(e) '')
res <- unlist(strsplit(res, '"'))
res <- utils::head(res[file.exists(res)], 1)
if (length(res) != 1) {
stop(
'Cannot find Google Chrome automatically from the Windows Registry Hive. ',
msg
)
}
unname(res)
},
unix = {
if (isTRUE(Sys.info()["sysname"] == "Darwin")) { # macOS
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
} else { # linux
for (i in c('google-chrome', 'chromium-browser', 'chromium', 'google-chrome-stable')) {
if ((res <- Sys.which(i)) != '') break
}
if (res == '') stop(
'Cannot find Chromium or Google Chrome in your PATH. ',
msg)
res
}
},
stop('Your platform is not supported')
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.