R/dsl.r

Defines functions make_splash_call splash_add_lua splash_response_body splash_private_mode splash_enable_javascript splash_plugins splash_images splash_go splash_click splash_focus splash_send_text splash_send_keys splash_release splash_press splash_wait splash_har_reset splash_har splash_html splash_png splash_user_agent

Documented in splash_add_lua splash_click splash_enable_javascript splash_focus splash_go splash_har splash_har_reset splash_html splash_images splash_plugins splash_png splash_press splash_private_mode splash_release splash_response_body splash_send_keys splash_send_text splash_user_agent splash_wait

make_splash_call <- function(splash_obj) {

  sprintf('
function main(splash)
%s
end
', paste0(sprintf("  %s", splash_obj$calls), collapse="\n")) -> out

  out

}

#' Add raw lua code into DSL call chain
#'
#' The `splashr` `lua` DSL (domain specific language) wrapper wraps what the package
#' author believes to be the most common/useful `lua` functions. Users of the package
#' may have need to insert some custom `lua` code within a DSL call chain they are
#' building. You can insert any Splash `lua` code you like with this function call.
#'
#' The code is inserted at the position the `splash_add_lua`() is called in the chain
#' which will be within the main "splash' function which is defined as:
#'
#' ```
#' function main(splash)
#'   ...
#' end
#' ```
#'
#' If you need more flexibility, use the [execute_lua()] function.
#'
#' @md
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param lua_code length 1 character vector of raw `lua` code
#' @export
splash_add_lua <- function(splash_obj, lua_code) {
   splash_obj$calls <- c(splash_obj$calls, lua_code, "\n")
   splash_obj
}

#' Enable or disable response content tracking.
#'
#' By default Splash doesn’t keep bodies of each response in memory, for efficiency reasons.
#'
#' @param splash_obj splashr object
#' @param enable logical
#' @export
#' @family splash_dsl_attributes
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_response_body <- function(splash_obj, enable=FALSE) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash.response_body_enabled = %s',
                                                  if (enable) "true" else "false"))
  splash_obj
}

#' Enable or disable execution of JavaSript code embedded in the page.
#'
#' Private mode is enabled by default unless you pass flag `--disable-private-mode`
#' at Splash (server) startup. Note that if you disable private mode browsing data such
#' as cookies or items kept in local storage may persist between requests.
#'
#' @md
#' @param splash_obj splashr object
#' @param enable logical
#' @family splash_dsl_attributes
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_private_mode(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_private_mode <- function(splash_obj, enable=FALSE) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash.private_mode_enabled = %s',
                                                  if (enable) "true" else "false"))
  splash_obj
}

#' Enable or disable execution of JavaSript code embedded in the page.
#'
#' JavaScript execution is enabled by default.
#'
#' @md
#' @param splash_obj splashr object
#' @param enable logical
#' @export
#' @family splash_dsl_attributes
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_private_mode(TRUE) %>%
#'   splash_enable_javascript(FALSE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_enable_javascript <- function(splash_obj, enable=TRUE) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash.js_enabled = %s',
                                                  if (enable) "true" else "false"))
  splash_obj
}

#' Enable or disable browser plugins (e.g. Flash).
#'
#' Plugins are disabled by default.
#'
#' @param splash_obj splashr object
#' @param enable logical
#' @family splash_dsl_attributes
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_plugins(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_plugins <- function(splash_obj, enable=FALSE) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash.plugins_enabled = %s',
                                                  if (enable) "true" else "false"))
  splash_obj
}

#' Enable/disable images
#'
#' By default, images are enabled. Disabling of the images can save a lot of network
#' traffic (usually around ~50%) and make rendering faster. Note that this option can
#' affect the JavaScript code inside page: disabling of the images may change sizes and
#' positions of DOM elements, and scripts may read and use them.
#'
#' @param splash_obj splashr object
#' @param enable logical
#' @family splash_dsl_attributes
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_images(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_images <- function(splash_obj, enable=TRUE) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash.images_enabled  = %s',
                                                  if (enable) "true" else "false"))
  splash_obj
}

#' Go to an URL.
#'
#' This is similar to entering an URL in a browser address bar, pressing Enter and waiting
#' until page loads.
#'
#' @param splash_obj splashr object
#' @param url - URL to load;
#' @family splash_dsl_functions
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_go <- function(splash_obj, url) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf('url = "%s"', url),
                         "splash:go(url)")
   splash_obj
}

#' Trigger mouse click event in web page.
#'
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
#' @export
splash_click <- function(splash_obj, x, y) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf("splash:mouse_click(%s, %s)", x, y))
   splash_obj
}

#' Focus on a document element provided by a CSS selector
#'
#' @md
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param selector valid CSS selector
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-text) for more info
#' @export
splash_focus <- function(splash_obj, selector) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf('splash:select("%s").node:focus()', selector))
   splash_obj
}

#' Send text as input to page context, literally, character by character.
#'
#' This is different from [splash_send_keys()]
#'
#' @md
#' @family splash_dsl_functions
#' @note This adds a call to `splash:wait` so you do not have to
#' @param splash_obj splashr object
#' @param text string to send
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-keys) for more info
#' @export
splash_send_text <- function(splash_obj, text) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf('splash:send_text("%s")', text),
                         "splash:wait(0.1)")
   splash_obj
}

#' Send keyboard events to page context.
#'
#' - whitespace is ignored and only used to separate the different keys
#' - characters are literally represented
#'
#' This is different from [splash_send_text()]
#'
#' @md
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param keys string to send
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-keys) for more info
#' @export
splash_send_keys <- function(splash_obj, keys) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf('splash:send_keys("%s")', keys),
                         "splash:wait(0.1)")
   splash_obj
}

#' Trigger mouse release event in web page.
#'
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
#' @export
splash_release <- function(splash_obj, x, y) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf("splash:mouse_release(%s, %s)", x, y))
   splash_obj
}

#' Trigger mouse press event in web page.
#'
#' @family splash_dsl_functions
#' @param splash_obj splashr object
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
#' @export
splash_press <- function(splash_obj, x, y) {
   splash_obj$calls <- c(splash_obj$calls,
                         sprintf("splash:mouse_press(%s, %s)", x, y))
   splash_obj
}

#' Wait for a period time
#'
#' When script is waiting WebKit continues processing the webpage
#'
#' @md
#' @param splash_obj splashr object
#' @param time number of seconds to wait
#' @family splash_dsl_functions
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_wait <- function(splash_obj, time=2) {
   splash_obj$calls <- c(splash_obj$calls, sprintf('splash:wait(%s)', time))
   splash_obj
}

#' Drops all internally stored HAR records.
#'
#' @md
#' @param splash_obj splashr object
#' @family splash_dsl_functions
#' @export
splash_har_reset <- function(splash_obj) {
   splash_obj$calls <- c(splash_obj$calls, 'splash:har_reset()')
   splash_obj
}

#' Return information about Splash interaction with a website in HAR format.
#'
#' Similar to [render_har()] but used in a script context. Should be the LAST element in
#' a DSL script chain as this will execute the script and return the HAR content
#'
#' @md
#' @param splash_obj splashr object
#' @family splash_dsl_functions
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_har <- function(splash_obj) {

  splash_obj$calls <- c(splash_obj$calls, 'return(splash:har())')

  call_function <- make_splash_call(splash_obj)

  res <- execute_lua(splash_obj, call_function)
  as_har(res)

}

#' Return a HTML snapshot of a current page.
#'
#' Similar to [render_html()] but used in a script context. Should be the LAST element in
#' a DSL script chain as this will execute the script and return the HTML content
#'
#' @md
#' @param splash_obj splashr object
#' @param raw_html if `TRUE` then return a character vector vs an XML document.
#' @family splash_dsl_functions
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_html() -> rud_pg
#' }
splash_html <- function(splash_obj, raw_html=FALSE) {

  splash_obj$calls <- c(splash_obj$calls, 'return(splash:html())')

  call_function <- make_splash_call(splash_obj)

  out <- execute_lua(splash_obj, call_function)

  if (!raw_html) out <- xml2::read_html(out)

  out

}

#' Return a screenshot of a current page in PNG format.
#'
#' Similar to [render_png()] but used in a script context. Should be the LAST element in
#' a DSL script chain as this will execute the script and return the PNG content
#'
#' @md
#' @param splash_obj splashr object
#' @family splash_dsl_functions
#' @return a [magick] image object
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_png()
#' }
splash_png <- function(splash_obj) {

  splash_obj$calls <- c(splash_obj$calls, 'return splash:png{render_all=true}')

  call_function <- make_splash_call(splash_obj)

  res <- execute_lua(splash_obj, call_function)

  magick::image_read(res)

}

#' Overwrite the User-Agent header for all further requests.
#'
#' There are a few built-in user agents, all beginning with `ua_`.
#'
#' @md
#' @param splash_obj splashr object
#' @param user_agent 1 element character vector, defaults to `splashr/#.#.#`.
#' @family splash_dsl_functions_functions
#' @export
#' @examples \dontrun{
#' library(rvest)
#'
#' URL <- "https://httpbin.org/user-agent"
#'
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_user_agent(ua_macos_chrome) %>%
#'   splash_go(URL) %>%
#'   splash_html() %>%
#'   html_text("body") %>%
#'   jsonlite::fromJSON()
#' }
splash_user_agent <- function(splash_obj, user_agent=ua_splashr) {
  splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent))
  splash_obj
}
hrbrmstr/splashr documentation built on Feb. 23, 2020, 2:13 p.m.