kopia lustrzana https://git.sr.ht/~hrbrmstr/splashr
Nie możesz wybrać więcej, niż 25 tematów
Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
390 wiersze
12 KiB
390 wiersze
12 KiB
make_splash_call <- function(splash_obj) {
|
|
|
|
sprintf('
|
|
function main(splash)
|
|
%s
|
|
end
|
|
', paste0(sprintf(" %s", splash_obj$calls), collapse="\n")) -> out
|
|
|
|
out
|
|
|
|
}
|
|
|
|
#' Add raw lua code into DSL call chain
|
|
#'
|
|
#' The `splashr` `lua` DSL (domain specific language) wrapper wraps what the package
|
|
#' author believes to be the most common/useful `lua` functions. Users of the package
|
|
#' may have need to insert some custom `lua` code within a DSL call chain they are
|
|
#' building. You can insert any Splash `lua` code you like with this function call.
|
|
#'
|
|
#' The code is inserted at the position the `splash_add_lua`() is called in the chain
|
|
#' which will be within the main "splash' function which is defined as:
|
|
#'
|
|
#' ```
|
|
#' function main(splash)
|
|
#' ...
|
|
#' end
|
|
#' ```
|
|
#'
|
|
#' If you need more flexibility, use the [execute_lua()] function.
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param lua_code length 1 character vector of raw `lua` code
|
|
#' @export
|
|
splash_add_lua <- function(splash_obj, lua_code) {
|
|
splash_obj$calls <- c(splash_obj$calls, lua_code, "\n")
|
|
splash_obj
|
|
}
|
|
|
|
#' Enable or disable response content tracking.
|
|
#'
|
|
#' By default Splash doesn’t keep bodies of each response in memory, for efficiency reasons.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param enable logical
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_response_body <- function(splash_obj, enable=FALSE) {
|
|
splash_obj$calls <- c(splash_obj$calls, sprintf('splash.response_body_enabled = %s',
|
|
if (enable) "true" else "false"))
|
|
splash_obj
|
|
}
|
|
|
|
#' Enable or disable browser plugins (e.g. Flash).
|
|
#'
|
|
#' Plugins are disabled by default.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param enable logical
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_plugins(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_plugins <- function(splash_obj, enable=FALSE) {
|
|
splash_obj$calls <- c(splash_obj$calls, sprintf('splash.plugins_enabled = %s',
|
|
if (enable) "true" else "false"))
|
|
splash_obj
|
|
}
|
|
|
|
#' Enable/disable images
|
|
#'
|
|
#' By default, images are enabled. Disabling of the images can save a lot of network
|
|
#' traffic (usually around ~50%) and make rendering faster. Note that this option can
|
|
#' affect the JavaScript code inside page: disabling of the images may change sizes and
|
|
#' positions of DOM elements, and scripts may read and use them.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param enable logical
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_images(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_images <- function(splash_obj, enable=TRUE) {
|
|
splash_obj$calls <- c(splash_obj$calls, sprintf('splash.images_enabled = %s',
|
|
if (enable) "true" else "false"))
|
|
splash_obj
|
|
}
|
|
|
|
#' Go to an URL.
|
|
#'
|
|
#' This is similar to entering an URL in a browser address bar, pressing Enter and waiting
|
|
#' until page loads.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param url - URL to load;
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_go <- function(splash_obj, url) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf('url = "%s"', url),
|
|
"splash:go(url)")
|
|
splash_obj
|
|
}
|
|
|
|
#' Trigger mouse click event in web page.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
|
|
#' @export
|
|
splash_click <- function(splash_obj, x, y) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf("splash:mouse_click(%s, %s)", x, y))
|
|
splash_obj
|
|
}
|
|
|
|
#' Focus on a document element provided by a CSS selector
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param selector valid CSS selector
|
|
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-text) for more info
|
|
#' @export
|
|
splash_focus <- function(splash_obj, selector) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf('splash:select("%s").node:focus()', selector))
|
|
splash_obj
|
|
}
|
|
|
|
#' Send text as input to page context, literally, character by character.
|
|
#'
|
|
#' This is different from [splash_send_keys()]
|
|
#'
|
|
#' @md
|
|
#' @note This adds a call to `splash:wait` so you do not have to
|
|
#' @param splash_obj splashr object
|
|
#' @param text string to send
|
|
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-keys) for more info
|
|
#' @export
|
|
splash_send_text <- function(splash_obj, text) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf('splash:send_text("%s")', text),
|
|
"splash:wait(0.1)")
|
|
splash_obj
|
|
}
|
|
|
|
#' Send keyboard events to page context.
|
|
#'
|
|
#' - whitespace is ignored and only used to separate the different keys
|
|
#' - characters are literally represented
|
|
#'
|
|
#' This is different from [splash_send_text()]
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param keys string to send
|
|
#' @references See [the docs](https://splash.readthedocs.io/en/stable/scripting-ref.html#splash-send-keys) for more info
|
|
#' @export
|
|
splash_send_keys <- function(splash_obj, keys) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf('splash:send_keys("%s")', keys),
|
|
"splash:wait(0.1)")
|
|
splash_obj
|
|
}
|
|
|
|
#' Trigger mouse release event in web page.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
|
|
#' @export
|
|
splash_release <- function(splash_obj, x, y) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf("splash:mouse_release(%s, %s)", x, y))
|
|
splash_obj
|
|
}
|
|
|
|
#' Trigger mouse press event in web page.
|
|
#'
|
|
#' @param splash_obj splashr object
|
|
#' @param x,y coordinates (distances from the left or top, relative to the current viewport)
|
|
#' @export
|
|
splash_press <- function(splash_obj, x, y) {
|
|
splash_obj$calls <- c(splash_obj$calls,
|
|
sprintf("splash:mouse_press(%s, %s)", x, y))
|
|
splash_obj
|
|
}
|
|
|
|
#' Wait for a period time
|
|
#'
|
|
#' When script is waiting WebKit continues processing the webpage
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param time number of seconds to wait
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_wait <- function(splash_obj, time=2) {
|
|
splash_obj$calls <- c(splash_obj$calls, sprintf('splash:wait(%s)', time))
|
|
splash_obj
|
|
}
|
|
|
|
#' Return information about Splash interaction with a website in HAR format.
|
|
#'
|
|
#' Similar to [render_har()] but used in a script context. Should be the LAST element in
|
|
#' a DSL script chain as this will execute the script and return the HAR content
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_har() -> rud_har
|
|
#' }
|
|
splash_har <- function(splash_obj) {
|
|
|
|
splash_obj$calls <- c(splash_obj$calls, 'return(splash:har())')
|
|
|
|
call_function <- make_splash_call(splash_obj)
|
|
|
|
res <- execute_lua(splash_obj, call_function)
|
|
as_har(res)
|
|
|
|
}
|
|
|
|
#' Return a HTML snapshot of a current page.
|
|
#'
|
|
#' Similar to [render_html()] but used in a script context. Should be the LAST element in
|
|
#' a DSL script chain as this will execute the script and return the HTML content
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param raw_html if `TRUE` then return a character vector vs an XML document.
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_html() -> rud_pg
|
|
#' }
|
|
splash_html <- function(splash_obj, raw_html=FALSE) {
|
|
|
|
splash_obj$calls <- c(splash_obj$calls, 'return(splash:html())')
|
|
|
|
call_function <- make_splash_call(splash_obj)
|
|
|
|
out <- execute_lua(splash_obj, call_function)
|
|
|
|
if (!raw_html) out <- xml2::read_html(out)
|
|
|
|
out
|
|
|
|
}
|
|
|
|
#' Return a screenshot of a current page in PNG format.
|
|
#'
|
|
#' Similar to [render_png()] but used in a script context. Should be the LAST element in
|
|
#' a DSL script chain as this will execute the script and return the PNG content
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @return a [magick] image object
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' splash_local %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go("https://rud.is/b") %>%
|
|
#' splash_wait(2) %>%
|
|
#' splash_png()
|
|
#' }
|
|
splash_png <- function(splash_obj) {
|
|
|
|
splash_obj$calls <- c(splash_obj$calls, 'return splash:png{render_all=true}')
|
|
|
|
call_function <- make_splash_call(splash_obj)
|
|
|
|
res <- execute_lua(splash_obj, call_function)
|
|
|
|
magick::image_read(res)
|
|
|
|
}
|
|
|
|
#' Overwrite the User-Agent header for all further requests.
|
|
#'
|
|
#' There are a few built-in user agents, all beginning with `ua_`.
|
|
#'
|
|
#' @md
|
|
#' @param splash_obj splashr object
|
|
#' @param user_agent 1 element character vector, defaults to `splashr/#.#.#`.
|
|
#' @export
|
|
#' @examples \dontrun{
|
|
#' library(rvest)
|
|
#'
|
|
#' URL <- "https://httpbin.org/user-agent"
|
|
#'
|
|
#' splash_local %>%
|
|
#' splash_response_body(TRUE) %>%
|
|
#' splash_user_agent(ua_macos_chrome) %>%
|
|
#' splash_go(URL) %>%
|
|
#' splash_html() %>%
|
|
#' html_text("body") %>%
|
|
#' jsonlite::fromJSON()
|
|
#' }
|
|
splash_user_agent <- function(splash_obj, user_agent=ua_splashr) {
|
|
splash_obj$calls <- c(splash_obj$calls, sprintf('splash:set_user_agent("%s")', user_agent))
|
|
splash_obj
|
|
}
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_splashr <- sprintf("splashr/%s", packageVersion("splashr"))
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win10_chrome <- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win10_firefox <- "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win10_ie11 <- "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win7_chrome <- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win7_firefox <- "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_win7_ie11 <- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_macos_chrome <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_macos_safari <- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0.2 Safari/602.3.12"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_linux_chrome <- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_linux_firefox <- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0"
|
|
|
|
#' @rdname splash_user_agent
|
|
#' @export
|
|
ua_ios_safari <- "Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1"
|
|
|