mirror of https://git.sr.ht/~hrbrmstr/splashr
boB Rudis
7 years ago
16 changed files with 451 additions and 10 deletions
@ -0,0 +1,200 @@ |
|||||
|
# Build the Lua source that is sent to Splash.
#
# Every entry of `splash_obj$calls` is a single Lua statement. Each one is
# indented and the statements are joined with newlines, then wrapped in a
# `main(splash)` function definition. The resulting script text is returned
# as a length-one character vector.
make_splash_call <- function(splash_obj) {
  indented_calls <- sprintf(" %s", splash_obj$calls)
  lua_body <- paste0(indented_calls, collapse = "\n")
  sprintf("\nfunction main(splash)\n%s\nend\n", lua_body)
}
||||
|
|
||||
|
#' Turn response content tracking on or off.
#'
#' Appends a statement to the script that sets \code{splash.response_body_enabled}.
#' By default Splash doesn't keep bodies of each response in memory, for
#' efficiency reasons.
#'
#' @param splash_obj splashr object
#' @param enable logical; \code{TRUE} emits \code{true}, \code{FALSE} emits \code{false}
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_response_body <- function(splash_obj, enable = FALSE) {
  # Lua spells its booleans in lower case.
  lua_flag <- if (enable) "true" else "false"
  statement <- paste0("splash.response_body_enabled = ", lua_flag)
  splash_obj$calls <- c(splash_obj$calls, statement)
  splash_obj
}
||||
|
|
||||
|
#' Turn browser plugins (e.g. Flash) on or off.
#'
#' Appends a statement to the script that sets \code{splash.plugins_enabled}.
#' Plugins are disabled by default.
#'
#' @param splash_obj splashr object
#' @param enable logical; \code{TRUE} emits \code{true}, \code{FALSE} emits \code{false}
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_plugins(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_plugins <- function(splash_obj, enable = FALSE) {
  # Lua spells its booleans in lower case.
  lua_flag <- if (enable) "true" else "false"
  statement <- paste0("splash.plugins_enabled = ", lua_flag)
  splash_obj$calls <- c(splash_obj$calls, statement)
  splash_obj
}
||||
|
|
||||
|
#' Turn image loading on or off.
#'
#' Appends a statement to the script that sets \code{splash.images_enabled}.
#' By default, images are enabled. Disabling images can save a lot of network
#' traffic (usually around ~50\%) and make rendering faster. Note that this
#' option can affect the JavaScript code inside the page: disabling images may
#' change sizes and positions of DOM elements, and scripts may read and use them.
#'
#' @param splash_obj splashr object
#' @param enable logical; \code{TRUE} emits \code{true}, \code{FALSE} emits \code{false}
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_images(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_images <- function(splash_obj, enable = TRUE) {
  # Lua spells its booleans in lower case.
  lua_flag <- if (enable) "true" else "false"
  statement <- paste0("splash.images_enabled = ", lua_flag)
  splash_obj$calls <- c(splash_obj$calls, statement)
  splash_obj
}
||||
|
|
||||
|
#' Go to a URL.
#'
#' This is similar to entering a URL in a browser address bar, pressing Enter
#' and waiting until the page loads. Two statements are appended to the script:
#' one assigning the URL to the Lua variable \code{url} and one calling
#' \code{splash:go(url)}.
#'
#' @param splash_obj splashr object
#' @param url URL to load; a single, non-missing character string
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_go <- function(splash_obj, url) {

  # A vector or NA would silently produce several (or bogus) assignments.
  stopifnot(is.character(url), length(url) == 1L, !is.na(url))

  # The URL is embedded in a double-quoted Lua string literal, so characters
  # that would terminate or corrupt that literal must be escaped. Backslashes
  # go first so the escapes added afterwards are not doubled.
  lua_url <- gsub("\\", "\\\\", url, fixed = TRUE)
  lua_url <- gsub("\"", "\\\"", lua_url, fixed = TRUE)
  lua_url <- gsub("\n", "\\n", lua_url, fixed = TRUE)
  lua_url <- gsub("\r", "\\r", lua_url, fixed = TRUE)

  splash_obj$calls <- c(
    splash_obj$calls,
    sprintf('url = "%s"', lua_url),
    "splash:go(url)"
  )

  splash_obj

}
||||
|
|
||||
|
#' Wait for a period of time
#'
#' Appends a `splash:wait()` statement to the script. While the script is
#' waiting, WebKit continues processing the web page.
#'
#' @md
#' @param splash_obj splashr object
#' @param time number of seconds to wait
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_wait <- function(splash_obj, time = 2) {
  wait_statement <- sprintf("splash:wait(%s)", time)
  splash_obj$calls <- c(splash_obj$calls, wait_statement)
  splash_obj
}
||||
|
|
||||
|
#' Return information about Splash interaction with a website in HAR format.
#'
#' Similar to [render_har] but used in a script context. This should be the
#' LAST element in a DSL script chain: it appends the `return` statement,
#' executes the accumulated script and returns the HAR content.
#'
#' @md
#' @param splash_obj splashr object
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_har() -> rud_har
#' }
splash_har <- function(splash_obj) {
  # Terminate the script with the value to hand back, then run it.
  splash_obj$calls <- c(splash_obj$calls, 'return(splash:har())')
  lua_script <- make_splash_call(splash_obj)
  as_har(execute_lua(splash_obj, lua_script))
}
||||
|
|
||||
|
#' Return an HTML snapshot of the current page.
#'
#' Similar to [render_html] but used in a script context. This should be the
#' LAST element in a DSL script chain: it appends the `return` statement,
#' executes the accumulated script and returns the HTML content.
#'
#' @md
#' @param splash_obj splashr object
#' @param raw_html if `TRUE`, return the result of the script as-is instead of
#'   passing it through `xml2::read_html()`.
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_response_body(TRUE) %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_html() -> rud_pg
#' }
splash_html <- function(splash_obj, raw_html = FALSE) {
  # Terminate the script with the value to hand back, then run it.
  splash_obj$calls <- c(splash_obj$calls, 'return(splash:html())')
  lua_script <- make_splash_call(splash_obj)
  page <- execute_lua(splash_obj, lua_script)

  if (raw_html) {
    return(page)
  }

  xml2::read_html(page)
}
||||
|
|
||||
|
#' Return a screenshot of the current page in PNG format.
#'
#' Similar to [render_png] but used in a script context. This should be the
#' LAST element in a DSL script chain: it appends the `return` statement
#' (requesting `render_all=true`), executes the accumulated script and reads
#' the result with `magick::image_read()`.
#'
#' @md
#' @param splash_obj splashr object
#' @return a [magick] image object
#' @export
#' @examples \dontrun{
#' splash_local %>%
#'   splash_go("https://rud.is/b") %>%
#'   splash_wait(2) %>%
#'   splash_png()
#' }
splash_png <- function(splash_obj) {
  # Terminate the script with the value to hand back, then run it.
  splash_obj$calls <- c(splash_obj$calls, 'return splash:png{render_all=true}')
  lua_script <- make_splash_call(splash_obj)
  png_data <- execute_lua(splash_obj, lua_script)
  magick::image_read(png_data)
}
||||
|
|
@ -0,0 +1,26 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_go} |
||||
|
\alias{splash_go} |
||||
|
\title{Go to a URL.} |
||||
|
\usage{ |
||||
|
splash_go(splash_obj, url) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{url}{URL to load} |
||||
|
} |
||||
|
\description{ |
||||
|
This is similar to entering a URL in a browser address bar, pressing Enter and waiting |
||||
|
until the page loads. |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_response_body(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
@ -0,0 +1,24 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_har} |
||||
|
\alias{splash_har} |
||||
|
\title{Return information about Splash interaction with a website in HAR format.} |
||||
|
\usage{ |
||||
|
splash_har(splash_obj) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
} |
||||
|
\description{ |
||||
|
Similar to \link{render_har} but used in a script context. Should be the LAST element in |
||||
|
a DSL script chain as this will execute the script and return the HAR content |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_response_body(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
@ -0,0 +1,26 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_html} |
||||
|
\alias{splash_html} |
||||
|
\title{Return a HTML snapshot of a current page.} |
||||
|
\usage{ |
||||
|
splash_html(splash_obj, raw_html = FALSE) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{raw_html}{if \code{TRUE} then return a character vector vs an XML document.} |
||||
|
} |
||||
|
\description{ |
||||
|
Similar to \link{render_html} but used in a script context. Should be the LAST element in |
||||
|
a DSL script chain as this will execute the script and return the HTML content |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_response_body(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_html() -> rud_pg |
||||
|
} |
||||
|
} |
@ -0,0 +1,28 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_images} |
||||
|
\alias{splash_images} |
||||
|
\title{Enable/disable images} |
||||
|
\usage{ |
||||
|
splash_images(splash_obj, enable = TRUE) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{enable}{logical} |
||||
|
} |
||||
|
\description{ |
||||
|
By default, images are enabled. Disabling of the images can save a lot of network |
||||
|
traffic (usually around ~50%) and make rendering faster. Note that this option can |
||||
|
affect the JavaScript code inside page: disabling of the images may change sizes and |
||||
|
positions of DOM elements, and scripts may read and use them. |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_images(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
@ -0,0 +1,25 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_plugins} |
||||
|
\alias{splash_plugins} |
||||
|
\title{Enable or disable browser plugins (e.g. Flash).} |
||||
|
\usage{ |
||||
|
splash_plugins(splash_obj, enable = FALSE) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{enable}{logical} |
||||
|
} |
||||
|
\description{ |
||||
|
Plugins are disabled by default. |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_plugins(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
@ -0,0 +1,26 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_png} |
||||
|
\alias{splash_png} |
||||
|
\title{Return a screenshot of a current page in PNG format.} |
||||
|
\usage{ |
||||
|
splash_png(splash_obj) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
} |
||||
|
\value{ |
||||
|
a \link{magick} image object |
||||
|
} |
||||
|
\description{ |
||||
|
Similar to \link{render_png} but used in a script context. Should be the LAST element in |
||||
|
a DSL script chain as this will execute the script and return the PNG content |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_png() |
||||
|
} |
||||
|
} |
@ -0,0 +1,25 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_response_body} |
||||
|
\alias{splash_response_body} |
||||
|
\title{Enable or disable response content tracking.} |
||||
|
\usage{ |
||||
|
splash_response_body(splash_obj, enable = FALSE) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{enable}{logical} |
||||
|
} |
||||
|
\description{ |
||||
|
By default Splash doesn’t keep bodies of each response in memory, for efficiency reasons. |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_response_body(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
@ -0,0 +1,25 @@ |
|||||
|
% Generated by roxygen2: do not edit by hand |
||||
|
% Please edit documentation in R/dsl.r |
||||
|
\name{splash_wait} |
||||
|
\alias{splash_wait} |
||||
|
\title{Wait for a period of time} |
||||
|
\usage{ |
||||
|
splash_wait(splash_obj, time = 2) |
||||
|
} |
||||
|
\arguments{ |
||||
|
\item{splash_obj}{splashr object} |
||||
|
|
||||
|
\item{time}{number of seconds to wait} |
||||
|
} |
||||
|
\description{ |
||||
|
While the script is waiting, WebKit continues processing the web page. |
||||
|
} |
||||
|
\examples{ |
||||
|
\dontrun{ |
||||
|
splash_local \%>\% |
||||
|
splash_response_body(TRUE) \%>\% |
||||
|
splash_go("https://rud.is/b") \%>\% |
||||
|
splash_wait(2) \%>\% |
||||
|
splash_har() -> rud_har |
||||
|
} |
||||
|
} |
Loading…
Reference in new issue