#' Return the HTML of the javascript-rendered page.
#'
#' Similar (i.e. a dynamic equivalent) to `rvest::read_html`.
#'
#' @md
#' @param splash_obj Object created by a call to [splash()]
#' @param url The URL to render (required)
#' @param base_url The base URL to render the page with.
#' @param timeout A timeout (in seconds) for the render (defaults to 30). Without
#' re-configuring the start-up parameters of the Splash server (not this package)
#' the maximum allowed value for the timeout is 60 seconds.
#' @param resource_timeout A timeout (in seconds) for individual network requests.
#' @param wait Time (in seconds) to wait for updates after page is loaded (defaults to 0).
#' @param proxy Proxy profile name or proxy URL.
#' @param js Javascript profile name.
#' @param js_src JavaScript code to be executed in page context.
#' @param filters Comma-separated list of request filter names.
#' @param allowed_domains Comma-separated list of allowed domain names. If present, Splash
#' won't load anything from domains that are not in this list, nor from subdomains of
#' domains that are not in this list.
#' @param allowed_content_types Comma-separated list of allowed content types. If present,
#' Splash will abort any request if the response’s content type doesn’t match any of
#' the content types in this list. Wildcards are supported.
#' @param forbidden_content_types Comma-separated list of forbidden content types. If
#' present, Splash will abort any request if the response’s content type matches
#' any of the content types in this list. Wildcards are supported.
#' @param viewport View width and height (in pixels) of the browser viewport to render the
#' web page. Format is `<width>x<height>`, e.g. `800x600`. Default value is `"1024x768"`.
#' @param images Whether to download images.
#' @param headers HTTP headers to set for the first outgoing request.
#' @param body Body of HTTP POST request to be sent if method is POST.
#' @param http_method HTTP method of outgoing Splash request.
#' @param save_args A list of argument names to put in cache.
#' @param load_args Parameter values to load from cache
#' @param http2 Enable or disable HTTP2 support. `TRUE` to enable; `FALSE` to disable; defaults to `FALSE`
#' when `engine` is `webkit` due to malformed behaviour in 3.4.x of Splash
#' @param engine one of `webkit` or `chromium`; defaults to `webkit`
#' @param raw_html if `TRUE` then return a character vector vs an XML document. Only valid for `render_html`
#' @family splash_renderers
#' @return An XML document. Note that this is processed by [xml2::read_html()] so it will not be
#' the pristine, raw, rendered HTML from the site. Use `raw_html=TRUE` if you do not want it
#' to be processed first by `xml2`. If you choose `raw_html=TRUE` you'll get back a
#' character vector.
#' @references [Splash docs](http://splash.readthedocs.io/en/stable/index.html)
#' @export
render_html <- function(splash_obj = splash_local, url, base_url, timeout = 30, resource_timeout, wait = 0,
                        proxy, js, js_src, filters, allowed_domains, allowed_content_types,
                        forbidden_content_types, viewport = "1024x768", images, headers, body,
                        http_method, save_args, load_args, http2 = FALSE,
                        engine = c("webkit", "chromium"), raw_html = FALSE) {

  # check_wait() is defined elsewhere in the package; presumably it validates
  # / normalises the wait value -- TODO confirm against its definition.
  wait <- check_wait(wait)

  # Only the first element of `engine` is considered, so the default
  # two-element vector resolves to "webkit".
  engine <- match.arg(engine[1], c("webkit", "chromium"))

  # The chromium engine always gets http2 = 1; otherwise the first element of
  # `http2` is coerced logical -> integer. Scalar decision, so plain if/else
  # rather than the vectorised ifelse().
  http2 <- if (engine == "chromium") 1L else as.integer(as.logical(http2[1]))

  # Parameters that are always sent.
  params <- list(
    url = url,
    timeout = timeout,
    wait = wait,
    viewport = jsonlite::unbox(viewport),
    http2 = http2,
    engine = engine
  )

  # Optional parameters are only added when the caller supplied them, so the
  # server-side defaults apply otherwise. missing() must be evaluated directly
  # on each formal, hence the explicit (un-looped) checks.
  if (!missing(base_url)) params$base_url <- jsonlite::unbox(base_url)
  if (!missing(resource_timeout)) params$resource_timeout <- resource_timeout
  if (!missing(proxy)) params$proxy <- jsonlite::unbox(proxy)
  if (!missing(js)) params$js <- jsonlite::unbox(js)
  if (!missing(js_src)) params$js_src <- jsonlite::unbox(js_src)
  if (!missing(filters)) params$filters <- jsonlite::unbox(filters)
  if (!missing(allowed_domains)) params$allowed_domains <- jsonlite::unbox(allowed_domains)
  if (!missing(allowed_content_types)) params$allowed_content_types <- jsonlite::unbox(allowed_content_types)
  if (!missing(forbidden_content_types)) params$forbidden_content_types <- jsonlite::unbox(forbidden_content_types)
  if (!missing(images)) params$images <- as.numeric(images) # TRUE/FALSE -> 1/0
  if (!missing(headers)) params$headers <- headers
  if (!missing(body)) params$body <- jsonlite::unbox(body)
  if (!missing(http_method)) params$http_method <- jsonlite::unbox(http_method)
  if (!missing(save_args)) params$save_args <- jsonlite::unbox(save_args)
  if (!missing(load_args)) params$load_args <- jsonlite::unbox(load_args)

  # Attach basic-auth credentials only when the splash object carries a user.
  # An empty list is httr::GET()'s own default for `config`, so the
  # unauthenticated request is unchanged.
  req_config <- if (is.null(splash_obj$user)) {
    list()
  } else {
    httr::authenticate(splash_obj$user, splash_obj$pass)
  }

  res <- httr::GET(
    splash_url(splash_obj),
    config = req_config,
    path = "render.html",
    encode = "json",
    query = params
  )

  # check_or_report_status() is defined elsewhere; presumably it signals on a
  # non-success HTTP status -- TODO confirm.
  check_or_report_status(res)

  html_text <- httr::content(res, as = "text", encoding = "UTF-8")

  # Hand back the rendered markup untouched when asked; otherwise parse it
  # into an xml2 document.
  if (raw_html) html_text else xml2::read_html(html_text)
}